From e9ad618b58530ee6e998371f71d9110772daa506 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 23 Apr 2023 20:23:04 +0300 Subject: [PATCH 001/443] =?UTF-8?q?mdbx:=20=D0=BD=D0=B0=D1=87=D0=B0=D0=BB?= =?UTF-8?q?=D0=BE=20=D0=B2=D0=B5=D1=82=D0=BA=D0=B8=20`0.13`,=20=D1=81=20?= =?UTF-8?q?=D0=BD=D0=BE=D0=B2=D1=8B=D0=BC=20=D1=84=D1=83=D0=BD=D0=BA=D1=86?= =?UTF-8?q?=D0=B8=D0=BE=D0=BD=D0=B0=D0=BB=D0=BE=D0=BC=20=D0=B8=20=D0=B8?= =?UTF-8?q?=D0=B7=D0=BC=D0=B5=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=D0=BC=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Планируется очистка от функций и возможностей ранее объявленных устаревшими. В частности, будет удалена поддержка пользовательских функций сравнения, которые были обьявлены устаревшими начиная с версии 0.9, более 33 месяцев назад. --- ChangeLog.md | 9 +++++++++ mdbx.h | 4 ++-- src/man1/mdbx_chk.1 | 2 +- src/man1/mdbx_copy.1 | 2 +- src/man1/mdbx_drop.1 | 2 +- src/man1/mdbx_dump.1 | 2 +- src/man1/mdbx_load.1 | 2 +- src/man1/mdbx_stat.1 | 2 +- 8 files changed, 17 insertions(+), 8 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 5a8f0af6..53ca0059 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -5,6 +5,14 @@ English version [by Google](https://gitflic-ru.translate.goog/project/erthink/li and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md). +## v0.13.0 at 2023-04-23 + +Не выпуск, а начало ветки `0.13` с новым функционалом и изменением API. + + +******************************************************************************** + + ## v0.12.8 (сопровождение и подготовка к релизу) Поддержка стабильной ветки. 
@@ -22,6 +30,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic ## v0.12.8 "Владимир Уткин" от 2023-10-17 + Стабилизирующий выпуск с исправлением обнаруженных ошибок и устранением недочетов, в день 100-летия со дня рождения выдающегося советского и российского ученого и конструктора [Влади́мира Фёдоровича У́ткина](https://ru.wikipedia.org/wiki/Уткин,_Владимир_Фёдорович). diff --git a/mdbx.h b/mdbx.h index ece77eda..47d3e12a 100644 --- a/mdbx.h +++ b/mdbx.h @@ -634,9 +634,9 @@ typedef mode_t mdbx_mode_t; extern "C" { #endif -/* MDBX version 0.12.x */ +/* MDBX version 0.13.x */ #define MDBX_VERSION_MAJOR 0 -#define MDBX_VERSION_MINOR 12 +#define MDBX_VERSION_MINOR 13 #ifndef LIBMDBX_API #if defined(LIBMDBX_EXPORTS) diff --git a/src/man1/mdbx_chk.1 b/src/man1/mdbx_chk.1 index aa4e9868..6f26166c 100644 --- a/src/man1/mdbx_chk.1 +++ b/src/man1/mdbx_chk.1 @@ -1,6 +1,6 @@ .\" Copyright 2015-2023 Leonid Yuriev . .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_CHK 1 "2023-10-17" "MDBX 0.12.8" +.TH MDBX_CHK 1 "2023-04-23" "MDBX 0.13" .SH NAME mdbx_chk \- MDBX checking tool .SH SYNOPSIS diff --git a/src/man1/mdbx_copy.1 b/src/man1/mdbx_copy.1 index 4e67a5b8..18658782 100644 --- a/src/man1/mdbx_copy.1 +++ b/src/man1/mdbx_copy.1 @@ -2,7 +2,7 @@ .\" Copyright 2015,2016 Peter-Service R&D LLC . .\" Copyright 2012-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_COPY 1 "2023-10-17" "MDBX 0.12.8" +.TH MDBX_COPY 1 "2023-04-23" "MDBX 0.13" .SH NAME mdbx_copy \- MDBX environment copy tool .SH SYNOPSIS diff --git a/src/man1/mdbx_drop.1 b/src/man1/mdbx_drop.1 index 425eecd2..634150ac 100644 --- a/src/man1/mdbx_drop.1 +++ b/src/man1/mdbx_drop.1 @@ -1,7 +1,7 @@ .\" Copyright 2021-2023 Leonid Yuriev . .\" Copyright 2014-2021 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. 
-.TH MDBX_DROP 1 "2023-10-17" "MDBX 0.12.8" +.TH MDBX_DROP 1 "2023-04-23" "MDBX 0.13" .SH NAME mdbx_drop \- MDBX database delete tool .SH SYNOPSIS diff --git a/src/man1/mdbx_dump.1 b/src/man1/mdbx_dump.1 index d236b93c..13a746b7 100644 --- a/src/man1/mdbx_dump.1 +++ b/src/man1/mdbx_dump.1 @@ -2,7 +2,7 @@ .\" Copyright 2015,2016 Peter-Service R&D LLC . .\" Copyright 2014-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_DUMP 1 "2023-10-17" "MDBX 0.12.8" +.TH MDBX_DUMP 1 "2023-04-23" "MDBX 0.13" .SH NAME mdbx_dump \- MDBX environment export tool .SH SYNOPSIS diff --git a/src/man1/mdbx_load.1 b/src/man1/mdbx_load.1 index ae8e7596..fdd2fc24 100644 --- a/src/man1/mdbx_load.1 +++ b/src/man1/mdbx_load.1 @@ -2,7 +2,7 @@ .\" Copyright 2015,2016 Peter-Service R&D LLC . .\" Copyright 2014-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_LOAD 1 "2023-10-17" "MDBX 0.12.8" +.TH MDBX_LOAD 1 "2023-04-23" "MDBX 0.13" .SH NAME mdbx_load \- MDBX environment import tool .SH SYNOPSIS diff --git a/src/man1/mdbx_stat.1 b/src/man1/mdbx_stat.1 index c330d2e6..0260bb71 100644 --- a/src/man1/mdbx_stat.1 +++ b/src/man1/mdbx_stat.1 @@ -2,7 +2,7 @@ .\" Copyright 2015,2016 Peter-Service R&D LLC . .\" Copyright 2012-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. 
-.TH MDBX_STAT 1 "2023-10-17" "MDBX 0.12.8" +.TH MDBX_STAT 1 "2023-04-23" "MDBX 0.13" .SH NAME mdbx_stat \- MDBX environment status tool .SH SYNOPSIS From dd9fc963d2221d4880ee536a0501ad985d9d796b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 28 Mar 2023 21:24:18 +0300 Subject: [PATCH 002/443] =?UTF-8?q?mdbx:=20=D0=B8=D0=B7=D0=BC=D0=B5=D0=BD?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B8=20=D1=80=D0=B0=D1=81=D1=88?= =?UTF-8?q?=D0=B8=D1=80=D0=B5=D0=BD=D0=B8=D0=B5=20API=20=D1=84=D1=83=D0=BD?= =?UTF-8?q?=D0=BA=D1=86=D0=B8=D0=BE=D0=BD=D0=B0=D0=BB=D0=BE=D0=BC=20=D0=BF?= =?UTF-8?q?=D1=80=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B8=20=D1=86=D0=B5=D0=BB?= =?UTF-8?q?=D0=BE=D1=81=D1=82=D0=BD=D0=BE=D1=81=D1=82=D0=B8=20=D1=81=D1=82?= =?UTF-8?q?=D1=80=D1=83=D0=BA=D1=82=D1=83=D1=80=D1=8B=20=D0=91=D0=94.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 231 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 226 insertions(+), 5 deletions(-) diff --git a/mdbx.h b/mdbx.h index 47d3e12a..768e9b00 100644 --- a/mdbx.h +++ b/mdbx.h @@ -816,7 +816,7 @@ typedef struct iovec MDBX_val; #endif /* ! SunOS */ enum MDBX_constants { - /** The hard limit for DBI handles */ + /** The hard limit for DBI handles. */ MDBX_MAX_DBI = UINT32_C(32765), /** The maximum size of a data item. */ @@ -5519,9 +5519,9 @@ LIBMDBX_API int mdbx_env_set_hsr(MDBX_env *env, MDBX_hsr_func *hsr_callback); MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API MDBX_hsr_func * mdbx_env_get_hsr(const MDBX_env *env); -/** \defgroup btree_traversal B-tree Traversal - * This is internal API for mdbx_chk tool. You should avoid to use it, except - * some extremal special cases. +/** \defgroup chk Checking and Recovery + * Basically this is internal API for `mdbx_chk` tool, etc. + * You should avoid to use it, except some extremal special cases. 
* \ingroup c_extra * @{ */ @@ -5562,6 +5562,16 @@ MDBX_pgvisitor_func(const uint64_t pgno, const unsigned number, void *const ctx, LIBMDBX_API int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor, void *ctx, bool dont_check_keys_ordering); +/** \brief Acquires write-transaction lock. + * Provided for custom and/or complex locking scenarios. + * \returns A non-zero error value on failure and 0 on success. */ +LIBMDBX_API int mdbx_txn_lock(MDBX_env *env, bool dont_wait); + +/** \brief Releases write-transaction lock. + * Provided for custom and/or complex locking scenarios. + * \returns A non-zero error value on failure and 0 on success. */ +LIBMDBX_API void mdbx_txn_unlock(MDBX_env *env); + /** \brief Open an environment instance using specific meta-page * for checking and recovery. * @@ -5592,7 +5602,218 @@ LIBMDBX_API int mdbx_env_open_for_recoveryW(MDBX_env *env, * leg(s). */ LIBMDBX_API int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target_meta); -/** end of btree_traversal @} */ +/** \brief Флаги/опции для проверки целостности БД. + * \see mdbx_env_chk() */ +enum MDBX_chk_flags_t { + /** Режим проверки по-умолчанию, в том числе в режиме только-чтения. */ + MDBX_CHK_DEFAULTS = 0, + + /** Проверка в режиме чтения-записи, с захватом блокировки и приостановки + * пишущих транзакций. */ + MDBX_CHK_READWRITE = 1, + + /** Пропустить обход дерева страниц. */ + MDBX_CHK_SKIP_BTREE_TRAVERSAL = 2, + + /** Пропустить просмотр записей ключ-значение. */ + MDBX_CHK_SKIP_KV_TRAVERSAL = 4, + + /** Игнорировать порядок ключей и записей. + * \note Требуется при проверке унаследованных БД созданных с использованием + * нестандартных (пользовательских) функций сравнения ключей или значений. 
*/ + MDBX_CHK_IGNORE_ORDER = 8 +}; +#ifndef __cplusplus +/** \ingroup c_opening */ +typedef enum MDBX_chk_flags_t MDBX_chk_flags_t; +#else +DEFINE_ENUM_FLAG_OPERATORS(MDBX_chk_flags_t) +#endif + +/** \brief Уровни логирование/детализации информации, + * поставляемой через обратные вызовы при проверке целостности БД. + * \see mdbx_env_chk() */ +enum MDBX_chk_severity { + MDBX_chk_severity_prio_shift = 4, + MDBX_chk_severity_kind_mask = 0xF, + MDBX_chk_fatal = 0x00u, + MDBX_chk_error = 0x11u, + MDBX_chk_warning = 0x22u, + MDBX_chk_notice = 0x33u, + MDBX_chk_result = 0x44u, + MDBX_chk_resolution = 0x55u, + MDBX_chk_processing = 0x56u, + MDBX_chk_info = 0x67u, + MDBX_chk_verbose = 0x78u, + MDBX_chk_details = 0x89u, + MDBX_chk_extra = 0x9Au +}; + +/** \brief Стадии проверки, + * сообщаемые через обратные вызовы при проверке целостности БД. + * \see mdbx_env_chk() */ +enum MDBX_chk_stage { + MDBX_chk_none, + MDBX_chk_init, + MDBX_chk_lock, + MDBX_chk_meta, + MDBX_chk_traversal_tree, + MDBX_chk_traversal_freedb, + MDBX_chk_space, + MDBX_chk_traversal_maindb, + MDBX_chk_traversal_subdbs, + MDBX_chk_conclude, + MDBX_chk_unlock, + MDBX_chk_finalize +}; + +/** \brief Виртуальная строка отчета, формируемого при проверке целостности БД. + * \see mdbx_env_chk() */ +typedef struct MDBX_chk_line { + struct MDBX_chk_context *ctx; + uint8_t severity, scope_depth, empty; + char *begin, *end, *out; +} MDBX_chk_line_t; + +/** \brief Проблема обнаруженная при проверке целостности БД. + * \see mdbx_env_chk() */ +typedef struct MDBX_chk_issue { + struct MDBX_chk_issue *next; + size_t count; + const char *caption; +} MDBX_chk_issue_t; + +/** \brief Иерархический контекст при проверке целостности БД. 
+ * \see mdbx_env_chk() */ +typedef struct MDBX_chk_scope { + MDBX_chk_issue_t *issues; + struct MDBX_chk_internal *internal; + const void *object; + enum MDBX_chk_stage stage; + enum MDBX_chk_severity verbosity; + size_t subtotal_issues; + union { + void *ptr; + size_t number; + } usr_z, usr_v, usr_o; +} MDBX_chk_scope_t; + +/** \brief Пользовательский тип для привязки дополнительных данных, + * связанных с некоторой таблицей ключ-значение, при проверке целостности БД. + * \see mdbx_env_chk() */ +typedef struct MDBX_chk_user_subdb_cookie MDBX_chk_user_subdb_cookie_t; + +/** \brief Гистограмма с некоторой статистической информацией, + * собираемой при проверке целостности БД. + * \see mdbx_env_chk() */ +struct MDBX_chk_histogram { + size_t amount, count, ones, pad; + struct { + size_t begin, end, amount, count; + } ranges[9]; +}; + +/** \brief Информация о некоторой таблицей ключ-значение, + * при проверке целостности БД. + * \see mdbx_env_chk() */ +typedef struct MDBX_chk_subdb { + MDBX_chk_user_subdb_cookie_t *cookie; + MDBX_val name; + MDBX_db_flags_t flags; + int id; + + size_t payload_bytes, lost_bytes; + struct { + size_t all, empty, other; + size_t branch, leaf; + size_t nested_branch, nested_leaf, nested_subleaf; + } pages; + struct { + /// Tree deep histogram + struct MDBX_chk_histogram deep; + /// Histogram of large/overflow pages length + struct MDBX_chk_histogram large_pages; + /// Histogram of nested trees height, span length for GC + struct MDBX_chk_histogram nested_tree; + /// Keys length histogram + struct MDBX_chk_histogram key_len; + /// Values length histogram + struct MDBX_chk_histogram val_len; + } histogram; +} MDBX_chk_subdb_t; + +/** \brief Контекст проверки целостности БД. 
+ * \see mdbx_env_chk() */ +typedef struct MDBX_chk_context { + struct MDBX_chk_internal *internal; + MDBX_env *env; + MDBX_txn *txn; + MDBX_chk_scope_t *scope; + unsigned scope_nesting; + struct { + size_t total_payload_bytes; + size_t subdb_total, subdb_processed; + size_t total_unused_bytes, unused_pages; + size_t processed_pages, reclaimable_pages, gc_pages, alloc_pages, + backed_pages; + size_t problems_meta, tree_problems, gc_tree_problems, kv_tree_problems, + problems_gc, problems_kv, total_problems; + uint64_t steady_txnid, recent_txnid; + /** Указатель на массив размером subdb_total с указателями на экземпляры + * структур MDBX_chk_subdb_t с информацией о всех таблицах ключ-значние, + * включая MainDB и GC/FreeDB. */ + const MDBX_chk_subdb_t *const *subdbs; + } result; +} MDBX_chk_context_t; + +/** FIXME */ +typedef struct MDBX_chk_callbacks { + bool (*check_break)(MDBX_chk_context_t *ctx); + int (*scope_push)(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *outer, + MDBX_chk_scope_t *inner, const char *fmt, va_list args); + int (*scope_conclude)(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *outer, + MDBX_chk_scope_t *inner, int err); + void (*scope_pop)(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *outer, + MDBX_chk_scope_t *inner); + void (*issue)(MDBX_chk_context_t *ctx, const char *object, + size_t entry_number, const char *issue, const char *extra_fmt, + va_list extra_args); + MDBX_chk_user_subdb_cookie_t *(*subdb_filter)(MDBX_chk_context_t *ctx, + const MDBX_val *name, + MDBX_db_flags_t flags); + int (*subdb_conclude)(MDBX_chk_context_t *ctx, const MDBX_chk_subdb_t *subdb, + MDBX_cursor *cursor, int err); + void (*subdb_dispose)(MDBX_chk_context_t *ctx, const MDBX_chk_subdb_t *subdb); + + int (*subdb_handle_kv)(MDBX_chk_context_t *ctx, const MDBX_chk_subdb_t *subdb, + size_t entry_number, const MDBX_val *key, + const MDBX_val *value); + + int (*stage_begin)(MDBX_chk_context_t *ctx, enum MDBX_chk_stage); + int (*stage_end)(MDBX_chk_context_t *ctx, enum 
MDBX_chk_stage, int err); + + struct { + MDBX_chk_line_t *(*begin)(MDBX_chk_context_t *ctx, + enum MDBX_chk_severity severity); + void (*flush)(MDBX_chk_line_t *); + void (*done)(MDBX_chk_line_t *); + void (*chars)(MDBX_chk_line_t *, const char *str, size_t len); + void (*format)(MDBX_chk_line_t *, const char *fmt, va_list args); + void (*size)(MDBX_chk_line_t *, const char *prefix, const uint64_t value, + const char *suffix); + } print; +} MDBX_chk_callbacks_t; + +/** FIXME */ +LIBMDBX_API int mdbx_env_chk(MDBX_env *env, const MDBX_chk_callbacks_t *cb, + MDBX_chk_context_t *ctx, + const enum MDBX_chk_flags_t flags, + enum MDBX_chk_severity verbosity, + unsigned timeout_seconds_16dot16); +/** FIXME */ +LIBMDBX_API int mdbx_env_chk_problem(MDBX_chk_context_t *ctx); + +/** end of chk @} */ /** end of c_api @} */ From f0d523c507042cc70eeeb690778c9b2be6a8b33f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 10 Oct 2023 23:14:40 +0300 Subject: [PATCH 003/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20API=20=D1=84=D1=83=D0=BD=D0=BA?= =?UTF-8?q?=D1=86=D0=B8=D1=8F=D0=BC=D0=B8=20lock/unlock/upgrade/downgrade?= =?UTF-8?q?=20=D0=BE=D1=81=D0=BD=D0=BE=D0=B2=D0=BD=D0=BE=D0=B9=20=D0=B1?= =?UTF-8?q?=D0=BB=D0=BE=D0=BA=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 2 +- src/core.c | 78 ++++++++++++++++++++++++++++++++--------------- src/internals.h | 1 - src/lck-posix.c | 61 +++++++++++++++++++++++++++--------- src/lck-windows.c | 33 +++++++++++--------- src/osal.h | 14 ++++----- 6 files changed, 126 insertions(+), 63 deletions(-) diff --git a/mdbx.h b/mdbx.h index 768e9b00..de16ccff 100644 --- a/mdbx.h +++ b/mdbx.h @@ -5570,7 +5570,7 @@ LIBMDBX_API int mdbx_txn_lock(MDBX_env *env, bool dont_wait); /** \brief Releases 
write-transaction lock. * Provided for custom and/or complex locking scenarios. * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API void mdbx_txn_unlock(MDBX_env *env); +LIBMDBX_API int mdbx_txn_unlock(MDBX_env *env); /** \brief Open an environment instance using specific meta-page * for checking and recovery. diff --git a/src/core.c b/src/core.c index 899b65af..d0cb0914 100644 --- a/src/core.c +++ b/src/core.c @@ -8200,7 +8200,7 @@ retry:; rc = MDBX_SUCCESS /* means "some data was synced" */; } - err = mdbx_txn_lock(env, nonblock); + err = osal_txn_lock(env, nonblock); if (unlikely(err != MDBX_SUCCESS)) return err; @@ -8247,7 +8247,7 @@ retry:; bailout: if (locked) - mdbx_txn_unlock(env); + osal_txn_unlock(env); return rc; } @@ -8442,7 +8442,7 @@ static void txn_valgrind(MDBX_env *env, MDBX_txn *txn) { } else if (env->me_flags & MDBX_RDONLY) { /* read-only mode, no write-txn, no wlock mutex */ last = NUM_METAS; - } else if (mdbx_txn_lock(env, true) == MDBX_SUCCESS) { + } else if (osal_txn_lock(env, true) == MDBX_SUCCESS) { /* no write-txn */ last = NUM_METAS; should_unlock = true; @@ -8463,7 +8463,7 @@ static void txn_valgrind(MDBX_env *env, MDBX_txn *txn) { pgno2bytes(env, edge - last)); } if (should_unlock) - mdbx_txn_unlock(env); + osal_txn_unlock(env); } } #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ @@ -8840,6 +8840,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { txn->mt_flags = MDBX_TXN_RDONLY | MDBX_TXN_FINISHED; return MDBX_SUCCESS; } + txn->mt_owner = tid; /* Seek & fetch the last meta */ uint64_t timestamp = 0; @@ -8915,12 +8916,11 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { rc = MDBX_CORRUPTED; goto bailout; } - eASSERT(env, txn->mt_txnid >= env->me_lck->mti_oldest_reader.weak); txn->mt_dbxs = env->me_dbxs; /* mostly static anyway */ + txn->mt_numdbs = env->me_numdbs; ENSURE(env, txn->mt_txnid >= /* paranoia is appropriate here */ env->me_lck ->mti_oldest_reader.weak); - 
txn->mt_numdbs = env->me_numdbs; } else { eASSERT(env, (flags & ~(MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_SPILLS | MDBX_WRITEMAP)) == 0); @@ -8946,16 +8946,16 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { /* Not yet touching txn == env->me_txn0, it may be active */ jitter4testing(false); - rc = mdbx_txn_lock(env, !!(flags & MDBX_TXN_TRY)); + rc = osal_txn_lock(env, !!(flags & MDBX_TXN_TRY)); if (unlikely(rc)) return rc; if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) { - mdbx_txn_unlock(env); + osal_txn_unlock(env); return MDBX_PANIC; } #if defined(_WIN32) || defined(_WIN64) if (unlikely(!env->me_map)) { - mdbx_txn_unlock(env); + osal_txn_unlock(env); return MDBX_EPERM; } #endif /* Windows */ @@ -9129,7 +9129,6 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) txn_valgrind(env, txn); #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ - txn->mt_owner = tid; return MDBX_SUCCESS; } bailout: @@ -9810,9 +9809,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { txn->mt_txnid == slot->mr_txnid.weak && slot->mr_txnid.weak >= env->me_lck->mti_oldest_reader.weak); #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) - atomic_add32(&env->me_ignore_EDEADLK, 1); txn_valgrind(env, nullptr); - atomic_sub32(&env->me_ignore_EDEADLK, 1); #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ atomic_store32(&slot->mr_snapshot_pages_used, 0, mo_Relaxed); safe64_reset(&slot->mr_txnid, false); @@ -9845,7 +9842,6 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ txn->mt_flags = MDBX_TXN_FINISHED; - txn->mt_owner = 0; env->me_txn = txn->mt_parent; pnl_free(txn->tw.spilled.list); txn->tw.spilled.list = nullptr; @@ -9858,7 +9854,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { if (!(env->me_flags & MDBX_WRITEMAP)) dlist_free(txn); /* The writer mutex was locked in mdbx_txn_begin. 
*/ - mdbx_txn_unlock(env); + osal_txn_unlock(env); } else { eASSERT(env, txn->mt_parent != NULL); MDBX_txn *const parent = txn->mt_parent; @@ -9870,6 +9866,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { eASSERT(env, memcmp(&txn->tw.troika, &parent->tw.troika, sizeof(meta_troika_t)) == 0); + txn->mt_owner = 0; if (txn->tw.lifo_reclaimed) { eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) >= (uintptr_t)parent->tw.lifo_reclaimed); @@ -13258,7 +13255,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, return MDBX_EACCESS; if (!inside_txn) { - int err = mdbx_txn_lock(env, false); + int err = osal_txn_lock(env, false); if (unlikely(err != MDBX_SUCCESS)) return err; need_unlock = true; @@ -13609,7 +13606,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, bailout: if (need_unlock) - mdbx_txn_unlock(env); + osal_txn_unlock(env); return rc; } @@ -21675,13 +21672,13 @@ __cold static int env_copy_asis(MDBX_env *env, MDBX_txn *read_txn, return rc; /* Temporarily block writers until we snapshot the meta pages */ - rc = mdbx_txn_lock(env, false); + rc = osal_txn_lock(env, false); if (unlikely(rc != MDBX_SUCCESS)) return rc; rc = txn_renew(read_txn, MDBX_TXN_RDONLY); if (unlikely(rc != MDBX_SUCCESS)) { - mdbx_txn_unlock(env); + osal_txn_unlock(env); return rc; } @@ -21693,7 +21690,7 @@ __cold static int env_copy_asis(MDBX_env *env, MDBX_txn *read_txn, memcpy(buffer, env->me_map, meta_bytes); MDBX_meta *const headcopy = /* LY: get pointer to the snapshot copy */ ptr_disp(buffer, ptr_dist(meta_recent(env, &troika).ptr_c, env->me_map)); - mdbx_txn_unlock(env); + osal_txn_unlock(env); if (flags & MDBX_CP_FORCE_DYNAMIC_SIZE) meta_make_sizeable(headcopy); @@ -21953,7 +21950,7 @@ __cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, env->me_txn0->mt_owner != osal_thread_self(); bool should_unlock = false; if (lock_needed) { - rc = mdbx_txn_lock(env, false); + rc = osal_txn_lock(env, false); if (unlikely(rc)) 
return rc; should_unlock = true; @@ -21965,7 +21962,7 @@ __cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, env->me_flags &= ~flags; if (should_unlock) - mdbx_txn_unlock(env); + osal_txn_unlock(env); return MDBX_SUCCESS; } @@ -24828,7 +24825,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, return MDBX_EINVAL; if (env->me_options.dp_reserve_limit != (unsigned)value) { if (lock_needed) { - err = mdbx_txn_lock(env, false); + err = osal_txn_lock(env, false); if (unlikely(err != MDBX_SUCCESS)) return err; should_unlock = true; @@ -24868,7 +24865,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, if (unlikely(env->me_flags & MDBX_RDONLY)) return MDBX_EACCESS; if (lock_needed) { - err = mdbx_txn_lock(env, false); + err = osal_txn_lock(env, false); if (unlikely(err != MDBX_SUCCESS)) return err; should_unlock = true; @@ -24968,7 +24965,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, } if (should_unlock) - mdbx_txn_unlock(env); + osal_txn_unlock(env); return err; } @@ -25490,6 +25487,39 @@ mdbx_key_from_int32(const int32_t i32) { #endif /* LIBMDBX_NO_EXPORTS_LEGACY_API */ +/*------------------------------------------------------------------------------ + * Locking API */ + +int mdbx_txn_lock(MDBX_env *env, bool dont_wait) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(env->me_flags & MDBX_RDONLY)) + return MDBX_EACCESS; + if (unlikely(env->me_txn0->mt_owner || + (env->me_txn0->mt_flags & MDBX_TXN_FINISHED) == 0)) + return MDBX_BUSY; + + return osal_txn_lock(env, dont_wait); +} + +int mdbx_txn_unlock(MDBX_env *env) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(env->me_flags & MDBX_RDONLY)) + return MDBX_EACCESS; + if (unlikely(env->me_txn0->mt_owner != osal_thread_self())) + return MDBX_THREAD_MISMATCH; + if (unlikely((env->me_txn0->mt_flags & MDBX_TXN_FINISHED) 
== 0)) + return MDBX_BUSY; + + osal_txn_unlock(env); + return MDBX_SUCCESS; +} + /******************************************************************************/ /* *INDENT-OFF* */ /* clang-format off */ diff --git a/src/internals.h b/src/internals.h index 3d1fd706..1664dcd7 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1482,7 +1482,6 @@ struct MDBX_env { int me_valgrind_handle; #endif #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) - MDBX_atomic_uint32_t me_ignore_EDEADLK; pgno_t me_poison_edge; #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ diff --git a/src/lck-posix.c b/src/lck-posix.c index 17c50ddd..7f58e9ed 100644 --- a/src/lck-posix.c +++ b/src/lck-posix.c @@ -120,7 +120,7 @@ mdbx_global_destructor(void) { * - Блокировка таблицы читателей для регистрации, * т.е. функции osal_rdt_lock() и osal_rdt_unlock(). * - Блокировка БД для пишущих транзакций, - * т.е. функции mdbx_txn_lock() и mdbx_txn_unlock(). + * т.е. функции osal_txn_lock() и osal_txn_unlock(). * * Остальной функционал реализуется отдельно посредством файловых блокировок: * - Первоначальный захват БД в режиме exclusive/shared и последующий перевод @@ -527,6 +527,34 @@ MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env) { return rc; } +MDBX_INTERNAL_FUNC int osal_lck_upgrade(MDBX_env *env, bool dont_wait) { + assert(env->me_lfd != INVALID_HANDLE_VALUE); + if (unlikely(osal_getpid() != env->me_pid)) + return MDBX_PANIC; + + const int cmd = dont_wait ? op_setlk : op_setlkw; + int rc = lck_op(env->me_lfd, cmd, F_WRLCK, 0, 1); + if (rc == MDBX_SUCCESS && (env->me_flags & MDBX_EXCLUSIVE) == 0) { + rc = (env->me_pid > 1) + ? 
lck_op(env->me_lazy_fd, cmd, F_WRLCK, 0, env->me_pid - 1) + : MDBX_SUCCESS; + if (rc == MDBX_SUCCESS) { + rc = lck_op(env->me_lazy_fd, cmd, F_WRLCK, env->me_pid + 1, + OFF_T_MAX - env->me_pid - 1); + if (rc != MDBX_SUCCESS && env->me_pid > 1 && + lck_op(env->me_lazy_fd, op_setlk, F_UNLCK, 0, env->me_pid - 1)) + rc = MDBX_PANIC; + } + if (rc != MDBX_SUCCESS && lck_op(env->me_lfd, op_setlk, F_RDLCK, 0, 1)) + rc = MDBX_PANIC; + } + if (unlikely(rc != 0)) { + ERROR("%s, err %u", "lck", rc); + assert(MDBX_IS_ERROR(rc)); + } + return rc; +} + __cold MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, MDBX_env *inprocess_neighbor) { if (unlikely(osal_getpid() != env->me_pid)) @@ -822,11 +850,6 @@ __cold static int mdbx_ipclock_failed(MDBX_env *env, osal_ipclock_t *ipc, #error "FIXME" #endif /* MDBX_LOCKING */ -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) - if (rc == EDEADLK && atomic_load32(&env->me_ignore_EDEADLK, mo_Relaxed) > 0) - return rc; -#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ - ERROR("mutex (un)lock failed, %s", mdbx_strerror(err)); if (rc != EDEADLK) env->me_flags |= MDBX_FATAL_ERROR; @@ -931,20 +954,28 @@ MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env) { jitter4testing(true); } -int mdbx_txn_lock(MDBX_env *env, bool dont_wait) { +int osal_txn_lock(MDBX_env *env, bool dont_wait) { TRACE("%swait %s", dont_wait ? "dont-" : "", ">>"); + eASSERT(env, !env->me_txn0->mt_owner); jitter4testing(true); - int rc = mdbx_ipclock_lock(env, &env->me_lck->mti_wlock, dont_wait); - TRACE("<< rc %d", rc); - return MDBX_IS_ERROR(rc) ? 
rc : MDBX_SUCCESS; + const int err = mdbx_ipclock_lock(env, &env->me_lck->mti_wlock, dont_wait); + int rc = err; + if (likely(!MDBX_IS_ERROR(err))) { + env->me_txn0->mt_owner = osal_thread_self(); + rc = MDBX_SUCCESS; + } + TRACE("<< rc %d", err); + return rc; } -void mdbx_txn_unlock(MDBX_env *env) { +void osal_txn_unlock(MDBX_env *env) { TRACE("%s", ">>"); - int rc = mdbx_ipclock_unlock(env, &env->me_lck->mti_wlock); - TRACE("<< rc %d", rc); - if (unlikely(rc != MDBX_SUCCESS)) - mdbx_panic("%s() failed: err %d\n", __func__, rc); + eASSERT(env, env->me_txn0->mt_owner == osal_thread_self()); + env->me_txn0->mt_owner = 0; + int err = mdbx_ipclock_unlock(env, &env->me_lck->mti_wlock); + TRACE("<< err %d", err); + if (unlikely(err != MDBX_SUCCESS)) + mdbx_panic("%s() failed: err %d\n", __func__, err); jitter4testing(true); } diff --git a/src/lck-windows.c b/src/lck-windows.c index 8ffccb1b..ed77da30 100644 --- a/src/lck-windows.c +++ b/src/lck-windows.c @@ -178,7 +178,8 @@ static int funlock(mdbx_filehandle_t fd, size_t offset, size_t bytes) { #define DXB_BODY (env->me_psize * (size_t)NUM_METAS), DXB_MAXLEN #define DXB_WHOLE 0, DXB_MAXLEN -int mdbx_txn_lock(MDBX_env *env, bool dontwait) { +int osal_txn_lock(MDBX_env *env, bool dontwait) { + eASSERT(env, !env->me_txn0->mt_owner); if (dontwait) { if (!TryEnterCriticalSection(&env->me_windowsbug_lock)) return MDBX_BUSY; @@ -194,12 +195,8 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) { } } - if (env->me_flags & MDBX_EXCLUSIVE) { - /* Zap: Failing to release lock 'env->me_windowsbug_lock' - * in function 'mdbx_txn_lock' */ - MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(26115); - return MDBX_SUCCESS; - } + if (env->me_flags & MDBX_EXCLUSIVE) + goto done; const HANDLE fd4data = env->me_overlapped_fd ? 
env->me_overlapped_fd : env->me_lazy_fd; @@ -218,17 +215,20 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) { } } if (rc == MDBX_SUCCESS) { + done: /* Zap: Failing to release lock 'env->me_windowsbug_lock' * in function 'mdbx_txn_lock' */ MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(26115); - return rc; + env->me_txn0->mt_owner = osal_thread_self(); + return MDBX_SUCCESS; } LeaveCriticalSection(&env->me_windowsbug_lock); return (!dontwait || rc != ERROR_LOCK_VIOLATION) ? rc : MDBX_BUSY; } -void mdbx_txn_unlock(MDBX_env *env) { +void osal_txn_unlock(MDBX_env *env) { + eASSERT(env, env->me_txn0->mt_owner == osal_thread_self()); if ((env->me_flags & MDBX_EXCLUSIVE) == 0) { const HANDLE fd4data = env->me_overlapped_fd ? env->me_overlapped_fd : env->me_lazy_fd; @@ -236,6 +236,7 @@ void mdbx_txn_unlock(MDBX_env *env) { if (err != MDBX_SUCCESS) mdbx_panic("%s failed: err %u", __func__, err); } + env->me_txn0->mt_owner = 0; LeaveCriticalSection(&env->me_windowsbug_lock); } @@ -442,7 +443,7 @@ osal_resume_threads_after_remap(mdbx_handle_array_t *array) { * The osal_lck_downgrade() moves the locking-FSM from "exclusive write" * state to the "used" (i.e. shared) state. * - * The mdbx_lck_upgrade() moves the locking-FSM from "used" (i.e. shared) + * The osal_lck_upgrade() moves the locking-FSM from "used" (i.e. shared) * state to the "exclusive write" state. */ @@ -615,7 +616,7 @@ MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env) { return MDBX_SUCCESS /* 5) now at S-? (used), done */; } -MDBX_INTERNAL_FUNC int mdbx_lck_upgrade(MDBX_env *env) { +MDBX_INTERNAL_FUNC int osal_lck_upgrade(MDBX_env *env, bool dont_wait) { /* Transite from used state (S-?) to exclusive-write (E-E) */ assert(env->me_lfd != INVALID_HANDLE_VALUE); @@ -625,7 +626,9 @@ MDBX_INTERNAL_FUNC int mdbx_lck_upgrade(MDBX_env *env) { /* 1) now on S-? (used), try S-E (locked) */ jitter4testing(false); - int rc = flock(env->me_lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_UPPER); + int rc = flock(env->me_lfd, + dont_wait ? 
LCK_EXCLUSIVE | LCK_DONTWAIT : LCK_EXCLUSIVE, + LCK_UPPER); if (rc != MDBX_SUCCESS) { /* 2) something went wrong, give up */; VERBOSE("%s, err %u", "S-?(used) >> S-E(locked)", rc); @@ -640,7 +643,9 @@ MDBX_INTERNAL_FUNC int mdbx_lck_upgrade(MDBX_env *env) { /* 4) now on ?-E (middle), try E-E (exclusive-write) */ jitter4testing(false); - rc = flock(env->me_lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_LOWER); + rc = flock(env->me_lfd, + dont_wait ? LCK_EXCLUSIVE | LCK_DONTWAIT : LCK_EXCLUSIVE, + LCK_LOWER); if (rc != MDBX_SUCCESS) { /* 5) something went wrong, give up */; VERBOSE("%s, err %u", "?-E(middle) >> E-E(exclusive-write)", rc); @@ -686,7 +691,7 @@ MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, const bool synced = env->me_lck_mmap.lck->mti_unsynced_pages.weak == 0; osal_munmap(&env->me_lck_mmap); if (synced && !inprocess_neighbor && env->me_lfd != INVALID_HANDLE_VALUE && - mdbx_lck_upgrade(env) == MDBX_SUCCESS) + osal_lck_upgrade(env, true) == MDBX_SUCCESS) /* this will fail if LCK is used/mmapped by other process(es) */ osal_ftruncate(env->me_lfd, 0); } diff --git a/src/osal.h b/src/osal.h index 3d45da4b..1b5c317f 100644 --- a/src/osal.h +++ b/src/osal.h @@ -718,6 +718,8 @@ MDBX_INTERNAL_FUNC int osal_lck_seize(MDBX_env *env); /// operational lock. /// \return Error code or zero on success MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env); +MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC int osal_lck_upgrade(MDBX_env *env, + bool dont_wait); /// \brief Locks LCK-file or/and table of readers for (de)registering. /// \return Error code or zero on success @@ -726,16 +728,12 @@ MDBX_INTERNAL_FUNC int osal_rdt_lock(MDBX_env *env); /// \brief Unlocks LCK-file or/and table of readers after (de)registering. MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env); -/// \brief Acquires lock for DB change (on writing transaction start) -/// Reading transactions will not be blocked. -/// Declared as LIBMDBX_API because it is used in mdbx_chk. 
+/// \brief Acquires write-transaction lock. /// \return Error code or zero on success -LIBMDBX_API int mdbx_txn_lock(MDBX_env *env, bool dont_wait); +MDBX_INTERNAL_FUNC int osal_txn_lock(MDBX_env *env, bool dont_wait); -/// \brief Releases lock once DB changes is made (after writing transaction -/// has finished). -/// Declared as LIBMDBX_API because it is used in mdbx_chk. -LIBMDBX_API void mdbx_txn_unlock(MDBX_env *env); +/// \brief Releases write-transaction lock.. +MDBX_INTERNAL_FUNC void osal_txn_unlock(MDBX_env *env); /// \brief Sets alive-flag of reader presence (indicative lock) for PID of /// the current process. The function does no more than needed for From 253a56206b60fffd8ed7f4575607fb06717cef33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 24 Apr 2023 20:59:18 +0300 Subject: [PATCH 004/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=B5=D1=80=D0=B5=D1=80?= =?UTF-8?q?=D0=B0=D0=B1=D0=BE=D1=82=D0=BA=D0=B0=20=D0=B8=20=D0=BF=D0=B5?= =?UTF-8?q?=D1=80=D0=B5=D0=BD=D0=BE=D1=81=20=D1=84=D1=83=D0=BD=D0=BA=D1=86?= =?UTF-8?q?=D0=B8=D0=BE=D0=BD=D0=B0=D0=BB=D0=B0=20=D1=83=D1=82=D0=B8=D0=BB?= =?UTF-8?q?=D0=B8=D1=82=D1=8B=20`mdbx=5Fchk`=20=D0=B2=D0=BD=D1=83=D1=82?= =?UTF-8?q?=D1=80=D1=8C=20=D0=B1=D0=B8=D0=B1=D0=BB=D0=B8=D0=BE=D1=82=D0=B5?= =?UTF-8?q?=D0=BA=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TODO.md | 2 +- mdbx.h | 73 +- src/base.h | 1 + src/core.c | 2340 ++++++++++++++++++++++++++++++++++++++++++++--- src/internals.h | 41 +- src/mdbx_chk.c | 1898 ++++++++------------------------------ 6 files changed, 2637 insertions(+), 1718 deletions(-) diff --git a/TODO.md b/TODO.md index 0d9fd46d..d8e2d0b7 100644 --- a/TODO.md +++ b/TODO.md @@ -11,7 +11,6 @@ For the same reason ~~Github~~ is blacklisted forever. So currently most of the links are broken due to noted malicious ~~Github~~ sabotage. 
- - [Move most of `mdbx_chk` functional to the library API](https://libmdbx.dqdkfa.ru/dead-github/issues/204). - [Replace SRW-lock on Windows to allow shrink DB with `MDBX_NOTLS` option](https://libmdbx.dqdkfa.ru/dead-github/issues/210). - [More flexible support of asynchronous runtime/framework(s)](https://libmdbx.dqdkfa.ru/dead-github/issues/200). - [Migration guide from LMDB to MDBX](https://libmdbx.dqdkfa.ru/dead-github/issues/199). @@ -23,6 +22,7 @@ So currently most of the links are broken due to noted malicious ~~Github~~ sabo Done ---- + - [Move most of `mdbx_chk` functional to the library API](https://libmdbx.dqdkfa.ru/dead-github/issues/204). - [Simple careful mode for working with corrupted DB](https://libmdbx.dqdkfa.ru/dead-github/issues/223). - [Engage an "overlapped I/O" on Windows](https://libmdbx.dqdkfa.ru/dead-github/issues/224). - [Large/Overflow pages accounting for dirty-room](https://libmdbx.dqdkfa.ru/dead-github/issues/192). diff --git a/mdbx.h b/mdbx.h index de16ccff..c94bde3f 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2571,9 +2571,7 @@ struct MDBX_envinfo { uint64_t mi_latter_reader_txnid; /**< ID of the last reader transaction */ uint64_t mi_self_latter_reader_txnid; /**< ID of the last reader transaction of caller process */ - uint64_t mi_meta0_txnid, mi_meta0_sign; - uint64_t mi_meta1_txnid, mi_meta1_sign; - uint64_t mi_meta2_txnid, mi_meta2_sign; + uint64_t mi_meta_txnid[3], mi_meta_sign[3]; uint32_t mi_maxreaders; /**< Total reader slots in the environment */ uint32_t mi_numreaders; /**< Max reader slots used in the environment */ uint32_t mi_dxb_pagesize; /**< Database pagesize */ @@ -2590,7 +2588,7 @@ struct MDBX_envinfo { struct { struct { uint64_t x, y; - } current, meta0, meta1, meta2; + } current, meta[3]; } mi_bootid; /** Bytes not explicitly synchronized to disk */ @@ -5525,43 +5523,6 @@ mdbx_env_get_hsr(const MDBX_env *env); * \ingroup c_extra * @{ */ -/** \brief Page types for traverse the b-tree. 
- * \see mdbx_env_pgwalk() \see MDBX_pgvisitor_func */ -enum MDBX_page_type_t { - MDBX_page_broken, - MDBX_page_meta, - MDBX_page_large, - MDBX_page_branch, - MDBX_page_leaf, - MDBX_page_dupfixed_leaf, - MDBX_subpage_leaf, - MDBX_subpage_dupfixed_leaf, - MDBX_subpage_broken, -}; -#ifndef __cplusplus -typedef enum MDBX_page_type_t MDBX_page_type_t; -#endif - -/** \brief Pseudo-name for MainDB */ -#define MDBX_PGWALK_MAIN ((void *)((ptrdiff_t)0)) -/** \brief Pseudo-name for GarbageCollectorDB */ -#define MDBX_PGWALK_GC ((void *)((ptrdiff_t)-1)) -/** \brief Pseudo-name for MetaPages */ -#define MDBX_PGWALK_META ((void *)((ptrdiff_t)-2)) - -/** \brief Callback function for traverse the b-tree. \see mdbx_env_pgwalk() */ -typedef int -MDBX_pgvisitor_func(const uint64_t pgno, const unsigned number, void *const ctx, - const int deep, const MDBX_val *dbi_name, - const size_t page_size, const MDBX_page_type_t type, - const MDBX_error_t err, const size_t nentries, - const size_t payload_bytes, const size_t header_bytes, - const size_t unused_bytes) MDBX_CXX17_NOEXCEPT; - -/** \brief B-tree traversal function. */ -LIBMDBX_API int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor, - void *ctx, bool dont_check_keys_ordering); - /** \brief Acquires write-transaction lock. * Provided for custom and/or complex locking scenarios. * \returns A non-zero error value on failure and 0 on success. 
*/ @@ -5718,6 +5679,14 @@ struct MDBX_chk_histogram { * \see mdbx_env_chk() */ typedef struct MDBX_chk_subdb { MDBX_chk_user_subdb_cookie_t *cookie; + +/** \brief Pseudo-name for MainDB */ +#define MDBX_CHK_MAIN ((void *)((ptrdiff_t)0)) +/** \brief Pseudo-name for GarbageCollectorDB */ +#define MDBX_CHK_GC ((void *)((ptrdiff_t)-1)) +/** \brief Pseudo-name for MetaPages */ +#define MDBX_CHK_META ((void *)((ptrdiff_t)-2)) + MDBX_val name; MDBX_db_flags_t flags; int id; @@ -5749,7 +5718,7 @@ typedef struct MDBX_chk_context { MDBX_env *env; MDBX_txn *txn; MDBX_chk_scope_t *scope; - unsigned scope_nesting; + uint8_t scope_nesting; struct { size_t total_payload_bytes; size_t subdb_total, subdb_processed; @@ -5776,7 +5745,7 @@ typedef struct MDBX_chk_callbacks { void (*scope_pop)(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *outer, MDBX_chk_scope_t *inner); void (*issue)(MDBX_chk_context_t *ctx, const char *object, - size_t entry_number, const char *issue, const char *extra_fmt, + uint64_t entry_number, const char *issue, const char *extra_fmt, va_list extra_args); MDBX_chk_user_subdb_cookie_t *(*subdb_filter)(MDBX_chk_context_t *ctx, const MDBX_val *name, @@ -5792,16 +5761,14 @@ typedef struct MDBX_chk_callbacks { int (*stage_begin)(MDBX_chk_context_t *ctx, enum MDBX_chk_stage); int (*stage_end)(MDBX_chk_context_t *ctx, enum MDBX_chk_stage, int err); - struct { - MDBX_chk_line_t *(*begin)(MDBX_chk_context_t *ctx, - enum MDBX_chk_severity severity); - void (*flush)(MDBX_chk_line_t *); - void (*done)(MDBX_chk_line_t *); - void (*chars)(MDBX_chk_line_t *, const char *str, size_t len); - void (*format)(MDBX_chk_line_t *, const char *fmt, va_list args); - void (*size)(MDBX_chk_line_t *, const char *prefix, const uint64_t value, - const char *suffix); - } print; + MDBX_chk_line_t *(*print_begin)(MDBX_chk_context_t *ctx, + enum MDBX_chk_severity severity); + void (*print_flush)(MDBX_chk_line_t *); + void (*print_done)(MDBX_chk_line_t *); + void (*print_chars)(MDBX_chk_line_t *, 
const char *str, size_t len); + void (*print_format)(MDBX_chk_line_t *, const char *fmt, va_list args); + void (*print_size)(MDBX_chk_line_t *, const char *prefix, + const uint64_t value, const char *suffix); } MDBX_chk_callbacks_t; /** FIXME */ diff --git a/src/base.h b/src/base.h index b8a243e8..fd730945 100644 --- a/src/base.h +++ b/src/base.h @@ -48,6 +48,7 @@ #include #include +#include #include #include #include diff --git a/src/core.c b/src/core.c index d0cb0914..fec25bed 100644 --- a/src/core.c +++ b/src/core.c @@ -5572,7 +5572,7 @@ __cold static void meta_troika_dump(const MDBX_env *env, const meta_ptr_t recent = meta_recent(env, troika); const meta_ptr_t prefer_steady = meta_prefer_steady(env, troika); const meta_ptr_t tail = meta_tail(env, troika); - NOTICE("%" PRIaTXN ".%c:%" PRIaTXN ".%c:%" PRIaTXN ".%c, fsm=0x%02x, " + NOTICE("troika: %" PRIaTXN ".%c:%" PRIaTXN ".%c:%" PRIaTXN ".%c, fsm=0x%02x, " "head=%d-%" PRIaTXN ".%c, " "base=%d-%" PRIaTXN ".%c, " "tail=%d-%" PRIaTXN ".%c, " @@ -12143,6 +12143,10 @@ static __always_inline bool eq_fast(const MDBX_val *a, const MDBX_val *b) { eq_fast_slowpath(a->iov_base, b->iov_base, a->iov_len); } +static int cmp_equal_or_greater(const MDBX_val *a, const MDBX_val *b) { + return eq_fast(a, b) ? 0 : 1; +} + static int validate_meta(MDBX_env *env, MDBX_meta *const meta, const MDBX_page *const page, const unsigned meta_number, unsigned *guess_pagesize) { @@ -22247,9 +22251,9 @@ __cold int mdbx_dbi_dupsort_depthmask(MDBX_txn *txn, MDBX_dbi dbi, return (rc == MDBX_NOTFOUND) ? 
MDBX_SUCCESS : rc; } -__cold static int fetch_envinfo_ex(const MDBX_env *env, const MDBX_txn *txn, - MDBX_envinfo *arg, const size_t bytes) { - +__cold static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, + MDBX_envinfo *out, const size_t bytes, + meta_troika_t *const troika) { const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); @@ -22259,18 +22263,18 @@ __cold static int fetch_envinfo_ex(const MDBX_env *env, const MDBX_txn *txn, /* environment not yet opened */ #if 1 /* default behavior: returns the available info but zeroed the rest */ - memset(arg, 0, bytes); - arg->mi_geo.lower = env->me_dbgeo.lower; - arg->mi_geo.upper = env->me_dbgeo.upper; - arg->mi_geo.shrink = env->me_dbgeo.shrink; - arg->mi_geo.grow = env->me_dbgeo.grow; - arg->mi_geo.current = env->me_dbgeo.now; - arg->mi_maxreaders = env->me_maxreaders; - arg->mi_dxb_pagesize = env->me_psize; - arg->mi_sys_pagesize = env->me_os_psize; + memset(out, 0, bytes); + out->mi_geo.lower = env->me_dbgeo.lower; + out->mi_geo.upper = env->me_dbgeo.upper; + out->mi_geo.shrink = env->me_dbgeo.shrink; + out->mi_geo.grow = env->me_dbgeo.grow; + out->mi_geo.current = env->me_dbgeo.now; + out->mi_maxreaders = env->me_maxreaders; + out->mi_dxb_pagesize = env->me_psize; + out->mi_sys_pagesize = env->me_os_psize; if (likely(bytes > size_before_bootid)) { - arg->mi_bootid.current.x = bootid.x; - arg->mi_bootid.current.y = bootid.y; + out->mi_bootid.current.x = bootid.x; + out->mi_bootid.current.y = bootid.y; } return MDBX_SUCCESS; #else @@ -22285,123 +22289,119 @@ __cold static int fetch_envinfo_ex(const MDBX_env *env, const MDBX_txn *txn, if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) return MDBX_PANIC; - meta_troika_t holder; - meta_troika_t const *troika; if (txn && !(txn->mt_flags & MDBX_TXN_RDONLY)) - troika = &txn->tw.troika; - else { - holder = meta_tap(env); - troika = &holder; - } + *troika = txn->tw.troika; + 
else + *troika = meta_tap(env); const meta_ptr_t head = meta_recent(env, troika); - arg->mi_recent_txnid = head.txnid; - arg->mi_meta0_txnid = troika->txnid[0]; - arg->mi_meta0_sign = unaligned_peek_u64(4, meta0->mm_sign); - arg->mi_meta1_txnid = troika->txnid[1]; - arg->mi_meta1_sign = unaligned_peek_u64(4, meta1->mm_sign); - arg->mi_meta2_txnid = troika->txnid[2]; - arg->mi_meta2_sign = unaligned_peek_u64(4, meta2->mm_sign); + out->mi_recent_txnid = head.txnid; + out->mi_meta_txnid[0] = troika->txnid[0]; + out->mi_meta_sign[0] = unaligned_peek_u64(4, meta0->mm_sign); + out->mi_meta_txnid[1] = troika->txnid[1]; + out->mi_meta_sign[1] = unaligned_peek_u64(4, meta1->mm_sign); + out->mi_meta_txnid[2] = troika->txnid[2]; + out->mi_meta_sign[2] = unaligned_peek_u64(4, meta2->mm_sign); if (likely(bytes > size_before_bootid)) { - memcpy(&arg->mi_bootid.meta0, &meta0->mm_bootid, 16); - memcpy(&arg->mi_bootid.meta1, &meta1->mm_bootid, 16); - memcpy(&arg->mi_bootid.meta2, &meta2->mm_bootid, 16); + memcpy(&out->mi_bootid.meta[0], &meta0->mm_bootid, 16); + memcpy(&out->mi_bootid.meta[1], &meta1->mm_bootid, 16); + memcpy(&out->mi_bootid.meta[2], &meta2->mm_bootid, 16); } const volatile MDBX_meta *txn_meta = head.ptr_v; - arg->mi_last_pgno = txn_meta->mm_geo.next - 1; - arg->mi_geo.current = pgno2bytes(env, txn_meta->mm_geo.now); + out->mi_last_pgno = txn_meta->mm_geo.next - 1; + out->mi_geo.current = pgno2bytes(env, txn_meta->mm_geo.now); if (txn) { - arg->mi_last_pgno = txn->mt_next_pgno - 1; - arg->mi_geo.current = pgno2bytes(env, txn->mt_end_pgno); + out->mi_last_pgno = txn->mt_next_pgno - 1; + out->mi_geo.current = pgno2bytes(env, txn->mt_end_pgno); const txnid_t wanna_meta_txnid = (txn->mt_flags & MDBX_TXN_RDONLY) ? txn->mt_txnid : txn->mt_txnid - xMDBX_TXNID_STEP; - txn_meta = (arg->mi_meta0_txnid == wanna_meta_txnid) ? meta0 : txn_meta; - txn_meta = (arg->mi_meta1_txnid == wanna_meta_txnid) ? meta1 : txn_meta; - txn_meta = (arg->mi_meta2_txnid == wanna_meta_txnid) ? 
meta2 : txn_meta; + txn_meta = (out->mi_meta_txnid[0] == wanna_meta_txnid) ? meta0 : txn_meta; + txn_meta = (out->mi_meta_txnid[1] == wanna_meta_txnid) ? meta1 : txn_meta; + txn_meta = (out->mi_meta_txnid[2] == wanna_meta_txnid) ? meta2 : txn_meta; } - arg->mi_geo.lower = pgno2bytes(env, txn_meta->mm_geo.lower); - arg->mi_geo.upper = pgno2bytes(env, txn_meta->mm_geo.upper); - arg->mi_geo.shrink = pgno2bytes(env, pv2pages(txn_meta->mm_geo.shrink_pv)); - arg->mi_geo.grow = pgno2bytes(env, pv2pages(txn_meta->mm_geo.grow_pv)); + out->mi_geo.lower = pgno2bytes(env, txn_meta->mm_geo.lower); + out->mi_geo.upper = pgno2bytes(env, txn_meta->mm_geo.upper); + out->mi_geo.shrink = pgno2bytes(env, pv2pages(txn_meta->mm_geo.shrink_pv)); + out->mi_geo.grow = pgno2bytes(env, pv2pages(txn_meta->mm_geo.grow_pv)); const uint64_t unsynced_pages = atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed) + (atomic_load32(&env->me_lck->mti_meta_sync_txnid, mo_Relaxed) != - (uint32_t)arg->mi_recent_txnid); + (uint32_t)out->mi_recent_txnid); - arg->mi_mapsize = env->me_dxb_mmap.limit; + out->mi_mapsize = env->me_dxb_mmap.limit; const MDBX_lockinfo *const lck = env->me_lck; - arg->mi_maxreaders = env->me_maxreaders; - arg->mi_numreaders = env->me_lck_mmap.lck + out->mi_maxreaders = env->me_maxreaders; + out->mi_numreaders = env->me_lck_mmap.lck ? atomic_load32(&lck->mti_numreaders, mo_Relaxed) : INT32_MAX; - arg->mi_dxb_pagesize = env->me_psize; - arg->mi_sys_pagesize = env->me_os_psize; + out->mi_dxb_pagesize = env->me_psize; + out->mi_sys_pagesize = env->me_os_psize; if (likely(bytes > size_before_bootid)) { - arg->mi_unsync_volume = pgno2bytes(env, (size_t)unsynced_pages); + out->mi_unsync_volume = pgno2bytes(env, (size_t)unsynced_pages); const uint64_t monotime_now = osal_monotime(); uint64_t ts = atomic_load64(&lck->mti_eoos_timestamp, mo_Relaxed); - arg->mi_since_sync_seconds16dot16 = + out->mi_since_sync_seconds16dot16 = ts ? 
osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0; ts = atomic_load64(&lck->mti_reader_check_timestamp, mo_Relaxed); - arg->mi_since_reader_check_seconds16dot16 = + out->mi_since_reader_check_seconds16dot16 = ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0; - arg->mi_autosync_threshold = pgno2bytes( + out->mi_autosync_threshold = pgno2bytes( env, atomic_load32(&lck->mti_autosync_threshold, mo_Relaxed)); - arg->mi_autosync_period_seconds16dot16 = + out->mi_autosync_period_seconds16dot16 = osal_monotime_to_16dot16_noUnderflow( atomic_load64(&lck->mti_autosync_period, mo_Relaxed)); - arg->mi_bootid.current.x = bootid.x; - arg->mi_bootid.current.y = bootid.y; - arg->mi_mode = env->me_lck_mmap.lck ? lck->mti_envmode.weak : env->me_flags; + out->mi_bootid.current.x = bootid.x; + out->mi_bootid.current.y = bootid.y; + out->mi_mode = env->me_lck_mmap.lck ? lck->mti_envmode.weak : env->me_flags; } if (likely(bytes > size_before_pgop_stat)) { #if MDBX_ENABLE_PGOP_STAT - arg->mi_pgop_stat.newly = + out->mi_pgop_stat.newly = atomic_load64(&lck->mti_pgop_stat.newly, mo_Relaxed); - arg->mi_pgop_stat.cow = atomic_load64(&lck->mti_pgop_stat.cow, mo_Relaxed); - arg->mi_pgop_stat.clone = + out->mi_pgop_stat.cow = atomic_load64(&lck->mti_pgop_stat.cow, mo_Relaxed); + out->mi_pgop_stat.clone = atomic_load64(&lck->mti_pgop_stat.clone, mo_Relaxed); - arg->mi_pgop_stat.split = + out->mi_pgop_stat.split = atomic_load64(&lck->mti_pgop_stat.split, mo_Relaxed); - arg->mi_pgop_stat.merge = + out->mi_pgop_stat.merge = atomic_load64(&lck->mti_pgop_stat.merge, mo_Relaxed); - arg->mi_pgop_stat.spill = + out->mi_pgop_stat.spill = atomic_load64(&lck->mti_pgop_stat.spill, mo_Relaxed); - arg->mi_pgop_stat.unspill = + out->mi_pgop_stat.unspill = atomic_load64(&lck->mti_pgop_stat.unspill, mo_Relaxed); - arg->mi_pgop_stat.wops = + out->mi_pgop_stat.wops = atomic_load64(&lck->mti_pgop_stat.wops, mo_Relaxed); - arg->mi_pgop_stat.prefault = + out->mi_pgop_stat.prefault = 
atomic_load64(&lck->mti_pgop_stat.prefault, mo_Relaxed); - arg->mi_pgop_stat.mincore = + out->mi_pgop_stat.mincore = atomic_load64(&lck->mti_pgop_stat.mincore, mo_Relaxed); - arg->mi_pgop_stat.msync = + out->mi_pgop_stat.msync = atomic_load64(&lck->mti_pgop_stat.msync, mo_Relaxed); - arg->mi_pgop_stat.fsync = + out->mi_pgop_stat.fsync = atomic_load64(&lck->mti_pgop_stat.fsync, mo_Relaxed); #else - memset(&arg->mi_pgop_stat, 0, sizeof(arg->mi_pgop_stat)); + memset(&out->mi_pgop_stat, 0, sizeof(out->mi_pgop_stat)); #endif /* MDBX_ENABLE_PGOP_STAT*/ } - arg->mi_self_latter_reader_txnid = arg->mi_latter_reader_txnid = - arg->mi_recent_txnid; + out->mi_self_latter_reader_txnid = out->mi_latter_reader_txnid = + out->mi_recent_txnid; if (env->me_lck_mmap.lck) { - for (size_t i = 0; i < arg->mi_numreaders; ++i) { + for (size_t i = 0; i < out->mi_numreaders; ++i) { const uint32_t pid = atomic_load32(&lck->mti_readers[i].mr_pid, mo_AcquireRelease); if (pid) { const txnid_t txnid = safe64_read(&lck->mti_readers[i].mr_txnid); - if (arg->mi_latter_reader_txnid > txnid) - arg->mi_latter_reader_txnid = txnid; - if (pid == env->me_pid && arg->mi_self_latter_reader_txnid > txnid) - arg->mi_self_latter_reader_txnid = txnid; + if (out->mi_latter_reader_txnid > txnid) + out->mi_latter_reader_txnid = txnid; + if (pid == env->me_pid && out->mi_self_latter_reader_txnid > txnid) + out->mi_self_latter_reader_txnid = txnid; } } } @@ -22410,6 +22410,26 @@ __cold static int fetch_envinfo_ex(const MDBX_env *env, const MDBX_txn *txn, return MDBX_SUCCESS; } +__cold int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, + size_t bytes, meta_troika_t *troika) { + MDBX_envinfo snap; + int rc = env_info_snap(env, txn, &snap, sizeof(snap), troika); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + while (1) { + rc = env_info_snap(env, txn, out, bytes, troika); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + snap.mi_since_sync_seconds16dot16 = out->mi_since_sync_seconds16dot16; + snap.mi_since_reader_check_seconds16dot16 = +
out->mi_since_reader_check_seconds16dot16; + if (likely(memcmp(&snap, out, bytes) == 0)) + return MDBX_SUCCESS; + memcpy(&snap, out, bytes); + } +} + __cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *arg, size_t bytes) { if (unlikely((env == NULL && txn == NULL) || arg == NULL)) @@ -22436,22 +22456,8 @@ __cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, bytes != size_before_pgop_stat) return MDBX_EINVAL; - MDBX_envinfo snap; - int rc = fetch_envinfo_ex(env, txn, &snap, sizeof(snap)); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - while (1) { - rc = fetch_envinfo_ex(env, txn, arg, bytes); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - snap.mi_since_sync_seconds16dot16 = arg->mi_since_sync_seconds16dot16; - snap.mi_since_reader_check_seconds16dot16 = - arg->mi_since_reader_check_seconds16dot16; - if (likely(memcmp(&snap, arg, bytes) == 0)) - return MDBX_SUCCESS; - memcpy(&snap, arg, bytes); - } + meta_troika_t troika; + return env_info(env, txn, arg, bytes, &troika); } static __inline MDBX_cmp_func *get_default_keycmp(MDBX_db_flags_t flags) { @@ -22572,23 +22578,21 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, } /* main table? 
*/ - if (table_name == MDBX_PGWALK_MAIN || - table_name->iov_base == MDBX_PGWALK_MAIN) { + if (table_name == MDBX_CHK_MAIN || table_name->iov_base == MDBX_CHK_MAIN) { rc = dbi_bind(txn, MAIN_DBI, user_flags, keycmp, datacmp); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; *dbi = MAIN_DBI; return rc; } - if (table_name == MDBX_PGWALK_GC || table_name->iov_base == MDBX_PGWALK_GC) { + if (table_name == MDBX_CHK_GC || table_name->iov_base == MDBX_CHK_GC) { rc = dbi_bind(txn, FREE_DBI, user_flags, keycmp, datacmp); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; *dbi = FREE_DBI; return rc; } - if (table_name == MDBX_PGWALK_META || - table_name->iov_base == MDBX_PGWALK_META) { + if (table_name == MDBX_CHK_META || table_name->iov_base == MDBX_CHK_META) { rc = MDBX_EINVAL; goto bailout; } @@ -22781,8 +22785,8 @@ static int dbi_open_cstr(MDBX_txn *txn, const char *name_cstr, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { MDBX_val thunk, *name; - if (name_cstr == MDBX_PGWALK_MAIN || name_cstr == MDBX_PGWALK_GC || - name_cstr == MDBX_PGWALK_META) + if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || + name_cstr == MDBX_CHK_META) name = (void *)name_cstr; else { thunk.iov_len = strlen(name_cstr); @@ -23457,12 +23461,12 @@ typedef struct mdbx_walk_ctx { bool mw_dont_check_keys_ordering; } mdbx_walk_ctx_t; -__cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const sdb, - const MDBX_val *name, int deep); +__cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_walk_sdb_t *sdb, + int deep); static MDBX_page_type_t walk_page_type(const MDBX_page *mp) { if (mp) - switch (mp->mp_flags) { + switch (mp->mp_flags & ~P_SPILLED) { case P_BRANCH: return MDBX_page_branch; case P_LEAF: @@ -23471,15 +23475,13 @@ static MDBX_page_type_t walk_page_type(const MDBX_page *mp) { return MDBX_page_dupfixed_leaf; case P_OVERFLOW: return MDBX_page_large; - case P_META: - return MDBX_page_meta; } return MDBX_page_broken; } /* Depth-first tree 
traversal. */ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, - const MDBX_val *name, int deep, + MDBX_walk_sdb_t *sdb, int deep, txnid_t parent_txnid) { assert(pgno != P_INVALID); MDBX_page *mp = nullptr; @@ -23536,7 +23538,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, pagesize = pgno2bytes(ctx->mw_txn->mt_env, npages); const size_t over_unused = pagesize - over_payload - over_header; const int rc = ctx->mw_visitor(large_pgno, npages, ctx->mw_user, deep, - name, pagesize, MDBX_page_large, err, 1, + sdb, pagesize, MDBX_page_large, err, 1, over_payload, over_header, over_unused); if (unlikely(rc != MDBX_SUCCESS)) return (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc; @@ -23606,7 +23608,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, } const int rc = - ctx->mw_visitor(pgno, 0, ctx->mw_user, deep + 1, name, node_ds(node), + ctx->mw_visitor(pgno, 0, ctx->mw_user, deep + 1, sdb, node_ds(node), subtype, err, nsubkeys, subpayload_size, subheader_size, subunused_size + subalign_bytes); if (unlikely(rc != MDBX_SUCCESS)) @@ -23624,7 +23626,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, } const int rc = ctx->mw_visitor( - pgno, 1, ctx->mw_user, deep, name, ctx->mw_txn->mt_env->me_psize, type, + pgno, 1, ctx->mw_user, deep, sdb, ctx->mw_txn->mt_env->me_psize, type, err, nentries, payload_size, header_size, unused_size + align_bytes); if (unlikely(rc != MDBX_SUCCESS)) return (rc == MDBX_RESULT_TRUE) ? 
MDBX_SUCCESS : rc; @@ -23636,7 +23638,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, MDBX_node *node = page_node(mp, i); if (type == MDBX_page_branch) { assert(err == MDBX_SUCCESS); - err = walk_tree(ctx, node_pgno(node), name, deep + 1, mp->mp_txnid); + err = walk_tree(ctx, node_pgno(node), sdb, deep + 1, mp->mp_txnid); if (unlikely(err != MDBX_SUCCESS)) { if (err == MDBX_RESULT_TRUE) break; @@ -23655,11 +23657,13 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } else { - MDBX_db db; - memcpy(&db, node_data(node), sizeof(db)); - const MDBX_val subdb_name = {node_key(node), node_ks(node)}; + MDBX_db aligned_db; + memcpy(&aligned_db, node_data(node), sizeof(aligned_db)); + MDBX_walk_sdb_t sdb_info = { + {node_key(node), node_ks(node)}, nullptr, nullptr}; + sdb_info.internal = &aligned_db; assert(err == MDBX_SUCCESS); - err = walk_sdb(ctx, &db, &subdb_name, deep + 1); + err = walk_sdb(ctx, &sdb_info, deep + 1); } break; @@ -23669,15 +23673,17 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } else { - MDBX_db db; - memcpy(&db, node_data(node), sizeof(db)); + MDBX_db aligned_db; + memcpy(&aligned_db, node_data(node), sizeof(aligned_db)); assert(ctx->mw_cursor->mc_xcursor == &container_of(ctx->mw_cursor, MDBX_cursor_couple, outer)->inner); assert(err == MDBX_SUCCESS); err = cursor_xinit1(ctx->mw_cursor, node, mp); if (likely(err == MDBX_SUCCESS)) { ctx->mw_cursor = &ctx->mw_cursor->mc_xcursor->mx_cursor; - err = walk_tree(ctx, db.md_root, name, deep + 1, mp->mp_txnid); + sdb->nested = &aligned_db; + err = walk_tree(ctx, aligned_db.md_root, sdb, deep + 1, mp->mp_txnid); + sdb->nested = nullptr; MDBX_xcursor *inner_xcursor = container_of(ctx->mw_cursor, MDBX_xcursor, mx_cursor); MDBX_cursor_couple *couple = @@ -23692,15 +23698,16 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t 
pgno, return MDBX_SUCCESS; } -__cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const sdb, - const MDBX_val *name, int deep) { - if (unlikely(sdb->md_root == P_INVALID)) +__cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_walk_sdb_t *sdb, + int deep) { + struct MDBX_db *const db = sdb->internal; + if (unlikely(db->md_root == P_INVALID)) return MDBX_SUCCESS; /* empty db */ MDBX_cursor_couple couple; MDBX_dbx dbx = {.md_klen_min = INT_MAX}; uint8_t dbistate = DBI_VALID | DBI_AUDITED; - int rc = couple_init(&couple, ~0u, ctx->mw_txn, sdb, &dbx, &dbistate); + int rc = couple_init(&couple, ~0u, ctx->mw_txn, db, &dbx, &dbistate); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -23712,8 +23719,8 @@ __cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const sdb, : CC_PAGECHECK; couple.outer.mc_next = ctx->mw_cursor; ctx->mw_cursor = &couple.outer; - rc = walk_tree(ctx, sdb->md_root, name, deep, - sdb->md_mod_txnid ? sdb->md_mod_txnid : ctx->mw_txn->mt_txnid); + rc = walk_tree(ctx, db->md_root, sdb, deep, + db->md_mod_txnid ? 
db->md_mod_txnid : ctx->mw_txn->mt_txnid); ctx->mw_cursor = couple.outer.mc_next; return rc; } @@ -23731,15 +23738,13 @@ __cold int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor, ctx.mw_visitor = visitor; ctx.mw_dont_check_keys_ordering = dont_check_keys_ordering; - rc = visitor(0, NUM_METAS, user, 0, MDBX_PGWALK_META, - pgno2bytes(txn->mt_env, NUM_METAS), MDBX_page_meta, MDBX_SUCCESS, - NUM_METAS, sizeof(MDBX_meta) * NUM_METAS, PAGEHDRSZ * NUM_METAS, - (txn->mt_env->me_psize - sizeof(MDBX_meta) - PAGEHDRSZ) * - NUM_METAS); - if (!MDBX_IS_ERROR(rc)) - rc = walk_sdb(&ctx, &txn->mt_dbs[FREE_DBI], MDBX_PGWALK_GC, 0); - if (!MDBX_IS_ERROR(rc)) - rc = walk_sdb(&ctx, &txn->mt_dbs[MAIN_DBI], MDBX_PGWALK_MAIN, 0); + MDBX_walk_sdb_t sdb = {{MDBX_CHK_GC, 0}, &txn->mt_dbs[FREE_DBI], nullptr}; + rc = walk_sdb(&ctx, &sdb, 0); + if (!MDBX_IS_ERROR(rc)) { + sdb.name.iov_base = MDBX_CHK_MAIN; + sdb.internal = &txn->mt_dbs[MAIN_DBI]; + rc = walk_sdb(&ctx, &sdb, 0); + } return rc; } @@ -25520,6 +25525,2079 @@ int mdbx_txn_unlock(MDBX_env *env) { return MDBX_SUCCESS; } +/******************************************************************************* + * Checking API */ + +typedef struct MDBX_chk_internal { + MDBX_chk_context_t *usr; + const struct MDBX_chk_callbacks *cb; + uint64_t monotime_timeout; + + size_t *problem_counter; + uint8_t flags; + bool got_break; + bool write_locked; + uint8_t scope_depth; + + MDBX_chk_subdb_t subdb_gc, subdb_main; + int16_t *pagemap; + MDBX_chk_subdb_t *last_lookup; + const void *last_nested; + MDBX_chk_scope_t scope_stack[12]; + MDBX_chk_subdb_t *subdb[MDBX_MAX_DBI + CORE_DBS]; + + MDBX_envinfo envinfo; + meta_troika_t troika; + MDBX_val v2a_buf; +} MDBX_chk_internal_t; + +__cold static int chk_check_break(MDBX_chk_scope_t *const scope) { + MDBX_chk_internal_t *const chk = scope->internal; + return (chk->got_break || (chk->cb->check_break && + (chk->got_break = chk->cb->check_break(chk->usr)))) + ? 
MDBX_RESULT_TRUE + : MDBX_RESULT_FALSE; +} + +__cold static void chk_line_end(MDBX_chk_line_t *line) { + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (likely(chk->cb->print_done)) + chk->cb->print_done(line); + } +} + +__cold __must_check_result static MDBX_chk_line_t * +chk_line_begin(MDBX_chk_scope_t *const scope, enum MDBX_chk_severity severity) { + MDBX_chk_internal_t *const chk = scope->internal; + if (severity < MDBX_chk_warning) + mdbx_env_chk_problem(chk->usr); + MDBX_chk_line_t *line = nullptr; + if (likely(chk->cb->print_begin)) { + line = chk->cb->print_begin(chk->usr, severity); + if (likely(line)) { + assert(line->ctx == nullptr || (line->ctx == chk->usr && line->empty)); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + line->ctx = chk->usr; + } + } + return line; +} + +__cold static MDBX_chk_line_t *chk_line_feed(MDBX_chk_line_t *line) { + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + enum MDBX_chk_severity severity = line->severity; + chk_line_end(line); + line = chk_line_begin(chk->usr->scope, severity); + } + return line; +} + +__cold static MDBX_chk_line_t *chk_flush(MDBX_chk_line_t *line) { + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (likely(chk->cb->print_flush)) { + chk->cb->print_flush(line); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + line->out = line->begin; + } + } + return line; +} + +__cold static size_t chk_print_wanna(MDBX_chk_line_t *line, size_t need) { + if (likely(line && need)) { + size_t have = line->end - line->out; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (need > have) { + line = chk_flush(line); + have = 
line->end - line->out; + } + return (need < have) ? need : have; + } + return 0; +} + +__cold static MDBX_chk_line_t *chk_puts(MDBX_chk_line_t *line, + const char *str) { + if (likely(line && str && *str)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + size_t left = strlen(str); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (chk->cb->print_chars) { + chk->cb->print_chars(line, str, left); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + } else + do { + size_t chunk = chk_print_wanna(line, left); + assert(chunk <= left); + if (unlikely(!chunk)) + break; + memcpy(line->out, str, chunk); + line->out += chunk; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + str += chunk; + left -= chunk; + } while (left); + line->empty = false; + } + return line; +} + +/* Appends printf-style formatted text to the line, either through the + * print_format() callback or, when it is absent, by formatting directly + * into the line buffer. A NULL line is passed through untouched. */ +__cold static MDBX_chk_line_t *chk_print_va(MDBX_chk_line_t *line, + const char *fmt, va_list args) { + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (chk->cb->print_format) { + chk->cb->print_format(line, fmt, args); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + } else { + va_list ones; + va_copy(ones, args); + const int needed = vsnprintf(nullptr, 0, fmt, ones); + va_end(ones); + if (likely(needed > 0)) { + /* vsnprintf() counts the terminating NUL within its size limit and + * returns the untruncated length, so reserve one extra byte and + * advance only by the number of characters actually stored. */ + const size_t have = chk_print_wanna(line, (size_t)needed + 1); + if (likely(have > 1)) { + const int written = vsnprintf(line->out, have, fmt, args); + if (likely(written > 0)) + line->out += ((size_t)written < have) ? (size_t)written : have - 1; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + } + } + } + line->empty = false; + } + return line; +} + +__cold static MDBX_chk_line_t *MDBX_PRINTF_ARGS(2, 3) + chk_print(MDBX_chk_line_t *line, const char *fmt, ...)
{ + if (likely(line)) { + // MDBX_chk_internal_t *chk = line->ctx->internal; + va_list args; + va_start(args, fmt); + line = chk_print_va(line, fmt, args); + va_end(args); + line->empty = false; + } + return line; +} + +__cold static MDBX_chk_line_t *chk_print_size(MDBX_chk_line_t *line, + const char *prefix, + const uint64_t value, + const char *suffix) { + static const char sf[] = + "KMGTPEZY"; /* LY: Kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta! */ + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + prefix = prefix ? prefix : ""; + suffix = suffix ? suffix : ""; + if (chk->cb->print_size) + chk->cb->print_size(line, prefix, value, suffix); + else + for (unsigned i = 0;; ++i) { + const unsigned scale = 10 + i * 10; + const uint64_t rounded = value + (UINT64_C(5) << (scale - 10)); + const uint64_t integer = rounded >> scale; + const uint64_t fractional = + (rounded - (integer << scale)) * 100u >> scale; + if ((rounded >> scale) <= 1000) + return chk_print(line, "%s%" PRIu64 " (%u.%02u %ciB)%s", prefix, + value, (unsigned)integer, (unsigned)fractional, + sf[i], suffix); + } + line->empty = false; + } + return line; +} + +__cold static int chk_error_rc(MDBX_chk_scope_t *const scope, int err, + const char *subj) { + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_error); + if (line) + chk_line_end(chk_flush(chk_print(line, "%s() failed, error %s (%d)", subj, + mdbx_strerror(err), err))); + else + debug_log(MDBX_LOG_ERROR, "mdbx_env_chk", 0, "%s() failed, error %s (%d)", + subj, mdbx_strerror(err), err); + return err; +} + +__cold static void MDBX_PRINTF_ARGS(5, 6) + chk_object_issue(MDBX_chk_scope_t *const scope, const char *object, + uint64_t entry_number, const char *caption, + const char *extra_fmt, ...) 
{ + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_issue_t *issue = chk->usr->scope->issues; + while (issue) { + if (issue->caption == caption) { + issue->count += 1; + break; + } else + issue = issue->next; + } + const bool fresh = issue == nullptr; + if (fresh) { + issue = osal_malloc(sizeof(*issue)); + if (likely(issue)) { + issue->caption = caption; + issue->count = 1; + issue->next = chk->usr->scope->issues; + chk->usr->scope->issues = issue; + } else + chk_error_rc(scope, ENOMEM, "adding issue"); + } + + va_list args; + va_start(args, extra_fmt); + if (chk->cb->issue) { + mdbx_env_chk_problem(chk->usr); + chk->cb->issue(chk->usr, object, entry_number, caption, extra_fmt, args); + } else { + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_error); + if (entry_number != UINT64_MAX) + chk_print(line, "%s #%" PRIu64 ": %s", object, entry_number, caption); + else + chk_print(line, "%s: %s", object, caption); + if (extra_fmt) + chk_puts(chk_print_va(chk_puts(line, " ("), extra_fmt, args), ")"); + chk_line_end(fresh ? chk_flush(line) : line); + } + va_end(args); +} + +__cold static void MDBX_PRINTF_ARGS(2, 3) + chk_scope_issue(MDBX_chk_scope_t *const scope, const char *fmt, ...) { + MDBX_chk_internal_t *const chk = scope->internal; + va_list args; + va_start(args, fmt); + if (likely(chk->cb->issue)) { + mdbx_env_chk_problem(chk->usr); + chk->cb->issue(chk->usr, nullptr, 0, nullptr, fmt, args); + } else + chk_line_end( + chk_print_va(chk_line_begin(scope, MDBX_chk_error), fmt, args)); + va_end(args); +} + +__cold static int chk_scope_end(MDBX_chk_internal_t *chk, int err) { + assert(chk->scope_depth > 0); + MDBX_chk_scope_t *const inner = chk->scope_stack + chk->scope_depth; + MDBX_chk_scope_t *const outer = chk->scope_depth ? 
inner - 1 : nullptr; + if (!outer || outer->stage != inner->stage) { + if (err == MDBX_SUCCESS && *chk->problem_counter) + err = MDBX_PROBLEM; + else if (*chk->problem_counter == 0 && MDBX_IS_ERROR(err)) + *chk->problem_counter = 1; + if (chk->problem_counter != &chk->usr->result.total_problems) { + chk->usr->result.total_problems += *chk->problem_counter; + chk->problem_counter = &chk->usr->result.total_problems; + } + if (chk->cb->stage_end) + err = chk->cb->stage_end(chk->usr, inner->stage, err); + } + if (chk->cb->scope_conclude) + err = chk->cb->scope_conclude(chk->usr, outer, inner, err); + chk->usr->scope = outer; + chk->usr->scope_nesting = chk->scope_depth -= 1; + if (outer) + outer->subtotal_issues += inner->subtotal_issues; + if (chk->cb->scope_pop) + chk->cb->scope_pop(chk->usr, outer, inner); + + while (inner->issues) { + MDBX_chk_issue_t *next = inner->issues->next; + osal_free(inner->issues); + inner->issues = next; + } + memset(inner, -1, sizeof(*inner)); + return err; +} + +__cold static int chk_scope_begin_args(MDBX_chk_internal_t *chk, + int verbosity_adjustment, + enum MDBX_chk_stage stage, + const void *object, size_t *problems, + const char *fmt, va_list args) { + if (unlikely(chk->scope_depth + 1u >= ARRAY_LENGTH(chk->scope_stack))) + return MDBX_BACKLOG_DEPLETED; + + MDBX_chk_scope_t *const outer = chk->scope_stack + chk->scope_depth; + const int verbosity = + outer->verbosity + + (verbosity_adjustment - 1) * (1 << MDBX_chk_severity_prio_shift); + MDBX_chk_scope_t *const inner = outer + 1; + memset(inner, 0, sizeof(*inner)); + inner->internal = outer->internal; + inner->stage = stage ? stage : (stage = outer->stage); + inner->object = object; + inner->verbosity = (verbosity < MDBX_chk_warning) + ? 
MDBX_chk_warning + : (enum MDBX_chk_severity)verbosity; + if (problems) + chk->problem_counter = problems; + else if (!chk->problem_counter || outer->stage != stage) + chk->problem_counter = &chk->usr->result.total_problems; + + if (chk->cb->scope_push) { + const int err = chk->cb->scope_push(chk->usr, outer, inner, fmt, args); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + chk->usr->scope = inner; + chk->usr->scope_nesting = chk->scope_depth += 1; + + if (stage != outer->stage && chk->cb->stage_begin) { + int err = chk->cb->stage_begin(chk->usr, stage); + if (unlikely(err != MDBX_SUCCESS)) { + err = chk_scope_end(chk, err); + assert(err != MDBX_SUCCESS); + return err ? err : MDBX_RESULT_TRUE; + } + } + return MDBX_SUCCESS; +} + +__cold static int MDBX_PRINTF_ARGS(6, 7) + chk_scope_begin(MDBX_chk_internal_t *chk, int verbosity_adjustment, + enum MDBX_chk_stage stage, const void *object, + size_t *problems, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + int rc = chk_scope_begin_args(chk, verbosity_adjustment, stage, object, + problems, fmt, args); + va_end(args); + return rc; +} + +__cold static int chk_scope_restore(MDBX_chk_scope_t *const target, int err) { + MDBX_chk_internal_t *const chk = target->internal; + assert(target <= chk->usr->scope); + while (chk->usr->scope > target) + err = chk_scope_end(chk, err); + return err; +} + +__cold void chk_scope_pop(MDBX_chk_scope_t *const inner) { + if (inner && inner > inner->internal->scope_stack) + chk_scope_restore(inner - 1, MDBX_SUCCESS); +} + +__cold static MDBX_chk_scope_t *MDBX_PRINTF_ARGS(3, 4) + chk_scope_push(MDBX_chk_scope_t *const scope, int verbosity_adjustment, + const char *fmt, ...) { + chk_scope_restore(scope, MDBX_SUCCESS); + va_list args; + va_start(args, fmt); + int err = chk_scope_begin_args(scope->internal, verbosity_adjustment, + scope->stage, nullptr, nullptr, fmt, args); + va_end(args); + return err ? 
nullptr : scope + 1; +} + +__cold static const char *chk_v2a(MDBX_chk_internal_t *chk, + const MDBX_val *val) { + if (val == MDBX_CHK_MAIN) + return "@MAIN"; + if (val == MDBX_CHK_GC) + return "@GC"; + if (val == MDBX_CHK_META) + return "@META"; + + const unsigned char *const data = val->iov_base; + const size_t len = val->iov_len; + if (data == MDBX_CHK_MAIN) + return "@MAIN"; + if (data == MDBX_CHK_GC) + return "@GC"; + if (data == MDBX_CHK_META) + return "@META"; + + if (!len) + return ""; + if (!data) + return ""; + if (len > 65536) { + const size_t enough = 42; + if (chk->v2a_buf.iov_len < enough) { + void *ptr = osal_realloc(chk->v2a_buf.iov_base, enough); + if (unlikely(!ptr)) + return ""; + chk->v2a_buf.iov_base = ptr; + chk->v2a_buf.iov_len = enough; + } + snprintf(chk->v2a_buf.iov_base, chk->v2a_buf.iov_len, + "", len); + return chk->v2a_buf.iov_base; + } + + bool printable = true; + bool quoting = false; + size_t xchars = 0; + for (size_t i = 0; i < len && printable; ++i) { + quoting = quoting || !(data[i] == '_' || isalnum(data[i])); + printable = + isprint(data[i]) || (data[i] < ' ' && ++xchars < 4 && len > xchars * 4); + } + + size_t need = len + 1; + if (quoting || !printable) + need += len + /* quotes */ 2 + 2 * /* max xchars */ 4; + if (need > chk->v2a_buf.iov_len) { + void *ptr = osal_realloc(chk->v2a_buf.iov_base, need); + if (unlikely(!ptr)) + return ""; + chk->v2a_buf.iov_base = ptr; + chk->v2a_buf.iov_len = need; + } + + static const char hex[] = "0123456789abcdef"; + char *w = chk->v2a_buf.iov_base; + if (!quoting) { + memcpy(w, data, len); + w += len; + } else if (printable) { + *w++ = '\''; + for (size_t i = 0; i < len; ++i) { + if (data[i] < ' ') { + assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 4); + w[0] = '\\'; + w[1] = 'x'; + w[2] = hex[data[i] >> 4]; + w[3] = hex[data[i] & 15]; + w += 4; + } else if (strchr("\"'`\\", data[i])) { + assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 2); + w[0] = '\\'; 
+ w[1] = data[i]; + w += 2; + } else { + assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 1); + *w++ = data[i]; + } + } + *w++ = '\''; + } else { + *w++ = '\\'; + *w++ = 'x'; + for (size_t i = 0; i < len; ++i) { + assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 2); + w[0] = hex[data[i] >> 4]; + w[1] = hex[data[i] & 15]; + w += 2; + } + } + assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w); + *w = 0; + return chk->v2a_buf.iov_base; +} + +__cold static void chk_dispose(MDBX_chk_internal_t *chk) { + assert(chk->subdb[FREE_DBI] == &chk->subdb_gc); + assert(chk->subdb[MAIN_DBI] == &chk->subdb_main); + for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb); ++i) { + MDBX_chk_subdb_t *const sdb = chk->subdb[i]; + if (sdb) { + chk->subdb[i] = nullptr; + if (chk->cb->subdb_dispose && sdb->cookie) { + chk->cb->subdb_dispose(chk->usr, sdb); + sdb->cookie = nullptr; + } + if (sdb != &chk->subdb_gc && sdb != &chk->subdb_main) { + osal_free(sdb); + } + } + } + osal_free(chk->v2a_buf.iov_base); + osal_free(chk->pagemap); + chk->usr->internal = nullptr; + chk->usr->scope = nullptr; + chk->pagemap = nullptr; + memset(chk, 0xDD, sizeof(*chk)); + osal_free(chk); +} + +static size_t div_8s(size_t numerator, size_t divider) { + assert(numerator <= (SIZE_MAX >> 8)); + return (numerator << 8) / divider; +} + +static size_t mul_8s(size_t quotient, size_t multiplier) { + size_t hi = multiplier * (quotient >> 8); + size_t lo = multiplier * (quotient & 255) + 128; + return hi + (lo >> 8); +} + +static void histogram_reduce(struct MDBX_chk_histogram *p) { + const size_t size = ARRAY_LENGTH(p->ranges), last = size - 1; + // ищем пару для слияния с минимальной ошибкой + size_t min_err = SIZE_MAX, min_i = last - 1; + for (size_t i = 0; i < last; ++i) { + const size_t b1 = p->ranges[i].begin, e1 = p->ranges[i].end, + s1 = p->ranges[i].amount; + const size_t b2 = p->ranges[i + 1].begin, e2 = p->ranges[i + 1].end, + s2 = p->ranges[i + 1].amount; + const 
size_t l1 = e1 - b1, l2 = e2 - b2, lx = e2 - b1, sx = s1 + s2; + assert(s1 > 0 && b1 > 0 && b1 < e1); + assert(s2 > 0 && b2 > 0 && b2 < e2); + assert(e1 <= b2); + // за ошибку принимаем площадь изменений на гистограмме при слиянии + const size_t h1 = div_8s(s1, l1), h2 = div_8s(s2, l2), hx = div_8s(sx, lx); + const size_t d1 = mul_8s((h1 > hx) ? h1 - hx : hx - h1, l1); + const size_t d2 = mul_8s((h2 > hx) ? h2 - hx : hx - h2, l2); + const size_t dx = mul_8s(hx, b2 - e1); + const size_t err = d1 + d2 + dx; + if (min_err >= err) { + min_i = i; + min_err = err; + } + } + // объединяем + p->ranges[min_i].end = p->ranges[min_i + 1].end; + p->ranges[min_i].amount += p->ranges[min_i + 1].amount; + p->ranges[min_i].count += p->ranges[min_i + 1].count; + if (min_i < last) + // перемещаем хвост + memmove(p->ranges + min_i, p->ranges + min_i + 1, + (last - min_i) * sizeof(p->ranges[0])); + // обнуляем последний элемент и продолжаем + p->ranges[last].count = 0; +} + +static void histogram_acc(const size_t n, struct MDBX_chk_histogram *p) { + STATIC_ASSERT(ARRAY_LENGTH(p->ranges) > 2); + p->amount += n; + p->count += 1; + if (likely(n < 2)) { + p->ones += n; + p->pad += 1; + } else + for (;;) { + const size_t size = ARRAY_LENGTH(p->ranges), last = size - 1; + size_t i = 0; + while (i < size && p->ranges[i].count && n >= p->ranges[i].begin) { + if (n < p->ranges[i].end) { + // значение попадает в существующий интервал + p->ranges[i].amount += n; + p->ranges[i].count += 1; + return; + } + ++i; + } + if (p->ranges[last].count == 0) { + // использованы еще не все слоты, добавляем интервал + assert(i < size); + if (p->ranges[i].count) { + assert(i < last); + // раздвигаем + memmove(p->ranges + i + 1, p->ranges + i, + (last - i) * sizeof(p->ranges[0])); + } + p->ranges[i].begin = n; + p->ranges[i].end = n + 1; + p->ranges[i].amount = n; + p->ranges[i].count = 1; + return; + } + histogram_reduce(p); + } +} + +__cold static MDBX_chk_line_t * +histogram_dist(MDBX_chk_line_t *line, + 
const struct MDBX_chk_histogram *histogram, const char *prefix, + const char *first, bool amount) { + line = chk_print(line, "%s:", prefix); + const char *comma = ""; + const size_t first_val = amount ? histogram->ones : histogram->pad; + if (first_val) { + chk_print(line, " %s=%" PRIuSIZE, first, first_val); + comma = ","; + } + for (size_t n = 0; n < ARRAY_LENGTH(histogram->ranges); ++n) + if (histogram->ranges[n].count) { + chk_print(line, "%s %" PRIuSIZE, comma, histogram->ranges[n].begin); + if (histogram->ranges[n].begin != histogram->ranges[n].end - 1) + chk_print(line, "-%" PRIuSIZE, histogram->ranges[n].end - 1); + line = chk_print(line, "=%" PRIuSIZE, + amount ? histogram->ranges[n].amount + : histogram->ranges[n].count); + comma = ","; + } + return line; +} + +__cold static MDBX_chk_line_t * +histogram_print(MDBX_chk_scope_t *scope, MDBX_chk_line_t *line, + const struct MDBX_chk_histogram *histogram, const char *prefix, + const char *first, bool amount) { + if (histogram->count) { + line = chk_print(line, "%s %" PRIuSIZE, prefix, + amount ? 
histogram->amount : histogram->count); + if (scope->verbosity > MDBX_chk_info) + line = chk_puts( + histogram_dist(line, histogram, " (distribution", first, amount), + ")"); + } + return line; +} + +//----------------------------------------------------------------------------- + +__cold static int chk_get_sdb(MDBX_chk_scope_t *const scope, + const MDBX_walk_sdb_t *in, + MDBX_chk_subdb_t **out) { + MDBX_chk_internal_t *const chk = scope->internal; + if (chk->last_lookup && + chk->last_lookup->name.iov_base == in->name.iov_base) { + *out = chk->last_lookup; + return MDBX_SUCCESS; + } + + for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb); ++i) { + MDBX_chk_subdb_t *sdb = chk->subdb[i]; + if (!sdb) { + sdb = osal_calloc(1, sizeof(MDBX_chk_subdb_t)); + if (unlikely(!sdb)) { + *out = nullptr; + return chk_error_rc(scope, MDBX_ENOMEM, "alloc_subDB"); + } + chk->subdb[i] = sdb; + sdb->flags = in->internal->md_flags; + sdb->id = -1; + sdb->name = in->name; + } + if (sdb->name.iov_base == in->name.iov_base) { + if (sdb->id < 0) { + sdb->id = (int)i; + sdb->cookie = + chk->cb->subdb_filter + ? 
chk->cb->subdb_filter(chk->usr, &sdb->name, sdb->flags) + : (void *)(intptr_t)-1; + } + *out = (chk->last_lookup = sdb); + return MDBX_SUCCESS; + } + } + chk_scope_issue(scope, "too many subDBs > %u", + (unsigned)ARRAY_LENGTH(chk->subdb) - CORE_DBS - /* meta */ 1); + *out = nullptr; + return MDBX_PROBLEM; +} + +//------------------------------------------------------------------------------ + +__cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope, + const unsigned num) { + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_verbose); + MDBX_chk_internal_t *const chk = scope->internal; + if (line) { + MDBX_env *const env = chk->usr->env; + const bool have_bootid = (chk->envinfo.mi_bootid.current.x | + chk->envinfo.mi_bootid.current.y) != 0; + const bool bootid_match = + have_bootid && memcmp(&chk->envinfo.mi_bootid.meta[num], + &chk->envinfo.mi_bootid.current, + sizeof(chk->envinfo.mi_bootid.current)) == 0; + + line = chk_print(line, "meta-%u: ", num); + switch (chk->envinfo.mi_meta_sign[num]) { + case MDBX_DATASIGN_NONE: + line = chk_puts(line, "no-sync/legacy"); + break; + case MDBX_DATASIGN_WEAK: + line = chk_print(line, "weak-%s", + have_bootid + ? (bootid_match ? 
"intact (same boot-id)" : "dead") + : "unknown (no boot-id)"); + break; + default: + line = chk_puts(line, "steady"); + break; + } + const txnid_t meta_txnid = chk->envinfo.mi_meta_txnid[num]; + line = chk_print(line, " txn#%" PRIaTXN, meta_txnid); + + const char *status = "stay"; + if (num == chk->troika.recent) + status = "head"; + else if (num == TROIKA_TAIL(&chk->troika)) + status = "tail"; + line = chk_print(line, ", %s", status); + + if (env->me_stuck_meta >= 0) { + if (num == (unsigned)env->me_stuck_meta) + line = chk_print(line, ", %s", "forced for checking"); + } else if (meta_txnid > chk->envinfo.mi_recent_txnid && + (env->me_flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == + MDBX_EXCLUSIVE) + line = chk_print(line, + ", rolled-back %" PRIu64 " commit(s) (%" PRIu64 + " >>> %" PRIu64 ")", + meta_txnid - chk->envinfo.mi_recent_txnid, meta_txnid, + chk->envinfo.mi_recent_txnid); + chk_line_end(line); + } +} + +__cold static int +chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, + const int deep, const MDBX_walk_sdb_t *sdb_info, + const size_t page_size, const MDBX_page_type_t pagetype, + const MDBX_error_t page_err, const size_t nentries, + const size_t payload_bytes, const size_t header_bytes, + const size_t unused_bytes) { + MDBX_chk_scope_t *const scope = ctx; + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_context_t *const usr = chk->usr; + MDBX_env *const env = usr->env; + + MDBX_chk_subdb_t *sdb; + int err = chk_get_sdb(scope, sdb_info, &sdb); + if (unlikely(err)) + return err; + + if (deep > 42) { + chk_scope_issue(scope, "too deeply %u", deep); + return MDBX_CORRUPTED /* avoid infinite loop/recursion */; + } + histogram_acc(deep, &sdb->histogram.deep); + usr->result.processed_pages += npages; + const size_t page_bytes = payload_bytes + header_bytes + unused_bytes; + + int height = deep + 1; + if (sdb->id >= CORE_DBS) + height -= usr->txn->mt_dbs[MAIN_DBI].md_depth; + const struct MDBX_db *nested = sdb_info->nested; + 
if (nested) { + if (sdb->flags & MDBX_DUPSORT) + height -= sdb_info->internal->md_depth; + else { + chk_object_issue(scope, "nested tree", pgno, "unexpected", + "subDb %s flags 0x%x, deep %i", chk_v2a(chk, &sdb->name), + sdb->flags, deep); + nested = nullptr; + } + } else + chk->last_nested = nullptr; + + const char *pagetype_caption; + bool branch = false; + switch (pagetype) { + default: + chk_object_issue(scope, "page", pgno, "unknown page-type", + "type %u, deep %i", (unsigned)pagetype, deep); + pagetype_caption = "unknown"; + sdb->pages.other += npages; + break; + case MDBX_page_broken: + assert(page_err != MDBX_SUCCESS); + pagetype_caption = "broken"; + sdb->pages.other += npages; + break; + case MDBX_subpage_broken: + assert(page_err != MDBX_SUCCESS); + pagetype_caption = "broken-subpage"; + sdb->pages.other += npages; + break; + case MDBX_page_large: + pagetype_caption = "large"; + histogram_acc(npages, &sdb->histogram.large_pages); + if (sdb->flags & MDBX_DUPSORT) + chk_object_issue(scope, "page", pgno, "unexpected", + "type %u, subDb %s flags 0x%x, deep %i", + (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags, + deep); + break; + case MDBX_page_branch: + branch = true; + if (!nested) { + pagetype_caption = "branch"; + sdb->pages.branch += 1; + } else { + pagetype_caption = "nested-branch"; + sdb->pages.nested_branch += 1; + } + break; + case MDBX_page_dupfixed_leaf: + if (!nested) + chk_object_issue(scope, "page", pgno, "unexpected", + "type %u, subDb %s flags 0x%x, deep %i", + (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags, + deep); + /* fall through */ + __fallthrough; + case MDBX_page_leaf: + if (!nested) { + pagetype_caption = "leaf"; + sdb->pages.leaf += 1; + if (height != sdb_info->internal->md_depth) + chk_object_issue(scope, "page", pgno, "wrong tree height", + "actual %i != %i subDb %s", height, + sdb_info->internal->md_depth, + chk_v2a(chk, &sdb->name)); + } else { + pagetype_caption = + (pagetype == MDBX_page_leaf) ? 
"nested-leaf" : "nested-leaf-dupfixed"; + sdb->pages.nested_leaf += 1; + if (chk->last_nested != nested) { + histogram_acc(height, &sdb->histogram.nested_tree); + chk->last_nested = nested; + } + if (height != nested->md_depth) + chk_object_issue(scope, "page", pgno, "wrong nested-tree height", + "actual %i != %i dupsort-node %s", height, + nested->md_depth, chk_v2a(chk, &sdb->name)); + } + break; + case MDBX_subpage_dupfixed_leaf: + case MDBX_subpage_leaf: + pagetype_caption = (pagetype == MDBX_subpage_leaf) ? "subleaf-dupsort" + : "subleaf-dupfixed"; + sdb->pages.nested_subleaf += 1; + if ((sdb->flags & MDBX_DUPSORT) == 0 || nested) + chk_object_issue(scope, "page", pgno, "unexpected", + "type %u, subDb %s flags 0x%x, deep %i", + (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags, + deep); + break; + } + + if (npages) { + if (sdb->cookie) { + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra); + if (npages == 1) + chk_print(line, "%s-page %" PRIuSIZE, pagetype_caption, pgno); + else + chk_print(line, "%s-span %" PRIuSIZE "[%u]", pagetype_caption, pgno, + npages); + chk_line_end( + chk_print(line, + " of %s: header %" PRIiPTR ", %s %" PRIiPTR + ", payload %" PRIiPTR ", unused %" PRIiPTR ", deep %i", + chk_v2a(chk, &sdb->name), header_bytes, + (pagetype == MDBX_page_branch) ? "keys" : "entries", + nentries, payload_bytes, unused_bytes, deep)); + } + + bool already_used = false; + for (unsigned n = 0; n < npages; ++n) { + const size_t spanpgno = pgno + n; + if (spanpgno >= usr->result.alloc_pages) { + chk_object_issue(scope, "page", spanpgno, "wrong page-no", + "%s-page: %" PRIuSIZE " > %" PRIuSIZE ", deep %i", + pagetype_caption, spanpgno, usr->result.alloc_pages, + deep); + sdb->pages.all += 1; + } else if (chk->pagemap[spanpgno]) { + const MDBX_chk_subdb_t *const rival = + chk->subdb[chk->pagemap[spanpgno] - 1]; + chk_object_issue(scope, "page", spanpgno, + (branch && rival == sdb) ? 
"loop" : "already used", + "%s-page: by %s, deep %i", pagetype_caption, + chk_v2a(chk, &rival->name), deep); + already_used = true; + } else { + chk->pagemap[spanpgno] = (int16_t)sdb->id + 1; + sdb->pages.all += 1; + } + } + + if (already_used) + return branch ? MDBX_RESULT_TRUE /* avoid infinite loop/recursion */ + : MDBX_SUCCESS; + } + + if (MDBX_IS_ERROR(page_err)) { + chk_object_issue(scope, "page", pgno, "invalid/corrupted", "%s-page", + pagetype_caption); + } else { + if (unused_bytes > page_size) + chk_object_issue(scope, "page", pgno, "illegal unused-bytes", + "%s-page: %u < %" PRIuSIZE " < %u", pagetype_caption, 0, + unused_bytes, env->me_psize); + + if (header_bytes < (int)sizeof(long) || + (size_t)header_bytes >= env->me_psize - sizeof(long)) { + chk_object_issue(scope, "page", pgno, "illegal header-length", + "%s-page: %" PRIuSIZE " < %" PRIuSIZE " < %" PRIuSIZE, + pagetype_caption, sizeof(long), header_bytes, + env->me_psize - sizeof(long)); + } + if (payload_bytes < 1) { + if (nentries > 1) { + chk_object_issue(scope, "page", pgno, "zero size-of-entry", + "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE + " entries", + pagetype_caption, payload_bytes, nentries); + } else { + chk_object_issue(scope, "page", pgno, "empty", + "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE + " entries, deep %i", + pagetype_caption, payload_bytes, nentries, deep); + sdb->pages.empty += 1; + } + } + + if (npages) { + if (page_bytes != page_size) { + chk_object_issue(scope, "page", pgno, "misused", + "%s-page: %" PRIuPTR " != %" PRIuPTR " (%" PRIuPTR + "h + %" PRIuPTR "p + %" PRIuPTR "u), deep %i", + pagetype_caption, page_size, page_bytes, header_bytes, + payload_bytes, unused_bytes, deep); + if (page_size > page_bytes) + sdb->lost_bytes += page_size - page_bytes; + } else { + sdb->payload_bytes += payload_bytes + header_bytes; + usr->result.total_payload_bytes += payload_bytes + header_bytes; + } + } + } + return chk_check_break(scope); +} + +__cold static int 
chk_tree(MDBX_chk_scope_t *const scope) { + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_context_t *const usr = chk->usr; + MDBX_env *const env = usr->env; + MDBX_txn *const txn = usr->txn; + +#if defined(_WIN32) || defined(_WIN64) + SetLastError(ERROR_SUCCESS); +#else + errno = 0; +#endif /* Windows */ + chk->pagemap = osal_calloc(usr->result.alloc_pages, sizeof(*chk->pagemap)); + if (!chk->pagemap) { + int err = osal_get_errno(); + return chk_error_rc(scope, err ? err : MDBX_ENOMEM, "calloc"); + } + + if (scope->verbosity > MDBX_chk_info) + chk_scope_push(scope, 0, "Walking pages..."); + /* always skip key ordering checking + * to avoid MDBX_CORRUPTED in case custom comparators were used */ + usr->result.processed_pages = NUM_METAS; + int err = mdbx_env_pgwalk(txn, chk_pgvisitor, scope, true); + if (MDBX_IS_ERROR(err) && err != MDBX_EINTR) + chk_error_rc(scope, err, "mdbx_env_pgwalk"); + + for (size_t n = NUM_METAS; n < usr->result.alloc_pages; ++n) + if (!chk->pagemap[n]) + usr->result.unused_pages += 1; + + MDBX_chk_subdb_t total; + memset(&total, 0, sizeof(total)); + total.pages.all = NUM_METAS; + for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb) && chk->subdb[i]; ++i) { + MDBX_chk_subdb_t *const sdb = chk->subdb[i]; + total.payload_bytes += sdb->payload_bytes; + total.lost_bytes += sdb->lost_bytes; + total.pages.all += sdb->pages.all; + total.pages.empty += sdb->pages.empty; + total.pages.other += sdb->pages.other; + total.pages.branch += sdb->pages.branch; + total.pages.leaf += sdb->pages.leaf; + total.pages.nested_branch += sdb->pages.nested_branch; + total.pages.nested_leaf += sdb->pages.nested_leaf; + total.pages.nested_subleaf += sdb->pages.nested_subleaf; + } + assert(total.pages.all == usr->result.processed_pages); + + const size_t total_page_bytes = pgno2bytes(env, total.pages.all); + if (usr->scope->subtotal_issues || usr->scope->verbosity >= MDBX_chk_verbose) + chk_line_end(chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution), + 
"walked %zu pages, left/unused %zu" + ", %" PRIuSIZE " problem(s)", + usr->result.processed_pages, + usr->result.unused_pages, + usr->scope->subtotal_issues)); + + err = chk_scope_restore(scope, err); + if (scope->verbosity > MDBX_chk_info) { + for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb) && chk->subdb[i]; ++i) { + MDBX_chk_subdb_t *const sdb = chk->subdb[i]; + MDBX_chk_scope_t *inner = + chk_scope_push(scope, 0, "tree %s:", chk_v2a(chk, &sdb->name)); + if (sdb->pages.all == 0) + chk_line_end( + chk_print(chk_line_begin(inner, MDBX_chk_resolution), "empty")); + else { + MDBX_chk_line_t *line = chk_line_begin(inner, MDBX_chk_info); + if (line) { + line = chk_print(line, "page usage: subtotal %" PRIuSIZE, + sdb->pages.all); + const size_t branch_pages = + sdb->pages.branch + sdb->pages.nested_branch; + const size_t leaf_pages = sdb->pages.leaf + sdb->pages.nested_leaf + + sdb->pages.nested_subleaf; + if (sdb->pages.other) + line = chk_print(line, ", other %" PRIuSIZE, sdb->pages.other); + if (sdb->pages.other == 0 || + (branch_pages | leaf_pages | sdb->histogram.large_pages.count) != + 0) { + line = chk_print(line, ", branch %" PRIuSIZE ", leaf %" PRIuSIZE, + branch_pages, leaf_pages); + if (sdb->histogram.large_pages.count || + (sdb->flags & MDBX_DUPSORT) == 0) { + line = chk_print(line, ", large %" PRIuSIZE, + sdb->histogram.large_pages.count); + if (sdb->histogram.large_pages.amount | + sdb->histogram.large_pages.count) + line = histogram_print(inner, line, &sdb->histogram.large_pages, + " amount", "single", true); + } + } + line = histogram_dist(chk_line_feed(line), &sdb->histogram.deep, + "tree deep density", "1", false); + if (sdb != &chk->subdb_gc && sdb->histogram.nested_tree.count) { + line = chk_print(chk_line_feed(line), "nested tree(s) %" PRIuSIZE, + sdb->histogram.nested_tree.count); + line = histogram_dist(line, &sdb->histogram.nested_tree, " density", + "1", false); + line = chk_print(chk_line_feed(line), + "nested tree(s) pages %" PRIuSIZE + ": 
branch %" PRIuSIZE ", leaf %" PRIuSIZE + ", subleaf %" PRIuSIZE, + sdb->pages.nested_branch + sdb->pages.nested_leaf, + sdb->pages.nested_branch, sdb->pages.nested_leaf, + sdb->pages.nested_subleaf); + } + + const size_t bytes = pgno2bytes(env, sdb->pages.all); + line = chk_print( + chk_line_feed(line), + "page filling: subtotal %" PRIuSIZE + " bytes (%.1f%%), payload %" PRIuSIZE + " (%.1f%%), unused %" PRIuSIZE " (%.1f%%)", + bytes, bytes * 100.0 / total_page_bytes, sdb->payload_bytes, + sdb->payload_bytes * 100.0 / bytes, bytes - sdb->payload_bytes, + (bytes - sdb->payload_bytes) * 100.0 / bytes); + if (sdb->pages.empty) + line = chk_print(line, ", %" PRIuSIZE " empty pages", + sdb->pages.empty); + if (sdb->lost_bytes) + line = + chk_print(line, ", %" PRIuSIZE " bytes lost", sdb->lost_bytes); + chk_line_end(line); + } + } + chk_scope_restore(scope, 0); + } + } + + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution); + line = chk_print(line, + "summary: total %" PRIuSIZE " bytes, payload %" PRIuSIZE + " (%.1f%%), unused %" PRIuSIZE " (%.1f%%)," + " average fill %.1f%%", + total_page_bytes, usr->result.total_payload_bytes, + usr->result.total_payload_bytes * 100.0 / total_page_bytes, + total_page_bytes - usr->result.total_payload_bytes, + (total_page_bytes - usr->result.total_payload_bytes) * + 100.0 / total_page_bytes, + usr->result.total_payload_bytes * 100.0 / total_page_bytes); + if (total.pages.empty) + line = chk_print(line, ", %" PRIuSIZE " empty pages", total.pages.empty); + if (total.lost_bytes) + line = chk_print(line, ", %" PRIuSIZE " bytes lost", total.lost_bytes); + chk_line_end(line); + return err; +} + +typedef int(chk_kv_visitor)(MDBX_chk_scope_t *const scope, + MDBX_chk_subdb_t *sdb, const size_t record_number, + const MDBX_val *key, const MDBX_val *data); + +__cold static int chk_handle_kv(MDBX_chk_scope_t *const scope, + MDBX_chk_subdb_t *sdb, + const size_t record_number, const MDBX_val *key, + const MDBX_val *data) { + 
MDBX_chk_internal_t *const chk = scope->internal; + int err = MDBX_SUCCESS; + assert(sdb->cookie); + if (chk->cb->subdb_handle_kv) + err = chk->cb->subdb_handle_kv(chk->usr, sdb, record_number, key, data); + return err ? err : chk_check_break(scope); +} + +__cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, + MDBX_chk_subdb_t *sdb, chk_kv_visitor *handler) { + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_context_t *const usr = chk->usr; + MDBX_env *const env = usr->env; + MDBX_txn *const txn = usr->txn; + MDBX_cursor *cursor = nullptr; + size_t record_count = 0, dups = 0, sub_databases = 0; + int err; + + if ((MDBX_TXN_FINISHED | MDBX_TXN_ERROR) & txn->mt_flags) { + chk_line_end( + chk_flush(chk_print(chk_line_begin(scope, MDBX_chk_error), + "abort processing %s due to a previous error", + chk_v2a(chk, &sdb->name)))); + err = MDBX_BAD_TXN; + goto bailout; + } + + if (0 > (int)dbi) { + err = dbi_open( + txn, &sdb->name, MDBX_DB_ACCEDE, &dbi, + (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr, + (chk->flags & MDBX_CHK_IGNORE_ORDER) ? 
cmp_equal_or_greater : nullptr); + if (unlikely(err)) { + chk_error_rc(scope, err, "mdbx_dbi_open"); + goto bailout; + } + } + + const MDBX_db *const db = txn->mt_dbs + dbi; + if (handler) { + const char *key_mode = nullptr; + switch (sdb->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) { + case 0: + key_mode = "usual"; + break; + case MDBX_REVERSEKEY: + key_mode = "reserve"; + break; + case MDBX_INTEGERKEY: + key_mode = "ordinal"; + break; + case MDBX_REVERSEKEY | MDBX_INTEGERKEY: + key_mode = "msgpack"; + break; + default: + key_mode = "inconsistent"; + chk_scope_issue(scope, "wrong key-mode (0x%x)", + sdb->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)); + } + + const char *value_mode = nullptr; + switch (sdb->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | + MDBX_INTEGERDUP)) { + case 0: + value_mode = "single"; + break; + case MDBX_DUPSORT: + value_mode = "multi"; + break; + case MDBX_DUPSORT | MDBX_REVERSEDUP: + value_mode = "multi-reverse"; + break; + case MDBX_DUPSORT | MDBX_DUPFIXED: + value_mode = "multi-samelength"; + break; + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP: + value_mode = "multi-reverse-samelength"; + break; + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP: + value_mode = "multi-ordinal"; + break; + case MDBX_DUPSORT | MDBX_INTEGERDUP | MDBX_REVERSEDUP: + value_mode = "multi-msgpack"; + break; + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: + value_mode = "reserved"; + break; + default: + value_mode = "inconsistent"; + chk_scope_issue(scope, "wrong value-mode (0x%x)", + sdb->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | + MDBX_DUPFIXED | MDBX_INTEGERDUP)); + } + + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info); + line = chk_print(line, "key-value kind: %s-key => %s-value", key_mode, + value_mode); + line = chk_print(line, ", flags:"); + if (!sdb->flags) + line = chk_print(line, " none"); + else { + const uint8_t f[] = {MDBX_DUPSORT, + MDBX_INTEGERKEY, + MDBX_REVERSEKEY, + MDBX_DUPFIXED, + 
MDBX_REVERSEDUP, + MDBX_INTEGERDUP, + 0}; + const char *const t[] = {"dupsort", "integerkey", "reversekey", + "dupfixed", "reversedup", "integerdup"}; + for (size_t i = 0; f[i]; i++) + if (sdb->flags & f[i]) + line = chk_print(line, " %s", t[i]); + } + chk_line_end(chk_print(line, " (0x%02X)", sdb->flags)); + + line = chk_print(chk_line_begin(scope, MDBX_chk_verbose), + "entries %" PRIu64 ", sequence %" PRIu64, db->md_entries, + db->md_seq); + if (db->md_mod_txnid) + line = chk_print(line, ", last modification txn#%" PRIaTXN, + db->md_mod_txnid); + if (db->md_root != P_INVALID) + line = chk_print(line, ", root #%" PRIaPGNO, db->md_root); + chk_line_end(line); + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_verbose), + "b-tree depth %u, pages: branch %" PRIaPGNO + ", leaf %" PRIaPGNO ", large %" PRIaPGNO, + db->md_depth, db->md_branch_pages, db->md_leaf_pages, + db->md_overflow_pages)); + + if ((chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) { + const size_t branch_pages = sdb->pages.branch + sdb->pages.nested_branch; + const size_t leaf_pages = sdb->pages.leaf + sdb->pages.nested_leaf; + const size_t subtotal_pages = + db->md_branch_pages + db->md_leaf_pages + db->md_overflow_pages; + if (subtotal_pages != sdb->pages.all) + chk_scope_issue( + scope, "%s pages mismatch (%" PRIuSIZE " != walked %" PRIuSIZE ")", + "subtotal", subtotal_pages, sdb->pages.all); + if (db->md_branch_pages != branch_pages) + chk_scope_issue( + scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", + "branch", db->md_branch_pages, branch_pages); + if (db->md_leaf_pages != leaf_pages) + chk_scope_issue( + scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", + "all-leaf", db->md_leaf_pages, leaf_pages); + if (db->md_overflow_pages != sdb->histogram.large_pages.amount) + chk_scope_issue( + scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", + "large/overlow", db->md_overflow_pages, + sdb->histogram.large_pages.amount); + } + } + + err = 
mdbx_cursor_open(txn, dbi, &cursor); + if (unlikely(err)) { + chk_error_rc(scope, err, "mdbx_cursor_open"); + goto bailout; + } + if (chk->flags & MDBX_CHK_IGNORE_ORDER) { + cursor->mc_checking |= CC_SKIPORD | CC_PAGECHECK; + if (cursor->mc_xcursor) + cursor->mc_xcursor->mx_cursor.mc_checking |= CC_SKIPORD | CC_PAGECHECK; + } + + const size_t maxkeysize = mdbx_env_get_maxkeysize_ex(env, sdb->flags); + MDBX_val prev_key = {nullptr, 0}, prev_data = {nullptr, 0}; + MDBX_val key, data; + err = mdbx_cursor_get(cursor, &key, &data, MDBX_FIRST); + while (err == MDBX_SUCCESS) { + err = chk_check_break(scope); + if (unlikely(err)) + goto bailout; + + bool bad_key = false; + if (key.iov_len > maxkeysize) { + chk_object_issue(scope, "entry", record_count, + "key length exceeds max-key-size", + "%" PRIuPTR " > %" PRIuPTR, key.iov_len, maxkeysize); + bad_key = true; + } else if ((sdb->flags & MDBX_INTEGERKEY) && key.iov_len != 8 && + key.iov_len != 4) { + chk_object_issue(scope, "entry", record_count, "wrong key length", + "%" PRIuPTR " != 4or8", key.iov_len); + bad_key = true; + } + + bool bad_data = false; + if ((sdb->flags & MDBX_INTEGERDUP) && data.iov_len != 8 && + data.iov_len != 4) { + chk_object_issue(scope, "entry", record_count, "wrong data length", + "%" PRIuPTR " != 4or8", data.iov_len); + bad_data = true; + } + + if (prev_key.iov_base) { + if (prev_data.iov_base && !bad_data && (sdb->flags & MDBX_DUPFIXED) && + prev_data.iov_len != data.iov_len) { + chk_object_issue(scope, "entry", record_count, "different data length", + "%" PRIuPTR " != %" PRIuPTR, prev_data.iov_len, + data.iov_len); + bad_data = true; + } + + if (!bad_key) { + int cmp = mdbx_cmp(txn, dbi, &key, &prev_key); + if (cmp == 0) { + ++dups; + if ((sdb->flags & MDBX_DUPSORT) == 0) { + chk_object_issue(scope, "entry", record_count, "duplicated entries", + nullptr); + if (prev_data.iov_base && data.iov_len == prev_data.iov_len && + memcmp(data.iov_base, prev_data.iov_base, data.iov_len) == 0) + 
chk_object_issue(scope, "entry", record_count, + "complete duplicate", nullptr); + } else if (!bad_data && prev_data.iov_base) { + cmp = mdbx_dcmp(txn, dbi, &data, &prev_data); + if (cmp == 0) + chk_object_issue(scope, "entry", record_count, + "complete duplicate", nullptr); + else if (cmp < 0 && !(chk->flags & MDBX_CHK_IGNORE_ORDER)) + chk_object_issue(scope, "entry", record_count, + "wrong order of multi-values", nullptr); + } + } else if (cmp < 0 && !(chk->flags & MDBX_CHK_IGNORE_ORDER)) + chk_object_issue(scope, "entry", record_count, + "wrong order of entries", nullptr); + } + } + + if (!bad_key) { + if (!prev_key.iov_base && (sdb->flags & MDBX_INTEGERKEY)) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), + "fixed key-size %" PRIuSIZE, key.iov_len)); + prev_key = key; + } + if (!bad_data) { + if (!prev_data.iov_base && + (sdb->flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED))) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), + "fixed data-size %" PRIuSIZE, data.iov_len)); + prev_data = data; + } + + record_count++; + histogram_acc(key.iov_len, &sdb->histogram.key_len); + histogram_acc(data.iov_len, &sdb->histogram.val_len); + + const MDBX_node *const node = + page_node(cursor->mc_pg[cursor->mc_top], cursor->mc_ki[cursor->mc_top]); + if (node_flags(node) == F_SUBDATA) { + if (dbi != MAIN_DBI || (sdb->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | + MDBX_REVERSEDUP | MDBX_INTEGERDUP))) + chk_object_issue(scope, "entry", record_count, + "unexpected sub-database", "node-flags 0x%x", + node_flags(node)); + else if (data.iov_len != sizeof(MDBX_db)) + chk_object_issue(scope, "entry", record_count, + "wrong sub-database node size", + "node-size %" PRIuSIZE " != %" PRIuSIZE, data.iov_len, + sizeof(MDBX_db)); + else if (scope->stage == MDBX_chk_traversal_maindb) + /* подсчитываем subDB при первом проходе */ + sub_databases += 1; + else { + /* обработка subDB при втором проходе */ + MDBX_db aligned_db; + memcpy(&aligned_db, data.iov_base, 
sizeof(aligned_db)); + MDBX_walk_sdb_t sdb_info = {key, nullptr, nullptr}; + sdb_info.internal = &aligned_db; + MDBX_chk_subdb_t *subdb; + err = chk_get_sdb(scope, &sdb_info, &subdb); + if (unlikely(err)) + goto bailout; + if (subdb->cookie) { + err = chk_scope_begin(chk, 0, MDBX_chk_traversal_subdbs, subdb, + &usr->result.problems_kv, + "Processing subDB %s...", + chk_v2a(chk, &subdb->name)); + if (likely(!err)) { + err = chk_db(usr->scope, (MDBX_dbi)-1, subdb, chk_handle_kv); + if (err != MDBX_EINTR && err != MDBX_RESULT_TRUE) + usr->result.subdb_processed += 1; + } + err = chk_scope_restore(scope, err); + if (unlikely(err)) + goto bailout; + } else + chk_line_end(chk_flush( + chk_print(chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s...", chk_v2a(chk, &subdb->name)))); + } + } else if (handler) { + err = handler(scope, sdb, record_count, &key, &data); + if (unlikely(err)) + goto bailout; + } + + err = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT); + } + + err = (err != MDBX_NOTFOUND) ? 
chk_error_rc(scope, err, "mdbx_cursor_get") + : MDBX_SUCCESS; + if (err == MDBX_SUCCESS && record_count != db->md_entries) + chk_scope_issue(scope, + "different number of entries %" PRIuSIZE " != %" PRIu64, + record_count, db->md_entries); +bailout: + if (cursor) { + if (handler) { + if (sdb->histogram.key_len.count) { + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info); + line = histogram_dist(line, &sdb->histogram.key_len, + "key length density", "0/1", false); + chk_line_feed(line); + line = histogram_dist(line, &sdb->histogram.val_len, + "value length density", "0/1", false); + chk_line_end(line); + } + if (scope->stage == MDBX_chk_traversal_maindb) + usr->result.subdb_total = sub_databases; + if (chk->cb->subdb_conclude) + err = chk->cb->subdb_conclude(usr, sdb, cursor, err); + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution); + line = chk_print(line, "summary: %" PRIuSIZE " records,", record_count); + if (dups || (sdb->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | + MDBX_REVERSEDUP | MDBX_INTEGERDUP))) + line = chk_print(line, " %" PRIuSIZE " dups,", dups); + if (sub_databases || dbi == MAIN_DBI) + line = chk_print(line, " %" PRIuSIZE " sub-databases,", sub_databases); + line = chk_print(line, + " %" PRIuSIZE " key's bytes," + " %" PRIuSIZE " data's bytes," + " %" PRIuSIZE " problem(s)", + sdb->histogram.key_len.amount, + sdb->histogram.val_len.amount, scope->subtotal_issues); + chk_line_end(chk_flush(line)); + } + + mdbx_cursor_close(cursor); + if (dbi >= CORE_DBS && !txn->mt_cursors[dbi] && + txn->mt_dbistate[dbi] == (DBI_FRESH | DBI_VALID | DBI_USRVALID)) + mdbx_dbi_close(env, dbi); + } + return err; +} + +/* Handler called by chk_db() for every record of the GC/freeDB table. + * Expects the key to be a txnid_t and the data to be a pgno_t array whose + * first element is the count of page numbers that follow. Malformed records + * are reported via chk_object_issue(); the page count is added to + * usr->result.gc_pages (and to usr->result.reclaimable_pages when the record + * is older than the latter reader), listed pages are marked in chk->pagemap + * when it is present, and the page spans are printed when sdb->cookie is set. + * Returns the result of chk_check_break(). */ +__cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, + MDBX_chk_subdb_t *sdb, + const size_t record_number, const MDBX_val *key, + const MDBX_val *data) { + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_context_t *const usr = chk->usr; + assert(sdb == &chk->subdb_gc); + (void)sdb; + const char *bad = ""; +
pgno_t *iptr = data->iov_base; + + if (key->iov_len != sizeof(txnid_t)) + chk_object_issue(scope, "entry", record_number, "wrong txn-id size", + "key-size %" PRIuSIZE, key->iov_len); + else { + txnid_t txnid; + memcpy(&txnid, key->iov_base, sizeof(txnid)); + if (txnid < 1 || txnid > usr->txn->mt_txnid) + chk_object_issue(scope, "entry", record_number, "wrong txn-id", + "%" PRIaTXN, txnid); + else { + if (data->iov_len < sizeof(pgno_t) || data->iov_len % sizeof(pgno_t)) + chk_object_issue(scope, "entry", txnid, "wrong idl size", "%" PRIuPTR, + data->iov_len); + /* the first pgno_t slot holds the number of entries that follow */ + size_t number = (data->iov_len >= sizeof(pgno_t)) ? *iptr++ : 0; + if (number < 1 || number > MDBX_PGL_LIMIT) { + chk_object_issue(scope, "entry", txnid, "wrong idl length", "%" PRIuPTR, + number); + /* the stored length is not trustworthy: clamp it to the entries + * actually present, so the loops below never read past the record */ + const size_t present = data->iov_len / sizeof(pgno_t); + if (number >= present) + number = present ? present - 1 : 0; + } else if ((number + 1) * sizeof(pgno_t) > data->iov_len) { + chk_object_issue(scope, "entry", txnid, "trimmed idl", + "%" PRIuSIZE " > %" PRIuSIZE " (corruption)", + (number + 1) * sizeof(pgno_t), data->iov_len); + number = data->iov_len / sizeof(pgno_t) - 1; + } else if (data->iov_len - (number + 1) * sizeof(pgno_t) >= + /* LY: allow gap up to one page. it is ok + * and better than shrink-and-retry inside update_gc() */ + usr->env->me_psize) + chk_object_issue(scope, "entry", txnid, "extra idl space", + "%" PRIuSIZE " < %" PRIuSIZE " (minor, not a trouble)", + (number + 1) * sizeof(pgno_t), data->iov_len); + + usr->result.gc_pages += number; + if (chk->envinfo.mi_latter_reader_txnid > txnid) + usr->result.reclaimable_pages += number; + + size_t prev = MDBX_PNL_ASCENDING ? 
NUM_METAS - 1 : usr->txn->mt_next_pgno; + /* span is never reset inside this loop, so it only grows and ends up + * as the value printed as "maxspan" below */ + size_t span = 1; + for (size_t i = 0; i < number; ++i) { + const size_t pgno = iptr[i]; + if (pgno < NUM_METAS) + chk_object_issue(scope, "entry", txnid, "wrong idl entry", + "pgno %" PRIuSIZE " < meta-pages %u", pgno, + NUM_METAS); + else if (pgno >= usr->result.backed_pages) + chk_object_issue(scope, "entry", txnid, "wrong idl entry", + "pgno %" PRIuSIZE " > backed-pages %" PRIuSIZE, pgno, + usr->result.backed_pages); + else if (pgno >= usr->result.alloc_pages) + chk_object_issue(scope, "entry", txnid, "wrong idl entry", + "pgno %" PRIuSIZE " > alloc-pages %" PRIuSIZE, pgno, + usr->result.alloc_pages - 1); + else { + if (MDBX_PNL_DISORDERED(prev, pgno)) { + bad = " [bad sequence]"; + chk_object_issue( + scope, "entry", txnid, "bad sequence", + "%" PRIuSIZE " %c [%" PRIuSIZE "].%" PRIuSIZE, prev, + (prev == pgno) ? '=' : (MDBX_PNL_ASCENDING ? '>' : '<'), i, + pgno); + } + if (chk->pagemap) { + const intptr_t id = chk->pagemap[pgno]; + if (id == 0) + chk->pagemap[pgno] = -1 /* mark the pgno listed in GC */; + else if (id > 0) { + /* id - 1 is used as an index into chk->subdb[] just below */ + assert(id - 1 < (intptr_t)ARRAY_LENGTH(chk->subdb)); + chk_object_issue(scope, "page", pgno, "already used", "by %s", + chk_v2a(chk, &chk->subdb[id - 1]->name)); + } else + chk_object_issue(scope, "page", pgno, "already listed in GC", + nullptr); + } + } + prev = pgno; + while (i + span < number && + iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) + : pgno_sub(pgno, span))) + ++span; + } + if (sdb->cookie) { + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_details), + "transaction %" PRIaTXN ", %" PRIuSIZE + " pages, maxspan %" PRIuSIZE "%s", + txnid, number, span, bad)); + for (size_t i = 0; i < number; i += span) { + const size_t pgno = iptr[i]; + for (span = 1; + i + span < number && + iptr[i + span] == (MDBX_PNL_ASCENDING ? 
pgno_add(pgno, span) + : pgno_sub(pgno, span)); + ++span) + ; + histogram_acc(span, &sdb->histogram.nested_tree); + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra); + if (line) { + if (span > 1) + line = + chk_print(line, "%9" PRIuSIZE "[%" PRIuSIZE "]", pgno, span); + else + line = chk_print(line, "%9" PRIuSIZE, pgno); + chk_line_end(line); + int err = chk_check_break(scope); + if (err) + return err; + } + } + } + } + } + return chk_check_break(scope); +} + +__cold static int env_chk(MDBX_chk_scope_t *const scope) { + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_context_t *const usr = chk->usr; + MDBX_env *const env = usr->env; + MDBX_txn *const txn = usr->txn; + int err = + env_info(env, txn, &chk->envinfo, sizeof(chk->envinfo), &chk->troika); + if (unlikely(err)) + return chk_error_rc(scope, err, "env_info"); + + MDBX_chk_line_t *line = + chk_puts(chk_line_begin(scope, MDBX_chk_info), "current boot-id "); + if (chk->envinfo.mi_bootid.current.x | chk->envinfo.mi_bootid.current.y) + line = chk_print(line, "%016" PRIx64 "-%016" PRIx64, + chk->envinfo.mi_bootid.current.x, + chk->envinfo.mi_bootid.current.y); + else + line = chk_puts(line, "unavailable"); + chk_line_end(line); + + err = osal_filesize(env->me_lazy_fd, &env->me_dxb_mmap.filesize); + if (unlikely(err)) + return chk_error_rc(scope, err, "osal_filesize"); + + //-------------------------------------------------------------------------- + + err = chk_scope_begin(chk, 1, MDBX_chk_meta, nullptr, + &usr->result.problems_meta, "Peek the meta-pages..."); + if (likely(!err)) { + MDBX_chk_scope_t *const inner = usr->scope; + const uint64_t dxbfile_pages = + env->me_dxb_mmap.filesize >> env->me_psize2log; + usr->result.alloc_pages = txn->mt_next_pgno; + usr->result.backed_pages = bytes2pgno(env, env->me_dxb_mmap.current); + if (unlikely(usr->result.backed_pages > dxbfile_pages)) + chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64, + usr->result.backed_pages, 
dxbfile_pages); + if (unlikely(dxbfile_pages < NUM_METAS)) + chk_scope_issue(inner, "file-pages %" PRIu64 " < %u", dxbfile_pages, + NUM_METAS); + if (unlikely(usr->result.backed_pages < NUM_METAS)) + chk_scope_issue(inner, "backed-pages %zu < %u", usr->result.backed_pages, + NUM_METAS); + if (unlikely(usr->result.backed_pages < NUM_METAS || + dxbfile_pages < NUM_METAS)) + return MDBX_CORRUPTED; + if (unlikely(usr->result.backed_pages > (size_t)MAX_PAGENO + 1)) { + chk_scope_issue(inner, "backed-pages %zu > max-pages %zu", + usr->result.backed_pages, (size_t)MAX_PAGENO + 1); + usr->result.backed_pages = MAX_PAGENO + 1; + } + + if ((env->me_flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) { + if (unlikely(usr->result.backed_pages > dxbfile_pages)) { + chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64, + usr->result.backed_pages, dxbfile_pages); + usr->result.backed_pages = (size_t)dxbfile_pages; + } + if (unlikely(usr->result.alloc_pages > usr->result.backed_pages)) { + chk_scope_issue(scope, "alloc-pages %zu > backed-pages %zu", + usr->result.alloc_pages, usr->result.backed_pages); + usr->result.alloc_pages = usr->result.backed_pages; + } + } else { + /* DB may be shrunk by writer down to the allocated (but unused) pages. 
*/ + if (unlikely(usr->result.alloc_pages > usr->result.backed_pages)) { + chk_scope_issue(inner, "alloc-pages %zu > backed-pages %zu", + usr->result.alloc_pages, usr->result.backed_pages); + usr->result.alloc_pages = usr->result.backed_pages; + } + if (unlikely(usr->result.alloc_pages > dxbfile_pages)) { + chk_scope_issue(inner, "alloc-pages %zu > file-pages %" PRIu64, + usr->result.alloc_pages, dxbfile_pages); + usr->result.alloc_pages = (size_t)dxbfile_pages; + } + if (unlikely(usr->result.backed_pages > dxbfile_pages)) + usr->result.backed_pages = (size_t)dxbfile_pages; + } + + line = chk_line_feed(chk_print( + chk_line_begin(inner, MDBX_chk_info), + "pagesize %u (%u system), max keysize %u..%u" + ", max readers %u", + env->me_psize, env->me_os_psize, + mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT), + mdbx_env_get_maxkeysize_ex(env, MDBX_DB_DEFAULTS), env->me_maxreaders)); + line = chk_line_feed( + chk_print_size(line, "mapsize ", env->me_dxb_mmap.current, nullptr)); + if (txn->mt_geo.lower == txn->mt_geo.upper) + line = chk_print_size( + line, "fixed datafile: ", chk->envinfo.mi_geo.current, nullptr); + else { + line = chk_print_size( + line, "dynamic datafile: ", chk->envinfo.mi_geo.lower, nullptr); + line = chk_print_size(line, " .. 
", chk->envinfo.mi_geo.upper, ", "); + line = chk_print_size(line, "+", chk->envinfo.mi_geo.grow, ", "); + + line = chk_line_feed( + chk_print_size(line, "-", chk->envinfo.mi_geo.shrink, nullptr)); + line = chk_print_size( + line, "current datafile: ", chk->envinfo.mi_geo.current, nullptr); + } + /* the page count printed next is taken from the txn geometry, which must + * agree with the current size reported in the env-info snapshot */ + tASSERT(txn, txn->mt_geo.now == chk->envinfo.mi_geo.current / + chk->envinfo.mi_dxb_pagesize); + chk_line_end(chk_print(line, ", %u pages", txn->mt_geo.now)); +#if defined(_WIN32) || defined(_WIN64) || MDBX_DEBUG + if (txn->mt_geo.shrink_pv && txn->mt_geo.now != txn->mt_geo.upper && + scope->verbosity >= MDBX_chk_verbose) { + line = chk_line_begin(inner, MDBX_chk_notice); + chk_line_feed(chk_print( + line, " > WARNING: Due to Windows system limitations a file couldn't")); + chk_line_feed(chk_print( + line, " > be truncated while the database is opened. So, the size")); + chk_line_feed(chk_print( + line, " > of database file may be larger than the database itself,")); + chk_line_end(chk_print( + line, " > until it will be closed or reopened in read-write mode.")); + } +#endif /* Windows || Debug */ + chk_verbose_meta(inner, 0); + chk_verbose_meta(inner, 1); + chk_verbose_meta(inner, 2); + + if (env->me_stuck_meta >= 0) { + chk_line_end(chk_print(chk_line_begin(inner, MDBX_chk_processing), + "skip checking meta-pages since the %u" + " is selected for verification", + env->me_stuck_meta)); + line = chk_line_feed( + chk_print(chk_line_begin(inner, MDBX_chk_resolution), + "transactions: recent %" PRIu64 ", " + "selected for verification %" PRIu64 ", lag %" PRIi64, + chk->envinfo.mi_recent_txnid, + chk->envinfo.mi_meta_txnid[env->me_stuck_meta], + chk->envinfo.mi_recent_txnid - + chk->envinfo.mi_meta_txnid[env->me_stuck_meta])); + chk_line_end(line); + } else { + chk_line_end(chk_puts(chk_line_begin(inner, MDBX_chk_verbose), + "performs check for meta-pages clashes")); + const unsigned meta_clash_mask = meta_eq_mask(&chk->troika); + if (meta_clash_mask & 1) + chk_scope_issue(inner, 
"meta-%d and meta-%d are clashed", 0, 1); + if (meta_clash_mask & 2) + chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 1, 2); + if (meta_clash_mask & 4) + chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 2, 0); + + const unsigned prefer_steady_metanum = chk->troika.prefer_steady; + const uint64_t prefer_steady_txnid = + chk->troika.txnid[prefer_steady_metanum]; + const unsigned recent_metanum = chk->troika.recent; + const uint64_t recent_txnid = chk->troika.txnid[recent_metanum]; + if (env->me_flags & MDBX_EXCLUSIVE) { + chk_line_end( + chk_puts(chk_line_begin(inner, MDBX_chk_verbose), + "performs full check recent-txn-id with meta-pages")); + if (prefer_steady_txnid != chk->envinfo.mi_recent_txnid) { + chk_scope_issue( + inner, + "steady meta-%d txn-id mismatch recent-txn-id (%" PRIi64 + " != %" PRIi64 ")", + prefer_steady_metanum, prefer_steady_txnid, + chk->envinfo.mi_recent_txnid); + } + } else if (chk->write_locked) { + chk_line_end( + chk_puts(chk_line_begin(inner, MDBX_chk_verbose), + "performs lite check recent-txn-id with meta-pages (not a " + "monopolistic mode)")); + if (recent_txnid != chk->envinfo.mi_recent_txnid) { + chk_scope_issue(inner, + "weak meta-%d txn-id mismatch recent-txn-id (%" PRIi64 + " != %" PRIi64 ")", + recent_metanum, recent_txnid, + chk->envinfo.mi_recent_txnid); + } + } else { + chk_line_end(chk_puts( + chk_line_begin(inner, MDBX_chk_verbose), + "skip check recent-txn-id with meta-pages (monopolistic or " + "read-write mode only)")); + } + + chk_line_end(chk_print( + chk_line_begin(inner, MDBX_chk_resolution), + "transactions: recent %" PRIu64 ", latter reader %" PRIu64 + ", lag %" PRIi64, + chk->envinfo.mi_recent_txnid, chk->envinfo.mi_latter_reader_txnid, + chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid)); + } + } + err = chk_scope_restore(scope, err); + + //-------------------------------------------------------------------------- + + eASSERT(env, err == MDBX_SUCCESS); + if (chk->flags & 
MDBX_CHK_SKIP_BTREE_TRAVERSAL) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), + "Skipping %s traversal...", "b-tree")); + else { + err = chk_scope_begin( + chk, -1, MDBX_chk_traversal_tree, nullptr, &usr->result.tree_problems, + "Traversal %s by txn#%" PRIaTXN "...", "b-tree", txn->mt_txnid); + if (likely(!err)) + err = chk_tree(usr->scope); + if (usr->result.tree_problems && usr->result.gc_tree_problems == 0) + usr->result.gc_tree_problems = usr->result.tree_problems; + if (usr->result.tree_problems && usr->result.kv_tree_problems == 0) + usr->result.kv_tree_problems = usr->result.tree_problems; + chk_scope_restore(scope, err); + } + + if (usr->result.gc_tree_problems > 0) + chk_line_end(chk_print( + chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))", + chk_v2a(chk, MDBX_CHK_GC), "b-tree", + usr->result.problems_gc = usr->result.gc_tree_problems)); + else { + err = chk_scope_begin(chk, -1, MDBX_chk_traversal_freedb, &chk->subdb_gc, + &usr->result.problems_gc, + "Traversal %s by txn#%" PRIaTXN "...", "GC/freeDB", + txn->mt_txnid); + if (likely(!err)) + err = chk_db(usr->scope, FREE_DBI, &chk->subdb_gc, chk_handle_gc); + line = chk_line_begin(scope, MDBX_chk_info); + if (line) { + histogram_print(scope, line, &chk->subdb_gc.histogram.nested_tree, + "span(s)", "single", false); + chk_line_end(line); + } + if (usr->result.problems_gc == 0 && + (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) { + const size_t used_pages = usr->result.alloc_pages - usr->result.gc_pages; + if (usr->result.processed_pages != used_pages) + chk_scope_issue(usr->scope, + "used pages mismatch (%" PRIuSIZE + "(walked) != %" PRIuSIZE "(allocated - GC))", + usr->result.processed_pages, used_pages); + if (usr->result.unused_pages != usr->result.gc_pages) + chk_scope_issue(usr->scope, + "GC pages mismatch (%" PRIuSIZE + "(expected) != %" PRIuSIZE "(GC))", + usr->result.unused_pages, usr->result.gc_pages); + } 
+ } + chk_scope_restore(scope, err); + + //-------------------------------------------------------------------------- + + err = chk_scope_begin(chk, 1, MDBX_chk_space, nullptr, nullptr, + "Page allocation:"); + const double percent_boundary_reciprocal = 100.0 / txn->mt_geo.upper; + const double percent_backed_reciprocal = 100.0 / usr->result.backed_pages; + const size_t detained = usr->result.gc_pages - usr->result.reclaimable_pages; + const size_t available2boundary = txn->mt_geo.upper - + usr->result.alloc_pages + + usr->result.reclaimable_pages; + const size_t available2backed = usr->result.backed_pages - + usr->result.alloc_pages + + usr->result.reclaimable_pages; + const size_t remained2boundary = txn->mt_geo.upper - usr->result.alloc_pages; + const size_t remained2backed = + usr->result.backed_pages - usr->result.alloc_pages; + + const size_t used = (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) + ? usr->result.alloc_pages - usr->result.gc_pages + : usr->result.processed_pages; + + line = chk_line_begin(usr->scope, MDBX_chk_info); + line = chk_print(line, + "backed by file: %" PRIuSIZE " pages (%.1f%%)" + ", %" PRIuSIZE " left to boundary (%.1f%%)", + usr->result.backed_pages, + usr->result.backed_pages * percent_boundary_reciprocal, + txn->mt_geo.upper - usr->result.backed_pages, + (txn->mt_geo.upper - usr->result.backed_pages) * + percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print( + line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", + "used", used, used * percent_backed_reciprocal, + used * percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print( + line, + "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE + " to boundary (%.1f%% of boundary)", + "remained", remained2backed, remained2backed * percent_backed_reciprocal, + remained2boundary, remained2boundary * percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print( + line, + "reclaimable: %" PRIuSIZE 
" (%.1f%% of backed, %.1f%% of boundary)" + ", GC %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)", + usr->result.reclaimable_pages, + usr->result.reclaimable_pages * percent_backed_reciprocal, + usr->result.reclaimable_pages * percent_boundary_reciprocal, + usr->result.gc_pages, usr->result.gc_pages * percent_backed_reciprocal, + usr->result.gc_pages * percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print( + line, + "detained by reader(s): %" PRIuSIZE + " (%.1f%% of backed, %.1f%% of boundary)" + ", %u reader(s), lag %" PRIi64, + detained, detained * percent_backed_reciprocal, + detained * percent_boundary_reciprocal, chk->envinfo.mi_numreaders, + chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid); + line = chk_line_feed(line); + + line = chk_print( + line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", + "allocated", usr->result.alloc_pages, + usr->result.alloc_pages * percent_backed_reciprocal, + usr->result.alloc_pages * percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print(line, + "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE + " to boundary (%.1f%% of boundary)", + "available", available2backed, + available2backed * percent_backed_reciprocal, + available2boundary, + available2boundary * percent_boundary_reciprocal); + chk_line_end(line); + + line = chk_line_begin(usr->scope, MDBX_chk_resolution); + line = chk_print(line, "%s %" PRIaPGNO " pages", + (txn->mt_geo.upper == txn->mt_geo.now) ? 
"total" : "upto", + txn->mt_geo.upper); + line = chk_print(line, ", backed %" PRIuSIZE " (%.1f%%)", + usr->result.backed_pages, + usr->result.backed_pages * percent_boundary_reciprocal); + line = chk_print(line, ", allocated %" PRIuSIZE " (%.1f%%)", + usr->result.alloc_pages, + usr->result.alloc_pages * percent_boundary_reciprocal); + line = + chk_print(line, ", available %" PRIuSIZE " (%.1f%%)", available2boundary, + available2boundary * percent_boundary_reciprocal); + chk_line_end(line); + chk_scope_restore(scope, err); + + //-------------------------------------------------------------------------- + + if (chk->flags & MDBX_CHK_SKIP_KV_TRAVERSAL) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), + "Skipping %s traversal...", "key-value")); + else if ((usr->result.problems_kv = usr->result.kv_tree_problems) > 0) + chk_line_end(chk_print( + chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))", + chk_v2a(chk, MDBX_CHK_MAIN), "key-value", + usr->result.problems_kv = usr->result.kv_tree_problems)); + else { + err = + chk_scope_begin(chk, 0, MDBX_chk_traversal_maindb, &chk->subdb_main, + &usr->result.problems_kv, "Processing %s...", "MainDB"); + if (likely(!err)) + err = chk_db(usr->scope, MAIN_DBI, &chk->subdb_main, chk_handle_kv); + chk_scope_restore(scope, err); + + if (usr->result.problems_kv && usr->result.subdb_total) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s", "sub-database(s)")); + else if (usr->result.problems_kv == 0 && usr->result.subdb_total == 0) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "No %s", + "sub-database(s)")); + else if (usr->result.problems_kv == 0 && usr->result.subdb_total) { + err = chk_scope_begin(chk, 1, MDBX_chk_traversal_subdbs, nullptr, + &usr->result.problems_kv, + "Traversal %s by txn#%" PRIaTXN "...", + "sub-database(s)", txn->mt_txnid); + if (!err) + err = chk_db(usr->scope, MAIN_DBI, 
&chk->subdb_main, nullptr); + if (usr->scope->subtotal_issues) + chk_line_end( + chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution), + "processed %" PRIuSIZE " of %" PRIuSIZE " subDb(s)" + ", %" PRIuSIZE " problems(s)", + usr->result.subdb_processed, usr->result.subdb_total, + usr->scope->subtotal_issues)); + } + chk_scope_restore(scope, err); + } + + return chk_scope_end(chk, chk_scope_begin(chk, 0, MDBX_chk_conclude, nullptr, + nullptr, nullptr)); +} + +/* Registers one more problem for the check that owns `ctx`. + * The context is accepted only when it is non-NULL, is the one bound to its + * internal state, and both a problem counter and a current scope are set; + * then the counter and the current scope's subtotal_issues are incremented. + * Returns MDBX_SUCCESS, or MDBX_EINVAL when the context is not accepted. */ +__cold int mdbx_env_chk_problem(MDBX_chk_context_t *ctx) { + if (likely(ctx && ctx->internal && ctx->internal->usr == ctx && + ctx->internal->problem_counter && ctx->scope)) { + *ctx->internal->problem_counter += 1; + ctx->scope->subtotal_issues += 1; + return MDBX_SUCCESS; + } + return MDBX_EINVAL; +} + +/* Entry point of the environment check. + * Validates `env` with check_env(), rejects a NULL `cb`/`ctx` or a `ctx` that + * already has an internal state (MDBX_EINVAL), and allocates the internal + * state (MDBX_ENOMEM on failure). Then it runs the init, lock, check + * (env_chk), unlock and finalize stages and returns the resulting code. + * A zero `timeout_seconds_16dot16` leaves monotime_timeout at 0; otherwise + * it is converted with osal_16dot16_to_monotime() and added to the current + * osal_monotime(). */ +__cold int mdbx_env_chk(MDBX_env *env, const struct MDBX_chk_callbacks *cb, + MDBX_chk_context_t *ctx, + const enum MDBX_chk_flags_t flags, + enum MDBX_chk_severity verbosity, + unsigned timeout_seconds_16dot16) { + int err, rc = check_env(env, false); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + if (unlikely(!cb || !ctx || ctx->internal)) + return MDBX_EINVAL; + + MDBX_chk_internal_t *const chk = osal_calloc(1, sizeof(MDBX_chk_internal_t)); + if (unlikely(!chk)) + return MDBX_ENOMEM; + + /* bind the caller's context and the internal state to each other */ + chk->cb = cb; + chk->usr = ctx; + chk->usr->internal = chk; + chk->usr->env = env; + chk->flags = flags; + + /* the GC and main tables occupy the FREE_DBI and MAIN_DBI slots */ + chk->subdb_gc.id = -1; + chk->subdb_gc.name.iov_base = MDBX_CHK_GC; + chk->subdb[FREE_DBI] = &chk->subdb_gc; + + chk->subdb_main.id = -1; + chk->subdb_main.name.iov_base = MDBX_CHK_MAIN; + chk->subdb[MAIN_DBI] = &chk->subdb_main; + + chk->monotime_timeout = + timeout_seconds_16dot16 + ? 
osal_16dot16_to_monotime(timeout_seconds_16dot16) + osal_monotime() + : 0; + chk->usr->scope_nesting = 0; + chk->usr->result.subdbs = (const void *)&chk->subdb; + + MDBX_chk_scope_t *const top = chk->scope_stack; + top->verbosity = verbosity; + top->internal = chk; + + // init + rc = chk_scope_end( + chk, chk_scope_begin(chk, 0, MDBX_chk_init, nullptr, nullptr, nullptr)); + + // lock + if (likely(!rc)) + rc = chk_scope_begin( + chk, 0, MDBX_chk_lock, nullptr, nullptr, "Taking %slock...", + (env->me_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) ? "" : "read "); + if (likely(!rc) && (env->me_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0) { + rc = mdbx_txn_lock(env, false); + if (unlikely(rc)) + chk_error_rc(ctx->scope, rc, "mdbx_txn_lock"); + else + chk->write_locked = true; + } + if (likely(!rc)) { + rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &ctx->txn); + if (unlikely(rc)) + chk_error_rc(ctx->scope, rc, "mdbx_txn_begin"); + } + chk_scope_end(chk, rc); + + // doit + if (likely(!rc)) { + chk->subdb_gc.flags = ctx->txn->mt_dbs[FREE_DBI].md_flags; + chk->subdb_main.flags = ctx->txn->mt_dbs[MAIN_DBI].md_flags; + rc = env_chk(top); + } + + // unlock + if (ctx->txn || chk->write_locked) { + chk_scope_begin(chk, 0, MDBX_chk_unlock, nullptr, nullptr, nullptr); + if (ctx->txn) { + err = mdbx_txn_abort(ctx->txn); + if (err && !rc) + rc = err; + ctx->txn = nullptr; + } + if (chk->write_locked) + mdbx_txn_unlock(env); + rc = chk_scope_end(chk, rc); + } + + // finalize + err = chk_scope_begin(chk, 0, MDBX_chk_finalize, nullptr, nullptr, nullptr); + rc = chk_scope_end(chk, err ? 
err : rc); + chk_dispose(chk); + return rc; +} + /******************************************************************************/ /* *INDENT-OFF* */ /* clang-format off */ diff --git a/src/internals.h b/src/internals.h index 1664dcd7..c871b3df 100644 --- a/src/internals.h +++ b/src/internals.h @@ -703,7 +703,8 @@ typedef struct MDBX_page { #define PAGETYPE_WHOLE(p) ((uint8_t)(p)->mp_flags) -/* Drop legacy P_DIRTY flag for sub-pages for compatilibity */ +/* Drop legacy P_DIRTY flag for sub-pages for compatilibity, + * for assertions only. */ #define PAGETYPE_COMPAT(p) \ (unlikely(PAGETYPE_WHOLE(p) & P_SUBP) \ ? PAGETYPE_WHOLE(p) & ~(P_SUBP | P_LEGACY_DIRTY) \ @@ -1136,10 +1137,10 @@ typedef struct troika { #if MDBX_WORDBITS > 32 /* Workaround for false-positives from Valgrind */ uint32_t unused_pad; #endif -#define TROIKA_HAVE_STEADY(troika) ((troika)->fsm & 7) -#define TROIKA_STRICT_VALID(troika) ((troika)->tail_and_flags & 64) -#define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128) -#define TROIKA_TAIL(troika) ((troika)->tail_and_flags & 3) +#define TROIKA_HAVE_STEADY(troika) ((troika)->fsm & 7u) +#define TROIKA_STRICT_VALID(troika) ((troika)->tail_and_flags & 64u) +#define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128u) +#define TROIKA_TAIL(troika) ((troika)->tail_and_flags & 3u) txnid_t txnid[NUM_METAS]; } meta_troika_t; @@ -1787,3 +1788,33 @@ MDBX_MAYBE_UNUSED static void static_checks(void) { (size_t)(size), __LINE__); \ ASAN_UNPOISON_MEMORY_REGION(addr, size); \ } while (0) + +/******************************************************************************/ + +/** \brief Page types for traverse the b-tree. 
+ * \see mdbx_env_pgwalk() \see MDBX_pgvisitor_func */ +enum MDBX_page_type_t { + MDBX_page_broken, + MDBX_page_large, + MDBX_page_branch, + MDBX_page_leaf, + MDBX_page_dupfixed_leaf, + MDBX_subpage_leaf, + MDBX_subpage_dupfixed_leaf, + MDBX_subpage_broken, +}; +typedef enum MDBX_page_type_t MDBX_page_type_t; + +typedef struct MDBX_walk_sdb { + MDBX_val name; + struct MDBX_db *internal, *nested; +} MDBX_walk_sdb_t; + +/** \brief Callback function for traverse the b-tree. \see mdbx_env_pgwalk() */ +typedef int +MDBX_pgvisitor_func(const size_t pgno, const unsigned number, void *const ctx, + const int deep, const MDBX_walk_sdb_t *subdb, + const size_t page_size, const MDBX_page_type_t page_type, + const MDBX_error_t err, const size_t nentries, + const size_t payload_bytes, const size_t header_bytes, + const size_t unused_bytes); diff --git a/src/mdbx_chk.c b/src/mdbx_chk.c index a8c97372..c590253d 100644 --- a/src/mdbx_chk.c +++ b/src/mdbx_chk.c @@ -25,19 +25,6 @@ #include -typedef struct flagbit { - int bit; - const char *name; -} flagbit; - -const flagbit dbflags[] = {{MDBX_DUPSORT, "dupsort"}, - {MDBX_INTEGERKEY, "integerkey"}, - {MDBX_REVERSEKEY, "reversekey"}, - {MDBX_DUPFIXED, "dupfixed"}, - {MDBX_REVERSEDUP, "reversedup"}, - {MDBX_INTEGERDUP, "integerdup"}, - {0, nullptr}}; - #if defined(_WIN32) || defined(_WIN64) #include "wingetopt.h" @@ -72,181 +59,171 @@ static void signal_handler(int sig) { #define EXIT_FAILURE_CHECK_MAJOR (EXIT_FAILURE + 1) #define EXIT_FAILURE_CHECK_MINOR EXIT_FAILURE -typedef struct { - MDBX_val name; - struct { - uint64_t branch, large_count, large_volume, leaf; - uint64_t subleaf_dupsort, leaf_dupfixed, subleaf_dupfixed; - uint64_t total, empty, other; - } pages; - uint64_t payload_bytes; - uint64_t lost_bytes; -} walk_dbi_t; - -struct { - short *pagemap; - uint64_t total_payload_bytes; - uint64_t pgcount; - walk_dbi_t - dbi[MDBX_MAX_DBI + CORE_DBS + /* account pseudo-entry for meta */ 1]; -} walk; - -#define dbi_free 
walk.dbi[FREE_DBI] -#define dbi_main walk.dbi[MAIN_DBI] -#define dbi_meta walk.dbi[CORE_DBS] - -int envflags = MDBX_RDONLY | MDBX_EXCLUSIVE | MDBX_VALIDATION; +enum MDBX_env_flags_t env_flags = + MDBX_RDONLY | MDBX_EXCLUSIVE | MDBX_VALIDATION; MDBX_env *env; MDBX_txn *txn; -MDBX_envinfo envinfo; -size_t userdb_count, skipped_subdb; -uint64_t total_unused_bytes, reclaimable_pages, gc_pages, alloc_pages, - unused_pages, backed_pages; -unsigned verbose; -bool ignore_wrong_order, quiet, dont_traversal; +unsigned verbose = 0; +bool quiet; MDBX_val only_subdb; int stuck_meta = -1; +MDBX_chk_context_t chk; +bool turn_meta = false; +bool force_turn_meta = false; +enum MDBX_chk_flags_t chk_flags = MDBX_CHK_DEFAULTS; +enum MDBX_chk_stage chk_stage = MDBX_chk_none; -struct problem { - struct problem *pr_next; - size_t count; - const char *caption; -}; +static MDBX_chk_line_t line_struct; +static size_t anchor_lineno; +static size_t line_count; +static FILE *line_output; -struct problem *problems_list; -unsigned total_problems, data_tree_problems, gc_tree_problems; - -static void MDBX_PRINTF_ARGS(1, 2) print(const char *msg, ...) 
{ - if (!quiet) { - va_list args; - - fflush(stderr); - va_start(args, msg); - vfprintf(stdout, msg, args); - va_end(args); - } -} - -static MDBX_val printable_buf; -static void free_printable_buf(void) { osal_free(printable_buf.iov_base); } - -static const char *sdb_name(const MDBX_val *val) { - if (val == MDBX_PGWALK_MAIN) - return "@MAIN"; - if (val == MDBX_PGWALK_GC) - return "@GC"; - if (val == MDBX_PGWALK_META) - return "@META"; - - const unsigned char *const data = val->iov_base; - const size_t len = val->iov_len; - if (data == MDBX_PGWALK_MAIN) - return "@MAIN"; - if (data == MDBX_PGWALK_GC) - return "@GC"; - if (data == MDBX_PGWALK_META) - return "@META"; - - if (!len) - return ""; - if (!data) - return ""; - if (len > 65536) { - static char buf[64]; - /* NOTE: There is MSYS2 MinGW bug if you here got - * the "unknown conversion type character ‘z’ in format [-Werror=format=]" - * https://stackoverflow.com/questions/74504432/whats-the-proper-way-to-tell-mingw-based-gcc-to-use-ansi-stdio-output-on-windo - */ - snprintf(buf, sizeof(buf), "", len); - return buf; - } - - bool printable = true; - bool quoting = false; - size_t xchars = 0; - for (size_t i = 0; i < val->iov_len && printable; ++i) { - quoting |= data[i] != '_' && isalnum(data[i]) == 0; - printable = isprint(data[i]) != 0 || - (data[i] < ' ' && ++xchars < 4 && len > xchars * 4); - } - - size_t need = len + 1; - if (quoting || !printable) - need += len + /* quotes */ 2 + 2 * /* max xchars */ 4; - if (need > printable_buf.iov_len) { - void *ptr = osal_realloc(printable_buf.iov_base, need); - if (!ptr) - return ""; - if (!printable_buf.iov_base) - atexit(free_printable_buf); - printable_buf.iov_base = ptr; - printable_buf.iov_len = need; - } - - char *out = printable_buf.iov_base; - if (!quoting) { - memcpy(out, data, len); - out += len; - } else if (printable) { - *out++ = '\''; - for (size_t i = 0; i < len; ++i) { - if (data[i] < ' ') { - assert((char *)printable_buf.iov_base + printable_buf.iov_len 
> - out + 4); - static const char hex[] = "0123456789abcdef"; - out[0] = '\\'; - out[1] = 'x'; - out[2] = hex[data[i] >> 4]; - out[3] = hex[data[i] & 15]; - out += 4; - } else if (strchr("\"'`\\", data[i])) { - assert((char *)printable_buf.iov_base + printable_buf.iov_len > - out + 2); - out[0] = '\\'; - out[1] = data[i]; - out += 2; - } else { - assert((char *)printable_buf.iov_base + printable_buf.iov_len > - out + 1); - *out++ = data[i]; - } +#define LINE_SEVERITY_NONE 255 +static bool lf(void) { + if (!line_struct.empty) { + line_count += 1; + line_struct.empty = true; + line_struct.severity = LINE_SEVERITY_NONE; + line_struct.scope_depth = 0; + if (line_output) { + fputc('\n', line_output); + return true; } - *out++ = '\''; } - assert((char *)printable_buf.iov_base + printable_buf.iov_len > out); - *out = 0; - return printable_buf.iov_base; + return false; } -static void va_log(MDBX_log_level_t level, const char *function, int line, - const char *msg, va_list args) { - static const char *const prefixes[] = { - "!!!fatal: ", " ! " /* error */, " ~ " /* warning */, - " " /* notice */, " // " /* verbose */, " //// " /* debug */, - " ////// " /* trace */ +static void flush(void) { fflush(nullptr); } + +static void lf_flush(void) { + if (lf()) + flush(); +} + +static bool silently(enum MDBX_chk_severity severity) { + int cutoff = + chk.scope ? chk.scope->verbosity >> MDBX_chk_severity_prio_shift + : verbose + (MDBX_chk_result >> MDBX_chk_severity_prio_shift); + int prio = (severity >> MDBX_chk_severity_prio_shift); + if (chk.scope && chk.scope->stage == MDBX_chk_traversal_subdbs && verbose < 2) + prio += 1; + return quiet || cutoff < ((prio > 0) ? prio : 0); +} + +static FILE *prefix(enum MDBX_chk_severity severity) { + if (silently(severity)) + return nullptr; + + static const char *const prefixes[16] = { + "!!!fatal: ", // 0 fatal + " ! 
", // 1 error + " ~ ", // 2 warning + " ", // 3 notice + "", // 4 result + " = ", // 5 resolution + " - ", // 6 processing + " ", // 7 info + " ", // 8 verbose + " ", // 9 details + " // ", // A lib-verbose + " //// ", // B lib-debug + " ////// ", // C lib-trace + " ////// ", // D lib-extra + " ////// ", // E +1 + " ////// " // F +2 }; - FILE *out = stdout; - if (level <= MDBX_LOG_ERROR) { - total_problems++; - out = stderr; + const bool nl = + line_struct.scope_depth != chk.scope_nesting || + (line_struct.severity != severity && + (line_struct.severity != MDBX_chk_processing || + severity < MDBX_chk_result || severity > MDBX_chk_resolution)); + if (nl) + lf(); + if (severity < MDBX_chk_warning) + flush(); + FILE *out = (severity > MDBX_chk_error) ? stdout : stderr; + if (nl || line_struct.empty) { + line_struct.severity = severity; + line_struct.scope_depth = chk.scope_nesting; + unsigned kind = line_struct.severity & MDBX_chk_severity_kind_mask; + if (line_struct.scope_depth || *prefixes[kind]) { + line_struct.empty = false; + for (size_t i = 0; i < line_struct.scope_depth; ++i) + fputs(" ", out); + fputs(prefixes[kind], out); + } } + return line_output = out; +} - if (!quiet && verbose + 1 >= (unsigned)level && - (unsigned)level < ARRAY_LENGTH(prefixes)) { - fflush(nullptr); - fputs(prefixes[level], out); +static void suffix(size_t cookie, const char *str) { + if (cookie == line_count && !line_struct.empty) { + fprintf(line_output, " %s", str); + line_struct.empty = false; + lf(); + } +} + +static size_t MDBX_PRINTF_ARGS(2, 3) + print(enum MDBX_chk_severity severity, const char *msg, ...) { + FILE *out = prefix(severity); + if (out) { + va_list args; + va_start(args, msg); vfprintf(out, msg, args); - - const bool have_lf = msg[strlen(msg) - 1] == '\n'; - if (level == MDBX_LOG_FATAL && function && line) - fprintf(out, have_lf ? " %s(), %u\n" : " (%s:%u)\n", - function + (strncmp(function, "mdbx_", 5) ? 
5 : 0), line); - else if (!have_lf) - fputc('\n', out); - fflush(nullptr); + va_end(args); + line_struct.empty = false; + return line_count; } + return 0; +} +static FILE *MDBX_PRINTF_ARGS(2, 3) + print_ln(enum MDBX_chk_severity severity, const char *msg, ...) { + FILE *out = prefix(severity); + if (out) { + va_list args; + va_start(args, msg); + vfprintf(out, msg, args); + va_end(args); + line_struct.empty = false; + lf(); + } + return out; +} + +static void logger(MDBX_log_level_t level, const char *function, int line, + const char *fmt, va_list args) { + if (level <= MDBX_LOG_ERROR) + mdbx_env_chk_problem(&chk); + + const unsigned kind = (level > MDBX_LOG_NOTICE) + ? level - MDBX_LOG_NOTICE + + (MDBX_chk_extra & MDBX_chk_severity_kind_mask) + : level; + const unsigned prio = kind << MDBX_chk_severity_prio_shift; + enum MDBX_chk_severity severity = prio + kind; + FILE *out = prefix(severity); + if (out) { + vfprintf(out, fmt, args); + const bool have_lf = fmt[strlen(fmt) - 1] == '\n'; + if (level == MDBX_LOG_FATAL && function && line) { + if (have_lf) + for (size_t i = 0; i < line_struct.scope_depth; ++i) + fputs(" ", out); + fprintf(out, have_lf ? " %s(), %u" : " (%s:%u)", + function + (strncmp(function, "mdbx_", 5) ? 0 : 5), line); + lf(); + } else if (have_lf) { + line_struct.empty = true; + line_struct.severity = LINE_SEVERITY_NONE; + line_count += 1; + } else + lf(); + } + if (level < MDBX_LOG_VERBOSE) + flush(); if (level == MDBX_LOG_FATAL) { #if !MDBX_DEBUG && !MDBX_FORCE_ASSERTIONS exit(EXIT_FAILURE_MDBX); @@ -255,767 +232,144 @@ static void va_log(MDBX_log_level_t level, const char *function, int line, } } -static void MDBX_PRINTF_ARGS(1, 2) error(const char *msg, ...) { +static void MDBX_PRINTF_ARGS(1, 2) error_fmt(const char *msg, ...) 
{ va_list args; va_start(args, msg); - va_log(MDBX_LOG_ERROR, nullptr, 0, msg, args); + logger(MDBX_LOG_ERROR, nullptr, 0, msg, args); va_end(args); } -static void logger(MDBX_log_level_t level, const char *function, int line, - const char *msg, va_list args) { - (void)line; - (void)function; - if (level < MDBX_LOG_EXTRA) - va_log(level, function, line, msg, args); +static int error_fn(const char *fn, int err) { + if (err) + error_fmt("%s() failed, error %d, %s", fn, err, mdbx_strerror(err)); + return err; } -static int check_user_break(void) { - switch (user_break) { - case 0: - return MDBX_SUCCESS; - case 1: - print(" - interrupted by signal\n"); - fflush(nullptr); +static bool check_break(MDBX_chk_context_t *ctx) { + (void)ctx; + if (!user_break) + return false; + if (user_break == 1) { + print(MDBX_chk_resolution, "interrupted by signal"); + lf_flush(); user_break = 2; } - return MDBX_EINTR; + return true; } -static void pagemap_cleanup(void) { - osal_free(walk.pagemap); - walk.pagemap = nullptr; -} - -static bool eq(const MDBX_val a, const MDBX_val b) { - return a.iov_len == b.iov_len && - (a.iov_base == b.iov_base || a.iov_len == 0 || - !memcmp(a.iov_base, b.iov_base, a.iov_len)); -} - -static walk_dbi_t *pagemap_lookup_dbi(const MDBX_val *dbi_name, bool silent) { - static walk_dbi_t *last; - - if (dbi_name == MDBX_PGWALK_MAIN) - return &dbi_main; - if (dbi_name == MDBX_PGWALK_GC) - return &dbi_free; - if (dbi_name == MDBX_PGWALK_META) - return &dbi_meta; - - if (last && eq(last->name, *dbi_name)) - return last; - - walk_dbi_t *dbi = walk.dbi + CORE_DBS + /* account pseudo-entry for meta */ 1; - for (; dbi < ARRAY_END(walk.dbi) && dbi->name.iov_base; ++dbi) { - if (eq(dbi->name, *dbi_name)) - return last = dbi; - } - - if (verbose > 0 && !silent) { - print(" - found %s area\n", sdb_name(dbi_name)); - fflush(nullptr); - } - - if (dbi == ARRAY_END(walk.dbi)) - return nullptr; - - dbi->name = *dbi_name; - return last = dbi; -} - -static void MDBX_PRINTF_ARGS(4, 
5) - problem_add(const char *object, uint64_t entry_number, const char *msg, - const char *extra, ...) { - total_problems++; - - if (!quiet) { - int need_fflush = 0; - struct problem *p; - - for (p = problems_list; p; p = p->pr_next) - if (p->caption == msg) - break; - - if (!p) { - p = osal_calloc(1, sizeof(*p)); - if (unlikely(!p)) - return; - p->caption = msg; - p->pr_next = problems_list; - problems_list = p; - need_fflush = 1; - } - - p->count++; - if (verbose > 1) { - print(" %s #%" PRIu64 ": %s", object, entry_number, msg); - if (extra) { - va_list args; - printf(" ("); - va_start(args, extra); - vfprintf(stdout, extra, args); - va_end(args); - printf(")"); - } - printf("\n"); - if (need_fflush) - fflush(nullptr); +static int scope_push(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *scope, + MDBX_chk_scope_t *inner, const char *fmt, va_list args) { + (void)scope; + if (fmt && *fmt) { + FILE *out = prefix(MDBX_chk_processing); + if (out) { + vfprintf(out, fmt, args); + inner->usr_o.number = line_count; + line_struct.ctx = ctx; + flush(); } } + return MDBX_SUCCESS; } -static struct problem *problems_push(void) { - struct problem *p = problems_list; - problems_list = nullptr; - return p; -} - -static size_t problems_pop(struct problem *list) { - size_t count = 0; - - if (problems_list) { - int i; - - print(" - problems: "); - for (i = 0; problems_list; ++i) { - struct problem *p = problems_list->pr_next; - count += problems_list->count; - print("%s%s (%" PRIuPTR ")", i ? 
", " : "", problems_list->caption, - problems_list->count); - osal_free(problems_list); - problems_list = p; - } - print("\n"); - fflush(nullptr); - } - - problems_list = list; - return count; -} - -static int pgvisitor(const uint64_t pgno, const unsigned pgnumber, - void *const ctx, const int deep, const MDBX_val *dbi_name, - const size_t page_size, const MDBX_page_type_t pagetype, - const MDBX_error_t err, const size_t nentries, - const size_t payload_bytes, const size_t header_bytes, - const size_t unused_bytes) { +static void scope_pop(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *scope, + MDBX_chk_scope_t *inner) { (void)ctx; - const bool is_gc_tree = dbi_name == MDBX_PGWALK_GC; - if (deep > 42) { - problem_add("deep", deep, "too large", nullptr); - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - return MDBX_CORRUPTED /* avoid infinite loop/recursion */; - } - - walk_dbi_t *dbi = pagemap_lookup_dbi(dbi_name, false); - if (!dbi) { - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - return MDBX_ENOMEM; - } - - const size_t page_bytes = payload_bytes + header_bytes + unused_bytes; - walk.pgcount += pgnumber; - - const char *pagetype_caption; - bool branch = false; - switch (pagetype) { - default: - problem_add("page", pgno, "unknown page-type", "type %u, deep %i", - (unsigned)pagetype, deep); - pagetype_caption = "unknown"; - dbi->pages.other += pgnumber; - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - break; - case MDBX_page_broken: - pagetype_caption = "broken"; - dbi->pages.other += pgnumber; - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - break; - case MDBX_subpage_broken: - pagetype_caption = "broken-subpage"; - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - break; - case MDBX_page_meta: - pagetype_caption = "meta"; - dbi->pages.other += pgnumber; - break; - case MDBX_page_large: - pagetype_caption = "large"; - dbi->pages.large_volume += 
pgnumber; - dbi->pages.large_count += 1; - break; - case MDBX_page_branch: - pagetype_caption = "branch"; - dbi->pages.branch += pgnumber; - branch = true; - break; - case MDBX_page_leaf: - pagetype_caption = "leaf"; - dbi->pages.leaf += pgnumber; - break; - case MDBX_page_dupfixed_leaf: - pagetype_caption = "leaf-dupfixed"; - dbi->pages.leaf_dupfixed += pgnumber; - break; - case MDBX_subpage_leaf: - pagetype_caption = "subleaf-dupsort"; - dbi->pages.subleaf_dupsort += 1; - break; - case MDBX_subpage_dupfixed_leaf: - pagetype_caption = "subleaf-dupfixed"; - dbi->pages.subleaf_dupfixed += 1; - break; - } - - if (pgnumber) { - if (verbose > 3 && (!only_subdb.iov_base || eq(only_subdb, dbi->name))) { - if (pgnumber == 1) - print(" %s-page %" PRIu64, pagetype_caption, pgno); - else - print(" %s-span %" PRIu64 "[%u]", pagetype_caption, pgno, pgnumber); - print(" of %s: header %" PRIiPTR ", %s %" PRIiPTR ", payload %" PRIiPTR - ", unused %" PRIiPTR ", deep %i\n", - sdb_name(&dbi->name), header_bytes, - (pagetype == MDBX_page_branch) ? "keys" : "entries", nentries, - payload_bytes, unused_bytes, deep); - } - - bool already_used = false; - for (unsigned n = 0; n < pgnumber; ++n) { - uint64_t spanpgno = pgno + n; - if (spanpgno >= alloc_pages) { - problem_add("page", spanpgno, "wrong page-no", - "%s-page: %" PRIu64 " > %" PRIu64 ", deep %i", - pagetype_caption, spanpgno, alloc_pages, deep); - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } else if (walk.pagemap[spanpgno]) { - walk_dbi_t *coll_dbi = &walk.dbi[walk.pagemap[spanpgno] - 1]; - problem_add("page", spanpgno, - (branch && coll_dbi == dbi) ? "loop" : "already used", - "%s-page: by %s, deep %i", pagetype_caption, - sdb_name(&coll_dbi->name), deep); - already_used = true; - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } else { - walk.pagemap[spanpgno] = (short)(dbi - walk.dbi + 1); - dbi->pages.total += 1; - } - } - - if (already_used) - return branch ? 
MDBX_RESULT_TRUE /* avoid infinite loop/recursion */ - : MDBX_SUCCESS; - } - - if (MDBX_IS_ERROR(err)) { - problem_add("page", pgno, "invalid/corrupted", "%s-page", pagetype_caption); - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } else { - if (unused_bytes > page_size) { - problem_add("page", pgno, "illegal unused-bytes", - "%s-page: %u < %" PRIuPTR " < %u", pagetype_caption, 0, - unused_bytes, envinfo.mi_dxb_pagesize); - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } - - if (header_bytes < (int)sizeof(long) || - (size_t)header_bytes >= envinfo.mi_dxb_pagesize - sizeof(long)) { - problem_add("page", pgno, "illegal header-length", - "%s-page: %" PRIuPTR " < %" PRIuPTR " < %" PRIuPTR, - pagetype_caption, sizeof(long), header_bytes, - envinfo.mi_dxb_pagesize - sizeof(long)); - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } - if (payload_bytes < 1) { - if (nentries > 1) { - problem_add("page", pgno, "zero size-of-entry", - "%s-page: payload %" PRIuPTR " bytes, %" PRIuPTR " entries", - pagetype_caption, payload_bytes, nentries); - /* if ((size_t)header_bytes + unused_bytes < page_size) { - // LY: hush a misuse error - page_bytes = page_size; - } */ - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } else { - problem_add("page", pgno, "empty", - "%s-page: payload %" PRIuPTR " bytes, %" PRIuPTR - " entries, deep %i", - pagetype_caption, payload_bytes, nentries, deep); - dbi->pages.empty += 1; - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } - } - - if (pgnumber) { - if (page_bytes != page_size) { - problem_add("page", pgno, "misused", - "%s-page: %" PRIuPTR " != %" PRIuPTR " (%" PRIuPTR - "h + %" PRIuPTR "p + %" PRIuPTR "u), deep %i", - pagetype_caption, page_size, page_bytes, header_bytes, - payload_bytes, unused_bytes, deep); - if (page_size > page_bytes) - dbi->lost_bytes += page_size - page_bytes; - data_tree_problems += !is_gc_tree; - 
gc_tree_problems += is_gc_tree; - } else { - dbi->payload_bytes += (uint64_t)payload_bytes + header_bytes; - walk.total_payload_bytes += (uint64_t)payload_bytes + header_bytes; - } - } - } - - return check_user_break(); + (void)scope; + suffix(inner->usr_o.number, inner->subtotal_issues ? "error(s)" : "done"); + flush(); } -typedef int(visitor)(const uint64_t record_number, const MDBX_val *key, - const MDBX_val *data); -static int process_db(MDBX_dbi dbi_handle, const MDBX_val *dbi_name, - visitor *handler); - -static int handle_userdb(const uint64_t record_number, const MDBX_val *key, - const MDBX_val *data) { - (void)record_number; - (void)key; - (void)data; - return check_user_break(); +static MDBX_chk_user_subdb_cookie_t *subdb_filter(MDBX_chk_context_t *ctx, + const MDBX_val *name, + MDBX_db_flags_t flags) { + (void)ctx; + (void)flags; + return (!only_subdb.iov_base || + (only_subdb.iov_len == name->iov_len && + memcmp(only_subdb.iov_base, name->iov_base, name->iov_len) == 0)) + ? (void *)(intptr_t)-1 + : nullptr; } -static int handle_freedb(const uint64_t record_number, const MDBX_val *key, - const MDBX_val *data) { - char *bad = ""; - pgno_t *iptr = data->iov_base; - - if (key->iov_len != sizeof(txnid_t)) - problem_add("entry", record_number, "wrong txn-id size", - "key-size %" PRIiPTR, key->iov_len); - else { - txnid_t txnid; - memcpy(&txnid, key->iov_base, sizeof(txnid)); - if (txnid < 1 || txnid > envinfo.mi_recent_txnid) - problem_add("entry", record_number, "wrong txn-id", "%" PRIaTXN, txnid); - else { - if (data->iov_len < sizeof(pgno_t) || data->iov_len % sizeof(pgno_t)) - problem_add("entry", txnid, "wrong idl size", "%" PRIuPTR, - data->iov_len); - size_t number = (data->iov_len >= sizeof(pgno_t)) ? 
*iptr++ : 0; - if (number < 1 || number > MDBX_PGL_LIMIT) - problem_add("entry", txnid, "wrong idl length", "%" PRIuPTR, number); - else if ((number + 1) * sizeof(pgno_t) > data->iov_len) { - problem_add("entry", txnid, "trimmed idl", - "%" PRIuSIZE " > %" PRIuSIZE " (corruption)", - (number + 1) * sizeof(pgno_t), data->iov_len); - number = data->iov_len / sizeof(pgno_t) - 1; - } else if (data->iov_len - (number + 1) * sizeof(pgno_t) >= - /* LY: allow gap up to one page. it is ok - * and better than shink-and-retry inside update_gc() */ - envinfo.mi_dxb_pagesize) - problem_add("entry", txnid, "extra idl space", - "%" PRIuSIZE " < %" PRIuSIZE " (minor, not a trouble)", - (number + 1) * sizeof(pgno_t), data->iov_len); - - gc_pages += number; - if (envinfo.mi_latter_reader_txnid > txnid) - reclaimable_pages += number; - - pgno_t prev = MDBX_PNL_ASCENDING ? NUM_METAS - 1 : txn->mt_next_pgno; - pgno_t span = 1; - for (size_t i = 0; i < number; ++i) { - if (check_user_break()) - return MDBX_EINTR; - const pgno_t pgno = iptr[i]; - if (pgno < NUM_METAS) - problem_add("entry", txnid, "wrong idl entry", - "pgno %" PRIaPGNO " < meta-pages %u", pgno, NUM_METAS); - else if (pgno >= backed_pages) - problem_add("entry", txnid, "wrong idl entry", - "pgno %" PRIaPGNO " > backed-pages %" PRIu64, pgno, - backed_pages); - else if (pgno >= alloc_pages) - problem_add("entry", txnid, "wrong idl entry", - "pgno %" PRIaPGNO " > alloc-pages %" PRIu64, pgno, - alloc_pages - 1); - else { - if (MDBX_PNL_DISORDERED(prev, pgno)) { - bad = " [bad sequence]"; - problem_add("entry", txnid, "bad sequence", - "%" PRIaPGNO " %c [%zu].%" PRIaPGNO, prev, - (prev == pgno) ? '=' : (MDBX_PNL_ASCENDING ? 
'>' : '<'), - i, pgno); - } - if (walk.pagemap) { - int idx = walk.pagemap[pgno]; - if (idx == 0) - walk.pagemap[pgno] = -1; - else if (idx > 0) - problem_add("page", pgno, "already used", "by %s", - sdb_name(&walk.dbi[idx - 1].name)); - else - problem_add("page", pgno, "already listed in GC", nullptr); - } - } - prev = pgno; - while (i + span < number && - iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) - : pgno_sub(pgno, span))) - ++span; - } - if (verbose > 3 && !only_subdb.iov_base) { - print(" transaction %" PRIaTXN ", %" PRIuPTR - " pages, maxspan %" PRIaPGNO "%s\n", - txnid, number, span, bad); - if (verbose > 4) { - for (size_t i = 0; i < number; i += span) { - const pgno_t pgno = iptr[i]; - for (span = 1; - i + span < number && - iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) - : pgno_sub(pgno, span)); - ++span) - ; - if (span > 1) { - print(" %9" PRIaPGNO "[%" PRIaPGNO "]\n", pgno, span); - } else - print(" %9" PRIaPGNO "\n", pgno); - } - } - } - } - } - - return check_user_break(); +static int stage_begin(MDBX_chk_context_t *ctx, enum MDBX_chk_stage stage) { + (void)ctx; + chk_stage = stage; + anchor_lineno = line_count; + flush(); + return MDBX_SUCCESS; } -static int equal_or_greater(const MDBX_val *a, const MDBX_val *b) { - return eq(*a, *b) ? 0 : 1; +static int conclude(MDBX_chk_context_t *ctx); +static int stage_end(MDBX_chk_context_t *ctx, enum MDBX_chk_stage stage, + int err) { + if (stage == MDBX_chk_conclude && !err) + err = conclude(ctx); + suffix(anchor_lineno, err ? 
"error(s)" : "done"); + flush(); + chk_stage = MDBX_chk_none; + return err; } -static int handle_maindb(const uint64_t record_number, const MDBX_val *key, - const MDBX_val *data) { - if (data->iov_len == sizeof(MDBX_db)) { - int rc = process_db(~0u, key, handle_userdb); - if (rc != MDBX_INCOMPATIBLE) { - userdb_count++; - return rc; - } +static MDBX_chk_line_t *print_begin(MDBX_chk_context_t *ctx, + enum MDBX_chk_severity severity) { + (void)ctx; + if (silently(severity)) + return nullptr; + if (line_struct.ctx) { + if (line_struct.severity == MDBX_chk_processing && + severity >= MDBX_chk_result && severity <= MDBX_chk_resolution && + line_output) + fputc(' ', line_output); + else + lf(); + line_struct.ctx = nullptr; } - return handle_userdb(record_number, key, data); + line_struct.severity = severity; + return &line_struct; } -static const char *db_flags2keymode(unsigned flags) { - flags &= (MDBX_REVERSEKEY | MDBX_INTEGERKEY); - switch (flags) { - case 0: - return "usual"; - case MDBX_REVERSEKEY: - return "reserve"; - case MDBX_INTEGERKEY: - return "ordinal"; - case MDBX_REVERSEKEY | MDBX_INTEGERKEY: - return "msgpack"; - default: - assert(false); - __unreachable(); - } +static void print_flush(MDBX_chk_line_t *line) { + (void)line; + flush(); } -static const char *db_flags2valuemode(unsigned flags) { - flags &= (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | MDBX_INTEGERDUP); - switch (flags) { - case 0: - return "single"; - case MDBX_DUPSORT: - return "multi"; - case MDBX_REVERSEDUP: - case MDBX_DUPSORT | MDBX_REVERSEDUP: - return "multi-reverse"; - case MDBX_DUPFIXED: - case MDBX_DUPSORT | MDBX_DUPFIXED: - return "multi-samelength"; - case MDBX_DUPFIXED | MDBX_REVERSEDUP: - case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP: - return "multi-reverse-samelength"; - case MDBX_INTEGERDUP: - case MDBX_DUPSORT | MDBX_INTEGERDUP: - case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP: - case MDBX_DUPFIXED | MDBX_INTEGERDUP: - return "multi-ordinal"; - case 
MDBX_INTEGERDUP | MDBX_REVERSEDUP: - case MDBX_DUPSORT | MDBX_INTEGERDUP | MDBX_REVERSEDUP: - return "multi-msgpack"; - case MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: - case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: - return "reserved"; - default: - assert(false); - __unreachable(); - } +static void print_done(MDBX_chk_line_t *line) { + lf(); + line->ctx = nullptr; } -static int process_db(MDBX_dbi dbi_handle, const MDBX_val *dbi_name, - visitor *handler) { - MDBX_cursor *mc; - MDBX_stat ms; - MDBX_val key, data; - MDBX_val prev_key, prev_data; - unsigned flags; - int rc, i; - struct problem *saved_list; - uint64_t problems_count; - const bool second_pass = dbi_handle == MAIN_DBI; - - uint64_t record_count = 0, dups = 0; - uint64_t key_bytes = 0, data_bytes = 0; - - if ((MDBX_TXN_FINISHED | MDBX_TXN_ERROR) & mdbx_txn_flags(txn)) { - print(" ! abort processing %s due to a previous error\n", - sdb_name(dbi_name)); - return MDBX_BAD_TXN; - } - - if (dbi_handle == ~0u) { - rc = mdbx_dbi_open_ex2( - txn, dbi_name, MDBX_DB_ACCEDE, &dbi_handle, - (dbi_name && ignore_wrong_order) ? equal_or_greater : nullptr, - (dbi_name && ignore_wrong_order) ? equal_or_greater : nullptr); - if (rc) { - if (!dbi_name || - rc != - MDBX_INCOMPATIBLE) /* LY: mainDB's record is not a user's DB. 
*/ { - error("mdbx_dbi_open(%s) failed, error %d %s\n", sdb_name(dbi_name), rc, - mdbx_strerror(rc)); - } - return rc; - } - } - - if (dbi_handle >= CORE_DBS && dbi_name && only_subdb.iov_base && - !eq(only_subdb, *dbi_name)) { - if (verbose) { - print("Skip processing %s...\n", sdb_name(dbi_name)); - fflush(nullptr); - } - skipped_subdb++; - return MDBX_SUCCESS; - } - - if (!second_pass && verbose) - print("Processing %s...\n", sdb_name(dbi_name)); - fflush(nullptr); - - rc = mdbx_dbi_flags(txn, dbi_handle, &flags); - if (rc) { - error("mdbx_dbi_flags() failed, error %d %s\n", rc, mdbx_strerror(rc)); - return rc; - } - - rc = mdbx_dbi_stat(txn, dbi_handle, &ms, sizeof(ms)); - if (rc) { - error("mdbx_dbi_stat() failed, error %d %s\n", rc, mdbx_strerror(rc)); - return rc; - } - - if (!second_pass && verbose) { - print(" - key-value kind: %s-key => %s-value", db_flags2keymode(flags), - db_flags2valuemode(flags)); - if (verbose > 1) { - print(", flags:"); - if (!flags) - print(" none"); - else { - for (i = 0; dbflags[i].bit; i++) - if (flags & dbflags[i].bit) - print(" %s", dbflags[i].name); - } - if (verbose > 2) - print(" (0x%02X), dbi-id %d", flags, dbi_handle); - } - print("\n"); - if (ms.ms_mod_txnid) - print(" - last modification txn#%" PRIu64 "\n", ms.ms_mod_txnid); - if (verbose > 1) { - print(" - page size %u, entries %" PRIu64 "\n", ms.ms_psize, - ms.ms_entries); - print(" - b-tree depth %u, pages: branch %" PRIu64 ", leaf %" PRIu64 - ", overflow %" PRIu64 "\n", - ms.ms_depth, ms.ms_branch_pages, ms.ms_leaf_pages, - ms.ms_overflow_pages); - } - } - - walk_dbi_t *dbi = (dbi_handle < CORE_DBS) - ? 
&walk.dbi[dbi_handle] - : pagemap_lookup_dbi(dbi_name, true); - if (!dbi) { - error("too many DBIs or out of memory\n"); - return MDBX_ENOMEM; - } - if (!dont_traversal) { - const uint64_t subtotal_pages = - ms.ms_branch_pages + ms.ms_leaf_pages + ms.ms_overflow_pages; - if (subtotal_pages != dbi->pages.total) - error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n", - "subtotal", subtotal_pages, dbi->pages.total); - if (ms.ms_branch_pages != dbi->pages.branch) - error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n", "branch", - ms.ms_branch_pages, dbi->pages.branch); - const uint64_t allleaf_pages = dbi->pages.leaf + dbi->pages.leaf_dupfixed; - if (ms.ms_leaf_pages != allleaf_pages) - error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n", - "all-leaf", ms.ms_leaf_pages, allleaf_pages); - if (ms.ms_overflow_pages != dbi->pages.large_volume) - error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n", - "large/overlow", ms.ms_overflow_pages, dbi->pages.large_volume); - } - rc = mdbx_cursor_open(txn, dbi_handle, &mc); - if (rc) { - error("mdbx_cursor_open() failed, error %d %s\n", rc, mdbx_strerror(rc)); - return rc; - } - - if (ignore_wrong_order) { /* for debugging with enabled assertions */ - mc->mc_checking |= CC_SKIPORD; - if (mc->mc_xcursor) - mc->mc_xcursor->mx_cursor.mc_checking |= CC_SKIPORD; - } - - const size_t maxkeysize = mdbx_env_get_maxkeysize_ex(env, flags); - saved_list = problems_push(); - prev_key.iov_base = nullptr; - prev_key.iov_len = 0; - prev_data.iov_base = nullptr; - prev_data.iov_len = 0; - rc = mdbx_cursor_get(mc, &key, &data, MDBX_FIRST); - while (rc == MDBX_SUCCESS) { - rc = check_user_break(); - if (rc) - goto bailout; - - if (!second_pass) { - bool bad_key = false; - if (key.iov_len > maxkeysize) { - problem_add("entry", record_count, "key length exceeds max-key-size", - "%" PRIuPTR " > %" PRIuPTR, key.iov_len, maxkeysize); - bad_key = true; - } else if ((flags & MDBX_INTEGERKEY) && key.iov_len != 
sizeof(uint64_t) && - key.iov_len != sizeof(uint32_t)) { - problem_add("entry", record_count, "wrong key length", - "%" PRIuPTR " != 4or8", key.iov_len); - bad_key = true; - } - - bool bad_data = false; - if ((flags & MDBX_INTEGERDUP) && data.iov_len != sizeof(uint64_t) && - data.iov_len != sizeof(uint32_t)) { - problem_add("entry", record_count, "wrong data length", - "%" PRIuPTR " != 4or8", data.iov_len); - bad_data = true; - } - - if (prev_key.iov_base) { - if (prev_data.iov_base && !bad_data && (flags & MDBX_DUPFIXED) && - prev_data.iov_len != data.iov_len) { - problem_add("entry", record_count, "different data length", - "%" PRIuPTR " != %" PRIuPTR, prev_data.iov_len, - data.iov_len); - bad_data = true; - } - - if (!bad_key) { - int cmp = mdbx_cmp(txn, dbi_handle, &key, &prev_key); - if (cmp == 0) { - ++dups; - if ((flags & MDBX_DUPSORT) == 0) { - problem_add("entry", record_count, "duplicated entries", nullptr); - if (prev_data.iov_base && data.iov_len == prev_data.iov_len && - memcmp(data.iov_base, prev_data.iov_base, data.iov_len) == - 0) { - problem_add("entry", record_count, "complete duplicate", - nullptr); - } - } else if (!bad_data && prev_data.iov_base) { - cmp = mdbx_dcmp(txn, dbi_handle, &data, &prev_data); - if (cmp == 0) { - problem_add("entry", record_count, "complete duplicate", - nullptr); - } else if (cmp < 0 && !ignore_wrong_order) { - problem_add("entry", record_count, - "wrong order of multi-values", nullptr); - } - } - } else if (cmp < 0 && !ignore_wrong_order) { - problem_add("entry", record_count, "wrong order of entries", - nullptr); - } - } - } - - if (!bad_key) { - if (verbose && (flags & MDBX_INTEGERKEY) && !prev_key.iov_base) - print(" - fixed key-size %" PRIuPTR "\n", key.iov_len); - prev_key = key; - } - if (!bad_data) { - if (verbose && (flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) && - !prev_data.iov_base) - print(" - fixed data-size %" PRIuPTR "\n", data.iov_len); - prev_data = data; - } - } - - if (handler) { - rc = 
handler(record_count, &key, &data); - if (MDBX_IS_ERROR(rc)) - goto bailout; - } - - record_count++; - key_bytes += key.iov_len; - data_bytes += data.iov_len; - - rc = mdbx_cursor_get(mc, &key, &data, MDBX_NEXT); - } - if (rc != MDBX_NOTFOUND) - error("mdbx_cursor_get() failed, error %d %s\n", rc, mdbx_strerror(rc)); - else - rc = 0; - - if (record_count != ms.ms_entries) - problem_add("entry", record_count, "different number of entries", - "%" PRIu64 " != %" PRIu64, record_count, ms.ms_entries); -bailout: - problems_count = problems_pop(saved_list); - if (!second_pass && verbose) { - print(" - summary: %" PRIu64 " records, %" PRIu64 " dups, %" PRIu64 - " key's bytes, %" PRIu64 " data's " - "bytes, %" PRIu64 " problems\n", - record_count, dups, key_bytes, data_bytes, problems_count); - fflush(nullptr); - } - - mdbx_cursor_close(mc); - return (rc || problems_count) ? MDBX_RESULT_TRUE : MDBX_SUCCESS; +static void print_chars(MDBX_chk_line_t *line, const char *str, size_t len) { + if (line->empty) + prefix(line->severity); + fwrite(str, 1, len, line_output); } +static void print_format(MDBX_chk_line_t *line, const char *fmt, va_list args) { + if (line->empty) + prefix(line->severity); + vfprintf(line_output, fmt, args); +} + +static const MDBX_chk_callbacks_t cb = {.check_break = check_break, + .scope_push = scope_push, + .scope_pop = scope_pop, + .subdb_filter = subdb_filter, + .stage_begin = stage_begin, + .stage_end = stage_end, + .print_begin = print_begin, + .print_flush = print_flush, + .print_done = print_done, + .print_chars = print_chars, + .print_format = print_format}; + static void usage(char *prog) { fprintf( stderr, "usage: %s " "[-V] [-v] [-q] [-c] [-0|1|2] [-w] [-d] [-i] [-s subdb] [-u|U] dbpath\n" " -V\t\tprint version and exit\n" - " -v\t\tmore verbose, could be used multiple times\n" + " -v\t\tmore verbose, could be repeated upto 9 times\n" " -q\t\tbe quiet\n" " -c\t\tforce cooperative mode (don't try exclusive)\n" " -w\t\twrite-mode checking\n" @@ 
-1031,144 +385,68 @@ static void usage(char *prog) { exit(EXIT_INTERRUPTED); } -static bool meta_ot(txnid_t txn_a, uint64_t sign_a, txnid_t txn_b, - uint64_t sign_b, const bool wanna_steady) { - if (txn_a == txn_b) - return SIGN_IS_STEADY(sign_b); - - if (wanna_steady && SIGN_IS_STEADY(sign_a) != SIGN_IS_STEADY(sign_b)) - return SIGN_IS_STEADY(sign_b); - - return txn_a < txn_b; -} - -static bool meta_eq(txnid_t txn_a, uint64_t sign_a, txnid_t txn_b, - uint64_t sign_b) { - if (!txn_a || txn_a != txn_b) - return false; - - if (SIGN_IS_STEADY(sign_a) != SIGN_IS_STEADY(sign_b)) - return false; - - return true; -} - -static int meta_recent(const bool wanna_steady) { - if (meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign, - envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, wanna_steady)) - return meta_ot(envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, - envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, wanna_steady) - ? 1 - : 2; - else - return meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign, - envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, wanna_steady) - ? 2 - : 0; -} - -static int meta_tail(int head) { - switch (head) { - case 0: - return meta_ot(envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, - envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, true) - ? 1 - : 2; - case 1: - return meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign, - envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, true) - ? 0 - : 2; - case 2: - return meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign, - envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, true) - ? 
0 - : 1; - default: - assert(false); - return -1; +static int conclude(MDBX_chk_context_t *ctx) { + int err = MDBX_SUCCESS; + if (ctx->result.total_problems == 1 && ctx->result.problems_meta == 1 && + (chk_flags & + (MDBX_CHK_SKIP_BTREE_TRAVERSAL | MDBX_CHK_SKIP_KV_TRAVERSAL)) == 0 && + (env_flags & MDBX_RDONLY) == 0 && !only_subdb.iov_base && + stuck_meta < 0 && ctx->result.steady_txnid < ctx->result.recent_txnid) { + const size_t step_lineno = + print(MDBX_chk_resolution, + "Perform sync-to-disk for make steady checkpoint" + " at txn-id #%" PRIi64 "...", + ctx->result.recent_txnid); + flush(); + err = error_fn("mdbx_env_sync_ex", mdbx_env_sync_ex(ctx->env, true, false)); + if (err == MDBX_SUCCESS) { + ctx->result.problems_meta -= 1; + ctx->result.total_problems -= 1; + suffix(step_lineno, "done"); + } } -} -static int meta_head(void) { return meta_recent(false); } - -void verbose_meta(int num, txnid_t txnid, uint64_t sign, uint64_t bootid_x, - uint64_t bootid_y) { - const bool have_bootid = (bootid_x | bootid_y) != 0; - const bool bootid_match = bootid_x == envinfo.mi_bootid.current.x && - bootid_y == envinfo.mi_bootid.current.y; - - print(" - meta-%d: ", num); - switch (sign) { - case MDBX_DATASIGN_NONE: - print("no-sync/legacy"); - break; - case MDBX_DATASIGN_WEAK: - print("weak-%s", bootid_match ? (have_bootid ? "intact (same boot-id)" - : "unknown (no boot-id") - : "dead"); - break; - default: - print("steady"); - break; + if (turn_meta && stuck_meta >= 0 && + (chk_flags & + (MDBX_CHK_SKIP_BTREE_TRAVERSAL | MDBX_CHK_SKIP_KV_TRAVERSAL)) == 0 && + !only_subdb.iov_base && + (env_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == MDBX_EXCLUSIVE) { + const bool successful_check = + (err | ctx->result.total_problems | ctx->result.problems_meta) == 0; + if (successful_check || force_turn_meta) { + const size_t step_lineno = print( + MDBX_chk_resolution, + "Performing turn to the specified meta-page (%d) due to %s!", + stuck_meta, + successful_check ?
"successful check" : "the -T option was given"); + flush(); + err = mdbx_env_turn_for_recovery(ctx->env, stuck_meta); + if (err != MDBX_SUCCESS) + error_fn("mdbx_env_turn_for_recovery", err); + else + suffix(step_lineno, "done"); + } else { + print(MDBX_chk_resolution, + "Skipping turn to the specified meta-page (%d) due to " + "unsuccessful check!", + stuck_meta); + lf_flush(); + } } - print(" txn#%" PRIu64, txnid); - const int head = meta_head(); - if (num == head) - print(", head"); - else if (num == meta_tail(head)) - print(", tail"); - else - print(", stay"); - - if (stuck_meta >= 0) { - if (num == stuck_meta) - print(", forced for checking"); - } else if (txnid > envinfo.mi_recent_txnid && - (envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == MDBX_EXCLUSIVE) - print(", rolled-back %" PRIu64 " (%" PRIu64 " >>> %" PRIu64 ")", - txnid - envinfo.mi_recent_txnid, txnid, envinfo.mi_recent_txnid); - print("\n"); -} - -static uint64_t get_meta_txnid(const unsigned meta_id) { - switch (meta_id) { - default: - assert(false); - error("unexpected meta_id %u\n", meta_id); - return 0; - case 0: - return envinfo.mi_meta0_txnid; - case 1: - return envinfo.mi_meta1_txnid; - case 2: - return envinfo.mi_meta2_txnid; - } -} - -static void print_size(const char *prefix, const uint64_t value, - const char *suffix) { - const char sf[] = - "KMGTPEZY"; /* LY: Kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta! 
*/ - double k = 1024.0; - size_t i; - for (i = 0; sf[i + 1] && value / k > 1000.0; ++i) - k *= 1024; - print("%s%" PRIu64 " (%.2f %cb)%s", prefix, value, value / k, sf[i], suffix); + return err; } int main(int argc, char *argv[]) { int rc; char *prog = argv[0]; char *envname; - unsigned problems_maindb = 0, problems_freedb = 0, problems_meta = 0; - bool write_locked = false; - bool turn_meta = false; - bool force_turn_meta = false; bool warmup = false; MDBX_warmup_flags_t warmup_flags = MDBX_warmup_default; + if (argc < 2) + usage(prog); + double elapsed; #if defined(_WIN32) || defined(_WIN64) uint64_t timestamp_start, timestamp_finish; @@ -1176,20 +454,11 @@ int main(int argc, char *argv[]) { #else struct timespec timestamp_start, timestamp_finish; if (clock_gettime(CLOCK_MONOTONIC, &timestamp_start)) { - rc = errno; - error("clock_gettime() failed, error %d %s\n", rc, mdbx_strerror(rc)); + error_fn("clock_gettime", errno); return EXIT_FAILURE_SYS; } #endif - dbi_meta.name.iov_base = MDBX_PGWALK_META; - dbi_free.name.iov_base = MDBX_PGWALK_GC; - dbi_main.name.iov_base = MDBX_PGWALK_MAIN; - atexit(pagemap_cleanup); - - if (argc < 2) - usage(prog); - for (int i; (i = getopt(argc, argv, "uU" "0" @@ -1222,7 +491,10 @@ int main(int argc, char *argv[]) { mdbx_build.options); return EXIT_SUCCESS; case 'v': - verbose++; + if (verbose >= 9 && 0) + usage(prog); + else + verbose += 1; break; case '0': stuck_meta = 0; @@ -1239,8 +511,6 @@ int main(int argc, char *argv[]) { case 'T': turn_meta = force_turn_meta = true; quiet = false; - if (verbose < 2) - verbose = 2; break; case 'q': quiet = true; @@ -1248,27 +518,30 @@ int main(int argc, char *argv[]) { case 'n': break; case 'w': - envflags &= ~MDBX_RDONLY; + env_flags &= ~MDBX_RDONLY; + chk_flags |= MDBX_CHK_READWRITE; #if MDBX_MMAP_INCOHERENT_FILE_WRITE /* Temporary `workaround` for OpenBSD kernel's flaw.
* See https://libmdbx.dqdkfa.ru/dead-github/issues/67 */ - envflags |= MDBX_WRITEMAP; + env_flags |= MDBX_WRITEMAP; #endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ break; case 'c': - envflags = (envflags & ~MDBX_EXCLUSIVE) | MDBX_ACCEDE; + env_flags = (env_flags & ~MDBX_EXCLUSIVE) | MDBX_ACCEDE; break; case 'd': - dont_traversal = true; + chk_flags |= MDBX_CHK_SKIP_BTREE_TRAVERSAL; break; case 's': if (only_subdb.iov_base && strcmp(only_subdb.iov_base, optarg)) usage(prog); - only_subdb.iov_base = optarg; - only_subdb.iov_len = strlen(optarg); + else { + only_subdb.iov_base = optarg; + only_subdb.iov_len = strlen(optarg); + } break; case 'i': - ignore_wrong_order = true; + chk_flags |= MDBX_CHK_IGNORE_ORDER; break; case 'u': warmup = true; @@ -1287,26 +560,29 @@ int main(int argc, char *argv[]) { usage(prog); rc = MDBX_SUCCESS; - if (stuck_meta >= 0 && (envflags & MDBX_EXCLUSIVE) == 0) { - error("exclusive mode is required to using specific meta-page(%d) for " - "checking.\n", - stuck_meta); + if (stuck_meta >= 0 && (env_flags & MDBX_EXCLUSIVE) == 0) { + error_fmt("exclusive mode is required to using specific meta-page(%d) for " + "checking.", + stuck_meta); rc = EXIT_INTERRUPTED; } if (turn_meta) { if (stuck_meta < 0) { - error("meta-page must be specified (by -0, -1 or -2 options) to turn to " - "it.\n"); + error_fmt( + "meta-page must be specified (by -0, -1 or -2 options) to turn to " + "it."); rc = EXIT_INTERRUPTED; } - if (envflags & MDBX_RDONLY) { - error("write-mode must be enabled to turn to the specified meta-page.\n"); + if (env_flags & MDBX_RDONLY) { + error_fmt( + "write-mode must be enabled to turn to the specified meta-page."); rc = EXIT_INTERRUPTED; } - if (only_subdb.iov_base || dont_traversal) { - error( + if (only_subdb.iov_base || (chk_flags & (MDBX_CHK_SKIP_BTREE_TRAVERSAL | + MDBX_CHK_SKIP_KV_TRAVERSAL))) { + error_fmt( "whole database checking with b-tree traversal are required to turn " - "to the specified meta-page.\n"); + "to the specified 
meta-page."); rc = EXIT_INTERRUPTED; } } @@ -1327,13 +603,14 @@ int main(int argc, char *argv[]) { #endif /* !WINDOWS */ envname = argv[optind]; - print("mdbx_chk %s (%s, T-%s)\nRunning for %s in 'read-%s' mode...\n", + print(MDBX_chk_result, + "mdbx_chk %s (%s, T-%s)\nRunning for %s in 'read-%s' mode...", mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, envname, - (envflags & MDBX_RDONLY) ? "only" : "write"); - fflush(nullptr); - mdbx_setup_debug((verbose < MDBX_LOG_TRACE - 1) - ? (MDBX_log_level_t)(verbose + 1) + (env_flags & MDBX_RDONLY) ? "only" : "write"); + lf_flush(); + mdbx_setup_debug((verbose + MDBX_LOG_WARN < MDBX_LOG_TRACE) + ? (MDBX_log_level_t)(verbose + MDBX_LOG_WARN) : MDBX_LOG_TRACE, MDBX_DBG_DUMP | MDBX_DBG_ASSERT | MDBX_DBG_AUDIT | MDBX_DBG_LEGACY_OVERLAP | MDBX_DBG_DONT_UPGRADE, @@ -1341,22 +618,22 @@ int main(int argc, char *argv[]) { rc = mdbx_env_create(&env); if (rc) { - error("mdbx_env_create() failed, error %d %s\n", rc, mdbx_strerror(rc)); + error_fn("mdbx_env_create", rc); return rc < 0 ? EXIT_FAILURE_MDBX : EXIT_FAILURE_SYS; } - rc = mdbx_env_set_maxdbs(env, MDBX_MAX_DBI); + rc = mdbx_env_set_maxdbs(env, CORE_DBS); if (rc) { - error("mdbx_env_set_maxdbs() failed, error %d %s\n", rc, mdbx_strerror(rc)); + error_fn("mdbx_env_set_maxdbs", rc); goto bailout; } if (stuck_meta >= 0) { rc = mdbx_env_open_for_recovery(env, envname, stuck_meta, - (envflags & MDBX_RDONLY) ? false : true); + (env_flags & MDBX_RDONLY) ? 
false : true); } else { - rc = mdbx_env_open(env, envname, envflags, 0); - if ((envflags & MDBX_EXCLUSIVE) && + rc = mdbx_env_open(env, envname, env_flags, 0); + if ((env_flags & MDBX_EXCLUSIVE) && (rc == MDBX_BUSY || #if defined(_WIN32) || defined(_WIN64) rc == ERROR_LOCK_VIOLATION || rc == ERROR_SHARING_VIOLATION @@ -1364,489 +641,51 @@ int main(int argc, char *argv[]) { rc == EBUSY || rc == EAGAIN #endif )) { - envflags &= ~MDBX_EXCLUSIVE; - rc = mdbx_env_open(env, envname, envflags | MDBX_ACCEDE, 0); + env_flags &= ~MDBX_EXCLUSIVE; + rc = mdbx_env_open(env, envname, env_flags | MDBX_ACCEDE, 0); } } if (rc) { - error("mdbx_env_open() failed, error %d %s\n", rc, mdbx_strerror(rc)); - if (rc == MDBX_WANNA_RECOVERY && (envflags & MDBX_RDONLY)) - print("Please run %s in the read-write mode (with '-w' option).\n", prog); + error_fn("mdbx_env_open", rc); + if (rc == MDBX_WANNA_RECOVERY && (env_flags & MDBX_RDONLY)) + print_ln(MDBX_chk_result, + "Please run %s in the read-write mode (with '-w' option).", + prog); goto bailout; } - if (verbose) - print(" - %s mode\n", - (envflags & MDBX_EXCLUSIVE) ? "monopolistic" : "cooperative"); - - if ((envflags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0) { - if (verbose) { - print(" - taking write lock..."); - fflush(nullptr); - } - rc = mdbx_txn_lock(env, false); - if (rc != MDBX_SUCCESS) { - error("mdbx_txn_lock() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - if (verbose) - print(" done\n"); - write_locked = true; - } + print_ln(MDBX_chk_verbose, "%s mode", + (env_flags & MDBX_EXCLUSIVE) ? 
"monopolistic" : "cooperative"); if (warmup) { - if (verbose) { - print(" - warming up..."); - fflush(nullptr); - } + anchor_lineno = print(MDBX_chk_verbose, "warming up..."); + flush(); rc = mdbx_env_warmup(env, nullptr, warmup_flags, 3600 * 65536); if (MDBX_IS_ERROR(rc)) { - error("mdbx_env_warmup(flags %u) failed, error %d %s\n", warmup_flags, rc, - mdbx_strerror(rc)); + error_fn("mdbx_env_warmup", rc); goto bailout; } - if (verbose) - print(" %s\n", rc ? "timeout" : "done"); + suffix(anchor_lineno, rc ? "timeout" : "done"); } - rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn); + rc = mdbx_env_chk(env, &cb, &chk, chk_flags, + MDBX_chk_result + (verbose << MDBX_chk_severity_prio_shift), + 0); if (rc) { - error("mdbx_txn_begin() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - - rc = mdbx_env_info_ex(env, txn, &envinfo, sizeof(envinfo)); - if (rc) { - error("mdbx_env_info_ex() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - if (verbose) { - print(" - current boot-id "); - if (envinfo.mi_bootid.current.x | envinfo.mi_bootid.current.y) - print("%016" PRIx64 "-%016" PRIx64 "\n", envinfo.mi_bootid.current.x, - envinfo.mi_bootid.current.y); - else - print("unavailable\n"); - } - - mdbx_filehandle_t dxb_fd; - rc = mdbx_env_get_fd(env, &dxb_fd); - if (rc) { - error("mdbx_env_get_fd() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - - uint64_t dxb_filesize = 0; -#if defined(_WIN32) || defined(_WIN64) - { - BY_HANDLE_FILE_INFORMATION info; - if (!GetFileInformationByHandle(dxb_fd, &info)) - rc = GetLastError(); - else - dxb_filesize = info.nFileSizeLow | (uint64_t)info.nFileSizeHigh << 32; - } -#else - { - struct stat st; - STATIC_ASSERT_MSG(sizeof(off_t) <= sizeof(uint64_t), - "libmdbx requires 64-bit file I/O on 64-bit systems"); - if (fstat(dxb_fd, &st)) - rc = errno; - else - dxb_filesize = st.st_size; - } -#endif - if (rc) { - error("osal_filesize() failed, error %d %s\n", rc, mdbx_strerror(rc)); 
- goto bailout; - } - - errno = 0; - const uint64_t dxbfile_pages = dxb_filesize / envinfo.mi_dxb_pagesize; - alloc_pages = txn->mt_next_pgno; - backed_pages = envinfo.mi_geo.current / envinfo.mi_dxb_pagesize; - if (backed_pages > dxbfile_pages) { - print(" ! backed-pages %" PRIu64 " > file-pages %" PRIu64 "\n", - backed_pages, dxbfile_pages); - ++problems_meta; - } - if (dxbfile_pages < NUM_METAS) - print(" ! file-pages %" PRIu64 " < %u\n", dxbfile_pages, NUM_METAS); - if (backed_pages < NUM_METAS) - print(" ! backed-pages %" PRIu64 " < %u\n", backed_pages, NUM_METAS); - if (backed_pages < NUM_METAS || dxbfile_pages < NUM_METAS) - goto bailout; - if (backed_pages > MAX_PAGENO + 1) { - print(" ! backed-pages %" PRIu64 " > max-pages %" PRIaPGNO "\n", - backed_pages, MAX_PAGENO + 1); - ++problems_meta; - backed_pages = MAX_PAGENO + 1; - } - - if ((envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) { - if (backed_pages > dxbfile_pages) { - print(" ! backed-pages %" PRIu64 " > file-pages %" PRIu64 "\n", - backed_pages, dxbfile_pages); - ++problems_meta; - backed_pages = dxbfile_pages; - } - if (alloc_pages > backed_pages) { - print(" ! alloc-pages %" PRIu64 " > backed-pages %" PRIu64 "\n", - alloc_pages, backed_pages); - ++problems_meta; - alloc_pages = backed_pages; - } - } else { - /* LY: DB may be shrunk by writer down to the allocated pages. */ - if (alloc_pages > backed_pages) { - print(" ! alloc-pages %" PRIu64 " > backed-pages %" PRIu64 "\n", - alloc_pages, backed_pages); - ++problems_meta; - alloc_pages = backed_pages; - } - if (alloc_pages > dxbfile_pages) { - print(" ! 
alloc-pages %" PRIu64 " > file-pages %" PRIu64 "\n", - alloc_pages, dxbfile_pages); - ++problems_meta; - alloc_pages = dxbfile_pages; - } - if (backed_pages > dxbfile_pages) - backed_pages = dxbfile_pages; - } - - if (verbose) { - print(" - pagesize %u (%u system), max keysize %d..%d" - ", max readers %u\n", - envinfo.mi_dxb_pagesize, envinfo.mi_sys_pagesize, - mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT), - mdbx_env_get_maxkeysize_ex(env, 0), envinfo.mi_maxreaders); - print_size(" - mapsize ", envinfo.mi_mapsize, "\n"); - if (envinfo.mi_geo.lower == envinfo.mi_geo.upper) - print_size(" - fixed datafile: ", envinfo.mi_geo.current, ""); - else { - print_size(" - dynamic datafile: ", envinfo.mi_geo.lower, ""); - print_size(" .. ", envinfo.mi_geo.upper, ", "); - print_size("+", envinfo.mi_geo.grow, ", "); - print_size("-", envinfo.mi_geo.shrink, "\n"); - print_size(" - current datafile: ", envinfo.mi_geo.current, ""); - } - printf(", %" PRIu64 " pages\n", - envinfo.mi_geo.current / envinfo.mi_dxb_pagesize); -#if defined(_WIN32) || defined(_WIN64) - if (envinfo.mi_geo.shrink && envinfo.mi_geo.current != envinfo.mi_geo.upper) - print( - " WARNING: Due Windows system limitations a " - "file couldn't\n be truncated while the database " - "is opened. 
So, the size\n database file " - "of may by large than the database itself,\n " - "until it will be closed or reopened in read-write mode.\n"); -#endif - verbose_meta(0, envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign, - envinfo.mi_bootid.meta0.x, envinfo.mi_bootid.meta0.y); - verbose_meta(1, envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, - envinfo.mi_bootid.meta1.x, envinfo.mi_bootid.meta1.y); - verbose_meta(2, envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, - envinfo.mi_bootid.meta2.x, envinfo.mi_bootid.meta2.y); - } - - if (stuck_meta >= 0) { - if (verbose) { - print(" - skip checking meta-pages since the %u" - " is selected for verification\n", - stuck_meta); - print(" - transactions: recent %" PRIu64 - ", selected for verification %" PRIu64 ", lag %" PRIi64 "\n", - envinfo.mi_recent_txnid, get_meta_txnid(stuck_meta), - envinfo.mi_recent_txnid - get_meta_txnid(stuck_meta)); - } - } else { - if (verbose > 1) - print(" - performs check for meta-pages clashes\n"); - if (meta_eq(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign, - envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign)) { - print(" ! meta-%d and meta-%d are clashed\n", 0, 1); - ++problems_meta; - } - if (meta_eq(envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, - envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign)) { - print(" ! meta-%d and meta-%d are clashed\n", 1, 2); - ++problems_meta; - } - if (meta_eq(envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, - envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign)) { - print(" ! meta-%d and meta-%d are clashed\n", 2, 0); - ++problems_meta; - } - - const unsigned steady_meta_id = meta_recent(true); - const uint64_t steady_meta_txnid = get_meta_txnid(steady_meta_id); - const unsigned weak_meta_id = meta_recent(false); - const uint64_t weak_meta_txnid = get_meta_txnid(weak_meta_id); - if (envflags & MDBX_EXCLUSIVE) { - if (verbose > 1) - print(" - performs full check recent-txn-id with meta-pages\n"); - if (steady_meta_txnid != envinfo.mi_recent_txnid) { - print(" ! 
steady meta-%d txn-id mismatch recent-txn-id (%" PRIi64 - " != %" PRIi64 ")\n", - steady_meta_id, steady_meta_txnid, envinfo.mi_recent_txnid); - ++problems_meta; - } - } else if (write_locked) { - if (verbose > 1) - print(" - performs lite check recent-txn-id with meta-pages (not a " - "monopolistic mode)\n"); - if (weak_meta_txnid != envinfo.mi_recent_txnid) { - print(" ! weak meta-%d txn-id mismatch recent-txn-id (%" PRIi64 - " != %" PRIi64 ")\n", - weak_meta_id, weak_meta_txnid, envinfo.mi_recent_txnid); - ++problems_meta; - } - } else if (verbose) { - print(" - skip check recent-txn-id with meta-pages (monopolistic or " - "read-write mode only)\n"); - } - total_problems += problems_meta; - - if (verbose) - print(" - transactions: recent %" PRIu64 ", latter reader %" PRIu64 - ", lag %" PRIi64 "\n", - envinfo.mi_recent_txnid, envinfo.mi_latter_reader_txnid, - envinfo.mi_recent_txnid - envinfo.mi_latter_reader_txnid); - } - - if (!dont_traversal) { - struct problem *saved_list; - size_t traversal_problems; - uint64_t empty_pages, lost_bytes; - - print("Traversal b-tree by txn#%" PRIaTXN "...\n", txn->mt_txnid); - fflush(nullptr); - walk.pagemap = osal_calloc((size_t)backed_pages, sizeof(*walk.pagemap)); - if (!walk.pagemap) { - rc = errno ? 
errno : MDBX_ENOMEM; - error("calloc() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - - saved_list = problems_push(); - rc = mdbx_env_pgwalk(txn, pgvisitor, nullptr, - true /* always skip key ordering checking to avoid - MDBX_CORRUPTED when using custom comparators */); - traversal_problems = problems_pop(saved_list); - - if (rc) { - if (rc != MDBX_EINTR || !check_user_break()) - error("mdbx_env_pgwalk() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - - for (uint64_t n = 0; n < alloc_pages; ++n) - if (!walk.pagemap[n]) - unused_pages += 1; - - empty_pages = lost_bytes = 0; - for (walk_dbi_t *dbi = &dbi_main; - dbi < ARRAY_END(walk.dbi) && dbi->name.iov_base; ++dbi) { - empty_pages += dbi->pages.empty; - lost_bytes += dbi->lost_bytes; - } - - if (verbose) { - uint64_t total_page_bytes = walk.pgcount * envinfo.mi_dxb_pagesize; - print(" - pages: walked %" PRIu64 ", left/unused %" PRIu64 "\n", - walk.pgcount, unused_pages); - if (verbose > 1) { - for (walk_dbi_t *dbi = walk.dbi; - dbi < ARRAY_END(walk.dbi) && dbi->name.iov_base; ++dbi) { - print(" %s: subtotal %" PRIu64, sdb_name(&dbi->name), - dbi->pages.total); - if (dbi->pages.other && dbi->pages.other != dbi->pages.total) - print(", other %" PRIu64, dbi->pages.other); - if (dbi->pages.branch) - print(", branch %" PRIu64, dbi->pages.branch); - if (dbi->pages.large_count) - print(", large %" PRIu64, dbi->pages.large_count); - uint64_t all_leaf = dbi->pages.leaf + dbi->pages.leaf_dupfixed; - if (all_leaf) { - print(", leaf %" PRIu64, all_leaf); - if (verbose > 2 && - (dbi->pages.subleaf_dupsort | dbi->pages.leaf_dupfixed | - dbi->pages.subleaf_dupfixed)) - print(" (usual %" PRIu64 ", sub-dupsort %" PRIu64 - ", dupfixed %" PRIu64 ", sub-dupfixed %" PRIu64 ")", - dbi->pages.leaf, dbi->pages.subleaf_dupsort, - dbi->pages.leaf_dupfixed, dbi->pages.subleaf_dupfixed); - } - print("\n"); - } - } - - if (verbose > 1) - print(" - usage: total %" PRIu64 " bytes, payload %" PRIu64 - 
" (%.1f%%), unused " - "%" PRIu64 " (%.1f%%)\n", - total_page_bytes, walk.total_payload_bytes, - walk.total_payload_bytes * 100.0 / total_page_bytes, - total_page_bytes - walk.total_payload_bytes, - (total_page_bytes - walk.total_payload_bytes) * 100.0 / - total_page_bytes); - if (verbose > 2) { - for (walk_dbi_t *dbi = walk.dbi; - dbi < ARRAY_END(walk.dbi) && dbi->name.iov_base; ++dbi) - if (dbi->pages.total) { - uint64_t dbi_bytes = dbi->pages.total * envinfo.mi_dxb_pagesize; - print(" %s: subtotal %" PRIu64 " bytes (%.1f%%)," - " payload %" PRIu64 " (%.1f%%), unused %" PRIu64 " (%.1f%%)", - sdb_name(&dbi->name), dbi_bytes, - dbi_bytes * 100.0 / total_page_bytes, dbi->payload_bytes, - dbi->payload_bytes * 100.0 / dbi_bytes, - dbi_bytes - dbi->payload_bytes, - (dbi_bytes - dbi->payload_bytes) * 100.0 / dbi_bytes); - if (dbi->pages.empty) - print(", %" PRIu64 " empty pages", dbi->pages.empty); - if (dbi->lost_bytes) - print(", %" PRIu64 " bytes lost", dbi->lost_bytes); - print("\n"); - } else - print(" %s: empty\n", sdb_name(&dbi->name)); - } - print(" - summary: average fill %.1f%%", - walk.total_payload_bytes * 100.0 / total_page_bytes); - if (empty_pages) - print(", %" PRIu64 " empty pages", empty_pages); - if (lost_bytes) - print(", %" PRIu64 " bytes lost", lost_bytes); - print(", %" PRIuPTR " problems\n", traversal_problems); - } - } else if (verbose) { - print("Skipping b-tree walk...\n"); - fflush(nullptr); - } - - if (gc_tree_problems) { - print("Skip processing %s since %s is corrupted (%u problems)\n", "@GC", - "b-tree", gc_tree_problems); - problems_freedb = gc_tree_problems; - } else - problems_freedb = process_db(FREE_DBI, MDBX_PGWALK_GC, handle_freedb); - - if (verbose) { - uint64_t value = envinfo.mi_mapsize / envinfo.mi_dxb_pagesize; - double percent = value / 100.0; - print(" - space: %" PRIu64 " total pages", value); - print(", backed %" PRIu64 " (%.1f%%)", backed_pages, - backed_pages / percent); - print(", allocated %" PRIu64 " (%.1f%%)", 
alloc_pages, - alloc_pages / percent); - - if (verbose > 1) { - value = envinfo.mi_mapsize / envinfo.mi_dxb_pagesize - alloc_pages; - print(", remained %" PRIu64 " (%.1f%%)", value, value / percent); - - value = dont_traversal ? alloc_pages - gc_pages : walk.pgcount; - print(", used %" PRIu64 " (%.1f%%)", value, value / percent); - - print(", gc %" PRIu64 " (%.1f%%)", gc_pages, gc_pages / percent); - - value = gc_pages - reclaimable_pages; - print(", detained %" PRIu64 " (%.1f%%)", value, value / percent); - - print(", reclaimable %" PRIu64 " (%.1f%%)", reclaimable_pages, - reclaimable_pages / percent); - } - - value = envinfo.mi_mapsize / envinfo.mi_dxb_pagesize - alloc_pages + - reclaimable_pages; - print(", available %" PRIu64 " (%.1f%%)\n", value, value / percent); - } - - if ((problems_maindb = data_tree_problems) == 0 && problems_freedb == 0) { - if (!dont_traversal && - (envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) { - if (walk.pgcount != alloc_pages - gc_pages) { - error("used pages mismatch (%" PRIu64 "(walked) != %" PRIu64 - "(allocated - GC))\n", - walk.pgcount, alloc_pages - gc_pages); - } - if (unused_pages != gc_pages) { - error("GC pages mismatch (%" PRIu64 "(expected) != %" PRIu64 "(GC))\n", - unused_pages, gc_pages); - } - } else if (verbose) { - print(" - skip check used and GC pages (btree-traversal with " - "monopolistic or read-write mode only)\n"); - } - - problems_maindb = process_db(~0u, /* MAIN_DBI */ nullptr, nullptr); - if (problems_maindb == 0) { - print("Scanning %s for %s...\n", "@MAIN", "sub-database(s)"); - if (!process_db(MAIN_DBI, nullptr, handle_maindb)) { - if (!userdb_count && verbose) - print(" - does not contain multiple databases\n"); - } - } else { - print("Skip processing %s since %s is corrupted (%u problems)\n", - "sub-database(s)", "@MAIN", problems_maindb); - } - } else { - print("Skip processing %s since %s is corrupted (%u problems)\n", "@MAIN", - "b-tree", data_tree_problems); - } - - if (rc == 0 && 
total_problems == 1 && problems_meta == 1 && !dont_traversal && - (envflags & MDBX_RDONLY) == 0 && !only_subdb.iov_base && stuck_meta < 0 && - get_meta_txnid(meta_recent(true)) < envinfo.mi_recent_txnid) { - print("Perform sync-to-disk for make steady checkpoint at txn-id #%" PRIi64 - "\n", - envinfo.mi_recent_txnid); - fflush(nullptr); - if (write_locked) { - mdbx_txn_unlock(env); - write_locked = false; - } - rc = mdbx_env_sync_ex(env, true, false); - if (rc != MDBX_SUCCESS) - error("mdbx_env_pgwalk() failed, error %d %s\n", rc, mdbx_strerror(rc)); - else { - total_problems -= 1; - problems_meta -= 1; - } - } - - if (turn_meta && stuck_meta >= 0 && !dont_traversal && !only_subdb.iov_base && - (envflags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == MDBX_EXCLUSIVE) { - const bool successful_check = (rc | total_problems | problems_meta) == 0; - if (successful_check || force_turn_meta) { - fflush(nullptr); - print(" = Performing turn to the specified meta-page (%d) due to %s!\n", - stuck_meta, - successful_check ? "successful check" : "the -T option was given"); - fflush(nullptr); - rc = mdbx_env_turn_for_recovery(env, stuck_meta); - if (rc != MDBX_SUCCESS) - error("mdbx_env_turn_for_recovery() failed, error %d %s\n", rc, - mdbx_strerror(rc)); - } else { - print(" = Skipping turn to the specified meta-page (%d) due to " - "unsuccessful check!\n", - stuck_meta); - } + if (chk.result.total_problems == 0) + error_fn("mdbx_env_chk", rc); + else if (rc != MDBX_EINTR && rc != MDBX_RESULT_TRUE && !user_break) + rc = 0; } bailout: - if (txn) - mdbx_txn_abort(txn); - if (write_locked) { - mdbx_txn_unlock(env); - write_locked = false; - } if (env) { - const bool dont_sync = rc != 0 || total_problems; + const bool dont_sync = rc != 0 || chk.result.total_problems; mdbx_env_close_ex(env, dont_sync); } - fflush(nullptr); + flush(); if (rc) { - if (rc < 0) + if (rc > 0) return user_break ? 
EXIT_INTERRUPTED : EXIT_FAILURE_SYS; return EXIT_FAILURE_MDBX; } @@ -1856,21 +695,24 @@ bailout: elapsed = (timestamp_finish - timestamp_start) * 1e-3; #else if (clock_gettime(CLOCK_MONOTONIC, &timestamp_finish)) { - rc = errno; - error("clock_gettime() failed, error %d %s\n", rc, mdbx_strerror(rc)); + error_fn("clock_gettime", errno); return EXIT_FAILURE_SYS; } elapsed = timestamp_finish.tv_sec - timestamp_start.tv_sec + (timestamp_finish.tv_nsec - timestamp_start.tv_nsec) * 1e-9; #endif /* !WINDOWS */ - if (total_problems) { - print("Total %u error%s detected, elapsed %.3f seconds.\n", total_problems, - (total_problems > 1) ? "s are" : " is", elapsed); - if (problems_meta || problems_maindb || problems_freedb) + if (chk.result.total_problems) { + print_ln(MDBX_chk_result, + "Total %" PRIuSIZE " error%s detected, elapsed %.3f seconds.", + chk.result.total_problems, + (chk.result.total_problems > 1) ? "s are" : " is", elapsed); + if (chk.result.problems_meta || chk.result.problems_kv || + chk.result.problems_gc) return EXIT_FAILURE_CHECK_MAJOR; return EXIT_FAILURE_CHECK_MINOR; } - print("No error is detected, elapsed %.3f seconds\n", elapsed); + print_ln(MDBX_chk_result, "No error is detected, elapsed %.3f seconds.", + elapsed); return EXIT_SUCCESS; } From 786da2b089ceabaa567d3170a051eab45a8d37df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 11 Oct 2023 09:07:52 +0300 Subject: [PATCH 005/443] =?UTF-8?q?mdbx-tools:=20=D0=B2=D1=8B=D0=B2=D0=BE?= =?UTF-8?q?=D0=B4=20=D0=B8=D0=BD=D1=84=D0=BE=D1=80=D0=BC=D0=B0=D1=86=D0=B8?= =?UTF-8?q?=D0=B8=20=D0=BE=D0=B1=20=D1=83=D1=80=D0=BE=D0=B2=D0=BD=D0=B5=20?= =?UTF-8?q?=D0=B4=D0=B5=D1=82=D0=B0=D0=BB=D0=B8=D0=B7=D0=B0=D1=86=D0=B8?= =?UTF-8?q?=D0=B8/verbosity.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mdbx_chk.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+),
4 deletions(-) diff --git a/src/mdbx_chk.c b/src/mdbx_chk.c index c590253d..55e6f98d 100644 --- a/src/mdbx_chk.c +++ b/src/mdbx_chk.c @@ -369,7 +369,7 @@ static void usage(char *prog) { "usage: %s " "[-V] [-v] [-q] [-c] [-0|1|2] [-w] [-d] [-i] [-s subdb] [-u|U] dbpath\n" " -V\t\tprint version and exit\n" - " -v\t\tmore verbose, could be repeated upto 9 times\n" + " -v\t\tmore verbose, could be repeated upto 9 times for extra details\n" " -q\t\tbe quiet\n" " -c\t\tforce cooperative mode (don't try exclusive)\n" " -w\t\twrite-mode checking\n" @@ -493,8 +493,14 @@ int main(int argc, char *argv[]) { case 'v': if (verbose >= 9 && 0) usage(prog); - else + else { verbose += 1; + if (verbose == 9 && !MDBX_DEBUG) + printf("Verbosity level %u exposures only to" + " a debug/extra-logging-enabled builds (with NDEBUG undefined" + " or MDBX_DEBUG > 0)\n", + verbose); + } break; case '0': stuck_meta = 0; @@ -604,10 +610,15 @@ int main(int argc, char *argv[]) { envname = argv[optind]; print(MDBX_chk_result, - "mdbx_chk %s (%s, T-%s)\nRunning for %s in 'read-%s' mode...", + "mdbx_chk %s (%s, T-%s)\nRunning for %s in 'read-%s' mode with " + "verbosity level %u (%s)...", mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, envname, - (env_flags & MDBX_RDONLY) ? "only" : "write"); + (env_flags & MDBX_RDONLY) ? "only" : "write", verbose, + (verbose > 8) + ? (MDBX_DEBUG ? "extra details for debugging" + : "same as 8 for non-debug builds with MDBX_DEBUG=0") + : "of 0..9"); lf_flush(); mdbx_setup_debug((verbose + MDBX_LOG_WARN < MDBX_LOG_TRACE) ? 
(MDBX_log_level_t)(verbose + MDBX_LOG_WARN) From cdbcf54af1a050d596d8203095daa6651d719189 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 11 Oct 2023 13:40:41 +0300 Subject: [PATCH 006/443] =?UTF-8?q?mdbx-tests:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`--read-var-info=3Dyes`?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20Valgrind.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 2 +- GNUmakefile | 2 +- test/long_stochastic.sh | 2 +- test/stochastic_small.sh | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 33e6233d..50bd1b4b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -305,7 +305,7 @@ else() "${CMAKE_CURRENT_SOURCE_DIR}/test/valgrind_suppress.txt" CACHE FILEPATH "Suppressions file for Valgrind" FORCE) set(MEMORYCHECK_COMMAND_OPTIONS - "--trace-children=yes --leak-check=full --track-origins=yes --error-exitcode=42 --error-markers=@ --errors-for-leak-kinds=definite --fair-sched=yes --suppressions=${MEMORYCHECK_SUPPRESSIONS_FILE}" + "--trace-children=yes --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --error-markers=@ --errors-for-leak-kinds=definite --fair-sched=yes --suppressions=${MEMORYCHECK_SUPPRESSIONS_FILE}" CACHE STRING "Valgrind options" FORCE) set(VALGRIND_COMMAND_OPTIONS "${MEMORYCHECK_COMMAND_OPTIONS}" CACHE STRING "Valgrind options" FORCE) endif() diff --git a/GNUmakefile b/GNUmakefile index 566feee1..c8d79a95 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -433,7 +433,7 @@ test-valgrind: build-test @echo ' RUNNING `test/long_stochastic.sh --with-valgrind --loops 2`...' 
$(QUIET)test/long_stochastic.sh --with-valgrind --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false) -memcheck: VALGRIND=valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt +memcheck: VALGRIND=valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt memcheck: CFLAGS_EXTRA=-Ofast -DMDBX_USE_VALGRIND memcheck: build-test @echo " SMOKE \`mdbx_test basic\` under Valgrind's memcheck..." diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index 900c1319..491ec695 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -62,7 +62,7 @@ do echo " For instance, when the process 'A' explicitly marks a memory" echo " region as 'undefined', the process 'B' fill it," echo " and after this process 'A' read such region, etc." - MONITOR="valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt" + MONITOR="valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt" rm -f valgrind-*.log ;; --skip-make) diff --git a/test/stochastic_small.sh b/test/stochastic_small.sh index 50497f85..20785a22 100755 --- a/test/stochastic_small.sh +++ b/test/stochastic_small.sh @@ -60,7 +60,7 @@ do echo " For instance, when the process 'A' explicitly marks a memory" echo " region as 'undefined', the process 'B' fill it," echo " and after this process 'A' read such region, etc." 
- MONITOR="valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt" + MONITOR="valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt" rm -f valgrind-*.log ;; --skip-make) From fc1685a178044ca2a2ad3b629f77281b133a0545 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 11 Oct 2023 12:26:09 +0300 Subject: [PATCH 007/443] =?UTF-8?q?mdbx:=20`STATIC=5FASSERT()`=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20`MDBX=5FTXN=5FRDONLY=5FPREPARE`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core.c b/src/core.c index fec25bed..380ab461 100644 --- a/src/core.c +++ b/src/core.c @@ -8828,6 +8828,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { r = brs.rslot; } txn->to.reader = r; + STATIC_ASSERT(MDBX_TXN_RDONLY_PREPARE > MDBX_TXN_RDONLY); if (flags & (MDBX_TXN_RDONLY_PREPARE - MDBX_TXN_RDONLY)) { eASSERT(env, txn->mt_txnid == 0); eASSERT(env, txn->mt_owner == 0); From 224f26813e60e371c3779ec1e8d0d987d0adbfb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 11 Oct 2023 12:27:31 +0300 Subject: [PATCH 008/443] =?UTF-8?q?mdbx:=20=D0=B2=D0=BE=D0=B7=D0=B2=D1=80?= =?UTF-8?q?=D0=B0=D1=89=D0=B5=D0=BD=D0=B8=D0=B5=20`MDBX=5FTXN=5FINVALID`?= =?UTF-8?q?=20(`INT32=5FMIN`)=20=D0=B8=D0=B7=20`mdbx=5Ftxn=5Fflags()`=20?= =?UTF-8?q?=D0=BF=D1=80=D0=B8=20=D0=BF=D0=B5=D1=80=D0=B5=D0=B4=D0=B0=D1=87?= =?UTF-8?q?=D0=B5=20=D0=BD=D0=B5=D0=B2=D0=B0=D0=BB=D0=B8=D0=B4=D0=BD=D0=BE?= =?UTF-8?q?=D0=B9=20=D1=82=D1=80=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8?= 
=?UTF-8?q?=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 2 +- src/core.c | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/mdbx.h b/mdbx.h index c94bde3f..84355922 100644 --- a/mdbx.h +++ b/mdbx.h @@ -3764,7 +3764,7 @@ mdbx_txn_env(const MDBX_txn *txn); * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). * * \returns A transaction flags, valid if input is an valid transaction, - * otherwise -1. */ + * otherwise \ref MDBX_TXN_INVALID. */ MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_txn_flags(const MDBX_txn *txn); /** \brief Return the transaction's ID. diff --git a/src/core.c b/src/core.c index 380ab461..911f92ea 100644 --- a/src/core.c +++ b/src/core.c @@ -9570,10 +9570,13 @@ uint64_t mdbx_txn_id(const MDBX_txn *txn) { } int mdbx_txn_flags(const MDBX_txn *txn) { - if (unlikely(!txn || txn->mt_signature != MDBX_MT_SIGNATURE)) { - assert((-1 & (int)MDBX_TXN_INVALID) != 0); - return -1; - } + STATIC_ASSERT( + (MDBX_TXN_INVALID & + (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | + MDBX_TXN_HAS_CHILD | MDBX_TXN_DRAINED_GC | MDBX_SHRINK_ALLOWED | + MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) == 0); + if (unlikely(!txn || txn->mt_signature != MDBX_MT_SIGNATURE)) + return MDBX_TXN_INVALID; assert(0 == (int)(txn->mt_flags & MDBX_TXN_INVALID)); return txn->mt_flags; } From a67b9b972989548dfb52e436d0423adc2527cc6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 11 Oct 2023 23:05:50 +0300 Subject: [PATCH 009/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20`env=5Finfo=5Fsnap()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 52 +++++++++++++++++++++++++--------------------------- 1 file changed, 25 
insertions(+), 27 deletions(-) diff --git a/src/core.c b/src/core.c index 911f92ea..dab0d468 100644 --- a/src/core.c +++ b/src/core.c @@ -22260,6 +22260,8 @@ __cold static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, meta_troika_t *const troika) { const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); + if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) + return MDBX_PANIC; /* is the environment open? * (https://libmdbx.dqdkfa.ru/dead-github/issues/171) */ @@ -22287,18 +22289,12 @@ __cold static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, #endif } + *troika = (txn && !(txn->mt_flags & MDBX_TXN_RDONLY)) ? txn->tw.troika + : meta_tap(env); + const meta_ptr_t head = meta_recent(env, troika); const MDBX_meta *const meta0 = METAPAGE(env, 0); const MDBX_meta *const meta1 = METAPAGE(env, 1); const MDBX_meta *const meta2 = METAPAGE(env, 2); - if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) - return MDBX_PANIC; - - if (txn && !(txn->mt_flags & MDBX_TXN_RDONLY)) - *troika = txn->tw.troika; - else - *troika = meta_tap(env); - - const meta_ptr_t head = meta_recent(env, troika); out->mi_recent_txnid = head.txnid; out->mi_meta_txnid[0] = troika->txnid[0]; out->mi_meta_sign[0] = unaligned_peek_u64(4, meta0->mm_sign); @@ -22330,11 +22326,6 @@ __cold static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, out->mi_geo.upper = pgno2bytes(env, txn_meta->mm_geo.upper); out->mi_geo.shrink = pgno2bytes(env, pv2pages(txn_meta->mm_geo.shrink_pv)); out->mi_geo.grow = pgno2bytes(env, pv2pages(txn_meta->mm_geo.grow_pv)); - const uint64_t unsynced_pages = - atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed) + - (atomic_load32(&env->me_lck->mti_meta_sync_txnid, mo_Relaxed) != - (uint32_t)out->mi_recent_txnid); - out->mi_mapsize = env->me_dxb_mmap.limit; const MDBX_lockinfo *const lck = env->me_lck; @@ -22346,6 +22337,10 @@ __cold static int env_info_snap(const 
MDBX_env *env, const MDBX_txn *txn, out->mi_sys_pagesize = env->me_os_psize; if (likely(bytes > size_before_bootid)) { + const uint64_t unsynced_pages = + atomic_load64(&lck->mti_unsynced_pages, mo_Relaxed) + + ((uint32_t)out->mi_recent_txnid != + atomic_load32(&lck->mti_meta_sync_txnid, mo_Relaxed)); out->mi_unsync_volume = pgno2bytes(env, (size_t)unsynced_pages); const uint64_t monotime_now = osal_monotime(); uint64_t ts = atomic_load64(&lck->mti_eoos_timestamp, mo_Relaxed); @@ -22390,25 +22385,27 @@ __cold static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, out->mi_pgop_stat.fsync = atomic_load64(&lck->mti_pgop_stat.fsync, mo_Relaxed); #else - memset(&arg->mi_pgop_stat, 0, sizeof(arg->mi_pgop_stat)); + memset(&out->mi_pgop_stat, 0, sizeof(out->mi_pgop_stat)); #endif /* MDBX_ENABLE_PGOP_STAT*/ } - out->mi_self_latter_reader_txnid = out->mi_latter_reader_txnid = - out->mi_recent_txnid; + txnid_t overall_latter_reader_txnid = out->mi_recent_txnid; + txnid_t self_latter_reader_txnid = overall_latter_reader_txnid; if (env->me_lck_mmap.lck) { for (size_t i = 0; i < out->mi_numreaders; ++i) { const uint32_t pid = atomic_load32(&lck->mti_readers[i].mr_pid, mo_AcquireRelease); if (pid) { const txnid_t txnid = safe64_read(&lck->mti_readers[i].mr_txnid); - if (out->mi_latter_reader_txnid > txnid) - out->mi_latter_reader_txnid = txnid; - if (pid == env->me_pid && out->mi_self_latter_reader_txnid > txnid) - out->mi_self_latter_reader_txnid = txnid; + if (overall_latter_reader_txnid > txnid) + overall_latter_reader_txnid = txnid; + if (pid == env->me_pid && self_latter_reader_txnid > txnid) + self_latter_reader_txnid = txnid; } } } + out->mi_self_latter_reader_txnid = self_latter_reader_txnid; + out->mi_latter_reader_txnid = overall_latter_reader_txnid; osal_compiler_barrier(); return MDBX_SUCCESS; @@ -22421,6 +22418,7 @@ __cold int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, if (unlikely(rc != MDBX_SUCCESS)) return rc; + eASSERT(env, 
sizeof(snap) >= bytes); while (1) { rc = env_info_snap(env, txn, out, bytes, troika); if (unlikely(rc != MDBX_SUCCESS)) @@ -22439,6 +22437,12 @@ __cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, if (unlikely((env == NULL && txn == NULL) || arg == NULL)) return MDBX_EINVAL; + const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); + const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); + if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && + bytes != size_before_pgop_stat) + return MDBX_EINVAL; + if (txn) { int err = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); if (unlikely(err != MDBX_SUCCESS)) @@ -22454,12 +22458,6 @@ __cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, env = txn->mt_env; } - const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); - const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); - if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && - bytes != size_before_pgop_stat) - return MDBX_EINVAL; - meta_troika_t troika; return env_info(env, txn, arg, bytes, &troika); } From 5f274eb4c61a418759e16df41aca78ac6b8cefcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 12 Oct 2023 10:16:31 +0300 Subject: [PATCH 010/443] =?UTF-8?q?mdbx:=20=D0=B2=D1=8B=D0=B2=D0=BE=D0=B4?= =?UTF-8?q?=20=D0=B8=D0=BD=D1=84=D0=BE=D1=80=D0=BC=D0=B0=D1=86=D0=B8=D0=B8?= =?UTF-8?q?=20=D0=B8=D0=B7=20`mdbx=5Fenv=5Fchk()`=20=D0=BE=20boot-id=20?= =?UTF-8?q?=D0=B2=20=D0=BA=D0=B0=D0=B6=D0=B4=D0=BE=D0=B9=20=D0=BC=D0=B5?= =?UTF-8?q?=D1=82=D0=B0-=D1=81=D1=82=D1=80=D0=B0=D0=BD=D0=B8=D1=86=D0=B5.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/core.c b/src/core.c index 
dab0d468..a84dbbc5 100644 --- a/src/core.c +++ b/src/core.c @@ -26219,7 +26219,13 @@ __cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope, &chk->envinfo.mi_bootid.current, sizeof(chk->envinfo.mi_bootid.current)) == 0; - line = chk_print(line, "meta-%u: ", num); + const char *status = "stay"; + if (num == chk->troika.recent) + status = "head"; + else if (num == TROIKA_TAIL(&chk->troika)) + status = "tail"; + line = chk_print(line, "meta-%u: %s, ", num, status); + switch (chk->envinfo.mi_meta_sign[num]) { case MDBX_DATASIGN_NONE: line = chk_puts(line, "no-sync/legacy"); @@ -26235,14 +26241,14 @@ __cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope, break; } const txnid_t meta_txnid = chk->envinfo.mi_meta_txnid[num]; - line = chk_print(line, " txn#%" PRIaTXN, meta_txnid); - - const char *status = "stay"; - if (num == chk->troika.recent) - status = "head"; - else if (num == TROIKA_TAIL(&chk->troika)) - status = "tail"; - line = chk_print(line, ", %s", status); + line = chk_print(line, " txn#%" PRIaTXN ", ", meta_txnid); + if (chk->envinfo.mi_bootid.meta[num].x | chk->envinfo.mi_bootid.meta[num].y) + line = chk_print(line, "boot-id %" PRIx64 "-%" PRIx64 " (%s)", + chk->envinfo.mi_bootid.meta[num].x, + chk->envinfo.mi_bootid.meta[num].y, + bootid_match ? 
"live" : "not match"); + else + line = chk_puts(line, "no boot-id"); if (env->me_stuck_meta >= 0) { if (num == (unsigned)env->me_stuck_meta) From d28a397b2d927213eedb11d4b98100c98f28d360 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 13 Oct 2023 17:36:21 +0300 Subject: [PATCH 011/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fcursor=5Funbind()`=20?= =?UTF-8?q?=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 22 +++++++++++++++++++ mdbx.h++ | 7 ++++++ src/core.c | 64 +++++++++++++++++++++++++++++++++--------------------- 3 files changed, 68 insertions(+), 25 deletions(-) diff --git a/mdbx.h b/mdbx.h index 84355922..2c062316 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4732,6 +4732,28 @@ mdbx_cursor_get_userctx(const MDBX_cursor *cursor); LIBMDBX_API int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *cursor, MDBX_dbi dbi); +/** \brief Unbind cursor from a transaction. + * \ingroup c_cursors + * + * Unbinded cursor is disassociated with any transactions but still holds + * the original DBI-handle internally. Thus it could be renewed with any running + * transaction or closed. + * + * \see mdbx_cursor_renew() + * \see mdbx_cursor_bind() + * \see mdbx_cursor_close() + * + * \note In contrast to LMDB, the MDBX required that any opened cursors can be + * reused and must be freed explicitly, regardless ones was opened in a + * read-only or write transaction. The REASON for this is eliminates ambiguity + * which helps to avoid errors such as: use-after-free, double-free, i.e. + * memory corruption and segfaults. + * + * \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open(). + * + * \returns A non-zero error value on failure and 0 on success. 
*/ +LIBMDBX_API int mdbx_cursor_unbind(MDBX_cursor *cursor); + /** \brief Create a cursor handle for the specified transaction and DBI handle. * \ingroup c_cursors * diff --git a/mdbx.h++ b/mdbx.h++ index d4cd7077..e3607b61 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4223,6 +4223,9 @@ public: /// map handle. inline void bind(::mdbx::txn &txn, ::mdbx::map_handle map_handle); + /// \brief Unbind cursor from a transaction. + inline void unbind(); + /// \brief Returns the cursor's transaction. inline ::mdbx::txn txn() const; inline map_handle map() const; @@ -6110,6 +6113,10 @@ inline void cursor::bind(::mdbx::txn &txn, ::mdbx::map_handle map_handle) { error::success_or_throw(::mdbx_cursor_bind(txn, handle_, map_handle.dbi)); } +inline void cursor::unbind() { + error::success_or_throw(::mdbx_cursor_unbind(handle_)); +} + inline txn cursor::txn() const { MDBX_txn *txn = ::mdbx_cursor_txn(handle_); error::throw_on_nullptr(txn, MDBX_EINVAL); diff --git a/src/core.c b/src/core.c index a84dbbc5..8e4b364f 100644 --- a/src/core.c +++ b/src/core.c @@ -18846,6 +18846,38 @@ void *mdbx_cursor_get_userctx(const MDBX_cursor *mc) { return couple->mc_userctx; } +int mdbx_cursor_unbind(MDBX_cursor *mc) { + if (unlikely(!mc)) + return MDBX_EINVAL; + + if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_SUCCESS + : MDBX_EBADSIGN; + + if (unlikely(mc->mc_backup)) /* Cursor from parent transaction */ + return MDBX_EINVAL; + + eASSERT(nullptr, mc->mc_txn && mc->mc_txn->mt_signature == MDBX_MT_SIGNATURE); + cASSERT(mc, mc->mc_signature == MDBX_MC_LIVE); + cASSERT(mc, !mc->mc_backup); + if (unlikely(!mc->mc_txn || mc->mc_txn->mt_signature != MDBX_MT_SIGNATURE)) { + ERROR("Wrong cursor's transaction %p 0x%x", + __Wpedantic_format_voidptr(mc->mc_txn), + mc->mc_txn ? 
mc->mc_txn->mt_signature : 0); + return MDBX_PROBLEM; + } + if (mc->mc_flags & C_UNTRACK) { + MDBX_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi]; + while (*prev && *prev != mc) + prev = &(*prev)->mc_next; + cASSERT(mc, *prev == mc); + *prev = mc->mc_next; + } + mc->mc_signature = MDBX_MC_READY4CLOSE; + mc->mc_flags = 0; + return MDBX_SUCCESS; +} + int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { if (unlikely(!mc)) return MDBX_EINVAL; @@ -18871,10 +18903,10 @@ int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { mc->mc_txn != txn)) return MDBX_EINVAL; - assert(mc->mc_db == &txn->mt_dbs[dbi]); - assert(mc->mc_dbx == &txn->mt_dbxs[dbi]); - assert(mc->mc_dbi == dbi); - assert(mc->mc_dbistate == &txn->mt_dbistate[dbi]); + cASSERT(mc, mc->mc_db == &txn->mt_dbs[dbi]); + cASSERT(mc, mc->mc_dbx == &txn->mt_dbxs[dbi]); + cASSERT(mc, mc->mc_dbi == dbi); + cASSERT(mc, mc->mc_dbistate == &txn->mt_dbistate[dbi]); return likely(mc->mc_dbi == dbi && /* paranoia */ mc->mc_signature == MDBX_MC_LIVE && mc->mc_txn == txn) @@ -18883,27 +18915,9 @@ int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { } if (mc->mc_signature == MDBX_MC_LIVE) { - if (unlikely(!mc->mc_txn || - mc->mc_txn->mt_signature != MDBX_MT_SIGNATURE)) { - ERROR("Wrong cursor's transaction %p 0x%x", - __Wpedantic_format_voidptr(mc->mc_txn), - mc->mc_txn ? 
mc->mc_txn->mt_signature : 0); - return MDBX_PROBLEM; - } - if (mc->mc_flags & C_UNTRACK) { - MDBX_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi]; - while (*prev && *prev != mc) - prev = &(*prev)->mc_next; - cASSERT(mc, *prev == mc); - *prev = mc->mc_next; - } - mc->mc_signature = MDBX_MC_READY4CLOSE; - mc->mc_flags = 0; - mc->mc_dbi = UINT_MAX; - mc->mc_next = NULL; - mc->mc_db = NULL; - mc->mc_dbx = NULL; - mc->mc_dbistate = NULL; + rc = mdbx_cursor_unbind(mc); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; } cASSERT(mc, !(mc->mc_flags & C_UNTRACK)); From 4d3f7e1edc1f3bacc474df6f56677a781eb86a45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 13 Oct 2023 22:38:51 +0300 Subject: [PATCH 012/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Ftxn=5Frelease=5Fall=5F?= =?UTF-8?q?cursors()`=20=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 21 +++++++++++++++++++++ mdbx.h++ | 16 ++++++++++++++++ src/core.c | 26 ++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) diff --git a/mdbx.h b/mdbx.h index 2c062316..68493003 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4803,6 +4803,27 @@ LIBMDBX_API int mdbx_cursor_open(MDBX_txn *txn, MDBX_dbi dbi, * or \ref mdbx_cursor_create(). */ LIBMDBX_API void mdbx_cursor_close(MDBX_cursor *cursor); +/** \brief Unbind or closes all cursors of a given transaction. + * \ingroup c_cursors + * + * Unbinds either closes all cursors associated (opened or renewed) with + * a given transaction in a bulk with minimal overhead. + * + * \see mdbx_cursor_unbind() + * \see mdbx_cursor_close() + * + * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). + * \param [in] unbind If non-zero, unbinds cursors and leaves ones reusable. + * Otherwise close and dispose cursors. 
+ * + * \returns A negative error value on failure or the number of closed cursors + * on success, some possible errors are: + * \retval MDBX_THREAD_MISMATCH Given transaction is not owned + * by current thread. + * \retval MDBX_BAD_TXN Given transaction is invalid or has + * a child/nested transaction. */ +LIBMDBX_API int mdbx_txn_release_all_cursors(const MDBX_txn *txn, bool unbind); + /** \brief Renew a cursor handle for use within the given transaction. * \ingroup c_cursors * diff --git a/mdbx.h++ b/mdbx.h++ index e3607b61..216c0631 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3841,6 +3841,15 @@ public: /// \brief Opens cursor for specified key-value map handle. inline cursor_managed open_cursor(map_handle map); + /// \brief Unbind or close all cursors. + inline size_t release_all_cursors(bool unbind) const; + + /// \brief Close all cursors. + inline size_t close_all_cursors() const { return release_all_cursors(false); } + + /// \brief Unbind all cursors. + inline size_t unbind_all_cursors() const { return release_all_cursors(true); } + /// \brief Open existing key-value map. 
inline map_handle open_map( const char *name, @@ -5466,6 +5475,13 @@ inline cursor_managed txn::open_cursor(map_handle map) { return cursor_managed(ptr); } +inline size_t txn::release_all_cursors(bool unbind) const { + int err = ::mdbx_txn_release_all_cursors(handle_, unbind); + if (MDBX_UNLIKELY(err < 0)) + MDBX_CXX20_UNLIKELY error::throw_exception(err); + return size_t(err); +} + inline ::mdbx::map_handle txn::open_map(const char *name, const ::mdbx::key_mode key_mode, const ::mdbx::value_mode value_mode) const { diff --git a/src/core.c b/src/core.c index 8e4b364f..293b4aed 100644 --- a/src/core.c +++ b/src/core.c @@ -19019,6 +19019,32 @@ void mdbx_cursor_close(MDBX_cursor *mc) { } } +int mdbx_txn_release_all_cursors(const MDBX_txn *txn, bool unbind) { + int rc = check_txn(txn, MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD); + if (likely(rc == MDBX_SUCCESS)) { + for (size_t i = FREE_DBI; i < txn->mt_numdbs; ++i) { + while (txn->mt_cursors[i]) { + MDBX_cursor *mc = txn->mt_cursors[i]; + ENSURE(NULL, mc->mc_signature == MDBX_MC_LIVE && + (mc->mc_flags & C_UNTRACK) && !mc->mc_backup); + rc = likely(rc < INT_MAX) ? 
rc + 1 : rc; + txn->mt_cursors[i] = mc->mc_next; + if (unbind) { + mc->mc_signature = MDBX_MC_READY4CLOSE; + mc->mc_flags = 0; + } else { + mc->mc_signature = 0; + mc->mc_next = mc; + osal_free(mc); + } + } + } + } else { + eASSERT(nullptr, rc < 0); + } + return rc; +} + MDBX_txn *mdbx_cursor_txn(const MDBX_cursor *mc) { if (unlikely(!mc || mc->mc_signature != MDBX_MC_LIVE)) return NULL; From 0e4c6d61a4a5455ce4d8f989974bf884a4fab8a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 14 Oct 2023 07:26:19 +0300 Subject: [PATCH 013/443] =?UTF-8?q?mdbx-tools:=20=D0=BD=D0=B5=D1=81=D1=83?= =?UTF-8?q?=D1=89=D0=B5=D1=81=D1=82=D0=B2=D0=B5=D0=BD=D0=BD=D1=8B=D0=B9=20?= =?UTF-8?q?=D1=80=D0=B5=D1=84=D0=B0=D0=BA=D1=82=D0=BE=D1=80=D0=B8=D0=BD?= =?UTF-8?q?=D0=B3=20`mdbx=5Fload`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mdbx_load.c | 235 +++++++++++++++++++++++++----------------------- 1 file changed, 121 insertions(+), 114 deletions(-) diff --git a/src/mdbx_load.c b/src/mdbx_load.c index 552fedc8..d4ff1db9 100644 --- a/src/mdbx_load.c +++ b/src/mdbx_load.c @@ -505,7 +505,7 @@ static int equal_or_greater(const MDBX_val *a, const MDBX_val *b) { } int main(int argc, char *argv[]) { - int i, rc; + int i, err; MDBX_env *env = nullptr; MDBX_txn *txn = nullptr; MDBX_cursor *mc = nullptr; @@ -608,40 +608,45 @@ int main(int argc, char *argv[]) { dbuf.iov_len = 4096; dbuf.iov_base = osal_malloc(dbuf.iov_len); if (!dbuf.iov_base) { - rc = MDBX_ENOMEM; - error("value-buffer", rc); - goto env_close; + err = MDBX_ENOMEM; + error("value-buffer", err); + goto bailout; } /* read first header for mapsize= */ if (!(mode & NOHDR)) { - rc = readhdr(); - if (unlikely(rc != MDBX_SUCCESS)) { - if (rc == EOF) - rc = MDBX_ENODATA; - error("readheader", rc); - goto env_close; + err = readhdr(); + if (unlikely(err != MDBX_SUCCESS)) { + if 
(err == EOF) + err = MDBX_ENODATA; + error("readheader", err); + goto bailout; } } - rc = mdbx_env_create(&env); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_env_create", rc); - return EXIT_FAILURE; + err = mdbx_env_create(&env); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_env_create", err); + goto bailout; + } + + err = mdbx_env_set_maxdbs(env, 2); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_env_set_maxdbs", err); + goto bailout; } - mdbx_env_set_maxdbs(env, 2); if (envinfo.mi_maxreaders) { - rc = mdbx_env_set_maxreaders(env, envinfo.mi_maxreaders); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_env_set_maxreaders", rc); - goto env_close; + err = mdbx_env_set_maxreaders(env, envinfo.mi_maxreaders); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_env_set_maxreaders", err); + goto bailout; } } if (envinfo.mi_geo.current | envinfo.mi_mapsize) { if (envinfo.mi_geo.current) { - rc = mdbx_env_set_geometry( + err = mdbx_env_set_geometry( env, (intptr_t)envinfo.mi_geo.lower, (intptr_t)envinfo.mi_geo.current, (intptr_t)envinfo.mi_geo.upper, (intptr_t)envinfo.mi_geo.shrink, (intptr_t)envinfo.mi_geo.grow, @@ -654,23 +659,23 @@ int main(int argc, char *argv[]) { "Database size is too large for current system (mapsize=%" PRIu64 " is great than system-limit %zu)\n", envinfo.mi_mapsize, (size_t)MAX_MAPSIZE); - goto env_close; + goto bailout; } - rc = mdbx_env_set_geometry( + err = mdbx_env_set_geometry( env, (intptr_t)envinfo.mi_mapsize, (intptr_t)envinfo.mi_mapsize, (intptr_t)envinfo.mi_mapsize, 0, 0, envinfo.mi_dxb_pagesize ? 
(intptr_t)envinfo.mi_dxb_pagesize : -1); } - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_env_set_geometry", rc); - goto env_close; + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_env_set_geometry", err); + goto bailout; } } - rc = mdbx_env_open(env, envname, envflags, 0664); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_env_open", rc); - goto env_close; + err = mdbx_env_open(env, envname, envflags, 0664); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_env_open", err); + goto bailout; } kbuf.iov_len = mdbx_env_get_maxvalsize_ex(env, 0) + (size_t)1; @@ -678,54 +683,54 @@ int main(int argc, char *argv[]) { if (!quiet) fprintf(stderr, "mdbx_env_get_maxkeysize() failed, returns %zu\n", kbuf.iov_len); - goto env_close; + goto bailout; } kbuf.iov_base = malloc(kbuf.iov_len); if (!kbuf.iov_base) { - rc = MDBX_ENOMEM; - error("key-buffer", rc); - goto env_close; + err = MDBX_ENOMEM; + error("key-buffer", err); + goto bailout; } - while (rc == MDBX_SUCCESS) { + while (err == MDBX_SUCCESS) { if (user_break) { - rc = MDBX_EINTR; + err = MDBX_EINTR; break; } - rc = mdbx_txn_begin(env, nullptr, 0, &txn); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_txn_begin", rc); - goto env_close; + err = mdbx_txn_begin(env, nullptr, 0, &txn); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_txn_begin", err); + goto bailout; } if (mode & GLOBAL) { mode -= GLOBAL; if (canary.v | canary.x | canary.y | canary.z) { - rc = mdbx_canary_put(txn, &canary); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_canary_put", rc); - goto txn_abort; + err = mdbx_canary_put(txn, &canary); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_canary_put", err); + goto bailout; } } } const char *const dbi_name = subname ? subname : "@MAIN"; - rc = + err = mdbx_dbi_open_ex(txn, subname, dbi_flags | MDBX_CREATE, &dbi, (putflags & MDBX_APPEND) ? equal_or_greater : nullptr, (putflags & MDBX_APPEND) ? 
equal_or_greater : nullptr); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_dbi_open_ex", rc); - goto txn_abort; + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_dbi_open_ex", err); + goto bailout; } uint64_t present_sequence; - rc = mdbx_dbi_sequence(txn, dbi, &present_sequence, 0); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_dbi_sequence", rc); - goto txn_abort; + err = mdbx_dbi_sequence(txn, dbi, &present_sequence, 0); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_dbi_sequence", err); + goto bailout; } if (present_sequence > sequence) { if (!quiet) @@ -733,22 +738,22 @@ int main(int argc, char *argv[]) { "present sequence for '%s' value (%" PRIu64 ") is greater than loaded (%" PRIu64 ")\n", dbi_name, present_sequence, sequence); - rc = MDBX_RESULT_TRUE; - goto txn_abort; + err = MDBX_RESULT_TRUE; + goto bailout; } if (present_sequence < sequence) { - rc = mdbx_dbi_sequence(txn, dbi, nullptr, sequence - present_sequence); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_dbi_sequence", rc); - goto txn_abort; + err = mdbx_dbi_sequence(txn, dbi, nullptr, sequence - present_sequence); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_dbi_sequence", err); + goto bailout; } } if (purge) { - rc = mdbx_drop(txn, dbi, false); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_drop", rc); - goto txn_abort; + err = mdbx_drop(txn, dbi, false); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_drop", err); + goto bailout; } } @@ -756,85 +761,85 @@ int main(int argc, char *argv[]) { putflags = (dbi_flags & MDBX_DUPSORT) ? 
putflags | MDBX_APPENDDUP : putflags & ~MDBX_APPENDDUP; - rc = mdbx_cursor_open(txn, dbi, &mc); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_cursor_open", rc); - goto txn_abort; + err = mdbx_cursor_open(txn, dbi, &mc); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_cursor_open", err); + goto bailout; } int batch = 0; - while (rc == MDBX_SUCCESS) { + while (err == MDBX_SUCCESS) { MDBX_val key, data; - rc = readline(&key, &kbuf); - if (rc == EOF) + err = readline(&key, &kbuf); + if (err == EOF) break; - if (rc == MDBX_SUCCESS) - rc = readline(&data, &dbuf); - if (rc) { + if (err == MDBX_SUCCESS) + err = readline(&data, &dbuf); + if (err) { if (!quiet) fprintf(stderr, "%s: line %" PRIiSIZE ": failed to read key value\n", prog, lineno); - goto txn_abort; + goto bailout; } - rc = mdbx_cursor_put(mc, &key, &data, putflags); - if (rc == MDBX_KEYEXIST && putflags) + err = mdbx_cursor_put(mc, &key, &data, putflags); + if (err == MDBX_KEYEXIST && putflags) continue; - if (rc == MDBX_BAD_VALSIZE && rescue) { + if (err == MDBX_BAD_VALSIZE && rescue) { if (!quiet) fprintf(stderr, "%s: skip line %" PRIiSIZE ": due %s\n", prog, lineno, - mdbx_strerror(rc)); + mdbx_strerror(err)); continue; } - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_cursor_put", rc); - goto txn_abort; + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_cursor_put", err); + goto bailout; } batch++; MDBX_txn_info txn_info; - rc = mdbx_txn_info(txn, &txn_info, false); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_txn_info", rc); - goto txn_abort; + err = mdbx_txn_info(txn, &txn_info, false); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_txn_info", err); + goto bailout; } if (batch == 10000 || txn_info.txn_space_dirty > MEGABYTE * 256) { - rc = mdbx_txn_commit(txn); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_txn_commit", rc); - goto env_close; + err = mdbx_txn_commit(txn); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_txn_commit", err); + goto bailout; } batch = 0; 
- rc = mdbx_txn_begin(env, nullptr, 0, &txn); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_txn_begin", rc); - goto env_close; + err = mdbx_txn_begin(env, nullptr, 0, &txn); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_txn_begin", err); + goto bailout; } - rc = mdbx_cursor_bind(txn, mc, dbi); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_cursor_bind", rc); - goto txn_abort; + err = mdbx_cursor_bind(txn, mc, dbi); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_cursor_bind", err); + goto bailout; } } } mdbx_cursor_close(mc); mc = nullptr; - rc = mdbx_txn_commit(txn); + err = mdbx_txn_commit(txn); txn = nullptr; - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_txn_commit", rc); - goto env_close; + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_txn_commit", err); + goto bailout; } if (subname) { assert(dbi != MAIN_DBI); - rc = mdbx_dbi_close(env, dbi); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_dbi_close", rc); - goto env_close; + err = mdbx_dbi_close(env, dbi); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_dbi_close", err); + goto bailout; } } else { assert(dbi == MAIN_DBI); @@ -842,14 +847,14 @@ int main(int argc, char *argv[]) { /* try read next header */ if (!(mode & NOHDR)) - rc = readhdr(); + err = readhdr(); else if (ferror(stdin) || feof(stdin)) break; } - switch (rc) { + switch (err) { case EOF: - rc = MDBX_SUCCESS; + err = MDBX_SUCCESS; case MDBX_SUCCESS: break; case MDBX_EINTR: @@ -857,17 +862,19 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Interrupted by signal/user\n"); break; default: - if (unlikely(rc != MDBX_SUCCESS)) - error("readline", rc); + if (unlikely(err != MDBX_SUCCESS)) + error("readline", err); } -txn_abort: - mdbx_cursor_close(mc); - mdbx_txn_abort(txn); -env_close: - mdbx_env_close(env); +bailout: + if (mc) + mdbx_cursor_close(mc); + if (txn) + mdbx_txn_abort(txn); + if (env) + mdbx_env_close(env); free(kbuf.iov_base); free(dbuf.iov_base); - return rc ? 
EXIT_FAILURE : EXIT_SUCCESS; + return err ? EXIT_FAILURE : EXIT_SUCCESS; } From 04511a7a99050659ac657112e5571c6fd7b78dd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 14 Oct 2023 09:04:06 +0300 Subject: [PATCH 014/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20`const=20MDB?= =?UTF-8?q?X=5Ftxn`=20=D0=B3=D0=B4=D0=B5=20=D1=8D=D1=82=D0=BE=20=D0=B2?= =?UTF-8?q?=D0=BE=D0=B7=D0=BC=D0=BE=D0=B6=D0=BD=D0=BE=20=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 26 +++++++++++++------------- mdbx.h++ | 13 +++++++------ src/core.c | 42 ++++++++++++++++++++++-------------------- 3 files changed, 42 insertions(+), 39 deletions(-) diff --git a/mdbx.h b/mdbx.h index 68493003..5c43ab89 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4291,8 +4291,8 @@ mdbx_int64_from_key(const MDBX_val); * \retval MDBX_THREAD_MISMATCH Given transaction is not owned * by current thread. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_dbi_stat(MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *stat, - size_t bytes); +LIBMDBX_API int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, + MDBX_stat *stat, size_t bytes); /** \brief Retrieve depth (bitmask) information of nested dupsort (multi-value) * B+trees for given database. @@ -4309,7 +4309,7 @@ LIBMDBX_API int mdbx_dbi_stat(MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *stat, * by current thread. * \retval MDBX_EINVAL An invalid parameter was specified. * \retval MDBX_RESULT_TRUE The dbi isn't a dupsort (multi-value) database. 
*/ -LIBMDBX_API int mdbx_dbi_dupsort_depthmask(MDBX_txn *txn, MDBX_dbi dbi, +LIBMDBX_API int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, uint32_t *mask); /** \brief DBI state bits returted by \ref mdbx_dbi_flags_ex() @@ -4341,13 +4341,13 @@ DEFINE_ENUM_FLAG_OPERATORS(MDBX_dbi_state_t) * \param [out] state Address where the state will be returned. * * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API int mdbx_dbi_flags_ex(MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, - unsigned *state); +LIBMDBX_API int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, + unsigned *flags, unsigned *state); /** \brief The shortcut to calling \ref mdbx_dbi_flags_ex() with `state=NULL` * for discarding it result. * \ingroup c_statinfo */ LIBMDBX_INLINE_API(int, mdbx_dbi_flags, - (MDBX_txn * txn, MDBX_dbi dbi, unsigned *flags)) { + (const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags)) { unsigned state; return mdbx_dbi_flags_ex(txn, dbi, flags, &state); } @@ -4423,7 +4423,7 @@ LIBMDBX_API int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del); * by current thread. * \retval MDBX_NOTFOUND The key was not in the database. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_get(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, +LIBMDBX_API int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data); /** \brief Get items from a database @@ -4456,7 +4456,7 @@ LIBMDBX_API int mdbx_get(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, * by current thread. * \retval MDBX_NOTFOUND The key was not in the database. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_get_ex(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, +LIBMDBX_API int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, size_t *values_count); /** \brief Get equal or great item from a database. 
@@ -4487,7 +4487,7 @@ LIBMDBX_API int mdbx_get_ex(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, * by current thread. * \retval MDBX_NOTFOUND The key was not in the database. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_get_equal_or_great(MDBX_txn *txn, MDBX_dbi dbi, +LIBMDBX_API int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data); /** \brief Store items into a database. @@ -4729,7 +4729,7 @@ mdbx_cursor_get_userctx(const MDBX_cursor *cursor); * \retval MDBX_THREAD_MISMATCH Given transaction is not owned * by current thread. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *cursor, +LIBMDBX_API int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *cursor, MDBX_dbi dbi); /** \brief Unbind cursor from a transaction. @@ -4784,7 +4784,7 @@ LIBMDBX_API int mdbx_cursor_unbind(MDBX_cursor *cursor); * \retval MDBX_THREAD_MISMATCH Given transaction is not owned * by current thread. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_cursor_open(MDBX_txn *txn, MDBX_dbi dbi, +LIBMDBX_API int mdbx_cursor_open(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **cursor); /** \brief Close a cursor handle. @@ -4848,7 +4848,7 @@ LIBMDBX_API int mdbx_txn_release_all_cursors(const MDBX_txn *txn, bool unbind); * \retval MDBX_EINVAL An invalid parameter was specified. * \retval MDBX_BAD_DBI The cursor was not bound to a DBI-handle * or such a handle became invalid. */ -LIBMDBX_API int mdbx_cursor_renew(MDBX_txn *txn, MDBX_cursor *cursor); +LIBMDBX_API int mdbx_cursor_renew(const MDBX_txn *txn, MDBX_cursor *cursor); /** \brief Return the cursor's transaction handle. * \ingroup c_cursors @@ -5227,7 +5227,7 @@ LIBMDBX_API int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, * \param [out] distance_items A pointer to store range estimation result. 
* * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API int mdbx_estimate_range(MDBX_txn *txn, MDBX_dbi dbi, +LIBMDBX_API int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, MDBX_val *begin_data, MDBX_val *end_key, MDBX_val *end_data, ptrdiff_t *distance_items); diff --git a/mdbx.h++ b/mdbx.h++ index 216c0631..ea0131be 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3839,7 +3839,7 @@ public: txn_managed start_nested(); /// \brief Opens cursor for specified key-value map handle. - inline cursor_managed open_cursor(map_handle map); + inline cursor_managed open_cursor(map_handle map) const; /// \brief Unbind or close all cursors. inline size_t release_all_cursors(bool unbind) const; @@ -4226,11 +4226,11 @@ public: /// \brief Renew/bind a cursor with a new transaction and previously used /// key-value map handle. - inline void renew(::mdbx::txn &txn); + inline void renew(const ::mdbx::txn &txn); /// \brief Bind/renew a cursor with a new transaction and specified key-value /// map handle. - inline void bind(::mdbx::txn &txn, ::mdbx::map_handle map_handle); + inline void bind(const ::mdbx::txn &txn, ::mdbx::map_handle map_handle); /// \brief Unbind cursor from a transaction. 
inline void unbind(); @@ -5469,7 +5469,7 @@ inline txn::info txn::get_info(bool scan_reader_lock_table) const { return r; } -inline cursor_managed txn::open_cursor(map_handle map) { +inline cursor_managed txn::open_cursor(map_handle map) const { MDBX_cursor *ptr; error::success_or_throw(::mdbx_cursor_open(handle_, map.dbi, &ptr)); return cursor_managed(ptr); @@ -6121,11 +6121,12 @@ inline ptrdiff_t cursor::estimate(move_operation operation) const { return estimate(operation, &unused_key, nullptr); } -inline void cursor::renew(::mdbx::txn &txn) { +inline void cursor::renew(const ::mdbx::txn &txn) { error::success_or_throw(::mdbx_cursor_renew(txn, handle_)); } -inline void cursor::bind(::mdbx::txn &txn, ::mdbx::map_handle map_handle) { +inline void cursor::bind(const ::mdbx::txn &txn, + ::mdbx::map_handle map_handle) { error::success_or_throw(::mdbx_cursor_bind(txn, handle_, map_handle.dbi)); } diff --git a/src/core.c b/src/core.c index 293b4aed..3745aaf5 100644 --- a/src/core.c +++ b/src/core.c @@ -3386,7 +3386,7 @@ static int __must_check_result cursor_first(MDBX_cursor *mc, MDBX_val *key, static int __must_check_result cursor_last(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data); -static int __must_check_result cursor_init(MDBX_cursor *mc, MDBX_txn *txn, +static int __must_check_result cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, size_t dbi); static int __must_check_result cursor_xinit0(MDBX_cursor *mc); static int __must_check_result cursor_xinit1(MDBX_cursor *mc, MDBX_node *node, @@ -9582,7 +9582,7 @@ int mdbx_txn_flags(const MDBX_txn *txn) { } /* Check for misused dbi handles */ -static __inline bool dbi_changed(MDBX_txn *txn, size_t dbi) { +static __inline bool dbi_changed(const MDBX_txn *txn, size_t dbi) { if (txn->mt_dbiseqs == txn->mt_env->me_dbiseqs) return false; if (likely( @@ -11171,7 +11171,7 @@ static int txn_write(MDBX_txn *txn, iov_ctx_t *ctx) { } /* Check txn and dbi arguments to a function */ -static __always_inline bool check_dbi(MDBX_txn 
*txn, MDBX_dbi dbi, +static __always_inline bool check_dbi(const MDBX_txn *txn, MDBX_dbi dbi, unsigned validity) { if (likely(dbi < txn->mt_numdbs)) { if (likely(!dbi_changed(txn, dbi))) { @@ -11182,7 +11182,7 @@ static __always_inline bool check_dbi(MDBX_txn *txn, MDBX_dbi dbi, return false; } } - return dbi_import(txn, dbi); + return dbi_import((MDBX_txn *)txn, dbi); } /* Merge child txn into parent */ @@ -16083,7 +16083,8 @@ static __always_inline int node_read(MDBX_cursor *mc, const MDBX_node *node, return node_read_bigdata(mc, node, data, mp); } -int mdbx_get(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data) { +int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, + MDBX_val *data) { DKBUF_DEBUG; DEBUG("===> get db %u key [%s]", dbi, DKEY_DEBUG(key)); @@ -16105,7 +16106,7 @@ int mdbx_get(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data) { return cursor_set(&cx.outer, (MDBX_val *)key, data, MDBX_SET).err; } -int mdbx_get_equal_or_great(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, +int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -16128,8 +16129,8 @@ int mdbx_get_equal_or_great(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, return cursor_get(&cx.outer, key, data, MDBX_SET_LOWERBOUND); } -int mdbx_get_ex(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, - size_t *values_count) { +int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, + MDBX_val *data, size_t *values_count) { DKBUF_DEBUG; DEBUG("===> get db %u key [%s]", dbi, DKEY_DEBUG(key)); @@ -18759,13 +18760,13 @@ static int cursor_xinit2(MDBX_cursor *mc, MDBX_xcursor *src_mx, } static __inline int couple_init(MDBX_cursor_couple *couple, const size_t dbi, - MDBX_txn *const txn, MDBX_db *const db, + const MDBX_txn *const txn, MDBX_db *const db, MDBX_dbx *const dbx, uint8_t *const dbstate) { couple->outer.mc_signature = 
MDBX_MC_LIVE; couple->outer.mc_next = NULL; couple->outer.mc_backup = NULL; couple->outer.mc_dbi = (MDBX_dbi)dbi; - couple->outer.mc_txn = txn; + couple->outer.mc_txn = (MDBX_txn *)txn; couple->outer.mc_db = db; couple->outer.mc_dbx = dbx; couple->outer.mc_dbistate = dbstate; @@ -18803,7 +18804,7 @@ static __inline int couple_init(MDBX_cursor_couple *couple, const size_t dbi, } /* Initialize a cursor for a given transaction and database. */ -static int cursor_init(MDBX_cursor *mc, MDBX_txn *txn, size_t dbi) { +static int cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, size_t dbi) { STATIC_ASSERT(offsetof(MDBX_cursor_couple, outer) == 0); return couple_init(container_of(mc, MDBX_cursor_couple, outer), dbi, txn, &txn->mt_dbs[dbi], &txn->mt_dbxs[dbi], @@ -18878,7 +18879,7 @@ int mdbx_cursor_unbind(MDBX_cursor *mc) { return MDBX_SUCCESS; } -int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { +int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { if (unlikely(!mc)) return MDBX_EINVAL; @@ -18932,7 +18933,7 @@ int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { return MDBX_SUCCESS; } -int mdbx_cursor_open(MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **ret) { +int mdbx_cursor_open(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **ret) { if (unlikely(!ret)) return MDBX_EINVAL; *ret = NULL; @@ -18951,7 +18952,7 @@ int mdbx_cursor_open(MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **ret) { return MDBX_SUCCESS; } -int mdbx_cursor_renew(MDBX_txn *txn, MDBX_cursor *mc) { +int mdbx_cursor_renew(const MDBX_txn *txn, MDBX_cursor *mc) { return likely(mc) ? 
mdbx_cursor_bind(txn, mc, mc->mc_dbi) : MDBX_EINVAL; } @@ -22244,7 +22245,7 @@ __cold int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, return rc; } -__cold int mdbx_dbi_dupsort_depthmask(MDBX_txn *txn, MDBX_dbi dbi, +__cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, uint32_t *mask) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -22860,7 +22861,7 @@ int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, return dbi_open(txn, name, flags, dbi, keycmp, datacmp); } -__cold int mdbx_dbi_stat(MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, +__cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, size_t bytes) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -22880,7 +22881,7 @@ __cold int mdbx_dbi_stat(MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, return MDBX_BAD_TXN; if (unlikely(txn->mt_dbistate[dbi] & DBI_STALE)) { - rc = fetch_sdb(txn, dbi); + rc = fetch_sdb((MDBX_txn *)txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; } @@ -22941,7 +22942,7 @@ int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { return rc; } -int mdbx_dbi_flags_ex(MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, +int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, unsigned *state) { int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); if (unlikely(rc != MDBX_SUCCESS)) @@ -24113,7 +24114,7 @@ int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, return mdbx_estimate_distance(cursor, &next.outer, distance_items); } -int mdbx_estimate_range(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, +int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, MDBX_val *begin_data, MDBX_val *end_key, MDBX_val *end_data, ptrdiff_t *size_items) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); @@ -25455,7 +25456,8 @@ LIBMDBX_API __cold int mdbx_env_info(const MDBX_env *env, MDBX_envinfo *info, return 
__inline_mdbx_env_info(env, info, bytes); } -LIBMDBX_API int mdbx_dbi_flags(MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags) { +LIBMDBX_API int mdbx_dbi_flags(const MDBX_txn *txn, MDBX_dbi dbi, + unsigned *flags) { return __inline_mdbx_dbi_flags(txn, dbi, flags); } From c254c728d2f0dbd8989cbe1b7fe7824aa0ef11cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 14 Oct 2023 11:10:32 +0300 Subject: [PATCH 015/443] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D1=81=D1=82=D1=8B?= =?UTF-8?q?=D0=BB=D1=8C=20=D0=B4=D0=BB=D1=8F=20=D0=BB=D0=BE=D0=B6=D0=BD?= =?UTF-8?q?=D0=BE-=D0=BF=D0=BE=D0=BB=D0=BE=D0=B6=D0=B8=D1=82=D0=B5=D0=BB?= =?UTF-8?q?=D1=8C=D0=BD=D0=BE=D0=B3=D0=BE=20=D0=BF=D1=80=D0=B5=D0=B4=D1=83?= =?UTF-8?q?=D0=BF=D1=80=D0=B5=D0=B6=D0=B4=D0=B5=D0=BD=D0=B8=D1=8F=20Coveri?= =?UTF-8?q?ty.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/core.c b/src/core.c index 3745aaf5..4a741bf3 100644 --- a/src/core.c +++ b/src/core.c @@ -26151,8 +26151,11 @@ static void histogram_acc(const size_t n, struct MDBX_chk_histogram *p) { if (p->ranges[i].count) { assert(i < last); // раздвигаем - memmove(p->ranges + i + 1, p->ranges + i, - (last - i) * sizeof(p->ranges[0])); +#ifdef __COVERITY__ + if (i < last) /* avoid Coverity false-positive issue */ +#endif /* __COVERITY__ */ + memmove(p->ranges + i + 1, p->ranges + i, + (last - i) * sizeof(p->ranges[0])); } p->ranges[i].begin = n; p->ranges[i].end = n + 1; From 24f08aed286ce6b9ceac4b5942f3a34b5afb7077 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 14 Oct 2023 14:07:51 +0300 Subject: [PATCH 016/443] =?UTF-8?q?mdbx-doc:=20=D0=BE=D0=B1=D0=BD=D0=BE?= 
=?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BA=D0=BE=D0=BD=D1=84?= =?UTF-8?q?=D0=B8=D0=B3=D1=83=D1=80=D0=B0=D1=86=D0=B8=D0=B8=20Doxygen.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Doxyfile.in | 557 +++++++++++++++++++++++++++++------------------ 1 file changed, 350 insertions(+), 207 deletions(-) diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index 9aef3329..ca91f8b8 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -1,4 +1,4 @@ -# Doxyfile 1.9.1 +# Doxyfile 1.9.6 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. @@ -12,6 +12,16 @@ # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). +# +# Note: +# +# Use doxygen to compare the used configuration file with the template +# configuration file: +# doxygen -x [configFile] +# Use doxygen to compare the used configuration file with the template +# configuration file without replacing the environment variables or CMake type +# replacement variables: +# doxygen -x_noenv [configFile] #--------------------------------------------------------------------------- # Project related configuration options @@ -60,16 +70,28 @@ PROJECT_LOGO = OUTPUT_DIRECTORY = . -# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- -# directories (in 2 levels) under the output directory of each output format and -# will distribute the generated files over these directories. Enabling this +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create up to 4096 +# sub-directories (in 2 levels) under the output directory of each output format +# and will distribute the generated files over these directories. 
Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes -# performance problems for the file system. +# performance problems for the file system. Adapt CREATE_SUBDIRS_LEVEL to +# control the number of sub-directories. # The default value is: NO. CREATE_SUBDIRS = NO +# Controls the number of sub-directories that will be created when +# CREATE_SUBDIRS tag is set to YES. Level 0 represents 16 directories, and every +# level increment doubles the number of directories, resulting in 4096 +# directories at level 8 which is the default and also the maximum value. The +# sub-directories are organized in 2 levels, the first level always has a fixed +# number of 16 directories. +# Minimum value: 0, maximum value: 8, default value: 8. +# This tag requires that the tag CREATE_SUBDIRS is set to YES. + +CREATE_SUBDIRS_LEVEL = 8 + # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode @@ -81,26 +103,18 @@ ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. 
-# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, -# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), -# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, -# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, -# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, -# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, -# Ukrainian and Vietnamese. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Bulgarian, +# Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, English +# (United States), Esperanto, Farsi (Persian), Finnish, French, German, Greek, +# Hindi, Hungarian, Indonesian, Italian, Japanese, Japanese-en (Japanese with +# English messages), Korean, Korean-en (Korean with English messages), Latvian, +# Lithuanian, Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, +# Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, +# Swedish, Turkish, Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English -# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all generated output in the proper direction. -# Possible values are: None, LTR, RTL and Context. -# The default value is: None. - -OUTPUT_TEXT_DIRECTION = None - # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. @@ -258,16 +272,16 @@ TAB_SIZE = 4 # the documentation. 
An alias has the form: # name=value # For example adding -# "sideeffect=@par Side Effects:\n" +# "sideeffect=@par Side Effects:^^" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines (in the resulting output). You can put ^^ in the value part of an -# alias to insert a newline as if a physical newline was in the original file. -# When you need a literal { or } or , in the value part of an alias you have to -# escape them by means of a backslash (\), this can lead to conflicts with the -# commands \{ and \} for these it is advised to use the version @{ and @} or use -# a double escape (\\{ and \\}) +# "Side Effects:". Note that you cannot put \n's in the value part of an alias +# to insert newlines (in the resulting output). You can put ^^ in the value part +# of an alias to insert a newline as if a physical newline was in the original +# file. When you need a literal { or } or , in the value part of an alias you +# have to escape them by means of a backslash (\), this can lead to conflicts +# with the commands \{ and \} for these it is advised to use the version @{ and +# @} or use a double escape (\\{ and \\}) ALIASES = @@ -312,8 +326,8 @@ OPTIMIZE_OUTPUT_SLICE = NO # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, JavaScript, -# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL, -# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: +# Csharp (C#), C, C++, Lex, D, PHP, md (Markdown), Objective-C, Python, Slice, +# VHDL, Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: # FortranFree, unknown formatted Fortran: Fortran. 
In the later case the parser # tries to guess whether the code is fixed or free formatted code, this is the # default for Fortran type files). For instance to make doxygen treat .inc files @@ -460,13 +474,13 @@ TYPEDEF_HIDES_STRUCT = YES LOOKUP_CACHE_SIZE = 0 -# The NUM_PROC_THREADS specifies the number threads doxygen is allowed to use +# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use # during processing. When set to 0 doxygen will based this on the number of # cores available in the system. You can set it explicitly to a value larger # than 0 to get more control over the balance between CPU load and processing # speed. At this moment only the input processing can be done using multiple # threads. Since this is still an experimental feature the default is set to 1, -# which efficively disables parallel processing. Please report any issues you +# which effectively disables parallel processing. Please report any issues you # encounter. Generating dot graphs in parallel is controlled by the # DOT_NUM_THREADS setting. # Minimum value: 0, maximum value: 32, default value: 1. @@ -554,7 +568,8 @@ HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option -# has no effect if EXTRACT_ALL is enabled. +# will also hide undocumented C++ concepts if enabled. This option has no effect +# if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO @@ -585,14 +600,15 @@ INTERNAL_DOCS = NO # filesystem is case sensitive (i.e. it supports files in the same directory # whose names only differ in casing), the option must be set to YES to properly # deal with such files in case they appear in the input. 
For filesystems that -# are not case sensitive the option should be be set to NO to properly deal with +# are not case sensitive the option should be set to NO to properly deal with # output files written for symbols that only differ in casing, such as for two # classes, one named CLASS and the other named Class, and to also support # references to files without having to specify the exact matching casing. On # Windows (including Cygwin) and MacOS, users should typically set this option # to NO, whereas on Linux or other Unix flavors it should typically be set to # YES. -# The default value is: system dependent. +# Possible values are: SYSTEM, NO and YES. +# The default value is: SYSTEM. CASE_SENSE_NAMES = NO @@ -610,6 +626,12 @@ HIDE_SCOPE_NAMES = NO HIDE_COMPOUND_REFERENCE= NO +# If the SHOW_HEADERFILE tag is set to YES then the documentation for a class +# will show which file needs to be included to use the class. +# The default value is: YES. + +SHOW_HEADERFILE = YES + # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. @@ -767,7 +789,8 @@ FILE_VERSION_FILTER = # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml -# will be used as the name of the layout file. +# will be used as the name of the layout file. See also section "Changing the +# layout of pages" for information. 
# # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE @@ -813,22 +836,38 @@ WARNINGS = YES WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some parameters -# in a documented function, or documenting parameters that don't exist or using -# markup commands wrongly. +# potential errors in the documentation, such as documenting some parameters in +# a documented function twice, or documenting parameters that don't exist or +# using markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES +# If WARN_IF_INCOMPLETE_DOC is set to YES, doxygen will warn about incomplete +# function parameter documentation. If set to NO, doxygen will accept that some +# parameters have no documentation without warning. +# The default value is: YES. + +WARN_IF_INCOMPLETE_DOC = YES + # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return -# value. If set to NO, doxygen will only warn about wrong or incomplete -# parameter documentation, but not about the absence of documentation. If -# EXTRACT_ALL is set to YES then this flag will automatically be disabled. +# value. If set to NO, doxygen will only warn about wrong parameter +# documentation, but not about the absence of documentation. If EXTRACT_ALL is +# set to YES then this flag will automatically be disabled. See also +# WARN_IF_INCOMPLETE_DOC # The default value is: NO. WARN_NO_PARAMDOC = NO +# If WARN_IF_UNDOC_ENUM_VAL option is set to YES, doxygen will warn about +# undocumented enumeration values. If set to NO, doxygen will accept +# undocumented enumeration values. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: NO. 
+ +WARN_IF_UNDOC_ENUM_VAL = NO + # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when # a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS # then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but @@ -844,13 +883,27 @@ WARN_AS_ERROR = NO # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) +# See also: WARN_LINE_FORMAT # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" +# In the $text part of the WARN_FORMAT command it is possible that a reference +# to a more specific place is given. To make it easier to jump to this place +# (outside of doxygen) the user can define a custom "cut" / "paste" string. +# Example: +# WARN_LINE_FORMAT = "'vi $file +$line'" +# See also: WARN_FORMAT +# The default value is: at line $line of file $file. + +WARN_LINE_FORMAT = "at line $line of file $file" + # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard -# error (stderr). +# error (stderr). In case the file specified cannot be opened for writing the +# warning and error messages are written to standard error. When as file - is +# specified the warning and error messages are written to standard output +# (stdout). WARN_LOGFILE = @@ -877,10 +930,21 @@ INPUT = overall.md \ # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: # https://www.gnu.org/software/libiconv/) for the list of possible encodings. +# See also: INPUT_FILE_ENCODING # The default value is: UTF-8. INPUT_ENCODING = UTF-8 +# This tag can be used to specify the character encoding of the source files +# that doxygen parses The INPUT_FILE_ENCODING tag can be used to specify +# character encoding on a per file pattern basis. 
Doxygen will compare the file +# name with each pattern and apply the encoding (instead of the default +# INPUT_ENCODING) if there is a match. The character encodings are a list of the +# form: pattern=encoding (like *.php=ISO-8859-1). See cfg_input_encoding +# "INPUT_ENCODING" for further information on supported encodings. + +INPUT_FILE_ENCODING = + # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. @@ -894,10 +958,10 @@ INPUT_ENCODING = UTF-8 # # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, -# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, -# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment), -# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl, -# *.ucf, *.qsf and *.ice. +# *.hh, *.hxx, *.hpp, *.h++, *.l, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, +# *.inc, *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C +# comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, +# *.vhdl, *.ucf, *.qsf and *.ice. FILE_PATTERNS = *.h @@ -936,20 +1000,40 @@ EXCLUDE_PATTERNS = # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring.
Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test +# ANamespace::AClass, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* -EXCLUDE_SYMBOLS = NOMINMAX __ORDER_BIG_ENDIAN__ __ORDER_LITTLE_ENDIAN__ \ - __has_include __has_attribute __has_builtin __has_cpp_attribute __has_extension __has_feature \ - HAVE_STRUCT_IOVEC MDBX_STRINGIFY_HELPER MDBX_STRINGIFY \ - MDBX_NOSANITIZE_ENUM MDBX_PRINTF_ARGS \ - MDBX_HAVE_CXX20_CONCEPTS \ - CONSTEXPR_ENUM_FLAGS_OPERATIONS DEFINE_ENUM_FLAG_OPERATORS \ - bool false true __dll_export __dll_import \ - MDBX_64BIT_ATOMIC_CONFIG MDBX_64BIT_CAS_CONFIG MDBX_ENV_CHECKPID_CONFIG MDBX_LOCKING_CONFIG \ - MDBX_TRUST_RTC_CONFIG MDBX_TXN_CHECKOWNER_CONFIG MDBX_USE_OFDLOCKS_CONFIG +EXCLUDE_SYMBOLS = NOMINMAX \ + __ORDER_BIG_ENDIAN__ \ + __ORDER_LITTLE_ENDIAN__ \ + __has_include \ + __has_attribute \ + __has_builtin \ + __has_cpp_attribute \ + __has_extension \ + __has_feature \ + HAVE_STRUCT_IOVEC \ + MDBX_STRINGIFY_HELPER \ + MDBX_STRINGIFY \ + MDBX_NOSANITIZE_ENUM \ + MDBX_PRINTF_ARGS \ + MDBX_HAVE_CXX20_CONCEPTS \ + CONSTEXPR_ENUM_FLAGS_OPERATIONS \ + DEFINE_ENUM_FLAG_OPERATORS \ + bool \ + false \ + true \ + __dll_export \ + __dll_import \ + MDBX_64BIT_ATOMIC_CONFIG \ + MDBX_64BIT_CAS_CONFIG \ + MDBX_ENV_CHECKPID_CONFIG \ + MDBX_LOCKING_CONFIG \ + MDBX_TRUST_RTC_CONFIG \ + MDBX_TXN_CHECKOWNER_CONFIG \ + MDBX_USE_OFDLOCKS_CONFIG # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include @@ -992,6 +1076,11 @@ IMAGE_PATH = # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. 
# +# Note that doxygen will use the data processed and written to standard output +# for further processing, therefore nothing else, like debug statements or used +# commands (so in case of a Windows batch file always use @echo OFF), should be +# written to standard output. +# # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. @@ -1033,6 +1122,15 @@ FILTER_SOURCE_PATTERNS = USE_MDFILE_AS_MAINPAGE = +# The Fortran standard specifies that for fixed formatted Fortran code all +# characters from position 72 are to be considered as comment. A common +# extension is to allow longer lines before the automatic comment starts. The +# setting FORTRAN_COMMENT_AFTER will also make it possible that longer lines can +# be processed before the automatic comment starts. +# Minimum value: 7, maximum value: 10000, default value: 72. + +FORTRAN_COMMENT_AFTER = 72 + #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- @@ -1130,9 +1228,11 @@ VERBATIM_HEADERS = YES CLANG_ASSISTED_PARSING = NO -# If clang assisted parsing is enabled and the CLANG_ADD_INC_PATHS tag is set to -# YES then doxygen will add the directory of each input to the include path. +# If the CLANG_ASSISTED_PARSING tag is set to YES and the CLANG_ADD_INC_PATHS +# tag is set to YES then doxygen will add the directory of each input to the +# include path. # The default value is: YES. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. CLANG_ADD_INC_PATHS = YES @@ -1168,10 +1268,11 @@ CLANG_DATABASE_PATH = ALPHABETICAL_INDEX = YES -# In case all classes in a project start with a common prefix, all classes will -# be put under the same header in the alphabetical index. 
The IGNORE_PREFIX tag -# can be used to specify a prefix (or a list of prefixes) that should be ignored -# while generating the index headers. +# The IGNORE_PREFIX tag can be used to specify a prefix (or a list of prefixes) +# that should be ignored while generating the index headers. The IGNORE_PREFIX +# tag works for classes, function and member names. The entity will be placed in +# the alphabetical list under the first letter of the entity name that remains +# after removing the prefix. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = @@ -1250,7 +1351,12 @@ HTML_STYLESHEET = # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the -# list). For an example see the documentation. +# list). +# Note: Since the styling of scrollbars can currently not be overruled in +# Webkit/Chromium, the styling will be left out of the default doxygen.css if +# one or more extra stylesheets have been specified. So if scrollbar +# customization is desired it has to be added explicitly. For an example see the +# documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = @@ -1265,9 +1371,22 @@ HTML_EXTRA_STYLESHEET = HTML_EXTRA_FILES = +# The HTML_COLORSTYLE tag can be used to specify if the generated HTML output +# should be rendered with a dark or light theme. +# Possible values are: LIGHT always generate light mode output, DARK always +# generate dark mode output, AUTO_LIGHT automatically set the mode according to +# the user preference, use light mode if no preference is set (the default), +# AUTO_DARK automatically set the mode according to the user preference, use +# dark mode if no preference is set and TOGGLE allow the user to switch between +# light and dark mode via a button. +# The default value is: AUTO_LIGHT.
+# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE = AUTO_LIGHT + # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to -# this color. Hue is specified as an angle on a colorwheel, see +# this color. Hue is specified as an angle on a color-wheel, see # https://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. @@ -1277,7 +1396,7 @@ HTML_EXTRA_FILES = HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors -# in the HTML output. For a value of 0 the output will use grayscales only. A +# in the HTML output. For a value of 0 the output will use gray-scales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. @@ -1359,6 +1478,13 @@ GENERATE_DOCSET = NO DOCSET_FEEDNAME = "Doxygen generated docs" +# This tag determines the URL of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDURL = + # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. @@ -1384,8 +1510,12 @@ DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. 
The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# (see: -# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows. +# on Windows. In the beginning of 2021 Microsoft took the original page, with +# a.o. the download links, offline (the HTML help workshop was already many years +# in maintenance mode). You can download the HTML help workshop from the web +# archives at Installation executable (see: +# http://web.archive.org/web/20160201063255/http://download.microsoft.com/downlo +# ad/0/A/9/0A939EF6-E31C-430F-A3DF-DFAE7960D564/htmlhelp.exe). # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML @@ -1544,16 +1674,28 @@ DISABLE_INDEX = YES # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can -# further fine-tune the look of the index. As an example, the default style -# sheet generated by doxygen has an example that shows how to put an image at -# the root of the tree instead of the PROJECT_NAME. Since the tree basically has -# the same information as the tab index, you could consider setting -# DISABLE_INDEX to YES when enabling this option. +# further fine tune the look of the index (see "Fine-tuning the output"). As an +# example, the default style sheet generated by doxygen has an example that +# shows how to put an image at the root of the tree instead of the PROJECT_NAME. +# Since the tree basically has the same information as the tab index, you could +# consider setting DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES.
GENERATE_TREEVIEW = YES +# When both GENERATE_TREEVIEW and DISABLE_INDEX are set to YES, then the +# FULL_SIDEBAR option determines if the side bar is limited to only the treeview +# area (value NO) or if it should extend to the full height of the window (value +# YES). Setting this to YES gives a layout similar to +# https://docs.readthedocs.io with more room for contents, but less room for the +# project logo, title, and description. If either GENERATE_TREEVIEW or +# DISABLE_INDEX is set to NO, this option has no effect. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FULL_SIDEBAR = NO + # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # @@ -1578,6 +1720,13 @@ TREEVIEW_WIDTH = 250 EXT_LINKS_IN_WINDOW = NO +# If the OBFUSCATE_EMAILS tag is set to YES, doxygen will obfuscate email +# addresses. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +OBFUSCATE_EMAILS = YES + # If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg # tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see # https://inkscape.org) to generate formulas as SVG images instead of PNGs for @@ -1598,17 +1747,6 @@ HTML_FORMULA_FORMAT = png FORMULA_FONTSIZE = 10 -# Use the FORMULA_TRANSPARENT tag to determine whether or not the images -# generated for formulas are transparent PNGs. Transparent PNGs are not -# supported properly for IE 6.0, but are supported on all modern browsers. -# -# Note that when changing this option you need to delete any form_*.png files in -# the HTML output directory before the changes have effect. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. 
- -FORMULA_TRANSPARENT = YES - # The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands # to create new LaTeX commands to be used in formulas as building blocks. See # the section "Including formulas" for details. @@ -1626,11 +1764,29 @@ FORMULA_MACROFILE = USE_MATHJAX = YES +# With MATHJAX_VERSION it is possible to specify the MathJax version to be used. +# Note that the different versions of MathJax have different requirements with +# regards to the different settings, so it is possible that also other MathJax +# settings have to be changed when switching between the different MathJax +# versions. +# Possible values are: MathJax_2 and MathJax_3. +# The default value is: MathJax_2. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_VERSION = MathJax_2 + # When MathJax is enabled you can set the default output format to be used for -# the MathJax output. See the MathJax site (see: -# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. +# the MathJax output. For more details about the output format see MathJax +# version 2 (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) and MathJax version 3 +# (see: +# http://docs.mathjax.org/en/latest/web/components/output.html). # Possible values are: HTML-CSS (which is slower, but has the best -# compatibility), NativeMML (i.e. MathML) and SVG. +# compatibility. This is the name for MathJax version 2, for MathJax version 3 +# this will be translated into chtml), NativeMML (i.e. MathML. Only supported +# for MathJax 2. For MathJax version 3 chtml will be used instead.), chtml (This +# is the name for MathJax version 3, for MathJax version 2 this will be +# translated into HTML-CSS) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. @@ -1643,15 +1799,21 @@ MATHJAX_FORMAT = HTML-CSS # MATHJAX_RELPATH should be ../mathjax.
The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of -# MathJax from https://www.mathjax.org before deployment. -# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2. +# MathJax from https://www.mathjax.org before deployment. The default value is: +# - in case of MathJax version 2: https://cdn.jsdelivr.net/npm/mathjax@2 +# - in case of MathJax version 3: https://cdn.jsdelivr.net/npm/mathjax@3 # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/ # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example +# for MathJax version 2 (see +# https://docs.mathjax.org/en/v2.7-latest/tex.html#tex-and-latex-extensions): # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# For example for MathJax version 3 (see +# http://docs.mathjax.org/en/latest/input/tex/extensions/index.html): +# MATHJAX_EXTENSIONS = ams # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = @@ -1831,29 +1993,31 @@ PAPER_TYPE = a4 EXTRA_PACKAGES = -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the -# generated LaTeX document. The header should contain everything until the first -# chapter. If it is left blank doxygen will generate a standard header. See -# section "Doxygen usage" for information on how to let doxygen write the -# default header to a separate file. +# The LATEX_HEADER tag can be used to specify a user-defined LaTeX header for +# the generated LaTeX document. The header should contain everything until the +# first chapter. If it is left blank doxygen will generate a standard header. 
It +# is highly recommended to start with a default header using +# doxygen -w latex new_header.tex new_footer.tex new_stylesheet.sty +# and then modify the file new_header.tex. See also section "Doxygen usage" for +# information on how to generate the default header that doxygen normally uses. # -# Note: Only use a user-defined header if you know what you are doing! The -# following commands have a special meaning inside the header: $title, -# $datetime, $date, $doxygenversion, $projectname, $projectnumber, -# $projectbrief, $projectlogo. Doxygen will replace $title with the empty -# string, for the replacement values of the other commands the user is referred -# to HTML_HEADER. +# Note: Only use a user-defined header if you know what you are doing! +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. The following +# commands have a special meaning inside the header (and footer): For a +# description of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_HEADER = -# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the -# generated LaTeX document. The footer should contain everything after the last -# chapter. If it is left blank doxygen will generate a standard footer. See +# The LATEX_FOOTER tag can be used to specify a user-defined LaTeX footer for +# the generated LaTeX document. The footer should contain everything after the +# last chapter. If it is left blank doxygen will generate a standard footer. See # LATEX_HEADER for more information on how to generate a default footer and what -# special commands can be used inside the footer. -# -# Note: Only use a user-defined footer if you know what you are doing! +# special commands can be used inside the footer. 
See also section "Doxygen +# usage" for information on how to generate the default footer that doxygen +# normally uses. Note: Only use a user-defined footer if you know what you are +# doing! # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_FOOTER = @@ -1898,8 +2062,7 @@ USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode # command to the generated LaTeX files. This will instruct LaTeX to keep running -# if errors occur, instead of asking the user for help. This option is also used -# when generating formulas in HTML. +# if errors occur, instead of asking the user for help. # The default value is: NO. # This tag requires that the tag GENERATE_LATEX is set to YES. @@ -1912,16 +2075,6 @@ LATEX_BATCHMODE = NO LATEX_HIDE_INDICES = NO -# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source -# code with syntax highlighting in the LaTeX output. -# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. -# The default value is: NO. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_SOURCE_CODE = NO - # The LATEX_BIB_STYLE tag can be used to specify the style to use for the # bibliography, e.g. plainnat, or ieeetr. See # https://en.wikipedia.org/wiki/BibTeX and \cite for more info. @@ -2002,16 +2155,6 @@ RTF_STYLESHEET_FILE = RTF_EXTENSIONS_FILE = -# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code -# with syntax highlighting in the RTF output. -# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. -# The default value is: NO. -# This tag requires that the tag GENERATE_RTF is set to YES. 
- -RTF_SOURCE_CODE = NO - #--------------------------------------------------------------------------- # Configuration options related to the man page output #--------------------------------------------------------------------------- @@ -2108,15 +2251,6 @@ GENERATE_DOCBOOK = NO DOCBOOK_OUTPUT = docbook -# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the -# program listings (including syntax highlighting and cross-referencing -# information) to the DOCBOOK output. Note that enabling this will significantly -# increase the size of the DOCBOOK output. -# The default value is: NO. -# This tag requires that the tag GENERATE_DOCBOOK is set to YES. - -DOCBOOK_PROGRAMLISTING = NO - #--------------------------------------------------------------------------- # Configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- @@ -2203,7 +2337,8 @@ SEARCH_INCLUDES = YES # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by the -# preprocessor. +# preprocessor. Note that the INCLUDE_PATH is not recursive, so the setting of +# RECURSIVE has no effect here. # This tag requires that the tag SEARCH_INCLUDES is set to YES. INCLUDE_PATH = @@ -2224,24 +2359,30 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. 
- PREDEFINED = DOXYGEN \ - MDBX_CXX20_CONCEPT(CONCEPT,NAME)="CONCEPT NAME" \ - MDBX_STD_FILESYSTEM_PATH=::mdbx::filesystem::path \ - MDBX_U128_TYPE=uint128_t MDBX_I128_TYPE=int128_t \ - MDBX_DECLARE_EXCEPTION(NAME)="struct LIBMDBX_API_TYPE NAME : public exception{NAME(const ::mdbx::error &); virtual ~NAME() noexcept; }" \ - MDBX_PURE_FUNCTION=[[gnu::pure]] \ - MDBX_NOTHROW_PURE_FUNCTION="[[gnu::pure, gnu::nothrow]]" \ - MDBX_CONST_FUNCTION=[[gnu::const]] \ - MDBX_NOTHROW_CONST_FUNCTION="[[gnu::const, gnu::nothrow]]" \ - MDBX_CXX01_CONSTEXPR=constexpr MDBX_CXX01_CONSTEXPR_VAR=constexpr \ - MDBX_CXX11_CONSTEXPR=constexpr MDBX_CXX11_CONSTEXPR_VAR=constexpr \ - MDBX_CXX14_CONSTEXPR=constexpr MDBX_CXX14_CONSTEXPR_VAR=constexpr \ - MDBX_CXX17_CONSTEXPR=constexpr MDBX_CXX20_CONSTEXPR=constexpr \ - MDBX_CXX17_NOEXCEPT=noexcept MDBX_IF_CONSTEXPR=constexpr \ - MDBX_CXX20_LIKELY=[[likely]] MDBX_CXX20_UNLIKELY=[[unlikely]] \ - MDBX_MAYBE_UNUSED=[[maybe_unused]] \ - MDBX_DEPRECATED=[[deprecated]] + "MDBX_CXX20_CONCEPT(CONCEPT,NAME)=CONCEPT NAME" \ + MDBX_STD_FILESYSTEM_PATH=::mdbx::filesystem::path \ + MDBX_U128_TYPE=uint128_t \ + MDBX_I128_TYPE=int128_t \ + "MDBX_DECLARE_EXCEPTION(NAME)=struct LIBMDBX_API_TYPE NAME : public exception{NAME(const ::mdbx::error &); virtual ~NAME() noexcept; }" \ + MDBX_PURE_FUNCTION=[[gnu::pure]] \ + "MDBX_NOTHROW_PURE_FUNCTION=[[gnu::pure, gnu::nothrow]]" \ + MDBX_CONST_FUNCTION=[[gnu::const]] \ + "MDBX_NOTHROW_CONST_FUNCTION=[[gnu::const, gnu::nothrow]]" \ + MDBX_CXX01_CONSTEXPR=constexpr \ + MDBX_CXX01_CONSTEXPR_VAR=constexpr \ + MDBX_CXX11_CONSTEXPR=constexpr \ + MDBX_CXX11_CONSTEXPR_VAR=constexpr \ + MDBX_CXX14_CONSTEXPR=constexpr \ + MDBX_CXX14_CONSTEXPR_VAR=constexpr \ + MDBX_CXX17_CONSTEXPR=constexpr \ + MDBX_CXX20_CONSTEXPR=constexpr \ + MDBX_CXX17_NOEXCEPT=noexcept \ + MDBX_IF_CONSTEXPR=constexpr \ + MDBX_CXX20_LIKELY=[[likely]] \ + MDBX_CXX20_UNLIKELY=[[unlikely]] \ + MDBX_MAYBE_UNUSED=[[maybe_unused]] \ + 
MDBX_DEPRECATED=[[deprecated]] # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The @@ -2312,15 +2453,6 @@ EXTERNAL_PAGES = NO # Configuration options related to the dot tool #--------------------------------------------------------------------------- -# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram -# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to -# NO turns the diagrams off. Note that this option also works with HAVE_DOT -# disabled, but it is recommended to install and use dot, since it yields more -# powerful graphs. -# The default value is: YES. - -CLASS_DIAGRAMS = NO - # You can include diagrams made with dia in doxygen documentation. Doxygen will # then run dia to produce the diagram and insert it in the documentation. The # DIA_PATH tag allows you to specify the directory where the dia binary resides. @@ -2339,7 +2471,7 @@ HIDE_UNDOC_RELATIONS = YES # http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent # Bell Labs. The other options in this section have no effect if this option is # set to NO -# The default value is: YES. +# The default value is: NO. HAVE_DOT = NO @@ -2353,37 +2485,52 @@ HAVE_DOT = NO DOT_NUM_THREADS = 0 -# When you want a differently looking font in the dot files that doxygen -# generates you can specify the font name using DOT_FONTNAME. You need to make -# sure dot is able to find the font, which can be done by putting it in a -# standard location or by setting the DOTFONTPATH environment variable or by -# setting DOT_FONTPATH to the directory containing the font. -# The default value is: Helvetica. +# DOT_COMMON_ATTR is common attributes for nodes, edges and labels of +# subgraphs. When you want a differently looking font in the dot files that +# doxygen generates you can specify fontname, fontcolor and fontsize attributes. 
+# For details please see the Node, +# Edge and Graph Attributes specification. You need to make sure dot is able +# to find the font, which can be done by putting it in a standard location or by +# setting the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the +# directory containing the font. Default graphviz fontsize is 14. +# The default value is: fontname=Helvetica,fontsize=10. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_FONTNAME = Helvetica +DOT_COMMON_ATTR = "fontname=Helvetica,fontsize=10" -# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of -# dot graphs. -# Minimum value: 4, maximum value: 24, default value: 10. +# DOT_EDGE_ATTR is concatenated with DOT_COMMON_ATTR. For elegant style you can +# add 'arrowhead=open, arrowtail=open, arrowsize=0.5'. See the complete documentation about +# arrow shapes. +# The default value is: labelfontname=Helvetica,labelfontsize=10. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_FONTSIZE = 10 +DOT_EDGE_ATTR = "labelfontname=Helvetica,labelfontsize=10" -# By default doxygen will tell dot to use the default font as specified with -# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set -# the path where dot can find it using this tag. +# DOT_NODE_ATTR is concatenated with DOT_COMMON_ATTR. For view without boxes +# around nodes set 'shape=plain' or 'shape=plaintext' (see the Shapes specification). +# The default value is: shape=box,height=0.2,width=0.4. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_NODE_ATTR = "shape=box,height=0.2,width=0.4" + +# You can set the path where dot can find the font specified with fontname in +# DOT_COMMON_ATTR and other dot attributes. # This tag requires that the tag HAVE_DOT is set to YES. DOT_FONTPATH = -# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for -# each documented class showing the direct and indirect inheritance relations.
-# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO. +# If the CLASS_GRAPH tag is set to YES (or GRAPH) then doxygen will generate a +# graph for each documented class showing the direct and indirect inheritance +# relations. In case HAVE_DOT is set as well dot will be used to draw the graph, +# otherwise the built-in generator will be used. If the CLASS_GRAPH tag is set +# to TEXT the direct and indirect inheritance relations will be shown as texts / +# links. +# Possible values are: NO, YES, TEXT and GRAPH. # The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. -CLASS_GRAPH = YES +CLASS_GRAPH = TEXT # If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a # graph for each documented class showing the direct and indirect implementation @@ -2395,7 +2542,8 @@ CLASS_GRAPH = YES COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for -# groups, showing the direct groups dependencies. +# groups, showing the direct groups dependencies. See also the chapter Grouping +# in the manual. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2510,6 +2658,13 @@ GRAPHICAL_HIERARCHY = YES DIRECTORY_GRAPH = YES +# The DIR_GRAPH_MAX_DEPTH tag can be used to limit the maximum number of levels +# of child directories generated in directory dependency graphs by dot. +# Minimum value: 1, maximum value: 25, default value: 1. +# This tag requires that the tag DIRECTORY_GRAPH is set to YES. + +DIR_GRAPH_MAX_DEPTH = 1 + # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. 
For an explanation of the image formats see the section # output formats in the documentation of the dot tool (Graphviz (see: @@ -2517,9 +2672,7 @@ DIRECTORY_GRAPH = YES # Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order # to make the SVG files visible in IE 9+ (other browsers do not have this # requirement). -# Possible values are: png, png:cairo, png:cairo:cairo, png:cairo:gd, png:gd, -# png:gd:gd, jpg, jpg:cairo, jpg:cairo:gd, jpg:gd, jpg:gd:gd, gif, gif:cairo, -# gif:cairo:gd, gif:gd, gif:gd:gd, svg, png:gd, png:gd:gd, png:cairo, +# Possible values are: png, jpg, gif, svg, png:gd, png:gd:gd, png:cairo, # png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and # png:gdiplus:gdiplus. # The default value is: png. @@ -2565,10 +2718,10 @@ MSCFILE_DIRS = DIAFILE_DIRS = # When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the -# path where java can find the plantuml.jar file. If left blank, it is assumed -# PlantUML is not used or called during a preprocessing step. Doxygen will -# generate a warning when it encounters a \startuml command in this case and -# will not generate output for the diagram. +# path where java can find the plantuml.jar file or to the filename of jar file +# to be used. If left blank, it is assumed PlantUML is not used or called during +# a preprocessing step. Doxygen will generate a warning when it encounters a +# \startuml command in this case and will not generate output for the diagram. PLANTUML_JAR_PATH = @@ -2606,18 +2759,6 @@ DOT_GRAPH_MAX_NODES = 50 MAX_DOT_GRAPH_DEPTH = 0 -# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent -# background. This is disabled by default, because dot on Windows does not seem -# to support this out of the box. -# -# Warning: Depending on the platform used, enabling this option may lead to -# badly anti-aliased labels on the edges of a graph (i.e. they become hard to -# read). -# The default value is: NO. 
-# This tag requires that the tag HAVE_DOT is set to YES. - -DOT_TRANSPARENT = NO - # Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) support @@ -2630,6 +2771,8 @@ DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page # explaining the meaning of the various boxes and arrows in the dot generated # graphs. +# Note: This tag requires that UML_LOOK isn't set, i.e. the doxygen internal +# graphical representation for inheritance and collaboration diagrams is used. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2638,8 +2781,8 @@ GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate # files that are used to generate the various graphs. # -# Note: This setting is not only used for dot files but also for msc and -# plantuml temporary files. +# Note: This setting is not only used for dot files but also for msc temporary +# files. # The default value is: YES. 
DOT_CLEANUP = YES From 5ebc2c523da7775402c75266e66f74885f04fa27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 23 Oct 2023 20:35:55 +0300 Subject: [PATCH 017/443] =?UTF-8?q?mdbx:=20=D0=BE=D0=B1=D0=BD=D0=BE=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index 53ca0059..78cbe391 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -9,6 +9,24 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic Не выпуск, а начало ветки `0.13` с новым функционалом и изменением API. +Новое: + + - Расширение API функционалом проверки целостности структуры БД, с + переработкой и переноса функционала утилиты `mdbx_chk` внутрь библиотеки. + + - Расширение API функциями lock/unlock/upgrade/downgrade основной блокировки. + + - Добавление в API функций `mdbx_cursor_unbind()` и `mdbx_txn_release_all_cursors()`. + + - Возвращение `MDBX_TXN_INVALID` (`INT32_MIN`) вместо `-1` + из `mdbx_txn_flags()` при передаче невалидной транзакции. + +Мелочи: + + - Обновление конфигурации Doxygen до 1.9.6. + - Добавление `--read-var-info=yes` для Valgrind. + - Вывод из `mdbx_chk` информации об уровне детализации/verbosity. 
+ ******************************************************************************** From ad4d00677b72e58022bb06cb92733216e5ca2635 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 29 Oct 2023 16:39:35 +0300 Subject: [PATCH 018/443] =?UTF-8?q?mdbx:=20PTHREAD=5FMUTEX=5FERRORCHECK=20?= =?UTF-8?q?=D0=BF=D1=80=D0=B8=20MDBX=5FDEBUG=20>=200.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/osal.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/osal.c b/src/osal.c index b07565b4..adffbabf 100644 --- a/src/osal.c +++ b/src/osal.c @@ -503,8 +503,18 @@ MDBX_INTERNAL_FUNC int osal_fastmutex_init(osal_fastmutex_t *fastmutex) { #if defined(_WIN32) || defined(_WIN64) InitializeCriticalSection(fastmutex); return MDBX_SUCCESS; +#elif MDBX_DEBUG + pthread_mutexattr_t ma; + int rc = pthread_mutexattr_init(&ma); + if (likely(!rc)) { + rc = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); + if (likely(!rc) || rc == ENOTSUP) + rc = pthread_mutex_init(fastmutex, &ma); + pthread_mutexattr_destroy(&ma); + } + return rc; #else - return pthread_mutex_init(fastmutex, NULL); + return pthread_mutex_init(fastmutex, nullptr); #endif } From 07fc7b9227b699397f8c838c36f2ab719bfddcd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 30 Oct 2023 12:25:05 +0300 Subject: [PATCH 019/443] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D1=86=D0=B8?= =?UTF-8?q?=D0=B8=20`--taillog`=20=D0=B2=20=D1=81=D1=82=D0=BE=D1=85=D0=B0?= =?UTF-8?q?=D1=81=D1=82=D0=B8=D1=87=D0=B5=D1=81=D0=BA=D0=B8=D0=B9=20=D1=81?= =?UTF-8?q?=D0=BA=D1=80=D0=B8=D0=BF=D1=82.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
GNUmakefile | 6 +++--- test/long_stochastic.sh | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index c8d79a95..104ae372 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -418,15 +418,15 @@ smoke-fault: build-test test: build-test @echo ' RUNNING `test/long_stochastic.sh --loops 2`...' - $(QUIET)test/long_stochastic.sh --dont-check-ram-size --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false) + $(QUIET)test/long_stochastic.sh --dont-check-ram-size --loops 2 --db-upto-mb 256 --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) long-test: build-test @echo ' RUNNING `test/long_stochastic.sh --loops 42`...' - $(QUIET)test/long_stochastic.sh --loops 42 --db-upto-mb 1024 --skip-make + $(QUIET)test/long_stochastic.sh --loops 42 --db-upto-mb 1024 --skip-make --taillog test-singleprocess: build-test @echo ' RUNNING `test/long_stochastic.sh --single --loops 2`...' - $(QUIET)test/long_stochastic.sh --dont-check-ram-size --single --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false) + $(QUIET)test/long_stochastic.sh --dont-check-ram-size --single --loops 2 --db-upto-mb 256 --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) test-valgrind: CFLAGS_EXTRA=-Ofast -DMDBX_USE_VALGRIND test-valgrind: build-test diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index 491ec695..c03c83da 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -13,6 +13,7 @@ DB_UPTO_MB=17408 PAGESIZE=min DONT_CHECK_RAM=no EXTRA=no +TAILLOG=0 while [ -n "$1" ] do @@ -35,9 +36,13 @@ do echo "--pagesize NN Use specified page size (256 is minimal and used by default)" echo "--dont-check-ram-size Don't check available RAM" echo "--extra Iterate extra modes/flags" + echo "--taillog Dump tail of test log on failure" echo "--help Print this usage help and exit" exit -2 ;; + --taillog) + TAILLOG=999 + ;; --multi) LIST=basic ;; @@ 
-345,14 +350,38 @@ if which lz4 >/dev/null; then function logger { lz4 > ${TESTDB_DIR}/long.log.lz4 } + function taillog { + if [ -s ${TESTDB_DIR}/long.log.lz4 ]; then + echo "=============================================== last ${TAILLOG} lines" + lz4 -d -c ${TESTDB_DIR}/long.log.lz4 | tail -n ${TAILLOG} + else + echo "=============================================== no test log" + fi + } elif which gzip >/dev/null; then function logger { gzip > ${TESTDB_DIR}/long.log.gz } + function taillog { + if [ -s ${TESTDB_DIR}/long.log.gz ]; then + echo "=============================================== last ${TAILLOG} lines" + gzip -d -c ${TESTDB_DIR}/long.log.gz | tail -n ${TAILLOG} + else + echo "=============================================== no test log" + fi + } else function logger { cat > ${TESTDB_DIR}/long.log } + function taillog { + if [ -s ${TESTDB_DIR}/long.log ]; then + echo "=============================================== last ${TAILLOG} lines" + tail -n ${TAILLOG} ${TESTDB_DIR}/long.log + else + echo "=============================================== no test log" + fi + } fi if [ "$EXTRA" != "no" ]; then @@ -375,6 +404,9 @@ function bits2options { function failed { echo "FAILED" >&2 + if [ ${TAILLOG} -gt 0 ]; then + taillog + fi exit 1 } From 7a413406bef54de8baed8ad38203d99acafba26d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 30 Oct 2023 20:32:10 +0300 Subject: [PATCH 020/443] =?UTF-8?q?mdbx-test:=20=D0=BE=D0=B1=D0=BD=D0=BE?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B8=D1=81=D0=BA=D0=BB?= =?UTF-8?q?=D1=8E=D1=87=D0=B5=D0=BD=D0=B8=D0=B9=20=D0=B4=D0=BB=D1=8F=20Val?= =?UTF-8?q?grind.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/valgrind_suppress.txt | 75 +------------------------------------- 1 file changed, 1 insertion(+), 74 deletions(-) diff --git a/test/valgrind_suppress.txt 
b/test/valgrind_suppress.txt index 5bc50077..c01054ac 100644 --- a/test/valgrind_suppress.txt +++ b/test/valgrind_suppress.txt @@ -2,7 +2,6 @@ msync-whole-mmap-1 Memcheck:Param msync(start) - fun:msync ... fun:sync_locked* } @@ -10,7 +9,6 @@ msync-whole-mmap-2 Memcheck:Param msync(start) - fun:msync ... fun:env_sync* } @@ -18,7 +16,6 @@ msync-whole-mmap-3 Memcheck:Param msync(start) - fun:msync ... fun:map_resize* } @@ -26,7 +23,6 @@ msync-wipe-steady Memcheck:Param msync(start) - fun:msync ... fun:wipe_steady* } @@ -34,7 +30,6 @@ msync-meta Memcheck:Param msync(start) - fun:msync ... fun:meta_sync* } @@ -42,7 +37,6 @@ msync-spill Memcheck:Param msync(start) - fun:msync ... fun:txn_spill* } @@ -72,7 +66,6 @@ pwrite-page-flush Memcheck:Param pwrite(buf) - fun:pwrite ... fun:iov_write* } @@ -80,7 +73,6 @@ pwrite64-page-flush Memcheck:Param pwrite64(buf) - fun:pwrite ... fun:iov_write* } @@ -90,16 +82,14 @@ # pwritev-page-flush # Memcheck:Param # pwritev(vector[...]) -# fun:pwritev # ... # fun:iov_write* #} -# for((i=0;i<64;++i)); do echo -e "{\n pwritev-page-flush-$i\n Memcheck:Param\n pwritev(vector[$i])\n fun:pwritev\n ...\n fun:iov_write*\n}"; done >> valgrind_suppress.txt +# for((i=0;i<64;++i)); do echo -e "{\n pwritev-page-flush-$i\n Memcheck:Param\n pwritev(vector[$i])\n ...\n fun:iov_write*\n}"; done >> valgrind_suppress.txt { pwritev-page-flush-0 Memcheck:Param pwritev(vector[0]) - fun:pwritev ... fun:iov_write* } @@ -107,7 +97,6 @@ pwritev-page-flush-1 Memcheck:Param pwritev(vector[1]) - fun:pwritev ... fun:iov_write* } @@ -115,7 +104,6 @@ pwritev-page-flush-2 Memcheck:Param pwritev(vector[2]) - fun:pwritev ... fun:iov_write* } @@ -123,7 +111,6 @@ pwritev-page-flush-3 Memcheck:Param pwritev(vector[3]) - fun:pwritev ... fun:iov_write* } @@ -131,7 +118,6 @@ pwritev-page-flush-4 Memcheck:Param pwritev(vector[4]) - fun:pwritev ... fun:iov_write* } @@ -139,7 +125,6 @@ pwritev-page-flush-5 Memcheck:Param pwritev(vector[5]) - fun:pwritev ... 
fun:iov_write* } @@ -147,7 +132,6 @@ pwritev-page-flush-6 Memcheck:Param pwritev(vector[6]) - fun:pwritev ... fun:iov_write* } @@ -155,7 +139,6 @@ pwritev-page-flush-7 Memcheck:Param pwritev(vector[7]) - fun:pwritev ... fun:iov_write* } @@ -163,7 +146,6 @@ pwritev-page-flush-8 Memcheck:Param pwritev(vector[8]) - fun:pwritev ... fun:iov_write* } @@ -171,7 +153,6 @@ pwritev-page-flush-9 Memcheck:Param pwritev(vector[9]) - fun:pwritev ... fun:iov_write* } @@ -179,7 +160,6 @@ pwritev-page-flush-10 Memcheck:Param pwritev(vector[10]) - fun:pwritev ... fun:iov_write* } @@ -187,7 +167,6 @@ pwritev-page-flush-11 Memcheck:Param pwritev(vector[11]) - fun:pwritev ... fun:iov_write* } @@ -195,7 +174,6 @@ pwritev-page-flush-12 Memcheck:Param pwritev(vector[12]) - fun:pwritev ... fun:iov_write* } @@ -203,7 +181,6 @@ pwritev-page-flush-13 Memcheck:Param pwritev(vector[13]) - fun:pwritev ... fun:iov_write* } @@ -211,7 +188,6 @@ pwritev-page-flush-14 Memcheck:Param pwritev(vector[14]) - fun:pwritev ... fun:iov_write* } @@ -219,7 +195,6 @@ pwritev-page-flush-15 Memcheck:Param pwritev(vector[15]) - fun:pwritev ... fun:iov_write* } @@ -227,7 +202,6 @@ pwritev-page-flush-16 Memcheck:Param pwritev(vector[16]) - fun:pwritev ... fun:iov_write* } @@ -235,7 +209,6 @@ pwritev-page-flush-17 Memcheck:Param pwritev(vector[17]) - fun:pwritev ... fun:iov_write* } @@ -243,7 +216,6 @@ pwritev-page-flush-18 Memcheck:Param pwritev(vector[18]) - fun:pwritev ... fun:iov_write* } @@ -251,7 +223,6 @@ pwritev-page-flush-19 Memcheck:Param pwritev(vector[19]) - fun:pwritev ... fun:iov_write* } @@ -259,7 +230,6 @@ pwritev-page-flush-20 Memcheck:Param pwritev(vector[20]) - fun:pwritev ... fun:iov_write* } @@ -267,7 +237,6 @@ pwritev-page-flush-21 Memcheck:Param pwritev(vector[21]) - fun:pwritev ... fun:iov_write* } @@ -275,7 +244,6 @@ pwritev-page-flush-22 Memcheck:Param pwritev(vector[22]) - fun:pwritev ... 
fun:iov_write* } @@ -283,7 +251,6 @@ pwritev-page-flush-23 Memcheck:Param pwritev(vector[23]) - fun:pwritev ... fun:iov_write* } @@ -291,7 +258,6 @@ pwritev-page-flush-24 Memcheck:Param pwritev(vector[24]) - fun:pwritev ... fun:iov_write* } @@ -299,7 +265,6 @@ pwritev-page-flush-25 Memcheck:Param pwritev(vector[25]) - fun:pwritev ... fun:iov_write* } @@ -307,7 +272,6 @@ pwritev-page-flush-26 Memcheck:Param pwritev(vector[26]) - fun:pwritev ... fun:iov_write* } @@ -315,7 +279,6 @@ pwritev-page-flush-27 Memcheck:Param pwritev(vector[27]) - fun:pwritev ... fun:iov_write* } @@ -323,7 +286,6 @@ pwritev-page-flush-28 Memcheck:Param pwritev(vector[28]) - fun:pwritev ... fun:iov_write* } @@ -331,7 +293,6 @@ pwritev-page-flush-29 Memcheck:Param pwritev(vector[29]) - fun:pwritev ... fun:iov_write* } @@ -339,7 +300,6 @@ pwritev-page-flush-30 Memcheck:Param pwritev(vector[30]) - fun:pwritev ... fun:iov_write* } @@ -347,7 +307,6 @@ pwritev-page-flush-31 Memcheck:Param pwritev(vector[31]) - fun:pwritev ... fun:iov_write* } @@ -355,7 +314,6 @@ pwritev-page-flush-32 Memcheck:Param pwritev(vector[32]) - fun:pwritev ... fun:iov_write* } @@ -363,7 +321,6 @@ pwritev-page-flush-33 Memcheck:Param pwritev(vector[33]) - fun:pwritev ... fun:iov_write* } @@ -371,7 +328,6 @@ pwritev-page-flush-34 Memcheck:Param pwritev(vector[34]) - fun:pwritev ... fun:iov_write* } @@ -379,7 +335,6 @@ pwritev-page-flush-35 Memcheck:Param pwritev(vector[35]) - fun:pwritev ... fun:iov_write* } @@ -387,7 +342,6 @@ pwritev-page-flush-36 Memcheck:Param pwritev(vector[36]) - fun:pwritev ... fun:iov_write* } @@ -395,7 +349,6 @@ pwritev-page-flush-37 Memcheck:Param pwritev(vector[37]) - fun:pwritev ... fun:iov_write* } @@ -403,7 +356,6 @@ pwritev-page-flush-38 Memcheck:Param pwritev(vector[38]) - fun:pwritev ... fun:iov_write* } @@ -411,7 +363,6 @@ pwritev-page-flush-39 Memcheck:Param pwritev(vector[39]) - fun:pwritev ... 
fun:iov_write* } @@ -419,7 +370,6 @@ pwritev-page-flush-40 Memcheck:Param pwritev(vector[40]) - fun:pwritev ... fun:iov_write* } @@ -427,7 +377,6 @@ pwritev-page-flush-41 Memcheck:Param pwritev(vector[41]) - fun:pwritev ... fun:iov_write* } @@ -435,7 +384,6 @@ pwritev-page-flush-42 Memcheck:Param pwritev(vector[42]) - fun:pwritev ... fun:iov_write* } @@ -443,7 +391,6 @@ pwritev-page-flush-43 Memcheck:Param pwritev(vector[43]) - fun:pwritev ... fun:iov_write* } @@ -451,7 +398,6 @@ pwritev-page-flush-44 Memcheck:Param pwritev(vector[44]) - fun:pwritev ... fun:iov_write* } @@ -459,7 +405,6 @@ pwritev-page-flush-45 Memcheck:Param pwritev(vector[45]) - fun:pwritev ... fun:iov_write* } @@ -467,7 +412,6 @@ pwritev-page-flush-46 Memcheck:Param pwritev(vector[46]) - fun:pwritev ... fun:iov_write* } @@ -475,7 +419,6 @@ pwritev-page-flush-47 Memcheck:Param pwritev(vector[47]) - fun:pwritev ... fun:iov_write* } @@ -483,7 +426,6 @@ pwritev-page-flush-48 Memcheck:Param pwritev(vector[48]) - fun:pwritev ... fun:iov_write* } @@ -491,7 +433,6 @@ pwritev-page-flush-49 Memcheck:Param pwritev(vector[49]) - fun:pwritev ... fun:iov_write* } @@ -499,7 +440,6 @@ pwritev-page-flush-50 Memcheck:Param pwritev(vector[50]) - fun:pwritev ... fun:iov_write* } @@ -507,7 +447,6 @@ pwritev-page-flush-51 Memcheck:Param pwritev(vector[51]) - fun:pwritev ... fun:iov_write* } @@ -515,7 +454,6 @@ pwritev-page-flush-52 Memcheck:Param pwritev(vector[52]) - fun:pwritev ... fun:iov_write* } @@ -523,7 +461,6 @@ pwritev-page-flush-53 Memcheck:Param pwritev(vector[53]) - fun:pwritev ... fun:iov_write* } @@ -531,7 +468,6 @@ pwritev-page-flush-54 Memcheck:Param pwritev(vector[54]) - fun:pwritev ... fun:iov_write* } @@ -539,7 +475,6 @@ pwritev-page-flush-55 Memcheck:Param pwritev(vector[55]) - fun:pwritev ... fun:iov_write* } @@ -547,7 +482,6 @@ pwritev-page-flush-56 Memcheck:Param pwritev(vector[56]) - fun:pwritev ... 
fun:iov_write* } @@ -555,7 +489,6 @@ pwritev-page-flush-57 Memcheck:Param pwritev(vector[57]) - fun:pwritev ... fun:iov_write* } @@ -563,7 +496,6 @@ pwritev-page-flush-58 Memcheck:Param pwritev(vector[58]) - fun:pwritev ... fun:iov_write* } @@ -571,7 +503,6 @@ pwritev-page-flush-59 Memcheck:Param pwritev(vector[59]) - fun:pwritev ... fun:iov_write* } @@ -579,7 +510,6 @@ pwritev-page-flush-60 Memcheck:Param pwritev(vector[60]) - fun:pwritev ... fun:iov_write* } @@ -587,7 +517,6 @@ pwritev-page-flush-61 Memcheck:Param pwritev(vector[61]) - fun:pwritev ... fun:iov_write* } @@ -595,7 +524,6 @@ pwritev-page-flush-62 Memcheck:Param pwritev(vector[62]) - fun:pwritev ... fun:iov_write* } @@ -603,7 +531,6 @@ pwritev-page-flush-63 Memcheck:Param pwritev(vector[63]) - fun:pwritev ... fun:iov_write* } From 54920cd07bc42395ebf6ef8db0a727ae1a2ff893 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 31 Oct 2023 12:11:59 +0300 Subject: [PATCH 021/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20assert-=D0=BF=D1=80?= =?UTF-8?q?=D0=BE=D0=B2=D0=B5=D1=80=D0=BE=D0=BA=20=D0=B2=D0=BD=D1=83=D1=82?= =?UTF-8?q?=D1=80=D0=B8=20`osal=5Ftxn=5Flock()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lck-posix.c | 5 ++++- src/lck-windows.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/lck-posix.c b/src/lck-posix.c index 7f58e9ed..d55a9395 100644 --- a/src/lck-posix.c +++ b/src/lck-posix.c @@ -956,11 +956,14 @@ MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env) { int osal_txn_lock(MDBX_env *env, bool dont_wait) { TRACE("%swait %s", dont_wait ? 
"dont-" : "", ">>"); - eASSERT(env, !env->me_txn0->mt_owner); jitter4testing(true); const int err = mdbx_ipclock_lock(env, &env->me_lck->mti_wlock, dont_wait); int rc = err; if (likely(!MDBX_IS_ERROR(err))) { + eASSERT(env, !env->me_txn0->mt_owner || + err == /* если другой поток в этом-же процессе завершился + не освободив блокировку */ + MDBX_RESULT_TRUE); env->me_txn0->mt_owner = osal_thread_self(); rc = MDBX_SUCCESS; } diff --git a/src/lck-windows.c b/src/lck-windows.c index ed77da30..d2354285 100644 --- a/src/lck-windows.c +++ b/src/lck-windows.c @@ -179,7 +179,6 @@ static int funlock(mdbx_filehandle_t fd, size_t offset, size_t bytes) { #define DXB_WHOLE 0, DXB_MAXLEN int osal_txn_lock(MDBX_env *env, bool dontwait) { - eASSERT(env, !env->me_txn0->mt_owner); if (dontwait) { if (!TryEnterCriticalSection(&env->me_windowsbug_lock)) return MDBX_BUSY; @@ -195,6 +194,7 @@ int osal_txn_lock(MDBX_env *env, bool dontwait) { } } + eASSERT(env, !env->me_txn0->mt_owner); if (env->me_flags & MDBX_EXCLUSIVE) goto done; From 81f386f83123f7db742054cf1693408e8fc82ea1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 3 Nov 2023 11:28:13 +0300 Subject: [PATCH 022/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=B5=D1=80=D0=B5=D0=BC?= =?UTF-8?q?=D0=B5=D1=89=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=BE=D0=BB=D0=B5?= =?UTF-8?q?=D0=B9=20=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B8=20`MDBX=5Ftxn`=20?= =?UTF-8?q?=D0=B8=20`MDBX=5Fenv`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit В текущем понимании так префетчер ЦПУ может быть чуть более эффективным и чуть меньше зазоров для выравнивания. 
--- src/internals.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/internals.h b/src/internals.h index c871b3df..8fdb37a8 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1170,6 +1170,8 @@ struct MDBX_txn { #error "Oops, some txn flags overlapped or wrong" #endif uint32_t mt_flags; + unsigned mt_numdbs; + size_t mt_owner; /* thread ID that owns this transaction */ MDBX_txn *mt_parent; /* parent of a nested txn */ /* Nested txn under this txn, set together with flag MDBX_TXN_HAS_CHILD */ @@ -1191,8 +1193,6 @@ struct MDBX_txn { MDBX_dbx *mt_dbxs; /* Array of MDBX_db records for each known DB */ MDBX_db *mt_dbs; - /* Array of sequence numbers for each DB handle */ - MDBX_atomic_uint32_t *mt_dbiseqs; /* Transaction DBI Flags */ #define DBI_DIRTY MDBX_DBI_DIRTY /* DB was written in this txn */ @@ -1202,16 +1202,15 @@ struct MDBX_txn { #define DBI_VALID 0x10 /* DB handle is valid, see also DB_VALID */ #define DBI_USRVALID 0x20 /* As DB_VALID, but not set for FREE_DBI */ #define DBI_AUDITED 0x40 /* Internal flag for accounting during audit */ - /* Array of flags for each DB */ + /* Array of non-shared txn's flags of DBI */ uint8_t *mt_dbistate; - /* Number of DB records in use, or 0 when the txn is finished. - * This number only ever increments until the txn finishes; we - * don't decrement it when individual DB handles are closed. */ - MDBX_dbi mt_numdbs; - size_t mt_owner; /* thread ID that owns this transaction */ + + /* Array of sequence numbers for each DB handle. 
*/ + MDBX_atomic_uint32_t *mt_dbiseqs; + MDBX_cursor **mt_cursors; + MDBX_canary mt_canary; void *mt_userctx; /* User-settable context */ - MDBX_cursor **mt_cursors; union { struct { @@ -1364,6 +1363,7 @@ struct MDBX_env { #define MDBX_DEPRECATED_COALESCE UINT32_C(0x2000000) #define ENV_INTERNAL_FLAGS (MDBX_FATAL_ERROR | MDBX_ENV_ACTIVE | MDBX_ENV_TXKEY) uint32_t me_flags; + unsigned me_psize; /* DB page size, initialized from me_os_psize */ osal_mmap_t me_dxb_mmap; /* The main data file */ #define me_map me_dxb_mmap.base #define me_lazy_fd me_dxb_mmap.fd @@ -1376,7 +1376,6 @@ struct MDBX_env { #define me_lfd me_lck_mmap.fd struct MDBX_lockinfo *me_lck; - unsigned me_psize; /* DB page size, initialized from me_os_psize */ unsigned me_leaf_nodemax; /* max size of a leaf-node */ unsigned me_branch_nodemax; /* max size of a branch-node */ atomic_pgno_t me_mlocked_pgno; @@ -1448,6 +1447,7 @@ struct MDBX_env { } me_sysv_ipc; #endif /* MDBX_LOCKING == MDBX_LOCKING_SYSV */ bool me_incore; + bool me_prefault_write; MDBX_env *me_lcklist_next; @@ -1455,11 +1455,11 @@ struct MDBX_env { MDBX_txn *me_txn; /* current write transaction */ osal_fastmutex_t me_dbi_lock; - MDBX_dbi me_numdbs; /* number of DBs opened */ - bool me_prefault_write; + unsigned me_numdbs; /* number of DBs opened */ - MDBX_page *me_dp_reserve; /* list of malloc'ed blocks for re-use */ unsigned me_dp_reserve_len; + MDBX_page *me_dp_reserve; /* list of malloc'ed blocks for re-use */ + /* PNL of pages that became unused in a write txn */ MDBX_PNL me_retired_pages; osal_ioring_t me_ioring; From f3171707066b8cf83ff0e386d4b7cfbf1d3719c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 3 Nov 2023 11:30:54 +0300 Subject: [PATCH 023/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=B5=D1=80=D0=B5=D0=B8?= =?UTF-8?q?=D0=BC=D0=B5=D0=BD=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20=D0=B2?= 
=?UTF-8?q?=D0=BD=D1=83=D1=82=D1=80=D0=B5=D0=BD=D0=BD=D0=B8=D1=85=20=D0=BF?= =?UTF-8?q?=D0=BE=D0=BB=D0=B5=D0=B9=20=D0=B8=20=D0=BC=D0=B0=D0=BA=D1=80?= =?UTF-8?q?=D0=BE=D1=81=D0=BE=D0=B2=20=D0=B4=D0=BB=D1=8F=20=D1=83=D0=BB?= =?UTF-8?q?=D1=83=D1=87=D1=88=D0=B5=D0=BD=D0=B8=D1=8F=20=D1=87=D0=B8=D1=82?= =?UTF-8?q?=D0=B0=D0=B5=D0=BC=D0=BE=D1=81=D1=82=D0=B8=20=D0=BA=D0=BE=D0=B4?= =?UTF-8?q?=D0=B0.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 314 ++++++++++++++++++++++++------------------------ src/internals.h | 32 ++--- 2 files changed, 174 insertions(+), 172 deletions(-) diff --git a/src/core.c b/src/core.c index 4a741bf3..2db1e56f 100644 --- a/src/core.c +++ b/src/core.c @@ -3232,26 +3232,26 @@ static int page_touch(MDBX_cursor *mc); static int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, const MDBX_val *data); -#define MDBX_END_NAMES \ +#define TXN_END_NAMES \ { \ "committed", "empty-commit", "abort", "reset", "reset-tmp", "fail-begin", \ "fail-beginchild" \ } enum { /* txn_end operation number, for logging */ - MDBX_END_COMMITTED, - MDBX_END_PURE_COMMIT, - MDBX_END_ABORT, - MDBX_END_RESET, - MDBX_END_RESET_TMP, - MDBX_END_FAIL_BEGIN, - MDBX_END_FAIL_BEGINCHILD + TXN_END_COMMITTED, + TXN_END_PURE_COMMIT, + TXN_END_ABORT, + TXN_END_RESET, + TXN_END_RESET_TMP, + TXN_END_FAIL_BEGIN, + TXN_END_FAIL_BEGINCHILD }; -#define MDBX_END_OPMASK 0x0F /* mask for txn_end() operation number */ -#define MDBX_END_UPDATE 0x10 /* update env state (DBIs) */ -#define MDBX_END_FREE 0x20 /* free txn unless it is MDBX_env.me_txn0 */ -#define MDBX_END_EOTDONE 0x40 /* txn's cursors already closed */ -#define MDBX_END_SLOT 0x80 /* release any reader slot if MDBX_NOTLS */ +#define TXN_END_OPMASK 0x0F /* mask for txn_end() operation number */ +#define TXN_END_UPDATE 0x10 /* update env state (DBIs) */ +#define TXN_END_FREE 0x20 /* free txn unless it is MDBX_env.me_txn0 */ +#define TXN_END_EOTDONE 0x40 /* txn's cursors already 
closed */ +#define TXN_END_SLOT 0x80 /* release any reader slot if MDBX_NOTLS */ static int txn_end(MDBX_txn *txn, const unsigned mode); static __always_inline pgr_t page_get_inline(const uint16_t ILL, @@ -4830,7 +4830,7 @@ static size_t txn_keep(MDBX_txn *txn, MDBX_cursor *m0) { txn_lru_turn(txn); size_t keep = m0 ? cursor_keep(txn, m0) : 0; for (size_t i = FREE_DBI; i < txn->mt_numdbs; ++i) - if (F_ISSET(txn->mt_dbistate[i], DBI_DIRTY | DBI_VALID) && + if (F_ISSET(txn->mt_dbi_state[i], DBI_DIRTY | DBI_VALID) && txn->mt_dbs[i].md_root != P_INVALID) for (MDBX_cursor *mc = txn->mt_cursors[i]; mc; mc = mc->mc_next) if (mc != m0) @@ -7761,7 +7761,7 @@ done: __hot static pgr_t page_alloc(const MDBX_cursor *const mc) { MDBX_txn *const txn = mc->mc_txn; tASSERT(txn, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY); - tASSERT(txn, F_ISSET(txn->mt_dbistate[mc->mc_dbi], DBI_DIRTY | DBI_VALID)); + tASSERT(txn, F_ISSET(txn->mt_dbi_state[mc->mc_dbi], DBI_DIRTY | DBI_VALID)); /* If there are any loose pages, just use them */ while (likely(txn->tw.loose_pages)) { @@ -7901,7 +7901,7 @@ __hot static int page_touch(MDBX_cursor *mc) { int rc; tASSERT(txn, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY); - tASSERT(txn, F_ISSET(*mc->mc_dbistate, DBI_DIRTY | DBI_VALID)); + tASSERT(txn, F_ISSET(*mc->mc_dbi_state, DBI_DIRTY | DBI_VALID)); tASSERT(txn, !IS_OVERFLOW(mp)); if (ASSERT_ENABLED()) { if (mc->mc_flags & C_SUB) { @@ -7909,7 +7909,7 @@ __hot static int page_touch(MDBX_cursor *mc) { MDBX_cursor_couple *couple = container_of(mx, MDBX_cursor_couple, inner); tASSERT(txn, mc->mc_db == &couple->outer.mc_xcursor->mx_db); tASSERT(txn, mc->mc_dbx == &couple->outer.mc_xcursor->mx_dbx); - tASSERT(txn, *couple->outer.mc_dbistate & DBI_DIRTY); + tASSERT(txn, *couple->outer.mc_dbi_state & DBI_DIRTY); } tASSERT(txn, dirtylist_check(txn)); } @@ -8313,7 +8313,7 @@ static int cursor_shadow(MDBX_txn *parent, MDBX_txn *nested) { * txn pointer here for cursor fixups to keep working. 
*/ mc->mc_txn = nested; mc->mc_db = &nested->mt_dbs[i]; - mc->mc_dbistate = &nested->mt_dbistate[i]; + mc->mc_dbi_state = &nested->mt_dbi_state[i]; MDBX_xcursor *mx = mc->mc_xcursor; if (mx != NULL) { *(MDBX_xcursor *)(bk + 1) = *mx; @@ -8362,7 +8362,7 @@ static void cursors_eot(MDBX_txn *txn, const bool merge) { mc->mc_backup = bk->mc_backup; mc->mc_txn = bk->mc_txn; mc->mc_db = bk->mc_db; - mc->mc_dbistate = bk->mc_dbistate; + mc->mc_dbi_state = bk->mc_dbi_state; if (mx) { if (mx != bk->mc_xcursor) { *bk->mc_xcursor = *mx; @@ -8994,7 +8994,8 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { MDBX_PNL_SETSIZE(txn->tw.lifo_reclaimed, 0); env->me_txn = txn; txn->mt_numdbs = env->me_numdbs; - memcpy(txn->mt_dbiseqs, env->me_dbiseqs, txn->mt_numdbs * sizeof(unsigned)); + memcpy(txn->mt_dbi_seqs, env->me_dbi_seqs, + txn->mt_numdbs * sizeof(unsigned)); if ((txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) { rc = dpl_alloc(txn); @@ -9016,17 +9017,17 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { osal_compiler_barrier(); memset(txn->mt_cursors, 0, sizeof(MDBX_cursor *) * txn->mt_numdbs); for (size_t i = CORE_DBS; i < txn->mt_numdbs; i++) { - const unsigned db_flags = env->me_dbflags[i]; + const unsigned db_flags = env->me_db_flags[i]; txn->mt_dbs[i].md_flags = db_flags & DB_PERSISTENT_FLAGS; - txn->mt_dbistate[i] = + txn->mt_dbi_state[i] = (db_flags & DB_VALID) ? 
DBI_VALID | DBI_USRVALID | DBI_STALE : 0; } - txn->mt_dbistate[MAIN_DBI] = DBI_VALID | DBI_USRVALID; + txn->mt_dbi_state[MAIN_DBI] = DBI_VALID | DBI_USRVALID; rc = setup_dbx(&txn->mt_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], env->me_psize); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; - txn->mt_dbistate[FREE_DBI] = DBI_VALID; + txn->mt_dbi_state[FREE_DBI] = DBI_VALID; txn->mt_front = txn->mt_txnid + ((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == 0); @@ -9134,7 +9135,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { } bailout: tASSERT(txn, rc != MDBX_SUCCESS); - txn_end(txn, MDBX_END_SLOT | MDBX_END_FAIL_BEGIN); + txn_end(txn, TXN_END_SLOT | TXN_END_FAIL_BEGIN); return rc; } @@ -9292,14 +9293,14 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, #if MDBX_DEBUG txn->mt_cursors[FREE_DBI] = nullptr; /* avoid SIGSEGV in an assertion later */ #endif /* MDBX_DEBUG */ - txn->mt_dbistate = ptr_disp(txn, size - env->me_maxdbs); + txn->mt_dbi_state = ptr_disp(txn, size - env->me_maxdbs); txn->mt_dbxs = env->me_dbxs; /* static */ txn->mt_flags = flags; txn->mt_env = env; if (parent) { tASSERT(parent, dirtylist_check(parent)); - txn->mt_dbiseqs = parent->mt_dbiseqs; + txn->mt_dbi_seqs = parent->mt_dbi_seqs; txn->mt_geo = parent->mt_geo; rc = dpl_alloc(txn); if (likely(rc == MDBX_SUCCESS)) { @@ -9380,10 +9381,10 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, txn->mt_owner = parent->mt_owner; memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDBX_db)); txn->tw.troika = parent->tw.troika; - /* Copy parent's mt_dbistate, but clear DB_NEW */ + /* Copy parent's mt_dbi_state, but clear DB_NEW */ for (size_t i = 0; i < txn->mt_numdbs; i++) - txn->mt_dbistate[i] = - parent->mt_dbistate[i] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); + txn->mt_dbi_state[i] = + parent->mt_dbi_state[i] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); tASSERT(parent, parent->tw.dirtyroom + parent->tw.dirtylist->length == 
(parent->mt_parent ? parent->mt_parent->tw.dirtyroom @@ -9398,9 +9399,9 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, tASSERT(txn, audit_ex(txn, 0, false) == 0); } if (unlikely(rc != MDBX_SUCCESS)) - txn_end(txn, MDBX_END_FAIL_BEGINCHILD); + txn_end(txn, TXN_END_FAIL_BEGINCHILD); } else { /* MDBX_TXN_RDONLY */ - txn->mt_dbiseqs = env->me_dbiseqs; + txn->mt_dbi_seqs = env->me_dbi_seqs; renew: rc = txn_renew(txn, flags); } @@ -9583,18 +9584,18 @@ int mdbx_txn_flags(const MDBX_txn *txn) { /* Check for misused dbi handles */ static __inline bool dbi_changed(const MDBX_txn *txn, size_t dbi) { - if (txn->mt_dbiseqs == txn->mt_env->me_dbiseqs) + if (txn->mt_dbi_seqs == txn->mt_env->me_dbi_seqs) return false; if (likely( - txn->mt_dbiseqs[dbi].weak == - atomic_load32((MDBX_atomic_uint32_t *)&txn->mt_env->me_dbiseqs[dbi], + txn->mt_dbi_seqs[dbi].weak == + atomic_load32((MDBX_atomic_uint32_t *)&txn->mt_env->me_dbi_seqs[dbi], mo_AcquireRelease))) return false; return true; } static __inline unsigned dbi_seq(const MDBX_env *const env, size_t slot) { - unsigned v = env->me_dbiseqs[slot].weak + 1; + unsigned v = env->me_dbi_seqs[slot].weak + 1; return v + (v == 0); } @@ -9604,21 +9605,21 @@ static void dbi_import_locked(MDBX_txn *txn) { for (size_t i = CORE_DBS; i < n; ++i) { if (i >= txn->mt_numdbs) { txn->mt_cursors[i] = NULL; - if (txn->mt_dbiseqs != env->me_dbiseqs) - txn->mt_dbiseqs[i].weak = 0; - txn->mt_dbistate[i] = 0; + if (txn->mt_dbi_seqs != env->me_dbi_seqs) + txn->mt_dbi_seqs[i].weak = 0; + txn->mt_dbi_state[i] = 0; } if ((dbi_changed(txn, i) && - (txn->mt_dbistate[i] & (DBI_CREAT | DBI_DIRTY | DBI_FRESH)) == 0) || - ((env->me_dbflags[i] & DB_VALID) && - !(txn->mt_dbistate[i] & DBI_VALID))) { - tASSERT(txn, - (txn->mt_dbistate[i] & (DBI_CREAT | DBI_DIRTY | DBI_FRESH)) == 0); - txn->mt_dbiseqs[i] = env->me_dbiseqs[i]; - txn->mt_dbs[i].md_flags = env->me_dbflags[i] & DB_PERSISTENT_FLAGS; - txn->mt_dbistate[i] = 0; - if 
(env->me_dbflags[i] & DB_VALID) { - txn->mt_dbistate[i] = DBI_VALID | DBI_USRVALID | DBI_STALE; + (txn->mt_dbi_state[i] & (DBI_CREAT | DBI_DIRTY | DBI_FRESH)) == 0) || + ((env->me_db_flags[i] & DB_VALID) && + !(txn->mt_dbi_state[i] & DBI_VALID))) { + tASSERT(txn, (txn->mt_dbi_state[i] & + (DBI_CREAT | DBI_DIRTY | DBI_FRESH)) == 0); + txn->mt_dbi_seqs[i] = env->me_dbi_seqs[i]; + txn->mt_dbs[i].md_flags = env->me_db_flags[i] & DB_PERSISTENT_FLAGS; + txn->mt_dbi_state[i] = 0; + if (env->me_db_flags[i] & DB_VALID) { + txn->mt_dbi_state[i] = DBI_VALID | DBI_USRVALID | DBI_STALE; tASSERT(txn, txn->mt_dbxs[i].md_cmp != NULL); tASSERT(txn, txn->mt_dbxs[i].md_name.iov_base != NULL); } @@ -9626,13 +9627,13 @@ static void dbi_import_locked(MDBX_txn *txn) { } while (unlikely(n < txn->mt_numdbs)) if (txn->mt_cursors[txn->mt_numdbs - 1] == NULL && - (txn->mt_dbistate[txn->mt_numdbs - 1] & DBI_USRVALID) == 0) + (txn->mt_dbi_state[txn->mt_numdbs - 1] & DBI_USRVALID) == 0) txn->mt_numdbs -= 1; else { - if ((txn->mt_dbistate[n] & DBI_USRVALID) == 0) { - if (txn->mt_dbiseqs != env->me_dbiseqs) - txn->mt_dbiseqs[n].weak = 0; - txn->mt_dbistate[n] = 0; + if ((txn->mt_dbi_state[n] & DBI_USRVALID) == 0) { + if (txn->mt_dbi_seqs != env->me_dbi_seqs) + txn->mt_dbi_seqs[n].weak = 0; + txn->mt_dbi_state[n] = 0; } ++n; } @@ -9650,7 +9651,7 @@ __cold static bool dbi_import(MDBX_txn *txn, MDBX_dbi dbi) { dbi_import_locked(txn); ENSURE(txn->mt_env, osal_fastmutex_release(&txn->mt_env->me_dbi_lock) == MDBX_SUCCESS); - return txn->mt_dbistate[dbi] & DBI_USRVALID; + return txn->mt_dbi_state[dbi] & DBI_USRVALID; } /* Export or close DBI handles opened in this txn. 
*/ @@ -9662,43 +9663,43 @@ static void dbi_update(MDBX_txn *txn, int keep) { MDBX_env *const env = txn->mt_env; for (size_t i = n; --i >= CORE_DBS;) { - if (likely((txn->mt_dbistate[i] & DBI_CREAT) == 0)) + if (likely((txn->mt_dbi_state[i] & DBI_CREAT) == 0)) continue; if (!locked) { ENSURE(env, osal_fastmutex_acquire(&env->me_dbi_lock) == MDBX_SUCCESS); locked = true; } if (env->me_numdbs <= i || - txn->mt_dbiseqs[i].weak != env->me_dbiseqs[i].weak) + txn->mt_dbi_seqs[i].weak != env->me_dbi_seqs[i].weak) continue /* dbi explicitly closed and/or then re-opened by other txn */; if (keep) { - env->me_dbflags[i] = txn->mt_dbs[i].md_flags | DB_VALID; + env->me_db_flags[i] = txn->mt_dbs[i].md_flags | DB_VALID; } else { const MDBX_val name = env->me_dbxs[i].md_name; if (name.iov_base) { env->me_dbxs[i].md_name.iov_base = nullptr; - eASSERT(env, env->me_dbflags[i] == 0); - atomic_store32(&env->me_dbiseqs[i], dbi_seq(env, i), + eASSERT(env, env->me_db_flags[i] == 0); + atomic_store32(&env->me_dbi_seqs[i], dbi_seq(env, i), mo_AcquireRelease); env->me_dbxs[i].md_name.iov_len = 0; if (name.iov_len) osal_free(name.iov_base); } else { eASSERT(env, name.iov_len == 0); - eASSERT(env, env->me_dbflags[i] == 0); + eASSERT(env, env->me_db_flags[i] == 0); } } } n = env->me_numdbs; - if (n > CORE_DBS && unlikely(!(env->me_dbflags[n - 1] & DB_VALID))) { + if (n > CORE_DBS && unlikely(!(env->me_db_flags[n - 1] & DB_VALID))) { if (!locked) { ENSURE(env, osal_fastmutex_acquire(&env->me_dbi_lock) == MDBX_SUCCESS); locked = true; } n = env->me_numdbs; - while (n > CORE_DBS && !(env->me_dbflags[n - 1] & DB_VALID)) + while (n > CORE_DBS && !(env->me_db_flags[n - 1] & DB_VALID)) --n; env->me_numdbs = n; } @@ -9782,7 +9783,7 @@ static void dpl_sift(MDBX_txn *const txn, MDBX_PNL pl, const bool spilled) { * [in] mode why and how to end the transaction */ static int txn_end(MDBX_txn *txn, const unsigned mode) { MDBX_env *env = txn->mt_env; - static const char *const names[] = MDBX_END_NAMES; + 
static const char *const names[] = TXN_END_NAMES; #if MDBX_ENV_CHECKPID if (unlikely(txn->mt_env->me_pid != osal_getpid())) { @@ -9793,11 +9794,11 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { DEBUG("%s txn %" PRIaTXN "%c %p on mdbenv %p, root page %" PRIaPGNO "/%" PRIaPGNO, - names[mode & MDBX_END_OPMASK], txn->mt_txnid, + names[mode & TXN_END_OPMASK], txn->mt_txnid, (txn->mt_flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root, txn->mt_dbs[FREE_DBI].md_root); - if (!(mode & MDBX_END_EOTDONE)) /* !(already closed cursors) */ + if (!(mode & TXN_END_EOTDONE)) /* !(already closed cursors) */ cursors_eot(txn, false); int rc = MDBX_SUCCESS; @@ -9823,7 +9824,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { eASSERT(env, slot->mr_pid.weak == env->me_pid); eASSERT(env, slot->mr_txnid.weak >= SAFE64_INVALID_THRESHOLD); } - if (mode & MDBX_END_SLOT) { + if (mode & TXN_END_SLOT) { if ((env->me_flags & MDBX_ENV_TXKEY) == 0) atomic_store32(&slot->mr_pid, 0, mo_Relaxed); txn->to.reader = NULL; @@ -9852,7 +9853,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { if (txn == env->me_txn0) { eASSERT(env, txn->mt_parent == NULL); /* Export or close DBI handles created in this txn */ - dbi_update(txn, mode & MDBX_END_UPDATE); + dbi_update(txn, mode & TXN_END_UPDATE); pnl_shrink(&txn->tw.retired_pages); pnl_shrink(&txn->tw.relist); if (!(env->me_flags & MDBX_WRITEMAP)) @@ -9925,7 +9926,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { } eASSERT(env, txn == env->me_txn0 || txn->mt_owner == 0); - if ((mode & MDBX_END_FREE) != 0 && txn != env->me_txn0) { + if ((mode & TXN_END_FREE) != 0 && txn != env->me_txn0) { txn->mt_signature = 0; osal_free(txn); } @@ -9943,7 +9944,7 @@ int mdbx_txn_reset(MDBX_txn *txn) { return MDBX_EINVAL; /* LY: don't close DBI-handles */ - rc = txn_end(txn, MDBX_END_RESET | MDBX_END_UPDATE); + rc = txn_end(txn, TXN_END_RESET | TXN_END_UPDATE); if (rc == MDBX_SUCCESS) { 
tASSERT(txn, txn->mt_signature == MDBX_MT_SIGNATURE); tASSERT(txn, txn->mt_owner == 0); @@ -9971,8 +9972,8 @@ int mdbx_txn_abort(MDBX_txn *txn) { if (txn->mt_flags & MDBX_TXN_RDONLY) /* LY: don't close DBI-handles */ - return txn_end(txn, MDBX_END_ABORT | MDBX_END_UPDATE | MDBX_END_SLOT | - MDBX_END_FREE); + return txn_end(txn, TXN_END_ABORT | TXN_END_UPDATE | TXN_END_SLOT | + TXN_END_FREE); if (unlikely(txn->mt_flags & MDBX_TXN_FINISHED)) return MDBX_BAD_TXN; @@ -9981,7 +9982,7 @@ int mdbx_txn_abort(MDBX_txn *txn) { mdbx_txn_abort(txn->mt_child); tASSERT(txn, (txn->mt_flags & MDBX_TXN_ERROR) || dirtylist_check(txn)); - return txn_end(txn, MDBX_END_ABORT | MDBX_END_SLOT | MDBX_END_FREE); + return txn_end(txn, TXN_END_ABORT | TXN_END_SLOT | TXN_END_FREE); } /* Count all the pages in each DB and in the GC and make sure @@ -10019,16 +10020,16 @@ __cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, tASSERT(txn, rc == MDBX_NOTFOUND); for (size_t i = FREE_DBI; i < txn->mt_numdbs; i++) - txn->mt_dbistate[i] &= ~DBI_AUDITED; + txn->mt_dbi_state[i] &= ~DBI_AUDIT; size_t used = NUM_METAS; for (size_t i = FREE_DBI; i <= MAIN_DBI; i++) { - if (!(txn->mt_dbistate[i] & DBI_VALID)) + if (!(txn->mt_dbi_state[i] & DBI_VALID)) continue; rc = cursor_init(&cx.outer, txn, i); if (unlikely(rc != MDBX_SUCCESS)) return rc; - txn->mt_dbistate[i] |= DBI_AUDITED; + txn->mt_dbi_state[i] |= DBI_AUDIT; if (txn->mt_dbs[i].md_root == P_INVALID) continue; used += (size_t)txn->mt_dbs[i].md_branch_pages + @@ -10049,13 +10050,13 @@ __cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, memcpy(db = &db_copy, node_data(node), sizeof(db_copy)); if ((txn->mt_flags & MDBX_TXN_RDONLY) == 0) { for (MDBX_dbi k = txn->mt_numdbs; --k > MAIN_DBI;) { - if ((txn->mt_dbistate[k] & DBI_VALID) && + if ((txn->mt_dbi_state[k] & DBI_VALID) && /* txn->mt_dbxs[k].md_name.iov_base && */ node_ks(node) == txn->mt_dbxs[k].md_name.iov_len && memcmp(node_key(node), txn->mt_dbxs[k].md_name.iov_base, 
node_ks(node)) == 0) { - txn->mt_dbistate[k] |= DBI_AUDITED; - if (!(txn->mt_dbistate[k] & MDBX_DBI_STALE)) + txn->mt_dbi_state[k] |= DBI_AUDIT; + if (!(txn->mt_dbi_state[k] & MDBX_DBI_STALE)) db = txn->mt_dbs + k; break; } @@ -10071,25 +10072,25 @@ __cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, } for (size_t i = FREE_DBI; i < txn->mt_numdbs; i++) { - if ((txn->mt_dbistate[i] & (DBI_VALID | DBI_AUDITED | DBI_STALE)) != + if ((txn->mt_dbi_state[i] & (DBI_VALID | DBI_AUDIT | DBI_STALE)) != DBI_VALID) continue; for (MDBX_txn *t = txn; t; t = t->mt_parent) - if (F_ISSET(t->mt_dbistate[i], DBI_DIRTY | DBI_CREAT)) { + if (F_ISSET(t->mt_dbi_state[i], DBI_DIRTY | DBI_CREAT)) { used += (size_t)t->mt_dbs[i].md_branch_pages + (size_t)t->mt_dbs[i].md_leaf_pages + (size_t)t->mt_dbs[i].md_overflow_pages; - txn->mt_dbistate[i] |= DBI_AUDITED; + txn->mt_dbi_state[i] |= DBI_AUDIT; break; } MDBX_ANALYSIS_ASSUME(txn != nullptr); - if (!(txn->mt_dbistate[i] & DBI_AUDITED)) { + if (!(txn->mt_dbi_state[i] & DBI_AUDIT)) { WARNING("audit %s@%" PRIaTXN ": unable account dbi %zd / \"%*s\", state 0x%02x", txn->mt_parent ? 
"nested-" : "", txn->mt_txnid, i, (int)txn->mt_dbxs[i].md_name.iov_len, (const char *)txn->mt_dbxs[i].md_name.iov_base, - txn->mt_dbistate[i]); + txn->mt_dbi_state[i]); } } @@ -11175,10 +11176,10 @@ static __always_inline bool check_dbi(const MDBX_txn *txn, MDBX_dbi dbi, unsigned validity) { if (likely(dbi < txn->mt_numdbs)) { if (likely(!dbi_changed(txn, dbi))) { - if (likely(txn->mt_dbistate[dbi] & validity)) + if (likely(txn->mt_dbi_state[dbi] & validity)) return true; if (likely(dbi < CORE_DBS || - (txn->mt_env->me_dbflags[dbi] & DB_VALID) == 0)) + (txn->mt_env->me_db_flags[dbi] & DB_VALID) == 0)) return false; } } @@ -11601,7 +11602,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { /* txn_end() mode for a commit which writes nothing */ unsigned end_mode = - MDBX_END_PURE_COMMIT | MDBX_END_UPDATE | MDBX_END_SLOT | MDBX_END_FREE; + TXN_END_PURE_COMMIT | TXN_END_UPDATE | TXN_END_SLOT | TXN_END_FREE; if (unlikely(txn->mt_flags & MDBX_TXN_RDONLY)) goto done; @@ -11630,9 +11631,9 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { if (txn->tw.dirtylist->length == 0 && !(txn->mt_flags & MDBX_TXN_DIRTY) && parent->mt_numdbs == txn->mt_numdbs) { for (int i = txn->mt_numdbs; --i >= 0;) { - tASSERT(txn, (txn->mt_dbistate[i] & DBI_DIRTY) == 0); - if ((txn->mt_dbistate[i] & DBI_STALE) && - !(parent->mt_dbistate[i] & DBI_STALE)) + tASSERT(txn, (txn->mt_dbi_state[i] & DBI_DIRTY) == 0); + if ((txn->mt_dbi_state[i] & DBI_STALE) && + !(parent->mt_dbi_state[i] & DBI_STALE)) tASSERT(txn, memcmp(&parent->mt_dbs[i], &txn->mt_dbs[i], sizeof(MDBX_db)) == 0); } @@ -11646,7 +11647,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { tASSERT(txn, txn->tw.loose_count == 0); /* fast completion of pure nested transaction */ - end_mode = MDBX_END_PURE_COMMIT | MDBX_END_SLOT | MDBX_END_FREE; + end_mode = TXN_END_PURE_COMMIT | TXN_END_SLOT | TXN_END_FREE; goto done; } @@ -11706,7 +11707,7 @@ int mdbx_txn_commit_ex(MDBX_txn 
*txn, MDBX_commit_latency *latency) { /* Merge our cursors into parent's and close them */ cursors_eot(txn, true); - end_mode |= MDBX_END_EOTDONE; + end_mode |= TXN_END_EOTDONE; /* Update parent's DBs array */ memcpy(parent->mt_dbs, txn->mt_dbs, txn->mt_numdbs * sizeof(MDBX_db)); @@ -11714,12 +11715,12 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { for (size_t i = 0; i < txn->mt_numdbs; i++) { /* preserve parent's status */ const uint8_t state = - txn->mt_dbistate[i] | - (parent->mt_dbistate[i] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)); + txn->mt_dbi_state[i] | + (parent->mt_dbi_state[i] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)); DEBUG("dbi %zu dbi-state %s 0x%02x -> 0x%02x", i, - (parent->mt_dbistate[i] != state) ? "update" : "still", - parent->mt_dbistate[i], state); - parent->mt_dbistate[i] = state; + (parent->mt_dbi_state[i] != state) ? "update" : "still", + parent->mt_dbi_state[i], state); + parent->mt_dbi_state[i] = state; } if (latency) { @@ -11767,12 +11768,12 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { : txn->mt_env->me_options.dp_limit)); } cursors_eot(txn, false); - end_mode |= MDBX_END_EOTDONE; + end_mode |= TXN_END_EOTDONE; if ((!txn->tw.dirtylist || txn->tw.dirtylist->length == 0) && (txn->mt_flags & (MDBX_TXN_DIRTY | MDBX_TXN_SPILLS)) == 0) { for (intptr_t i = txn->mt_numdbs; --i >= 0;) - tASSERT(txn, (txn->mt_dbistate[i] & DBI_DIRTY) == 0); + tASSERT(txn, (txn->mt_dbi_state[i] & DBI_DIRTY) == 0); #if defined(MDBX_NOSUCCESS_EMPTY_COMMIT) && MDBX_NOSUCCESS_EMPTY_COMMIT rc = txn_end(txn, end_mode); if (unlikely(rc != MDBX_SUCCESS)) @@ -11799,7 +11800,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { if (unlikely(rc != MDBX_SUCCESS)) goto fail; for (MDBX_dbi i = CORE_DBS; i < txn->mt_numdbs; i++) { - if (txn->mt_dbistate[i] & DBI_DIRTY) { + if (txn->mt_dbi_state[i] & DBI_DIRTY) { MDBX_db *db = &txn->mt_dbs[i]; DEBUG("update main's entry for sub-db %u, mod_txnid %" PRIaTXN " -> %" 
PRIaTXN, @@ -11830,11 +11831,11 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { goto fail; tASSERT(txn, txn->tw.loose_count == 0); - txn->mt_dbs[FREE_DBI].md_mod_txnid = (txn->mt_dbistate[FREE_DBI] & DBI_DIRTY) + txn->mt_dbs[FREE_DBI].md_mod_txnid = (txn->mt_dbi_state[FREE_DBI] & DBI_DIRTY) ? txn->mt_txnid : txn->mt_dbs[FREE_DBI].md_mod_txnid; - txn->mt_dbs[MAIN_DBI].md_mod_txnid = (txn->mt_dbistate[MAIN_DBI] & DBI_DIRTY) + txn->mt_dbs[MAIN_DBI].md_mod_txnid = (txn->mt_dbi_state[MAIN_DBI] & DBI_DIRTY) ? txn->mt_txnid : txn->mt_dbs[MAIN_DBI].md_mod_txnid; @@ -11969,7 +11970,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { goto fail; } - end_mode = MDBX_END_COMMITTED | MDBX_END_UPDATE | MDBX_END_EOTDONE; + end_mode = TXN_END_COMMITTED | TXN_END_UPDATE | TXN_END_EOTDONE; done: if (latency) @@ -14910,10 +14911,10 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, env->me_flags = (flags & ~MDBX_FATAL_ERROR) | MDBX_ENV_ACTIVE; env->me_pathname = osal_calloc(env_pathname.ent_len + 1, sizeof(pathchar_t)); env->me_dbxs = osal_calloc(env->me_maxdbs, sizeof(MDBX_dbx)); - env->me_dbflags = osal_calloc(env->me_maxdbs, sizeof(env->me_dbflags[0])); - env->me_dbiseqs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbiseqs[0])); - if (!(env->me_dbxs && env->me_pathname && env->me_dbflags && - env->me_dbiseqs)) { + env->me_db_flags = osal_calloc(env->me_maxdbs, sizeof(env->me_db_flags[0])); + env->me_dbi_seqs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbi_seqs[0])); + if (!(env->me_dbxs && env->me_pathname && env->me_db_flags && + env->me_dbi_seqs)) { rc = MDBX_ENOMEM; goto bailout; } @@ -15268,10 +15269,10 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, txn->mt_dbs = ptr_disp(txn, tsize); txn->mt_cursors = ptr_disp(txn->mt_dbs, sizeof(MDBX_db) * env->me_maxdbs); - txn->mt_dbiseqs = + txn->mt_dbi_seqs = ptr_disp(txn->mt_cursors, sizeof(MDBX_cursor *) * env->me_maxdbs); - txn->mt_dbistate = 
ptr_disp( - txn->mt_dbiseqs, sizeof(MDBX_atomic_uint32_t) * env->me_maxdbs); + txn->mt_dbi_state = ptr_disp( + txn->mt_dbi_seqs, sizeof(MDBX_atomic_uint32_t) * env->me_maxdbs); txn->mt_env = env; txn->mt_dbxs = env->me_dbxs; txn->mt_flags = MDBX_TXN_FINISHED; @@ -15399,13 +15400,13 @@ __cold static int env_close(MDBX_env *env) { osal_memalign_free(env->me_pbuf); env->me_pbuf = nullptr; } - if (env->me_dbiseqs) { - osal_free(env->me_dbiseqs); - env->me_dbiseqs = nullptr; + if (env->me_dbi_seqs) { + osal_free(env->me_dbi_seqs); + env->me_dbi_seqs = nullptr; } - if (env->me_dbflags) { - osal_free(env->me_dbflags); - env->me_dbflags = nullptr; + if (env->me_db_flags) { + osal_free(env->me_db_flags); + env->me_db_flags = nullptr; } if (env->me_pathname) { osal_free(env->me_pathname); @@ -15945,7 +15946,7 @@ static int fetch_sdb(MDBX_txn *txn, size_t dbi) { if (unlikely(rc != MDBX_SUCCESS)) return rc; - txn->mt_dbistate[dbi] &= ~DBI_STALE; + txn->mt_dbi_state[dbi] &= ~DBI_STALE; return MDBX_SUCCESS; } @@ -15995,7 +15996,7 @@ __hot static int page_search(MDBX_cursor *mc, const MDBX_val *key, int flags) { } /* Make sure we're using an up-to-date root */ - if (unlikely(*mc->mc_dbistate & DBI_STALE)) { + if (unlikely(*mc->mc_dbi_state & DBI_STALE)) { rc = fetch_sdb(mc->mc_txn, mc->mc_dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -16019,7 +16020,7 @@ __hot static int page_search(MDBX_cursor *mc, const MDBX_val *key, int flags) { do if ((scan->mt_flags & MDBX_TXN_DIRTY) && (mc->mc_dbi == MAIN_DBI || - (scan->mt_dbistate[mc->mc_dbi] & DBI_DIRTY))) { + (scan->mt_dbi_state[mc->mc_dbi] & DBI_DIRTY))) { /* После коммита вложенных тразакций может быть mod_txnid > front */ pp_txnid = scan->mt_front; break; @@ -17171,8 +17172,8 @@ int mdbx_cursor_get_batch(MDBX_cursor *mc, size_t *count, MDBX_val *pairs, } static int touch_dbi(MDBX_cursor *mc) { - cASSERT(mc, (*mc->mc_dbistate & DBI_DIRTY) == 0); - *mc->mc_dbistate |= DBI_DIRTY; + cASSERT(mc, (*mc->mc_dbi_state & DBI_DIRTY) == 
0); + *mc->mc_dbi_state |= DBI_DIRTY; mc->mc_txn->mt_flags |= MDBX_TXN_DIRTY; if (mc->mc_dbi >= CORE_DBS) { /* Touch DB record of named DB */ @@ -17180,7 +17181,7 @@ static int touch_dbi(MDBX_cursor *mc) { int rc = cursor_init(&cx.outer, mc->mc_txn, MAIN_DBI); if (unlikely(rc != MDBX_SUCCESS)) return rc; - mc->mc_txn->mt_dbistate[MAIN_DBI] |= DBI_DIRTY; + mc->mc_txn->mt_dbi_state[MAIN_DBI] |= DBI_DIRTY; rc = page_search(&cx.outer, &mc->mc_dbx->md_name, MDBX_PS_MODIFY); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -17198,7 +17199,7 @@ static __hot int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, MDBX_txn *const txn = mc->mc_txn; txn_lru_turn(txn); - if (unlikely((*mc->mc_dbistate & DBI_DIRTY) == 0)) { + if (unlikely((*mc->mc_dbi_state & DBI_DIRTY) == 0)) { int err = touch_dbi(mc); if (unlikely(err != MDBX_SUCCESS)) return err; @@ -18261,7 +18262,7 @@ static pgr_t page_new(MDBX_cursor *mc, const unsigned flags) { DEBUG("db %u allocated new page %" PRIaPGNO, mc->mc_dbi, ret.page->mp_pgno); ret.page->mp_flags = (uint16_t)flags; - cASSERT(mc, *mc->mc_dbistate & DBI_DIRTY); + cASSERT(mc, *mc->mc_dbi_state & DBI_DIRTY); cASSERT(mc, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY); #if MDBX_ENABLE_PGOP_STAT mc->mc_txn->mt_env->me_lck->mti_pgop_stat.newly.weak += 1; @@ -18292,7 +18293,7 @@ static pgr_t page_new_large(MDBX_cursor *mc, const size_t npages) { DEBUG("db %u allocated new large-page %" PRIaPGNO ", num %zu", mc->mc_dbi, ret.page->mp_pgno, npages); ret.page->mp_flags = P_OVERFLOW; - cASSERT(mc, *mc->mc_dbistate & DBI_DIRTY); + cASSERT(mc, *mc->mc_dbi_state & DBI_DIRTY); cASSERT(mc, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY); #if MDBX_ENABLE_PGOP_STAT mc->mc_txn->mt_env->me_lck->mti_pgop_stat.newly.weak += npages; @@ -18615,7 +18616,7 @@ static int cursor_xinit0(MDBX_cursor *mc) { mx->mx_cursor.mc_db = &mx->mx_db; mx->mx_cursor.mc_dbx = &mx->mx_dbx; mx->mx_cursor.mc_dbi = mc->mc_dbi; - mx->mx_cursor.mc_dbistate = mc->mc_dbistate; + mx->mx_cursor.mc_dbi_state = 
mc->mc_dbi_state; mx->mx_cursor.mc_snum = 0; mx->mx_cursor.mc_top = 0; mx->mx_cursor.mc_flags = C_SUB; @@ -18769,7 +18770,7 @@ static __inline int couple_init(MDBX_cursor_couple *couple, const size_t dbi, couple->outer.mc_txn = (MDBX_txn *)txn; couple->outer.mc_db = db; couple->outer.mc_dbx = dbx; - couple->outer.mc_dbistate = dbstate; + couple->outer.mc_dbi_state = dbstate; couple->outer.mc_snum = 0; couple->outer.mc_top = 0; couple->outer.mc_pg[0] = 0; @@ -18784,7 +18785,7 @@ static __inline int couple_init(MDBX_cursor_couple *couple, const size_t dbi, couple->outer.mc_xcursor = NULL; int rc = MDBX_SUCCESS; - if (unlikely(*couple->outer.mc_dbistate & DBI_STALE)) { + if (unlikely(*couple->outer.mc_dbi_state & DBI_STALE)) { rc = page_search(&couple->outer, NULL, MDBX_PS_ROOTONLY); rc = (rc != MDBX_NOTFOUND) ? rc : MDBX_SUCCESS; } else if (unlikely(dbx->md_klen_max == 0)) { @@ -18808,7 +18809,7 @@ static int cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, size_t dbi) { STATIC_ASSERT(offsetof(MDBX_cursor_couple, outer) == 0); return couple_init(container_of(mc, MDBX_cursor_couple, outer), dbi, txn, &txn->mt_dbs[dbi], &txn->mt_dbxs[dbi], - &txn->mt_dbistate[dbi]); + &txn->mt_dbi_state[dbi]); } MDBX_cursor *mdbx_cursor_create(void *context) { @@ -18907,7 +18908,7 @@ int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { cASSERT(mc, mc->mc_db == &txn->mt_dbs[dbi]); cASSERT(mc, mc->mc_dbx == &txn->mt_dbxs[dbi]); cASSERT(mc, mc->mc_dbi == dbi); - cASSERT(mc, mc->mc_dbistate == &txn->mt_dbistate[dbi]); + cASSERT(mc, mc->mc_dbi_state == &txn->mt_dbi_state[dbi]); return likely(mc->mc_dbi == dbi && /* paranoia */ mc->mc_signature == MDBX_MC_LIVE && mc->mc_txn == txn) @@ -18970,7 +18971,7 @@ int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest) { assert(dest->mc_db == src->mc_db); assert(dest->mc_dbi == src->mc_dbi); assert(dest->mc_dbx == src->mc_dbx); - assert(dest->mc_dbistate == src->mc_dbistate); + assert(dest->mc_dbi_state == 
src->mc_dbi_state); again: assert(dest->mc_txn == src->mc_txn); dest->mc_flags ^= (dest->mc_flags ^ src->mc_flags) & ~C_UNTRACK; @@ -19728,7 +19729,7 @@ static void cursor_restore(const MDBX_cursor *csrc, MDBX_cursor *cdst) { cASSERT(cdst, cdst->mc_txn == csrc->mc_txn); cASSERT(cdst, cdst->mc_db == csrc->mc_db); cASSERT(cdst, cdst->mc_dbx == csrc->mc_dbx); - cASSERT(cdst, cdst->mc_dbistate == csrc->mc_dbistate); + cASSERT(cdst, cdst->mc_dbi_state == csrc->mc_dbi_state); cdst->mc_snum = csrc->mc_snum; cdst->mc_top = csrc->mc_top; cdst->mc_flags = csrc->mc_flags; @@ -19753,7 +19754,7 @@ static void cursor_copy(const MDBX_cursor *csrc, MDBX_cursor *cdst) { cdst->mc_txn = csrc->mc_txn; cdst->mc_db = csrc->mc_db; cdst->mc_dbx = csrc->mc_dbx; - cdst->mc_dbistate = csrc->mc_dbistate; + cdst->mc_dbi_state = csrc->mc_dbi_state; cursor_restore(csrc, cdst); } @@ -19811,7 +19812,7 @@ static int rebalance(MDBX_cursor *mc) { if (nkeys == 0) { cASSERT(mc, IS_LEAF(mp)); DEBUG("%s", "tree is completely empty"); - cASSERT(mc, (*mc->mc_dbistate & DBI_DIRTY) != 0); + cASSERT(mc, (*mc->mc_dbi_state & DBI_DIRTY) != 0); mc->mc_db->md_root = P_INVALID; mc->mc_db->md_depth = 0; cASSERT(mc, mc->mc_db->md_branch_pages == 0 && @@ -21508,7 +21509,7 @@ __cold static int compacting_walk_sdb(mdbx_compacting_ctx *ctx, MDBX_db *sdb) { memset(&couple, 0, sizeof(couple)); couple.inner.mx_cursor.mc_signature = ~MDBX_MC_LIVE; MDBX_dbx dbx = {.md_klen_min = INT_MAX}; - uint8_t dbistate = DBI_VALID | DBI_AUDITED; + uint8_t dbistate = DBI_VALID | DBI_AUDIT; int rc = couple_init(&couple, ~0u, ctx->mc_txn, sdb, &dbx, &dbistate); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -21716,7 +21717,7 @@ __cold static int env_copy_asis(MDBX_env *env, MDBX_txn *read_txn, const bool dest_is_pipe, const MDBX_copy_flags_t flags) { /* We must start the actual read txn after blocking writers */ - int rc = txn_end(read_txn, MDBX_END_RESET_TMP); + int rc = txn_end(read_txn, TXN_END_RESET_TMP); if (unlikely(rc != 
MDBX_SUCCESS)) return rc; @@ -22165,7 +22166,7 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { /* account opened named subDBs */ for (MDBX_dbi dbi = CORE_DBS; dbi < txn->mt_numdbs; dbi++) - if ((txn->mt_dbistate[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID) + if ((txn->mt_dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID) stat_add(txn->mt_dbs + dbi, st, bytes); if (!(txn->mt_dbs[MAIN_DBI].md_flags & (MDBX_DUPSORT | MDBX_INTEGERKEY)) && @@ -22188,7 +22189,7 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { /* skip opened and already accounted */ for (MDBX_dbi dbi = CORE_DBS; dbi < txn->mt_numdbs; dbi++) - if ((txn->mt_dbistate[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID && + if ((txn->mt_dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID && node_ks(node) == txn->mt_dbxs[dbi].md_name.iov_len && memcmp(node_key(node), txn->mt_dbxs[dbi].md_name.iov_base, node_ks(node)) == 0) { @@ -22657,13 +22658,13 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, goto bailout; } /* Пересоздаём MAIN_DBI если там пусто. 
*/ - atomic_store32(&txn->mt_dbiseqs[MAIN_DBI], dbi_seq(env, MAIN_DBI), + atomic_store32(&txn->mt_dbi_seqs[MAIN_DBI], dbi_seq(env, MAIN_DBI), mo_AcquireRelease); tASSERT(txn, txn->mt_dbs[MAIN_DBI].md_depth == 0 && txn->mt_dbs[MAIN_DBI].md_entries == 0 && txn->mt_dbs[MAIN_DBI].md_root == P_INVALID); txn->mt_dbs[MAIN_DBI].md_flags &= MDBX_REVERSEKEY | MDBX_INTEGERKEY; - txn->mt_dbistate[MAIN_DBI] |= DBI_DIRTY; + txn->mt_dbi_state[MAIN_DBI] |= DBI_DIRTY; txn->mt_flags |= MDBX_TXN_DIRTY; txn->mt_dbxs[MAIN_DBI].md_cmp = get_default_keycmp(txn->mt_dbs[MAIN_DBI].md_flags); @@ -22790,24 +22791,25 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, dbiflags |= DBI_DIRTY | DBI_CREAT; txn->mt_flags |= MDBX_TXN_DIRTY; - tASSERT(txn, (txn->mt_dbistate[MAIN_DBI] & DBI_DIRTY) != 0); + tASSERT(txn, (txn->mt_dbi_state[MAIN_DBI] & DBI_DIRTY) != 0); } /* Got info, register DBI in this txn */ memset(txn->mt_dbxs + slot, 0, sizeof(MDBX_dbx)); memcpy(&txn->mt_dbs[slot], data.iov_base, sizeof(MDBX_db)); - env->me_dbflags[slot] = 0; + env->me_db_flags[slot] = 0; rc = dbi_bind(txn, slot, user_flags, keycmp, datacmp); if (unlikely(rc != MDBX_SUCCESS)) { tASSERT(txn, (dbiflags & DBI_CREAT) == 0); goto bailout; } - txn->mt_dbistate[slot] = (uint8_t)dbiflags; + txn->mt_dbi_state[slot] = (uint8_t)dbiflags; txn->mt_dbxs[slot].md_name = key; - txn->mt_dbiseqs[slot].weak = env->me_dbiseqs[slot].weak = dbi_seq(env, slot); + txn->mt_dbi_seqs[slot].weak = env->me_dbi_seqs[slot].weak = + dbi_seq(env, slot); if (!(dbiflags & DBI_CREAT)) - env->me_dbflags[slot] = txn->mt_dbs[slot].md_flags | DB_VALID; + env->me_db_flags[slot] = txn->mt_dbs[slot].md_flags | DB_VALID; if (txn->mt_numdbs == slot) { txn->mt_cursors[slot] = NULL; osal_compiler_barrier(); @@ -22880,7 +22882,7 @@ __cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, if (unlikely(txn->mt_flags & MDBX_TXN_BLOCKED)) return MDBX_BAD_TXN; - if (unlikely(txn->mt_dbistate[dbi] & DBI_STALE)) { + if 
(unlikely(txn->mt_dbi_state[dbi] & DBI_STALE)) { rc = fetch_sdb((MDBX_txn *)txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -22901,7 +22903,7 @@ static int dbi_close_locked(MDBX_env *env, MDBX_dbi dbi) { if (unlikely(!ptr)) return MDBX_BAD_DBI; - env->me_dbflags[dbi] = 0; + env->me_db_flags[dbi] = 0; env->me_dbxs[dbi].md_name.iov_len = 0; osal_memory_fence(mo_AcquireRelease, true); env->me_dbxs[dbi].md_name.iov_base = NULL; @@ -22934,7 +22936,7 @@ int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { rc = osal_fastmutex_acquire(&env->me_dbi_lock); if (likely(rc == MDBX_SUCCESS)) { - rc = (dbi < env->me_maxdbs && (env->me_dbflags[dbi] & DB_VALID)) + rc = (dbi < env->me_maxdbs && (env->me_db_flags[dbi] & DB_VALID)) ? dbi_close_locked(env, dbi) : MDBX_BAD_DBI; ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); @@ -22956,7 +22958,7 @@ int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, *flags = txn->mt_dbs[dbi].md_flags & DB_PERSISTENT_FLAGS; *state = - txn->mt_dbistate[dbi] & (DBI_FRESH | DBI_CREAT | DBI_DIRTY | DBI_STALE); + txn->mt_dbi_state[dbi] & (DBI_FRESH | DBI_CREAT | DBI_DIRTY | DBI_STALE); return MDBX_SUCCESS; } @@ -23078,9 +23080,9 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { if (del && dbi >= CORE_DBS) { rc = delete (txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL, F_SUBDATA); if (likely(rc == MDBX_SUCCESS)) { - tASSERT(txn, txn->mt_dbistate[MAIN_DBI] & DBI_DIRTY); + tASSERT(txn, txn->mt_dbi_state[MAIN_DBI] & DBI_DIRTY); tASSERT(txn, txn->mt_flags & MDBX_TXN_DIRTY); - txn->mt_dbistate[dbi] = DBI_STALE; + txn->mt_dbi_state[dbi] = DBI_STALE; MDBX_env *env = txn->mt_env; rc = osal_fastmutex_acquire(&env->me_dbi_lock); if (unlikely(rc != MDBX_SUCCESS)) { @@ -23094,7 +23096,7 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { } } else { /* reset the DB record, mark it dirty */ - txn->mt_dbistate[dbi] |= DBI_DIRTY; + txn->mt_dbi_state[dbi] |= DBI_DIRTY; txn->mt_dbs[dbi].md_depth = 0; 
txn->mt_dbs[dbi].md_branch_pages = 0; txn->mt_dbs[dbi].md_leaf_pages = 0; @@ -23749,7 +23751,7 @@ __cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_walk_sdb_t *sdb, MDBX_cursor_couple couple; MDBX_dbx dbx = {.md_klen_min = INT_MAX}; - uint8_t dbistate = DBI_VALID | DBI_AUDITED; + uint8_t dbistate = DBI_VALID | DBI_AUDIT; int rc = couple_init(&couple, ~0u, ctx->mw_txn, db, &dbx, &dbistate); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -24492,7 +24494,7 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, if (unlikely(!check_dbi(txn, dbi, DBI_USRVALID))) return MDBX_BAD_DBI; - if (unlikely(txn->mt_dbistate[dbi] & DBI_STALE)) { + if (unlikely(txn->mt_dbi_state[dbi] & DBI_STALE)) { rc = fetch_sdb(txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -24513,7 +24515,7 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, tASSERT(txn, new > dbs->md_seq); dbs->md_seq = new; txn->mt_flags |= MDBX_TXN_DIRTY; - txn->mt_dbistate[dbi] |= DBI_DIRTY; + txn->mt_dbi_state[dbi] |= DBI_DIRTY; } return MDBX_SUCCESS; @@ -27027,7 +27029,7 @@ bailout: mdbx_cursor_close(cursor); if (dbi >= CORE_DBS && !txn->mt_cursors[dbi] && - txn->mt_dbistate[dbi] == (DBI_FRESH | DBI_VALID | DBI_USRVALID)) + txn->mt_dbi_state[dbi] == (DBI_FRESH | DBI_VALID | DBI_USRVALID)) mdbx_dbi_close(env, dbi); } return err; diff --git a/src/internals.h b/src/internals.h index 8fdb37a8..d4ac2215 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1194,19 +1194,19 @@ struct MDBX_txn { /* Array of MDBX_db records for each known DB */ MDBX_db *mt_dbs; - /* Transaction DBI Flags */ -#define DBI_DIRTY MDBX_DBI_DIRTY /* DB was written in this txn */ -#define DBI_STALE MDBX_DBI_STALE /* Named-DB record is older than txnID */ -#define DBI_FRESH MDBX_DBI_FRESH /* Named-DB handle opened in this txn */ -#define DBI_CREAT MDBX_DBI_CREAT /* Named-DB handle created in this txn */ -#define DBI_VALID 0x10 /* DB handle is valid, see also DB_VALID */ -#define DBI_USRVALID 0x20 /* 
As DB_VALID, but not set for FREE_DBI */ -#define DBI_AUDITED 0x40 /* Internal flag for accounting during audit */ + /* Non-shared DBI state flags inside transaction */ +#define DBI_DIRTY 0x01 /* DB was written in this txn */ +#define DBI_STALE 0x02 /* Named-DB record is older than txnID */ +#define DBI_FRESH 0x04 /* Named-DB handle opened in this txn */ +#define DBI_CREAT 0x08 /* Named-DB handle created in this txn */ +#define DBI_VALID 0x10 /* Handle is valid, see also DB_VALID */ +#define DBI_USRVALID 0x20 /* As DB_VALID, but not set for FREE_DBI */ +#define DBI_AUDIT 0x40 /* Internal flag for accounting during audit */ /* Array of non-shared txn's flags of DBI */ - uint8_t *mt_dbistate; + uint8_t *mt_dbi_state; /* Array of sequence numbers for each DB handle. */ - MDBX_atomic_uint32_t *mt_dbiseqs; + MDBX_atomic_uint32_t *mt_dbi_seqs; MDBX_cursor **mt_cursors; MDBX_canary mt_canary; @@ -1292,8 +1292,8 @@ struct MDBX_cursor { MDBX_db *mc_db; /* The database auxiliary record for this cursor */ MDBX_dbx *mc_dbx; - /* The mt_dbistate for this database */ - uint8_t *mc_dbistate; + /* The mt_dbi_state[] for this DBI */ + uint8_t *mc_dbi_state; uint8_t mc_snum; /* number of pushed pages */ uint8_t mc_top; /* index of top page, normally mc_snum-1 */ @@ -1393,9 +1393,9 @@ struct MDBX_env { void *me_pbuf; /* scratch area for DUPSORT put() */ MDBX_txn *me_txn0; /* preallocated write transaction */ - MDBX_dbx *me_dbxs; /* array of static DB info */ - uint16_t *me_dbflags; /* array of flags from MDBX_db.md_flags */ - MDBX_atomic_uint32_t *me_dbiseqs; /* array of dbi sequence numbers */ + MDBX_dbx *me_dbxs; /* array of static DB info */ + uint16_t *me_db_flags; /* array of flags from MDBX_db.md_flags */ + MDBX_atomic_uint32_t *me_dbi_seqs; /* array of dbi sequence numbers */ unsigned me_maxgc_ov1page; /* Number of pgno_t fit in a single overflow page */ unsigned me_maxgc_per_branch; @@ -1662,7 +1662,7 @@ typedef struct MDBX_node { /* mdbx_dbi_open() flags */ #define 
DB_USABLE_FLAGS (DB_PERSISTENT_FLAGS | MDBX_CREATE | MDBX_DB_ACCEDE) -#define DB_VALID 0x8000 /* DB handle is valid, for me_dbflags */ +#define DB_VALID 0x8000 /* DB handle is valid, for me_db_flags */ #define DB_INTERNAL_FLAGS DB_VALID #if DB_INTERNAL_FLAGS & DB_USABLE_FLAGS From 4b79d46d38cb6ca5f109fac1071b7f2879203047 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 3 Nov 2023 11:36:59 +0300 Subject: [PATCH 024/443] =?UTF-8?q?mdbx:=20=D1=83=D0=B4=D0=B0=D0=BB=D0=B5?= =?UTF-8?q?=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=BE=D0=BB=D1=8F=20`mt=5Fdbxs`=20?= =?UTF-8?q?=D0=B8=D0=B7=20=D1=82=D1=80=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86?= =?UTF-8?q?=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Явного выигрыша или проигрыша в производительности тут нет. Но теперь меньше алиасинга указателей и чуть меньше полей в транзакциях. --- src/core.c | 110 ++++++++++++++++++++++++------------------------ src/internals.h | 2 - 2 files changed, 56 insertions(+), 56 deletions(-) diff --git a/src/core.c b/src/core.c index 2db1e56f..6e264f84 100644 --- a/src/core.c +++ b/src/core.c @@ -3781,13 +3781,13 @@ MDBX_MAYBE_UNUSED static bool cursor_is_tracked(const MDBX_cursor *mc) { int mdbx_cmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, const MDBX_val *b) { eASSERT(NULL, txn->mt_signature == MDBX_MT_SIGNATURE); - return txn->mt_dbxs[dbi].md_cmp(a, b); + return txn->mt_env->me_dbxs[dbi].md_cmp(a, b); } int mdbx_dcmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, const MDBX_val *b) { eASSERT(NULL, txn->mt_signature == MDBX_MT_SIGNATURE); - return txn->mt_dbxs[dbi].md_dcmp(a, b); + return txn->mt_env->me_dbxs[dbi].md_dcmp(a, b); } /* Allocate memory for a page. 
@@ -8917,7 +8917,6 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { rc = MDBX_CORRUPTED; goto bailout; } - txn->mt_dbxs = env->me_dbxs; /* mostly static anyway */ txn->mt_numdbs = env->me_numdbs; ENSURE(env, txn->mt_txnid >= /* paranoia is appropriate here */ env->me_lck @@ -9024,7 +9023,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { } txn->mt_dbi_state[MAIN_DBI] = DBI_VALID | DBI_USRVALID; rc = - setup_dbx(&txn->mt_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], env->me_psize); + setup_dbx(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], env->me_psize); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; txn->mt_dbi_state[FREE_DBI] = DBI_VALID; @@ -9294,7 +9293,6 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, txn->mt_cursors[FREE_DBI] = nullptr; /* avoid SIGSEGV in an assertion later */ #endif /* MDBX_DEBUG */ txn->mt_dbi_state = ptr_disp(txn, size - env->me_maxdbs); - txn->mt_dbxs = env->me_dbxs; /* static */ txn->mt_flags = flags; txn->mt_env = env; @@ -9620,8 +9618,8 @@ static void dbi_import_locked(MDBX_txn *txn) { txn->mt_dbi_state[i] = 0; if (env->me_db_flags[i] & DB_VALID) { txn->mt_dbi_state[i] = DBI_VALID | DBI_USRVALID | DBI_STALE; - tASSERT(txn, txn->mt_dbxs[i].md_cmp != NULL); - tASSERT(txn, txn->mt_dbxs[i].md_name.iov_base != NULL); + tASSERT(txn, env->me_dbxs[i].md_cmp != NULL); + tASSERT(txn, env->me_dbxs[i].md_name.iov_base != NULL); } } } @@ -10039,29 +10037,32 @@ __cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, if (i != MAIN_DBI) continue; rc = page_search(&cx.outer, NULL, MDBX_PS_FIRST); + const MDBX_env *const env = txn->mt_env; while (rc == MDBX_SUCCESS) { MDBX_page *mp = cx.outer.mc_pg[cx.outer.mc_top]; for (size_t j = 0; j < page_numkeys(mp); j++) { - MDBX_node *node = page_node(mp, j); + const MDBX_node *node = page_node(mp, j); if (node_flags(node) == F_SUBDATA) { if (unlikely(node_ds(node) != sizeof(MDBX_db))) return MDBX_CORRUPTED; - MDBX_db db_copy, *db; - memcpy(db 
= &db_copy, node_data(node), sizeof(db_copy)); + const MDBX_val name = {node_key(node), node_ks(node)}; + const MDBX_db *db = nullptr; if ((txn->mt_flags & MDBX_TXN_RDONLY) == 0) { - for (MDBX_dbi k = txn->mt_numdbs; --k > MAIN_DBI;) { - if ((txn->mt_dbi_state[k] & DBI_VALID) && - /* txn->mt_dbxs[k].md_name.iov_base && */ - node_ks(node) == txn->mt_dbxs[k].md_name.iov_len && - memcmp(node_key(node), txn->mt_dbxs[k].md_name.iov_base, - node_ks(node)) == 0) { - txn->mt_dbi_state[k] |= DBI_AUDIT; - if (!(txn->mt_dbi_state[k] & MDBX_DBI_STALE)) - db = txn->mt_dbs + k; + for (MDBX_dbi dbi = txn->mt_numdbs; --dbi > MAIN_DBI;) { + if ((txn->mt_dbi_state[dbi] & DBI_VALID) && + /* env->me_dbxs[k].md_name.iov_base && */ + env->me_dbxs[MAIN_DBI].md_cmp( + &name, &env->me_dbxs[dbi].md_name) == 0) { + txn->mt_dbi_state[dbi] |= DBI_AUDIT; + if (!(txn->mt_dbi_state[dbi] & MDBX_DBI_STALE)) + db = txn->mt_dbs + dbi; break; } } } + MDBX_db aligned; + if (!db) + db = memcpy(&aligned, node_data(node), sizeof(MDBX_db)); used += (size_t)db->md_branch_pages + (size_t)db->md_leaf_pages + (size_t)db->md_overflow_pages; } @@ -10088,8 +10089,8 @@ __cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, WARNING("audit %s@%" PRIaTXN ": unable account dbi %zd / \"%*s\", state 0x%02x", txn->mt_parent ? 
"nested-" : "", txn->mt_txnid, i, - (int)txn->mt_dbxs[i].md_name.iov_len, - (const char *)txn->mt_dbxs[i].md_name.iov_base, + (int)txn->mt_env->me_dbxs[i].md_name.iov_len, + (const char *)txn->mt_env->me_dbxs[i].md_name.iov_base, txn->mt_dbi_state[i]); } } @@ -11810,7 +11811,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { data.iov_base = db; WITH_CURSOR_TRACKING( couple.outer, - rc = cursor_put_nochecklen(&couple.outer, &txn->mt_dbxs[i].md_name, + rc = cursor_put_nochecklen(&couple.outer, &env->me_dbxs[i].md_name, &data, F_SUBDATA)); if (unlikely(rc != MDBX_SUCCESS)) goto fail; @@ -15274,7 +15275,6 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, txn->mt_dbi_state = ptr_disp( txn->mt_dbi_seqs, sizeof(MDBX_atomic_uint32_t) * env->me_maxdbs); txn->mt_env = env; - txn->mt_dbxs = env->me_dbxs; txn->mt_flags = MDBX_TXN_FINISHED; env->me_txn0 = txn; txn->tw.retired_pages = pnl_alloc(MDBX_PNL_INITIAL); @@ -15884,7 +15884,7 @@ static int fetch_sdb(MDBX_txn *txn, size_t dbi) { if (unlikely(rc != MDBX_SUCCESS)) return rc; - MDBX_dbx *const dbx = &txn->mt_dbxs[dbi]; + MDBX_dbx *const dbx = &txn->mt_env->me_dbxs[dbi]; rc = page_search(&couple.outer, &dbx->md_name, 0); if (unlikely(rc != MDBX_SUCCESS)) { notfound: @@ -18808,7 +18808,7 @@ static __inline int couple_init(MDBX_cursor_couple *couple, const size_t dbi, static int cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, size_t dbi) { STATIC_ASSERT(offsetof(MDBX_cursor_couple, outer) == 0); return couple_init(container_of(mc, MDBX_cursor_couple, outer), dbi, txn, - &txn->mt_dbs[dbi], &txn->mt_dbxs[dbi], + &txn->mt_dbs[dbi], &txn->mt_env->me_dbxs[dbi], &txn->mt_dbi_state[dbi]); } @@ -18906,7 +18906,7 @@ int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { return MDBX_EINVAL; cASSERT(mc, mc->mc_db == &txn->mt_dbs[dbi]); - cASSERT(mc, mc->mc_dbx == &txn->mt_dbxs[dbi]); + cASSERT(mc, mc->mc_dbx == &txn->mt_env->me_dbxs[dbi]); cASSERT(mc, mc->mc_dbi == dbi); 
cASSERT(mc, mc->mc_dbi_state == &txn->mt_dbi_state[dbi]); return likely(mc->mc_dbi == dbi && @@ -22155,7 +22155,8 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { if (unlikely(err != MDBX_SUCCESS)) return err; - st->ms_psize = txn->mt_env->me_psize; + const MDBX_env *const env = txn->mt_env; + st->ms_psize = env->me_psize; #if 1 /* assuming GC is internal and not subject for accounting */ stat_get(&txn->mt_dbs[MAIN_DBI], st, bytes); @@ -22188,11 +22189,11 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { return MDBX_CORRUPTED; /* skip opened and already accounted */ + const MDBX_val name = {node_key(node), node_ks(node)}; for (MDBX_dbi dbi = CORE_DBS; dbi < txn->mt_numdbs; dbi++) if ((txn->mt_dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID && - node_ks(node) == txn->mt_dbxs[dbi].md_name.iov_len && - memcmp(node_key(node), txn->mt_dbxs[dbi].md_name.iov_base, - node_ks(node)) == 0) { + env->me_dbxs[MAIN_DBI].md_cmp(&name, + &env->me_dbxs[dbi].md_name) == 0) { node = NULL; break; } @@ -22528,10 +22529,11 @@ static int dbi_bind(MDBX_txn *txn, const MDBX_dbi dbi, unsigned user_flags, * 3) user_flags differs, but table is empty and MDBX_CREATE is provided * = assume that a properly create request with custom flags; */ + const MDBX_env *const env = txn->mt_env; if ((user_flags ^ txn->mt_dbs[dbi].md_flags) & DB_PERSISTENT_FLAGS) { /* flags are differs, check other conditions */ - if ((!user_flags && (!keycmp || keycmp == txn->mt_dbxs[dbi].md_cmp) && - (!datacmp || datacmp == txn->mt_dbxs[dbi].md_dcmp)) || + if ((!user_flags && (!keycmp || keycmp == env->me_dbxs[dbi].md_cmp) && + (!datacmp || datacmp == env->me_dbxs[dbi].md_dcmp)) || user_flags == MDBX_ACCEDE) { /* no comparators were provided and flags are zero, * seems that is case #1 above */ @@ -22544,29 +22546,29 @@ static int dbi_bind(MDBX_txn *txn, const MDBX_dbi dbi, unsigned user_flags, txn->mt_flags |= MDBX_TXN_DIRTY; /* обнуляем компараторы 
для установки в соответствии с флагами, * либо заданных пользователем */ - txn->mt_dbxs[dbi].md_cmp = nullptr; - txn->mt_dbxs[dbi].md_dcmp = nullptr; + env->me_dbxs[dbi].md_cmp = nullptr; + env->me_dbxs[dbi].md_dcmp = nullptr; } else { return /* FIXME: return extended info */ MDBX_INCOMPATIBLE; } } if (!keycmp) - keycmp = txn->mt_dbxs[dbi].md_cmp ? txn->mt_dbxs[dbi].md_cmp + keycmp = env->me_dbxs[dbi].md_cmp ? env->me_dbxs[dbi].md_cmp : get_default_keycmp(user_flags); - if (txn->mt_dbxs[dbi].md_cmp != keycmp) { - if (txn->mt_dbxs[dbi].md_cmp) + if (env->me_dbxs[dbi].md_cmp != keycmp) { + if (env->me_dbxs[dbi].md_cmp) return MDBX_EINVAL; - txn->mt_dbxs[dbi].md_cmp = keycmp; + env->me_dbxs[dbi].md_cmp = keycmp; } if (!datacmp) - datacmp = txn->mt_dbxs[dbi].md_dcmp ? txn->mt_dbxs[dbi].md_dcmp + datacmp = env->me_dbxs[dbi].md_dcmp ? env->me_dbxs[dbi].md_dcmp : get_default_datacmp(user_flags); - if (txn->mt_dbxs[dbi].md_dcmp != datacmp) { - if (txn->mt_dbxs[dbi].md_dcmp) + if (env->me_dbxs[dbi].md_dcmp != datacmp) { + if (env->me_dbxs[dbi].md_dcmp) return MDBX_EINVAL; - txn->mt_dbxs[dbi].md_dcmp = datacmp; + env->me_dbxs[dbi].md_dcmp = datacmp; } return MDBX_SUCCESS; @@ -22652,7 +22654,7 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, rc = MDBX_NOTFOUND; goto bailout; } - if (txn->mt_dbs[MAIN_DBI].md_leaf_pages || txn->mt_dbxs[MAIN_DBI].md_cmp) { + if (txn->mt_dbs[MAIN_DBI].md_leaf_pages || env->me_dbxs[MAIN_DBI].md_cmp) { /* В MAIN_DBI есть записи либо она уже использовалась. 
*/ rc = MDBX_INCOMPATIBLE; goto bailout; @@ -22666,24 +22668,24 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, txn->mt_dbs[MAIN_DBI].md_flags &= MDBX_REVERSEKEY | MDBX_INTEGERKEY; txn->mt_dbi_state[MAIN_DBI] |= DBI_DIRTY; txn->mt_flags |= MDBX_TXN_DIRTY; - txn->mt_dbxs[MAIN_DBI].md_cmp = + env->me_dbxs[MAIN_DBI].md_cmp = get_default_keycmp(txn->mt_dbs[MAIN_DBI].md_flags); - txn->mt_dbxs[MAIN_DBI].md_dcmp = + env->me_dbxs[MAIN_DBI].md_dcmp = get_default_datacmp(txn->mt_dbs[MAIN_DBI].md_flags); } - tASSERT(txn, txn->mt_dbxs[MAIN_DBI].md_cmp); + tASSERT(txn, env->me_dbxs[MAIN_DBI].md_cmp); /* Is the DB already open? */ MDBX_dbi scan, slot; for (slot = scan = txn->mt_numdbs; --scan >= CORE_DBS;) { - if (!txn->mt_dbxs[scan].md_name.iov_base) { + if (!env->me_dbxs[scan].md_name.iov_base) { /* Remember this free slot */ slot = scan; continue; } - if (key.iov_len == txn->mt_dbxs[scan].md_name.iov_len && - !memcmp(key.iov_base, txn->mt_dbxs[scan].md_name.iov_base, + if (key.iov_len == env->me_dbxs[scan].md_name.iov_len && + !memcmp(key.iov_base, env->me_dbxs[scan].md_name.iov_base, key.iov_len)) { rc = dbi_bind(txn, scan, user_flags, keycmp, datacmp); if (unlikely(rc != MDBX_SUCCESS)) @@ -22751,13 +22753,13 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, /* Rescan after mutex acquisition & import handles */ for (slot = scan = txn->mt_numdbs; --scan >= CORE_DBS;) { - if (!txn->mt_dbxs[scan].md_name.iov_base) { + if (!env->me_dbxs[scan].md_name.iov_base) { /* Remember this free slot */ slot = scan; continue; } - if (key.iov_len == txn->mt_dbxs[scan].md_name.iov_len && - !memcmp(key.iov_base, txn->mt_dbxs[scan].md_name.iov_base, + if (key.iov_len == env->me_dbxs[scan].md_name.iov_len && + !memcmp(key.iov_base, env->me_dbxs[scan].md_name.iov_base, key.iov_len)) { rc = dbi_bind(txn, scan, user_flags, keycmp, datacmp); if (unlikely(rc != MDBX_SUCCESS)) @@ -22795,7 +22797,7 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const 
table_name, } /* Got info, register DBI in this txn */ - memset(txn->mt_dbxs + slot, 0, sizeof(MDBX_dbx)); + memset(env->me_dbxs + slot, 0, sizeof(MDBX_dbx)); memcpy(&txn->mt_dbs[slot], data.iov_base, sizeof(MDBX_db)); env->me_db_flags[slot] = 0; rc = dbi_bind(txn, slot, user_flags, keycmp, datacmp); @@ -22805,7 +22807,7 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, } txn->mt_dbi_state[slot] = (uint8_t)dbiflags; - txn->mt_dbxs[slot].md_name = key; + env->me_dbxs[slot].md_name = key; txn->mt_dbi_seqs[slot].weak = env->me_dbi_seqs[slot].weak = dbi_seq(env, slot); if (!(dbiflags & DBI_CREAT)) diff --git a/src/internals.h b/src/internals.h index d4ac2215..d8dafc2d 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1189,8 +1189,6 @@ struct MDBX_txn { txnid_t mt_front; MDBX_env *mt_env; /* the DB environment */ - /* Array of records for each DB known in the environment. */ - MDBX_dbx *mt_dbxs; /* Array of MDBX_db records for each known DB */ MDBX_db *mt_dbs; From 796e56b9b9aa2f11987b95a848b1f7f29ff9924c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 3 Nov 2023 14:11:58 +0300 Subject: [PATCH 025/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BA=D0=BE=D0=B4=D0=B0=20?= =?UTF-8?q?=D1=81=D0=B8=D1=81=D1=82=D0=B5=D0=BC=D0=BD=D0=BE=D0=B9=20=D0=BE?= =?UTF-8?q?=D1=88=D0=B8=D0=B1=D0=BA=D0=B8=20`MDBX=5FEDEADLK`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 6 ++++-- src/lck-windows.c | 2 +- src/osal.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/mdbx.h b/mdbx.h index 5c43ab89..ee6e21c7 100644 --- a/mdbx.h +++ b/mdbx.h @@ -1950,7 +1950,8 @@ enum MDBX_error_t { MDBX_EPERM = ERROR_INVALID_FUNCTION, MDBX_EINTR = ERROR_CANCELLED, MDBX_ENOFILE = ERROR_FILE_NOT_FOUND, - MDBX_EREMOTE = 
ERROR_REMOTE_STORAGE_MEDIA_ERROR + MDBX_EREMOTE = ERROR_REMOTE_STORAGE_MEDIA_ERROR, + MDBX_EDEADLK = ERROR_POSSIBLE_DEADLOCK #else /* Windows */ #ifdef ENODATA MDBX_ENODATA = ENODATA, @@ -1966,7 +1967,8 @@ enum MDBX_error_t { MDBX_EPERM = EPERM, MDBX_EINTR = EINTR, MDBX_ENOFILE = ENOENT, - MDBX_EREMOTE = ENOTBLK + MDBX_EREMOTE = ENOTBLK, + MDBX_EDEADLK = EDEADLK #endif /* !Windows */ }; #ifndef __cplusplus diff --git a/src/lck-windows.c b/src/lck-windows.c index d2354285..bc77150d 100644 --- a/src/lck-windows.c +++ b/src/lck-windows.c @@ -190,7 +190,7 @@ int osal_txn_lock(MDBX_env *env, bool dontwait) { 0xC0000194 /* STATUS_POSSIBLE_DEADLOCK / EXCEPTION_POSSIBLE_DEADLOCK */) ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) { - return ERROR_POSSIBLE_DEADLOCK; + return MDBX_EDEADLK; } } diff --git a/src/osal.c b/src/osal.c index adffbabf..5559b204 100644 --- a/src/osal.c +++ b/src/osal.c @@ -536,7 +536,7 @@ MDBX_INTERNAL_FUNC int osal_fastmutex_acquire(osal_fastmutex_t *fastmutex) { 0xC0000194 /* STATUS_POSSIBLE_DEADLOCK / EXCEPTION_POSSIBLE_DEADLOCK */) ? 
EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) { - return ERROR_POSSIBLE_DEADLOCK; + return MDBX_EDEADLK; } return MDBX_SUCCESS; #else From e6af7d7c53428ca2892bcbf7eec1c2acee06fd44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 5 Nov 2023 22:10:29 +0300 Subject: [PATCH 026/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=B5=D1=80=D0=B5=D1=80?= =?UTF-8?q?=D0=B0=D0=B1=D0=BE=D1=82=D0=BA=D0=B0=20=D0=B8=D0=BD=D0=B8=D1=86?= =?UTF-8?q?=D0=B8=D0=B0=D0=BB=D0=B8=D0=B7=D0=B0=D1=86=D0=B8=D0=B8,=20?= =?UTF-8?q?=D0=BF=D1=80=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B8=20=D0=B8=20?= =?UTF-8?q?=D0=B8=D0=BC=D0=BF=D0=BE=D1=80=D1=82=D0=B0=20dbi-=D1=85=D0=B5?= =?UTF-8?q?=D0=BD=D0=B4=D0=BB=D0=BE=D0=B2=20=D0=B2=20=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D1=8F=D1=85.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ранее инициализация в транзакциях структур данных, связанных с dbi-хендлами и subDb, выполнялась непосредственно при запуске транзакций. Что в сценариях с большим кол-вом dbi-дескрипторов (например libfpta) порождало заметные накладные расходы, которые росли линейно от общего кол-ва открытых subDb, а не от реально используемых в транзакции. При использовании одной-двух сотен хендлов, при старте каждой транзакции могли копироваться и/или обнуляться десятки килобайт. Теперь этот недостаток устранен. Изменена схема инициализации, валидации и импорта хендлов, открытых после старта транзакции: 1) Инициализация теперь выполняется отложенно, а при старте транзакции обнуляется только массив с однобайтовыми статусами dbi-хендлов. При этом доступна опция сборки `MDBX_ENABLE_DBI_SPARSE`, при активации которой используется битовая карта, что снижает объем инициализации при старте транзакции в 8 раз (CHAR_BIT).
2) Переработана валидация dbi-хендлов на входах API, с уменьшением кол-ва проверок и ветвлений до теоретического минимума. 3) Переработан импорт dbi-хендлов, открытых после старта транзакции, теперь при этом не захватывается мьютекс. --- CMakeLists.txt | 1 + mdbx.h | 8 +- mdbx.h++ | 1 + src/bits.md | 6 +- src/config.h.in | 1 + src/core.c | 1803 +++++++++++++++++++++++++++-------------------- src/internals.h | 23 +- src/mdbx.c++ | 3 + src/options.h | 7 + 9 files changed, 1068 insertions(+), 785 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 50bd1b4b..89eee769 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -531,6 +531,7 @@ add_mdbx_option(MDBX_ENABLE_BIGFOOT "Chunking long list of retired pages during add_mdbx_option(MDBX_ENABLE_PGOP_STAT "Gathering statistics for page operations" ON) add_mdbx_option(MDBX_ENABLE_PROFGC "Profiling of GC search and updates" OFF) mark_as_advanced(MDBX_ENABLE_PROFGC) +add_mdbx_option(MDBX_ENABLE_DBI_SPARSE "FIXME" ON) if(NOT MDBX_AMALGAMATED_SOURCE) if(CMAKE_CONFIGURATION_TYPES OR CMAKE_BUILD_TYPE_UPPERCASE STREQUAL "DEBUG") diff --git a/mdbx.h b/mdbx.h index ee6e21c7..d9cc392e 100644 --- a/mdbx.h +++ b/mdbx.h @@ -1921,7 +1921,7 @@ enum MDBX_error_t { MDBX_TOO_LARGE = -30417, /** A thread has attempted to use a not owned object, - * e.g.
a transaction that started by another thread */ MDBX_THREAD_MISMATCH = -30416, /** Overlapping read and write transactions for the current thread */ @@ -1936,8 +1936,12 @@ enum MDBX_error_t { /** Alternative/Duplicate LCK-file is exists and should be removed manually */ MDBX_DUPLICATED_CLK = -30413, + /** Some cursors and/or other resources should be closed before subDb or + * corresponding DBI-handle could be (re)used */ + MDBX_DANGLING_DBI = -30412, + /* The last of MDBX-added error codes */ - MDBX_LAST_ADDED_ERRCODE = MDBX_DUPLICATED_CLK, + MDBX_LAST_ADDED_ERRCODE = MDBX_DANGLING_DBI, #if defined(_WIN32) || defined(_WIN64) MDBX_ENODATA = ERROR_HANDLE_EOF, diff --git a/mdbx.h++ b/mdbx.h++ index ea0131be..6c33a0b3 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -559,6 +559,7 @@ MDBX_DECLARE_EXCEPTION(thread_mismatch); MDBX_DECLARE_EXCEPTION(transaction_full); MDBX_DECLARE_EXCEPTION(transaction_overlapping); MDBX_DECLARE_EXCEPTION(duplicated_lck_file); +MDBX_DECLARE_EXCEPTION(dangling_map_id); #undef MDBX_DECLARE_EXCEPTION [[noreturn]] LIBMDBX_API void throw_too_small_target_buffer(); diff --git a/src/bits.md b/src/bits.md index e8708f02..d8166d16 100644 --- a/src/bits.md +++ b/src/bits.md @@ -5,9 +5,9 @@ N | MASK | ENV | TXN | DB | PUT | DBI | NOD 2 |0000 0004|ALLOC_COLSC|TXN_DIRTY |DUPSORT | |DBI_FRESH |F_DUPDATA|P_OVERFLOW| | 3 |0000 0008|ALLOC_SSCAN|TXN_SPILLS |INTEGERKEY| |DBI_CREAT | |P_META | | 4 |0000 0010|ALLOC_FIFO |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_BAD | | -5 |0000 0020| |TXN_DRAINED_GC|INTEGERDUP|NODUPDATA |DBI_USRVALID| |P_LEAF2 | | -6 |0000 0040| | |REVERSEDUP|CURRENT |DBI_DUPDATA | |P_SUBP | | -7 |0000 0080| | | |ALLDUPS |DBI_AUDITED | | | | +5 |0000 0020| |TXN_DRAINED_GC|INTEGERDUP|NODUPDATA | | |P_LEAF2 | | +6 |0000 0040| | |REVERSEDUP|CURRENT |DBI_OLDEN | |P_SUBP | | +7 |0000 0080| | | |ALLDUPS |DBI_LINDO | | | | 8 |0000 0100| _MAY_MOVE | | | | | | | <= | 9 |0000 0200| _MAY_UNMAP| | | | | | | <= | 10|0000 0400| | | | | | | | | diff --git 
a/src/config.h.in b/src/config.h.in index 05c561b1..2ffb9ecf 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -33,6 +33,7 @@ #cmakedefine01 MDBX_ENABLE_BIGFOOT #cmakedefine01 MDBX_ENABLE_PGOP_STAT #cmakedefine01 MDBX_ENABLE_PROFGC +#cmakedefine01 MDBX_ENABLE_DBI_SPARSE /* Windows */ #cmakedefine01 MDBX_WITHOUT_MSVC_CRT diff --git a/src/core.c b/src/core.c index 6e264f84..af6ff541 100644 --- a/src/core.c +++ b/src/core.c @@ -3474,8 +3474,11 @@ __cold const char *mdbx_liberr2str(int errnum) { return "MDBX_TXN_OVERLAPPING: Overlapping read and write transactions for" " the current thread"; case MDBX_DUPLICATED_CLK: - return "MDBX_DUPLICATED_CLK: Alternative/Duplicate LCK-file is exists, " - "please keep one and remove unused other"; + return "MDBX_DUPLICATED_CLK: Alternative/Duplicate LCK-file is exists," + " please keep one and remove unused other"; + case MDBX_DANGLING_DBI: + return "MDBX_DANGLING_DBI: Some cursors and/or other resources should be" + " closed before subDb or corresponding DBI-handle could be (re)used"; default: return NULL; } @@ -3778,15 +3781,409 @@ MDBX_MAYBE_UNUSED static bool cursor_is_tracked(const MDBX_cursor *mc) { *tracking_head = tracked->mc_next; \ } while (0) +#if MDBX_ENABLE_DBI_SPARSE + +static __inline size_t dbi_bitmap_ctz(const MDBX_txn *txn, intptr_t bmi) { + tASSERT(txn, bmi > 0); + STATIC_ASSERT(sizeof(bmi) >= sizeof(txn->mt_dbi_sparse[0])); +#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctzl) + if (sizeof(txn->mt_dbi_sparse[0]) <= sizeof(int)) + return __builtin_ctz((int)bmi); + if (sizeof(txn->mt_dbi_sparse[0]) == sizeof(long)) + return __builtin_ctzl((long)bmi); +#if (defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ == 8) || \ + __has_builtin(__builtin_ctzll) + return __builtin_ctzll(bmi); +#endif /* have(long long) && long long == uint64_t */ +#endif /* GNU C */ + +#if defined(_MSC_VER) + unsigned long index; + if (sizeof(txn->mt_dbi_sparse[0]) > 4) { +#if defined(_M_AMD64) || defined(_M_ARM64) || 
defined(_M_X64) + _BitScanForward64(&index, bmi); + return index; +#else + if (bmi > UINT32_MAX) { + _BitScanForward(&index, (uint32_t)((uint64_t)bmi >> 32)); + return index; + } +#endif + } + _BitScanForward(&index, (uint32_t)bmi); + return index; +#endif /* MSVC */ + + bmi &= -bmi; + if (sizeof(txn->mt_dbi_sparse[0]) > 4) { + static const uint8_t debruijn_ctz64[64] = { + 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, + 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, + 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, + 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12}; + return debruijn_ctz64[(UINT64_C(0x022FDD63CC95386D) * (uint64_t)bmi) >> 58]; + } else { + static const uint8_t debruijn_ctz32[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; + return debruijn_ctz32[(UINT32_C(0x077CB531) * (uint32_t)bmi) >> 27]; + } +} + +/* LY: Макрос целенаправленно сделан с одним циклом, чтобы сохранить возможность + * использования оператора break */ +#define TXN_FOREACH_DBI_FROM(TXN, I, FROM) \ + for (size_t bitmap_chunk = CHAR_BIT * sizeof(TXN->mt_dbi_sparse[0]), \ + bitmap_item = TXN->mt_dbi_sparse[0] >> FROM, I = FROM; \ + I < TXN->mt_numdbs; ++I) \ + if (bitmap_item == 0) { \ + I |= bitmap_chunk - 1; \ + bitmap_item = TXN->mt_dbi_sparse[(1 + I) / bitmap_chunk]; \ + continue; \ + } else if ((bitmap_item & 1) == 0) { \ + size_t bitmap_skip = dbi_bitmap_ctz(txn, bitmap_item); \ + bitmap_item >>= bitmap_skip; \ + I += bitmap_skip - 1; \ + continue; \ + } else if (bitmap_item >>= 1, TXN->mt_dbi_state[I]) +#else +#define TXN_FOREACH_DBI_FROM(TXN, I, SKIP) \ + for (size_t I = SKIP; I < TXN->mt_numdbs; ++I) \ + if (TXN->mt_dbi_state[I]) +#endif /* MDBX_ENABLE_DBI_SPARSE */ + +#define TXN_FOREACH_DBI_ALL(TXN, I) TXN_FOREACH_DBI_FROM(TXN, I, 0) +#define TXN_FOREACH_DBI_USER(TXN, I) TXN_FOREACH_DBI_FROM(TXN, I, CORE_DBS) + +/* Back up parent txn's 
cursor, then grab the original for tracking */ +static int cursor_shadow(MDBX_cursor *parent_cursor, MDBX_txn *nested_txn, + const size_t dbi) { + + tASSERT(nested_txn, dbi > FREE_DBI && dbi < nested_txn->mt_numdbs); + const size_t size = parent_cursor->mc_xcursor + ? sizeof(MDBX_cursor) + sizeof(MDBX_xcursor) + : sizeof(MDBX_cursor); + for (MDBX_cursor *bk; parent_cursor; parent_cursor = bk->mc_next) { + bk = parent_cursor; + if (parent_cursor->mc_signature != MDBX_MC_LIVE) + continue; + bk = osal_malloc(size); + if (unlikely(!bk)) + return MDBX_ENOMEM; +#if MDBX_DEBUG + memset(bk, 0xCD, size); + VALGRIND_MAKE_MEM_UNDEFINED(bk, size); +#endif /* MDBX_DEBUG */ + *bk = *parent_cursor; + parent_cursor->mc_backup = bk; + /* Kill pointers into src to reduce abuse: The + * user may not use mc until dst ends. But we need a valid + * txn pointer here for cursor fixups to keep working. */ + parent_cursor->mc_txn = nested_txn; + parent_cursor->mc_db = &nested_txn->mt_dbs[dbi]; + parent_cursor->mc_dbi_state = &nested_txn->mt_dbi_state[dbi]; + MDBX_xcursor *mx = parent_cursor->mc_xcursor; + if (mx != NULL) { + *(MDBX_xcursor *)(bk + 1) = *mx; + mx->mx_cursor.mc_txn = nested_txn; + } + parent_cursor->mc_next = nested_txn->mt_cursors[dbi]; + nested_txn->mt_cursors[dbi] = parent_cursor; + } + return MDBX_SUCCESS; +} + +/* Close this txn's cursors, give parent txn's cursors back to parent. + * + * [in] txn the transaction handle. + * [in] merge true to keep changes to parent cursors, false to revert. + * + * Returns 0 on success, non-zero on failure. 
*/ +static void cursors_eot(MDBX_txn *txn, const bool merge) { + tASSERT(txn, txn->mt_cursors[FREE_DBI] == nullptr); + TXN_FOREACH_DBI_FROM(txn, i, /* skip FREE_DBI */ 1) { + MDBX_cursor *mc = txn->mt_cursors[i]; + if (!mc) + continue; + txn->mt_cursors[i] = nullptr; + do { + const unsigned stage = mc->mc_signature; + MDBX_cursor *const next = mc->mc_next; + MDBX_cursor *const bk = mc->mc_backup; + ENSURE(txn->mt_env, + stage == MDBX_MC_LIVE || (stage == MDBX_MC_WAIT4EOT && bk)); + cASSERT(mc, mc->mc_dbi == (MDBX_dbi)i); + if (bk) { + MDBX_xcursor *mx = mc->mc_xcursor; + tASSERT(txn, txn->mt_parent != NULL); + /* Zap: Using uninitialized memory '*mc->mc_backup'. */ + MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(6001); + ENSURE(txn->mt_env, bk->mc_signature == MDBX_MC_LIVE); + tASSERT(txn, mx == bk->mc_xcursor); + if (stage == MDBX_MC_WAIT4EOT /* Cursor was closed by user */) + mc->mc_signature = stage /* Promote closed state to parent txn */; + else if (merge) { + /* Restore pointers to parent txn */ + mc->mc_next = bk->mc_next; + mc->mc_backup = bk->mc_backup; + mc->mc_txn = bk->mc_txn; + mc->mc_db = bk->mc_db; + mc->mc_dbi_state = bk->mc_dbi_state; + if (mx) { + if (mx != bk->mc_xcursor) { + *bk->mc_xcursor = *mx; + mx = bk->mc_xcursor; + } + mx->mx_cursor.mc_txn = bk->mc_txn; + } + } else { + /* Restore from backup, i.e. 
rollback/abort nested txn */ + *mc = *bk; + if (mx) + *mx = *(MDBX_xcursor *)(bk + 1); + } + bk->mc_signature = 0; + osal_free(bk); + } else { + ENSURE(txn->mt_env, stage == MDBX_MC_LIVE); + mc->mc_signature = MDBX_MC_READY4CLOSE /* Cursor may be reused */; + mc->mc_flags = 0 /* reset C_UNTRACK */; + } + mc = next; + } while (mc); + } +} + +static __noinline int dbi_import(MDBX_txn *txn, const size_t dbi); + +static __inline uint8_t dbi_state(const MDBX_txn *txn, const size_t dbi) { + STATIC_ASSERT(DBI_DIRTY == MDBX_DBI_DIRTY && DBI_STALE == MDBX_DBI_STALE && + DBI_FRESH == MDBX_DBI_FRESH && DBI_CREAT == MDBX_DBI_CREAT); + +#if MDBX_ENABLE_DBI_SPARSE + const size_t bitmap_chunk = CHAR_BIT * sizeof(txn->mt_dbi_sparse[0]); + const size_t bitmap_indx = dbi / bitmap_chunk; + const size_t bitmap_mask = (size_t)1 << dbi % bitmap_chunk; + return likely(dbi < txn->mt_numdbs && + (txn->mt_dbi_sparse[bitmap_indx] & bitmap_mask) != 0) + ? txn->mt_dbi_state[dbi] + : 0; +#else + return likely(dbi < txn->mt_numdbs) ? txn->mt_dbi_state[dbi] : 0; +#endif /* MDBX_ENABLE_DBI_SPARSE */ +} + +static __inline bool dbi_changed(const MDBX_txn *txn, const size_t dbi) { + const MDBX_env *const env = txn->mt_env; + eASSERT(env, dbi_state(txn, dbi) & DBI_LINDO); + const uint32_t snap_seq = + atomic_load32(&env->me_dbi_seqs[dbi], mo_AcquireRelease); + return snap_seq != txn->mt_dbi_seqs[dbi]; +} + +static __always_inline int dbi_check(const MDBX_txn *txn, const size_t dbi) { + const uint8_t state = dbi_state(txn, dbi); + if (likely((state & DBI_LINDO) != 0 && !dbi_changed(txn, dbi))) + return (state & DBI_VALID) ? MDBX_SUCCESS : MDBX_BAD_DBI; + + /* Медленный путь: ленивая до-инициализацяи и импорт */ + return dbi_import((MDBX_txn *)txn, dbi); +} + +static __inline uint32_t dbi_seq_next(const MDBX_env *const env, size_t dbi) { + uint32_t v = atomic_load32(&env->me_dbi_seqs[dbi], mo_AcquireRelease) + 1; + return v ? 
v : 1; +} + +struct dbi_snap_result { + uint32_t sequence; + unsigned flags; +}; + +static struct dbi_snap_result dbi_snap(const MDBX_env *env, const size_t dbi) { + eASSERT(env, dbi < env->me_numdbs); + struct dbi_snap_result r; + uint32_t snap = atomic_load32(&env->me_dbi_seqs[dbi], mo_AcquireRelease); + do { + r.sequence = snap; + r.flags = env->me_db_flags[dbi]; + snap = atomic_load32(&env->me_dbi_seqs[dbi], mo_AcquireRelease); + } while (unlikely(snap != r.sequence)); + return r; +} + +static __noinline int dbi_import(MDBX_txn *txn, const size_t dbi) { + const MDBX_env *const env = txn->mt_env; + if (dbi >= env->me_numdbs || !env->me_db_flags[dbi]) + return MDBX_BAD_DBI; + +#if MDBX_ENABLE_DBI_SPARSE + const size_t bitmap_chunk = CHAR_BIT * sizeof(txn->mt_dbi_sparse[0]); + const size_t bitmap_indx = dbi / bitmap_chunk; + const size_t bitmap_mask = (size_t)1 << dbi % bitmap_chunk; + if (dbi >= txn->mt_numdbs) { + for (size_t i = (txn->mt_numdbs + bitmap_chunk - 1) / bitmap_chunk; + bitmap_indx >= i; ++i) + txn->mt_dbi_sparse[i] = 0; + eASSERT(env, (txn->mt_dbi_sparse[bitmap_indx] & bitmap_mask) == 0); + MDBX_txn *scan = txn; + do { + eASSERT(env, scan->mt_dbi_sparse == txn->mt_dbi_sparse); + eASSERT(env, scan->mt_numdbs < dbi + 1); + scan->mt_numdbs = (unsigned)dbi + 1; + scan->mt_dbi_state[dbi] = 0; + scan = scan->mt_parent; + } while (scan /* && scan->mt_dbi_sparse == txn->mt_dbi_sparse */); + txn->mt_dbi_sparse[bitmap_indx] |= bitmap_mask; + goto lindo; + } + if ((txn->mt_dbi_sparse[bitmap_indx] & bitmap_mask) == 0) { + MDBX_txn *scan = txn; + do { + eASSERT(env, scan->mt_dbi_sparse == txn->mt_dbi_sparse); + eASSERT(env, scan->mt_numdbs == txn->mt_numdbs); + scan->mt_dbi_state[dbi] = 0; + scan = scan->mt_parent; + } while (scan /* && scan->mt_dbi_sparse == txn->mt_dbi_sparse */); + txn->mt_dbi_sparse[bitmap_indx] |= bitmap_mask; + goto lindo; + } +#else + if (dbi >= txn->mt_numdbs) { + size_t i = txn->mt_numdbs; + do + txn->mt_dbi_state[i] = 0; + while (dbi 
>= ++i); + txn->mt_numdbs = i; + goto lindo; + } +#endif /* MDBX_ENABLE_DBI_SPARSE */ + + if (!txn->mt_dbi_state[dbi]) { + lindo: + /* dbi-слот еще не инициализирован в транзакции, а хендл не использовался */ + txn->mt_cursors[dbi] = nullptr; + MDBX_txn *const parent = txn->mt_parent; + if (parent) { + /* вложенная пишущая транзакция */ + int rc = dbi_check(parent, dbi); + /* копируем состояние subDB очищая new-флаги. */ + eASSERT(env, txn->mt_dbi_seqs == parent->mt_dbi_seqs); + txn->mt_dbi_state[dbi] = + parent->mt_dbi_state[dbi] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); + if (likely(rc == MDBX_SUCCESS)) { + txn->mt_dbs[dbi] = parent->mt_dbs[dbi]; + if (parent->mt_cursors[dbi]) { + rc = cursor_shadow(parent->mt_cursors[dbi], txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) { + /* не получилось забекапить курсоры */ + txn->mt_dbi_state[dbi] = DBI_OLDEN | DBI_LINDO | DBI_STALE; + txn->mt_flags |= MDBX_TXN_ERROR; + } + } + } + return rc; + } + txn->mt_dbi_seqs[dbi] = 0; + txn->mt_dbi_state[dbi] = DBI_LINDO; + } else { + eASSERT(env, txn->mt_dbi_seqs[dbi] != env->me_dbi_seqs[dbi].weak); + if (unlikely((txn->mt_dbi_state[dbi] & (DBI_VALID | DBI_OLDEN)) || + txn->mt_cursors[dbi])) { + /* хендл уже использовался в транзакции, но был закрыт или переоткрыт, + * либо при явном пере-открытии хендла есть висячие курсоры */ + eASSERT(env, (txn->mt_dbi_state[dbi] & DBI_STALE) == 0); + txn->mt_dbi_seqs[dbi] = env->me_dbi_seqs[dbi].weak; + txn->mt_dbi_state[dbi] = DBI_OLDEN | DBI_LINDO; + return txn->mt_cursors[dbi] ? 
MDBX_DANGLING_DBI : MDBX_BAD_DBI; + } + } + + /* хендл не использовался в транзакции, либо явно пере-отрывается при + * отсутствии висячих курсоров */ + eASSERT(env, (txn->mt_dbi_state[dbi] & DBI_LINDO) && !txn->mt_cursors[dbi]); + + /* читаем актуальные флаги и sequence */ + struct dbi_snap_result snap = dbi_snap(env, dbi); + txn->mt_dbi_seqs[dbi] = snap.sequence; + if (snap.flags & DB_VALID) { + txn->mt_dbs[dbi].md_flags = snap.flags & DB_PERSISTENT_FLAGS; + txn->mt_dbi_state[dbi] = DBI_LINDO | DBI_VALID | DBI_STALE; + return MDBX_SUCCESS; + } + return MDBX_BAD_DBI; +} + +/* Export or close DBI handles opened in this txn. */ +static int dbi_update(MDBX_txn *txn, int keep) { + MDBX_env *const env = txn->mt_env; + tASSERT(txn, !txn->mt_parent && txn == env->me_txn0); + bool locked = false; + void *defer_free = nullptr; + TXN_FOREACH_DBI_USER(txn, dbi) { + if (likely((txn->mt_dbi_state[dbi] & DBI_CREAT) == 0)) + continue; + if (!locked) { + int err = osal_fastmutex_acquire(&env->me_dbi_lock); + if (unlikely(err != MDBX_SUCCESS)) + return err; + locked = true; + if (dbi >= env->me_numdbs) + /* хендл был закрыт из другого потока пока захватывали блокировку */ + continue; + } + tASSERT(txn, dbi < env->me_numdbs); + if (keep) { + env->me_db_flags[dbi] = txn->mt_dbs[dbi].md_flags | DB_VALID; + } else { + uint32_t seq = dbi_seq_next(env, dbi); + void *ptr = env->me_dbxs[dbi].md_name.iov_base; + if (ptr) { + env->me_db_flags[dbi] = 0; + env->me_dbxs[dbi].md_name.iov_len = 0; + env->me_dbxs[dbi].md_name.iov_base = nullptr; + atomic_store32(&env->me_dbi_seqs[dbi], seq, mo_AcquireRelease); + osal_flush_incoherent_cpu_writeback(); + osal_free(defer_free); + defer_free = ptr; + } else { + eASSERT(env, env->me_dbxs[dbi].md_name.iov_len == 0); + eASSERT(env, env->me_db_flags[dbi] == 0); + } + } + } + + if (locked) { + size_t i = env->me_numdbs; + while ((env->me_db_flags[i - 1] & DB_VALID) == 0) { + --i; + eASSERT(env, i >= CORE_DBS); + eASSERT(env, !env->me_db_flags[i] && 
!env->me_dbxs[i].md_name.iov_len && + !env->me_dbxs[i].md_name.iov_base); + } + env->me_numdbs = (unsigned)i; + ENSURE(txn->mt_env, + osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); + osal_free(defer_free); + } + return MDBX_SUCCESS; +} + int mdbx_cmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, const MDBX_val *b) { eASSERT(NULL, txn->mt_signature == MDBX_MT_SIGNATURE); + tASSERT(txn, (dbi_state(txn, dbi) & DBI_VALID) && !dbi_changed(txn, dbi)); + tASSERT(txn, dbi < txn->mt_env->me_numdbs && + (txn->mt_env->me_db_flags[dbi] & DB_VALID) != 0); return txn->mt_env->me_dbxs[dbi].md_cmp(a, b); } int mdbx_dcmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, const MDBX_val *b) { eASSERT(NULL, txn->mt_signature == MDBX_MT_SIGNATURE); + tASSERT(txn, (dbi_state(txn, dbi) & DBI_VALID) && !dbi_changed(txn, dbi)); + tASSERT(txn, dbi < txn->mt_env->me_numdbs && + (txn->mt_env->me_db_flags[dbi] & DB_VALID)); return txn->mt_env->me_dbxs[dbi].md_dcmp(a, b); } @@ -4829,12 +5226,15 @@ static size_t txn_keep(MDBX_txn *txn, MDBX_cursor *m0) { tASSERT(txn, (txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0); txn_lru_turn(txn); size_t keep = m0 ? cursor_keep(txn, m0) : 0; - for (size_t i = FREE_DBI; i < txn->mt_numdbs; ++i) - if (F_ISSET(txn->mt_dbi_state[i], DBI_DIRTY | DBI_VALID) && - txn->mt_dbs[i].md_root != P_INVALID) - for (MDBX_cursor *mc = txn->mt_cursors[i]; mc; mc = mc->mc_next) + + TXN_FOREACH_DBI_ALL(txn, dbi) { + if (F_ISSET(txn->mt_dbi_state[dbi], DBI_DIRTY | DBI_VALID) && + txn->mt_dbs[dbi].md_root != P_INVALID) + for (MDBX_cursor *mc = txn->mt_cursors[dbi]; mc; mc = mc->mc_next) if (mc != m0) keep += cursor_keep(txn, mc); + } + return keep; } @@ -4891,33 +5291,6 @@ spill_prio(const MDBX_txn *txn, const size_t i, const uint32_t reciprocal) { return prio = (unsigned)factor; } -/* Spill pages from the dirty list back to disk. 
- * This is intended to prevent running into MDBX_TXN_FULL situations, - * but note that they may still occur in a few cases: - * - * 1) our estimate of the txn size could be too small. Currently this - * seems unlikely, except with a large number of MDBX_MULTIPLE items. - * - * 2) child txns may run out of space if their parents dirtied a - * lot of pages and never spilled them. TODO: we probably should do - * a preemptive spill during mdbx_txn_begin() of a child txn, if - * the parent's dirtyroom is below a given threshold. - * - * Otherwise, if not using nested txns, it is expected that apps will - * not run into MDBX_TXN_FULL any more. The pages are flushed to disk - * the same way as for a txn commit, e.g. their dirty status is cleared. - * If the txn never references them again, they can be left alone. - * If the txn only reads them, they can be used without any fuss. - * If the txn writes them again, they can be dirtied immediately without - * going thru all of the work of page_touch(). Such references are - * handled by page_unspill(). - * - * Also note, we never spill DB root pages, nor pages of active cursors, - * because we'll need these back again soon anyway. And in nested txns, - * we can't spill a page in a child txn if it was already spilled in a - * parent txn. That would alter the parent txns' data even though - * the child hasn't committed yet, and we'd have no way to undo it if - * the child aborted. */ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, const intptr_t wanna_spill_entries, const intptr_t wanna_spill_npages, @@ -6854,22 +7227,6 @@ static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len, //------------------------------------------------------------------------------ -/* Allocate page numbers and memory for writing. Maintain mt_last_reclaimed, - * mt_relist and mt_next_pgno. Set MDBX_TXN_ERROR on failure. 
- * - * If there are free pages available from older transactions, they - * are re-used first. Otherwise allocate a new page at mt_next_pgno. - * Do not modify the GC, just merge GC records into mt_relist - * and move mt_last_reclaimed to say which records were consumed. Only this - * function can create mt_relist and move - * mt_last_reclaimed/mt_next_pgno. - * - * [in] mc cursor A cursor handle identifying the transaction and - * database for which we are allocating. - * [in] num the number of pages to allocate. - * - * Returns 0 on success, non-zero on failure.*/ - #define MDBX_ALLOC_DEFAULT 0 #define MDBX_ALLOC_RESERVE 1 #define MDBX_ALLOC_UNIMPORTANT 2 @@ -7761,7 +8118,8 @@ done: __hot static pgr_t page_alloc(const MDBX_cursor *const mc) { MDBX_txn *const txn = mc->mc_txn; tASSERT(txn, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY); - tASSERT(txn, F_ISSET(txn->mt_dbi_state[mc->mc_dbi], DBI_DIRTY | DBI_VALID)); + tASSERT(txn, F_ISSET(dbi_state(txn, mc->mc_dbi), + DBI_LINDO | DBI_VALID | DBI_DIRTY)); /* If there are any loose pages, just use them */ while (likely(txn->tw.loose_pages)) { @@ -7901,7 +8259,7 @@ __hot static int page_touch(MDBX_cursor *mc) { int rc; tASSERT(txn, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY); - tASSERT(txn, F_ISSET(*mc->mc_dbi_state, DBI_DIRTY | DBI_VALID)); + tASSERT(txn, F_ISSET(*mc->mc_dbi_state, DBI_LINDO | DBI_VALID | DBI_DIRTY)); tASSERT(txn, !IS_OVERFLOW(mp)); if (ASSERT_ENABLED()) { if (mc->mc_flags & C_SUB) { @@ -8285,109 +8643,6 @@ __cold int mdbx_env_sync_ex(MDBX_env *env, bool force, bool nonblock) { return env_sync(env, force, nonblock); } -/* Back up parent txn's cursors, then grab the originals for tracking */ -static int cursor_shadow(MDBX_txn *parent, MDBX_txn *nested) { - tASSERT(parent, parent->mt_cursors[FREE_DBI] == nullptr); - nested->mt_cursors[FREE_DBI] = nullptr; - for (int i = parent->mt_numdbs; --i > FREE_DBI;) { - nested->mt_cursors[i] = NULL; - MDBX_cursor *mc = parent->mt_cursors[i]; - if (mc != NULL) { - size_t size = 
mc->mc_xcursor ? sizeof(MDBX_cursor) + sizeof(MDBX_xcursor) - : sizeof(MDBX_cursor); - for (MDBX_cursor *bk; mc; mc = bk->mc_next) { - bk = mc; - if (mc->mc_signature != MDBX_MC_LIVE) - continue; - bk = osal_malloc(size); - if (unlikely(!bk)) - return MDBX_ENOMEM; -#if MDBX_DEBUG - memset(bk, 0xCD, size); - VALGRIND_MAKE_MEM_UNDEFINED(bk, size); -#endif /* MDBX_DEBUG */ - *bk = *mc; - mc->mc_backup = bk; - /* Kill pointers into src to reduce abuse: The - * user may not use mc until dst ends. But we need a valid - * txn pointer here for cursor fixups to keep working. */ - mc->mc_txn = nested; - mc->mc_db = &nested->mt_dbs[i]; - mc->mc_dbi_state = &nested->mt_dbi_state[i]; - MDBX_xcursor *mx = mc->mc_xcursor; - if (mx != NULL) { - *(MDBX_xcursor *)(bk + 1) = *mx; - mx->mx_cursor.mc_txn = nested; - } - mc->mc_next = nested->mt_cursors[i]; - nested->mt_cursors[i] = mc; - } - } - } - return MDBX_SUCCESS; -} - -/* Close this txn's cursors, give parent txn's cursors back to parent. - * - * [in] txn the transaction handle. - * [in] merge true to keep changes to parent cursors, false to revert. - * - * Returns 0 on success, non-zero on failure. */ -static void cursors_eot(MDBX_txn *txn, const bool merge) { - tASSERT(txn, txn->mt_cursors[FREE_DBI] == nullptr); - for (intptr_t i = txn->mt_numdbs; --i > FREE_DBI;) { - MDBX_cursor *mc = txn->mt_cursors[i]; - if (!mc) - continue; - txn->mt_cursors[i] = nullptr; - do { - const unsigned stage = mc->mc_signature; - MDBX_cursor *const next = mc->mc_next; - MDBX_cursor *const bk = mc->mc_backup; - ENSURE(txn->mt_env, - stage == MDBX_MC_LIVE || (stage == MDBX_MC_WAIT4EOT && bk)); - cASSERT(mc, mc->mc_dbi == (MDBX_dbi)i); - if (bk) { - MDBX_xcursor *mx = mc->mc_xcursor; - tASSERT(txn, txn->mt_parent != NULL); - /* Zap: Using uninitialized memory '*mc->mc_backup'. 
*/ - MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(6001); - ENSURE(txn->mt_env, bk->mc_signature == MDBX_MC_LIVE); - tASSERT(txn, mx == bk->mc_xcursor); - if (stage == MDBX_MC_WAIT4EOT /* Cursor was closed by user */) - mc->mc_signature = stage /* Promote closed state to parent txn */; - else if (merge) { - /* Restore pointers to parent txn */ - mc->mc_next = bk->mc_next; - mc->mc_backup = bk->mc_backup; - mc->mc_txn = bk->mc_txn; - mc->mc_db = bk->mc_db; - mc->mc_dbi_state = bk->mc_dbi_state; - if (mx) { - if (mx != bk->mc_xcursor) { - *bk->mc_xcursor = *mx; - mx = bk->mc_xcursor; - } - mx->mx_cursor.mc_txn = bk->mc_txn; - } - } else { - /* Restore from backup, i.e. rollback/abort nested txn */ - *mc = *bk; - if (mx) - *mx = *(MDBX_xcursor *)(bk + 1); - } - bk->mc_signature = 0; - osal_free(bk); - } else { - ENSURE(txn->mt_env, stage == MDBX_MC_LIVE); - mc->mc_signature = MDBX_MC_READY4CLOSE /* Cursor may be reused */; - mc->mc_flags = 0 /* reset C_UNTRACK */; - } - mc = next; - } while (mc); - } -} - #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) /* Find largest mvcc-snapshot still referenced by this process. 
*/ static pgno_t find_largest_this(MDBX_env *env, pgno_t largest) { @@ -8730,6 +8985,8 @@ __hot static int coherency_check_head(MDBX_txn *txn, const meta_ptr_t head, /* Copy the DB info and flags */ txn->mt_geo = head.ptr_v->mm_geo; memcpy(txn->mt_dbs, head.ptr_c->mm_dbs, CORE_DBS * sizeof(MDBX_db)); + VALGRIND_MAKE_MEM_UNDEFINED(txn->mt_dbs + CORE_DBS, + txn->mt_env->me_maxdbs - CORE_DBS); txn->mt_canary = head.ptr_v->mm_canary; if (unlikely(!coherency_check(txn->mt_env, head.txnid, txn->mt_dbs, @@ -8917,7 +9174,6 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { rc = MDBX_CORRUPTED; goto bailout; } - txn->mt_numdbs = env->me_numdbs; ENSURE(env, txn->mt_txnid >= /* paranoia is appropriate here */ env->me_lck ->mti_oldest_reader.weak); @@ -8992,9 +9248,6 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { if (txn->tw.lifo_reclaimed) MDBX_PNL_SETSIZE(txn->tw.lifo_reclaimed, 0); env->me_txn = txn; - txn->mt_numdbs = env->me_numdbs; - memcpy(txn->mt_dbi_seqs, env->me_dbi_seqs, - txn->mt_numdbs * sizeof(unsigned)); if ((txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) { rc = dpl_alloc(txn); @@ -9012,23 +9265,46 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { eASSERT(env, txn->tw.writemap_spilled_npages == 0); } + txn->mt_front = + txn->mt_txnid + ((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == 0); + /* Setup db info */ - osal_compiler_barrier(); - memset(txn->mt_cursors, 0, sizeof(MDBX_cursor *) * txn->mt_numdbs); - for (size_t i = CORE_DBS; i < txn->mt_numdbs; i++) { - const unsigned db_flags = env->me_db_flags[i]; - txn->mt_dbs[i].md_flags = db_flags & DB_PERSISTENT_FLAGS; - txn->mt_dbi_state[i] = - (db_flags & DB_VALID) ? 
DBI_VALID | DBI_USRVALID | DBI_STALE : 0; + VALGRIND_MAKE_MEM_UNDEFINED(txn->mt_dbi_state, env->me_maxdbs); +#if MDBX_ENABLE_DBI_SPARSE + txn->mt_numdbs = CORE_DBS; + VALGRIND_MAKE_MEM_UNDEFINED( + txn->mt_dbi_sparse, + ceil_powerof2(env->me_maxdbs, CHAR_BIT * sizeof(txn->mt_dbi_sparse[0])) / + CHAR_BIT); + txn->mt_dbi_sparse[0] = (1 << CORE_DBS) - 1; +#else + txn->mt_numdbs = (env->me_numdbs < 8) ? env->me_numdbs : 8; + if (txn->mt_numdbs > CORE_DBS) + memset(txn->mt_dbi_state + CORE_DBS, 0, txn->mt_numdbs - CORE_DBS); +#endif /* MDBX_ENABLE_DBI_SPARSE */ + txn->mt_dbi_state[FREE_DBI] = DBI_LINDO | DBI_VALID; + txn->mt_dbi_state[MAIN_DBI] = DBI_LINDO | DBI_VALID; + txn->mt_cursors[FREE_DBI] = nullptr; + txn->mt_cursors[MAIN_DBI] = nullptr; + txn->mt_dbi_seqs[FREE_DBI] = 0; + struct dbi_snap_result main_snap = dbi_snap(env, MAIN_DBI); + if (unlikely(main_snap.flags != + (DB_VALID | txn->mt_dbs[MAIN_DBI].md_flags))) { + if (main_snap.flags & DB_VALID) { + rc = MDBX_INCOMPATIBLE; + goto bailout; + } + env->me_db_flags[MAIN_DBI] = DB_VALID | txn->mt_dbs[MAIN_DBI].md_flags; + main_snap.sequence = + atomic_store32(&env->me_dbi_seqs[MAIN_DBI], dbi_seq_next(env, MAIN_DBI), + mo_AcquireRelease); } - txn->mt_dbi_state[MAIN_DBI] = DBI_VALID | DBI_USRVALID; + txn->mt_dbi_seqs[MAIN_DBI] = main_snap.sequence; + rc = setup_dbx(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], env->me_psize); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; - txn->mt_dbi_state[FREE_DBI] = DBI_VALID; - txn->mt_front = - txn->mt_txnid + ((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == 0); if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) { WARNING("%s", "environment had fatal error, must shutdown!"); @@ -9238,7 +9514,6 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, return MDBX_EACCESS; flags |= env->me_flags & MDBX_WRITEMAP; - MDBX_txn *txn = nullptr; if (parent) { /* Nested transactions: Max 1 child, write txns only, no writemap */ @@ -9270,11 +9545,24 @@ int 
mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, goto renew; } + const intptr_t bitmap_bytes = +#if MDBX_ENABLE_DBI_SPARSE + ceil_powerof2(env->me_maxdbs, CHAR_BIT * sizeof(txn->mt_dbi_sparse[0])) / + CHAR_BIT; +#else + 0; +#endif /* MDBX_ENABLE_DBI_SPARSE */ + STATIC_ASSERT(sizeof(txn->tw) > sizeof(txn->to)); const size_t base = (flags & MDBX_TXN_RDONLY) ? sizeof(MDBX_txn) - sizeof(txn->tw) + sizeof(txn->to) : sizeof(MDBX_txn); const size_t size = - base + env->me_maxdbs * (sizeof(MDBX_db) + sizeof(MDBX_cursor *) + 1); + base + + ((flags & MDBX_TXN_RDONLY) + ? (size_t)bitmap_bytes + env->me_maxdbs * sizeof(txn->mt_dbi_seqs[0]) + : 0) + + env->me_maxdbs * (sizeof(txn->mt_dbs[0]) + sizeof(txn->mt_cursors[0]) + + sizeof(txn->mt_dbi_state[0])); txn = osal_malloc(size); if (unlikely(txn == nullptr)) { DEBUG("calloc: %s", "failed"); @@ -9288,16 +9576,21 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, memset(txn, 0, (MDBX_GOOFY_MSVC_STATIC_ANALYZER && base > size) ? 
size : base); txn->mt_dbs = ptr_disp(txn, base); - txn->mt_cursors = ptr_disp(txn->mt_dbs, sizeof(MDBX_db) * env->me_maxdbs); + txn->mt_cursors = + ptr_disp(txn->mt_dbs, env->me_maxdbs * sizeof(txn->mt_dbs[0])); #if MDBX_DEBUG txn->mt_cursors[FREE_DBI] = nullptr; /* avoid SIGSEGV in an assertion later */ -#endif /* MDBX_DEBUG */ - txn->mt_dbi_state = ptr_disp(txn, size - env->me_maxdbs); +#endif + txn->mt_dbi_state = + ptr_disp(txn, size - env->me_maxdbs * sizeof(txn->mt_dbi_state[0])); txn->mt_flags = flags; txn->mt_env = env; if (parent) { tASSERT(parent, dirtylist_check(parent)); +#if MDBX_ENABLE_DBI_SPARSE + txn->mt_dbi_sparse = parent->mt_dbi_sparse; +#endif /* MDBX_ENABLE_DBI_SPARSE */ txn->mt_dbi_seqs = parent->mt_dbi_seqs; txn->mt_geo = parent->mt_geo; rc = dpl_alloc(txn); @@ -9375,14 +9668,19 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, parent->mt_flags |= MDBX_TXN_HAS_CHILD; parent->mt_child = txn; txn->mt_parent = parent; - txn->mt_numdbs = parent->mt_numdbs; txn->mt_owner = parent->mt_owner; - memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDBX_db)); txn->tw.troika = parent->tw.troika; - /* Copy parent's mt_dbi_state, but clear DB_NEW */ - for (size_t i = 0; i < txn->mt_numdbs; i++) - txn->mt_dbi_state[i] = - parent->mt_dbi_state[i] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); + + txn->mt_cursors[FREE_DBI] = nullptr; + txn->mt_cursors[MAIN_DBI] = nullptr; + txn->mt_dbi_state[FREE_DBI] = + parent->mt_dbi_state[FREE_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); + txn->mt_dbi_state[MAIN_DBI] = + parent->mt_dbi_state[MAIN_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); + memset(txn->mt_dbi_state + CORE_DBS, 0, + (txn->mt_numdbs = parent->mt_numdbs) - CORE_DBS); + memcpy(txn->mt_dbs, parent->mt_dbs, sizeof(txn->mt_dbs[0]) * CORE_DBS); + tASSERT(parent, parent->tw.dirtyroom + parent->tw.dirtylist->length == (parent->mt_parent ? 
parent->mt_parent->tw.dirtyroom @@ -9391,7 +9689,10 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, (txn->mt_parent ? txn->mt_parent->tw.dirtyroom : txn->mt_env->me_options.dp_limit)); env->me_txn = txn; - rc = cursor_shadow(parent, txn); + tASSERT(parent, parent->mt_cursors[FREE_DBI] == nullptr); + rc = parent->mt_cursors[MAIN_DBI] + ? cursor_shadow(parent->mt_cursors[MAIN_DBI], txn, MAIN_DBI) + : MDBX_SUCCESS; if (AUDIT_ENABLED() && ASSERT_ENABLED()) { txn->mt_signature = MDBX_MT_SIGNATURE; tASSERT(txn, audit_ex(txn, 0, false) == 0); @@ -9399,7 +9700,11 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, if (unlikely(rc != MDBX_SUCCESS)) txn_end(txn, TXN_END_FAIL_BEGINCHILD); } else { /* MDBX_TXN_RDONLY */ - txn->mt_dbi_seqs = env->me_dbi_seqs; + txn->mt_dbi_seqs = + ptr_disp(txn->mt_cursors, env->me_maxdbs * sizeof(txn->mt_cursors[0])); +#if MDBX_ENABLE_DBI_SPARSE + txn->mt_dbi_sparse = ptr_disp(txn->mt_dbi_state, -bitmap_bytes); +#endif /* MDBX_ENABLE_DBI_SPARSE */ renew: rc = txn_renew(txn, flags); } @@ -9580,133 +9885,6 @@ int mdbx_txn_flags(const MDBX_txn *txn) { return txn->mt_flags; } -/* Check for misused dbi handles */ -static __inline bool dbi_changed(const MDBX_txn *txn, size_t dbi) { - if (txn->mt_dbi_seqs == txn->mt_env->me_dbi_seqs) - return false; - if (likely( - txn->mt_dbi_seqs[dbi].weak == - atomic_load32((MDBX_atomic_uint32_t *)&txn->mt_env->me_dbi_seqs[dbi], - mo_AcquireRelease))) - return false; - return true; -} - -static __inline unsigned dbi_seq(const MDBX_env *const env, size_t slot) { - unsigned v = env->me_dbi_seqs[slot].weak + 1; - return v + (v == 0); -} - -static void dbi_import_locked(MDBX_txn *txn) { - const MDBX_env *const env = txn->mt_env; - size_t n = env->me_numdbs; - for (size_t i = CORE_DBS; i < n; ++i) { - if (i >= txn->mt_numdbs) { - txn->mt_cursors[i] = NULL; - if (txn->mt_dbi_seqs != env->me_dbi_seqs) - txn->mt_dbi_seqs[i].weak = 0; - txn->mt_dbi_state[i] = 
0; - } - if ((dbi_changed(txn, i) && - (txn->mt_dbi_state[i] & (DBI_CREAT | DBI_DIRTY | DBI_FRESH)) == 0) || - ((env->me_db_flags[i] & DB_VALID) && - !(txn->mt_dbi_state[i] & DBI_VALID))) { - tASSERT(txn, (txn->mt_dbi_state[i] & - (DBI_CREAT | DBI_DIRTY | DBI_FRESH)) == 0); - txn->mt_dbi_seqs[i] = env->me_dbi_seqs[i]; - txn->mt_dbs[i].md_flags = env->me_db_flags[i] & DB_PERSISTENT_FLAGS; - txn->mt_dbi_state[i] = 0; - if (env->me_db_flags[i] & DB_VALID) { - txn->mt_dbi_state[i] = DBI_VALID | DBI_USRVALID | DBI_STALE; - tASSERT(txn, env->me_dbxs[i].md_cmp != NULL); - tASSERT(txn, env->me_dbxs[i].md_name.iov_base != NULL); - } - } - } - while (unlikely(n < txn->mt_numdbs)) - if (txn->mt_cursors[txn->mt_numdbs - 1] == NULL && - (txn->mt_dbi_state[txn->mt_numdbs - 1] & DBI_USRVALID) == 0) - txn->mt_numdbs -= 1; - else { - if ((txn->mt_dbi_state[n] & DBI_USRVALID) == 0) { - if (txn->mt_dbi_seqs != env->me_dbi_seqs) - txn->mt_dbi_seqs[n].weak = 0; - txn->mt_dbi_state[n] = 0; - } - ++n; - } - txn->mt_numdbs = (MDBX_dbi)n; -} - -/* Import DBI which opened after txn started into context */ -__cold static bool dbi_import(MDBX_txn *txn, MDBX_dbi dbi) { - if (dbi < CORE_DBS || - (dbi >= txn->mt_numdbs && dbi >= txn->mt_env->me_numdbs)) - return false; - - ENSURE(txn->mt_env, - osal_fastmutex_acquire(&txn->mt_env->me_dbi_lock) == MDBX_SUCCESS); - dbi_import_locked(txn); - ENSURE(txn->mt_env, - osal_fastmutex_release(&txn->mt_env->me_dbi_lock) == MDBX_SUCCESS); - return txn->mt_dbi_state[dbi] & DBI_USRVALID; -} - -/* Export or close DBI handles opened in this txn. 
*/ -static void dbi_update(MDBX_txn *txn, int keep) { - tASSERT(txn, !txn->mt_parent && txn == txn->mt_env->me_txn0); - MDBX_dbi n = txn->mt_numdbs; - if (n) { - bool locked = false; - MDBX_env *const env = txn->mt_env; - - for (size_t i = n; --i >= CORE_DBS;) { - if (likely((txn->mt_dbi_state[i] & DBI_CREAT) == 0)) - continue; - if (!locked) { - ENSURE(env, osal_fastmutex_acquire(&env->me_dbi_lock) == MDBX_SUCCESS); - locked = true; - } - if (env->me_numdbs <= i || - txn->mt_dbi_seqs[i].weak != env->me_dbi_seqs[i].weak) - continue /* dbi explicitly closed and/or then re-opened by other txn */; - if (keep) { - env->me_db_flags[i] = txn->mt_dbs[i].md_flags | DB_VALID; - } else { - const MDBX_val name = env->me_dbxs[i].md_name; - if (name.iov_base) { - env->me_dbxs[i].md_name.iov_base = nullptr; - eASSERT(env, env->me_db_flags[i] == 0); - atomic_store32(&env->me_dbi_seqs[i], dbi_seq(env, i), - mo_AcquireRelease); - env->me_dbxs[i].md_name.iov_len = 0; - if (name.iov_len) - osal_free(name.iov_base); - } else { - eASSERT(env, name.iov_len == 0); - eASSERT(env, env->me_db_flags[i] == 0); - } - } - } - - n = env->me_numdbs; - if (n > CORE_DBS && unlikely(!(env->me_db_flags[n - 1] & DB_VALID))) { - if (!locked) { - ENSURE(env, osal_fastmutex_acquire(&env->me_dbi_lock) == MDBX_SUCCESS); - locked = true; - } - - n = env->me_numdbs; - while (n > CORE_DBS && !(env->me_db_flags[n - 1] & DB_VALID)) - --n; - env->me_numdbs = n; - } - - if (unlikely(locked)) - ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); - } -} - /* Filter-out pgno list from transaction's dirty-page list */ static void dpl_sift(MDBX_txn *const txn, MDBX_PNL pl, const bool spilled) { tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); @@ -9790,7 +9968,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { } #endif /* MDBX_ENV_CHECKPID */ - DEBUG("%s txn %" PRIaTXN "%c %p on mdbenv %p, root page %" PRIaPGNO + DEBUG("%s txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "/%" 
PRIaPGNO, names[mode & TXN_END_OPMASK], txn->mt_txnid, (txn->mt_flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, (void *)env, @@ -9851,7 +10029,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { if (txn == env->me_txn0) { eASSERT(env, txn->mt_parent == NULL); /* Export or close DBI handles created in this txn */ - dbi_update(txn, mode & TXN_END_UPDATE); + rc = dbi_update(txn, mode & TXN_END_UPDATE); pnl_shrink(&txn->tw.retired_pages); pnl_shrink(&txn->tw.relist); if (!(env->me_flags & MDBX_WRITEMAP)) @@ -9983,10 +10161,41 @@ int mdbx_txn_abort(MDBX_txn *txn) { return txn_end(txn, TXN_END_ABORT | TXN_END_SLOT | TXN_END_FREE); } +__cold static MDBX_db *audit_db_dig(const MDBX_txn *txn, const size_t dbi, + MDBX_db *fallback) { + const MDBX_txn *dig = txn; + do { + tASSERT(txn, txn->mt_numdbs == dig->mt_numdbs); + const uint8_t state = dbi_state(dig, dbi); + if (state & DBI_LINDO) + switch (state & (DBI_VALID | DBI_STALE | DBI_OLDEN)) { + case DBI_VALID: + case DBI_OLDEN: + return dig->mt_dbs + dbi; + case 0: + return nullptr; + case DBI_VALID | DBI_STALE: + case DBI_OLDEN | DBI_STALE: + break; + default: + tASSERT(txn, !!"unexpected dig->mt_dbi_state[dbi]"); + } + dig = dig->mt_parent; + } while (dig); + return fallback; +} + +static size_t audit_db_used(const MDBX_db *db) { + return db ? (size_t)db->md_branch_pages + (size_t)db->md_leaf_pages + + (size_t)db->md_overflow_pages + : 0; +} + /* Count all the pages in each DB and in the GC and make sure * it matches the actual number of pages being used. 
*/ -__cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, - bool dont_filter_gc) { +__cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, + bool dont_filter_gc) { + const MDBX_env *const env = txn->mt_env; size_t pending = 0; if ((txn->mt_flags & MDBX_TXN_RDONLY) == 0) pending = txn->tw.loose_count + MDBX_PNL_GETSIZE(txn->tw.relist) + @@ -10017,82 +10226,66 @@ __cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, } tASSERT(txn, rc == MDBX_NOTFOUND); - for (size_t i = FREE_DBI; i < txn->mt_numdbs; i++) - txn->mt_dbi_state[i] &= ~DBI_AUDIT; - - size_t used = NUM_METAS; - for (size_t i = FREE_DBI; i <= MAIN_DBI; i++) { - if (!(txn->mt_dbi_state[i] & DBI_VALID)) - continue; - rc = cursor_init(&cx.outer, txn, i); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - txn->mt_dbi_state[i] |= DBI_AUDIT; - if (txn->mt_dbs[i].md_root == P_INVALID) - continue; - used += (size_t)txn->mt_dbs[i].md_branch_pages + - (size_t)txn->mt_dbs[i].md_leaf_pages + - (size_t)txn->mt_dbs[i].md_overflow_pages; - - if (i != MAIN_DBI) - continue; - rc = page_search(&cx.outer, NULL, MDBX_PS_FIRST); - const MDBX_env *const env = txn->mt_env; - while (rc == MDBX_SUCCESS) { - MDBX_page *mp = cx.outer.mc_pg[cx.outer.mc_top]; - for (size_t j = 0; j < page_numkeys(mp); j++) { - const MDBX_node *node = page_node(mp, j); - if (node_flags(node) == F_SUBDATA) { - if (unlikely(node_ds(node) != sizeof(MDBX_db))) - return MDBX_CORRUPTED; - const MDBX_val name = {node_key(node), node_ks(node)}; - const MDBX_db *db = nullptr; - if ((txn->mt_flags & MDBX_TXN_RDONLY) == 0) { - for (MDBX_dbi dbi = txn->mt_numdbs; --dbi > MAIN_DBI;) { - if ((txn->mt_dbi_state[dbi] & DBI_VALID) && - /* env->me_dbxs[k].md_name.iov_base && */ - env->me_dbxs[MAIN_DBI].md_cmp( - &name, &env->me_dbxs[dbi].md_name) == 0) { - txn->mt_dbi_state[dbi] |= DBI_AUDIT; - if (!(txn->mt_dbi_state[dbi] & MDBX_DBI_STALE)) - db = txn->mt_dbs + dbi; - break; - } - } - } - MDBX_db aligned; - if (!db) - db = 
memcpy(&aligned, node_data(node), sizeof(MDBX_db)); - used += (size_t)db->md_branch_pages + (size_t)db->md_leaf_pages + - (size_t)db->md_overflow_pages; - } - } - rc = cursor_sibling(&cx.outer, SIBLING_RIGHT); - } - tASSERT(txn, rc == MDBX_NOTFOUND); + const size_t done_bitmap_size = (txn->mt_numdbs + CHAR_BIT - 1) / CHAR_BIT; + uint8_t *const done_bitmap = alloca(done_bitmap_size); + memset(done_bitmap, 0, done_bitmap_size); + if (txn->mt_parent) { + tASSERT(txn, txn->mt_numdbs == txn->mt_parent->mt_numdbs && + txn->mt_numdbs == txn->mt_env->me_txn->mt_numdbs); +#if MDBX_ENABLE_DBI_SPARSE + tASSERT(txn, txn->mt_dbi_sparse == txn->mt_parent->mt_dbi_sparse && + txn->mt_dbi_sparse == txn->mt_env->me_txn->mt_dbi_sparse); +#endif /* MDBX_ENABLE_DBI_SPARSE */ } - for (size_t i = FREE_DBI; i < txn->mt_numdbs; i++) { - if ((txn->mt_dbi_state[i] & (DBI_VALID | DBI_AUDIT | DBI_STALE)) != - DBI_VALID) - continue; - for (MDBX_txn *t = txn; t; t = t->mt_parent) - if (F_ISSET(t->mt_dbi_state[i], DBI_DIRTY | DBI_CREAT)) { - used += (size_t)t->mt_dbs[i].md_branch_pages + - (size_t)t->mt_dbs[i].md_leaf_pages + - (size_t)t->mt_dbs[i].md_overflow_pages; - txn->mt_dbi_state[i] |= DBI_AUDIT; + size_t used = NUM_METAS + + audit_db_used(audit_db_dig(txn, FREE_DBI, nullptr)) + + audit_db_used(audit_db_dig(txn, MAIN_DBI, nullptr)); + rc = cursor_init(&cx.outer, txn, MAIN_DBI); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + for (rc = page_search(&cx.outer, NULL, MDBX_PS_FIRST); rc == MDBX_SUCCESS; + rc = cursor_sibling(&cx.outer, SIBLING_RIGHT)) { + MDBX_page *mp = cx.outer.mc_pg[cx.outer.mc_top]; + for (size_t k = 0; k < page_numkeys(mp); k++) { + MDBX_node *node = page_node(mp, k); + if (node_flags(node) != F_SUBDATA) + continue; + if (unlikely(node_ds(node) != sizeof(MDBX_db))) + return MDBX_CORRUPTED; + + MDBX_db reside; + const MDBX_db *db = memcpy(&reside, node_data(node), sizeof(reside)); + const MDBX_val name = {node_key(node), node_ks(node)}; + for (size_t dbi = CORE_DBS; 
dbi < env->me_numdbs; ++dbi) { + if (dbi >= txn->mt_numdbs || !(env->me_db_flags[dbi] & DB_VALID)) + continue; + if (env->me_dbxs[MAIN_DBI].md_cmp(&name, &env->me_dbxs[dbi].md_name)) + continue; + + done_bitmap[dbi / CHAR_BIT] |= 1 << dbi % CHAR_BIT; + db = audit_db_dig(txn, dbi, &reside); break; } - MDBX_ANALYSIS_ASSUME(txn != nullptr); - if (!(txn->mt_dbi_state[i] & DBI_AUDIT)) { + used += audit_db_used(db); + } + } + tASSERT(txn, rc == MDBX_NOTFOUND); + + for (size_t dbi = CORE_DBS; dbi < txn->mt_numdbs; ++dbi) { + if (done_bitmap[dbi / CHAR_BIT] & (1 << dbi % CHAR_BIT)) + continue; + const MDBX_db *db = audit_db_dig(txn, dbi, nullptr); + if (db) + used += audit_db_used(db); + else if (dbi_state(txn, dbi)) WARNING("audit %s@%" PRIaTXN ": unable account dbi %zd / \"%*s\", state 0x%02x", - txn->mt_parent ? "nested-" : "", txn->mt_txnid, i, - (int)txn->mt_env->me_dbxs[i].md_name.iov_len, - (const char *)txn->mt_env->me_dbxs[i].md_name.iov_base, - txn->mt_dbi_state[i]); - } + txn->mt_parent ? 
"nested-" : "", txn->mt_txnid, dbi, + (int)env->me_dbxs[dbi].md_name.iov_len, + (const char *)env->me_dbxs[dbi].md_name.iov_base, + dbi_state(txn, dbi)); } if (pending + gc + used == txn->mt_next_pgno) @@ -10113,6 +10306,18 @@ __cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, return MDBX_PROBLEM; } +__cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, + bool dont_filter_gc) { + MDBX_env *const env = txn->mt_env; + int rc = osal_fastmutex_acquire(&env->me_dbi_lock); + if (likely(rc == MDBX_SUCCESS)) { + rc = audit_ex_locked(txn, retired_stored, dont_filter_gc); + ENSURE(txn->mt_env, + osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); + } + return rc; +} + typedef struct gc_update_context { size_t retired_stored, loop; size_t settled, cleaned_slot, reused_slot, filled_slot; @@ -11172,21 +11377,6 @@ static int txn_write(MDBX_txn *txn, iov_ctx_t *ctx) { return rc; } -/* Check txn and dbi arguments to a function */ -static __always_inline bool check_dbi(const MDBX_txn *txn, MDBX_dbi dbi, - unsigned validity) { - if (likely(dbi < txn->mt_numdbs)) { - if (likely(!dbi_changed(txn, dbi))) { - if (likely(txn->mt_dbi_state[dbi] & validity)) - return true; - if (likely(dbi < CORE_DBS || - (txn->mt_env->me_db_flags[dbi] & DB_VALID) == 0)) - return false; - } - } - return dbi_import((MDBX_txn *)txn, dbi); -} - /* Merge child txn into parent */ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t parent_retired_len) { @@ -11631,7 +11821,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { if (txn->tw.dirtylist->length == 0 && !(txn->mt_flags & MDBX_TXN_DIRTY) && parent->mt_numdbs == txn->mt_numdbs) { - for (int i = txn->mt_numdbs; --i >= 0;) { + TXN_FOREACH_DBI_ALL(txn, i) { tASSERT(txn, (txn->mt_dbi_state[i] & DBI_DIRTY) == 0); if ((txn->mt_dbi_state[i] & DBI_STALE) && !(parent->mt_dbi_state[i] & DBI_STALE)) @@ -11648,6 +11838,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, 
MDBX_commit_latency *latency) { tASSERT(txn, txn->tw.loose_count == 0); /* fast completion of pure nested transaction */ + VERBOSE("fast-complete pure nested txn %" PRIaTXN, txn->mt_txnid); end_mode = TXN_END_PURE_COMMIT | TXN_END_SLOT | TXN_END_FREE; goto done; } @@ -11711,17 +11902,23 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { end_mode |= TXN_END_EOTDONE; /* Update parent's DBs array */ - memcpy(parent->mt_dbs, txn->mt_dbs, txn->mt_numdbs * sizeof(MDBX_db)); - parent->mt_numdbs = txn->mt_numdbs; - for (size_t i = 0; i < txn->mt_numdbs; i++) { - /* preserve parent's status */ - const uint8_t state = - txn->mt_dbi_state[i] | - (parent->mt_dbi_state[i] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)); - DEBUG("dbi %zu dbi-state %s 0x%02x -> 0x%02x", i, - (parent->mt_dbi_state[i] != state) ? "update" : "still", - parent->mt_dbi_state[i], state); - parent->mt_dbi_state[i] = state; + eASSERT(env, parent->mt_numdbs == txn->mt_numdbs); + TXN_FOREACH_DBI_ALL(txn, dbi) { + if (txn->mt_dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)) { + parent->mt_dbs[dbi] = txn->mt_dbs[dbi]; + /* preserve parent's status */ + const uint8_t state = + txn->mt_dbi_state[dbi] | + (parent->mt_dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)); + DEBUG("dbi %zu dbi-state %s 0x%02x -> 0x%02x", dbi, + (parent->mt_dbi_state[dbi] != state) ? "update" : "still", + parent->mt_dbi_state[dbi], state); + parent->mt_dbi_state[dbi] = state; + } else { + eASSERT(env, txn->mt_dbi_state[dbi] == + (parent->mt_dbi_state[dbi] & + ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY))); + } } if (latency) { @@ -11766,15 +11963,16 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == (txn->mt_parent ? 
txn->mt_parent->tw.dirtyroom - : txn->mt_env->me_options.dp_limit)); + : env->me_options.dp_limit)); } cursors_eot(txn, false); end_mode |= TXN_END_EOTDONE; if ((!txn->tw.dirtylist || txn->tw.dirtylist->length == 0) && (txn->mt_flags & (MDBX_TXN_DIRTY | MDBX_TXN_SPILLS)) == 0) { - for (intptr_t i = txn->mt_numdbs; --i >= 0;) - tASSERT(txn, (txn->mt_dbi_state[i] & DBI_DIRTY) == 0); + TXN_FOREACH_DBI_ALL(txn, i) { + tASSERT(txn, !(txn->mt_dbi_state[i] & DBI_DIRTY)); + } #if defined(MDBX_NOSUCCESS_EMPTY_COMMIT) && MDBX_NOSUCCESS_EMPTY_COMMIT rc = txn_end(txn, end_mode); if (unlikely(rc != MDBX_SUCCESS)) @@ -11786,37 +11984,37 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { #endif /* MDBX_NOSUCCESS_EMPTY_COMMIT */ } - DEBUG("committing txn %" PRIaTXN " %p on mdbenv %p, root page %" PRIaPGNO + DEBUG("committing txn %" PRIaTXN " %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->mt_txnid, (void *)txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root, txn->mt_dbs[FREE_DBI].md_root); - /* Update DB root pointers */ if (txn->mt_numdbs > CORE_DBS) { - MDBX_cursor_couple couple; - MDBX_val data; - data.iov_len = sizeof(MDBX_db); - - rc = cursor_init(&couple.outer, txn, MAIN_DBI); + /* Update subDB root pointers */ + MDBX_cursor_couple cx; + rc = cursor_init(&cx.outer, txn, MAIN_DBI); if (unlikely(rc != MDBX_SUCCESS)) goto fail; - for (MDBX_dbi i = CORE_DBS; i < txn->mt_numdbs; i++) { - if (txn->mt_dbi_state[i] & DBI_DIRTY) { - MDBX_db *db = &txn->mt_dbs[i]; - DEBUG("update main's entry for sub-db %u, mod_txnid %" PRIaTXN - " -> %" PRIaTXN, - i, db->md_mod_txnid, txn->mt_txnid); - /* Может быть mod_txnid > front после коммита вложенных тразакций */ - db->md_mod_txnid = txn->mt_txnid; - data.iov_base = db; - WITH_CURSOR_TRACKING( - couple.outer, - rc = cursor_put_nochecklen(&couple.outer, &env->me_dbxs[i].md_name, - &data, F_SUBDATA)); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; + cx.outer.mc_next = txn->mt_cursors[MAIN_DBI]; + 
txn->mt_cursors[MAIN_DBI] = &cx.outer; + TXN_FOREACH_DBI_USER(txn, i) { + if ((txn->mt_dbi_state[i] & DBI_DIRTY) == 0) + continue; + MDBX_db *const db = &txn->mt_dbs[i]; + DEBUG("update main's entry for sub-db %zu, mod_txnid %" PRIaTXN + " -> %" PRIaTXN, + i, db->md_mod_txnid, txn->mt_txnid); + /* Может быть mod_txnid > front после коммита вложенных тразакций */ + db->md_mod_txnid = txn->mt_txnid; + MDBX_val data = {db, sizeof(MDBX_db)}; + rc = cursor_put_nochecklen(&cx.outer, &env->me_dbxs[i].md_name, &data, + F_SUBDATA); + if (unlikely(rc != MDBX_SUCCESS)) { + txn->mt_cursors[MAIN_DBI] = cx.outer.mc_next; + goto fail; } } + txn->mt_cursors[MAIN_DBI] = cx.outer.mc_next; } ts_1 = latency ? osal_monotime() : 0; @@ -14911,7 +15109,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, env->me_flags = (flags & ~MDBX_FATAL_ERROR) | MDBX_ENV_ACTIVE; env->me_pathname = osal_calloc(env_pathname.ent_len + 1, sizeof(pathchar_t)); - env->me_dbxs = osal_calloc(env->me_maxdbs, sizeof(MDBX_dbx)); + env->me_dbxs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbxs[0])); env->me_db_flags = osal_calloc(env->me_maxdbs, sizeof(env->me_db_flags[0])); env->me_dbi_seqs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbi_seqs[0])); if (!(env->me_dbxs && env->me_pathname && env->me_db_flags && @@ -14921,6 +15119,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, } memcpy(env->me_pathname, env_pathname.dxb, env_pathname.ent_len * sizeof(pathchar_t)); + env->me_db_flags[FREE_DBI] = DB_VALID | MDBX_INTEGERKEY; env->me_dbxs[FREE_DBI].md_cmp = cmp_int_align4; /* aligned MDBX_INTEGERKEY */ env->me_dbxs[FREE_DBI].md_dcmp = cmp_lenfast; env->me_dbxs[FREE_DBI].md_klen_max = env->me_dbxs[FREE_DBI].md_klen_min = 8; @@ -15256,24 +15455,38 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, } if ((flags & MDBX_RDONLY) == 0) { - const size_t tsize = sizeof(MDBX_txn) + sizeof(MDBX_cursor), - size = tsize + env->me_maxdbs * - (sizeof(MDBX_db) + 
sizeof(MDBX_cursor *) + - sizeof(MDBX_atomic_uint32_t) + 1); + MDBX_txn *txn = nullptr; + const intptr_t bitmap_bytes = +#if MDBX_ENABLE_DBI_SPARSE + ceil_powerof2(env->me_maxdbs, + CHAR_BIT * sizeof(txn->mt_dbi_sparse[0])) / + CHAR_BIT; +#else + 0; +#endif /* MDBX_ENABLE_DBI_SPARSE */ + const size_t base = sizeof(MDBX_txn) + sizeof(MDBX_cursor); + const size_t size = + base + bitmap_bytes + + env->me_maxdbs * + (sizeof(txn->mt_dbs[0]) + sizeof(txn->mt_cursors[0]) + + sizeof(txn->mt_dbi_seqs[0]) + sizeof(txn->mt_dbi_state[0])); rc = alloc_page_buf(env); if (rc == MDBX_SUCCESS) { memset(env->me_pbuf, -1, env->me_psize * (size_t)2); memset(ptr_disp(env->me_pbuf, env->me_psize * (size_t)2), 0, env->me_psize); - MDBX_txn *txn = osal_calloc(1, size); + txn = osal_calloc(1, size); if (txn) { - txn->mt_dbs = ptr_disp(txn, tsize); + txn->mt_dbs = ptr_disp(txn, base); txn->mt_cursors = - ptr_disp(txn->mt_dbs, sizeof(MDBX_db) * env->me_maxdbs); - txn->mt_dbi_seqs = - ptr_disp(txn->mt_cursors, sizeof(MDBX_cursor *) * env->me_maxdbs); - txn->mt_dbi_state = ptr_disp( - txn->mt_dbi_seqs, sizeof(MDBX_atomic_uint32_t) * env->me_maxdbs); + ptr_disp(txn->mt_dbs, env->me_maxdbs * sizeof(txn->mt_dbs[0])); + txn->mt_dbi_seqs = ptr_disp( + txn->mt_cursors, env->me_maxdbs * sizeof(txn->mt_cursors[0])); + txn->mt_dbi_state = + ptr_disp(txn, size - env->me_maxdbs * sizeof(txn->mt_dbi_state[0])); +#if MDBX_ENABLE_DBI_SPARSE + txn->mt_dbi_sparse = ptr_disp(txn->mt_dbi_state, -bitmap_bytes); +#endif /* MDBX_ENABLE_DBI_SPARSE */ txn->mt_env = env; txn->mt_flags = MDBX_TXN_FINISHED; env->me_txn0 = txn; @@ -15876,10 +16089,6 @@ static int setup_dbx(MDBX_dbx *const dbx, const MDBX_db *const db, static int fetch_sdb(MDBX_txn *txn, size_t dbi) { MDBX_cursor_couple couple; - if (unlikely(dbi_changed(txn, dbi))) { - NOTICE("dbi %zu was changed for txn %" PRIaTXN, dbi, txn->mt_txnid); - return MDBX_BAD_DBI; - } int rc = cursor_init(&couple.outer, txn, MAIN_DBI); if (unlikely(rc != MDBX_SUCCESS)) 
return rc; @@ -15887,7 +16096,7 @@ static int fetch_sdb(MDBX_txn *txn, size_t dbi) { MDBX_dbx *const dbx = &txn->mt_env->me_dbxs[dbi]; rc = page_search(&couple.outer, &dbx->md_name, 0); if (unlikely(rc != MDBX_SUCCESS)) { - notfound: + bailout: NOTICE("dbi %zu refs to inaccessible subDB `%*s` for txn %" PRIaTXN " (err %d)", dbi, (int)dbx->md_name.iov_len, (const char *)dbx->md_name.iov_base, @@ -15899,7 +16108,7 @@ static int fetch_sdb(MDBX_txn *txn, size_t dbi) { struct node_result nsr = node_search(&couple.outer, &dbx->md_name); if (unlikely(!nsr.exact)) { rc = MDBX_NOTFOUND; - goto notfound; + goto bailout; } if (unlikely((node_flags(nsr.node) & (F_DUPDATA | F_SUBDATA)) != F_SUBDATA)) { NOTICE("dbi %zu refs to not a named subDB `%*s` for txn %" PRIaTXN " (%s)", @@ -15977,8 +16186,8 @@ __hot static int page_search_lowest(MDBX_cursor *mc) { * [in] key the key to search for, or NULL for first/last page. * [in] flags If MDBX_PS_MODIFY is set, visited pages in the DB * are touched (updated with new page numbers). - * If MDBX_PS_FIRST or MDBX_PS_LAST is set, find first or last - * leaf. + * If MDBX_PS_FIRST or MDBX_PS_LAST is set, + * find first or last leaf. * This is used by mdbx_cursor_first() and mdbx_cursor_last(). * If MDBX_PS_ROOTONLY set, just fetch root node, no further * lookups. 
@@ -16096,9 +16305,6 @@ int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, if (unlikely(!key || !data)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_USRVALID))) - return MDBX_BAD_DBI; - MDBX_cursor_couple cx; rc = cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) @@ -16116,9 +16322,6 @@ int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, if (unlikely(!key || !data)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_USRVALID))) - return MDBX_BAD_DBI; - if (unlikely(txn->mt_flags & MDBX_TXN_BLOCKED)) return MDBX_BAD_TXN; @@ -16142,9 +16345,6 @@ int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, if (unlikely(!key || !data)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_USRVALID))) - return MDBX_BAD_DBI; - MDBX_cursor_couple cx; rc = cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) @@ -17178,7 +17378,10 @@ static int touch_dbi(MDBX_cursor *mc) { if (mc->mc_dbi >= CORE_DBS) { /* Touch DB record of named DB */ MDBX_cursor_couple cx; - int rc = cursor_init(&cx.outer, mc->mc_txn, MAIN_DBI); + int rc = dbi_check(mc->mc_txn, MAIN_DBI); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + rc = cursor_init(&cx.outer, mc->mc_txn, MAIN_DBI); if (unlikely(rc != MDBX_SUCCESS)) return rc; mc->mc_txn->mt_dbi_state[MAIN_DBI] |= DBI_DIRTY; @@ -17195,6 +17398,8 @@ static __hot int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, cASSERT(mc, (mc->mc_flags & C_INITIALIZED) || mc->mc_snum == 0); cASSERT(mc, cursor_is_tracked(mc)); + cASSERT(mc, F_ISSET(dbi_state(mc->mc_txn, FREE_DBI), DBI_LINDO | DBI_VALID)); + cASSERT(mc, F_ISSET(dbi_state(mc->mc_txn, MAIN_DBI), DBI_LINDO | DBI_VALID)); if ((mc->mc_flags & C_SUB) == 0) { MDBX_txn *const txn = mc->mc_txn; txn_lru_turn(txn); @@ -18655,7 +18860,8 @@ static int cursor_xinit1(MDBX_cursor *mc, MDBX_node *node, case F_DUPDATA | F_SUBDATA: if (!MDBX_DISABLE_VALIDATION && unlikely(node_ds(node) != 
sizeof(MDBX_db))) { - ERROR("invalid nested-db record size %zu", node_ds(node)); + ERROR("invalid nested-db record size (%zu, expect %zu)", node_ds(node), + sizeof(MDBX_db)); return MDBX_CORRUPTED; } memcpy(&mx->mx_db, node_data(node), sizeof(MDBX_db)); @@ -18762,7 +18968,8 @@ static int cursor_xinit2(MDBX_cursor *mc, MDBX_xcursor *src_mx, static __inline int couple_init(MDBX_cursor_couple *couple, const size_t dbi, const MDBX_txn *const txn, MDBX_db *const db, - MDBX_dbx *const dbx, uint8_t *const dbstate) { + MDBX_dbx *const dbx, uint8_t *const dbi_state) { + tASSERT(txn, F_ISSET(*dbi_state, DBI_VALID | DBI_LINDO)); couple->outer.mc_signature = MDBX_MC_LIVE; couple->outer.mc_next = NULL; couple->outer.mc_backup = NULL; @@ -18770,7 +18977,7 @@ static __inline int couple_init(MDBX_cursor_couple *couple, const size_t dbi, couple->outer.mc_txn = (MDBX_txn *)txn; couple->outer.mc_db = db; couple->outer.mc_dbx = dbx; - couple->outer.mc_dbi_state = dbstate; + couple->outer.mc_dbi_state = dbi_state; couple->outer.mc_snum = 0; couple->outer.mc_top = 0; couple->outer.mc_pg[0] = 0; @@ -18807,9 +19014,12 @@ static __inline int couple_init(MDBX_cursor_couple *couple, const size_t dbi, /* Initialize a cursor for a given transaction and database. 
*/ static int cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, size_t dbi) { STATIC_ASSERT(offsetof(MDBX_cursor_couple, outer) == 0); - return couple_init(container_of(mc, MDBX_cursor_couple, outer), dbi, txn, + int rc = dbi_check(txn, dbi); + if (likely(rc == MDBX_SUCCESS)) + rc = couple_init(container_of(mc, MDBX_cursor_couple, outer), dbi, txn, &txn->mt_dbs[dbi], &txn->mt_env->me_dbxs[dbi], &txn->mt_dbi_state[dbi]); + return rc; } MDBX_cursor *mdbx_cursor_create(void *context) { @@ -18892,8 +19102,9 @@ int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { if (unlikely(rc != MDBX_SUCCESS)) return rc; - if (unlikely(!check_dbi(txn, dbi, DBI_VALID))) - return MDBX_BAD_DBI; + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; if (unlikely(dbi == FREE_DBI && !(txn->mt_flags & MDBX_TXN_RDONLY))) return MDBX_EACCESS; @@ -19024,7 +19235,7 @@ void mdbx_cursor_close(MDBX_cursor *mc) { int mdbx_txn_release_all_cursors(const MDBX_txn *txn, bool unbind) { int rc = check_txn(txn, MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD); if (likely(rc == MDBX_SUCCESS)) { - for (size_t i = FREE_DBI; i < txn->mt_numdbs; ++i) { + TXN_FOREACH_DBI_FROM(txn, i, MAIN_DBI) { while (txn->mt_cursors[i]) { MDBX_cursor *mc = txn->mt_cursors[i]; ENSURE(NULL, mc->mc_signature == MDBX_MC_LIVE && @@ -20281,7 +20492,8 @@ __cold static int page_check(const MDBX_cursor *const mc, break; case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */: if (unlikely(dsize != sizeof(MDBX_db))) { - rc = bad_page(mp, "invalid nested-db record size (%zu)\n", dsize); + rc = bad_page(mp, "invalid nested-db record size (%zu, expect %zu)\n", + dsize, sizeof(MDBX_db)); continue; } break; @@ -20486,7 +20698,7 @@ int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, if (unlikely(!key)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_USRVALID))) + if (unlikely(dbi <= FREE_DBI)) return MDBX_BAD_DBI; if (unlikely(txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) @@ 
-21155,7 +21367,7 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, if (unlikely(!key || !data)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_USRVALID))) + if (unlikely(dbi <= FREE_DBI)) return MDBX_BAD_DBI; if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | @@ -21509,8 +21721,8 @@ __cold static int compacting_walk_sdb(mdbx_compacting_ctx *ctx, MDBX_db *sdb) { memset(&couple, 0, sizeof(couple)); couple.inner.mx_cursor.mc_signature = ~MDBX_MC_LIVE; MDBX_dbx dbx = {.md_klen_min = INT_MAX}; - uint8_t dbistate = DBI_VALID | DBI_AUDIT; - int rc = couple_init(&couple, ~0u, ctx->mc_txn, sdb, &dbx, &dbistate); + uint8_t dbi_state = DBI_LINDO | DBI_VALID; + int rc = couple_init(&couple, ~0u, ctx->mc_txn, sdb, &dbx, &dbi_state); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -22155,27 +22367,22 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { if (unlikely(err != MDBX_SUCCESS)) return err; + MDBX_cursor_couple cx; + err = cursor_init(&cx.outer, (MDBX_txn *)txn, MAIN_DBI); + if (unlikely(err != MDBX_SUCCESS)) + return err; + const MDBX_env *const env = txn->mt_env; st->ms_psize = env->me_psize; -#if 1 - /* assuming GC is internal and not subject for accounting */ - stat_get(&txn->mt_dbs[MAIN_DBI], st, bytes); -#else - stat_get(&txn->mt_dbs[FREE_DBI], st, bytes); - stat_add(&txn->mt_dbs[MAIN_DBI], st, bytes); -#endif - - /* account opened named subDBs */ - for (MDBX_dbi dbi = CORE_DBS; dbi < txn->mt_numdbs; dbi++) + TXN_FOREACH_DBI_FROM( + txn, dbi, + /* assuming GC is internal and not subject for accounting */ MAIN_DBI) { if ((txn->mt_dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID) stat_add(txn->mt_dbs + dbi, st, bytes); + } - if (!(txn->mt_dbs[MAIN_DBI].md_flags & (MDBX_DUPSORT | MDBX_INTEGERKEY)) && + if (!(txn->mt_dbs[MAIN_DBI].md_flags & MDBX_DUPSORT) && txn->mt_dbs[MAIN_DBI].md_entries /* TODO: use `md_subs` field */) { - MDBX_cursor_couple cx; - err = 
cursor_init(&cx.outer, (MDBX_txn *)txn, MAIN_DBI); - if (unlikely(err != MDBX_SUCCESS)) - return err; /* scan and account not opened named subDBs */ err = page_search(&cx.outer, NULL, MDBX_PS_FIRST); @@ -22190,13 +22397,14 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { /* skip opened and already accounted */ const MDBX_val name = {node_key(node), node_ks(node)}; - for (MDBX_dbi dbi = CORE_DBS; dbi < txn->mt_numdbs; dbi++) + TXN_FOREACH_DBI_USER(txn, dbi) { if ((txn->mt_dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID && env->me_dbxs[MAIN_DBI].md_cmp(&name, &env->me_dbxs[dbi].md_name) == 0) { node = NULL; break; } + } if (node) { MDBX_db db; @@ -22256,9 +22464,6 @@ __cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, if (unlikely(!mask)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_VALID))) - return MDBX_BAD_DBI; - MDBX_cursor_couple cx; rc = cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) @@ -22519,54 +22724,110 @@ static __inline MDBX_cmp_func *get_default_datacmp(MDBX_db_flags_t flags) { : ((flags & MDBX_REVERSEDUP) ? 
cmp_reverse : cmp_lexical)); } -static int dbi_bind(MDBX_txn *txn, const MDBX_dbi dbi, unsigned user_flags, +static int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { - /* Accepting only three cases: - * 1) user_flags and both comparators are zero - * = assume that a by-default mode/flags is requested for reading; - * 2) user_flags exactly the same - * = assume that the target mode/flags are requested properly; - * 3) user_flags differs, but table is empty and MDBX_CREATE is provided - * = assume that a properly create request with custom flags; - */ const MDBX_env *const env = txn->mt_env; - if ((user_flags ^ txn->mt_dbs[dbi].md_flags) & DB_PERSISTENT_FLAGS) { + eASSERT(env, dbi < txn->mt_numdbs && dbi < env->me_numdbs); + eASSERT(env, dbi_state(txn, dbi) & DBI_LINDO); + eASSERT(env, env->me_db_flags[dbi] != DB_POISON); + if ((env->me_db_flags[dbi] & DB_VALID) == 0) { + eASSERT(env, !env->me_dbxs[dbi].md_cmp && !env->me_dbxs[dbi].md_dcmp && + !env->me_dbxs[dbi].md_name.iov_len && + !env->me_dbxs[dbi].md_name.iov_base && + !env->me_dbxs[dbi].md_klen_max && + !env->me_dbxs[dbi].md_klen_min && + !env->me_dbxs[dbi].md_vlen_max && + !env->me_dbxs[dbi].md_vlen_min); + } else { + eASSERT(env, !(txn->mt_dbi_state[dbi] & DBI_VALID) || + (txn->mt_dbs[dbi].md_flags | DB_VALID) == + env->me_db_flags[dbi]); + eASSERT(env, env->me_dbxs[dbi].md_name.iov_base); + } + + /* Если dbi уже использовался, то корректными считаем четыре варианта: + * 1) user_flags равны MDBX_DB_ACCEDE + * = предполагаем что пользователь открывает существующую subDb, + * при этом код проверки не позволит установить другие компараторы. + * 2) user_flags нулевые, а оба компаратора пустые/нулевые или равны текущим + * = предполагаем что пользователь открывает существующую subDb + * старым способом с нулевыми с флагами по-умолчанию. 
+ * 3) user_flags совпадают, а компараторы не заданы или те же + * = предполагаем что пользователь открывает subDb указывая все параметры; + * 4) user_flags отличаются, но subDb пустая и задан флаг MDBX_CREATE + * = предполагаем что пользователь пересоздает subDb; + */ + if ((user_flags & ~MDBX_CREATE) != + (unsigned)(env->me_db_flags[dbi] & DB_PERSISTENT_FLAGS)) { /* flags are differs, check other conditions */ if ((!user_flags && (!keycmp || keycmp == env->me_dbxs[dbi].md_cmp) && (!datacmp || datacmp == env->me_dbxs[dbi].md_dcmp)) || - user_flags == MDBX_ACCEDE) { - /* no comparators were provided and flags are zero, - * seems that is case #1 above */ - user_flags = txn->mt_dbs[dbi].md_flags; - } else if ((user_flags & MDBX_CREATE) && txn->mt_dbs[dbi].md_entries == 0) { - if (txn->mt_flags & MDBX_TXN_RDONLY) - return /* FIXME: return extended info */ MDBX_EACCESS; - /* make sure flags changes get committed */ - txn->mt_dbs[dbi].md_flags = user_flags & DB_PERSISTENT_FLAGS; - txn->mt_flags |= MDBX_TXN_DIRTY; - /* обнуляем компараторы для установки в соответствии с флагами, - * либо заданных пользователем */ - env->me_dbxs[dbi].md_cmp = nullptr; - env->me_dbxs[dbi].md_dcmp = nullptr; - } else { + user_flags == MDBX_DB_ACCEDE) { + user_flags = env->me_db_flags[dbi] & DB_PERSISTENT_FLAGS; + } else if ((user_flags & MDBX_CREATE) == 0) return /* FIXME: return extended info */ MDBX_INCOMPATIBLE; + else { + eASSERT(env, env->me_db_flags[dbi] & DB_VALID); + if (txn->mt_dbi_state[dbi] & DBI_STALE) { + int err = fetch_sdb(txn, dbi); + if (unlikely(err != MDBX_SUCCESS)) /* bail out only when the refresh failed; on success fall through to the emptiness check */ + return err; + } + eASSERT(env, + (txn->mt_dbi_state[dbi] & (DBI_LINDO | DBI_VALID | DBI_STALE)) == + (DBI_LINDO | DBI_VALID)); + if (unlikely(txn->mt_dbs[dbi].md_leaf_pages)) + return /* FIXME: return extended info */ MDBX_INCOMPATIBLE; + + /* Пересоздаём subDB если там пусто */ + if (unlikely(txn->mt_cursors[dbi])) + return MDBX_DANGLING_DBI; + env->me_db_flags[dbi] = DB_POISON; +
atomic_store32(&env->me_dbi_seqs[dbi], dbi_seq_next(env, dbi), + mo_AcquireRelease); /* bump this handle's own sequence, not MAIN_DBI's */ + + const uint32_t seq = dbi_seq_next(env, dbi); + const uint16_t db_flags = user_flags & DB_PERSISTENT_FLAGS; + eASSERT(env, txn->mt_dbs[dbi].md_depth == 0 && + txn->mt_dbs[dbi].md_entries == 0 && + txn->mt_dbs[dbi].md_root == P_INVALID); + env->me_dbxs[dbi].md_cmp = + keycmp ? keycmp : get_default_keycmp(user_flags); + env->me_dbxs[dbi].md_dcmp = + datacmp ? datacmp : get_default_datacmp(user_flags); + txn->mt_dbs[dbi].md_flags = db_flags; + txn->mt_dbs[dbi].md_xsize = 0; + if (unlikely(setup_dbx(&env->me_dbxs[dbi], &txn->mt_dbs[dbi], + env->me_psize))) { + txn->mt_dbi_state[dbi] = DBI_LINDO; + txn->mt_flags |= MDBX_TXN_ERROR; + return MDBX_PROBLEM; + } + + env->me_db_flags[dbi] = db_flags | DB_VALID; + atomic_store32(&env->me_dbi_seqs[dbi], seq, mo_AcquireRelease); + txn->mt_dbi_seqs[dbi] = seq; + txn->mt_dbi_state[dbi] = DBI_LINDO | DBI_VALID | DBI_CREAT | DBI_DIRTY; + txn->mt_flags |= MDBX_TXN_DIRTY; } } if (!keycmp) - keycmp = env->me_dbxs[dbi].md_cmp ? env->me_dbxs[dbi].md_cmp - : get_default_keycmp(user_flags); + keycmp = (env->me_db_flags[dbi] & DB_VALID) + ? env->me_dbxs[dbi].md_cmp + : get_default_keycmp(user_flags); if (env->me_dbxs[dbi].md_cmp != keycmp) { - if (env->me_dbxs[dbi].md_cmp) + if (env->me_db_flags[dbi] & DB_VALID) return MDBX_EINVAL; env->me_dbxs[dbi].md_cmp = keycmp; } if (!datacmp) - datacmp = env->me_dbxs[dbi].md_dcmp ? env->me_dbxs[dbi].md_dcmp - : get_default_datacmp(user_flags); + datacmp = (env->me_db_flags[dbi] & DB_VALID) + ?
env->me_dbxs[dbi].md_dcmp + : get_default_datacmp(user_flags); if (env->me_dbxs[dbi].md_dcmp != datacmp) { - if (env->me_dbxs[dbi].md_dcmp) + if (env->me_db_flags[dbi] & DB_VALID) return MDBX_EINVAL; env->me_dbxs[dbi].md_dcmp = datacmp; } @@ -22574,34 +22835,207 @@ static int dbi_bind(MDBX_txn *txn, const MDBX_dbi dbi, unsigned user_flags, return MDBX_SUCCESS; } -static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, - unsigned user_flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, - MDBX_cmp_func *datacmp) { - int rc = MDBX_EINVAL; - if (unlikely(!dbi)) - return rc; +static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, + MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp, + MDBX_val name) { + MDBX_env *const env = txn->mt_env; - void *clone = nullptr; - bool locked = false; - if (unlikely((user_flags & ~DB_USABLE_FLAGS) != 0)) { - bailout: - tASSERT(txn, MDBX_IS_ERROR(rc)); - *dbi = 0; - if (locked) - ENSURE(txn->mt_env, - osal_fastmutex_release(&txn->mt_env->me_dbi_lock) == MDBX_SUCCESS); - osal_free(clone); - return rc; + /* Cannot mix named table(s) with DUPSORT flags */ + tASSERT(txn, + (txn->mt_dbi_state[MAIN_DBI] & (DBI_LINDO | DBI_VALID | DBI_STALE)) == + (DBI_LINDO | DBI_VALID)); + if (unlikely(txn->mt_dbs[MAIN_DBI].md_flags & MDBX_DUPSORT)) { + if (unlikely((user_flags & MDBX_CREATE) == 0)) + return MDBX_NOTFOUND; + if (unlikely(txn->mt_dbs[MAIN_DBI].md_leaf_pages)) + /* В MainDB есть записи, либо она уже использовалась. */ + return MDBX_INCOMPATIBLE; + + /* Пересоздаём MainDB когда там пусто. 
*/ + tASSERT(txn, txn->mt_dbs[MAIN_DBI].md_depth == 0 && + txn->mt_dbs[MAIN_DBI].md_entries == 0 && + txn->mt_dbs[MAIN_DBI].md_root == P_INVALID); + if (unlikely(txn->mt_cursors[MAIN_DBI])) + return MDBX_DANGLING_DBI; + env->me_db_flags[MAIN_DBI] = DB_POISON; + atomic_store32(&env->me_dbi_seqs[MAIN_DBI], dbi_seq_next(env, MAIN_DBI), + mo_AcquireRelease); + + const uint32_t seq = dbi_seq_next(env, MAIN_DBI); + const uint16_t main_flags = + txn->mt_dbs[MAIN_DBI].md_flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY); + env->me_dbxs[MAIN_DBI].md_cmp = get_default_keycmp(main_flags); + env->me_dbxs[MAIN_DBI].md_dcmp = get_default_datacmp(main_flags); + txn->mt_dbs[MAIN_DBI].md_flags = main_flags; + txn->mt_dbs[MAIN_DBI].md_xsize = 0; + if (unlikely(setup_dbx(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], + env->me_psize) != MDBX_SUCCESS)) { + txn->mt_dbi_state[MAIN_DBI] = DBI_LINDO; + txn->mt_flags |= MDBX_TXN_ERROR; + env->me_flags |= MDBX_FATAL_ERROR; + return MDBX_FATAL_ERROR; + } + env->me_db_flags[MAIN_DBI] = main_flags | DB_VALID; + txn->mt_dbi_seqs[MAIN_DBI] = + atomic_store32(&env->me_dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease); + txn->mt_dbi_state[MAIN_DBI] |= DBI_DIRTY; + txn->mt_flags |= MDBX_TXN_DIRTY; } - rc = check_txn(txn, MDBX_TXN_BLOCKED); + tASSERT(txn, env->me_dbxs[MAIN_DBI].md_cmp); + + /* Is the DB already open? */ + size_t slot = env->me_numdbs; + for (size_t scan = CORE_DBS; scan < env->me_numdbs; ++scan) { + if ((env->me_db_flags[scan] & DB_VALID) == 0) { + /* Remember this free slot */ + slot = (slot < scan) ? 
slot : scan; + continue; + } + if (!env->me_dbxs[MAIN_DBI].md_cmp(&name, &env->me_dbxs[scan].md_name)) { + slot = scan; + int err = dbi_check(txn, slot); + if (err == MDBX_BAD_DBI && + txn->mt_dbi_state[slot] == (DBI_OLDEN | DBI_LINDO)) { + /* хендл использовался, стал невалидным, + * но теперь явно пере-открывается в этой транзакци */ + eASSERT(env, !txn->mt_cursors[slot]); + txn->mt_dbi_state[slot] = DBI_LINDO; + err = dbi_check(txn, slot); + } + if (err == MDBX_SUCCESS) { + err = dbi_bind(txn, slot, user_flags, keycmp, datacmp); + if (likely(err == MDBX_SUCCESS)) { + goto done; + } + } + return err; + } + } + + /* Fail, if no free slot and max hit */ + if (unlikely(slot >= env->me_maxdbs)) + return MDBX_DBS_FULL; + + if (env->me_numdbs == slot) + eASSERT(env, !env->me_db_flags[slot] && + !env->me_dbxs[slot].md_name.iov_len && + !env->me_dbxs[slot].md_name.iov_base); + + env->me_db_flags[slot] = DB_POISON; + atomic_store32(&env->me_dbi_seqs[slot], dbi_seq_next(env, slot), + mo_AcquireRelease); + memset(&env->me_dbxs[slot], 0, sizeof(env->me_dbxs[slot])); + if (env->me_numdbs == slot) + env->me_numdbs = (unsigned)slot + 1; + eASSERT(env, slot < env->me_numdbs); + + int err = dbi_check(txn, slot); + eASSERT(env, err == MDBX_BAD_DBI); + if (err != MDBX_BAD_DBI) + return MDBX_PROBLEM; + + /* Find the DB info */ + MDBX_val body; + MDBX_cursor_couple cx; + int rc = cursor_init(&cx.outer, txn, MAIN_DBI); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + rc = cursor_set(&cx.outer, &name, &body, MDBX_SET).err; + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc != MDBX_NOTFOUND || !(user_flags & MDBX_CREATE)) + return rc; + } else { + /* make sure this is actually a table */ + MDBX_node *node = page_node(cx.outer.mc_pg[cx.outer.mc_top], + cx.outer.mc_ki[cx.outer.mc_top]); + if (unlikely((node_flags(node) & (F_DUPDATA | F_SUBDATA)) != F_SUBDATA)) + return MDBX_INCOMPATIBLE; + if (!MDBX_DISABLE_VALIDATION && unlikely(body.iov_len != sizeof(MDBX_db))) + return MDBX_CORRUPTED; + 
memcpy(&txn->mt_dbs[slot], body.iov_base, sizeof(MDBX_db)); + } + + /* Done here so we cannot fail after creating a new DB */ + void *clone = nullptr; + if (name.iov_len) { + clone = osal_malloc(name.iov_len); + if (unlikely(!clone)) + return MDBX_ENOMEM; + name.iov_base = memcpy(clone, name.iov_base, name.iov_len); + } else + name.iov_base = ""; + + uint8_t dbi_state = DBI_LINDO | DBI_VALID | DBI_FRESH; + if (unlikely(rc)) { + /* MDBX_NOTFOUND and MDBX_CREATE: Create new DB */ + tASSERT(txn, rc == MDBX_NOTFOUND); + body.iov_base = + memset(&txn->mt_dbs[slot], 0, body.iov_len = sizeof(MDBX_db)); + txn->mt_dbs[slot].md_root = P_INVALID; + txn->mt_dbs[slot].md_mod_txnid = txn->mt_txnid; + txn->mt_dbs[slot].md_flags = user_flags & DB_PERSISTENT_FLAGS; + WITH_CURSOR_TRACKING( + cx.outer, rc = cursor_put_checklen(&cx.outer, &name, &body, + F_SUBDATA | MDBX_NOOVERWRITE)); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + dbi_state |= DBI_DIRTY | DBI_CREAT; + txn->mt_flags |= MDBX_TXN_DIRTY; + tASSERT(txn, (txn->mt_dbi_state[MAIN_DBI] & DBI_DIRTY) != 0); + } + + /* Got info, register DBI in this txn */ + const uint32_t seq = dbi_seq_next(env, slot); + eASSERT(env, + env->me_db_flags[slot] == DB_POISON && !txn->mt_cursors[slot] && + (txn->mt_dbi_state[slot] & (DBI_LINDO | DBI_VALID)) == DBI_LINDO); + txn->mt_dbi_state[slot] = dbi_state; + memcpy(&txn->mt_dbs[slot], body.iov_base, sizeof(txn->mt_dbs[slot])); + env->me_db_flags[slot] = txn->mt_dbs[slot].md_flags; + rc = dbi_bind(txn, slot, user_flags, keycmp, datacmp); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; - if ((user_flags & MDBX_CREATE) && unlikely(txn->mt_flags & MDBX_TXN_RDONLY)) { - rc = MDBX_EACCESS; - goto bailout; - } + env->me_dbxs[slot].md_name = name; + env->me_db_flags[slot] = txn->mt_dbs[slot].md_flags | DB_VALID; + txn->mt_dbi_seqs[slot] = + atomic_store32(&env->me_dbi_seqs[slot], seq, mo_AcquireRelease); + +done: + *dbi = (MDBX_dbi)slot; + tASSERT(txn, + slot < txn->mt_numdbs && 
(env->me_db_flags[slot] & DB_VALID) != 0); + eASSERT(env, dbi_check(txn, slot) == MDBX_SUCCESS); + return MDBX_SUCCESS; + +bailout: + eASSERT(env, !txn->mt_cursors[slot] && !env->me_dbxs[slot].md_name.iov_len && + !env->me_dbxs[slot].md_name.iov_base); + txn->mt_dbi_state[slot] &= DBI_LINDO | DBI_OLDEN; + env->me_db_flags[slot] = 0; + osal_free(clone); + if (slot + 1 == env->me_numdbs) + txn->mt_numdbs = env->me_numdbs = (unsigned)slot; + return rc; +} + +static int dbi_open(MDBX_txn *txn, const MDBX_val *const name, + unsigned user_flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, + MDBX_cmp_func *datacmp) { + if (unlikely(!dbi)) + return MDBX_EINVAL; + *dbi = 0; + if (unlikely((user_flags & ~DB_USABLE_FLAGS) != 0)) + return MDBX_EINVAL; + + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if ((user_flags & MDBX_CREATE) && unlikely(txn->mt_flags & MDBX_TXN_RDONLY)) + return MDBX_EACCESS; switch (user_flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED | MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_ACCEDE)) { @@ -22610,8 +23044,7 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, break; __fallthrough /* fall through */; default: - rc = MDBX_EINVAL; - goto bailout; + return MDBX_EINVAL; case MDBX_DUPSORT: case MDBX_DUPSORT | MDBX_REVERSEDUP: @@ -22619,213 +23052,36 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP: case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP: case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: - case 0: + case MDBX_DB_DEFAULTS: break; } /* main table? 
*/ - if (table_name == MDBX_CHK_MAIN || table_name->iov_base == MDBX_CHK_MAIN) { + if (unlikely(name == MDBX_CHK_MAIN || name->iov_base == MDBX_CHK_MAIN)) { rc = dbi_bind(txn, MAIN_DBI, user_flags, keycmp, datacmp); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - *dbi = MAIN_DBI; + if (likely(rc == MDBX_SUCCESS)) + *dbi = MAIN_DBI; return rc; } - if (table_name == MDBX_CHK_GC || table_name->iov_base == MDBX_CHK_GC) { + if (unlikely(name == MDBX_CHK_GC || name->iov_base == MDBX_CHK_GC)) { rc = dbi_bind(txn, FREE_DBI, user_flags, keycmp, datacmp); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - *dbi = FREE_DBI; + if (likely(rc == MDBX_SUCCESS)) + *dbi = FREE_DBI; return rc; } - if (table_name == MDBX_CHK_META || table_name->iov_base == MDBX_CHK_META) { - rc = MDBX_EINVAL; - goto bailout; - } - - MDBX_val key = *table_name; - MDBX_env *const env = txn->mt_env; - if (key.iov_len > env->me_leaf_nodemax - NODESIZE - sizeof(MDBX_db)) + if (unlikely(name == MDBX_CHK_META || name->iov_base == MDBX_CHK_META)) + return MDBX_EINVAL; + if (unlikely(name->iov_len > + txn->mt_env->me_leaf_nodemax - NODESIZE - sizeof(MDBX_db))) return MDBX_EINVAL; - /* Cannot mix named table(s) with DUPSORT flags */ - if (unlikely(txn->mt_dbs[MAIN_DBI].md_flags & MDBX_DUPSORT)) { - if ((user_flags & MDBX_CREATE) == 0) { - rc = MDBX_NOTFOUND; - goto bailout; - } - if (txn->mt_dbs[MAIN_DBI].md_leaf_pages || env->me_dbxs[MAIN_DBI].md_cmp) { - /* В MAIN_DBI есть записи либо она уже использовалась. */ - rc = MDBX_INCOMPATIBLE; - goto bailout; - } - /* Пересоздаём MAIN_DBI если там пусто. 
*/ - atomic_store32(&txn->mt_dbi_seqs[MAIN_DBI], dbi_seq(env, MAIN_DBI), - mo_AcquireRelease); - tASSERT(txn, txn->mt_dbs[MAIN_DBI].md_depth == 0 && - txn->mt_dbs[MAIN_DBI].md_entries == 0 && - txn->mt_dbs[MAIN_DBI].md_root == P_INVALID); - txn->mt_dbs[MAIN_DBI].md_flags &= MDBX_REVERSEKEY | MDBX_INTEGERKEY; - txn->mt_dbi_state[MAIN_DBI] |= DBI_DIRTY; - txn->mt_flags |= MDBX_TXN_DIRTY; - env->me_dbxs[MAIN_DBI].md_cmp = - get_default_keycmp(txn->mt_dbs[MAIN_DBI].md_flags); - env->me_dbxs[MAIN_DBI].md_dcmp = - get_default_datacmp(txn->mt_dbs[MAIN_DBI].md_flags); + rc = osal_fastmutex_acquire(&txn->mt_env->me_dbi_lock); + if (likely(rc == MDBX_SUCCESS)) { + rc = dbi_open_locked(txn, user_flags, dbi, keycmp, datacmp, *name); + ENSURE(txn->mt_env, + osal_fastmutex_release(&txn->mt_env->me_dbi_lock) == MDBX_SUCCESS); } - - tASSERT(txn, env->me_dbxs[MAIN_DBI].md_cmp); - - /* Is the DB already open? */ - MDBX_dbi scan, slot; - for (slot = scan = txn->mt_numdbs; --scan >= CORE_DBS;) { - if (!env->me_dbxs[scan].md_name.iov_base) { - /* Remember this free slot */ - slot = scan; - continue; - } - if (key.iov_len == env->me_dbxs[scan].md_name.iov_len && - !memcmp(key.iov_base, env->me_dbxs[scan].md_name.iov_base, - key.iov_len)) { - rc = dbi_bind(txn, scan, user_flags, keycmp, datacmp); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - *dbi = scan; - return rc; - } - } - - /* Fail, if no free slot and max hit */ - if (unlikely(slot >= env->me_maxdbs)) { - rc = MDBX_DBS_FULL; - goto bailout; - } - - /* Find the DB info */ - MDBX_val data; - MDBX_cursor_couple couple; - rc = cursor_init(&couple.outer, txn, MAIN_DBI); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - rc = cursor_set(&couple.outer, &key, &data, MDBX_SET).err; - if (unlikely(rc != MDBX_SUCCESS)) { - if (rc != MDBX_NOTFOUND || !(user_flags & MDBX_CREATE)) - goto bailout; - } else { - /* make sure this is actually a table */ - MDBX_node *node = page_node(couple.outer.mc_pg[couple.outer.mc_top], - 
couple.outer.mc_ki[couple.outer.mc_top]); - if (unlikely((node_flags(node) & (F_DUPDATA | F_SUBDATA)) != F_SUBDATA)) { - rc = MDBX_INCOMPATIBLE; - goto bailout; - } - if (!MDBX_DISABLE_VALIDATION && unlikely(data.iov_len != sizeof(MDBX_db))) { - rc = MDBX_CORRUPTED; - goto bailout; - } - } - - if (rc != MDBX_SUCCESS && unlikely(txn->mt_flags & MDBX_TXN_RDONLY)) { - rc = MDBX_EACCESS; - goto bailout; - } - - /* Done here so we cannot fail after creating a new DB */ - if (key.iov_len) { - clone = osal_malloc(key.iov_len); - if (unlikely(!clone)) { - rc = MDBX_ENOMEM; - goto bailout; - } - key.iov_base = memcpy(clone, key.iov_base, key.iov_len); - } else - key.iov_base = ""; - - int err = osal_fastmutex_acquire(&env->me_dbi_lock); - if (unlikely(err != MDBX_SUCCESS)) { - rc = err; - goto bailout; - } - locked = true; - - /* Import handles from env */ - dbi_import_locked(txn); - - /* Rescan after mutex acquisition & import handles */ - for (slot = scan = txn->mt_numdbs; --scan >= CORE_DBS;) { - if (!env->me_dbxs[scan].md_name.iov_base) { - /* Remember this free slot */ - slot = scan; - continue; - } - if (key.iov_len == env->me_dbxs[scan].md_name.iov_len && - !memcmp(key.iov_base, env->me_dbxs[scan].md_name.iov_base, - key.iov_len)) { - rc = dbi_bind(txn, scan, user_flags, keycmp, datacmp); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - slot = scan; - goto done; - } - } - - if (unlikely(slot >= env->me_maxdbs)) { - rc = MDBX_DBS_FULL; - goto bailout; - } - - unsigned dbiflags = DBI_FRESH | DBI_VALID | DBI_USRVALID; - MDBX_db db_dummy; - if (unlikely(rc)) { - /* MDBX_NOTFOUND and MDBX_CREATE: Create new DB */ - tASSERT(txn, rc == MDBX_NOTFOUND); - memset(&db_dummy, 0, sizeof(db_dummy)); - db_dummy.md_root = P_INVALID; - db_dummy.md_mod_txnid = txn->mt_txnid; - db_dummy.md_flags = user_flags & DB_PERSISTENT_FLAGS; - data.iov_len = sizeof(db_dummy); - data.iov_base = &db_dummy; - WITH_CURSOR_TRACKING( - couple.outer, rc = cursor_put_checklen(&couple.outer, &key, 
&data, - F_SUBDATA | MDBX_NOOVERWRITE)); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - - dbiflags |= DBI_DIRTY | DBI_CREAT; - txn->mt_flags |= MDBX_TXN_DIRTY; - tASSERT(txn, (txn->mt_dbi_state[MAIN_DBI] & DBI_DIRTY) != 0); - } - - /* Got info, register DBI in this txn */ - memset(env->me_dbxs + slot, 0, sizeof(MDBX_dbx)); - memcpy(&txn->mt_dbs[slot], data.iov_base, sizeof(MDBX_db)); - env->me_db_flags[slot] = 0; - rc = dbi_bind(txn, slot, user_flags, keycmp, datacmp); - if (unlikely(rc != MDBX_SUCCESS)) { - tASSERT(txn, (dbiflags & DBI_CREAT) == 0); - goto bailout; - } - - txn->mt_dbi_state[slot] = (uint8_t)dbiflags; - env->me_dbxs[slot].md_name = key; - txn->mt_dbi_seqs[slot].weak = env->me_dbi_seqs[slot].weak = - dbi_seq(env, slot); - if (!(dbiflags & DBI_CREAT)) - env->me_db_flags[slot] = txn->mt_dbs[slot].md_flags | DB_VALID; - if (txn->mt_numdbs == slot) { - txn->mt_cursors[slot] = NULL; - osal_compiler_barrier(); - txn->mt_numdbs = slot + 1; - } - if (env->me_numdbs <= slot) { - osal_memory_fence(mo_AcquireRelease, true); - env->me_numdbs = slot + 1; - } - -done: - *dbi = slot; - ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); - return MDBX_SUCCESS; + return rc; } static int dbi_open_cstr(MDBX_txn *txn, const char *name_cstr, @@ -22874,8 +23130,9 @@ __cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, if (unlikely(!dest)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_VALID))) - return MDBX_BAD_DBI; + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid); if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) @@ -22909,14 +23166,18 @@ static int dbi_close_locked(MDBX_env *env, MDBX_dbi dbi) { env->me_dbxs[dbi].md_name.iov_len = 0; osal_memory_fence(mo_AcquireRelease, true); env->me_dbxs[dbi].md_name.iov_base = NULL; + osal_flush_incoherent_cpu_writeback(); osal_free(ptr); if 
(env->me_numdbs == dbi + 1) { size_t i = env->me_numdbs; - do + do { --i; - while (i > CORE_DBS && !env->me_dbxs[i - 1].md_name.iov_base); - env->me_numdbs = (MDBX_dbi)i; + eASSERT(env, i >= CORE_DBS); + eASSERT(env, !env->me_db_flags[i] && !env->me_dbxs[i].md_name.iov_len && + !env->me_dbxs[i].md_name.iov_base); + } while ((env->me_db_flags[i - 1] & DB_VALID) == 0); + env->me_numdbs = (unsigned)i; } return MDBX_SUCCESS; @@ -22955,8 +23216,9 @@ int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, if (unlikely(!flags || !state)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_VALID))) - return MDBX_BAD_DBI; + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; *flags = txn->mt_dbs[dbi].md_flags & DB_PERSISTENT_FLAGS; *state = @@ -23080,22 +23342,20 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { /* Can't delete the main DB */ if (del && dbi >= CORE_DBS) { - rc = delete (txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL, F_SUBDATA); + rc = delete(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL, F_SUBDATA); if (likely(rc == MDBX_SUCCESS)) { tASSERT(txn, txn->mt_dbi_state[MAIN_DBI] & DBI_DIRTY); tASSERT(txn, txn->mt_flags & MDBX_TXN_DIRTY); - txn->mt_dbi_state[dbi] = DBI_STALE; + txn->mt_dbi_state[dbi] = DBI_LINDO | DBI_OLDEN; MDBX_env *env = txn->mt_env; rc = osal_fastmutex_acquire(&env->me_dbi_lock); - if (unlikely(rc != MDBX_SUCCESS)) { - txn->mt_flags |= MDBX_TXN_ERROR; + if (likely(rc == MDBX_SUCCESS)) { + dbi_close_locked(env, dbi); + ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); goto bailout; } - dbi_close_locked(env, dbi); - ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); - } else { - txn->mt_flags |= MDBX_TXN_ERROR; } + txn->mt_flags |= MDBX_TXN_ERROR; } else { /* reset the DB record, mark it dirty */ txn->mt_dbi_state[dbi] |= DBI_DIRTY; @@ -23753,8 +24013,8 @@ __cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_walk_sdb_t *sdb, MDBX_cursor_couple couple; 
MDBX_dbx dbx = {.md_klen_min = INT_MAX}; - uint8_t dbistate = DBI_VALID | DBI_AUDIT; - int rc = couple_init(&couple, ~0u, ctx->mw_txn, db, &dbx, &dbistate); + uint8_t dbi_state = DBI_LINDO | DBI_VALID; + int rc = couple_init(&couple, ~0u, ctx->mw_txn, db, &dbx, &dbi_state); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -24137,9 +24397,6 @@ int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, if (unlikely(begin_key == MDBX_EPSILON && end_key == MDBX_EPSILON)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_USRVALID))) - return MDBX_BAD_DBI; - MDBX_cursor_couple begin; /* LY: first, initialize cursor to refresh a DB in case it have DB_STALE */ rc = cursor_init(&begin.outer, txn, dbi); @@ -24313,7 +24570,7 @@ int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, (flags & (MDBX_CURRENT | MDBX_RESERVE)) != MDBX_CURRENT)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_USRVALID))) + if (unlikely(dbi <= FREE_DBI)) return MDBX_BAD_DBI; if (unlikely(flags & @@ -24493,8 +24750,9 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, if (unlikely(rc != MDBX_SUCCESS)) return rc; - if (unlikely(!check_dbi(txn, dbi, DBI_USRVALID))) - return MDBX_BAD_DBI; + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; if (unlikely(txn->mt_dbi_state[dbi] & DBI_STALE)) { rc = fetch_sdb(txn, dbi); @@ -24507,7 +24765,7 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, *result = dbs->md_seq; if (likely(increment > 0)) { - if (unlikely(txn->mt_flags & MDBX_TXN_RDONLY)) + if (unlikely(dbi == FREE_DBI || (txn->mt_flags & MDBX_TXN_RDONLY) != 0)) return MDBX_EACCESS; uint64_t new = dbs->md_seq + increment; @@ -26721,9 +26979,13 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr, (chk->flags & MDBX_CHK_IGNORE_ORDER) ? 
cmp_equal_or_greater : nullptr); if (unlikely(err)) { + tASSERT(txn, dbi >= txn->mt_env->me_numdbs || + (txn->mt_env->me_db_flags[dbi] & DB_VALID) == 0); chk_error_rc(scope, err, "mdbx_dbi_open"); goto bailout; } + tASSERT(txn, dbi < txn->mt_env->me_numdbs && + (txn->mt_env->me_db_flags[dbi] & DB_VALID) != 0); } const MDBX_db *const db = txn->mt_dbs + dbi; @@ -27030,8 +27292,7 @@ bailout: } mdbx_cursor_close(cursor); - if (dbi >= CORE_DBS && !txn->mt_cursors[dbi] && - txn->mt_dbi_state[dbi] == (DBI_FRESH | DBI_VALID | DBI_USRVALID)) + if (!txn->mt_cursors[dbi] && (txn->mt_dbi_state[dbi] & DBI_FRESH)) mdbx_dbi_close(env, dbi); } return err; diff --git a/src/internals.h b/src/internals.h index d8dafc2d..a3e8e5cc 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1192,19 +1192,23 @@ struct MDBX_txn { /* Array of MDBX_db records for each known DB */ MDBX_db *mt_dbs; +#if MDBX_ENABLE_DBI_SPARSE + unsigned *mt_dbi_sparse; +#endif /* MDBX_ENABLE_DBI_SPARSE */ + /* Non-shared DBI state flags inside transaction */ -#define DBI_DIRTY 0x01 /* DB was written in this txn */ -#define DBI_STALE 0x02 /* Named-DB record is older than txnID */ -#define DBI_FRESH 0x04 /* Named-DB handle opened in this txn */ -#define DBI_CREAT 0x08 /* Named-DB handle created in this txn */ -#define DBI_VALID 0x10 /* Handle is valid, see also DB_VALID */ -#define DBI_USRVALID 0x20 /* As DB_VALID, but not set for FREE_DBI */ -#define DBI_AUDIT 0x40 /* Internal flag for accounting during audit */ +#define DBI_DIRTY 0x01 /* DB was written in this txn */ +#define DBI_STALE 0x02 /* Named-DB record is older than txnID */ +#define DBI_FRESH 0x04 /* Named-DB handle opened in this txn */ +#define DBI_CREAT 0x08 /* Named-DB handle created in this txn */ +#define DBI_VALID 0x10 /* Handle is valid, see also DB_VALID */ +#define DBI_OLDEN 0x40 /* Handle was closed/reopened outside txn */ +#define DBI_LINDO 0x80 /* Lazy initialization done for DBI-slot */ /* Array of non-shared txn's flags of DBI */ uint8_t 
*mt_dbi_state; /* Array of sequence numbers for each DB handle. */ - MDBX_atomic_uint32_t *mt_dbi_seqs; + uint32_t *mt_dbi_seqs; MDBX_cursor **mt_cursors; MDBX_canary mt_canary; @@ -1660,7 +1664,8 @@ typedef struct MDBX_node { /* mdbx_dbi_open() flags */ #define DB_USABLE_FLAGS (DB_PERSISTENT_FLAGS | MDBX_CREATE | MDBX_DB_ACCEDE) -#define DB_VALID 0x8000 /* DB handle is valid, for me_db_flags */ +#define DB_VALID 0x8000u /* DB handle is valid, for me_db_flags */ +#define DB_POISON 0x7fffu /* update pending */ #define DB_INTERNAL_FLAGS DB_VALID #if DB_INTERNAL_FLAGS & DB_USABLE_FLAGS diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 9ac6cd71..1b52739a 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -286,6 +286,8 @@ DEFINE_EXCEPTION(thread_mismatch) DEFINE_EXCEPTION(transaction_full) DEFINE_EXCEPTION(transaction_overlapping) DEFINE_EXCEPTION(duplicated_lck_file) +DEFINE_EXCEPTION(dangling_map_id) + #undef DEFINE_EXCEPTION __cold const char *error::what() const noexcept { @@ -372,6 +374,7 @@ __cold void error::throw_exception() const { CASE_EXCEPTION(transaction_full, MDBX_TXN_FULL); CASE_EXCEPTION(transaction_overlapping, MDBX_TXN_OVERLAPPING); CASE_EXCEPTION(duplicated_lck_file, MDBX_DUPLICATED_CLK); + CASE_EXCEPTION(dangling_map_id, MDBX_DANGLING_DBI); #undef CASE_EXCEPTION default: if (is_mdbx_error()) diff --git a/src/options.h b/src/options.h index 6e542153..65bb797c 100644 --- a/src/options.h +++ b/src/options.h @@ -163,6 +163,13 @@ #error MDBX_AVOID_MSYNC must be defined as 0 or 1 #endif /* MDBX_AVOID_MSYNC */ +/** FIXME */ +#ifndef MDBX_ENABLE_DBI_SPARSE +#define MDBX_ENABLE_DBI_SPARSE 1 +#elif !(MDBX_ENABLE_DBI_SPARSE == 0 || MDBX_ENABLE_DBI_SPARSE == 1) +#error MDBX_ENABLE_DBI_SPARSE must be defined as 0 or 1 +#endif /* MDBX_ENABLE_DBI_SPARSE */ + /** Controls sort order of internal page number lists. * This mostly experimental/advanced option with not for regular MDBX users. 
* \warning The database format depend on this option and libmdbx built with From 3622669a9fbc46c29421f0dc751c56ada8c14104 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 5 Nov 2023 22:14:33 +0300 Subject: [PATCH 027/443] =?UTF-8?q?mdbx:=20=D0=BF=D1=80=D0=BE=D0=B2=D0=B5?= =?UTF-8?q?=D1=80=D0=BA=D0=B0=20db-=D1=84=D0=BB=D0=B0=D0=B3=D0=BE=D0=B2=20?= =?UTF-8?q?=D0=B8=20=D1=80=D0=B5-=D0=B8=D0=BD=D0=B8=D1=86=D0=B8=D0=B0?= =?UTF-8?q?=D0=BB=D0=B8=D0=B7=D0=B0=D1=86=D0=B8=D1=8F=20MainDB=20=D0=BF?= =?UTF-8?q?=D1=80=D0=B8=20=D0=B8=D0=B7=D0=BC=D0=B5=D0=BD=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D0=B8=20=D1=84=D0=BB=D0=B0=D0=B3=D0=BE=D0=B2=20=D0=B4=D1=80?= =?UTF-8?q?=D1=83=D0=B3=D0=B8=D0=BC=20=D0=BF=D1=80=D0=BE=D1=86=D0=B5=D1=81?= =?UTF-8?q?=D1=81=D0=BE=D0=BC.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 159 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 129 insertions(+), 30 deletions(-) diff --git a/src/core.c b/src/core.c index af6ff541..cdb25b1b 100644 --- a/src/core.c +++ b/src/core.c @@ -3956,6 +3956,23 @@ static void cursors_eot(MDBX_txn *txn, const bool merge) { static __noinline int dbi_import(MDBX_txn *txn, const size_t dbi); +static __inline bool db_check_flags(uint16_t db_flags) { + switch (db_flags & ~(DB_VALID | MDBX_REVERSEKEY | MDBX_INTEGERKEY)) { + default: + NOTICE("invalid db-flags 0x%x", db_flags); + return false; + case MDBX_DUPSORT: + case MDBX_DUPSORT | MDBX_REVERSEDUP: + case MDBX_DUPSORT | MDBX_DUPFIXED: + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP: + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP: + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: + case MDBX_DB_DEFAULTS: + return (db_flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) != + (MDBX_REVERSEKEY | MDBX_INTEGERKEY); + } +} + static __inline uint8_t dbi_state(const MDBX_txn 
*txn, const size_t dbi) { STATIC_ASSERT(DBI_DIRTY == MDBX_DBI_DIRTY && DBI_STALE == MDBX_DBI_STALE && DBI_FRESH == MDBX_DBI_FRESH && DBI_CREAT == MDBX_DBI_CREAT); @@ -8992,6 +9009,9 @@ __hot static int coherency_check_head(MDBX_txn *txn, const meta_ptr_t head, if (unlikely(!coherency_check(txn->mt_env, head.txnid, txn->mt_dbs, head.ptr_v, *timestamp == 0))) return coherency_timeout(timestamp, -1, txn->mt_env); + + tASSERT(txn, txn->mt_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); + tASSERT(txn, db_check_flags(txn->mt_dbs[MAIN_DBI].md_flags)); return MDBX_SUCCESS; } @@ -9015,6 +9035,9 @@ static int coherency_check_written(const MDBX_env *env, const txnid_t txnid, } if (unlikely(!coherency_check(env, head_txnid, meta->mm_dbs, meta, report))) return coherency_timeout(timestamp, pgno, env); + + eASSERT(env, meta->mm_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); + eASSERT(env, db_check_flags(meta->mm_dbs[MAIN_DBI].md_flags)); return MDBX_SUCCESS; } @@ -9177,6 +9200,8 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { ENSURE(env, txn->mt_txnid >= /* paranoia is appropriate here */ env->me_lck ->mti_oldest_reader.weak); + tASSERT(txn, txn->mt_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); + tASSERT(txn, db_check_flags(txn->mt_dbs[MAIN_DBI].md_flags)); } else { eASSERT(env, (flags & ~(MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_SPILLS | MDBX_WRITEMAP)) == 0); @@ -9234,6 +9259,8 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { goto bailout; } + tASSERT(txn, txn->mt_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); + tASSERT(txn, db_check_flags(txn->mt_dbs[MAIN_DBI].md_flags)); txn->mt_flags = flags; txn->mt_child = NULL; txn->tw.loose_pages = NULL; @@ -9269,6 +9296,8 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { txn->mt_txnid + ((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == 0); /* Setup db info */ + tASSERT(txn, txn->mt_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); + tASSERT(txn, db_check_flags(txn->mt_dbs[MAIN_DBI].md_flags)); 
VALGRIND_MAKE_MEM_UNDEFINED(txn->mt_dbi_state, env->me_maxdbs); #if MDBX_ENABLE_DBI_SPARSE txn->mt_numdbs = CORE_DBS; @@ -9287,25 +9316,74 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { txn->mt_cursors[FREE_DBI] = nullptr; txn->mt_cursors[MAIN_DBI] = nullptr; txn->mt_dbi_seqs[FREE_DBI] = 0; - struct dbi_snap_result main_snap = dbi_snap(env, MAIN_DBI); - if (unlikely(main_snap.flags != + txn->mt_dbi_seqs[MAIN_DBI] = + atomic_load32(&env->me_dbi_seqs[MAIN_DBI], mo_AcquireRelease); + + if (unlikely(env->me_db_flags[MAIN_DBI] != (DB_VALID | txn->mt_dbs[MAIN_DBI].md_flags))) { - if (main_snap.flags & DB_VALID) { - rc = MDBX_INCOMPATIBLE; - goto bailout; + const bool need_txn_lock = env->me_txn0 && env->me_txn0->mt_owner != tid; + bool should_unlock = false; + if (need_txn_lock) { + rc = osal_txn_lock(env, true); + if (rc == MDBX_SUCCESS) + should_unlock = true; + else if (rc != MDBX_BUSY && rc != MDBX_EDEADLK) + goto bailout; } - env->me_db_flags[MAIN_DBI] = DB_VALID | txn->mt_dbs[MAIN_DBI].md_flags; - main_snap.sequence = - atomic_store32(&env->me_dbi_seqs[MAIN_DBI], dbi_seq_next(env, MAIN_DBI), - mo_AcquireRelease); + rc = osal_fastmutex_acquire(&env->me_dbi_lock); + if (likely(rc == MDBX_SUCCESS)) { + uint32_t seq = dbi_seq_next(env, MAIN_DBI); + /* проверяем повторно после захвата блокировки */ + if (env->me_db_flags[MAIN_DBI] != + (DB_VALID | txn->mt_dbs[MAIN_DBI].md_flags)) { + if (!need_txn_lock || should_unlock || + /* если нет активной пишущей транзакции, + * то следующая будет ждать на me_dbi_lock */ + !env->me_txn) { + if (env->me_db_flags[MAIN_DBI] != 0 || MDBX_DEBUG) + NOTICE("renew MainDB for %s-txn %" PRIaTXN + " since db-flags changes 0x%x -> 0x%x", + (txn->mt_flags & MDBX_TXN_RDONLY) ? 
"ro" : "rw", + txn->mt_txnid, env->me_db_flags[MAIN_DBI] & ~DB_VALID, + txn->mt_dbs[MAIN_DBI].md_flags); + env->me_db_flags[MAIN_DBI] = DB_POISON; + atomic_store32(&env->me_dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease); + rc = setup_dbx(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], + env->me_psize); + if (likely(rc == MDBX_SUCCESS)) { + seq = dbi_seq_next(env, MAIN_DBI); + env->me_db_flags[MAIN_DBI] = + DB_VALID | txn->mt_dbs[MAIN_DBI].md_flags; + txn->mt_dbi_seqs[MAIN_DBI] = atomic_store32( + &env->me_dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease); + } + } else { + ERROR("MainDB db-flags changes 0x%x -> 0x%x ahead of read-txn " + "%" PRIaTXN, + txn->mt_dbs[MAIN_DBI].md_flags, + env->me_db_flags[MAIN_DBI] & ~DB_VALID, txn->mt_txnid); + rc = MDBX_INCOMPATIBLE; + } + } + ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); + } else { + DEBUG("me_dbi_lock failed, err %d", rc); + } + if (should_unlock) + osal_txn_unlock(env); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; } - txn->mt_dbi_seqs[MAIN_DBI] = main_snap.sequence; - rc = - setup_dbx(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], env->me_psize); - if (unlikely(rc != MDBX_SUCCESS)) + if (unlikely(txn->mt_dbs[FREE_DBI].md_flags != MDBX_INTEGERKEY)) { + ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB", + txn->mt_dbs[FREE_DBI].md_flags); + rc = MDBX_INCOMPATIBLE; goto bailout; + } + tASSERT(txn, txn->mt_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); + tASSERT(txn, db_check_flags(txn->mt_dbs[MAIN_DBI].md_flags)); if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) { WARNING("%s", "environment had fatal error, must shutdown!"); rc = MDBX_PANIC; @@ -9390,13 +9468,6 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { } #endif /* Windows */ } else { - if (unlikely(txn->mt_dbs[FREE_DBI].md_flags != MDBX_INTEGERKEY)) { - ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB", - txn->mt_dbs[FREE_DBI].md_flags); - rc = MDBX_INCOMPATIBLE; - goto bailout; - } - tASSERT(txn, txn == 
env->me_txn0); MDBX_cursor *const gc = ptr_disp(txn, sizeof(MDBX_txn)); rc = cursor_init(gc, txn, FREE_DBI); @@ -12404,6 +12475,17 @@ static int validate_meta(MDBX_env *env, MDBX_meta *const meta, return MDBX_RESULT_TRUE; } + if (unlikely(meta->mm_dbs[FREE_DBI].md_flags != MDBX_INTEGERKEY)) { + WARNING("meta[%u] has invalid %s flags 0x%u, skip it", meta_number, + "GC/FreeDB", meta->mm_dbs[FREE_DBI].md_flags); + return MDBX_INCOMPATIBLE; + } + if (unlikely(!db_check_flags(meta->mm_dbs[MAIN_DBI].md_flags))) { + WARNING("meta[%u] has invalid %s flags 0x%u, skip it", meta_number, + "MainDB", meta->mm_dbs[MAIN_DBI].md_flags); + return MDBX_INCOMPATIBLE; + } + DEBUG("checking meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO " +%u -%u, txn_id %" PRIaTXN ", %s", @@ -12788,6 +12870,8 @@ __cold static MDBX_meta *init_metas(const MDBX_env *env, void *buffer) { static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending, meta_troika_t *const troika) { eASSERT(env, ((env->me_flags ^ flags) & MDBX_WRITEMAP) == 0); + eASSERT(env, pending->mm_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); + eASSERT(env, db_check_flags(pending->mm_dbs[MAIN_DBI].md_flags)); const MDBX_meta *const meta0 = METAPAGE(env, 0); const MDBX_meta *const meta1 = METAPAGE(env, 1); const MDBX_meta *const meta2 = METAPAGE(env, 2); @@ -13086,6 +13170,8 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending, target->mm_geo = pending->mm_geo; target->mm_dbs[FREE_DBI] = pending->mm_dbs[FREE_DBI]; target->mm_dbs[MAIN_DBI] = pending->mm_dbs[MAIN_DBI]; + eASSERT(env, target->mm_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); + eASSERT(env, db_check_flags(target->mm_dbs[MAIN_DBI].md_flags)); target->mm_canary = pending->mm_canary; memcpy(target->mm_pages_retired, pending->mm_pages_retired, 8); jitter4testing(true); @@ -13140,6 +13226,8 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending, 
env->me_lck->mti_pgop_stat.wops.weak += 1; #endif /* MDBX_ENABLE_PGOP_STAT */ const MDBX_meta undo_meta = *target; + eASSERT(env, pending->mm_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); + eASSERT(env, db_check_flags(pending->mm_dbs[MAIN_DBI].md_flags)); rc = osal_pwrite(env->me_fd4meta, pending, sizeof(MDBX_meta), ptr_dist(target, env->me_map)); if (unlikely(rc != MDBX_SUCCESS)) { @@ -13879,6 +13967,19 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc, pv2pages(header.mm_geo.shrink_pv), unaligned_peek_u64(4, header.mm_txnid_a), durable_caption(&header)); + if (unlikely(header.mm_dbs[FREE_DBI].md_flags != MDBX_INTEGERKEY)) { + ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB", + header.mm_dbs[FREE_DBI].md_flags); + return MDBX_INCOMPATIBLE; + } + env->me_db_flags[FREE_DBI] = DB_VALID | MDBX_INTEGERKEY; + env->me_dbxs[FREE_DBI].md_cmp = cmp_int_align4; /* aligned MDBX_INTEGERKEY */ + env->me_dbxs[FREE_DBI].md_dcmp = cmp_lenfast; + env->me_dbxs[FREE_DBI].md_klen_max = env->me_dbxs[FREE_DBI].md_klen_min = 8; + env->me_dbxs[FREE_DBI].md_vlen_min = 4; + env->me_dbxs[FREE_DBI].md_vlen_max = + mdbx_env_get_maxvalsize_ex(env, MDBX_INTEGERKEY); + if (env->me_psize != header.mm_psize) setup_pagesize(env, header.mm_psize); const size_t used_bytes = pgno2bytes(env, header.mm_geo.next); @@ -14631,7 +14732,7 @@ __cold static int __must_check_result override_meta(MDBX_env *env, if (shape) { if (txnid && unlikely(!check_meta_coherency(env, shape, false))) { ERROR("bailout overriding meta-%zu since model failed " - "freedb/maindb %s-check for txnid #%" PRIaTXN, + "FreeDB/MainDB %s-check for txnid #%" PRIaTXN, target, "pre", constmeta_txnid(shape)); return MDBX_PROBLEM; } @@ -14655,7 +14756,7 @@ __cold static int __must_check_result override_meta(MDBX_env *env, sizeof(model->mm_magic_and_version)); if (unlikely(!check_meta_coherency(env, model, false))) { ERROR("bailout overriding meta-%zu since model failed " - "freedb/maindb %s-check for txnid #%" PRIaTXN, + 
"FreeDB/MainDB %s-check for txnid #%" PRIaTXN, target, "post", txnid); return MDBX_PROBLEM; } @@ -15119,13 +15220,6 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, } memcpy(env->me_pathname, env_pathname.dxb, env_pathname.ent_len * sizeof(pathchar_t)); - env->me_db_flags[FREE_DBI] = DB_VALID | MDBX_INTEGERKEY; - env->me_dbxs[FREE_DBI].md_cmp = cmp_int_align4; /* aligned MDBX_INTEGERKEY */ - env->me_dbxs[FREE_DBI].md_dcmp = cmp_lenfast; - env->me_dbxs[FREE_DBI].md_klen_max = env->me_dbxs[FREE_DBI].md_klen_min = 8; - env->me_dbxs[FREE_DBI].md_vlen_min = 4; - env->me_dbxs[FREE_DBI].md_vlen_max = - mdbx_env_get_maxvalsize_ex(env, MDBX_INTEGERKEY); /* Использование O_DSYNC или FILE_FLAG_WRITE_THROUGH: * @@ -16059,6 +16153,10 @@ __hot __noinline static int page_search_root(MDBX_cursor *mc, static int setup_dbx(MDBX_dbx *const dbx, const MDBX_db *const db, const unsigned pagesize) { + if (unlikely(!db_check_flags(db->md_flags))) { + ERROR("incompatible or invalid db.md_flags (%u) ", db->md_flags); + return MDBX_INCOMPATIBLE; + } if (unlikely(!dbx->md_cmp)) { dbx->md_cmp = get_default_keycmp(db->md_flags); dbx->md_dcmp = get_default_datacmp(db->md_flags); @@ -22742,7 +22840,7 @@ static int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, eASSERT(env, !(txn->mt_dbi_state[dbi] & DBI_VALID) || (txn->mt_dbs[dbi].md_flags | DB_VALID) == env->me_db_flags[dbi]); - eASSERT(env, env->me_dbxs[dbi].md_name.iov_base); + eASSERT(env, env->me_dbxs[dbi].md_name.iov_base || dbi < CORE_DBS); } /* Если dbi уже использовался, то корректными считаем четыре варианта: @@ -23055,6 +23153,7 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const name, case MDBX_DB_DEFAULTS: break; } + tASSERT(txn, db_check_flags((uint16_t)user_flags)); /* main table? 
*/ if (unlikely(name == MDBX_CHK_MAIN || name->iov_base == MDBX_CHK_MAIN)) { From 96504bf338fda2f739419bdfef2aad077e8acc9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 4 Nov 2023 23:45:29 +0300 Subject: [PATCH 028/443] =?UTF-8?q?mdbx:=20=D0=BE=D1=82=D0=BB=D0=BE=D0=B6?= =?UTF-8?q?=D0=B5=D0=BD=D0=BD=D0=BE=D0=B5=20=D0=BE=D1=81=D0=B2=D0=BE=D0=B1?= =?UTF-8?q?=D0=BE=D0=B6=D0=B4=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B8=D0=BC=D0=B5?= =?UTF-8?q?=D0=BD=20=D1=81=D0=B2=D1=8F=D0=B7=D0=B0=D0=BD=D0=BD=D1=8B=D1=85?= =?UTF-8?q?=20c=20dbi-=D1=85=D0=B5=D0=BD=D0=B4=D0=BB=D0=B0=D0=BC=D0=B8=20?= =?UTF-8?q?=D0=B8=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D0=B5=20=D0=BE=D0=BF=D1=86=D0=B8=D0=B8=20`MDBX=5FENABLE=5FDBI?= =?UTF-8?q?=5FLOCKFREE`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Отложенное освобождение позволяет реализовать безопасное выполнение fastpath/lockfree при повторном открытии из других потоков/транзакций уже открытых subDB, что и происходит при активации добавленной опции сборки `MDBX_ENABLE_DBI_LOCKFREE`.
--- CMakeLists.txt | 1 + src/config.h.in | 1 + src/core.c | 196 ++++++++++++++++++++++++++++++++++++++---------- src/internals.h | 8 ++ src/options.h | 7 ++ 5 files changed, 173 insertions(+), 40 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 89eee769..90c2d766 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -532,6 +532,7 @@ add_mdbx_option(MDBX_ENABLE_PGOP_STAT "Gathering statistics for page operations" add_mdbx_option(MDBX_ENABLE_PROFGC "Profiling of GC search and updates" OFF) mark_as_advanced(MDBX_ENABLE_PROFGC) add_mdbx_option(MDBX_ENABLE_DBI_SPARSE "FIXME" ON) +add_mdbx_option(MDBX_ENABLE_DBI_LOCKFREE "FIXME" ON) if(NOT MDBX_AMALGAMATED_SOURCE) if(CMAKE_CONFIGURATION_TYPES OR CMAKE_BUILD_TYPE_UPPERCASE STREQUAL "DEBUG") diff --git a/src/config.h.in b/src/config.h.in index 2ffb9ecf..0304db03 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -34,6 +34,7 @@ #cmakedefine01 MDBX_ENABLE_PGOP_STAT #cmakedefine01 MDBX_ENABLE_PROFGC #cmakedefine01 MDBX_ENABLE_DBI_SPARSE +#cmakedefine01 MDBX_ENABLE_DBI_LOCKFREE /* Windows */ #cmakedefine01 MDBX_WITHOUT_MSVC_CRT diff --git a/src/core.c b/src/core.c index cdb25b1b..1b1d4b58 100644 --- a/src/core.c +++ b/src/core.c @@ -3781,6 +3781,58 @@ MDBX_MAYBE_UNUSED static bool cursor_is_tracked(const MDBX_cursor *mc) { *tracking_head = tracked->mc_next; \ } while (0) +static int +env_defer_free_and_release(MDBX_env *const env, + struct mdbx_defer_free_item *const chain) { + size_t length = 0; + struct mdbx_defer_free_item *obsolete_chain = nullptr; +#if MDBX_ENABLE_DBI_LOCKFREE + const uint64_t now = osal_monotime(); + struct mdbx_defer_free_item **scan = &env->me_defer_free; + if (env->me_defer_free) { + const uint64_t threshold_1second = osal_16dot16_to_monotime(1 * 65536); + do { + struct mdbx_defer_free_item *item = *scan; + if (now - item->timestamp < threshold_1second) { + scan = &item->next; + length += 1; + } else { + *scan = item->next; + item->next = obsolete_chain; + obsolete_chain = item; + } + 
} while (*scan); + } + + eASSERT(env, *scan == nullptr); + if (chain) { + struct mdbx_defer_free_item *item = chain; + do { + item->timestamp = now; + item = item->next; + } while (item); + *scan = chain; + } +#else /* MDBX_ENABLE_DBI_LOCKFREE */ + obsolete_chain = chain; +#endif /* MDBX_ENABLE_DBI_LOCKFREE */ + + ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); + if (length > 42) { +#if defined(_WIN32) || defined(_WIN64) + SwitchToThread(); +#else + sched_yield(); +#endif /* Windows */ + } + while (obsolete_chain) { + struct mdbx_defer_free_item *item = obsolete_chain; + obsolete_chain = obsolete_chain->next; + osal_free(item); + } + return chain ? MDBX_SUCCESS : MDBX_BAD_DBI; +} + #if MDBX_ENABLE_DBI_SPARSE static __inline size_t dbi_bitmap_ctz(const MDBX_txn *txn, intptr_t bmi) { @@ -4136,7 +4188,7 @@ static int dbi_update(MDBX_txn *txn, int keep) { MDBX_env *const env = txn->mt_env; tASSERT(txn, !txn->mt_parent && txn == env->me_txn0); bool locked = false; - void *defer_free = nullptr; + struct mdbx_defer_free_item *defer_chain = nullptr; TXN_FOREACH_DBI_USER(txn, dbi) { if (likely((txn->mt_dbi_state[dbi] & DBI_CREAT) == 0)) continue; @@ -4154,15 +4206,15 @@ static int dbi_update(MDBX_txn *txn, int keep) { env->me_db_flags[dbi] = txn->mt_dbs[dbi].md_flags | DB_VALID; } else { uint32_t seq = dbi_seq_next(env, dbi); - void *ptr = env->me_dbxs[dbi].md_name.iov_base; - if (ptr) { + struct mdbx_defer_free_item *item = env->me_dbxs[dbi].md_name.iov_base; + if (item) { env->me_db_flags[dbi] = 0; env->me_dbxs[dbi].md_name.iov_len = 0; env->me_dbxs[dbi].md_name.iov_base = nullptr; atomic_store32(&env->me_dbi_seqs[dbi], seq, mo_AcquireRelease); osal_flush_incoherent_cpu_writeback(); - osal_free(defer_free); - defer_free = ptr; + item->next = defer_chain; + defer_chain = item; } else { eASSERT(env, env->me_dbxs[dbi].md_name.iov_len == 0); eASSERT(env, env->me_db_flags[dbi] == 0); @@ -4179,9 +4231,7 @@ static int dbi_update(MDBX_txn *txn, int keep) { 
!env->me_dbxs[i].md_name.iov_base); } env->me_numdbs = (unsigned)i; - ENSURE(txn->mt_env, - osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); - osal_free(defer_free); + env_defer_free_and_release(env, defer_chain); } return MDBX_SUCCESS; } @@ -15651,6 +15701,14 @@ __cold static int env_close(MDBX_env *env) { env->me_txkey = (osal_thread_key_t)0; } +#if MDBX_ENABLE_DBI_LOCKFREE + for (struct mdbx_defer_free_item *next, *ptr = env->me_defer_free; ptr; + ptr = next) { + next = ptr->next; + osal_free(ptr); + } +#endif /* MDBX_ENABLE_DBI_LOCKFREE */ + munlock_all(env); if (!(env->me_flags & MDBX_RDONLY)) osal_ioring_destroy(&env->me_ioring); @@ -23057,7 +23115,9 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, /* Done here so we cannot fail after creating a new DB */ void *clone = nullptr; if (name.iov_len) { - clone = osal_malloc(name.iov_len); + clone = osal_malloc((name.iov_len > sizeof(struct mdbx_defer_free_item)) + ? name.iov_len + : sizeof(struct mdbx_defer_free_item)); if (unlikely(!clone)) return MDBX_ENOMEM; name.iov_base = memcpy(clone, name.iov_base, name.iov_len); @@ -23174,6 +23234,67 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const name, txn->mt_env->me_leaf_nodemax - NODESIZE - sizeof(MDBX_db))) return MDBX_EINVAL; +#if MDBX_ENABLE_DBI_LOCKFREE + /* Is the DB already open? 
*/ + const MDBX_env *const env = txn->mt_env; + size_t free_slot = env->me_numdbs; + for (size_t i = CORE_DBS; i < env->me_numdbs; ++i) { + retry: + if ((env->me_db_flags[i] & DB_VALID) == 0) { + free_slot = i; + continue; + } + + const uint32_t snap_seq = + atomic_load32(&env->me_dbi_seqs[i], mo_AcquireRelease); + const uint16_t snap_flags = env->me_db_flags[i]; + const MDBX_val snap_name = env->me_dbxs[i].md_name; + if (user_flags != MDBX_ACCEDE && + (((user_flags ^ snap_flags) & DB_PERSISTENT_FLAGS) || + (keycmp && keycmp != env->me_dbxs[i].md_cmp) || + (datacmp && datacmp != env->me_dbxs[i].md_dcmp))) + continue; + const uint32_t main_seq = + atomic_load32(&env->me_dbi_seqs[MAIN_DBI], mo_AcquireRelease); + MDBX_cmp_func *const snap_cmp = env->me_dbxs[MAIN_DBI].md_cmp; + if (unlikely(!(snap_flags & DB_VALID) || !snap_name.iov_base || + !snap_name.iov_len || !snap_cmp)) + continue; + + const bool name_match = snap_cmp(&snap_name, name) == 0; + osal_flush_incoherent_cpu_writeback(); + if (unlikely(snap_seq != + atomic_load32(&env->me_dbi_seqs[i], mo_AcquireRelease) || + main_seq != atomic_load32(&env->me_dbi_seqs[MAIN_DBI], + mo_AcquireRelease) || + snap_flags != env->me_db_flags[i] || + snap_name.iov_base != env->me_dbxs[i].md_name.iov_base || + snap_name.iov_len != env->me_dbxs[i].md_name.iov_len)) + goto retry; + if (name_match) { + rc = dbi_check(txn, i); + if (rc == MDBX_BAD_DBI && + txn->mt_dbi_state[i] == (DBI_OLDEN | DBI_LINDO)) { + /* хендл использовался, стал невалидным, + * но теперь явно пере-открывается в этой транзакци */ + eASSERT(env, !txn->mt_cursors[i]); + txn->mt_dbi_state[i] = DBI_LINDO; + rc = dbi_check(txn, i); + } + if (likely(rc == MDBX_SUCCESS)) { + rc = dbi_bind(txn, i, user_flags, keycmp, datacmp); + if (likely(rc == MDBX_SUCCESS)) + *dbi = (MDBX_dbi)i; + } + return rc; + } + } + + /* Fail, if no free slot and max hit */ + if (unlikely(free_slot >= env->me_maxdbs)) + return MDBX_DBS_FULL; +#endif /* MDBX_ENABLE_DBI_LOCKFREE */ + rc = 
osal_fastmutex_acquire(&txn->mt_env->me_dbi_lock); if (likely(rc == MDBX_SUCCESS)) { rc = dbi_open_locked(txn, user_flags, dbi, keycmp, datacmp, *name); @@ -23251,35 +23372,35 @@ __cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, return MDBX_SUCCESS; } -static int dbi_close_locked(MDBX_env *env, MDBX_dbi dbi) { +static struct mdbx_defer_free_item *dbi_close_locked(MDBX_env *env, + MDBX_dbi dbi) { eASSERT(env, dbi >= CORE_DBS); if (unlikely(dbi >= env->me_numdbs)) - return MDBX_BAD_DBI; + return nullptr; - char *const ptr = env->me_dbxs[dbi].md_name.iov_base; - /* If there was no name, this was already closed */ - if (unlikely(!ptr)) - return MDBX_BAD_DBI; + const uint32_t seq = dbi_seq_next(env, dbi); + struct mdbx_defer_free_item *defer_item = env->me_dbxs[dbi].md_name.iov_base; + if (likely(defer_item)) { + env->me_db_flags[dbi] = 0; + env->me_dbxs[dbi].md_name.iov_len = 0; + env->me_dbxs[dbi].md_name.iov_base = nullptr; + atomic_store32(&env->me_dbi_seqs[dbi], seq, mo_AcquireRelease); + osal_flush_incoherent_cpu_writeback(); + defer_item->next = nullptr; - env->me_db_flags[dbi] = 0; - env->me_dbxs[dbi].md_name.iov_len = 0; - osal_memory_fence(mo_AcquireRelease, true); - env->me_dbxs[dbi].md_name.iov_base = NULL; - osal_flush_incoherent_cpu_writeback(); - osal_free(ptr); - - if (env->me_numdbs == dbi + 1) { - size_t i = env->me_numdbs; - do { - --i; - eASSERT(env, i >= CORE_DBS); - eASSERT(env, !env->me_db_flags[i] && !env->me_dbxs[i].md_name.iov_len && - !env->me_dbxs[i].md_name.iov_base); - } while ((env->me_db_flags[i - 1] & DB_VALID) == 0); - env->me_numdbs = (unsigned)i; + if (env->me_numdbs == dbi + 1) { + size_t i = env->me_numdbs; + do { + --i; + eASSERT(env, i >= CORE_DBS); + eASSERT(env, !env->me_db_flags[i] && !env->me_dbxs[i].md_name.iov_len && + !env->me_dbxs[i].md_name.iov_base); + } while (i > CORE_DBS && !env->me_dbxs[i - 1].md_name.iov_base); + env->me_numdbs = (unsigned)i; + } } - return MDBX_SUCCESS; + return 
defer_item; } int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { @@ -23297,12 +23418,8 @@ int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { return MDBX_BAD_DBI; rc = osal_fastmutex_acquire(&env->me_dbi_lock); - if (likely(rc == MDBX_SUCCESS)) { - rc = (dbi < env->me_maxdbs && (env->me_db_flags[dbi] & DB_VALID)) - ? dbi_close_locked(env, dbi) - : MDBX_BAD_DBI; - ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); - } + if (likely(rc == MDBX_SUCCESS)) + rc = env_defer_free_and_release(env, dbi_close_locked(env, dbi)); return rc; } @@ -23449,8 +23566,7 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { MDBX_env *env = txn->mt_env; rc = osal_fastmutex_acquire(&env->me_dbi_lock); if (likely(rc == MDBX_SUCCESS)) { - dbi_close_locked(env, dbi); - ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); + rc = env_defer_free_and_release(env, dbi_close_locked(env, dbi)); goto bailout; } } diff --git a/src/internals.h b/src/internals.h index a3e8e5cc..856ad1db 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1348,6 +1348,11 @@ typedef struct MDBX_cursor_couple { MDBX_xcursor inner; } MDBX_cursor_couple; +struct mdbx_defer_free_item { + struct mdbx_defer_free_item *next; + uint64_t timestamp; +}; + /* The database environment. 
*/ struct MDBX_env { /* ----------------------------------------------------- mostly static part */ @@ -1452,6 +1457,9 @@ struct MDBX_env { bool me_prefault_write; MDBX_env *me_lcklist_next; +#if MDBX_ENABLE_DBI_LOCKFREE + struct mdbx_defer_free_item *me_defer_free; +#endif /* MDBX_ENABLE_DBI_LOCKFREE */ /* --------------------------------------------------- mostly volatile part */ diff --git a/src/options.h b/src/options.h index 65bb797c..9aff6755 100644 --- a/src/options.h +++ b/src/options.h @@ -170,6 +170,13 @@ #error MDBX_ENABLE_DBI_SPARSE must be defined as 0 or 1 #endif /* MDBX_ENABLE_DBI_SPARSE */ +/** Controls lock-free lookup of opened DBI-handles and deferred freeing. */ +#ifndef MDBX_ENABLE_DBI_LOCKFREE +#define MDBX_ENABLE_DBI_LOCKFREE 1 +#elif !(MDBX_ENABLE_DBI_LOCKFREE == 0 || MDBX_ENABLE_DBI_LOCKFREE == 1) +#error MDBX_ENABLE_DBI_LOCKFREE must be defined as 0 or 1 +#endif /* MDBX_ENABLE_DBI_LOCKFREE */ + /** Controls sort order of internal page number lists. * This mostly experimental/advanced option with not for regular MDBX users. * \warning The database format depend on this option and libmdbx built with From 903d964f4d1dcd15c08eed2eee263f00c170e468 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 2 Nov 2023 23:15:38 +0300 Subject: [PATCH 029/443] =?UTF-8?q?mdbx:=20=D0=B8=D0=BD=D1=82=D0=B5=D0=BD?= =?UTF-8?q?=D1=81=D0=B8=D0=B2=D0=BD=D0=BE=D0=B5=20=D0=B8=D1=81=D0=BF=D0=BE?= =?UTF-8?q?=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20`=5F?= =?UTF-8?q?=5Frestrict`,=20=D0=B2=20=D1=82=D0=BE=D0=BC=20=D1=87=D0=B8?= =?UTF-8?q?=D1=81=D0=BB=D0=B5=20=D0=BF=D1=80=D0=B8=20=D0=BE=D0=BF=D1=80?= =?UTF-8?q?=D0=B5=D0=B4=D0=B5=D0=BB=D0=B5=D0=BD=D0=B8=D0=B8=20=D1=8D=D0=BB?= =?UTF-8?q?=D0=B5=D0=BC=D0=B5=D0=BD=D1=82=D0=BE=D0=B2=20=D1=81=D1=82=D1=80?= =?UTF-8?q?=D1=83=D0=BA=D1=82=D1=83=D1=80.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 34 
++++++++++++++++++---------------- src/internals.h | 32 ++++++++++++++++++-------------- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/src/core.c b/src/core.c index 1b1d4b58..5034c45e 100644 --- a/src/core.c +++ b/src/core.c @@ -2265,7 +2265,7 @@ static void pnl_free(MDBX_PNL pl) { } /* Shrink the PNL to the default size if it has grown larger */ -static void pnl_shrink(MDBX_PNL *ppl) { +static void pnl_shrink(MDBX_PNL __restrict *__restrict ppl) { assert(pnl_bytes2size(pnl_size2bytes(MDBX_PNL_INITIAL)) >= MDBX_PNL_INITIAL && pnl_bytes2size(pnl_size2bytes(MDBX_PNL_INITIAL)) < MDBX_PNL_INITIAL * 3 / 2); @@ -2288,7 +2288,8 @@ static void pnl_shrink(MDBX_PNL *ppl) { } /* Grow the PNL to the size growed to at least given size */ -static int pnl_reserve(MDBX_PNL *ppl, const size_t wanna) { +static int pnl_reserve(MDBX_PNL __restrict *__restrict ppl, + const size_t wanna) { const size_t allocated = MDBX_PNL_ALLOCLEN(*ppl); assert(MDBX_PNL_GETSIZE(*ppl) <= MDBX_PGL_LIMIT && MDBX_PNL_ALLOCLEN(*ppl) >= MDBX_PNL_GETSIZE(*ppl)); @@ -2318,8 +2319,8 @@ static int pnl_reserve(MDBX_PNL *ppl, const size_t wanna) { } /* Make room for num additional elements in an PNL */ -static __always_inline int __must_check_result pnl_need(MDBX_PNL *ppl, - size_t num) { +static __always_inline int __must_check_result +pnl_need(MDBX_PNL __restrict *__restrict ppl, size_t num) { assert(MDBX_PNL_GETSIZE(*ppl) <= MDBX_PGL_LIMIT && MDBX_PNL_ALLOCLEN(*ppl) >= MDBX_PNL_GETSIZE(*ppl)); assert(num <= MDBX_PGL_LIMIT); @@ -2328,7 +2329,7 @@ static __always_inline int __must_check_result pnl_need(MDBX_PNL *ppl, : pnl_reserve(ppl, wanna); } -static __always_inline void pnl_xappend(MDBX_PNL pl, pgno_t pgno) { +static __always_inline void pnl_xappend(__restrict MDBX_PNL pl, pgno_t pgno) { assert(MDBX_PNL_GETSIZE(pl) < MDBX_PNL_ALLOCLEN(pl)); if (AUDIT_ENABLED()) { for (size_t i = MDBX_PNL_GETSIZE(pl); i > 0; --i) @@ -2339,10 +2340,8 @@ static __always_inline void pnl_xappend(MDBX_PNL pl, 
pgno_t pgno) { } /* Append an pgno range onto an unsorted PNL */ -__always_inline static int __must_check_result pnl_append_range(bool spilled, - MDBX_PNL *ppl, - pgno_t pgno, - size_t n) { +__always_inline static int __must_check_result pnl_append_range( + bool spilled, __restrict MDBX_PNL *ppl, pgno_t pgno, size_t n) { assert(n > 0); int rc = pnl_need(ppl, n); if (unlikely(rc != MDBX_SUCCESS)) @@ -2369,7 +2368,7 @@ __always_inline static int __must_check_result pnl_append_range(bool spilled, } /* Append an pgno range into the sorted PNL */ -__hot static int __must_check_result pnl_insert_range(MDBX_PNL *ppl, +__hot static int __must_check_result pnl_insert_range(__restrict MDBX_PNL *ppl, pgno_t pgno, size_t n) { assert(n > 0); int rc = pnl_need(ppl, n); @@ -2673,7 +2672,8 @@ static void txl_free(MDBX_TXL tl) { osal_free(tl - 1); } -static int txl_reserve(MDBX_TXL *ptl, const size_t wanna) { +static int txl_reserve(MDBX_TXL __restrict *__restrict ptl, + const size_t wanna) { const size_t allocated = (size_t)MDBX_PNL_ALLOCLEN(*ptl); assert(MDBX_PNL_GETSIZE(*ptl) <= MDBX_TXL_MAX && MDBX_PNL_ALLOCLEN(*ptl) >= MDBX_PNL_GETSIZE(*ptl)); @@ -2702,8 +2702,8 @@ static int txl_reserve(MDBX_TXL *ptl, const size_t wanna) { return MDBX_ENOMEM; } -static __always_inline int __must_check_result txl_need(MDBX_TXL *ptl, - size_t num) { +static __always_inline int __must_check_result +txl_need(MDBX_TXL __restrict *__restrict ptl, size_t num) { assert(MDBX_PNL_GETSIZE(*ptl) <= MDBX_TXL_MAX && MDBX_PNL_ALLOCLEN(*ptl) >= MDBX_PNL_GETSIZE(*ptl)); assert(num <= MDBX_PGL_LIMIT); @@ -2712,7 +2712,7 @@ static __always_inline int __must_check_result txl_need(MDBX_TXL *ptl, : txl_reserve(ptl, wanna); } -static __always_inline void txl_xappend(MDBX_TXL tl, txnid_t id) { +static __always_inline void txl_xappend(MDBX_TXL __restrict tl, txnid_t id) { assert(MDBX_PNL_GETSIZE(tl) < MDBX_PNL_ALLOCLEN(tl)); tl[0] += 1; MDBX_PNL_LAST(tl) = id; @@ -2724,7 +2724,8 @@ static void txl_sort(MDBX_TXL tl) { 
txnid_sort(MDBX_PNL_BEGIN(tl), MDBX_PNL_END(tl)); } -static int __must_check_result txl_append(MDBX_TXL *ptl, txnid_t id) { +static int __must_check_result txl_append(MDBX_TXL __restrict *ptl, + txnid_t id) { if (unlikely(MDBX_PNL_GETSIZE(*ptl) == MDBX_PNL_ALLOCLEN(*ptl))) { int rc = txl_need(ptl, MDBX_TXL_GRANULATE); if (unlikely(rc != MDBX_SUCCESS)) @@ -4582,7 +4583,8 @@ static void refund_loose(MDBX_txn *txn) { /* Filter-out loose chain & dispose refunded pages. */ unlink_loose: - for (MDBX_page **link = &txn->tw.loose_pages; *link;) { + for (MDBX_page *__restrict *__restrict link = &txn->tw.loose_pages; + *link;) { MDBX_page *dp = *link; tASSERT(txn, dp->mp_flags == P_LOOSE); MDBX_ASAN_UNPOISON_MEMORY_REGION(&mp_next(dp), sizeof(MDBX_page *)); diff --git a/src/internals.h b/src/internals.h index 856ad1db..f38c7f7a 100644 --- a/src/internals.h +++ b/src/internals.h @@ -93,6 +93,10 @@ disable : 5105) /* winbase.h(9531): warning C5105: macro expansion \ producing 'defined' has undefined behavior */ #endif +#if _MSC_VER < 1920 +/* avoid "error C2219: syntax error: type qualifier must be after '*'" */ +#define __restrict +#endif #if _MSC_VER > 1930 #pragma warning(disable : 6235) /* is always a constant */ #pragma warning(disable : 6237) /* is never evaluated and might \ @@ -1193,7 +1197,7 @@ struct MDBX_txn { MDBX_db *mt_dbs; #if MDBX_ENABLE_DBI_SPARSE - unsigned *mt_dbi_sparse; + unsigned *__restrict mt_dbi_sparse; #endif /* MDBX_ENABLE_DBI_SPARSE */ /* Non-shared DBI state flags inside transaction */ @@ -1205,10 +1209,10 @@ struct MDBX_txn { #define DBI_OLDEN 0x40 /* Handle was closed/reopened outside txn */ #define DBI_LINDO 0x80 /* Lazy initialization done for DBI-slot */ /* Array of non-shared txn's flags of DBI */ - uint8_t *mt_dbi_state; + uint8_t *__restrict mt_dbi_state; /* Array of sequence numbers for each DB handle. 
*/ - uint32_t *mt_dbi_seqs; + uint32_t *__restrict mt_dbi_seqs; MDBX_cursor **mt_cursors; MDBX_canary mt_canary; @@ -1222,8 +1226,8 @@ struct MDBX_txn { struct { meta_troika_t troika; /* In write txns, array of cursors for each DB */ - MDBX_PNL relist; /* Reclaimed GC pages */ - txnid_t last_reclaimed; /* ID of last used record */ + MDBX_PNL __restrict relist; /* Reclaimed GC pages */ + txnid_t last_reclaimed; /* ID of last used record */ #if MDBX_ENABLE_REFUND pgno_t loose_refund_wl /* FIXME: describe */; #endif /* MDBX_ENABLE_REFUND */ @@ -1235,14 +1239,14 @@ struct MDBX_txn { * dirtylist into mt_parent after freeing hidden mt_parent pages. */ size_t dirtyroom; /* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */ - MDBX_dpl *dirtylist; + MDBX_dpl *__restrict dirtylist; /* The list of reclaimed txns from GC */ - MDBX_TXL lifo_reclaimed; + MDBX_TXL __restrict lifo_reclaimed; /* The list of pages that became unused during this transaction. */ - MDBX_PNL retired_pages; + MDBX_PNL __restrict retired_pages; /* The list of loose pages that became unused and may be reused * in this transaction, linked through `mp_next`. */ - MDBX_page *loose_pages; + MDBX_page *__restrict loose_pages; /* Number of loose pages (tw.loose_pages) */ size_t loose_count; union { @@ -1251,7 +1255,7 @@ struct MDBX_txn { /* The sorted list of dirty pages we temporarily wrote to disk * because the dirty list was full. page numbers in here are * shifted left by 1, deleted slots have the LSB set. 
*/ - MDBX_PNL list; + MDBX_PNL __restrict list; } spilled; size_t writemap_dirty_npages; size_t writemap_spilled_npages; @@ -1295,7 +1299,7 @@ struct MDBX_cursor { /* The database auxiliary record for this cursor */ MDBX_dbx *mc_dbx; /* The mt_dbi_state[] for this DBI */ - uint8_t *mc_dbi_state; + uint8_t *__restrict mc_dbi_state; uint8_t mc_snum; /* number of pushed pages */ uint8_t mc_top; /* index of top page, normally mc_snum-1 */ @@ -1401,7 +1405,7 @@ struct MDBX_env { MDBX_txn *me_txn0; /* preallocated write transaction */ MDBX_dbx *me_dbxs; /* array of static DB info */ - uint16_t *me_db_flags; /* array of flags from MDBX_db.md_flags */ + uint16_t *__restrict me_db_flags; /* array of flags from MDBX_db.md_flags */ MDBX_atomic_uint32_t *me_dbi_seqs; /* array of dbi sequence numbers */ unsigned me_maxgc_ov1page; /* Number of pgno_t fit in a single overflow page */ @@ -1468,10 +1472,10 @@ struct MDBX_env { unsigned me_numdbs; /* number of DBs opened */ unsigned me_dp_reserve_len; - MDBX_page *me_dp_reserve; /* list of malloc'ed blocks for re-use */ + MDBX_page *__restrict me_dp_reserve; /* list of malloc'ed blocks for re-use */ /* PNL of pages that became unused in a write txn */ - MDBX_PNL me_retired_pages; + MDBX_PNL __restrict me_retired_pages; osal_ioring_t me_ioring; #if defined(_WIN32) || defined(_WIN64) From c9c02dddfb3aea039a64be65f6994a24407bae06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 1 Nov 2023 01:07:01 +0300 Subject: [PATCH 030/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fdbi=5Frename()`=20?= =?UTF-8?q?=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 5 +++ src/core.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 113 insertions(+), 5 deletions(-) diff --git a/mdbx.h 
b/mdbx.h index d9cc392e..e2da78b3 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4225,6 +4225,11 @@ MDBX_DEPRECATED LIBMDBX_API int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); +/** Renames the table (named subDB) of the given DBI-handle in a write txn. */ +LIBMDBX_API int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name); +LIBMDBX_API int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, + const MDBX_val *name); + /** \defgroup value2key Value-to-Key functions * \brief Value-to-Key functions to * \ref avoid_custom_comparators "avoid using custom comparators" diff --git a/src/core.c b/src/core.c index 5034c45e..56fb4f6b 100644 --- a/src/core.c +++ b/src/core.c @@ -22993,6 +22993,12 @@ static int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, return MDBX_SUCCESS; } +static __inline size_t dbi_namelen(const MDBX_val name) { + return (name.iov_len > sizeof(struct mdbx_defer_free_item)) + ? name.iov_len + : sizeof(struct mdbx_defer_free_item); +} + static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp, MDBX_val name) { @@ -23117,9 +23123,7 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, /* Done here so we cannot fail after creating a new DB */ void *clone = nullptr; if (name.iov_len) { - clone = osal_malloc((name.iov_len > sizeof(struct mdbx_defer_free_item)) - ? 
name.iov_len - : sizeof(struct mdbx_defer_free_item)); + clone = osal_malloc(dbi_namelen(name)); if (unlikely(!clone)) return MDBX_ENOMEM; name.iov_base = memcpy(clone, name.iov_base, name.iov_len); @@ -23343,6 +23347,105 @@ int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, return dbi_open(txn, name, flags, dbi, keycmp, datacmp); } +__cold int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name_cstr) { + MDBX_val thunk, *name; + if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || + name_cstr == MDBX_CHK_META) + name = (void *)name_cstr; + else { + thunk.iov_len = strlen(name_cstr); + thunk.iov_base = (void *)name_cstr; + name = &thunk; + } + return mdbx_dbi_rename2(txn, dbi, name); +} + +struct dbi_rename_result { + struct mdbx_defer_free_item *defer; + int err; +}; + +__cold static struct dbi_rename_result +dbi_rename_locked(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val new_name) { + struct dbi_rename_result pair; + pair.defer = nullptr; + pair.err = dbi_check(txn, dbi); + if (unlikely(pair.err != MDBX_SUCCESS)) + return pair; + + MDBX_env *const env = txn->mt_env; + MDBX_val old_name = env->me_dbxs[dbi].md_name; + if (env->me_dbxs[MAIN_DBI].md_cmp(&new_name, &old_name) == 0 && + MDBX_DEBUG == 0) + return pair; + + MDBX_cursor_couple cx; + pair.err = cursor_init(&cx.outer, txn, MAIN_DBI); + if (unlikely(pair.err != MDBX_SUCCESS)) + return pair; + pair.err = cursor_set(&cx.outer, &new_name, nullptr, MDBX_SET).err; + if (unlikely(pair.err != MDBX_NOTFOUND)) { + pair.err = (pair.err == MDBX_SUCCESS) ? 
MDBX_KEYEXIST : pair.err; + return pair; + } + + pair.defer = osal_malloc(dbi_namelen(new_name)); + if (unlikely(!pair.defer)) { + pair.err = MDBX_ENOMEM; + return pair; + } + new_name.iov_base = memcpy(pair.defer, new_name.iov_base, new_name.iov_len); + + cx.outer.mc_next = txn->mt_cursors[MAIN_DBI]; + txn->mt_cursors[MAIN_DBI] = &cx.outer; + + MDBX_val data = {&txn->mt_dbs[dbi], sizeof(MDBX_db)}; + pair.err = cursor_put_checklen(&cx.outer, &new_name, &data, + F_SUBDATA | MDBX_NOOVERWRITE); + if (likely(pair.err == MDBX_SUCCESS)) { + pair.err = cursor_set(&cx.outer, &old_name, nullptr, MDBX_SET).err; + if (likely(pair.err == MDBX_SUCCESS)) + pair.err = cursor_del(&cx.outer, F_SUBDATA); + if (likely(pair.err == MDBX_SUCCESS)) { + pair.defer = env->me_dbxs[dbi].md_name.iov_base; + env->me_dbxs[dbi].md_name = new_name; + } else + txn->mt_flags |= MDBX_TXN_ERROR; + } + + txn->mt_cursors[MAIN_DBI] = cx.outer.mc_next; + return pair; +} + +__cold int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, + const MDBX_val *new_name) { + int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(new_name == MDBX_CHK_MAIN || + new_name->iov_base == MDBX_CHK_MAIN || new_name == MDBX_CHK_GC || + new_name->iov_base == MDBX_CHK_GC || new_name == MDBX_CHK_META || + new_name->iov_base == MDBX_CHK_META)) + return MDBX_EINVAL; + + if (unlikely(dbi < CORE_DBS)) + return MDBX_EINVAL; + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = osal_fastmutex_acquire(&txn->mt_env->me_dbi_lock); + if (likely(rc == MDBX_SUCCESS)) { + struct dbi_rename_result pair = dbi_rename_locked(txn, dbi, *new_name); + if (pair.defer) + pair.defer->next = nullptr; + env_defer_free_and_release(txn->mt_env, pair.defer); + rc = pair.err; + } + return rc; +} + __cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, size_t bytes) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); @@ -23540,7 +23643,7 @@ static int 
drop_tree(MDBX_cursor *mc, const bool may_have_subDBs) { return rc; } -int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { +__cold int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -23565,7 +23668,7 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { tASSERT(txn, txn->mt_dbi_state[MAIN_DBI] & DBI_DIRTY); tASSERT(txn, txn->mt_flags & MDBX_TXN_DIRTY); txn->mt_dbi_state[dbi] = DBI_LINDO | DBI_OLDEN; - MDBX_env *env = txn->mt_env; + MDBX_env *const env = txn->mt_env; rc = osal_fastmutex_acquire(&env->me_dbi_lock); if (likely(rc == MDBX_SUCCESS)) { rc = env_defer_free_and_release(env, dbi_close_locked(env, dbi)); From c216e1afb7b9bbe76bcee2d51d573b92ccbe5fff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 1 Nov 2023 11:04:00 +0300 Subject: [PATCH 031/443] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D1=80=D0=BE=D0=B2?= =?UTF-8?q?=D0=B5=D1=80=D0=BA=D0=B8=20`mdbx=5Fdbi=5Frename()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/jitter.c++ | 27 ++++++++++++++++++++++++++- test/test.c++ | 42 ++++++++++++++++++++++++++++-------------- test/test.h++ | 5 ++++- 3 files changed, 58 insertions(+), 16 deletions(-) diff --git a/test/jitter.c++ b/test/jitter.c++ index b25599b0..993631e8 100644 --- a/test/jitter.c++ +++ b/test/jitter.c++ @@ -39,6 +39,12 @@ bool testcase_jitter::run() { if (upper_limit < 1) upper_limit = config.params.size_now * 2; + tablename_buf buffer; + const char *const tablename = db_tablename(buffer); + tablename_buf buffer_renamed; + const char *const tablename_renamed = + db_tablename(buffer_renamed, ".renamed"); + while (should_continue()) { jitter_delay(); db_open(); @@ -48,6 +54,15 @@ bool testcase_jitter::run() { 
txn_begin(false); dbi = db_table_open(true); check_dbi_error(MDBX_SUCCESS, "created-uncommitted"); + + bool renamed = false; + if (flipcoin()) { + err = mdbx_dbi_rename(txn_guard.get(), dbi, tablename_renamed); + if (err != MDBX_SUCCESS) + failure_perror("jitter.rename-1", err); + renamed = true; + } + // note: here and below the 4-byte length keys and value are used // to be compatible with any Db-flags given from command line. MDBX_val k = {(void *)"k000", 4}, v = {(void *)"v001", 4}; @@ -75,7 +90,17 @@ bool testcase_jitter::run() { failure_perror("jitter.put-2", err); check_dbi_error(MDBX_BAD_DBI, "dropped-recreated-aborted"); // restore DBI - dbi = db_table_open(false); + dbi = db_table_open(false, renamed); + if (renamed) { + err = mdbx_dbi_open( + txn_guard.get(), tablename_renamed, + flipcoin() ? MDBX_DB_ACCEDE : config.params.table_flags, &dbi); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror("open-renamed", err); + err = mdbx_dbi_rename(txn_guard.get(), dbi, tablename); + if (err != MDBX_SUCCESS) + failure_perror("jitter.rename-2", err); + } check_dbi_error(MDBX_SUCCESS, "dropped-recreated-aborted+reopened"); v = {(void *)"v003", 4}; err = mdbx_put(txn_guard.get(), dbi, &k, &v, MDBX_UPSERT); diff --git a/test/test.c++ b/test/test.c++ index 77c90c0a..e590d3ce 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -537,29 +537,43 @@ int testcase::db_open__begin__table_create_open_clean(MDBX_dbi &handle) { return err; } -MDBX_dbi testcase::db_table_open(bool create) { - log_trace(">> testcase::db_table_create"); - - char tablename_buf[16]; +const char *testcase::db_tablename(tablename_buf &buffer, + const char *suffix) const { const char *tablename = nullptr; if (config.space_id) { - int rc = snprintf(tablename_buf, sizeof(tablename_buf), "TBL%04u", - config.space_id); + int rc = + snprintf(buffer, sizeof(buffer), "TBL%04u%s", config.space_id, suffix); if (rc < 4 || rc >= (int)sizeof(tablename_buf) - 1) failure("snprintf(tablename): %d", rc); - tablename = 
tablename_buf; + tablename = buffer; } log_debug("use %s table", tablename ? tablename : "MAINDB"); + return tablename; +} + +MDBX_dbi testcase::db_table_open(bool create, bool expect_failure) { + log_trace(">> testcase::db_table_%s%s", create ? "create" : "open", + expect_failure ? "(expect_failure)" : ""); + + tablename_buf buffer; + const char *tablename = db_tablename(buffer); MDBX_dbi handle = 0; - int rc = mdbx_dbi_open(txn_guard.get(), tablename, - (create ? MDBX_CREATE : MDBX_DB_DEFAULTS) | - config.params.table_flags, - &handle); - if (unlikely(rc != MDBX_SUCCESS)) - failure_perror("mdbx_dbi_open()", rc); + int rc = mdbx_dbi_open( + txn_guard.get(), tablename, + create ? (MDBX_CREATE | config.params.table_flags) + : (flipcoin() ? MDBX_DB_ACCEDE + : MDBX_DB_DEFAULTS | config.params.table_flags), + &handle); + if (unlikely(expect_failure != (rc != MDBX_SUCCESS))) { + char act[64]; + snprintf(act, sizeof(act), "mdbx_dbi_open(create=%s,expect_failure=%s)", + create ? "true" : "false", expect_failure ? "true" : "false"); + failure_perror(act, rc); + } - log_trace("<< testcase::db_table_create, handle %u", handle); + log_trace("<< testcase::db_table_%s%s, handle %u", create ? "create" : "open", + expect_failure ? 
"(expect_failure)" : "", handle); return handle; } diff --git a/test/test.h++ b/test/test.h++ index 6158ba66..96d93a7c 100644 --- a/test/test.h++ +++ b/test/test.h++ @@ -265,7 +265,10 @@ protected: MDBX_val expected_valued); unsigned txn_underutilization_x256(MDBX_txn *txn) const; - MDBX_dbi db_table_open(bool create); + using tablename_buf = char[32]; + const char *db_tablename(tablename_buf &buffer, + const char *suffix = "") const; + MDBX_dbi db_table_open(bool create, bool expect_failure = false); void db_table_drop(MDBX_dbi handle); void db_table_clear(MDBX_dbi handle, MDBX_txn *txn = nullptr); void db_table_close(MDBX_dbi handle); From 0916d2432151c943d6c29bd9bef5f03e6f6076e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 2 Nov 2023 16:54:32 +0300 Subject: [PATCH 032/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF?= =?UTF-8?q?=D1=86=D0=B8=D0=B8=20`ENABLE=5FMEMCHECK`=20=D0=B2=D0=BC=D0=B5?= =?UTF-8?q?=D1=81=D1=82=D0=BE=20`ENABLE=5FVALGRIND`=20=D0=B8=20`MDBX=5FUSE?= =?UTF-8?q?=5FVALGRIND`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 2 +- GNUmakefile | 4 +- cmake/profile.cmake | 45 +++++++++++++++------ src/base.h | 4 +- src/config.h.in | 2 +- src/core.c | 96 ++++++++++++++++++++++----------------------- src/internals.h | 6 +-- src/options.h | 4 +- 8 files changed, 91 insertions(+), 72 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 90c2d766..26e08261 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -467,7 +467,7 @@ endif() # #### # # # #### # # #### # -set(MDBX_BUILD_OPTIONS ENABLE_UBSAN ENABLE_ASAN MDBX_USE_VALGRIND ENABLE_GPROF ENABLE_GCOV) +set(MDBX_BUILD_OPTIONS ENABLE_UBSAN ENABLE_ASAN ENABLE_MEMCHECK ENABLE_GPROF ENABLE_GCOV) macro(add_mdbx_option NAME DESCRIPTION DEFAULT) 
list(APPEND MDBX_BUILD_OPTIONS ${NAME}) if(NOT ${DEFAULT} STREQUAL "AUTO") diff --git a/GNUmakefile b/GNUmakefile index 104ae372..d6e222b5 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -428,13 +428,13 @@ test-singleprocess: build-test @echo ' RUNNING `test/long_stochastic.sh --single --loops 2`...' $(QUIET)test/long_stochastic.sh --dont-check-ram-size --single --loops 2 --db-upto-mb 256 --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) -test-valgrind: CFLAGS_EXTRA=-Ofast -DMDBX_USE_VALGRIND +test-valgrind: CFLAGS_EXTRA=-Ofast -DENABLE_MEMCHECK test-valgrind: build-test @echo ' RUNNING `test/long_stochastic.sh --with-valgrind --loops 2`...' $(QUIET)test/long_stochastic.sh --with-valgrind --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false) memcheck: VALGRIND=valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt -memcheck: CFLAGS_EXTRA=-Ofast -DMDBX_USE_VALGRIND +memcheck: CFLAGS_EXTRA=-Ofast -DENABLE_MEMCHECK memcheck: build-test @echo " SMOKE \`mdbx_test basic\` under Valgrind's memcheck..." 
$(QUIET)rm -f valgrind-*.log $(TEST_DB) $(TEST_LOG).gz && (set -o pipefail; ( \ diff --git a/cmake/profile.cmake b/cmake/profile.cmake index f13b6976..e4973f51 100644 --- a/cmake/profile.cmake +++ b/cmake/profile.cmake @@ -24,6 +24,25 @@ endif() cmake_policy(PUSH) cmake_policy(VERSION ${CMAKE_MINIMUM_REQUIRED_VERSION}) +unset(MEMCHECK_OPTION_NAME) +if(NOT DEFINED ENABLE_MEMCHECK) + if (DEFINED MDBX_USE_VALGRIND) + set(MEMCHECK_OPTION_NAME "MDBX_USE_VALGRIND") + elseif(DEFINED ENABLE_VALGRIND) + set(MEMCHECK_OPTION_NAME "ENABLE_VALGRIND") + else() + set(MEMCHECK_OPTION_NAME "ENABLE_MEMCHECK") + endif() + if(MEMCHECK_OPTION_NAME STREQUAL "ENABLE_MEMCHECK") + option(ENABLE_MEMCHECK + "Enable integration with valgrind, a memory analyzing tool" OFF) + elseif(${MEMCHECK_OPTION_NAME}) + set(ENABLE_MEMCHECK ON) + else() + set(ENABLE_MEMCHECK OFF) + endif() +endif() + include(CheckLibraryExists) check_library_exists(gcov __gcov_flush "" HAVE_GCOV) @@ -33,23 +52,23 @@ option(ENABLE_GCOV option(ENABLE_GPROF "Enable integration with gprof, a performance analyzing tool" OFF) -if(CMAKE_CXX_COMPILER_LOADED) - include(CheckIncludeFileCXX) - check_include_file_cxx(valgrind/memcheck.h HAVE_VALGRIND_MEMCHECK_H) -else() - include(CheckIncludeFile) - check_include_file(valgrind/memcheck.h HAVE_VALGRIND_MEMCHECK_H) -endif() - -option(MDBX_USE_VALGRIND "Enable integration with valgrind, a memory analyzing tool" OFF) -if(MDBX_USE_VALGRIND AND NOT HAVE_VALGRIND_MEMCHECK_H) - message(FATAL_ERROR "MDBX_USE_VALGRIND option is set but valgrind/memcheck.h is not found") -endif() - option(ENABLE_ASAN "Enable AddressSanitizer, a fast memory error detector based on compiler instrumentation" OFF) option(ENABLE_UBSAN "Enable UndefinedBehaviorSanitizer, a fast undefined behavior detector based on compiler instrumentation" OFF) +if(ENABLE_MEMCHECK) + if(CMAKE_CXX_COMPILER_LOADED) + include(CheckIncludeFileCXX) + check_include_file_cxx(valgrind/memcheck.h HAVE_VALGRIND_MEMCHECK_H) + else() + 
include(CheckIncludeFile) + check_include_file(valgrind/memcheck.h HAVE_VALGRIND_MEMCHECK_H) + endif() + if(NOT HAVE_VALGRIND_MEMCHECK_H) + message(FATAL_ERROR "${MEMCHECK_OPTION_NAME} option is set but valgrind/memcheck.h is not found") + endif() +endif() + cmake_policy(POP) diff --git a/src/base.h b/src/base.h index fd730945..8f1d533f 100644 --- a/src/base.h +++ b/src/base.h @@ -686,7 +686,7 @@ __extern_C key_t ftok(const char *, int); /*----------------------------------------------------------------------------*/ -#if defined(MDBX_USE_VALGRIND) +#if defined(ENABLE_MEMCHECK) #include #ifndef VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE /* LY: available since Valgrind 3.10 */ @@ -708,7 +708,7 @@ __extern_C key_t ftok(const char *, int); #define VALGRIND_CHECK_MEM_IS_ADDRESSABLE(a, s) (0) #define VALGRIND_CHECK_MEM_IS_DEFINED(a, s) (0) #define RUNNING_ON_VALGRIND (0) -#endif /* MDBX_USE_VALGRIND */ +#endif /* ENABLE_MEMCHECK */ #ifdef __SANITIZE_ADDRESS__ #include diff --git a/src/config.h.in b/src/config.h.in index 0304db03..88a282c8 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -5,7 +5,7 @@ /* clang-format off */ #cmakedefine LTO_ENABLED -#cmakedefine MDBX_USE_VALGRIND +#cmakedefine ENABLE_MEMCHECK #cmakedefine ENABLE_GPROF #cmakedefine ENABLE_GCOV #cmakedefine ENABLE_ASAN diff --git a/src/core.c b/src/core.c index 56fb4f6b..e94f4eba 100644 --- a/src/core.c +++ b/src/core.c @@ -4935,7 +4935,7 @@ status_done: return MDBX_SUCCESS; } -#if !MDBX_DEBUG && !defined(MDBX_USE_VALGRIND) && !defined(__SANITIZE_ADDRESS__) +#if !MDBX_DEBUG && !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) if (unlikely(txn->mt_env->me_flags & MDBX_PAGEPERTURB)) #endif { @@ -4952,7 +4952,7 @@ status_done: goto skip_invalidate; } -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) if (MDBX_DEBUG != 0 || unlikely(txn->mt_env->me_flags & MDBX_PAGEPERTURB)) #endif kill_page(txn, mp, pgno, npages); 
@@ -6485,9 +6485,9 @@ __cold static int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, } const size_t limit_bytes = pgno_align2os_bytes(env, limit_pgno); const size_t size_bytes = pgno_align2os_bytes(env, size_pgno); -#if MDBX_ENABLE_MADVISE || defined(MDBX_USE_VALGRIND) +#if MDBX_ENABLE_MADVISE || defined(ENABLE_MEMCHECK) const void *const prev_map = env->me_dxb_mmap.base; -#endif /* MDBX_ENABLE_MADVISE || MDBX_USE_VALGRIND */ +#endif /* MDBX_ENABLE_MADVISE || ENABLE_MEMCHECK */ VERBOSE("resize/%d datafile/mapping: " "present %" PRIuPTR " -> %" PRIuPTR ", " @@ -6672,7 +6672,7 @@ bailout: env->me_dbgeo.now = env->me_dxb_mmap.current; env->me_dbgeo.upper = env->me_dxb_mmap.limit; adjust_defaults(env); -#ifdef MDBX_USE_VALGRIND +#ifdef ENABLE_MEMCHECK if (prev_limit != env->me_dxb_mmap.limit || prev_map != env->me_map) { VALGRIND_DISCARD(env->me_valgrind_handle); env->me_valgrind_handle = 0; @@ -6680,7 +6680,7 @@ bailout: env->me_valgrind_handle = VALGRIND_CREATE_BLOCK(env->me_map, env->me_dxb_mmap.limit, "mdbx"); } -#endif /* MDBX_USE_VALGRIND */ +#endif /* ENABLE_MEMCHECK */ } else { if (rc != MDBX_UNABLE_EXTEND_MAPSIZE && rc != MDBX_EPERM) { ERROR("failed resize datafile/mapping: " @@ -6979,9 +6979,9 @@ scan4seq_sse2(pgno_t *range, const size_t len, const size_t seq) { do { mask = (uint8_t)diffcmp2mask_sse2(range - 3, offset, pattern); if (mask) { -#ifndef __SANITIZE_ADDRESS__ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) found: -#endif /* __SANITIZE_ADDRESS__ */ +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ return range + 28 - __builtin_clz(mask); } range -= 4; @@ -6994,7 +6994,7 @@ scan4seq_sse2(pgno_t *range, const size_t len, const size_t seq) { * только за пределами региона выделенного под PNL, но и пересекать границу * страницы памяти. Что может приводить как к ошибкам ASAN, так и к падению. * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. 
*/ -#ifndef __SANITIZE_ADDRESS__ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) const unsigned on_page_safe_mask = 0xff0 /* enough for '-15' bytes offset */; if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && !RUNNING_ON_VALGRIND) { @@ -7006,7 +7006,7 @@ scan4seq_sse2(pgno_t *range, const size_t len, const size_t seq) { goto found; return nullptr; } -#endif /* __SANITIZE_ADDRESS__ */ +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ do if (*range - range[offset] == target) return range; @@ -7050,9 +7050,9 @@ scan4seq_avx2(pgno_t *range, const size_t len, const size_t seq) { do { mask = (uint8_t)diffcmp2mask_avx2(range - 7, offset, pattern); if (mask) { -#ifndef __SANITIZE_ADDRESS__ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) found: -#endif /* __SANITIZE_ADDRESS__ */ +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ return range + 24 - __builtin_clz(mask); } range -= 8; @@ -7065,7 +7065,7 @@ scan4seq_avx2(pgno_t *range, const size_t len, const size_t seq) { * только за пределами региона выделенного под PNL, но и пересекать границу * страницы памяти. Что может приводить как к ошибкам ASAN, так и к падению. * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. 
*/ -#ifndef __SANITIZE_ADDRESS__ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) const unsigned on_page_safe_mask = 0xfe0 /* enough for '-31' bytes offset */; if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && !RUNNING_ON_VALGRIND) { @@ -7077,7 +7077,7 @@ scan4seq_avx2(pgno_t *range, const size_t len, const size_t seq) { goto found; return nullptr; } -#endif /* __SANITIZE_ADDRESS__ */ +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ if (range - 3 > detent) { mask = diffcmp2mask_sse2avx(range - 3, offset, *(const __m128i *)&pattern); if (mask) @@ -7118,9 +7118,9 @@ scan4seq_avx512bw(pgno_t *range, const size_t len, const size_t seq) { do { mask = diffcmp2mask_avx512bw(range - 15, offset, pattern); if (mask) { -#ifndef __SANITIZE_ADDRESS__ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) found: -#endif /* __SANITIZE_ADDRESS__ */ +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ return range + 16 - __builtin_clz(mask); } range -= 16; @@ -7133,7 +7133,7 @@ scan4seq_avx512bw(pgno_t *range, const size_t len, const size_t seq) { * только за пределами региона выделенного под PNL, но и пересекать границу * страницы памяти. Что может приводить как к ошибкам ASAN, так и к падению. * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. 
*/ -#ifndef __SANITIZE_ADDRESS__ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) const unsigned on_page_safe_mask = 0xfc0 /* enough for '-63' bytes offset */; if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && !RUNNING_ON_VALGRIND) { @@ -7145,7 +7145,7 @@ scan4seq_avx512bw(pgno_t *range, const size_t len, const size_t seq) { goto found; return nullptr; } -#endif /* __SANITIZE_ADDRESS__ */ +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ if (range - 7 > detent) { mask = diffcmp2mask_avx2(range - 7, offset, *(const __m256i *)&pattern); if (mask) @@ -7198,9 +7198,9 @@ __hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len, do { mask = diffcmp2mask_neon(range - 3, offset, pattern); if (mask) { -#ifndef __SANITIZE_ADDRESS__ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) found: -#endif /* __SANITIZE_ADDRESS__ */ +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ return ptr_disp(range, -(__builtin_clzl(mask) >> sizeof(size_t) / 4)); } range -= 4; @@ -7213,7 +7213,7 @@ __hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len, * только за пределами региона выделенного под PNL, но и пересекать границу * страницы памяти. Что может приводить как к ошибкам ASAN, так и к падению. * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. 
*/ -#ifndef __SANITIZE_ADDRESS__ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) const unsigned on_page_safe_mask = 0xff0 /* enough for '-15' bytes offset */; if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && !RUNNING_ON_VALGRIND) { @@ -7225,7 +7225,7 @@ __hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len, goto found; return nullptr; } -#endif /* __SANITIZE_ADDRESS__ */ +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ do if (*range - range[offset] == target) return range; @@ -8712,7 +8712,7 @@ __cold int mdbx_env_sync_ex(MDBX_env *env, bool force, bool nonblock) { return env_sync(env, force, nonblock); } -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) /* Find largest mvcc-snapshot still referenced by this process. */ static pgno_t find_largest_this(MDBX_env *env, pgno_t largest) { MDBX_lockinfo *const lck = env->me_lck_mmap.lck; @@ -8790,7 +8790,7 @@ static void txn_valgrind(MDBX_env *env, MDBX_txn *txn) { osal_txn_unlock(env); } } -#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ typedef struct { int err; @@ -9526,9 +9526,9 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { if (rc != MDBX_SUCCESS) goto bailout; } -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) txn_valgrind(env, txn); -#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ return MDBX_SUCCESS; } bailout: @@ -10112,9 +10112,9 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { eASSERT(env, txn->mt_txnid == slot->mr_txnid.weak && slot->mr_txnid.weak >= env->me_lck->mti_oldest_reader.weak); -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) txn_valgrind(env, nullptr); -#endif /* 
MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ atomic_store32(&slot->mr_snapshot_pages_used, 0, mo_Relaxed); safe64_reset(&slot->mr_txnid, false); atomic_store32(&env->me_lck->mti_readers_refresh_flag, true, @@ -10140,10 +10140,10 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { ENSURE(env, txn->mt_txnid >= /* paranoia is appropriate here */ env->me_lck ->mti_oldest_reader.weak); -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) if (txn == env->me_txn0) txn_valgrind(env, nullptr); -#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ txn->mt_flags = MDBX_TXN_FINISHED; env->me_txn = txn->mt_parent; @@ -10588,13 +10588,13 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx) { } static __inline void gcu_clean_reserved(MDBX_env *env, MDBX_val pnl) { -#if MDBX_DEBUG && (defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)) +#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)) /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() * вызванное через макрос DVAL_DEBUG() на выходе * из cursor_set(MDBX_SET_KEY), которая вызывается ниже внутри update_gc() в * цикле очистки и цикле заполнения зарезервированных элементов. 
*/ memset(pnl.iov_base, 0xBB, pnl.iov_len); -#endif /* MDBX_DEBUG && (MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__) */ +#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */ /* PNL is initially empty, zero out at least the length */ memset(pnl.iov_base, 0, sizeof(pgno_t)); @@ -10911,14 +10911,14 @@ retry: if (unlikely(rc != MDBX_SUCCESS)) goto bailout; -#if MDBX_DEBUG && (defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)) +#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)) /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() * вызванное через макрос DVAL_DEBUG() на выходе * из cursor_set(MDBX_SET_KEY), которая вызывается как выше в цикле * очистки, так и ниже в цикле заполнения зарезервированных элементов. */ memset(data.iov_base, 0xBB, data.iov_len); -#endif /* MDBX_DEBUG && (MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__) */ +#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */ if (retired_pages_before == MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { const size_t at = (ctx->lifo == MDBX_PNL_ASCENDING) @@ -10958,14 +10958,14 @@ retry: if (unlikely(rc != MDBX_SUCCESS)) goto bailout; -#if MDBX_DEBUG && (defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)) +#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)) /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() * вызванное через макрос DVAL_DEBUG() на выходе * из cursor_set(MDBX_SET_KEY), которая вызывается как выше в цикле * очистки, так и ниже в цикле заполнения зарезервированных элементов. 
*/ memset(data.iov_base, 0xBB, data.iov_len); -#endif /* MDBX_DEBUG && (MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__) */ +#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */ /* Retry if tw.retired_pages[] grew during the Put() */ } while (data.iov_len < MDBX_PNL_SIZEOF(txn->tw.retired_pages)); @@ -12967,7 +12967,7 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending, : pending->mm_geo.next); eASSERT(env, largest_pgno >= NUM_METAS); -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) const pgno_t edge = env->me_poison_edge; if (edge > largest_pgno) { env->me_poison_edge = largest_pgno; @@ -12978,7 +12978,7 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending, ptr_disp(env->me_map, pgno2bytes(env, largest_pgno)), pgno2bytes(env, edge - largest_pgno)); } -#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ #if MDBX_ENABLE_MADVISE && \ (defined(MADV_DONTNEED) || defined(POSIX_MADV_DONTNEED)) @@ -14188,14 +14188,14 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc, #endif /* MADV_DODUMP */ #endif /* MDBX_ENABLE_MADVISE */ -#ifdef MDBX_USE_VALGRIND +#ifdef ENABLE_MEMCHECK env->me_valgrind_handle = VALGRIND_CREATE_BLOCK(env->me_map, env->me_dxb_mmap.limit, "mdbx"); -#endif /* MDBX_USE_VALGRIND */ +#endif /* ENABLE_MEMCHECK */ eASSERT(env, used_bytes >= pgno2bytes(env, NUM_METAS) && used_bytes <= env->me_dxb_mmap.limit); -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) if (env->me_dxb_mmap.filesize > used_bytes && env->me_dxb_mmap.filesize < env->me_dxb_mmap.limit) { VALGRIND_MAKE_MEM_NOACCESS(ptr_disp(env->me_map, used_bytes), @@ -14207,7 +14207,7 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc, bytes2pgno(env, (env->me_dxb_mmap.filesize < env->me_dxb_mmap.limit) ? 
env->me_dxb_mmap.filesize : env->me_dxb_mmap.limit); -#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ meta_troika_t troika = meta_tap(env); #if MDBX_DEBUG @@ -15681,9 +15681,9 @@ bailout: env->me_flags = saved_me_flags | ((rc != MDBX_PANIC) ? 0 : MDBX_FATAL_ERROR); } else { -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) txn_valgrind(env, nullptr); -#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ } osal_free(env_pathname.buffer_for_free); return rc; @@ -15725,7 +15725,7 @@ __cold static int env_close(MDBX_env *env) { if (env->me_map) { osal_munmap(&env->me_dxb_mmap); -#ifdef MDBX_USE_VALGRIND +#ifdef ENABLE_MEMCHECK VALGRIND_DISCARD(env->me_valgrind_handle); env->me_valgrind_handle = -1; #endif @@ -28392,9 +28392,9 @@ __dll_export #ifdef __SANITIZE_ADDRESS__ " SANITIZE_ADDRESS=YES" #endif /* __SANITIZE_ADDRESS__ */ -#ifdef MDBX_USE_VALGRIND - " MDBX_USE_VALGRIND=YES" -#endif /* MDBX_USE_VALGRIND */ +#ifdef ENABLE_MEMCHECK + " ENABLE_MEMCHECK=YES" +#endif /* ENABLE_MEMCHECK */ #if MDBX_FORCE_ASSERTIONS " MDBX_FORCE_ASSERTIONS=YES" #endif /* MDBX_FORCE_ASSERTIONS */ diff --git a/src/internals.h b/src/internals.h index f38c7f7a..6a5d8018 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1493,12 +1493,12 @@ struct MDBX_env { #if MDBX_DEBUG MDBX_assert_func *me_assert_func; /* Callback for assertion failures */ #endif -#ifdef MDBX_USE_VALGRIND +#ifdef ENABLE_MEMCHECK int me_valgrind_handle; #endif -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) pgno_t me_poison_edge; -#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ #ifndef xMDBX_DEBUG_SPILLING #define xMDBX_DEBUG_SPILLING 0 diff --git a/src/options.h b/src/options.h index 
9aff6755..fe47904e 100644 --- a/src/options.h +++ b/src/options.h @@ -224,8 +224,8 @@ /** If defined then enables integration with Valgrind, * a memory analyzing tool. */ -#ifndef MDBX_USE_VALGRIND -#endif /* MDBX_USE_VALGRIND */ +#ifndef ENABLE_MEMCHECK +#endif /* ENABLE_MEMCHECK */ /** If defined then enables use C11 atomics, * otherwise detects ones availability automatically. */ From 24d5b26bc50b333466d8e17b0c45b3b80ccfb13a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 2 Nov 2023 20:10:59 +0300 Subject: [PATCH 033/443] =?UTF-8?q?mdbx-make:=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D0=B8=D0=BC=D0=B5=D0=BD=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20?= =?UTF-8?q?=D1=86=D0=B5=D0=BB=D0=B5=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GNUmakefile | 47 ++++++++++++++++++++++++++++------------------- Makefile | 7 ++++--- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index d6e222b5..0ddce68e 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -172,22 +172,22 @@ help: @echo " make bench-clean - remove temp database(s) after benchmark" #> dist-cutoff-begin @echo "" - @echo " make smoke - fast smoke test" - @echo " make test - basic test" @echo " make check - smoke test with amalgamation and installation checking" - @echo " make long-test - execute long test which runs for several weeks, or until you interrupt it" - @echo " make memcheck - build with Valgrind's and smoke test with memcheck tool" - @echo " make test-valgrind - build with Valgrind's and basic test with memcheck tool" - @echo " make test-asan - build with AddressSanitizer and basic test" - @echo " make test-leak - build with LeakSanitizer and basic test" - @echo " make test-ubsan - build with UndefinedBehaviourSanitizer and basic test" + @echo " make smoke - fast smoke test" + @echo " make smoke-memcheck - 
build with Valgrind support and run smoke test under memcheck tool" + @echo " make smoke-fault - execute transaction owner failure smoke testcase" + @echo " make smoke-singleprocess - execute single-process smoke test" + @echo " make test - basic test" + @echo " make test-memcheck - build with Valgrind support and run basic test under memcheck tool" + @echo " make test-long - execute long test which runs for several weeks, or until interruption" + @echo " make test-asan - build with AddressSanitizer and run basic test" + @echo " make test-leak - build with LeakSanitizer and run basic test" + @echo " make test-ubsan - build with UndefinedBehaviourSanitizer and run basic test" + @echo " make test-singleprocess - execute single-process basic test (also used by make cross-qemu)" @echo " make cross-gcc - check cross-compilation without test execution" @echo " make cross-qemu - run cross-compilation and execution basic test with QEMU" @echo " make gcc-analyzer - run gcc-analyzer (mostly useless for now)" @echo " make build-test - build test executable(s)" - @echo " make smoke-fault - execute transaction owner failure smoke testcase" - @echo " make smoke-singleprocess - execute single-process smoke test" - @echo " make test-singleprocess - execute single-process basic test (also used by make cross-qemu)" @echo "" @echo " make dist - build amalgamated source code" @echo " make doxygen - build HTML documentation" @@ -328,8 +328,14 @@ else .PHONY: build-test build-test-with-valgrind check cross-gcc cross-qemu dist doxygen gcc-analyzer long-test .PHONY: reformat release-assets tags smoke test test-asan smoke-fault test-leak -.PHONY: smoke-singleprocess test-singleprocess test-ubsan test-valgrind memcheck -.PHONY: smoke-assertion test-assertion long-test-assertion +.PHONY: smoke-singleprocess test-singleprocess test-ubsan test-valgrind test-memcheck memcheck smoke-memcheck +.PHONY: smoke-assertion test-assertion long-test-assertion test-ci test-ci-extra + +test-ci-extra: 
test-ci cross-gcc cross-qemu + +test-ci: check \ + smoke-singleprocess smoke-fault smoke-memcheck smoke \ + test-leak test-asan test-ubsan test-singleprocess test test-memcheck define uname2osal case "$(UNAME)" in @@ -420,7 +426,8 @@ test: build-test @echo ' RUNNING `test/long_stochastic.sh --loops 2`...' $(QUIET)test/long_stochastic.sh --dont-check-ram-size --loops 2 --db-upto-mb 256 --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) -long-test: build-test +long-test: test-long +test-long: build-test @echo ' RUNNING `test/long_stochastic.sh --loops 42`...' $(QUIET)test/long_stochastic.sh --loops 42 --db-upto-mb 1024 --skip-make --taillog @@ -428,14 +435,16 @@ test-singleprocess: build-test @echo ' RUNNING `test/long_stochastic.sh --single --loops 2`...' $(QUIET)test/long_stochastic.sh --dont-check-ram-size --single --loops 2 --db-upto-mb 256 --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) -test-valgrind: CFLAGS_EXTRA=-Ofast -DENABLE_MEMCHECK -test-valgrind: build-test +test-valgrind: test-memcheck +test-memcheck: CFLAGS_EXTRA=-Ofast -DENABLE_MEMCHECK +test-memcheck: build-test @echo ' RUNNING `test/long_stochastic.sh --with-valgrind --loops 2`...' $(QUIET)test/long_stochastic.sh --with-valgrind --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false) -memcheck: VALGRIND=valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt -memcheck: CFLAGS_EXTRA=-Ofast -DENABLE_MEMCHECK -memcheck: build-test +memcheck: smoke-memcheck +smoke-memcheck: VALGRIND=valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt +smoke-memcheck: CFLAGS_EXTRA=-Ofast -DENABLE_MEMCHECK +smoke-memcheck: build-test @echo " SMOKE \`mdbx_test basic\` under Valgrind's memcheck..." 
$(QUIET)rm -f valgrind-*.log $(TEST_DB) $(TEST_LOG).gz && (set -o pipefail; ( \ $(VALGRIND) ./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=2 --pathname=$(TEST_DB) --dont-cleanup-after $(MDBX_SMOKE_EXTRA) basic && \ diff --git a/Makefile b/Makefile index 599e4787..78ba3483 100644 --- a/Makefile +++ b/Makefile @@ -6,9 +6,10 @@ bench bench-clean bench-couple bench-quartet bench-triplet re-bench \ lib libs lib-static lib-shared tools-static \ libmdbx mdbx mdbx_chk mdbx_copy mdbx_drop mdbx_dump mdbx_load mdbx_stat \ check dist memcheck cross-gcc cross-qemu doxygen gcc-analyzer reformat \ -release-assets tags test build-test mdbx_test smoke smoke-fault smoke-singleprocess \ -smoke-assertion test-assertion long-test-assertion \ -test-asan test-leak test-singleprocess test-ubsan test-valgrind: +release-assets tags build-test mdbx_test \ +smoke smoke-fault smoke-singleprocess smoke-assertion smoke-memcheck \ +test test-assertion test-long test-long-assertion test-ci test-ci-extra \ +test-asan test-leak test-singleprocess test-ubsan test-memcheck: @CC=$(CC) \ CXX=`if test -n "$(CXX)" && which "$(CXX)" > /dev/null; then echo "$(CXX)"; elif test -n "$(CCC)" && which "$(CCC)" > /dev/null; then echo "$(CCC)"; else echo "c++"; fi` \ `which gmake || which gnumake || echo 'echo "GNU Make 3.80 or above is required"; exit 2;'` \ From 9a6f8a1bf86de125d81714c3b121394740befbc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 10 Nov 2023 12:33:21 +0300 Subject: [PATCH 034/443] =?UTF-8?q?mdbx-test:=20=D1=83=D0=B2=D0=B5=D0=BB?= =?UTF-8?q?=D0=B8=D1=87=D0=B5=D0=BD=D0=B8=D0=B5=20tail-log=20=D0=B4=D0=BE?= =?UTF-8?q?=203333=20=D0=B4=D0=BB=D1=8F=20CI.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/long_stochastic.sh | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index c03c83da..ff73726c 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -41,7 +41,7 @@ do exit -2 ;; --taillog) - TAILLOG=999 + TAILLOG=3333 ;; --multi) LIST=basic From dea6570fc192814a06ae9d6bd4f1501e5cab3bc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 10 Nov 2023 13:48:06 +0300 Subject: [PATCH 035/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20`coherency=5Fcheck()`=20=D0=B4=D0=BB?= =?UTF-8?q?=D1=8F=20=D1=81=D0=BB=D1=83=D1=87=D0=B0=D1=8F=20=D0=BF=D0=BB?= =?UTF-8?q?=D0=BE=D1=85=D0=B8=D1=85=20=D0=BD=D0=BE=D0=BC=D0=B5=D1=80=D0=BE?= =?UTF-8?q?=D0=B2=20=D0=BA=D0=BE=D1=80=D0=BD=D0=B5=D0=B2=D1=8B=D1=85=20?= =?UTF-8?q?=D1=81=D1=82=D1=80=D0=B0=D0=BD=D0=B8=D1=86.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/core.c b/src/core.c index e94f4eba..6d62234e 100644 --- a/src/core.c +++ b/src/core.c @@ -8939,20 +8939,45 @@ static bool coherency_check(const MDBX_env *env, const txnid_t txnid, const volatile MDBX_meta *meta, bool report) { const txnid_t freedb_mod_txnid = dbs[FREE_DBI].md_mod_txnid; const txnid_t maindb_mod_txnid = dbs[MAIN_DBI].md_mod_txnid; + const pgno_t last_pgno = meta->mm_geo.now; const pgno_t freedb_root_pgno = dbs[FREE_DBI].md_root; - const MDBX_page *freedb_root = (env->me_map && freedb_root_pgno != P_INVALID) + const MDBX_page *freedb_root = (env->me_map && freedb_root_pgno < last_pgno) ? 
pgno2page(env, freedb_root_pgno) : nullptr; const pgno_t maindb_root_pgno = dbs[MAIN_DBI].md_root; - const MDBX_page *maindb_root = (env->me_map && maindb_root_pgno != P_INVALID) + const MDBX_page *maindb_root = (env->me_map && maindb_root_pgno < last_pgno) ? pgno2page(env, maindb_root_pgno) : nullptr; const uint64_t magic_and_version = unaligned_peek_u64_volatile(4, &meta->mm_magic_and_version); bool ok = true; + if (freedb_root_pgno != P_INVALID && + unlikely(freedb_root_pgno >= last_pgno)) { + if (report) + WARNING( + "catch invalid %sdb root %" PRIaPGNO " for meta_txnid %" PRIaTXN + " %s", + "free", freedb_root_pgno, txnid, + (env->me_stuck_meta < 0) + ? "(workaround for incoherent flaw of unified page/buffer cache)" + : "(wagering meta)"); + ok = false; + } + if (maindb_root_pgno != P_INVALID && + unlikely(maindb_root_pgno >= last_pgno)) { + if (report) + WARNING( + "catch invalid %sdb root %" PRIaPGNO " for meta_txnid %" PRIaTXN + " %s", + "main", maindb_root_pgno, txnid, + (env->me_stuck_meta < 0) + ? 
"(workaround for incoherent flaw of unified page/buffer cache)" + : "(wagering meta)"); + ok = false; + } if (unlikely(txnid < freedb_mod_txnid || (!freedb_mod_txnid && freedb_root && likely(magic_and_version == MDBX_DATA_MAGIC)))) { From 6477e6c5de3afb3ac47f960bbe1cfe049e11f794 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 11 Nov 2023 00:00:33 +0300 Subject: [PATCH 036/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B0=D0=B2=D1=82=D0=BE?= =?UTF-8?q?-=D1=83=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BA=D0=B8=20`MDBX?= =?UTF-8?q?=5FENV=5FCHECKPID`=20=D0=B4=D0=BB=D1=8F=20=D1=81=D0=BB=D1=83?= =?UTF-8?q?=D1=87=D0=B0=D1=8F=20`MDBX=5FENABLE=5FMADVISE=3D0`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/options.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/options.h b/src/options.h index fe47904e..21dd57bc 100644 --- a/src/options.h +++ b/src/options.h @@ -28,9 +28,17 @@ #define MDBX_OSX_SPEED_INSTEADOF_DURABILITY MDBX_OSX_WANNA_DURABILITY #endif /* MDBX_OSX_SPEED_INSTEADOF_DURABILITY */ +/** Controls using of POSIX' madvise() and/or similar hints. */ +#ifndef MDBX_ENABLE_MADVISE +#define MDBX_ENABLE_MADVISE 1 +#elif !(MDBX_ENABLE_MADVISE == 0 || MDBX_ENABLE_MADVISE == 1) +#error MDBX_ENABLE_MADVISE must be defined as 0 or 1 +#endif /* MDBX_ENABLE_MADVISE */ + /** Controls checking PID against reuse DB environment after the fork() */ #ifndef MDBX_ENV_CHECKPID -#if defined(MADV_DONTFORK) || defined(_WIN32) || defined(_WIN64) +#if (defined(MADV_DONTFORK) && MDBX_ENABLE_MADVISE) || defined(_WIN32) || \ + defined(_WIN64) /* PID check could be omitted: * - on Linux when madvise(MADV_DONTFORK) is available, i.e. after the fork() * mapped pages will not be available for child process. 
@@ -117,13 +125,6 @@ #error MDBX_ENABLE_BIGFOOT must be defined as 0 or 1 #endif /* MDBX_ENABLE_BIGFOOT */ -/** Controls using of POSIX' madvise() and/or similar hints. */ -#ifndef MDBX_ENABLE_MADVISE -#define MDBX_ENABLE_MADVISE 1 -#elif !(MDBX_ENABLE_MADVISE == 0 || MDBX_ENABLE_MADVISE == 1) -#error MDBX_ENABLE_MADVISE must be defined as 0 or 1 -#endif /* MDBX_ENABLE_MADVISE */ - /** Disable some checks to reduce an overhead and detection probability of * database corruption to a values closer to the LMDB. */ #ifndef MDBX_DISABLE_VALIDATION From 2fe01eee89eb205512127f5c6440419ecc41c15c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 11 Nov 2023 20:07:21 +0300 Subject: [PATCH 037/443] =?UTF-8?q?mdbx:=20=D0=BF=D1=80=D0=BE=D0=B2=D0=B5?= =?UTF-8?q?=D1=80=D0=BA=D0=B0=20pid=20=D1=82=D0=BE=D0=BB=D1=8C=D0=BA=D0=BE?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20=D0=B7=D0=B0=D0=BF=D1=80=D0=BE=D1=81?= =?UTF-8?q?=D0=B0=20=D0=B0=D0=BA=D1=82=D0=B8=D0=B2=D0=BD=D0=BE=D0=B9=20env?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/core.c b/src/core.c index 6d62234e..3d3f22ac 100644 --- a/src/core.c +++ b/src/core.c @@ -8685,17 +8685,16 @@ static __inline int check_env(const MDBX_env *env, const bool wanna_active) { if (unlikely(env->me_signature.weak != MDBX_ME_SIGNATURE)) return MDBX_EBADSIGN; -#if MDBX_ENV_CHECKPID - if (unlikely(env->me_pid != osal_getpid())) { - ((MDBX_env *)env)->me_flags |= MDBX_FATAL_ERROR; - return MDBX_PANIC; - } -#endif /* MDBX_ENV_CHECKPID */ - if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) return MDBX_PANIC; if (wanna_active) { +#if MDBX_ENV_CHECKPID + if (unlikely(env->me_pid != osal_getpid())) { + ((MDBX_env *)env)->me_flags |= MDBX_FATAL_ERROR; + return MDBX_PANIC; + } +#endif /* MDBX_ENV_CHECKPID 
*/ if (unlikely((env->me_flags & MDBX_ENV_ACTIVE) == 0)) return MDBX_EPERM; eASSERT(env, env->me_map != nullptr); From 1b2f5f25d4e6cd66578b96c586ffd18acab9404e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 11 Nov 2023 20:10:44 +0300 Subject: [PATCH 038/443] =?UTF-8?q?mdbx:=20=D1=80=D0=B5=D1=84=D0=B0=D0=BA?= =?UTF-8?q?=D1=82=D0=BE=D1=80=D0=B8=D0=BD=D0=B3=20=D0=B8=20=D0=B2=D1=8B?= =?UTF-8?q?=D0=BD=D0=BE=D1=81=20`txn=5Fabort()`=20=D0=B1=D0=B5=D0=B7=20?= =?UTF-8?q?=D0=BA=D0=BE=D0=B4=D0=B0=20=D0=B2=D1=85=D0=BE=D0=B4=D1=8F=D1=89?= =?UTF-8?q?=D0=B8=D1=85=20=D0=BF=D0=B5=D1=80=D0=BE=D0=B2=D0=B5=D1=80=D0=BE?= =?UTF-8?q?=D0=BA.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/core.c b/src/core.c index 3d3f22ac..bc96a044 100644 --- a/src/core.c +++ b/src/core.c @@ -10108,13 +10108,6 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { MDBX_env *env = txn->mt_env; static const char *const names[] = TXN_END_NAMES; -#if MDBX_ENV_CHECKPID - if (unlikely(txn->mt_env->me_pid != osal_getpid())) { - env->me_flags |= MDBX_FATAL_ERROR; - return MDBX_PANIC; - } -#endif /* MDBX_ENV_CHECKPID */ - DEBUG("%s txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, names[mode & TXN_END_OPMASK], txn->mt_txnid, @@ -10288,11 +10281,7 @@ int mdbx_txn_break(MDBX_txn *txn) { return MDBX_SUCCESS; } -int mdbx_txn_abort(MDBX_txn *txn) { - int rc = check_txn(txn, 0); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - +static int txn_abort(MDBX_txn *txn) { if (txn->mt_flags & MDBX_TXN_RDONLY) /* LY: don't close DBI-handles */ return txn_end(txn, TXN_END_ABORT | TXN_END_UPDATE | TXN_END_SLOT | @@ -10302,12 +10291,24 @@ int mdbx_txn_abort(MDBX_txn *txn) { return MDBX_BAD_TXN; if (txn->mt_child) - 
mdbx_txn_abort(txn->mt_child); + txn_abort(txn->mt_child); tASSERT(txn, (txn->mt_flags & MDBX_TXN_ERROR) || dirtylist_check(txn)); return txn_end(txn, TXN_END_ABORT | TXN_END_SLOT | TXN_END_FREE); } +int mdbx_txn_abort(MDBX_txn *txn) { + int rc = check_txn(txn, 0); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = check_env(txn->mt_env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + return txn_abort(txn); +} + __cold static MDBX_db *audit_db_dig(const MDBX_txn *txn, const size_t dbi, MDBX_db *fallback) { const MDBX_txn *dig = txn; From 97418d5c9c3d55bf25e1c196601cdfb1e541740f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 11 Nov 2023 20:16:06 +0300 Subject: [PATCH 039/443] =?UTF-8?q?mdbx:=20=D1=80=D0=B5=D1=84=D0=B0=D0=BA?= =?UTF-8?q?=D1=82=D0=BE=D1=80=D0=B8=D0=BD=D0=B3=20`env=5Fhandle=5Fpathname?= =?UTF-8?q?()`=20=D0=B4=D0=BB=D1=8F=20=D0=BE=D0=B4=D0=BD=D0=BE=D0=B9=20?= =?UTF-8?q?=D1=82=D0=BE=D1=87=D0=BA=D0=B8=20=D0=B2=D1=8B=D0=B4=D0=B5=D0=BB?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D1=8F/=D0=BE=D1=81=D0=B2=D0=BE=D0=B1=D0=BE?= =?UTF-8?q?=D0=B6=D0=B4=D0=B5=D0=BD=D0=B8=D1=8F=20=D0=BF=D0=B0=D0=BC=D1=8F?= =?UTF-8?q?=D1=82=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 203 ++++++++++++++++++++++-------------------------- src/internals.h | 10 ++- 2 files changed, 101 insertions(+), 112 deletions(-) diff --git a/src/core.c b/src/core.c index bc96a044..03bd80ed 100644 --- a/src/core.c +++ b/src/core.c @@ -14535,12 +14535,12 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc, /******************************************************************************/ /* Open and/or initialize the lock region for the environment. 
*/ -__cold static int setup_lck(MDBX_env *env, pathchar_t *lck_pathname, - mdbx_mode_t mode) { +__cold static int setup_lck(MDBX_env *env, mdbx_mode_t mode) { eASSERT(env, env->me_lazy_fd != INVALID_HANDLE_VALUE); eASSERT(env, env->me_lfd == INVALID_HANDLE_VALUE); - int err = osal_openfile(MDBX_OPEN_LCK, env, lck_pathname, &env->me_lfd, mode); + int err = osal_openfile(MDBX_OPEN_LCK, env, env->me_pathname.lck, + &env->me_lfd, mode); if (err != MDBX_SUCCESS) { switch (err) { default: @@ -14559,7 +14559,7 @@ __cold static int setup_lck(MDBX_env *env, pathchar_t *lck_pathname, if (err != MDBX_ENOFILE) { /* ENSURE the file system is read-only */ - err = osal_check_fs_rdonly(env->me_lazy_fd, lck_pathname, err); + err = osal_check_fs_rdonly(env->me_lazy_fd, env->me_pathname.lck, err); if (err != MDBX_SUCCESS && /* ignore ERROR_NOT_SUPPORTED for exclusive mode */ !(err == MDBX_ENOSYS && (env->me_flags & MDBX_EXCLUSIVE))) @@ -14965,12 +14965,6 @@ __cold int mdbx_env_open_for_recoveryW(MDBX_env *env, const wchar_t *pathname, 0); } -typedef struct { - void *buffer_for_free; - pathchar_t *lck, *dxb; - size_t ent_len; -} MDBX_handle_env_pathname; - __cold static int check_alternative_lck_absent(const pathchar_t *lck_pathname) { int err = osal_fileexists(lck_pathname); if (unlikely(err != MDBX_RESULT_FALSE)) { @@ -14982,11 +14976,9 @@ __cold static int check_alternative_lck_absent(const pathchar_t *lck_pathname) { return err; } -__cold static int handle_env_pathname(MDBX_handle_env_pathname *ctx, - const pathchar_t *pathname, - MDBX_env_flags_t *flags, +__cold static int env_handle_pathname(MDBX_env *env, const pathchar_t *pathname, const mdbx_mode_t mode) { - memset(ctx, 0, sizeof(*ctx)); + memset(&env->me_pathname, 0, sizeof(env->me_pathname)); if (unlikely(!pathname || !*pathname)) return MDBX_EINVAL; @@ -14997,21 +14989,22 @@ __cold static int handle_env_pathname(MDBX_handle_env_pathname *ctx, rc = GetLastError(); if (rc != MDBX_ENOFILE) return rc; - if (mode == 0 || 
(*flags & MDBX_RDONLY) != 0) + if (mode == 0 || (env->me_flags & MDBX_RDONLY) != 0) /* can't open existing */ return rc; /* auto-create directory if requested */ - if ((*flags & MDBX_NOSUBDIR) == 0 && !CreateDirectoryW(pathname, nullptr)) { + if ((env->me_flags & MDBX_NOSUBDIR) == 0 && + !CreateDirectoryW(pathname, nullptr)) { rc = GetLastError(); if (rc != ERROR_ALREADY_EXISTS) return rc; } } else { /* ignore passed MDBX_NOSUBDIR flag and set it automatically */ - *flags |= MDBX_NOSUBDIR; + env->me_flags |= MDBX_NOSUBDIR; if (dwAttrib & FILE_ATTRIBUTE_DIRECTORY) - *flags -= MDBX_NOSUBDIR; + env->me_flags -= MDBX_NOSUBDIR; } #else struct stat st; @@ -15019,7 +15012,7 @@ __cold static int handle_env_pathname(MDBX_handle_env_pathname *ctx, rc = errno; if (rc != MDBX_ENOFILE) return rc; - if (mode == 0 || (*flags & MDBX_RDONLY) != 0) + if (mode == 0 || (env->me_flags & MDBX_RDONLY) != 0) /* can't open non-existing */ return rc /* MDBX_ENOFILE */; @@ -15030,16 +15023,16 @@ __cold static int handle_env_pathname(MDBX_handle_env_pathname *ctx, /* always add read/write/search for owner */ S_IRWXU | ((mode & S_IRGRP) ? /* +search if readable by group */ S_IXGRP : 0) | ((mode & S_IROTH) ? 
/* +search if readable by others */ S_IXOTH : 0); - if ((*flags & MDBX_NOSUBDIR) == 0 && mkdir(pathname, dir_mode)) { + if ((env->me_flags & MDBX_NOSUBDIR) == 0 && mkdir(pathname, dir_mode)) { rc = errno; if (rc != EEXIST) return rc; } } else { /* ignore passed MDBX_NOSUBDIR flag and set it automatically */ - *flags |= MDBX_NOSUBDIR; + env->me_flags |= MDBX_NOSUBDIR; if (S_ISDIR(st.st_mode)) - *flags -= MDBX_NOSUBDIR; + env->me_flags -= MDBX_NOSUBDIR; } #endif @@ -15055,41 +15048,42 @@ __cold static int handle_env_pathname(MDBX_handle_env_pathname *ctx, const size_t pathname_len = strlen(pathname); #endif assert(!osal_isdirsep(lock_suffix[0])); - ctx->ent_len = pathname_len; + size_t base_len = pathname_len; static const size_t dxb_name_len = ARRAY_LENGTH(dxb_name) - 1; - if (*flags & MDBX_NOSUBDIR) { - if (ctx->ent_len > dxb_name_len && - osal_pathequal(pathname + ctx->ent_len - dxb_name_len, dxb_name, + if (env->me_flags & MDBX_NOSUBDIR) { + if (base_len > dxb_name_len && + osal_pathequal(pathname + base_len - dxb_name_len, dxb_name, dxb_name_len)) { - *flags -= MDBX_NOSUBDIR; - ctx->ent_len -= dxb_name_len; - } else if (ctx->ent_len == dxb_name_len - 1 && osal_isdirsep(dxb_name[0]) && + env->me_flags -= MDBX_NOSUBDIR; + base_len -= dxb_name_len; + } else if (base_len == dxb_name_len - 1 && osal_isdirsep(dxb_name[0]) && osal_isdirsep(lck_name[0]) && - osal_pathequal(pathname + ctx->ent_len - dxb_name_len + 1, + osal_pathequal(pathname + base_len - dxb_name_len + 1, dxb_name + 1, dxb_name_len - 1)) { - *flags -= MDBX_NOSUBDIR; - ctx->ent_len -= dxb_name_len - 1; + env->me_flags -= MDBX_NOSUBDIR; + base_len -= dxb_name_len - 1; } } const size_t suflen_with_NOSUBDIR = sizeof(lock_suffix) + sizeof(pathchar_t); const size_t suflen_without_NOSUBDIR = sizeof(lck_name) + sizeof(dxb_name); - const size_t enogh4any = (suflen_with_NOSUBDIR > suflen_without_NOSUBDIR) - ? 
suflen_with_NOSUBDIR - : suflen_without_NOSUBDIR; - const size_t bytes_needed = sizeof(pathchar_t) * ctx->ent_len * 2 + enogh4any; - ctx->buffer_for_free = osal_malloc(bytes_needed); - if (!ctx->buffer_for_free) + const size_t enough4any = (suflen_with_NOSUBDIR > suflen_without_NOSUBDIR) + ? suflen_with_NOSUBDIR + : suflen_without_NOSUBDIR; + const size_t bytes_needed = + sizeof(pathchar_t) * (base_len * 2 + pathname_len + 1) + enough4any; + env->me_pathname.buffer = osal_malloc(bytes_needed); + if (!env->me_pathname.buffer) return MDBX_ENOMEM; - ctx->dxb = ctx->buffer_for_free; - ctx->lck = ctx->dxb + ctx->ent_len + dxb_name_len + 1; - pathchar_t *const buf = ctx->buffer_for_free; + env->me_pathname.specified = env->me_pathname.buffer; + env->me_pathname.dxb = env->me_pathname.specified + pathname_len + 1; + env->me_pathname.lck = env->me_pathname.dxb + base_len + dxb_name_len + 1; rc = MDBX_SUCCESS; - if (ctx->ent_len) { - memcpy(buf + /* shutting up goofy MSVC static analyzer */ 0, pathname, - sizeof(pathchar_t) * pathname_len); - if (*flags & MDBX_NOSUBDIR) { + pathchar_t *const buf = env->me_pathname.buffer; + if (base_len) { + memcpy(buf, pathname, sizeof(pathchar_t) * pathname_len); + if (env->me_flags & MDBX_NOSUBDIR) { const pathchar_t *const lck_ext = osal_fileext(lck_name, ARRAY_LENGTH(lck_name)); if (lck_ext) { @@ -15099,32 +15093,33 @@ __cold static int handle_env_pathname(MDBX_handle_env_pathname *ctx, rc = check_alternative_lck_absent(buf); } } else { - memcpy(buf + ctx->ent_len, dxb_name, sizeof(dxb_name)); - memcpy(buf + ctx->ent_len + dxb_name_len, lock_suffix, - sizeof(lock_suffix)); + memcpy(buf + base_len, dxb_name, sizeof(dxb_name)); + memcpy(buf + base_len + dxb_name_len, lock_suffix, sizeof(lock_suffix)); rc = check_alternative_lck_absent(buf); } - memcpy(ctx->dxb + /* shutting up goofy MSVC static analyzer */ 0, pathname, - sizeof(pathchar_t) * (ctx->ent_len + 1)); - memcpy(ctx->lck, pathname, sizeof(pathchar_t) * ctx->ent_len); - if 
(*flags & MDBX_NOSUBDIR) { - memcpy(ctx->lck + ctx->ent_len, lock_suffix, sizeof(lock_suffix)); + memcpy(env->me_pathname.dxb, pathname, sizeof(pathchar_t) * (base_len + 1)); + memcpy(env->me_pathname.lck, pathname, sizeof(pathchar_t) * base_len); + if (env->me_flags & MDBX_NOSUBDIR) { + memcpy(env->me_pathname.lck + base_len, lock_suffix, sizeof(lock_suffix)); } else { - memcpy(ctx->dxb + ctx->ent_len, dxb_name, sizeof(dxb_name)); - memcpy(ctx->lck + ctx->ent_len, lck_name, sizeof(lck_name)); + memcpy(env->me_pathname.dxb + base_len, dxb_name, sizeof(dxb_name)); + memcpy(env->me_pathname.lck + base_len, lck_name, sizeof(lck_name)); } } else { - assert(!(*flags & MDBX_NOSUBDIR)); - memcpy(buf + /* shutting up goofy MSVC static analyzer */ 0, dxb_name + 1, - sizeof(dxb_name) - sizeof(pathchar_t)); + assert(!(env->me_flags & MDBX_NOSUBDIR)); + memcpy(buf, dxb_name + 1, sizeof(dxb_name) - sizeof(pathchar_t)); memcpy(buf + dxb_name_len - 1, lock_suffix, sizeof(lock_suffix)); rc = check_alternative_lck_absent(buf); - memcpy(ctx->dxb + /* shutting up goofy MSVC static analyzer */ 0, - dxb_name + 1, sizeof(dxb_name) - sizeof(pathchar_t)); - memcpy(ctx->lck, lck_name + 1, sizeof(lck_name) - sizeof(pathchar_t)); + memcpy(env->me_pathname.dxb, dxb_name + 1, + sizeof(dxb_name) - sizeof(pathchar_t)); + memcpy(env->me_pathname.lck, lck_name + 1, + sizeof(lck_name) - sizeof(pathchar_t)); } + + memcpy(env->me_pathname.specified, pathname, + sizeof(pathchar_t) * (pathname_len + 1)); return rc; } @@ -15162,23 +15157,19 @@ __cold int mdbx_env_deleteW(const wchar_t *pathname, (mode == MDBX_ENV_ENSURE_UNUSED) ? 
MDBX_EXCLUSIVE : MDBX_ENV_DEFAULTS; dummy_env->me_os_psize = (unsigned)osal_syspagesize(); dummy_env->me_psize = (unsigned)mdbx_default_pagesize(); - dummy_env->me_pathname = (pathchar_t *)pathname; - MDBX_handle_env_pathname env_pathname; STATIC_ASSERT(sizeof(dummy_env->me_flags) == sizeof(MDBX_env_flags_t)); - int rc = MDBX_RESULT_TRUE, - err = handle_env_pathname(&env_pathname, pathname, - (MDBX_env_flags_t *)&dummy_env->me_flags, 0); + int rc = MDBX_RESULT_TRUE, err = env_handle_pathname(dummy_env, pathname, 0); if (likely(err == MDBX_SUCCESS)) { mdbx_filehandle_t clk_handle = INVALID_HANDLE_VALUE, dxb_handle = INVALID_HANDLE_VALUE; if (mode > MDBX_ENV_JUST_DELETE) { - err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, env_pathname.dxb, - &dxb_handle, 0); + err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, + dummy_env->me_pathname.dxb, &dxb_handle, 0); err = (err == MDBX_ENOFILE) ? MDBX_SUCCESS : err; if (err == MDBX_SUCCESS) { - err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, env_pathname.lck, - &clk_handle, 0); + err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, + dummy_env->me_pathname.lck, &clk_handle, 0); err = (err == MDBX_ENOFILE) ? 
MDBX_SUCCESS : err; } if (err == MDBX_SUCCESS && clk_handle != INVALID_HANDLE_VALUE) @@ -15188,7 +15179,7 @@ __cold int mdbx_env_deleteW(const wchar_t *pathname, } if (err == MDBX_SUCCESS) { - err = osal_removefile(env_pathname.dxb); + err = osal_removefile(dummy_env->me_pathname.dxb); if (err == MDBX_SUCCESS) rc = MDBX_SUCCESS; else if (err == MDBX_ENOFILE) @@ -15196,7 +15187,7 @@ __cold int mdbx_env_deleteW(const wchar_t *pathname, } if (err == MDBX_SUCCESS) { - err = osal_removefile(env_pathname.lck); + err = osal_removefile(dummy_env->me_pathname.lck); if (err == MDBX_SUCCESS) rc = MDBX_SUCCESS; else if (err == MDBX_ENOFILE) @@ -15218,7 +15209,7 @@ __cold int mdbx_env_deleteW(const wchar_t *pathname, } else if (err == MDBX_ENOFILE) err = MDBX_SUCCESS; - osal_free(env_pathname.buffer_for_free); + osal_free(dummy_env->me_pathname.buffer); return (err == MDBX_SUCCESS) ? rc : err; } @@ -15280,23 +15271,19 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, #endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ } - MDBX_handle_env_pathname env_pathname; - rc = handle_env_pathname(&env_pathname, pathname, &flags, mode); + env->me_flags = (flags & ~MDBX_FATAL_ERROR); + rc = env_handle_pathname(env, pathname, mode); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; env->me_flags = (flags & ~MDBX_FATAL_ERROR) | MDBX_ENV_ACTIVE; - env->me_pathname = osal_calloc(env_pathname.ent_len + 1, sizeof(pathchar_t)); env->me_dbxs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbxs[0])); env->me_db_flags = osal_calloc(env->me_maxdbs, sizeof(env->me_db_flags[0])); env->me_dbi_seqs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbi_seqs[0])); - if (!(env->me_dbxs && env->me_pathname && env->me_db_flags && - env->me_dbi_seqs)) { + if (!(env->me_dbxs && env->me_db_flags && env->me_dbi_seqs)) { rc = MDBX_ENOMEM; goto bailout; } - memcpy(env->me_pathname, env_pathname.dxb, - env_pathname.ent_len * sizeof(pathchar_t)); /* Использование O_DSYNC или FILE_FLAG_WRITE_THROUGH: * @@ -15385,14 
+15372,15 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, * при этом для записи мета требуется отдельный не-overlapped дескриптор. */ - rc = osal_openfile((flags & MDBX_RDONLY) ? MDBX_OPEN_DXB_READ - : MDBX_OPEN_DXB_LAZY, - env, env_pathname.dxb, &env->me_lazy_fd, mode); - if (rc != MDBX_SUCCESS) - goto bailout; + env->me_pid = osal_getpid(); + rc = osal_openfile((env->me_flags & MDBX_RDONLY) ? MDBX_OPEN_DXB_READ + : MDBX_OPEN_DXB_LAZY, + env, env->me_pathname.dxb, &env->me_lazy_fd, mode); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; #if MDBX_LOCKING == MDBX_LOCKING_SYSV - env->me_sysv_ipc.key = ftok(env_pathname.dxb, 42); + env->me_sysv_ipc.key = ftok(env->me_pathname.dxb, 42); if (env->me_sysv_ipc.key == -1) { rc = errno; goto bailout; @@ -15447,7 +15435,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, rc = osal_openfile(ior_direct ? MDBX_OPEN_DXB_OVERLAPPED_DIRECT : MDBX_OPEN_DXB_OVERLAPPED, - env, env_pathname.dxb, &env->me_overlapped_fd, 0); + env, env->me_pathname.dxb, &env->me_overlapped_fd, 0); if (rc != MDBX_SUCCESS) goto bailout; env->me_data_lock_event = CreateEventW(nullptr, true, false, nullptr); @@ -15473,7 +15461,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, ((mode & S_IRGRP) ? /* +write if readable by group */ S_IWGRP : 0) | ((mode & S_IROTH) ? 
/* +write if readable by others */ S_IWOTH : 0); #endif /* !Windows */ - const int lck_rc = setup_lck(env, env_pathname.lck, mode); + const int lck_rc = setup_lck(env, mode); if (MDBX_IS_ERROR(lck_rc)) { rc = lck_rc; goto bailout; @@ -15486,7 +15474,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, | MDBX_EXCLUSIVE #endif /* !Windows */ ))) { - rc = osal_openfile(MDBX_OPEN_DXB_DSYNC, env, env_pathname.dxb, + rc = osal_openfile(MDBX_OPEN_DXB_DSYNC, env, env->me_pathname.dxb, &env->me_dsync_fd, 0); if (MDBX_IS_ERROR(rc)) goto bailout; @@ -15710,7 +15698,6 @@ bailout: txn_valgrind(env, nullptr); #endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ } - osal_free(env_pathname.buffer_for_free); return rc; } @@ -15763,6 +15750,10 @@ __cold static int env_close(MDBX_env *env) { CloseHandle(env->me_data_lock_event); env->me_data_lock_event = INVALID_HANDLE_VALUE; } + if (env->me_pathname_char) { + osal_free(env->me_pathname_char); + env->me_pathname_char = nullptr; + } #endif /* Windows */ if (env->me_dsync_fd != INVALID_HANDLE_VALUE) { @@ -15800,16 +15791,10 @@ __cold static int env_close(MDBX_env *env) { osal_free(env->me_db_flags); env->me_db_flags = nullptr; } - if (env->me_pathname) { - osal_free(env->me_pathname); - env->me_pathname = nullptr; + if (env->me_pathname.buffer) { + osal_free(env->me_pathname.buffer); + env->me_pathname.buffer = nullptr; } -#if defined(_WIN32) || defined(_WIN64) - if (env->me_pathname_char) { - osal_free(env->me_pathname_char); - env->me_pathname_char = nullptr; - } -#endif /* Windows */ if (env->me_txn0) { dpl_free(env->me_txn0); txl_free(env->me_txn0->tw.lifo_reclaimed); @@ -22459,7 +22444,7 @@ __cold int mdbx_env_get_pathW(const MDBX_env *env, const wchar_t **arg) { if (unlikely(!arg)) return MDBX_EINVAL; - *arg = env->me_pathname; + *arg = env->me_pathname.specified; return MDBX_SUCCESS; } #endif /* Windows */ @@ -22476,12 +22461,14 @@ __cold int mdbx_env_get_path(const MDBX_env *env, const char **arg) { if 
(!env->me_pathname_char) { *arg = nullptr; DWORD flags = /* WC_ERR_INVALID_CHARS */ 0x80; - size_t mb_len = WideCharToMultiByte(CP_THREAD_ACP, flags, env->me_pathname, - -1, nullptr, 0, nullptr, nullptr); + size_t mb_len = + WideCharToMultiByte(CP_THREAD_ACP, flags, env->me_pathname.specified, + -1, nullptr, 0, nullptr, nullptr); rc = mb_len ? MDBX_SUCCESS : (int)GetLastError(); if (rc == ERROR_INVALID_FLAGS) { - mb_len = WideCharToMultiByte(CP_THREAD_ACP, flags = 0, env->me_pathname, - -1, nullptr, 0, nullptr, nullptr); + mb_len = WideCharToMultiByte(CP_THREAD_ACP, flags = 0, + env->me_pathname.specified, -1, nullptr, 0, + nullptr, nullptr); rc = mb_len ? MDBX_SUCCESS : (int)GetLastError(); } if (unlikely(rc != MDBX_SUCCESS)) @@ -22490,9 +22477,9 @@ __cold int mdbx_env_get_path(const MDBX_env *env, const char **arg) { char *const mb_pathname = osal_malloc(mb_len); if (!mb_pathname) return MDBX_ENOMEM; - if (mb_len != (size_t)WideCharToMultiByte(CP_THREAD_ACP, flags, - env->me_pathname, -1, mb_pathname, - (int)mb_len, nullptr, nullptr)) { + if (mb_len != (size_t)WideCharToMultiByte( + CP_THREAD_ACP, flags, env->me_pathname.specified, -1, + mb_pathname, (int)mb_len, nullptr, nullptr)) { rc = (int)GetLastError(); osal_free(mb_pathname); return rc; @@ -22504,7 +22491,7 @@ __cold int mdbx_env_get_path(const MDBX_env *env, const char **arg) { } *arg = env->me_pathname_char; #else - *arg = env->me_pathname; + *arg = env->me_pathname.specified; #endif /* Windows */ return MDBX_SUCCESS; } diff --git a/src/internals.h b/src/internals.h index 6a5d8018..0dd35629 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1400,10 +1400,12 @@ struct MDBX_env { MDBX_dbi me_maxdbs; /* size of the DB table */ uint32_t me_pid; /* process ID of this env */ osal_thread_key_t me_txkey; /* thread-key for readers */ - pathchar_t *me_pathname; /* path to the DB files */ - void *me_pbuf; /* scratch area for DUPSORT put() */ - MDBX_txn *me_txn0; /* preallocated write transaction */ - + struct { 
/* path to the DB files */ + pathchar_t *lck, *dxb, *specified; + void *buffer; + } me_pathname; + void *me_pbuf; /* scratch area for DUPSORT put() */ + MDBX_txn *me_txn0; /* preallocated write transaction */ MDBX_dbx *me_dbxs; /* array of static DB info */ uint16_t *__restrict me_db_flags; /* array of flags from MDBX_db.md_flags */ MDBX_atomic_uint32_t *me_dbi_seqs; /* array of dbi sequence numbers */ From eddade7b9945963b971a219f2e59ce5288a7ad75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 Nov 2023 00:23:26 +0300 Subject: [PATCH 040/443] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5?= =?UTF-8?q?=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20=D0=BF=D1=80?= =?UTF-8?q?=D0=B5=D1=84=D0=B8=D0=BA=D1=81=D0=BE=D0=B2=20=D0=B8=D0=BC=D1=91?= =?UTF-8?q?=D0=BD=20osal-ipc=20=D1=84=D1=83=D0=BD=D0=BA=D1=86=D0=B8=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lck-posix.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/lck-posix.c b/src/lck-posix.c index d55a9395..d8f1fdc6 100644 --- a/src/lck-posix.c +++ b/src/lck-posix.c @@ -875,7 +875,7 @@ MDBX_INTERNAL_FUNC int osal_check_tid4bionic(void) { } #endif /* __ANDROID_API__ || ANDROID) || BIONIC */ -static int mdbx_ipclock_lock(MDBX_env *env, osal_ipclock_t *ipc, +static int osal_ipclock_lock(MDBX_env *env, osal_ipclock_t *ipc, const bool dont_wait) { #if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ MDBX_LOCKING == MDBX_LOCKING_POSIX2008 @@ -915,7 +915,7 @@ static int mdbx_ipclock_lock(MDBX_env *env, osal_ipclock_t *ipc, return rc; } -static int mdbx_ipclock_unlock(MDBX_env *env, osal_ipclock_t *ipc) { +int osal_ipclock_unlock(MDBX_env *env, osal_ipclock_t *ipc) { #if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ MDBX_LOCKING == MDBX_LOCKING_POSIX2008 int rc = pthread_mutex_unlock(ipc); @@ -940,14 +940,14 @@ static int 
mdbx_ipclock_unlock(MDBX_env *env, osal_ipclock_t *ipc) { MDBX_INTERNAL_FUNC int osal_rdt_lock(MDBX_env *env) { TRACE("%s", ">>"); jitter4testing(true); - int rc = mdbx_ipclock_lock(env, &env->me_lck->mti_rlock, false); + int rc = osal_ipclock_lock(env, &env->me_lck->mti_rlock, false); TRACE("<< rc %d", rc); return rc; } MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env) { TRACE("%s", ">>"); - int rc = mdbx_ipclock_unlock(env, &env->me_lck->mti_rlock); + int rc = osal_ipclock_unlock(env, &env->me_lck->mti_rlock); TRACE("<< rc %d", rc); if (unlikely(rc != MDBX_SUCCESS)) mdbx_panic("%s() failed: err %d\n", __func__, rc); @@ -957,7 +957,7 @@ MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env) { int osal_txn_lock(MDBX_env *env, bool dont_wait) { TRACE("%swait %s", dont_wait ? "dont-" : "", ">>"); jitter4testing(true); - const int err = mdbx_ipclock_lock(env, &env->me_lck->mti_wlock, dont_wait); + const int err = osal_ipclock_lock(env, &env->me_lck->mti_wlock, dont_wait); int rc = err; if (likely(!MDBX_IS_ERROR(err))) { eASSERT(env, !env->me_txn0->mt_owner || @@ -975,7 +975,7 @@ void osal_txn_unlock(MDBX_env *env) { TRACE("%s", ">>"); eASSERT(env, env->me_txn0->mt_owner == osal_thread_self()); env->me_txn0->mt_owner = 0; - int err = mdbx_ipclock_unlock(env, &env->me_lck->mti_wlock); + int err = osal_ipclock_unlock(env, &env->me_lck->mti_wlock); TRACE("<< err %d", err); if (unlikely(err != MDBX_SUCCESS)) mdbx_panic("%s() failed: err %d\n", __func__, err); From 7ad54f54b4a7258353d6c3b98bcb01091dadfd50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 11 Nov 2023 23:54:21 +0300 Subject: [PATCH 041/443] =?UTF-8?q?mdbx:=20=D0=BE=D0=B1=D1=8A=D0=B5=D0=B4?= =?UTF-8?q?=D0=B8=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20lck-=D1=81=D0=BF=D0=B8?= =?UTF-8?q?=D1=81=D0=BA=D0=B0=20=D0=B8=20rthc-=D1=82=D0=B0=D0=B1=D0=BB?= 
=?UTF-8?q?=D0=B8=D1=86=D1=8B=20=D0=B4=D0=BB=D1=8F=20=D1=83=D0=BF=D1=80?= =?UTF-8?q?=D0=BE=D1=89=D0=B5=D0=BD=D0=B8=D1=8F=20(=D0=B4=D0=B5)=D1=80?= =?UTF-8?q?=D0=B5=D0=B3=D0=B8=D1=81=D1=82=D1=80=D0=B0=D1=86=D0=B8=D0=B8=20?= =?UTF-8?q?TLS-=D0=B4=D0=B5=D1=81=D1=82=D1=80=D1=83=D0=BA=D1=82=D0=BE?= =?UTF-8?q?=D1=80=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 406 ++++++++++++++++++++++-------------------------- src/internals.h | 5 - 2 files changed, 188 insertions(+), 223 deletions(-) diff --git a/src/core.c b/src/core.c index 03bd80ed..92eb7709 100644 --- a/src/core.c +++ b/src/core.c @@ -1128,10 +1128,12 @@ MDBX_MAYBE_UNUSED static /*----------------------------------------------------------------------------*/ /* rthc (tls keys and destructors) */ +static int rthc_register(MDBX_env *const env); +static int rthc_remove(MDBX_env *const env); +static int rthc_uniq_check(const osal_mmap_t *pending, MDBX_env **found); + typedef struct rthc_entry_t { - MDBX_reader *begin; - MDBX_reader *end; - osal_thread_key_t thr_tls_key; + MDBX_env *env; } rthc_entry_t; #if MDBX_DEBUG @@ -1144,10 +1146,8 @@ static bin128_t bootid; #if defined(_WIN32) || defined(_WIN64) static CRITICAL_SECTION rthc_critical_section; -static CRITICAL_SECTION lcklist_critical_section; #else -static pthread_mutex_t lcklist_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t rthc_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t rthc_cond = PTHREAD_COND_INITIALIZER; static osal_thread_key_t rthc_key; @@ -1346,17 +1346,24 @@ static void thread_rthc_set(osal_thread_key_t key, const void *value) { /* dtor called for thread, i.e. 
for all mdbx's environment objects */ __cold void thread_dtor(void *rthc) { rthc_lock(); - TRACE(">> pid %d, thread 0x%" PRIxPTR ", rthc %p", osal_getpid(), + const uint32_t self_pid = osal_getpid(); + TRACE(">> pid %d, thread 0x%" PRIxPTR ", rthc %p", self_pid, osal_thread_self(), rthc); - const uint32_t self_pid = osal_getpid(); for (size_t i = 0; i < rthc_count; ++i) { - const osal_thread_key_t key = rthc_table[i].thr_tls_key; - MDBX_reader *const reader = thread_rthc_get(key); - if (reader < rthc_table[i].begin || reader >= rthc_table[i].end) + MDBX_env *const env = rthc_table[i].env; + if (env->me_pid != self_pid) + continue; + if (!(env->me_flags & MDBX_ENV_TXKEY)) + continue; + MDBX_reader *const reader = thread_rthc_get(env->me_txkey); + MDBX_reader *const begin = &env->me_lck_mmap.lck->mti_readers[0]; + MDBX_reader *const end = + &env->me_lck_mmap.lck->mti_readers[env->me_maxreaders]; + if (reader < begin || reader >= end) continue; #if !defined(_WIN32) && !defined(_WIN64) - if (pthread_setspecific(key, nullptr) != 0) { + if (pthread_setspecific(env->me_txkey, nullptr) != 0) { TRACE("== thread 0x%" PRIxPTR ", rthc %p: ignore race with tsd-key deletion", osal_thread_self(), __Wpedantic_format_voidptr(reader)); @@ -1368,13 +1375,13 @@ __cold void thread_dtor(void *rthc) { ", rthc %p, [%zi], %p ... 
%p (%+i), rtch-pid %i, " "current-pid %i", osal_thread_self(), __Wpedantic_format_voidptr(reader), i, - __Wpedantic_format_voidptr(rthc_table[i].begin), - __Wpedantic_format_voidptr(rthc_table[i].end), - (int)(reader - rthc_table[i].begin), reader->mr_pid.weak, self_pid); + __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end), + (int)(reader - begin), reader->mr_pid.weak, self_pid); if (atomic_load32(&reader->mr_pid, mo_Relaxed) == self_pid) { TRACE("==== thread 0x%" PRIxPTR ", rthc %p, cleanup", osal_thread_self(), __Wpedantic_format_voidptr(reader)); (void)atomic_cas32(&reader->mr_pid, self_pid, 0); + atomic_store32(&env->me_lck->mti_readers_refresh_flag, true, mo_Relaxed); } } @@ -1419,14 +1426,15 @@ __cold void thread_dtor(void *rthc) { MDBX_EXCLUDE_FOR_GPROF __cold void global_dtor(void) { - TRACE(">> pid %d", osal_getpid()); + const uint32_t self_pid = osal_getpid(); + TRACE(">> pid %d", self_pid); rthc_lock(); #if !defined(_WIN32) && !defined(_WIN64) uint64_t *rthc = pthread_getspecific(rthc_key); TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status 0x%08" PRIx64 ", left %d", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), osal_getpid(), + osal_thread_self(), __Wpedantic_format_voidptr(rthc), self_pid, rthc ? 
rthc_read(rthc) : ~UINT64_C(0), atomic_load32(&rthc_pending, mo_Relaxed)); if (rthc) { @@ -1437,20 +1445,20 @@ __cold void global_dtor(void) { rthc_compare_and_clean(rthc, sign_registered)) { TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), osal_getpid(), + osal_thread_self(), __Wpedantic_format_voidptr(rthc), self_pid, "registered", state); } else if (state == sign_counted && rthc_compare_and_clean(rthc, sign_counted)) { TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), osal_getpid(), + osal_thread_self(), __Wpedantic_format_voidptr(rthc), self_pid, "counted", state); ENSURE(nullptr, atomic_sub32(&rthc_pending, 1) > 0); } else { WARNING("thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), - osal_getpid(), "wrong", state); + osal_thread_self(), __Wpedantic_format_voidptr(rthc), self_pid, + "wrong", state); } } @@ -1467,7 +1475,7 @@ __cold void global_dtor(void) { for (unsigned left; (left = atomic_load32(&rthc_pending, mo_AcquireRelease)) > 0;) { - NOTICE("tls-cleanup: pid %d, pending %u, wait for...", osal_getpid(), left); + NOTICE("tls-cleanup: pid %d, pending %u, wait for...", self_pid, left); const int rc = pthread_cond_timedwait(&rthc_cond, &rthc_mutex, &abstime); if (rc && rc != EINTR) break; @@ -1475,23 +1483,31 @@ __cold void global_dtor(void) { thread_key_delete(rthc_key); #endif - const uint32_t self_pid = osal_getpid(); for (size_t i = 0; i < rthc_count; ++i) { - const osal_thread_key_t key = rthc_table[i].thr_tls_key; - thread_key_delete(key); - for (MDBX_reader *rthc = rthc_table[i].begin; rthc < rthc_table[i].end; - ++rthc) { + MDBX_env *const env = rthc_table[i].env; + if (env->me_pid != self_pid) + continue; + if (!(env->me_flags & MDBX_ENV_TXKEY)) + continue; + MDBX_reader *const begin = 
&env->me_lck_mmap.lck->mti_readers[0]; + MDBX_reader *const end = + &env->me_lck_mmap.lck->mti_readers[env->me_maxreaders]; + thread_key_delete(env->me_txkey); + bool cleaned = false; + for (MDBX_reader *reader = begin; reader < end; ++reader) { TRACE("== [%zi] = key %" PRIuPTR ", %p ... %p, rthc %p (%+i), " "rthc-pid %i, current-pid %i", - i, (uintptr_t)key, __Wpedantic_format_voidptr(rthc_table[i].begin), - __Wpedantic_format_voidptr(rthc_table[i].end), - __Wpedantic_format_voidptr(rthc), (int)(rthc - rthc_table[i].begin), - rthc->mr_pid.weak, self_pid); - if (atomic_load32(&rthc->mr_pid, mo_Relaxed) == self_pid) { - atomic_store32(&rthc->mr_pid, 0, mo_AcquireRelease); - TRACE("== cleanup %p", __Wpedantic_format_voidptr(rthc)); + i, (uintptr_t)env->me_txkey, __Wpedantic_format_voidptr(begin), + __Wpedantic_format_voidptr(end), __Wpedantic_format_voidptr(reader), + (int)(reader - begin), reader->mr_pid.weak, self_pid); + if (atomic_load32(&reader->mr_pid, mo_Relaxed) == self_pid) { + (void)atomic_cas32(&reader->mr_pid, self_pid, 0); + TRACE("== cleanup %p", __Wpedantic_format_voidptr(reader)); + cleaned = true; } } + if (cleaned) + atomic_store32(&env->me_lck->mti_readers_refresh_flag, true, mo_Relaxed); } rthc_limit = rthc_count = 0; @@ -1501,7 +1517,6 @@ __cold void global_dtor(void) { rthc_unlock(); #if defined(_WIN32) || defined(_WIN64) - DeleteCriticalSection(&lcklist_critical_section); DeleteCriticalSection(&rthc_critical_section); #else /* LY: yielding a few timeslices to give a more chance @@ -1510,24 +1525,26 @@ __cold void global_dtor(void) { #endif osal_dtor(); - TRACE("<< pid %d\n", osal_getpid()); + TRACE("<< pid %d\n", self_pid); } -__cold int rthc_alloc(osal_thread_key_t *pkey, MDBX_reader *begin, - MDBX_reader *end) { - assert(pkey != NULL); -#ifndef NDEBUG - *pkey = (osal_thread_key_t)0xBADBADBAD; -#endif /* NDEBUG */ +__cold int rthc_register(MDBX_env *const env) { + TRACE(">> env %p, rthc_count %u, rthc_limit %u", + 
__Wpedantic_format_voidptr(env), rthc_count, rthc_limit); - rthc_lock(); - TRACE(">> rthc_count %u, rthc_limit %u", rthc_count, rthc_limit); - int rc; - if (rthc_count == rthc_limit) { + int rc = MDBX_SUCCESS; + for (size_t i = 0; i < rthc_count; ++i) + if (unlikely(rthc_table[i].env == env)) { + rc = MDBX_PANIC; + goto bailout; + } + + env->me_txkey = 0; + if (unlikely(rthc_count == rthc_limit)) { rthc_entry_t *new_table = osal_realloc((rthc_table == rthc_table_static) ? nullptr : rthc_table, sizeof(rthc_entry_t) * rthc_limit * 2); - if (new_table == nullptr) { + if (unlikely(new_table == nullptr)) { rc = MDBX_ENOMEM; goto bailout; } @@ -1537,84 +1554,92 @@ __cold int rthc_alloc(osal_thread_key_t *pkey, MDBX_reader *begin, rthc_limit *= 2; } - rc = thread_key_create(&rthc_table[rthc_count].thr_tls_key); - if (rc != MDBX_SUCCESS) - goto bailout; + if ((env->me_flags & MDBX_NOTLS) == 0) { + rc = thread_key_create(&env->me_txkey); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + env->me_flags |= MDBX_ENV_TXKEY; + } - *pkey = rthc_table[rthc_count].thr_tls_key; - TRACE("== [%i] = key %" PRIuPTR ", %p ... 
%p", rthc_count, (uintptr_t)*pkey, - __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end)); - - rthc_table[rthc_count].begin = begin; - rthc_table[rthc_count].end = end; + rthc_table[rthc_count].env = env; + TRACE("== [%i] = env %p, key %" PRIuPTR, rthc_count, + __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey); ++rthc_count; - TRACE("<< key %" PRIuPTR ", rthc_count %u, rthc_limit %u", (uintptr_t)*pkey, - rthc_count, rthc_limit); - rthc_unlock(); - return MDBX_SUCCESS; bailout: - rthc_unlock(); + TRACE("<< env %p, key %" PRIuPTR ", rthc_count %u, rthc_limit %u, rc %d", + __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey, rthc_count, + rthc_limit, rc); return rc; } +__cold static int rthc_drown(MDBX_env *const env) { + const uint32_t self_pid = osal_getpid(); + int rc = MDBX_SUCCESS; + MDBX_env *inprocess_neighbor = nullptr; + if (likely(env->me_lck_mmap.lck && self_pid == env->me_pid)) { + MDBX_reader *const begin = &env->me_lck_mmap.lck->mti_readers[0]; + MDBX_reader *const end = + &env->me_lck_mmap.lck->mti_readers[env->me_maxreaders]; + TRACE("== %s env %p pid %d, readers %p ...%p, current-pid %d", + (self_pid == env->me_pid) ? "cleanup" : "skip", + __Wpedantic_format_voidptr(env), env->me_pid, + __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end), + self_pid); + bool cleaned = false; + for (MDBX_reader *r = begin; r < end; ++r) { + if (atomic_load32(&r->mr_pid, mo_Relaxed) == self_pid) { + atomic_store32(&r->mr_pid, 0, mo_AcquireRelease); + TRACE("== cleanup %p", __Wpedantic_format_voidptr(r)); + cleaned = true; + } + } + if (cleaned) + atomic_store32(&env->me_lck_mmap.lck->mti_readers_refresh_flag, true, + mo_Relaxed); + rc = rthc_uniq_check(&env->me_lck_mmap, &inprocess_neighbor); + if (!inprocess_neighbor && env->me_live_reader && + env->me_lfd != INVALID_HANDLE_VALUE) { + int err = osal_rpid_clear(env); + rc = rc ? 
rc : err; + } + } + int err = osal_lck_destroy(env, inprocess_neighbor); + env->me_pid = 0; + return rc ? rc : err; +} -__cold void rthc_remove(const osal_thread_key_t key) { - thread_key_delete(key); - rthc_lock(); - TRACE(">> key %zu, rthc_count %u, rthc_limit %u", (uintptr_t)key, rthc_count, +__cold static int rthc_remove(MDBX_env *const env) { + TRACE(">>> env %p, key %zu, rthc_count %u, rthc_limit %u", + __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey, rthc_count, rthc_limit); - for (size_t i = 0; i < rthc_count; ++i) { - if (key == rthc_table[i].thr_tls_key) { - const uint32_t self_pid = osal_getpid(); - TRACE("== [%zi], %p ...%p, current-pid %d", i, - __Wpedantic_format_voidptr(rthc_table[i].begin), - __Wpedantic_format_voidptr(rthc_table[i].end), self_pid); + int rc = MDBX_SUCCESS; + if (likely(env->me_pid)) + rc = rthc_drown(env); - for (MDBX_reader *rthc = rthc_table[i].begin; rthc < rthc_table[i].end; - ++rthc) { - if (atomic_load32(&rthc->mr_pid, mo_Relaxed) == self_pid) { - atomic_store32(&rthc->mr_pid, 0, mo_AcquireRelease); - TRACE("== cleanup %p", __Wpedantic_format_voidptr(rthc)); - } - } + for (size_t i = 0; i < rthc_count; ++i) { + if (rthc_table[i].env == env) { if (--rthc_count > 0) rthc_table[i] = rthc_table[rthc_count]; else if (rthc_table != rthc_table_static) { - osal_free(rthc_table); + void *tmp = rthc_table; rthc_table = rthc_table_static; rthc_limit = RTHC_INITIAL_LIMIT; + osal_memory_barrier(); + osal_free(tmp); } break; } } - TRACE("<< key %zu, rthc_count %u, rthc_limit %u", (size_t)key, rthc_count, + TRACE("<<< %p, key %zu, rthc_count %u, rthc_limit %u", + __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey, rthc_count, rthc_limit); - rthc_unlock(); + return rc; } //------------------------------------------------------------------------------ -#define RTHC_ENVLIST_END ((MDBX_env *)((uintptr_t)50459)) -static MDBX_env *inprocess_lcklist_head = RTHC_ENVLIST_END; - -static __inline void lcklist_lock(void) { -#if 
defined(_WIN32) || defined(_WIN64) - EnterCriticalSection(&lcklist_critical_section); -#else - ENSURE(nullptr, osal_pthread_mutex_lock(&lcklist_mutex) == 0); -#endif -} - -static __inline void lcklist_unlock(void) { -#if defined(_WIN32) || defined(_WIN64) - LeaveCriticalSection(&lcklist_critical_section); -#else - ENSURE(nullptr, pthread_mutex_unlock(&lcklist_mutex) == 0); -#endif -} - MDBX_NOTHROW_CONST_FUNCTION static uint64_t rrxmrrxmsx_0(uint64_t v) { /* Pelle Evensen's mixer, https://bit.ly/2HOfynt */ v ^= (v << 39 | v >> 25) ^ (v << 14 | v >> 50); @@ -1667,13 +1692,16 @@ static int uniq_poke(const osal_mmap_t *pending, osal_mmap_t *scan, return uniq_peek(pending, scan); } -__cold static int uniq_check(const osal_mmap_t *pending, MDBX_env **found) { +__cold static int rthc_uniq_check(const osal_mmap_t *pending, + MDBX_env **found) { *found = nullptr; uint64_t salt = 0; - for (MDBX_env *scan = inprocess_lcklist_head; scan != RTHC_ENVLIST_END; - scan = scan->me_lcklist_next) { - MDBX_lockinfo *const scan_lck = scan->me_lck_mmap.lck; - int err = atomic_load64(&scan_lck->mti_bait_uniqueness, mo_AcquireRelease) + for (size_t i = 0; i < rthc_count; ++i) { + MDBX_env *const scan = rthc_table[i].env; + if (!scan->me_lck_mmap.lck || &scan->me_lck_mmap == pending) + continue; + int err = atomic_load64(&scan->me_lck_mmap.lck->mti_bait_uniqueness, + mo_AcquireRelease) ? uniq_peek(pending, &scan->me_lck_mmap) : uniq_poke(pending, &scan->me_lck_mmap, &salt); if (err == MDBX_ENODATA) { @@ -1681,8 +1709,8 @@ __cold static int uniq_check(const osal_mmap_t *pending, MDBX_env **found) { if (likely(osal_filesize(pending->fd, &length) == MDBX_SUCCESS && length == 0)) { /* LY: skip checking since LCK-file is empty, i.e. just created. 
*/ - DEBUG("uniq-probe: %s", "unique (new/empty lck)"); - return MDBX_RESULT_TRUE; + DEBUG("%s", "unique (new/empty lck)"); + return MDBX_SUCCESS; } } if (err == MDBX_RESULT_TRUE) @@ -1695,44 +1723,17 @@ __cold static int uniq_check(const osal_mmap_t *pending, MDBX_env **found) { if (err == MDBX_RESULT_TRUE) { err = uniq_poke(pending, &scan->me_lck_mmap, &salt); *found = scan; - DEBUG("uniq-probe: found %p", __Wpedantic_format_voidptr(*found)); - return MDBX_RESULT_FALSE; + DEBUG("found %p", __Wpedantic_format_voidptr(*found)); + return MDBX_SUCCESS; } if (unlikely(err != MDBX_SUCCESS)) { - DEBUG("uniq-probe: failed rc %d", err); + DEBUG("failed rc %d", err); return err; } } - DEBUG("uniq-probe: %s", "unique"); - return MDBX_RESULT_TRUE; -} - -static int lcklist_detach_locked(MDBX_env *env) { - MDBX_env *inprocess_neighbor = nullptr; - int rc = MDBX_SUCCESS; - if (env->me_lcklist_next != nullptr) { - ENSURE(env, env->me_lcklist_next != nullptr); - ENSURE(env, inprocess_lcklist_head != RTHC_ENVLIST_END); - for (MDBX_env **ptr = &inprocess_lcklist_head; *ptr != RTHC_ENVLIST_END; - ptr = &(*ptr)->me_lcklist_next) { - if (*ptr == env) { - *ptr = env->me_lcklist_next; - env->me_lcklist_next = nullptr; - break; - } - } - ENSURE(env, env->me_lcklist_next == nullptr); - } - - rc = likely(osal_getpid() == env->me_pid) - ? uniq_check(&env->me_lck_mmap, &inprocess_neighbor) - : MDBX_PANIC; - if (!inprocess_neighbor && env->me_live_reader) - (void)osal_rpid_clear(env); - if (!MDBX_IS_ERROR(rc)) - rc = osal_lck_destroy(env, inprocess_neighbor); - return rc; + DEBUG("%s", "unique"); + return MDBX_SUCCESS; } /*------------------------------------------------------------------------------ @@ -14567,59 +14568,28 @@ __cold static int setup_lck(MDBX_env *env, mdbx_mode_t mode) { } /* LY: without-lck mode (e.g. 
exclusive or on read-only filesystem) */ - /* beginning of a locked section ---------------------------------------- */ - lcklist_lock(); - eASSERT(env, env->me_lcklist_next == nullptr); env->me_lfd = INVALID_HANDLE_VALUE; - const int rc = osal_lck_seize(env); - if (MDBX_IS_ERROR(rc)) { - /* Calling lcklist_detach_locked() is required to restore POSIX-filelock - * and this job will be done by env_close(). */ - lcklist_unlock(); - return rc; - } - /* insert into inprocess lck-list */ - env->me_lcklist_next = inprocess_lcklist_head; - inprocess_lcklist_head = env; - lcklist_unlock(); - /* end of a locked section ---------------------------------------------- */ - - env->me_lck = lckless_stub(env); - env->me_maxreaders = UINT_MAX; - DEBUG("lck-setup:%s%s%s", " lck-less", - (env->me_flags & MDBX_RDONLY) ? " readonly" : "", - (rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative"); - return rc; } /* beginning of a locked section ------------------------------------------ */ - lcklist_lock(); - eASSERT(env, env->me_lcklist_next == nullptr); + rthc_lock(); + err = rthc_register(env); + if (likely(err == MDBX_SUCCESS)) + err = osal_lck_seize(env); - /* Try to get exclusive lock. If we succeed, then - * nobody is using the lock region and we should initialize it. */ - err = osal_lck_seize(env); - if (MDBX_IS_ERROR(err)) { - bailout: - /* Calling lcklist_detach_locked() is required to restore POSIX-filelock - * and this job will be done by env_close(). 
*/ - lcklist_unlock(); - return err; - } - - MDBX_env *inprocess_neighbor = nullptr; - if (err == MDBX_RESULT_TRUE) { - err = uniq_check(&env->me_lck_mmap, &inprocess_neighbor); - if (MDBX_IS_ERROR(err)) - goto bailout; - if (inprocess_neighbor && - ((runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 || - (inprocess_neighbor->me_flags & MDBX_EXCLUSIVE) != 0)) { - err = MDBX_BUSY; - goto bailout; - } - } const int lck_seize_rc = err; + if (MDBX_IS_ERROR(err)) + goto bailout; + + struct MDBX_lockinfo *lck = nullptr; + if (env->me_lfd == INVALID_HANDLE_VALUE) { + lck = lckless_stub(env); + env->me_maxreaders = UINT_MAX; + DEBUG("lck-setup:%s%s%s", " lck-less", + (env->me_flags & MDBX_RDONLY) ? " readonly" : "", + (lck_seize_rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative"); + goto done; + } DEBUG("lck-setup:%s%s%s", " with-lck", (env->me_flags & MDBX_RDONLY) ? " readonly" : "", @@ -14688,9 +14658,10 @@ __cold static int setup_lck(MDBX_env *env, mdbx_mode_t mode) { #endif /* MADV_WILLNEED */ #endif /* MDBX_ENABLE_MADVISE */ - struct MDBX_lockinfo *const lck = env->me_lck_mmap.lck; + lck = env->me_lck_mmap.lck; if (lck_seize_rc == MDBX_RESULT_TRUE) { - /* LY: exclusive mode, check and reset lck content */ + /* If we succeed got exclusive lock, then nobody is using the lock region + * and we should initialize it. 
*/ memset(lck, 0, (size_t)size); jitter4testing(false); lck->mti_magic_and_version = MDBX_LOCK_MAGIC; @@ -14724,19 +14695,32 @@ __cold static int setup_lck(MDBX_env *env, mdbx_mode_t mode) { } } + MDBX_env *inprocess_neighbor = nullptr; + if (lck_seize_rc == MDBX_RESULT_TRUE) { + err = rthc_uniq_check(&env->me_lck_mmap, &inprocess_neighbor); + if (MDBX_IS_ERROR(err)) + goto bailout; + if (inprocess_neighbor && + ((runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 || + (inprocess_neighbor->me_flags & MDBX_EXCLUSIVE) != 0)) { + err = MDBX_BUSY; + goto bailout; + } + } + err = osal_lck_init(env, inprocess_neighbor, lck_seize_rc); if (MDBX_IS_ERROR(err)) goto bailout; - ENSURE(env, env->me_lcklist_next == nullptr); - /* insert into inprocess lck-list */ - env->me_lcklist_next = inprocess_lcklist_head; - inprocess_lcklist_head = env; - lcklist_unlock(); - /* end of a locked section ------------------------------------------------ */ - - eASSERT(env, !MDBX_IS_ERROR(lck_seize_rc)); +done: env->me_lck = lck; + eASSERT(env, !MDBX_IS_ERROR(lck_seize_rc)); + +bailout: + /* Calling osal_lck_destroy() is required to restore POSIX-filelock + * and this job will be done by env_close(). 
*/ + rthc_unlock(); + /* end of a locked section ------------------------------------------------ */ return lck_seize_rc; } @@ -15603,14 +15587,6 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, if (MDBX_IS_ERROR(rc)) goto bailout; } - - if ((env->me_flags & MDBX_NOTLS) == 0) { - rc = rthc_alloc(&env->me_txkey, &lck->mti_readers[0], - &lck->mti_readers[env->me_maxreaders]); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - env->me_flags |= MDBX_ENV_TXKEY; - } } if ((flags & MDBX_RDONLY) == 0) { @@ -15704,17 +15680,19 @@ bailout: /* Destroy resources from mdbx_env_open(), clear our readers & DBIs */ __cold static int env_close(MDBX_env *env) { const unsigned flags = env->me_flags; - if (!(flags & MDBX_ENV_ACTIVE)) { - ENSURE(env, env->me_lcklist_next == nullptr); - return MDBX_SUCCESS; - } - env->me_flags &= ~ENV_INTERNAL_FLAGS; if (flags & MDBX_ENV_TXKEY) { - rthc_remove(env->me_txkey); - env->me_txkey = (osal_thread_key_t)0; + thread_key_delete(env->me_txkey); + env->me_txkey = 0; } + if (env->me_lck) + munlock_all(env); + + rthc_lock(); + int rc = rthc_remove(env); + rthc_unlock(); + #if MDBX_ENABLE_DBI_LOCKFREE for (struct mdbx_defer_free_item *next, *ptr = env->me_defer_free; ptr; ptr = next) { @@ -15723,14 +15701,9 @@ __cold static int env_close(MDBX_env *env) { } #endif /* MDBX_ENABLE_DBI_LOCKFREE */ - munlock_all(env); if (!(env->me_flags & MDBX_RDONLY)) osal_ioring_destroy(&env->me_ioring); - lcklist_lock(); - const int rc = lcklist_detach_locked(env); - lcklist_unlock(); - env->me_lck = nullptr; if (env->me_lck_mmap.lck) osal_munmap(&env->me_lck_mmap); @@ -15882,8 +15855,6 @@ __cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) { osal_free(ptr); } VALGRIND_DESTROY_MEMPOOL(env); - ENSURE(env, env->me_lcklist_next == nullptr); - env->me_pid = 0; osal_free(env); return rc; @@ -25943,7 +25914,6 @@ __cold void global_ctor(void) { rthc_table = rthc_table_static; #if defined(_WIN32) || defined(_WIN64) 
InitializeCriticalSection(&rthc_critical_section); - InitializeCriticalSection(&lcklist_critical_section); #else ENSURE(nullptr, pthread_key_create(&rthc_key, thread_dtor) == 0); TRACE("pid %d, &mdbx_rthc_key = %p, value 0x%x", osal_getpid(), diff --git a/src/internals.h b/src/internals.h index 0dd35629..02d8ab49 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1462,7 +1462,6 @@ struct MDBX_env { bool me_incore; bool me_prefault_write; - MDBX_env *me_lcklist_next; #if MDBX_ENABLE_DBI_LOCKFREE struct mdbx_defer_free_item *me_defer_free; #endif /* MDBX_ENABLE_DBI_LOCKFREE */ @@ -1560,10 +1559,6 @@ osal_flush_incoherent_mmap(const void *addr, size_t nbytes, MDBX_INTERNAL_FUNC int cleanup_dead_readers(MDBX_env *env, int rlocked, int *dead); -MDBX_INTERNAL_FUNC int rthc_alloc(osal_thread_key_t *key, MDBX_reader *begin, - MDBX_reader *end); -MDBX_INTERNAL_FUNC void rthc_remove(const osal_thread_key_t key); - MDBX_INTERNAL_FUNC void global_ctor(void); MDBX_INTERNAL_FUNC void osal_ctor(void); MDBX_INTERNAL_FUNC void global_dtor(void); From a3e2300f583f593291332e4d20647cf5a68fcb2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 Nov 2023 00:03:43 +0300 Subject: [PATCH 042/443] =?UTF-8?q?mdbx:=20=D0=B2=D0=BE=D0=B7=D0=BC=D0=BE?= =?UTF-8?q?=D0=B6=D0=BD=D0=BE=D1=81=D1=82=D1=8C=20=D0=B2=D1=8B=D0=B7=D0=BE?= =?UTF-8?q?=D0=B2=D0=B0=20`osal=5Flck=5Fdestroy()`=20=D0=B2=20=D0=B4=D0=BE?= =?UTF-8?q?=D1=87=D0=B5=D1=80=D0=BD=D0=B5=D0=BC=20=D0=BF=D1=80=D0=BE=D1=86?= =?UTF-8?q?=D0=B5=D1=81=D1=81=D0=B5=20=D0=BF=D0=BE=D1=81=D0=BB=D0=B5=20`fo?= =?UTF-8?q?rk()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 51 ++++++++++++++++++++++++----------------------- src/lck-posix.c | 23 +++++++++++++-------- src/lck-windows.c | 4 +++- src/osal.h | 3 ++- 4 files changed, 46 insertions(+), 35 deletions(-) diff --git 
a/src/core.c b/src/core.c index 92eb7709..b6587417 100644 --- a/src/core.c +++ b/src/core.c @@ -1346,13 +1346,13 @@ static void thread_rthc_set(osal_thread_key_t key, const void *value) { /* dtor called for thread, i.e. for all mdbx's environment objects */ __cold void thread_dtor(void *rthc) { rthc_lock(); - const uint32_t self_pid = osal_getpid(); - TRACE(">> pid %d, thread 0x%" PRIxPTR ", rthc %p", self_pid, + const uint32_t current_pid = osal_getpid(); + TRACE(">> pid %d, thread 0x%" PRIxPTR ", rthc %p", current_pid, osal_thread_self(), rthc); for (size_t i = 0; i < rthc_count; ++i) { MDBX_env *const env = rthc_table[i].env; - if (env->me_pid != self_pid) + if (env->me_pid != current_pid) continue; if (!(env->me_flags & MDBX_ENV_TXKEY)) continue; @@ -1376,11 +1376,11 @@ __cold void thread_dtor(void *rthc) { "current-pid %i", osal_thread_self(), __Wpedantic_format_voidptr(reader), i, __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end), - (int)(reader - begin), reader->mr_pid.weak, self_pid); - if (atomic_load32(&reader->mr_pid, mo_Relaxed) == self_pid) { + (int)(reader - begin), reader->mr_pid.weak, current_pid); + if (atomic_load32(&reader->mr_pid, mo_Relaxed) == current_pid) { TRACE("==== thread 0x%" PRIxPTR ", rthc %p, cleanup", osal_thread_self(), __Wpedantic_format_voidptr(reader)); - (void)atomic_cas32(&reader->mr_pid, self_pid, 0); + (void)atomic_cas32(&reader->mr_pid, current_pid, 0); atomic_store32(&env->me_lck->mti_readers_refresh_flag, true, mo_Relaxed); } } @@ -1426,15 +1426,15 @@ __cold void thread_dtor(void *rthc) { MDBX_EXCLUDE_FOR_GPROF __cold void global_dtor(void) { - const uint32_t self_pid = osal_getpid(); - TRACE(">> pid %d", self_pid); + const uint32_t current_pid = osal_getpid(); + TRACE(">> pid %d", current_pid); rthc_lock(); #if !defined(_WIN32) && !defined(_WIN64) uint64_t *rthc = pthread_getspecific(rthc_key); TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status 0x%08" PRIx64 ", left %d", - osal_thread_self(), 
__Wpedantic_format_voidptr(rthc), self_pid, + osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, rthc ? rthc_read(rthc) : ~UINT64_C(0), atomic_load32(&rthc_pending, mo_Relaxed)); if (rthc) { @@ -1445,19 +1445,19 @@ __cold void global_dtor(void) { rthc_compare_and_clean(rthc, sign_registered)) { TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), self_pid, + osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, "registered", state); } else if (state == sign_counted && rthc_compare_and_clean(rthc, sign_counted)) { TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), self_pid, + osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, "counted", state); ENSURE(nullptr, atomic_sub32(&rthc_pending, 1) > 0); } else { WARNING("thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), self_pid, + osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, "wrong", state); } } @@ -1475,7 +1475,7 @@ __cold void global_dtor(void) { for (unsigned left; (left = atomic_load32(&rthc_pending, mo_AcquireRelease)) > 0;) { - NOTICE("tls-cleanup: pid %d, pending %u, wait for...", self_pid, left); + NOTICE("tls-cleanup: pid %d, pending %u, wait for...", current_pid, left); const int rc = pthread_cond_timedwait(&rthc_cond, &rthc_mutex, &abstime); if (rc && rc != EINTR) break; @@ -1485,7 +1485,7 @@ __cold void global_dtor(void) { for (size_t i = 0; i < rthc_count; ++i) { MDBX_env *const env = rthc_table[i].env; - if (env->me_pid != self_pid) + if (env->me_pid != current_pid) continue; if (!(env->me_flags & MDBX_ENV_TXKEY)) continue; @@ -1499,9 +1499,9 @@ __cold void global_dtor(void) { "rthc-pid %i, current-pid %i", i, (uintptr_t)env->me_txkey, __Wpedantic_format_voidptr(begin), 
__Wpedantic_format_voidptr(end), __Wpedantic_format_voidptr(reader), - (int)(reader - begin), reader->mr_pid.weak, self_pid); - if (atomic_load32(&reader->mr_pid, mo_Relaxed) == self_pid) { - (void)atomic_cas32(&reader->mr_pid, self_pid, 0); + (int)(reader - begin), reader->mr_pid.weak, current_pid); + if (atomic_load32(&reader->mr_pid, mo_Relaxed) == current_pid) { + (void)atomic_cas32(&reader->mr_pid, current_pid, 0); TRACE("== cleanup %p", __Wpedantic_format_voidptr(reader)); cleaned = true; } @@ -1525,7 +1525,7 @@ __cold void global_dtor(void) { #endif osal_dtor(); - TRACE("<< pid %d\n", self_pid); + TRACE("<< pid %d\n", current_pid); } __cold int rthc_register(MDBX_env *const env) { @@ -1573,21 +1573,21 @@ bailout: return rc; } __cold static int rthc_drown(MDBX_env *const env) { - const uint32_t self_pid = osal_getpid(); + const uint32_t current_pid = osal_getpid(); int rc = MDBX_SUCCESS; MDBX_env *inprocess_neighbor = nullptr; - if (likely(env->me_lck_mmap.lck && self_pid == env->me_pid)) { + if (likely(env->me_lck_mmap.lck && current_pid == env->me_pid)) { MDBX_reader *const begin = &env->me_lck_mmap.lck->mti_readers[0]; MDBX_reader *const end = &env->me_lck_mmap.lck->mti_readers[env->me_maxreaders]; TRACE("== %s env %p pid %d, readers %p ...%p, current-pid %d", - (self_pid == env->me_pid) ? "cleanup" : "skip", + (current_pid == env->me_pid) ? "cleanup" : "skip", __Wpedantic_format_voidptr(env), env->me_pid, __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end), - self_pid); + current_pid); bool cleaned = false; for (MDBX_reader *r = begin; r < end; ++r) { - if (atomic_load32(&r->mr_pid, mo_Relaxed) == self_pid) { + if (atomic_load32(&r->mr_pid, mo_Relaxed) == current_pid) { atomic_store32(&r->mr_pid, 0, mo_AcquireRelease); TRACE("== cleanup %p", __Wpedantic_format_voidptr(r)); cleaned = true; @@ -1603,7 +1603,7 @@ __cold static int rthc_drown(MDBX_env *const env) { rc = rc ? 
rc : err; } } - int err = osal_lck_destroy(env, inprocess_neighbor); + int err = osal_lck_destroy(env, inprocess_neighbor, current_pid); env->me_pid = 0; return rc ? rc : err; } @@ -15844,7 +15844,8 @@ __cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) { #if MDBX_LOCKING > MDBX_LOCKING_SYSV MDBX_lockinfo *const stub = lckless_stub(env); - ENSURE(env, osal_ipclock_destroy(&stub->mti_wlock) == 0); + /* может вернуть ошибку в дочернем процессе после fork() */ + osal_ipclock_destroy(&stub->mti_wlock); #endif /* MDBX_LOCKING */ while ((dp = env->me_dp_reserve) != NULL) { diff --git a/src/lck-posix.c b/src/lck-posix.c index d8f1fdc6..af16be1a 100644 --- a/src/lck-posix.c +++ b/src/lck-posix.c @@ -556,14 +556,13 @@ MDBX_INTERNAL_FUNC int osal_lck_upgrade(MDBX_env *env, bool dont_wait) { } __cold MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, - MDBX_env *inprocess_neighbor) { - if (unlikely(osal_getpid() != env->me_pid)) - return MDBX_PANIC; - + MDBX_env *inprocess_neighbor, + const uint32_t current_pid) { + eASSERT(env, osal_getpid() == current_pid); int rc = MDBX_SUCCESS; struct stat lck_info; - MDBX_lockinfo *lck = env->me_lck_mmap.lck; - if (env->me_lfd != INVALID_HANDLE_VALUE && !inprocess_neighbor && lck && + MDBX_lockinfo *lck = env->me_lck; + if (lck && lck == env->me_lck_mmap.lck && !inprocess_neighbor && /* try get exclusive access */ lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, OFF_T_MAX) == 0 && /* if LCK was not removed */ @@ -572,7 +571,8 @@ __cold MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, (env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX) == 0) { - VERBOSE("%p got exclusive, drown locks", (void *)env); + VERBOSE("%p got exclusive, drown ipc-locks", (void *)env); + eASSERT(env, current_pid == env->me_pid); #if MDBX_LOCKING == MDBX_LOCKING_SYSV if (env->me_sysv_ipc.semid != -1) rc = semctl(env->me_sysv_ipc.semid, 2, IPC_RMID) ? 
errno : 0; @@ -586,13 +586,20 @@ __cold MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, if (rc == 0) { const bool synced = lck->mti_unsynced_pages.weak == 0; osal_munmap(&env->me_lck_mmap); - if (synced) + if (synced && env->me_lfd != INVALID_HANDLE_VALUE) rc = ftruncate(env->me_lfd, 0) ? errno : 0; } jitter4testing(false); } + if (current_pid != env->me_pid) { + eASSERT(env, !inprocess_neighbor); + NOTICE("drown env %p after-fork pid %d -> %d", + __Wpedantic_format_voidptr(env), env->me_pid, current_pid); + inprocess_neighbor = nullptr; + } + /* 1) POSIX's fcntl() locks (i.e. when op_setlk == F_SETLK) should be restored * after file was closed. * diff --git a/src/lck-windows.c b/src/lck-windows.c index bc77150d..e7a4b03a 100644 --- a/src/lck-windows.c +++ b/src/lck-windows.c @@ -682,7 +682,9 @@ MDBX_INTERNAL_FUNC int osal_lck_init(MDBX_env *env, } MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, - MDBX_env *inprocess_neighbor) { + MDBX_env *inprocess_neighbor, + const uint32_t current_pid) { + (void)current_pid; /* LY: should unmap before releasing the locks to avoid race condition and * STATUS_USER_MAPPED_FILE/ERROR_USER_MAPPED_FILE */ if (env->me_map) diff --git a/src/osal.h b/src/osal.h index 1b5c317f..3e74cdb0 100644 --- a/src/osal.h +++ b/src/osal.h @@ -690,7 +690,8 @@ MDBX_INTERNAL_FUNC int osal_lck_init(MDBX_env *env, /// restore POSIX-fcntl locks after the closing of file descriptors. /// \return Error code (MDBX_PANIC) or zero on success. 
MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, - MDBX_env *inprocess_neighbor); + MDBX_env *inprocess_neighbor, + const uint32_t current_pid); /// \brief Connects to shared interprocess locking objects and tries to acquire /// the maximum lock level (shared if exclusive is not available) From 54efb8bd81b61da9735f18bbfae185d6062dd649 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 10 Nov 2023 15:30:40 +0300 Subject: [PATCH 043/443] =?UTF-8?q?mdbx:=20=D0=BD=D0=B5=20=D1=81=D1=87?= =?UTF-8?q?=D0=B8=D1=82=D0=B0=D0=B5=D0=BC=20=D0=BE=D1=88=D0=B8=D0=B1=D0=BA?= =?UTF-8?q?=D0=B8=20ipc-unlock=20=D0=BA=D1=80=D0=B8=D1=82=D0=B8=D1=87?= =?UTF-8?q?=D0=BD=D1=8B=D0=BC=D0=B8=20=D0=B2=20=D1=81=D0=BB=D1=83=D1=87?= =?UTF-8?q?=D0=B0=D0=B5=20=D1=81=D0=BC=D0=B5=D0=BD=D1=8B=20pid.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 2 +- src/internals.h | 2 +- src/lck-posix.c | 49 +++++++++++++++++++++++++++++++------------------ 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/src/core.c b/src/core.c index b6587417..d6959a8c 100644 --- a/src/core.c +++ b/src/core.c @@ -13552,7 +13552,7 @@ __cold int mdbx_env_create(MDBX_env **penv) { #if MDBX_LOCKING > MDBX_LOCKING_SYSV MDBX_lockinfo *const stub = lckless_stub(env); - rc = osal_ipclock_stub(&stub->mti_wlock); + rc = osal_ipclock_stubinit(&stub->mti_wlock); #endif /* MDBX_LOCKING */ if (unlikely(rc != MDBX_SUCCESS)) { osal_fastmutex_destroy(&env->me_remap_guard); diff --git a/src/internals.h b/src/internals.h index 02d8ab49..7f9aedd0 100644 --- a/src/internals.h +++ b/src/internals.h @@ -817,7 +817,7 @@ typedef sem_t osal_ipclock_t; #endif /* MDBX_LOCKING */ #if MDBX_LOCKING > MDBX_LOCKING_SYSV && !defined(__cplusplus) -MDBX_INTERNAL_FUNC int osal_ipclock_stub(osal_ipclock_t *ipc); +MDBX_INTERNAL_FUNC int osal_ipclock_stubinit(osal_ipclock_t *ipc); 
MDBX_INTERNAL_FUNC int osal_ipclock_destroy(osal_ipclock_t *ipc); #endif /* MDBX_LOCKING */ diff --git a/src/lck-posix.c b/src/lck-posix.c index af16be1a..09e62f8d 100644 --- a/src/lck-posix.c +++ b/src/lck-posix.c @@ -294,7 +294,7 @@ MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid) { /*---------------------------------------------------------------------------*/ #if MDBX_LOCKING > MDBX_LOCKING_SYSV -MDBX_INTERNAL_FUNC int osal_ipclock_stub(osal_ipclock_t *ipc) { +MDBX_INTERNAL_FUNC int osal_ipclock_stubinit(osal_ipclock_t *ipc) { #if MDBX_LOCKING == MDBX_LOCKING_POSIX1988 return sem_init(ipc, false, 1) ? errno : 0; #elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ @@ -796,7 +796,7 @@ bailout: #endif /* MDBX_LOCKING > 0 */ } -__cold static int mdbx_ipclock_failed(MDBX_env *env, osal_ipclock_t *ipc, +__cold static int osal_ipclock_failed(MDBX_env *env, osal_ipclock_t *ipc, const int err) { int rc = err; #if MDBX_LOCKING == MDBX_LOCKING_POSIX2008 || MDBX_LOCKING == MDBX_LOCKING_SYSV @@ -918,29 +918,42 @@ static int osal_ipclock_lock(MDBX_env *env, osal_ipclock_t *ipc, #endif /* MDBX_LOCKING */ if (unlikely(rc != MDBX_SUCCESS && rc != MDBX_BUSY)) - rc = mdbx_ipclock_failed(env, ipc, rc); + rc = osal_ipclock_failed(env, ipc, rc); return rc; } int osal_ipclock_unlock(MDBX_env *env, osal_ipclock_t *ipc) { + int err = MDBX_ENOSYS; #if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ MDBX_LOCKING == MDBX_LOCKING_POSIX2008 - int rc = pthread_mutex_unlock(ipc); - (void)env; + err = pthread_mutex_unlock(ipc); #elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988 - int rc = sem_post(ipc) ? errno : MDBX_SUCCESS; - (void)env; + err = sem_post(ipc) ? errno : MDBX_SUCCESS; #elif MDBX_LOCKING == MDBX_LOCKING_SYSV if (unlikely(*ipc != (pid_t)env->me_pid)) - return EPERM; - *ipc = 0; - struct sembuf op = {.sem_num = (ipc != &env->me_lck->mti_wlock), - .sem_op = 1, - .sem_flg = SEM_UNDO}; - int rc = semop(env->me_sysv_ipc.semid, &op, 1) ? 
errno : MDBX_SUCCESS; + err = EPERM; + else { + *ipc = 0; + struct sembuf op = {.sem_num = (ipc != &env->me_lck->mti_wlock), + .sem_op = 1, + .sem_flg = SEM_UNDO}; + err = semop(env->me_sysv_ipc.semid, &op, 1) ? errno : MDBX_SUCCESS; + } #else #error "FIXME" #endif /* MDBX_LOCKING */ + int rc = err; + if (unlikely(rc != MDBX_SUCCESS)) { + const uint32_t current_pid = osal_getpid(); + if (current_pid == env->me_pid || LOG_ENABLED(MDBX_LOG_NOTICE)) + debug_log((current_pid == env->me_pid) + ? MDBX_LOG_FATAL + : (rc = MDBX_SUCCESS, MDBX_LOG_NOTICE), + "ipc-unlock()", __LINE__, "failed: env %p, lck-%s %p, err %d\n", + __Wpedantic_format_voidptr(env), + (env->me_lck == env->me_lck_mmap.lck) ? "mmap" : "stub", + __Wpedantic_format_voidptr(env->me_lck), err); + } return rc; } @@ -954,10 +967,10 @@ MDBX_INTERNAL_FUNC int osal_rdt_lock(MDBX_env *env) { MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env) { TRACE("%s", ">>"); - int rc = osal_ipclock_unlock(env, &env->me_lck->mti_rlock); - TRACE("<< rc %d", rc); - if (unlikely(rc != MDBX_SUCCESS)) - mdbx_panic("%s() failed: err %d\n", __func__, rc); + int err = osal_ipclock_unlock(env, &env->me_lck->mti_rlock); + TRACE("<< err %d", err); + if (unlikely(err != MDBX_SUCCESS)) + mdbx_panic("%s() failed: err %d\n", __func__, err); jitter4testing(true); } @@ -974,7 +987,7 @@ int osal_txn_lock(MDBX_env *env, bool dont_wait) { env->me_txn0->mt_owner = osal_thread_self(); rc = MDBX_SUCCESS; } - TRACE("<< rc %d", err); + TRACE("<< err %d, rc %d", err, rc); return rc; } From ce74fae036ea1e0b48f5587e1bac774857ff6d85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 Nov 2023 00:23:09 +0300 Subject: [PATCH 044/443] =?UTF-8?q?mdbx:=20=D1=80=D0=B5=D1=84=D0=B0=D0=BA?= =?UTF-8?q?=D1=82=D0=BE=D1=80=D0=B8=D0=BD=D0=B3=20=D0=B8=20=D0=B2=D1=8B?= =?UTF-8?q?=D0=B4=D0=B5=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`env=5Fopen()`.?= MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 353 +++++++++++++++++++++++++++-------------------------- 1 file changed, 177 insertions(+), 176 deletions(-) diff --git a/src/core.c b/src/core.c index d6959a8c..39ba89b1 100644 --- a/src/core.c +++ b/src/core.c @@ -13512,7 +13512,6 @@ __cold int mdbx_env_create(MDBX_env **penv) { env->me_maxdbs = env->me_numdbs = CORE_DBS; env->me_lazy_fd = env->me_dsync_fd = env->me_fd4meta = env->me_lfd = INVALID_HANDLE_VALUE; - env->me_pid = osal_getpid(); env->me_stuck_meta = -1; env->me_options.rp_augment_limit = MDBX_PNL_INITIAL; @@ -13946,7 +13945,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } -#endif +#endif /* Windows */ if (new_geo.now != current_geo->now || new_geo.upper != current_geo->upper) { @@ -13995,6 +13994,7 @@ __cold static int alloc_page_buf(MDBX_env *env) { __cold static int setup_dxb(MDBX_env *env, const int lck_rc, const mdbx_mode_t mode_bits) { MDBX_meta header; + eASSERT(env, !(env->me_flags & MDBX_ENV_ACTIVE)); int rc = MDBX_RESULT_FALSE; int err = read_header(env, &header, lck_rc, mode_bits); if (unlikely(err != MDBX_SUCCESS)) { @@ -14239,7 +14239,6 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc, #if MDBX_DEBUG meta_troika_dump(env, &troika); #endif - eASSERT(env, !env->me_txn && !env->me_txn0); //-------------------------------- validate/rollback head & steady meta-pages if (unlikely(env->me_stuck_meta >= 0)) { /* recovery mode */ @@ -15197,78 +15196,7 @@ __cold int mdbx_env_deleteW(const wchar_t *pathname, return (err == MDBX_SUCCESS) ? 
rc : err; } -__cold int mdbx_env_open(MDBX_env *env, const char *pathname, - MDBX_env_flags_t flags, mdbx_mode_t mode) { -#if defined(_WIN32) || defined(_WIN64) - wchar_t *pathnameW = nullptr; - int rc = osal_mb2w(pathname, &pathnameW); - if (likely(rc == MDBX_SUCCESS)) { - rc = mdbx_env_openW(env, pathnameW, flags, mode); - osal_free(pathnameW); - if (rc == MDBX_SUCCESS) - /* force to make cache of the multi-byte pathname representation */ - mdbx_env_get_path(env, &pathname); - } - return rc; -} - -__cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, - MDBX_env_flags_t flags, mdbx_mode_t mode) { -#endif /* Windows */ - - int rc = check_env(env, false); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(flags & ~ENV_USABLE_FLAGS)) - return MDBX_EINVAL; - - if (unlikely(env->me_lazy_fd != INVALID_HANDLE_VALUE || - (env->me_flags & MDBX_ENV_ACTIVE) != 0 || env->me_map)) - return MDBX_EPERM; - - /* Pickup previously mdbx_env_set_flags(), - * but avoid MDBX_UTTERLY_NOSYNC by disjunction */ - const uint32_t saved_me_flags = env->me_flags; - flags = merge_sync_flags(flags | MDBX_DEPRECATED_COALESCE, env->me_flags); - - if (flags & MDBX_RDONLY) { - /* Silently ignore irrelevant flags when we're only getting read access */ - flags &= ~(MDBX_WRITEMAP | MDBX_DEPRECATED_MAPASYNC | MDBX_SAFE_NOSYNC | - MDBX_NOMETASYNC | MDBX_DEPRECATED_COALESCE | MDBX_LIFORECLAIM | - MDBX_NOMEMINIT | MDBX_ACCEDE); - mode = 0; - } else { -#if MDBX_MMAP_INCOHERENT_FILE_WRITE - /* Temporary `workaround` for OpenBSD kernel's flaw. - * See https://libmdbx.dqdkfa.ru/dead-github/issues/67 */ - if ((flags & MDBX_WRITEMAP) == 0) { - if (flags & MDBX_ACCEDE) - flags |= MDBX_WRITEMAP; - else { - debug_log(MDBX_LOG_ERROR, __func__, __LINE__, - "System (i.e. 
OpenBSD) requires MDBX_WRITEMAP because " - "of an internal flaw(s) in a file/buffer/page cache.\n"); - return 42 /* ENOPROTOOPT */; - } - } -#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ - } - - env->me_flags = (flags & ~MDBX_FATAL_ERROR); - rc = env_handle_pathname(env, pathname, mode); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - - env->me_flags = (flags & ~MDBX_FATAL_ERROR) | MDBX_ENV_ACTIVE; - env->me_dbxs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbxs[0])); - env->me_db_flags = osal_calloc(env->me_maxdbs, sizeof(env->me_db_flags[0])); - env->me_dbi_seqs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbi_seqs[0])); - if (!(env->me_dbxs && env->me_db_flags && env->me_dbi_seqs)) { - rc = MDBX_ENOMEM; - goto bailout; - } - +__cold static int env_open(MDBX_env *env, mdbx_mode_t mode) { /* Использование O_DSYNC или FILE_FLAG_WRITE_THROUGH: * * 0) Если размер страниц БД меньше системной страницы ОЗУ, то ядру ОС @@ -15357,18 +15285,16 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, */ env->me_pid = osal_getpid(); - rc = osal_openfile((env->me_flags & MDBX_RDONLY) ? MDBX_OPEN_DXB_READ - : MDBX_OPEN_DXB_LAZY, - env, env->me_pathname.dxb, &env->me_lazy_fd, mode); + int rc = osal_openfile((env->me_flags & MDBX_RDONLY) ? 
MDBX_OPEN_DXB_READ + : MDBX_OPEN_DXB_LAZY, + env, env->me_pathname.dxb, &env->me_lazy_fd, mode); if (unlikely(rc != MDBX_SUCCESS)) return rc; #if MDBX_LOCKING == MDBX_LOCKING_SYSV env->me_sysv_ipc.key = ftok(env->me_pathname.dxb, 42); - if (env->me_sysv_ipc.key == -1) { - rc = errno; - goto bailout; - } + if (unlikely(env->me_sysv_ipc.key == -1)) + return errno; #endif /* MDBX_LOCKING */ /* Set the position in files outside of the data to avoid corruption @@ -15380,9 +15306,9 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, #if defined(_WIN32) || defined(_WIN64) eASSERT(env, env->me_overlapped_fd == 0); bool ior_direct = false; - if (!(flags & + if (!(env->me_flags & (MDBX_RDONLY | MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_EXCLUSIVE))) { - if (MDBX_AVOID_MSYNC && (flags & MDBX_WRITEMAP)) { + if (MDBX_AVOID_MSYNC && (env->me_flags & MDBX_WRITEMAP)) { /* Запрошен режим MDBX_SYNC_DURABLE | MDBX_WRITEMAP при активной опции * MDBX_AVOID_MSYNC. * @@ -15420,23 +15346,19 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, rc = osal_openfile(ior_direct ? MDBX_OPEN_DXB_OVERLAPPED_DIRECT : MDBX_OPEN_DXB_OVERLAPPED, env, env->me_pathname.dxb, &env->me_overlapped_fd, 0); - if (rc != MDBX_SUCCESS) - goto bailout; + if (unlikely(rc != MDBX_SUCCESS)) + return rc; env->me_data_lock_event = CreateEventW(nullptr, true, false, nullptr); - if (!env->me_data_lock_event) { - rc = (int)GetLastError(); - goto bailout; - } + if (unlikely(!env->me_data_lock_event)) + return (int)GetLastError(); osal_fseek(env->me_overlapped_fd, safe_parking_lot_offset); } #else if (mode == 0) { /* pickup mode for lck-file */ struct stat st; - if (fstat(env->me_lazy_fd, &st)) { - rc = errno; - goto bailout; - } + if (unlikely(fstat(env->me_lazy_fd, &st))) + return errno; mode = st.st_mode; } mode = (/* inherit read permissions for group and others */ mode & @@ -15446,24 +15368,24 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, ((mode & S_IROTH) ? 
/* +write if readable by others */ S_IWOTH : 0); #endif /* !Windows */ const int lck_rc = setup_lck(env, mode); - if (MDBX_IS_ERROR(lck_rc)) { - rc = lck_rc; - goto bailout; - } - osal_fseek(env->me_lfd, safe_parking_lot_offset); + if (unlikely(MDBX_IS_ERROR(lck_rc))) + return lck_rc; + if (env->me_lfd != INVALID_HANDLE_VALUE) + osal_fseek(env->me_lfd, safe_parking_lot_offset); eASSERT(env, env->me_dsync_fd == INVALID_HANDLE_VALUE); - if (!(flags & (MDBX_RDONLY | MDBX_SAFE_NOSYNC | MDBX_DEPRECATED_MAPASYNC + if (!(env->me_flags & + (MDBX_RDONLY | MDBX_SAFE_NOSYNC | MDBX_DEPRECATED_MAPASYNC #if defined(_WIN32) || defined(_WIN64) - | MDBX_EXCLUSIVE + | MDBX_EXCLUSIVE #endif /* !Windows */ - ))) { + ))) { rc = osal_openfile(MDBX_OPEN_DXB_DSYNC, env, env->me_pathname.dxb, &env->me_dsync_fd, 0); - if (MDBX_IS_ERROR(rc)) - goto bailout; + if (unlikely(MDBX_IS_ERROR(rc))) + return rc; if (env->me_dsync_fd != INVALID_HANDLE_VALUE) { - if ((flags & MDBX_NOMETASYNC) == 0) + if ((env->me_flags & MDBX_NOMETASYNC) == 0) env->me_fd4meta = env->me_dsync_fd; osal_fseek(env->me_dsync_fd, safe_parking_lot_offset); } @@ -15538,17 +15460,14 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, ERROR("current mode/flags 0x%X incompatible with requested 0x%X, " "rigorous diff 0x%X", env->me_flags, snap_flags, rigorous_diff); - rc = MDBX_INCOMPATIBLE; - goto bailout; + return MDBX_INCOMPATIBLE; } } mincore_clean_cache(env); const int dxb_rc = setup_dxb(env, lck_rc, mode); - if (MDBX_IS_ERROR(dxb_rc)) { - rc = dxb_rc; - goto bailout; - } + if (MDBX_IS_ERROR(dxb_rc)) + return dxb_rc; rc = osal_check_fs_incore(env->me_lazy_fd); env->me_incore = false; @@ -15557,18 +15476,18 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, NOTICE("%s", "in-core database"); } else if (unlikely(rc != MDBX_SUCCESS)) { ERROR("check_fs_incore(), err %d", rc); - goto bailout; + return rc; } if (unlikely(/* recovery mode */ env->me_stuck_meta >= 0) && (lck_rc != /* exclusive */ 
MDBX_RESULT_TRUE || - (flags & MDBX_EXCLUSIVE) == 0)) { + (env->me_flags & MDBX_EXCLUSIVE) == 0)) { ERROR("%s", "recovery requires exclusive mode"); - rc = MDBX_BUSY; - goto bailout; + return MDBX_BUSY; } DEBUG("opened dbenv %p", (void *)env); + env->me_flags |= MDBX_ENV_ACTIVE; if (!lck || lck_rc == MDBX_RESULT_TRUE) { env->me_lck->mti_envmode.weak = env->me_flags & mode_flags; env->me_lck->mti_meta_sync_txnid.weak = @@ -15581,14 +15500,96 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, DEBUG("lck-downgrade-%s: rc %i", (env->me_flags & MDBX_EXCLUSIVE) ? "partial" : "full", rc); if (rc != MDBX_SUCCESS) - goto bailout; + return rc; } else { rc = cleanup_dead_readers(env, false, NULL); if (MDBX_IS_ERROR(rc)) - goto bailout; + return rc; } } + rc = (env->me_flags & MDBX_RDONLY) + ? MDBX_SUCCESS + : osal_ioring_create(&env->me_ioring +#if defined(_WIN32) || defined(_WIN64) + , + ior_direct, env->me_overlapped_fd +#endif /* Windows */ + ); + return rc; +} + +__cold int mdbx_env_open(MDBX_env *env, const char *pathname, + MDBX_env_flags_t flags, mdbx_mode_t mode) { +#if defined(_WIN32) || defined(_WIN64) + wchar_t *pathnameW = nullptr; + int rc = osal_mb2w(pathname, &pathnameW); + if (likely(rc == MDBX_SUCCESS)) { + rc = mdbx_env_openW(env, pathnameW, flags, mode); + osal_free(pathnameW); + if (rc == MDBX_SUCCESS) + /* force to make cache of the multi-byte pathname representation */ + mdbx_env_get_path(env, &pathname); + } + return rc; +} + +__cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, + MDBX_env_flags_t flags, mdbx_mode_t mode) { +#endif /* Windows */ + + int rc = check_env(env, false); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(flags & ~ENV_USABLE_FLAGS)) + return MDBX_EINVAL; + + if (unlikely(env->me_lazy_fd != INVALID_HANDLE_VALUE || + (env->me_flags & MDBX_ENV_ACTIVE) != 0 || env->me_map)) + return MDBX_EPERM; + + /* Pickup previously mdbx_env_set_flags(), + * but avoid MDBX_UTTERLY_NOSYNC by 
disjunction */ + const uint32_t saved_me_flags = env->me_flags; + flags = merge_sync_flags(flags | MDBX_DEPRECATED_COALESCE, env->me_flags); + + if (flags & MDBX_RDONLY) { + /* Silently ignore irrelevant flags when we're only getting read access */ + flags &= ~(MDBX_WRITEMAP | MDBX_DEPRECATED_MAPASYNC | MDBX_SAFE_NOSYNC | + MDBX_NOMETASYNC | MDBX_DEPRECATED_COALESCE | MDBX_LIFORECLAIM | + MDBX_NOMEMINIT | MDBX_ACCEDE); + mode = 0; + } else { +#if MDBX_MMAP_INCOHERENT_FILE_WRITE + /* Temporary `workaround` for OpenBSD kernel's flaw. + * See https://libmdbx.dqdkfa.ru/dead-github/issues/67 */ + if ((flags & MDBX_WRITEMAP) == 0) { + if (flags & MDBX_ACCEDE) + flags |= MDBX_WRITEMAP; + else { + debug_log(MDBX_LOG_ERROR, __func__, __LINE__, + "System (i.e. OpenBSD) requires MDBX_WRITEMAP because " + "of an internal flaw(s) in a file/buffer/page cache.\n"); + return 42 /* ENOPROTOOPT */; + } + } +#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ + } + + env->me_flags = (flags & ~MDBX_FATAL_ERROR); + rc = env_handle_pathname(env, pathname, mode); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + env->me_dbxs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbxs[0])); + env->me_db_flags = osal_calloc(env->me_maxdbs, sizeof(env->me_db_flags[0])); + env->me_dbi_seqs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbi_seqs[0])); + if (unlikely(!(env->me_dbxs && env->me_db_flags && env->me_dbi_seqs))) { + rc = MDBX_ENOMEM; + goto bailout; + } + if ((flags & MDBX_RDONLY) == 0) { MDBX_txn *txn = nullptr; const intptr_t bitmap_bytes = @@ -15606,73 +15607,73 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, (sizeof(txn->mt_dbs[0]) + sizeof(txn->mt_cursors[0]) + sizeof(txn->mt_dbi_seqs[0]) + sizeof(txn->mt_dbi_state[0])); rc = alloc_page_buf(env); - if (rc == MDBX_SUCCESS) { - memset(env->me_pbuf, -1, env->me_psize * (size_t)2); - memset(ptr_disp(env->me_pbuf, env->me_psize * (size_t)2), 0, - env->me_psize); - txn = osal_calloc(1, size); - if (txn) { - txn->mt_dbs = 
ptr_disp(txn, base); - txn->mt_cursors = - ptr_disp(txn->mt_dbs, env->me_maxdbs * sizeof(txn->mt_dbs[0])); - txn->mt_dbi_seqs = ptr_disp( - txn->mt_cursors, env->me_maxdbs * sizeof(txn->mt_cursors[0])); - txn->mt_dbi_state = - ptr_disp(txn, size - env->me_maxdbs * sizeof(txn->mt_dbi_state[0])); -#if MDBX_ENABLE_DBI_SPARSE - txn->mt_dbi_sparse = ptr_disp(txn->mt_dbi_state, -bitmap_bytes); -#endif /* MDBX_ENABLE_DBI_SPARSE */ - txn->mt_env = env; - txn->mt_flags = MDBX_TXN_FINISHED; - env->me_txn0 = txn; - txn->tw.retired_pages = pnl_alloc(MDBX_PNL_INITIAL); - txn->tw.relist = pnl_alloc(MDBX_PNL_INITIAL); - if (unlikely(!txn->tw.retired_pages || !txn->tw.relist)) - rc = MDBX_ENOMEM; - } else - rc = MDBX_ENOMEM; + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + memset(env->me_pbuf, -1, env->me_psize * (size_t)2); + memset(ptr_disp(env->me_pbuf, env->me_psize * (size_t)2), 0, env->me_psize); + txn = osal_calloc(1, size); + if (unlikely(!txn)) { + rc = MDBX_ENOMEM; + goto bailout; } - if (rc == MDBX_SUCCESS) - rc = osal_ioring_create(&env->me_ioring -#if defined(_WIN32) || defined(_WIN64) - , - ior_direct, env->me_overlapped_fd -#endif /* Windows */ - ); - if (rc == MDBX_SUCCESS) - adjust_defaults(env); + txn->mt_dbs = ptr_disp(txn, base); + txn->mt_cursors = + ptr_disp(txn->mt_dbs, env->me_maxdbs * sizeof(txn->mt_dbs[0])); + txn->mt_dbi_seqs = + ptr_disp(txn->mt_cursors, env->me_maxdbs * sizeof(txn->mt_cursors[0])); + txn->mt_dbi_state = + ptr_disp(txn, size - env->me_maxdbs * sizeof(txn->mt_dbi_state[0])); +#if MDBX_ENABLE_DBI_SPARSE + txn->mt_dbi_sparse = ptr_disp(txn->mt_dbi_state, -bitmap_bytes); +#endif /* MDBX_ENABLE_DBI_SPARSE */ + txn->mt_env = env; + txn->mt_flags = MDBX_TXN_FINISHED; + env->me_txn0 = txn; + txn->tw.retired_pages = pnl_alloc(MDBX_PNL_INITIAL); + txn->tw.relist = pnl_alloc(MDBX_PNL_INITIAL); + if (unlikely(!txn->tw.retired_pages || !txn->tw.relist)) { + rc = MDBX_ENOMEM; + goto bailout; + } + adjust_defaults(env); } + rc = env_open(env, 
mode); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + #if MDBX_DEBUG - if (rc == MDBX_SUCCESS) { - const meta_troika_t troika = meta_tap(env); - const meta_ptr_t head = meta_recent(env, &troika); - const MDBX_db *db = &head.ptr_c->mm_dbs[MAIN_DBI]; + const meta_troika_t troika = meta_tap(env); + const meta_ptr_t head = meta_recent(env, &troika); + const MDBX_db *db = &head.ptr_c->mm_dbs[MAIN_DBI]; - DEBUG("opened database version %u, pagesize %u", - (uint8_t)unaligned_peek_u64(4, head.ptr_c->mm_magic_and_version), - env->me_psize); - DEBUG("using meta page %" PRIaPGNO ", txn %" PRIaTXN, - data_page(head.ptr_c)->mp_pgno, head.txnid); - DEBUG("depth: %u", db->md_depth); - DEBUG("entries: %" PRIu64, db->md_entries); - DEBUG("branch pages: %" PRIaPGNO, db->md_branch_pages); - DEBUG("leaf pages: %" PRIaPGNO, db->md_leaf_pages); - DEBUG("large/overflow pages: %" PRIaPGNO, db->md_overflow_pages); - DEBUG("root: %" PRIaPGNO, db->md_root); - DEBUG("schema_altered: %" PRIaTXN, db->md_mod_txnid); - } -#endif + DEBUG("opened database version %u, pagesize %u", + (uint8_t)unaligned_peek_u64(4, head.ptr_c->mm_magic_and_version), + env->me_psize); + DEBUG("using meta page %" PRIaPGNO ", txn %" PRIaTXN, + data_page(head.ptr_c)->mp_pgno, head.txnid); + DEBUG("depth: %u", db->md_depth); + DEBUG("entries: %" PRIu64, db->md_entries); + DEBUG("branch pages: %" PRIaPGNO, db->md_branch_pages); + DEBUG("leaf pages: %" PRIaPGNO, db->md_leaf_pages); + DEBUG("large/overflow pages: %" PRIaPGNO, db->md_overflow_pages); + DEBUG("root: %" PRIaPGNO, db->md_root); + DEBUG("schema_altered: %" PRIaTXN, db->md_mod_txnid); +#endif /* MDBX_DEBUG */ -bailout: - if (rc != MDBX_SUCCESS) { - rc = env_close(env) ? MDBX_PANIC : rc; - env->me_flags = - saved_me_flags | ((rc != MDBX_PANIC) ? 
0 : MDBX_FATAL_ERROR); - } else { + if (likely(rc == MDBX_SUCCESS)) { #if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) txn_valgrind(env, nullptr); #endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ + } else { + bailout: + if (likely(env_close(env) == MDBX_SUCCESS)) { + env->me_flags = saved_me_flags; + } else { + rc = MDBX_PANIC; + env->me_flags = saved_me_flags | MDBX_FATAL_ERROR; + } } return rc; } @@ -15713,7 +15714,7 @@ __cold static int env_close(MDBX_env *env) { #ifdef ENABLE_MEMCHECK VALGRIND_DISCARD(env->me_valgrind_handle); env->me_valgrind_handle = -1; -#endif +#endif /* ENABLE_MEMCHECK */ } #if defined(_WIN32) || defined(_WIN64) From a22ec56938fb539fb335d46360ea5255e1a607d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 11 Nov 2023 20:21:18 +0300 Subject: [PATCH 045/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20`pthread=5Fa?= =?UTF-8?q?tfork(after=5Ffork)`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/core.c b/src/core.c index 39ba89b1..8138dcaf 100644 --- a/src/core.c +++ b/src/core.c @@ -25910,6 +25910,26 @@ __cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, return rc; } +#if !defined(_WIN32) && !defined(_WIN64) +__cold static void rthc_afterfork(void) { + NOTICE("drown %d rthc entries", rthc_count); + for (size_t i = 0; i < rthc_count; ++i) { + MDBX_env *const env = rthc_table[i].env; + NOTICE("drown env %p", __Wpedantic_format_voidptr(env)); + env->me_dxb_mmap.base = nullptr; + env->me_lck_mmap.base = nullptr; + env->me_lck = lckless_stub(env); + rthc_drown(env); + } + if (rthc_table != rthc_table_static) + osal_free(rthc_table); + rthc_count = 0; + rthc_table = rthc_table_static; + rthc_limit = 
RTHC_INITIAL_LIMIT; + rthc_pending.weak = 0; +} +#endif /* ! Windows */ + __cold void global_ctor(void) { osal_ctor(); rthc_limit = RTHC_INITIAL_LIMIT; @@ -25917,6 +25937,7 @@ __cold void global_ctor(void) { #if defined(_WIN32) || defined(_WIN64) InitializeCriticalSection(&rthc_critical_section); #else + ENSURE(nullptr, pthread_atfork(nullptr, nullptr, rthc_afterfork) == 0); ENSURE(nullptr, pthread_key_create(&rthc_key, thread_dtor) == 0); TRACE("pid %d, &mdbx_rthc_key = %p, value 0x%x", osal_getpid(), __Wpedantic_format_voidptr(&rthc_key), (unsigned)rthc_key); From af4dfe541b83938ff67ea48012b8c78f3226691a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 11 Nov 2023 20:08:04 +0300 Subject: [PATCH 046/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fenv=5Fresurrect=5Fafte?= =?UTF-8?q?r=5Ffork()`=20=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 5 +++ src/core.c | 109 +++++++++++++++++++++++++++++++++++------------------ 2 files changed, 78 insertions(+), 36 deletions(-) diff --git a/mdbx.h b/mdbx.h index e2da78b3..ce2dad5e 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2895,6 +2895,11 @@ LIBMDBX_INLINE_API(int, mdbx_env_close, (MDBX_env * env)) { return mdbx_env_close_ex(env, false); } +#if !(defined(_WIN32) || defined(_WIN64)) +/** FIXME */ +LIBMDBX_API int mdbx_env_resurrect_after_fork(MDBX_env *env); +#endif /* Windows */ + /** \brief Warming up options * \ingroup c_settings * \anchor warmup_flags diff --git a/src/core.c b/src/core.c index 8138dcaf..81d62e69 100644 --- a/src/core.c +++ b/src/core.c @@ -3317,7 +3317,7 @@ static int __must_check_result read_header(MDBX_env *env, MDBX_meta *meta, static int __must_check_result sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending, meta_troika_t *const troika); -static int 
env_close(MDBX_env *env); +static int env_close(MDBX_env *env, bool resurrect_after_fork); struct node_result { MDBX_node *node; @@ -15668,7 +15668,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, #endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ } else { bailout: - if (likely(env_close(env) == MDBX_SUCCESS)) { + if (likely(env_close(env, false) == MDBX_SUCCESS)) { env->me_flags = saved_me_flags; } else { rc = MDBX_PANIC; @@ -15679,7 +15679,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, } /* Destroy resources from mdbx_env_open(), clear our readers & DBIs */ -__cold static int env_close(MDBX_env *env) { +__cold static int env_close(MDBX_env *env, bool resurrect_after_fork) { const unsigned flags = env->me_flags; env->me_flags &= ~ENV_INTERNAL_FLAGS; if (flags & MDBX_ENV_TXKEY) { @@ -15724,6 +15724,7 @@ __cold static int env_close(MDBX_env *env) { CloseHandle(env->me_data_lock_event); env->me_data_lock_event = INVALID_HANDLE_VALUE; } + eASSERT(env, !resurrect_after_fork); if (env->me_pathname_char) { osal_free(env->me_pathname_char); env->me_pathname_char = nullptr; @@ -15745,43 +15746,79 @@ __cold static int env_close(MDBX_env *env) { env->me_lfd = INVALID_HANDLE_VALUE; } - if (env->me_dbxs) { - for (size_t i = CORE_DBS; i < env->me_numdbs; ++i) - if (env->me_dbxs[i].md_name.iov_len) - osal_free(env->me_dbxs[i].md_name.iov_base); - osal_free(env->me_dbxs); - env->me_numdbs = CORE_DBS; - env->me_dbxs = nullptr; - } - if (env->me_pbuf) { - osal_memalign_free(env->me_pbuf); - env->me_pbuf = nullptr; - } - if (env->me_dbi_seqs) { - osal_free(env->me_dbi_seqs); - env->me_dbi_seqs = nullptr; - } - if (env->me_db_flags) { - osal_free(env->me_db_flags); - env->me_db_flags = nullptr; - } - if (env->me_pathname.buffer) { - osal_free(env->me_pathname.buffer); - env->me_pathname.buffer = nullptr; - } - if (env->me_txn0) { - dpl_free(env->me_txn0); - txl_free(env->me_txn0->tw.lifo_reclaimed); - 
pnl_free(env->me_txn0->tw.retired_pages); - pnl_free(env->me_txn0->tw.spilled.list); - pnl_free(env->me_txn0->tw.relist); - osal_free(env->me_txn0); - env->me_txn0 = nullptr; + if (!resurrect_after_fork) { + if (env->me_dbxs) { + for (size_t i = CORE_DBS; i < env->me_numdbs; ++i) + if (env->me_dbxs[i].md_name.iov_len) + osal_free(env->me_dbxs[i].md_name.iov_base); + osal_free(env->me_dbxs); + env->me_numdbs = CORE_DBS; + env->me_dbxs = nullptr; + } + if (env->me_pbuf) { + osal_memalign_free(env->me_pbuf); + env->me_pbuf = nullptr; + } + if (env->me_dbi_seqs) { + osal_free(env->me_dbi_seqs); + env->me_dbi_seqs = nullptr; + } + if (env->me_db_flags) { + osal_free(env->me_db_flags); + env->me_db_flags = nullptr; + } + if (env->me_pathname.buffer) { + osal_free(env->me_pathname.buffer); + env->me_pathname.buffer = nullptr; + } + if (env->me_txn0) { + dpl_free(env->me_txn0); + txl_free(env->me_txn0->tw.lifo_reclaimed); + pnl_free(env->me_txn0->tw.retired_pages); + pnl_free(env->me_txn0->tw.spilled.list); + pnl_free(env->me_txn0->tw.relist); + osal_free(env->me_txn0); + env->me_txn0 = nullptr; + } } env->me_stuck_meta = -1; return rc; } +#if !(defined(_WIN32) || defined(_WIN64)) +__cold int mdbx_env_resurrect_after_fork(MDBX_env *env) { + if (unlikely(!env)) + return MDBX_EINVAL; + + if (unlikely(env->me_signature.weak != MDBX_ME_SIGNATURE)) + return MDBX_EBADSIGN; + + if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) + return MDBX_PANIC; + + const uint32_t new_pid = osal_getpid(); + if (unlikely(env->me_pid == new_pid)) + return MDBX_SUCCESS; + + if (!atomic_cas32(&env->me_signature, MDBX_ME_SIGNATURE, ~MDBX_ME_SIGNATURE)) + return MDBX_EBADSIGN; + + if (env->me_txn) + txn_abort(env->me_txn0); + env->me_live_reader = 0; + int rc = env_close(env, true); + env->me_signature.weak = MDBX_ME_SIGNATURE; + if (likely(rc == MDBX_SUCCESS)) { + rc = env_open(env, 0); + if (unlikely(rc != MDBX_SUCCESS && env_close(env, false) != MDBX_SUCCESS)) { + rc = MDBX_PANIC; + env->me_flags |= 
MDBX_FATAL_ERROR; + } + } + return rc; +} +#endif /* Windows */ + __cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) { MDBX_page *dp; int rc = MDBX_SUCCESS; @@ -15834,7 +15871,7 @@ __cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) { } eASSERT(env, env->me_signature.weak == 0); - rc = env_close(env) ? MDBX_PANIC : rc; + rc = env_close(env, false) ? MDBX_PANIC : rc; ENSURE(env, osal_fastmutex_destroy(&env->me_dbi_lock) == MDBX_SUCCESS); #if defined(_WIN32) || defined(_WIN64) /* me_remap_guard don't have destructor (Slim Reader/Writer Lock) */ From d9f49b17dee1ab709c0d825fb32843415a398edd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 8 Nov 2023 19:58:18 +0300 Subject: [PATCH 047/443] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=82=D0=B5=D1=81=D1=82?= =?UTF-8?q?=D0=BE=D0=B2=20=D0=B4=D0=BB=D1=8F=20`mdbx=5Fenv=5Fresurrect=5Fa?= =?UTF-8?q?fter=5Ffork()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 1 + test/cases.c++ | 4 + test/config.h++ | 4 + test/fork.c++ | 224 ++++++++++++++++++++++++++++++++++++++++++++ test/log.c++ | 9 +- test/log.h++ | 1 + test/main.c++ | 16 ++++ test/osal-unix.c++ | 16 ++-- test/osal.h++ | 4 + test/test.c++ | 6 ++ 10 files changed, 276 insertions(+), 9 deletions(-) create mode 100644 test/fork.c++ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f6901916..23789be0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -28,6 +28,7 @@ set(LIBMDBX_TEST_SOURCES append.c++ ttl.c++ nested.c++ + fork.c++ ) if(NOT MDBX_BUILD_CXX) diff --git a/test/cases.c++ b/test/cases.c++ index 97421e7d..5ccb87ae 100644 --- a/test/cases.c++ +++ b/test/cases.c++ @@ -105,6 +105,10 @@ void testcase_setup(const char *casename, const actor_params &params, configure_actor(last_space_id, ac_try,
nullptr, params); configure_actor(last_space_id, ac_jitter, nullptr, params); configure_actor(last_space_id, ac_try, nullptr, params); +#if !defined(_WIN32) && !defined(_WIN64) + configure_actor(last_space_id, ac_forkread, nullptr, params); + configure_actor(last_space_id, ac_forkwrite, nullptr, params); +#endif /* Windows */ log_notice("<<< testcase_setup(%s): done", casename); } else { failure("unknown testcase `%s`", casename); diff --git a/test/config.h++ b/test/config.h++ index f57dce7c..80996157 100644 --- a/test/config.h++ +++ b/test/config.h++ @@ -25,6 +25,10 @@ enum actor_testcase { ac_hill, ac_deadread, ac_deadwrite, +#if !defined(_WIN32) && !defined(_WIN64) + ac_forkread, + ac_forkwrite, +#endif /* Windows */ ac_jitter, ac_try, ac_copy, diff --git a/test/fork.c++ b/test/fork.c++ new file mode 100644 index 00000000..7f1c9b19 --- /dev/null +++ b/test/fork.c++ @@ -0,0 +1,224 @@ +/* + * Copyright 2023 Leonid Yuriev <leo@yuriev.ru> + * and other libmdbx authors: please see AUTHORS file. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>.
+ */ + +#include "test.h++" + +#if !defined(_WIN32) && !defined(_WIN64) + +#include <sys/types.h> +#include <sys/wait.h> + +class testcase_smoke4fork : public testcase { + using inherited = testcase; + +public: + testcase_smoke4fork(const actor_config &config, const mdbx_pid_t pid) + : testcase(config, pid) {} + bool run() override; + virtual bool smoke() = 0; +}; + +bool testcase_smoke4fork::run() { + static std::vector<pid_t> history; + const pid_t current_pid = getpid(); + if (history.empty() || current_pid != history.front()) { + history.push_back(current_pid); + if (history.size() > /* TODO: add test option */ 2) { + log_notice("force exit to avoid fork-bomb: deep %zu, pid stack", + history.size()); + for (const auto pid : history) + logging::feed(" %d", pid); + logging::ln(); + log_flush(); + exit(0); + } + } + const int deep = (int)history.size(); + + int err = db_open__begin__table_create_open_clean(dbi); + if (unlikely(err != MDBX_SUCCESS)) { + log_notice("fork[deep %d, pid %d]: bailout-prepare due '%s'", deep, + current_pid, mdbx_strerror(err)); + return false; + } + + if (flipcoin()) { + if (!smoke()) { + log_notice("%s[deep %d, pid %d] probe %s", "pre-fork", deep, current_pid, + "failed"); + return false; + } + log_verbose("%s[deep %d, pid %d] probe %s", "pre-fork", deep, current_pid, + "done"); + } else { + log_verbose("%s[deep %d, pid %d] probe %s", "pre-fork", deep, current_pid, + "skipped"); +#ifdef __SANITIZE_ADDRESS__ + const bool abort_txn_to_avoid_memleak = true; +#else + const bool abort_txn_to_avoid_memleak = !RUNNING_ON_VALGRIND && flipcoin(); +#endif + if (abort_txn_to_avoid_memleak && txn_guard) + txn_end(false); + } + + log_flush(); + const pid_t child = fork(); + if (child < 0) + failure_perror("fork()", errno); + + if (child == 0) { + const pid_t new_pid = getpid(); + log_verbose(">>> %s, deep %d, parent-pid %d, child-pid %d", + "mdbx_env_resurrect_after_fork()", deep, current_pid, new_pid); + log_flush(); + int err = mdbx_env_resurrect_after_fork(db_guard.get()); +
log_verbose("<<< %s, deep %d, parent-pid %d, child-pid %d, err %d", + "mdbx_env_resurrect_after_fork()", deep, current_pid, new_pid, + err); + log_flush(); + if (err != MDBX_SUCCESS) + failure_perror("mdbx_env_resurrect_after_fork()", err); + if (txn_guard) + mdbx_txn_abort(txn_guard.release()); + if (!smoke()) { + log_notice("%s[deep %d, pid %d] probe %s", "fork-child", deep, new_pid, + "failed"); + return false; + } + log_verbose("%s[deep %d, pid %d] probe %s", "fork-child", deep, new_pid, + "done"); + log_flush(); + return true; + } + + if (txn_guard) + txn_end(false); + + int status = 0xdeadbeef; + if (waitpid(child, &status, 0) != child) + failure_perror("waitpid()", errno); + + if (WIFEXITED(status)) { + const int code = WEXITSTATUS(status); + if (code != EXIT_SUCCESS) { + log_notice("%s[deep %d, pid %d] child-pid %d failed, err %d", + "fork-child", deep, current_pid, child, code); + return false; + } + log_notice("%s[deep %d, pid %d] child-pid %d done", "fork-child", deep, + current_pid, child); + } else if (WIFSIGNALED(status)) { + const int sig = WTERMSIG(status); + switch (sig) { + case SIGABRT: + case SIGBUS: + case SIGFPE: + case SIGILL: + case SIGSEGV: + log_notice("%s[deep %d, pid %d] child-pid %d %s by SIG%s", "fork-child", + deep, current_pid, child, "terminated", signal_name(sig)); + break; + default: + log_notice("%s[deep %d, pid %d] child-id %d %s by SIG%s", "fork-child", + deep, current_pid, child, "killed", signal_name(sig)); + } + return false; + } else { + assert(false); + } + + if (!smoke()) { + log_notice("%s[deep %d, pid %d] probe %s", "post-fork", deep, current_pid, + "failed"); + return false; + } + log_verbose("%s[deep %d, pid %d] probe %s", "post-fork", deep, current_pid, + "done"); + return true; +} + +//----------------------------------------------------------------------------- + +class testcase_forkread : public testcase_smoke4fork { + using inherited = testcase_smoke4fork; + +public: + testcase_forkread(const actor_config 
&config, const mdbx_pid_t pid) + : testcase_smoke4fork(config, pid) {} + bool smoke() override; +}; +REGISTER_TESTCASE(forkread); + +bool testcase_forkread::smoke() { + MDBX_envinfo env_info; + int err = mdbx_env_info_ex(db_guard.get(), txn_guard.get(), &env_info, + sizeof(env_info)); + if (err) + failure_perror("mdbx_env_info_ex()", err); + + if (!txn_guard) + txn_begin(true); + + MDBX_txn_info txn_info; + err = mdbx_txn_info(txn_guard.get(), &txn_info, sizeof(txn_info)); + if (err) + failure_perror("mdbx_txn_info()", err); + fetch_canary(); + err = mdbx_env_info_ex(db_guard.get(), txn_guard.get(), &env_info, + sizeof(env_info)); + if (err) + failure_perror("mdbx_env_info_ex()", err); + + uint64_t seq; + err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 0); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror("mdbx_dbi_sequence(get)", err); + txn_end(false); + return true; +} + +//----------------------------------------------------------------------------- + +class testcase_forkwrite : public testcase_forkread { + using inherited = testcase_forkread; + +public: + testcase_forkwrite(const actor_config &config, const mdbx_pid_t pid) + : testcase_forkread(config, pid) {} + bool smoke() override; +}; +REGISTER_TESTCASE(forkwrite); + +bool testcase_forkwrite::smoke() { + const bool firstly_read = flipcoin(); + if (firstly_read) { + if (!testcase_forkread::smoke()) + return false; + } + + if (!txn_guard) + txn_begin(false); + uint64_t seq; + int err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 1); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror("mdbx_dbi_sequence(inc)", err); + txn_end(false); + + if (!firstly_read && !testcase_forkread::smoke()) + return false; + return true; +} + +#endif /* Windows */ diff --git a/test/log.c++ b/test/log.c++ index 04dad84d..5fe485c8 100644 --- a/test/log.c++ +++ b/test/log.c++ @@ -108,8 +108,7 @@ bool output(const loglevel priority, const char *format, ...) 
{ return true; } -void output_nocheckloglevel_ap(const logging::loglevel priority, - const char *format, va_list ap) { +bool ln() { if (last) { putc('\n', last); fflush(last); @@ -118,8 +117,14 @@ void output_nocheckloglevel_ap(const logging::loglevel priority, fflush(stdout); } last = nullptr; + return true; } + return false; +} +void output_nocheckloglevel_ap(const logging::loglevel priority, + const char *format, va_list ap) { + ln(); chrono::time now = chrono::now_realtime(); struct tm tm; #ifdef _MSC_VER diff --git a/test/log.h++ b/test/log.h++ index aa111ac9..96d68848 100644 --- a/test/log.h++ +++ b/test/log.h++ @@ -55,6 +55,7 @@ bool MDBX_PRINTF_ARGS(2, 3) output(const loglevel priority, const char *format, ...); bool feed_ap(const char *format, va_list ap); bool MDBX_PRINTF_ARGS(1, 2) feed(const char *format, ...); +bool ln(); void inline MDBX_PRINTF_ARGS(2, 3) output_nocheckloglevel(const loglevel priority, const char *format, ...) { diff --git a/test/main.c++ b/test/main.c++ index 2b8ff655..ba086e90 100644 --- a/test/main.c++ +++ b/test/main.c++ @@ -60,6 +60,10 @@ MDBX_NORETURN void usage(void) { " --append Append-mode insertions\n" " --dead.reader Dead-reader simulator\n" " --dead.writer Dead-writer simulator\n" +#if !defined(_WIN32) && !defined(_WIN64) + " --fork.reader After-fork reader\n" + " --fork.writer After-fork writer\n" +#endif /* Windows */ "Actor options:\n" " --batch.read=N Read-operations batch size\n" " --batch.write=N Write-operations batch size\n" @@ -591,6 +595,18 @@ int main(int argc, char *const argv[]) { configure_actor(last_space_id, ac_nested, value, params); continue; } +#if !defined(_WIN32) && !defined(_WIN64) + if (config::parse_option(argc, argv, narg, "fork.reader", nullptr)) { + fixup4qemu(params); + configure_actor(last_space_id, ac_forkread, value, params); + continue; + } + if (config::parse_option(argc, argv, narg, "fork.writer", nullptr)) { + fixup4qemu(params); + configure_actor(last_space_id, ac_forkwrite, value, 
params); + continue; + } +#endif /* Windows */ if (*argv[narg] != '-') { fixup4qemu(params); diff --git a/test/osal-unix.c++ b/test/osal-unix.c++ index 094d6769..0554000a 100644 --- a/test/osal-unix.c++ +++ b/test/osal-unix.c++ @@ -356,6 +356,7 @@ mdbx_pid_t osal_getpid(void) { return getpid(); } int osal_delay(unsigned seconds) { return sleep(seconds) ? errno : 0; } int osal_actor_start(const actor_config &config, mdbx_pid_t &pid) { + static sigset_t mask; if (children.empty()) { struct sigaction act; memset(&act, 0, sizeof(act)); @@ -366,7 +367,6 @@ int osal_actor_start(const actor_config &config, mdbx_pid_t &pid) { sigaction(SIGUSR1, &act, nullptr); sigaction(SIGUSR2, &act, nullptr); - sigset_t mask; sigemptyset(&mask); sigaddset(&mask, SIGCHLD); sigaddset(&mask, SIGUSR1); @@ -377,6 +377,7 @@ int osal_actor_start(const actor_config &config, mdbx_pid_t &pid) { pid = fork(); if (pid == 0) { + sigprocmask(SIG_BLOCK, &mask, nullptr); overlord_pid = getppid(); const bool result = test_execute(config); exit(result ? EXIT_SUCCESS : EXIT_FAILURE); @@ -400,7 +401,7 @@ void osal_killall_actors(void) { } } -static const char *signal_name(const int sig) { +const char *signal_name(const int sig) { if (sig == SIGHUP) return "HUP"; if (sig == SIGINT) @@ -532,24 +533,25 @@ int osal_actor_poll(mdbx_pid_t &pid, unsigned timeout) { children[pid] = (WEXITSTATUS(status) == EXIT_SUCCESS) ? 
as_successful : as_failed; else if (WIFSIGNALED(status)) { + int sig = WTERMSIG(status); #ifdef WCOREDUMP if (WCOREDUMP(status)) children[pid] = as_coredump; else #endif /* WCOREDUMP */ - switch (WTERMSIG(status)) { + switch (sig) { case SIGABRT: case SIGBUS: case SIGFPE: case SIGILL: case SIGSEGV: - log_notice("child pid %lu terminated by SIG%s", (long)pid, - signal_name(WTERMSIG(status))); + log_notice("child pid %lu %s by SIG%s", (long)pid, "terminated", + signal_name(sig)); children[pid] = as_coredump; break; default: - log_notice("child pid %lu killed by SIG%s", (long)pid, - signal_name(WTERMSIG(status))); + log_notice("child pid %lu %s by SIG%s", (long)pid, "killed", + signal_name(sig)); children[pid] = as_killed; } } else if (WIFSTOPPED(status)) diff --git a/test/osal.h++ b/test/osal.h++ index ef3b5562..5c92b2e9 100644 --- a/test/osal.h++ +++ b/test/osal.h++ @@ -46,3 +46,7 @@ std::string osal_tempdir(void); #define STDERR_FILENO _fileno(stderr) #endif #endif /* _MSC_VER */ + +#if !defined(_WIN32) && !defined(_WIN64) +const char *signal_name(const int sig); +#endif /* Windows */ diff --git a/test/test.c++ b/test/test.c++ index e590d3ce..79ca8a43 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -39,6 +39,12 @@ const char *testcase2str(const actor_testcase testcase) { return "ttl"; case ac_nested: return "nested"; +#if !defined(_WIN32) && !defined(_WIN64) + case ac_forkread: + return "forkread"; + case ac_forkwrite: + return "forkwrite"; +#endif /* Windows */ } } From cfce4ef4d3bd3f821474e72f530de71adf926e95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 10 Nov 2023 21:14:32 +0300 Subject: [PATCH 048/443] =?UTF-8?q?mdbx-test:=20=D1=8F=D0=B2=D0=BD=D0=B0?= =?UTF-8?q?=D1=8F=20=D1=83=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BA=D0=B0?= =?UTF-8?q?=20append-=D1=80=D0=B5=D0=B6=D0=B8=D0=BC=D0=B0=20=D0=B4=D0=BB?= =?UTF-8?q?=D1=8F=20stdout/stderr.?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/log.c++ | 100 ++++++++++++++++++++++---------------------------- test/log.h++ | 2 +- test/main.c++ | 13 ++++++- 3 files changed, 57 insertions(+), 58 deletions(-) diff --git a/test/log.c++ b/test/log.c++ index 5fe485c8..dd55fb70 100644 --- a/test/log.c++ +++ b/test/log.c++ @@ -56,7 +56,7 @@ namespace logging { static std::string prefix; static std::string suffix; static loglevel level; -static FILE *last; +static FILE *flow; void setlevel(loglevel priority) { level = priority; @@ -67,13 +67,13 @@ void setlevel(loglevel priority) { log_trace("set mdbx debug-opts: 0x%02x", rc); } +void setup(const std::string &_prefix) { prefix = _prefix; } + void setup(loglevel priority, const std::string &_prefix) { setlevel(priority); - prefix = _prefix; + setup(_prefix); } -void setup(const std::string &_prefix) { prefix = _prefix; } - const char *level2str(const loglevel alevel) { switch (alevel) { default: @@ -108,18 +108,13 @@ bool output(const loglevel priority, const char *format, ...) 
{ return true; } -bool ln() { - if (last) { - putc('\n', last); - fflush(last); - if (last == stderr) { +void ln() { + if (flow) { + putc('\n', flow); + if (flow != stdout) putc('\n', stdout); - fflush(stdout); - } - last = nullptr; - return true; + flow = nullptr; } - return false; } void output_nocheckloglevel_ap(const logging::loglevel priority, @@ -139,8 +134,7 @@ void output_nocheckloglevel_ap(const logging::loglevel priority, if (rc != MDBX_SUCCESS) failure_perror("localtime_r()", rc); - last = stdout; - fprintf(last, + fprintf(stdout, "[ %02d%02d%02d-%02d:%02d:%02d.%06d_%05lu %-10s %.4s ] %s" /* TODO */, tm.tm_year - 100, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, chrono::fractional2us(now.fractional), (long)osal_getpid(), @@ -150,19 +144,17 @@ void output_nocheckloglevel_ap(const logging::loglevel priority, memset(&ones, 0, sizeof(ones)) /* zap MSVC and other goofy compilers */; if (same_or_higher(priority, error)) va_copy(ones, ap); - vfprintf(last, format, ap); + vfprintf(stdout, format, ap); size_t len = strlen(format); char end = len ? 
format[len - 1] : '\0'; switch (end) { default: - putc('\n', last); - MDBX_CXX17_FALLTHROUGH; // fall through + putc('\n', stdout); + break; case '\n': - fflush(last); - last = nullptr; - MDBX_CXX17_FALLTHROUGH; // fall through + break; case ' ': case '_': case ':': @@ -172,46 +164,39 @@ void output_nocheckloglevel_ap(const logging::loglevel priority, case '\b': case '\r': case '\0': + flow = stdout; break; } if (same_or_higher(priority, error)) { - if (last != stderr) { - fprintf(stderr, "[ %05lu %-10s %.4s ] %s", (long)osal_getpid(), - prefix.c_str(), level2str(priority), suffix.c_str()); - vfprintf(stderr, format, ones); - if (end == '\n') - fflush(stderr); - else - last = stderr; - } + if (flow) + flow = stderr; + fprintf(stderr, "[ %05lu %-10s %.4s ] %s", (long)osal_getpid(), + prefix.c_str(), level2str(priority), suffix.c_str()); + vfprintf(stderr, format, ones); va_end(ones); } } bool feed_ap(const char *format, va_list ap) { - if (!last) + if (!flow) return false; - if (last == stderr) { + if (flow == stderr) { va_list ones; va_copy(ones, ap); vfprintf(stdout, format, ones); va_end(ones); } - vfprintf(last, format, ap); + vfprintf(flow, format, ap); size_t len = strlen(format); - if (len && format[len - 1] == '\n') { - fflush(last); - if (last == stderr) - fflush(stdout); - last = nullptr; - } + if (len && format[len - 1] == '\n') + flow = nullptr; return true; } bool feed(const char *format, ...) { - if (!last) + if (!flow) return false; va_list ap; @@ -299,73 +284,73 @@ void progress_canary(bool active) { } // namespace logging void log_extra(const char *msg, ...) { + logging::ln(); if (logging::same_or_higher(logging::extra, logging::level)) { va_list ap; va_start(ap, msg); logging::output_nocheckloglevel_ap(logging::extra, msg, ap); va_end(ap); - } else - logging::last = nullptr; + } } void log_trace(const char *msg, ...) 
{ + logging::ln(); if (logging::same_or_higher(logging::trace, logging::level)) { va_list ap; va_start(ap, msg); logging::output_nocheckloglevel_ap(logging::trace, msg, ap); va_end(ap); - } else - logging::last = nullptr; + } } void log_debug(const char *msg, ...) { + logging::ln(); if (logging::same_or_higher(logging::debug, logging::level)) { va_list ap; va_start(ap, msg); logging::output_nocheckloglevel_ap(logging::debug, msg, ap); va_end(ap); - } else - logging::last = nullptr; + } } void log_verbose(const char *msg, ...) { + logging::ln(); if (logging::same_or_higher(logging::verbose, logging::level)) { va_list ap; va_start(ap, msg); logging::output_nocheckloglevel_ap(logging::verbose, msg, ap); va_end(ap); - } else - logging::last = nullptr; + } } void log_notice(const char *msg, ...) { + logging::ln(); if (logging::same_or_higher(logging::notice, logging::level)) { va_list ap; va_start(ap, msg); logging::output_nocheckloglevel_ap(logging::notice, msg, ap); va_end(ap); - } else - logging::last = nullptr; + } } void log_warning(const char *msg, ...) { + logging::ln(); if (logging::same_or_higher(logging::warning, logging::level)) { va_list ap; va_start(ap, msg); logging::output_nocheckloglevel_ap(logging::warning, msg, ap); va_end(ap); - } else - logging::last = nullptr; + } } void log_error(const char *msg, ...) 
{ + logging::ln(); if (logging::same_or_higher(logging::error, logging::level)) { va_list ap; va_start(ap, msg); logging::output_nocheckloglevel_ap(logging::error, msg, ap); va_end(ap); - } else - logging::last = nullptr; + } } void log_trouble(const char *where, const char *what, int errnum) { @@ -376,4 +361,7 @@ bool log_enabled(const logging::loglevel priority) { return logging::same_or_higher(priority, logging::level); } -void log_flush(void) { fflushall(); } +void log_flush(void) { + logging::ln(); + fflushall(); +} diff --git a/test/log.h++ b/test/log.h++ index 96d68848..cf955551 100644 --- a/test/log.h++ +++ b/test/log.h++ @@ -55,7 +55,7 @@ bool MDBX_PRINTF_ARGS(2, 3) output(const loglevel priority, const char *format, ...); bool feed_ap(const char *format, va_list ap); bool MDBX_PRINTF_ARGS(1, 2) feed(const char *format, ...); -bool ln(); +void ln(); void inline MDBX_PRINTF_ARGS(2, 3) output_nocheckloglevel(const loglevel priority, const char *format, ...) { diff --git a/test/main.c++ b/test/main.c++ index ba086e90..6242a05d 100644 --- a/test/main.c++ +++ b/test/main.c++ @@ -267,8 +267,19 @@ static void fixup4qemu(actor_params ¶ms) { (void)params; } -int main(int argc, char *const argv[]) { +static void set_linebuf_append(FILE *out) { + setvbuf(out, NULL, _IOLBF, 65536); +#if !defined(_WIN32) && !defined(_WIN64) + int fd = fileno(out); + int flags = fcntl(fd, F_GETFD); + if (flags != -1) + (void)fcntl(fd, F_SETFD, O_APPEND | flags); +#endif /* !Windows */ +} +int main(int argc, char *const argv[]) { + set_linebuf_append(stdout); + set_linebuf_append(stderr); #ifdef _DEBUG log_trace("#argc = %d", argc); for (int i = 0; i < argc; ++i) From 100e95957c9b71d81ef9b8e9645dbe61b77d9b4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 11 Nov 2023 12:27:42 +0300 Subject: [PATCH 049/443] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D1=80=D0=B0?= 
=?UTF-8?q?=D0=B1=D0=BE=D1=82=D0=BA=D0=B0=20=D0=BB=D0=BE=D0=B3=D0=B8=D1=80?= =?UTF-8?q?=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=B4=D0=BB=D1=8F=20?= =?UTF-8?q?=D0=B8=D1=81=D0=BF=D0=BE=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0?= =?UTF-8?q?=D0=BD=D0=B8=D1=8F=20=D0=BF=D0=BE=D1=81=D0=BB=D0=B5/=D0=B8?= =?UTF-8?q?=D0=B7=20=D0=B3=D0=BB=D0=BE=D0=B1=D0=B0=D0=BB=D1=8C=D0=BD=D1=8B?= =?UTF-8?q?=D1=85=20=D0=B4=D0=B5=D1=81=D1=82=D1=80=D1=83=D0=BA=D1=82=D0=BE?= =?UTF-8?q?=D1=80=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/extra/pcrf/pcrf_test.c | 2 +- test/log.c++ | 48 +++++++++++++++++++++++++------------ 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/test/extra/pcrf/pcrf_test.c b/test/extra/pcrf/pcrf_test.c index 1d1f1e7e..c33ee797 100644 --- a/test/extra/pcrf/pcrf_test.c +++ b/test/extra/pcrf/pcrf_test.c @@ -34,7 +34,7 @@ #define IP_PRINTF_ARG_HOST(addr) \ (int)((addr) >> 24), (int)((addr) >> 16 & 0xff), (int)((addr) >> 8 & 0xff), \ - (int)((addr)&0xff) + (int)((addr) & 0xff) char opt_db_path[PATH_MAX] = "./mdbx_bench2"; static MDBX_env *env; diff --git a/test/log.c++ b/test/log.c++ index dd55fb70..037e7509 100644 --- a/test/log.c++ +++ b/test/log.c++ @@ -53,8 +53,16 @@ static void mdbx_logger(MDBX_log_level_t priority, const char *function, namespace logging { -static std::string prefix; -static std::string suffix; +/* логирование может быть вызвано после деструкторов */ +static char prefix_buf[64]; +static size_t prefix_len; +static std::string suffix_buf; +static const char *suffix_ptr = "~~~"; +struct suffix_cleaner { + suffix_cleaner() { suffix_ptr = ""; } + ~suffix_cleaner() { suffix_ptr = "~~~"; } +} static anchor; + static loglevel level; static FILE *flow; @@ -67,11 +75,14 @@ void setlevel(loglevel priority) { log_trace("set mdbx debug-opts: 0x%02x", rc); } -void setup(const std::string &_prefix) { prefix = _prefix; } +void setup(const std::string &prefix) { + prefix_len = 
std::min(prefix.size(), sizeof(prefix_buf) - 1); + memcpy(prefix_buf, prefix.data(), prefix_len); +} -void setup(loglevel priority, const std::string &_prefix) { +void setup(loglevel priority, const std::string &prefix) { setlevel(priority); - setup(_prefix); + setup(prefix); } const char *level2str(const loglevel alevel) { @@ -138,7 +149,7 @@ void output_nocheckloglevel_ap(const logging::loglevel priority, "[ %02d%02d%02d-%02d:%02d:%02d.%06d_%05lu %-10s %.4s ] %s" /* TODO */, tm.tm_year - 100, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, chrono::fractional2us(now.fractional), (long)osal_getpid(), - prefix.c_str(), level2str(priority), suffix.c_str()); + prefix_buf, level2str(priority), suffix_ptr); va_list ones; memset(&ones, 0, sizeof(ones)) /* zap MSVC and other goofy compilers */; @@ -171,8 +182,8 @@ void output_nocheckloglevel_ap(const logging::loglevel priority, if (same_or_higher(priority, error)) { if (flow) flow = stderr; - fprintf(stderr, "[ %05lu %-10s %.4s ] %s", (long)osal_getpid(), - prefix.c_str(), level2str(priority), suffix.c_str()); + fprintf(stderr, "[ %05lu %-10s %.4s ] %s", (long)osal_getpid(), prefix_buf, + level2str(priority), suffix_ptr); vfprintf(stderr, format, ones); va_end(ones); } @@ -207,29 +218,36 @@ bool feed(const char *format, ...) 
{ } local_suffix::local_suffix(const char *c_str) - : trim_pos(suffix.size()), indent(0) { - suffix.append(c_str); + : trim_pos(suffix_buf.size()), indent(0) { + suffix_buf.append(c_str); + suffix_ptr = suffix_buf.c_str(); } local_suffix::local_suffix(const std::string &str) - : trim_pos(suffix.size()), indent(0) { - suffix.append(str); + : trim_pos(suffix_buf.size()), indent(0) { + suffix_buf.append(str); + suffix_ptr = suffix_buf.c_str(); } void local_suffix::push() { indent += 1; - suffix.push_back('\t'); + suffix_buf.push_back('\t'); + suffix_ptr = suffix_buf.c_str(); } void local_suffix::pop() { assert(indent > 0); if (indent > 0) { indent -= 1; - suffix.pop_back(); + suffix_buf.pop_back(); + suffix_ptr = suffix_buf.c_str(); } } -local_suffix::~local_suffix() { suffix.erase(trim_pos); } +local_suffix::~local_suffix() { + suffix_buf.erase(trim_pos); + suffix_ptr = suffix_buf.c_str(); +} void progress_canary(bool active) { static chrono::time progress_timestamp; From b7605e8033eb487bb70b3eba29fd989bbb7a371f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 Nov 2023 16:33:13 +0300 Subject: [PATCH 050/443] =?UTF-8?q?mdbx:=20=D1=80=D0=B5=D1=84=D0=B0=D0=BA?= =?UTF-8?q?=D1=82=D0=BE=D1=80=D0=B8=D0=BD=D0=B3=20=D0=BE=D0=B1=D1=80=D0=B0?= =?UTF-8?q?=D0=B1=D0=BE=D1=82=D0=BA=D0=B8=20`MDBX=5FGET=5FMULTIPLE`=20?= =?UTF-8?q?=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20?= =?UTF-8?q?=D0=BF=D1=80=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B8=20`key`=20?= =?UTF-8?q?=D0=BD=D0=B0=20`NULL`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/core.c b/src/core.c index 81d62e69..94a16328 100644 --- a/src/core.c +++ b/src/core.c @@ -17268,24 +17268,30 @@ static __hot int cursor_get(MDBX_cursor *mc, MDBX_val 
*key, MDBX_val *data, return MDBX_EINVAL; if (unlikely((mc->mc_db->md_flags & MDBX_DUPFIXED) == 0)) return MDBX_INCOMPATIBLE; - rc = (mc->mc_flags & C_INITIALIZED) - ? MDBX_SUCCESS - : cursor_set(mc, key, data, MDBX_SET).err; - if ((mc->mc_xcursor->mx_cursor.mc_flags & (C_INITIALIZED | C_EOF)) != - C_INITIALIZED) + if ((mc->mc_flags & C_INITIALIZED) == 0) { + if (unlikely(!key)) + return MDBX_EINVAL; + rc = cursor_set(mc, key, data, MDBX_SET).err; + if (unlikely(rc != MDBX_SUCCESS)) + break; + } + rc = MDBX_SUCCESS; + if (unlikely(C_INITIALIZED != (mc->mc_xcursor->mx_cursor.mc_flags & + (C_INITIALIZED | C_EOF)))) { + rc = MDBX_NOTFOUND; break; - goto fetchm; + } + goto fetch_multiple; case MDBX_NEXT_MULTIPLE: - if (unlikely(data == NULL)) + if (unlikely(!data)) return MDBX_EINVAL; if (unlikely(!(mc->mc_db->md_flags & MDBX_DUPFIXED))) return MDBX_INCOMPATIBLE; rc = cursor_next(mc, key, data, MDBX_NEXT_DUP); if (rc == MDBX_SUCCESS) { if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { - MDBX_cursor *mx; - fetchm: - mx = &mc->mc_xcursor->mx_cursor; + fetch_multiple:; + MDBX_cursor *mx = &mc->mc_xcursor->mx_cursor; data->iov_len = page_numkeys(mx->mc_pg[mx->mc_top]) * mx->mc_db->md_xsize; data->iov_base = page_data(mx->mc_pg[mx->mc_top]); @@ -17296,21 +17302,20 @@ static __hot int cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, } break; case MDBX_PREV_MULTIPLE: - if (data == NULL) + if (unlikely(!data)) return MDBX_EINVAL; if (!(mc->mc_db->md_flags & MDBX_DUPFIXED)) return MDBX_INCOMPATIBLE; rc = MDBX_SUCCESS; - if (!(mc->mc_flags & C_INITIALIZED)) + if ((mc->mc_flags & C_INITIALIZED) == 0) rc = cursor_last(mc, key, data); if (rc == MDBX_SUCCESS) { MDBX_cursor *mx = &mc->mc_xcursor->mx_cursor; + rc = MDBX_NOTFOUND; if (mx->mc_flags & C_INITIALIZED) { rc = cursor_sibling(mx, SIBLING_LEFT); if (rc == MDBX_SUCCESS) - goto fetchm; - } else { - rc = MDBX_NOTFOUND; + goto fetch_multiple; } } break; From 44beae00ec30f7e3476286d0d2eb1f41b54d9524 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 Nov 2023 16:30:14 +0300 Subject: [PATCH 051/443] =?UTF-8?q?mdbx:=20`const`=20=D0=B4=D0=BB=D1=8F=20?= =?UTF-8?q?=D0=BD=D0=B0=D1=87=D0=B0=D0=BB=D0=B0=20=D0=B8=20=D0=BA=D0=BE?= =?UTF-8?q?=D0=BD=D1=86=D0=B0=20=D0=B4=D0=B8=D0=B0=D0=BF=D0=B0=D0=B7=D0=BE?= =?UTF-8?q?=D0=BD=D0=B0=20=D0=B2=20=D0=B0=D1=80=D0=B3=D1=83=D0=BC=D0=B5?= =?UTF-8?q?=D0=BD=D1=82=D0=B0=D1=85=20`mdbx=5Festimate=5Frange()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 6 ++++-- src/core.c | 34 ++++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/mdbx.h b/mdbx.h index ce2dad5e..eb8e4ff6 100644 --- a/mdbx.h +++ b/mdbx.h @@ -5244,8 +5244,10 @@ LIBMDBX_API int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, * * \returns A non-zero error value on failure and 0 on success. 
*/ LIBMDBX_API int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, - MDBX_val *begin_key, MDBX_val *begin_data, - MDBX_val *end_key, MDBX_val *end_data, + const MDBX_val *begin_key, + const MDBX_val *begin_data, + const MDBX_val *end_key, + const MDBX_val *end_data, ptrdiff_t *distance_items); /** \brief The EPSILON value for mdbx_estimate_range() diff --git a/src/core.c b/src/core.c index 94a16328..119d39d9 100644 --- a/src/core.c +++ b/src/core.c @@ -24725,9 +24725,10 @@ int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, return mdbx_estimate_distance(cursor, &next.outer, distance_items); } -int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, - MDBX_val *begin_data, MDBX_val *end_key, - MDBX_val *end_data, ptrdiff_t *size_items) { +int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, + const MDBX_val *begin_key, const MDBX_val *begin_data, + const MDBX_val *end_key, const MDBX_val *end_data, + ptrdiff_t *size_items) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -24755,13 +24756,13 @@ int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, return MDBX_SUCCESS; } + MDBX_val stub; if (!begin_key) { if (unlikely(!end_key)) { /* LY: FIRST..LAST case */ *size_items = (ptrdiff_t)begin.outer.mc_db->md_entries; return MDBX_SUCCESS; } - MDBX_val stub = {0, 0}; rc = cursor_first(&begin.outer, &stub, &stub); if (unlikely(end_key == MDBX_EPSILON)) { /* LY: FIRST..+epsilon case */ @@ -24773,7 +24774,6 @@ int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, if (unlikely(begin_key == MDBX_EPSILON)) { if (end_key == NULL) { /* LY: -epsilon..LAST case */ - MDBX_val stub = {0, 0}; rc = cursor_last(&begin.outer, &stub, &stub); return (rc == MDBX_SUCCESS) ? 
mdbx_cursor_count(&begin.outer, (size_t *)size_items) @@ -24791,7 +24791,7 @@ int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, (begin_key == end_key || begin.outer.mc_dbx->md_cmp(begin_key, end_key) == 0)) { /* LY: single key case */ - rc = cursor_set(&begin.outer, begin_key, NULL, MDBX_SET).err; + rc = cursor_set(&begin.outer, (MDBX_val *)begin_key, NULL, MDBX_SET).err; if (unlikely(rc != MDBX_SUCCESS)) { *size_items = 0; return (rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc; @@ -24812,10 +24812,14 @@ int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, } } return MDBX_SUCCESS; - } else { - rc = cursor_set(&begin.outer, begin_key, begin_data, - begin_data ? MDBX_GET_BOTH_RANGE : MDBX_SET_RANGE) + } else if (begin_data) { + stub = *begin_data; + rc = cursor_set(&begin.outer, (MDBX_val *)begin_key, &stub, + MDBX_GET_BOTH_RANGE) .err; + } else { + stub = *begin_key; + rc = cursor_set(&begin.outer, &stub, nullptr, MDBX_SET_RANGE).err; } } @@ -24828,13 +24832,15 @@ int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, rc = cursor_init(&end.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; - if (!end_key) { - MDBX_val stub = {0, 0}; + if (!end_key) rc = cursor_last(&end.outer, &stub, &stub); - } else { - rc = cursor_set(&end.outer, end_key, end_data, - end_data ? 
MDBX_GET_BOTH_RANGE : MDBX_SET_RANGE) + else if (end_data) { + stub = *end_data; + rc = cursor_set(&end.outer, (MDBX_val *)end_key, &stub, MDBX_GET_BOTH_RANGE) .err; + } else { + stub = *end_key; + rc = cursor_set(&end.outer, &stub, nullptr, MDBX_SET_RANGE).err; } if (unlikely(rc != MDBX_SUCCESS)) { if (rc != MDBX_NOTFOUND || !(end.outer.mc_flags & C_INITIALIZED)) From 6cef39c32fbe9d9f0bd60580f5960e853f15be24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 Nov 2023 18:34:23 +0300 Subject: [PATCH 052/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`cursor::estimation=5Fre?= =?UTF-8?q?sult`=20=D0=B8=20=D0=BF=D0=B5=D1=80=D0=B5=D0=B4=D0=B5=D0=BB?= =?UTF-8?q?=D0=BA=D0=B0=20`cursor::estimate()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 92 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 54 insertions(+), 38 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 6c33a0b3..1a7bc510 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4048,10 +4048,12 @@ public: put_multiple(map, key, vector.data(), vector.size(), mode); } - inline ptrdiff_t estimate(map_handle map, pair from, pair to) const; - inline ptrdiff_t estimate(map_handle map, slice from, slice to) const; - inline ptrdiff_t estimate_from_first(map_handle map, slice to) const; - inline ptrdiff_t estimate_to_last(map_handle map, slice from) const; + inline ptrdiff_t estimate(map_handle map, const pair &from, + const pair &to) const; + inline ptrdiff_t estimate(map_handle map, const slice &from, + const slice &to) const; + inline ptrdiff_t estimate_from_first(map_handle map, const slice &to) const; + inline ptrdiff_t estimate_to_last(map_handle map, const slice &from) const; }; /// \brief Managed database transaction. 
@@ -4164,10 +4166,11 @@ public: struct move_result : public pair_result { inline move_result(const cursor &cursor, bool throw_notfound); - inline move_result(cursor &cursor, move_operation operation, - bool throw_notfound); - inline move_result(cursor &cursor, move_operation operation, - const slice &key, bool throw_notfound); + move_result(cursor &cursor, move_operation operation, bool throw_notfound) + : move_result(cursor, operation, slice(), slice(), throw_notfound) {} + move_result(cursor &cursor, move_operation operation, const slice &key, + bool throw_notfound) + : move_result(cursor, operation, key, slice(), throw_notfound) {} inline move_result(cursor &cursor, move_operation operation, const slice &key, const slice &value, bool throw_notfound); @@ -4175,6 +4178,19 @@ public: move_result &operator=(const move_result &) noexcept = default; }; + struct estimate_result : public pair { + ptrdiff_t approximate_quantity; + estimate_result(const cursor &cursor, move_operation operation) + : estimate_result(cursor, operation, slice(), slice()) {} + estimate_result(const cursor &cursor, move_operation operation, + const slice &key) + : estimate_result(cursor, operation, key, slice()) {} + inline estimate_result(const cursor &cursor, move_operation operation, + const slice &key, const slice &value); + estimate_result(const estimate_result &) noexcept = default; + estimate_result &operator=(const estimate_result &) noexcept = default; + }; + protected: inline bool move(move_operation operation, MDBX_val *key, MDBX_val *value, bool throw_notfound) const @@ -4219,9 +4235,10 @@ public: inline bool eof() const; inline bool on_first() const; inline bool on_last() const; - inline ptrdiff_t estimate(slice key, slice value) const; - inline ptrdiff_t estimate(slice key) const; - inline ptrdiff_t estimate(move_operation operation) const; + inline estimate_result estimate(const slice &key, const slice &value) const; + inline estimate_result estimate(const slice &key) const; + 
inline estimate_result estimate(move_operation operation) const; + inline estimate_result estimate(move_operation operation, slice &key) const; //---------------------------------------------------------------------------- @@ -5875,28 +5892,32 @@ inline size_t txn::put_multiple(map_handle map, const slice &key, return args[1].iov_len /* done item count */; } -inline ptrdiff_t txn::estimate(map_handle map, pair from, pair to) const { +inline ptrdiff_t txn::estimate(map_handle map, const pair &from, + const pair &to) const { ptrdiff_t result; error::success_or_throw(mdbx_estimate_range( handle_, map.dbi, &from.key, &from.value, &to.key, &to.value, &result)); return result; } -inline ptrdiff_t txn::estimate(map_handle map, slice from, slice to) const { +inline ptrdiff_t txn::estimate(map_handle map, const slice &from, + const slice &to) const { ptrdiff_t result; error::success_or_throw(mdbx_estimate_range(handle_, map.dbi, &from, nullptr, &to, nullptr, &result)); return result; } -inline ptrdiff_t txn::estimate_from_first(map_handle map, slice to) const { +inline ptrdiff_t txn::estimate_from_first(map_handle map, + const slice &to) const { ptrdiff_t result; error::success_or_throw(mdbx_estimate_range(handle_, map.dbi, nullptr, nullptr, &to, nullptr, &result)); return result; } -inline ptrdiff_t txn::estimate_to_last(map_handle map, slice from) const { +inline ptrdiff_t txn::estimate_to_last(map_handle map, + const slice &from) const { ptrdiff_t result; error::success_or_throw(mdbx_estimate_range(handle_, map.dbi, &from, nullptr, nullptr, nullptr, &result)); @@ -5945,22 +5966,8 @@ MDBX_CXX11_CONSTEXPR bool operator!=(const cursor &a, inline cursor::move_result::move_result(const cursor &cursor, bool throw_notfound) - : pair_result(key, value, false) { - done = cursor.move(get_current, &key, &value, throw_notfound); -} - -inline cursor::move_result::move_result(cursor &cursor, - move_operation operation, - bool throw_notfound) - : pair_result(key, value, false) { - done 
= cursor.move(operation, &key, &value, throw_notfound); -} - -inline cursor::move_result::move_result(cursor &cursor, - move_operation operation, - const slice &key, bool throw_notfound) - : pair_result(key, slice(), false) { - this->done = cursor.move(operation, &this->key, &this->value, throw_notfound); + : pair_result(slice(), slice(), false) { + done = cursor.move(get_current, &this->key, &this->value, throw_notfound); } inline cursor::move_result::move_result(cursor &cursor, @@ -5987,6 +5994,14 @@ inline bool cursor::move(move_operation operation, MDBX_val *key, } } +inline cursor::estimate_result::estimate_result(const cursor &cursor, + move_operation operation, + const slice &key, + const slice &value) + : pair(key, value), approximate_quantity(PTRDIFF_MIN) { + approximate_quantity = cursor.estimate(operation, &this->key, &this->value); +} + inline ptrdiff_t cursor::estimate(move_operation operation, MDBX_val *key, MDBX_val *value) const { ptrdiff_t result; @@ -6109,17 +6124,18 @@ inline bool cursor::on_last() const { return error::boolean_or_throw(::mdbx_cursor_on_last(*this)); } -inline ptrdiff_t cursor::estimate(slice key, slice value) const { - return estimate(multi_exactkey_lowerboundvalue, &key, &value); +inline cursor::estimate_result cursor::estimate(const slice &key, + const slice &value) const { + return estimate_result(*this, multi_exactkey_lowerboundvalue, key, value); } -inline ptrdiff_t cursor::estimate(slice key) const { - return estimate(key_lowerbound, &key, nullptr); +inline cursor::estimate_result cursor::estimate(const slice &key) const { + return estimate_result(*this, key_lowerbound, key); } -inline ptrdiff_t cursor::estimate(move_operation operation) const { - slice unused_key; - return estimate(operation, &unused_key, nullptr); +inline cursor::estimate_result +cursor::estimate(move_operation operation) const { + return estimate_result(*this, operation); } inline void cursor::renew(const ::mdbx::txn &txn) { From 
7f21515940ff513cc2d0df7012858df0cdb5abcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 13 Nov 2023 20:52:35 +0300 Subject: [PATCH 053/443] =?UTF-8?q?mdbx:=20=D0=BC=D0=B8=D0=BA=D1=80=D0=BE-?= =?UTF-8?q?=D0=BE=D0=BF=D1=82=D0=B8=D0=BC=D0=B8=D0=B7=D0=B0=D1=86=D0=B8?= =?UTF-8?q?=D1=8F=20`cursor=5Fset()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Чуть меньше сравнений и переходов. --- src/core.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/src/core.c b/src/core.c index 119d39d9..fec11370 100644 --- a/src/core.c +++ b/src/core.c @@ -16818,7 +16818,7 @@ cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { } MDBX_val aligned_key = *key; - uint64_t aligned_keybytes; + uint64_t aligned_key_buf; if (mc->mc_db->md_flags & MDBX_INTEGERKEY) { switch (aligned_key.iov_len) { default: @@ -16829,13 +16829,13 @@ cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { if (unlikely(3 & (uintptr_t)aligned_key.iov_base)) /* copy instead of return error to avoid break compatibility */ aligned_key.iov_base = - memcpy(&aligned_keybytes, aligned_key.iov_base, 4); + memcpy(&aligned_key_buf, aligned_key.iov_base, 4); break; case 8: if (unlikely(7 & (uintptr_t)aligned_key.iov_base)) /* copy instead of return error to avoid break compatibility */ aligned_key.iov_base = - memcpy(&aligned_keybytes, aligned_key.iov_base, 8); + memcpy(&aligned_key_buf, aligned_key.iov_base, 8); break; } } @@ -16874,7 +16874,7 @@ cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { } if (cmp > 0) { const size_t nkeys = page_numkeys(mp); - if (nkeys > 1) { + if (likely(nkeys > 1)) { if (IS_LEAF2(mp)) { nodekey.iov_base = page_leaf2key(mp, nkeys - 1, nodekey.iov_len); } else { @@ -16918,23 +16918,22 @@ 
cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { } /* If any parents have right-sibs, search. * Otherwise, there's nothing further. */ - size_t i; - for (i = 0; i < mc->mc_top; i++) + for (size_t i = 0; i < mc->mc_top; i++) if (mc->mc_ki[i] < page_numkeys(mc->mc_pg[i]) - 1) - break; - if (i == mc->mc_top) { - /* There are no other pages */ - cASSERT(mc, nkeys <= UINT16_MAX); - mc->mc_ki[mc->mc_top] = (uint16_t)nkeys; - mc->mc_flags |= C_EOF; - ret.err = MDBX_NOTFOUND; - return ret; - } + goto continue_other_pages; + + /* There are no other pages */ + cASSERT(mc, nkeys <= UINT16_MAX); + mc->mc_ki[mc->mc_top] = (uint16_t)nkeys; + mc->mc_flags |= C_EOF; + ret.err = MDBX_NOTFOUND; + return ret; } + continue_other_pages: if (!mc->mc_top) { /* There are no other pages */ mc->mc_ki[mc->mc_top] = 0; - if (op == MDBX_SET_RANGE) + if (op >= MDBX_SET_RANGE) goto got_node; cASSERT(mc, mc->mc_ki[mc->mc_top] < page_numkeys(mc->mc_pg[mc->mc_top]) || @@ -16996,7 +16995,7 @@ got_node: } if (IS_LEAF2(mp)) { - if (op == MDBX_SET_RANGE || op == MDBX_SET_KEY) { + if (op >= MDBX_SET_KEY) { key->iov_len = mc->mc_db->md_xsize; key->iov_base = page_leaf2key(mp, mc->mc_ki[mc->mc_top], key->iov_len); } @@ -17076,7 +17075,7 @@ got_node: } /* The key already matches in all other cases */ - if (op == MDBX_SET_RANGE || op == MDBX_SET_KEY) + if (op >= MDBX_SET_KEY) get_key_optional(node, key); DEBUG("==> cursor placed on key [%s], data [%s]", DKEY_DEBUG(key), From c8319aabe7e5d2f30abce66b6f75fa4c7b4f69fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 16 Nov 2023 13:46:35 +0300 Subject: [PATCH 054/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=81=D1=82=D0=B0=D1=82?= =?UTF-8?q?=D0=B8=D1=87=D0=B5=D1=81=D0=BA=D0=B8=D1=85=20=D0=BC=D0=B5=D1=82?= 
=?UTF-8?q?=D0=BE=D0=B4=D0=BE=D0=B2=20`buffer::hex()`,=20`base64()`,=20`ba?= =?UTF-8?q?se58()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index 1a7bc510..24ceea19 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -2340,6 +2340,77 @@ public: return slice_.as_pod(); } + /// \brief Returns a new buffer with a hexadecimal dump of the slice content. + static buffer hex(const ::mdbx::slice &source, bool uppercase = false, + unsigned wrap_width = 0, + const allocator_type &allocator = allocator_type()) { + return source.template encode_hex( + uppercase, wrap_width, allocator); + } + + /// \brief Returns a new buffer with a + /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of the slice content. + static buffer base58(const ::mdbx::slice &source, unsigned wrap_width = 0, + const allocator_type &allocator = allocator_type()) { + return source.template encode_base58(wrap_width, + allocator); + } + /// \brief Returns a new buffer with a + /// [Base64](https://en.wikipedia.org/wiki/Base64) dump of the slice content. + static buffer base64(const ::mdbx::slice &source, unsigned wrap_width = 0, + const allocator_type &allocator = allocator_type()) { + return source.template encode_base64(wrap_width, + allocator); + } + + /// \brief Returns a new buffer with a hexadecimal dump of the given pod. + template + static buffer hex(const POD &pod, bool uppercase = false, + unsigned wrap_width = 0, + const allocator_type &allocator = allocator_type()) { + return hex(mdbx::slice::wrap(pod), uppercase, wrap_width, allocator); + } + + /// \brief Returns a new buffer with a + /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of the given pod. 
+ template + static buffer base58(const POD &pod, unsigned wrap_width = 0, + const allocator_type &allocator = allocator_type()) { + return base58(mdbx::slice::wrap(pod), wrap_width, allocator); + } + + /// \brief Returns a new buffer with a + /// [Base64](https://en.wikipedia.org/wiki/Base64) dump of the given pod. + template + static buffer base64(const POD &pod, unsigned wrap_width = 0, + const allocator_type &allocator = allocator_type()) { + return base64(mdbx::slice::wrap(pod), wrap_width, allocator); + } + + /// \brief Returns a new buffer with a hexadecimal dump of the slice content. + buffer encode_hex(bool uppercase = false, unsigned wrap_width = 0, + const allocator_type &allocator = allocator_type()) const { + return slice().template encode_hex( + uppercase, wrap_width, allocator); + } + + /// \brief Returns a new buffer with a + /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of the slice content. + buffer + encode_base58(unsigned wrap_width = 0, + const allocator_type &allocator = allocator_type()) const { + return slice().template encode_base58( + wrap_width, allocator); + } + /// \brief Returns a new buffer with a + /// [Base64](https://en.wikipedia.org/wiki/Base64) dump of the slice content. + buffer + encode_base64(unsigned wrap_width = 0, + const allocator_type &allocator = allocator_type()) const { + return slice().template encode_base64( + wrap_width, allocator); + } + /// \brief Reserves storage space. 
void reserve(size_t wanna_headroom, size_t wanna_tailroom) { wanna_headroom = ::std::min(::std::max(headroom(), wanna_headroom), From 649bbb9d902e3ba6fc4470ae3652e81b7b19f6f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 16 Nov 2023 13:48:27 +0300 Subject: [PATCH 055/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=82=D0=B8=D0=BF=D0=B0?= =?UTF-8?q?=20`mdbx::comparator`=20=D0=B8=20=D1=84=D1=83=D0=BD=D0=BA=D1=86?= =?UTF-8?q?=D0=B8=D0=B9=20`mdbx::default=5Fcomparator()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index 24ceea19..d197167e 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3080,6 +3080,14 @@ struct LIBMDBX_API_TYPE map_handle { }; }; +using comparator = ::MDBX_cmp_func *; +inline comparator default_comparator(key_mode mode) noexcept { + return ::mdbx_get_keycmp(static_cast(mode)); +} +inline comparator default_comparator(value_mode mode) noexcept { + return ::mdbx_get_keycmp(static_cast(mode)); +} + /// \brief Key-value pairs put mode. enum put_mode { insert_unique = MDBX_NOOVERWRITE, ///< Insert only unique keys. 
From e66df2c21b64622b4b5eb0fd142526a7b094a7ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 16 Nov 2023 14:44:49 +0300 Subject: [PATCH 056/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BC=D0=B5=D1=82=D0=BE?= =?UTF-8?q?=D0=B4=D0=BE=D0=B2=20`buffer::hex=5Fdecode()`,=20`base64=5Fdeco?= =?UTF-8?q?de()`,=20`base58=5Fdecode()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index d197167e..2fd02072 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -2411,6 +2411,55 @@ public: wrap_width, allocator); } + /// \brief Decodes hexadecimal dump from the slice content to returned buffer. + static buffer hex_decode(const ::mdbx::slice &source, + bool ignore_spaces = false, + const allocator_type &allocator = allocator_type()) { + return source.template hex_decode(ignore_spaces, + allocator); + } + + /// \brief Decodes [Base58](https://en.wikipedia.org/wiki/Base58) dump + /// from the slice content to returned buffer. + static buffer + base58_decode(const ::mdbx::slice &source, bool ignore_spaces = false, + const allocator_type &allocator = allocator_type()) { + return source.template base58_decode( + ignore_spaces, allocator); + } + + /// \brief Decodes [Base64](https://en.wikipedia.org/wiki/Base64) dump + /// from the slice content to returned buffer. + static buffer + base64_decode(const ::mdbx::slice &source, bool ignore_spaces = false, + const allocator_type &allocator = allocator_type()) { + return source.template base64_decode( + ignore_spaces, allocator); + } + + /// \brief Decodes hexadecimal dump + /// from the buffer content to new returned buffer. 
+ buffer hex_decode(bool ignore_spaces = false, + const allocator_type &allocator = allocator_type()) const { + return hex_decode(slice(), ignore_spaces, allocator); + } + + /// \brief Decodes [Base58](https://en.wikipedia.org/wiki/Base58) dump + /// from the buffer content to new returned buffer. + buffer + base58_decode(bool ignore_spaces = false, + const allocator_type &allocator = allocator_type()) const { + return base58_decode(slice(), ignore_spaces, allocator); + } + + /// \brief Decodes [Base64](https://en.wikipedia.org/wiki/Base64) dump + /// from the buffer content to new returned buffer. + buffer + base64_decode(bool ignore_spaces = false, + const allocator_type &allocator = allocator_type()) const { + return base64_decode(slice(), ignore_spaces, allocator); + } + /// \brief Reserves storage space. void reserve(size_t wanna_headroom, size_t wanna_tailroom) { wanna_headroom = ::std::min(::std::max(headroom(), wanna_headroom), From 6facd20b2bbe3d8604341d4af7dc0b17ebfe0474 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 17 Nov 2023 23:34:22 +0300 Subject: [PATCH 057/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`buffer::as=5Fuint64()`?= =?UTF-8?q?=20=D0=B8=20=D1=82.=D0=B4.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index 2fd02072..af2aa246 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -2340,6 +2340,22 @@ public: return slice_.as_pod(); } +#ifdef MDBX_U128_TYPE + MDBX_U128_TYPE as_uint128() const { return slice().as_uint128(); } +#endif /* MDBX_U128_TYPE */ + uint64_t as_uint64() const { return slice().as_uint64(); } + uint32_t as_uint32() const { return slice().as_uint32(); } + uint16_t as_uint16() const { return slice().as_uint16(); } + uint8_t 
as_uint8() const { return slice().as_uint8(); } + +#ifdef MDBX_I128_TYPE + MDBX_I128_TYPE as_int128() const { return slice().as_int128(); } +#endif /* MDBX_I128_TYPE */ + int64_t as_int64() const { return slice().as_int64(); } + int32_t as_int32() const { return slice().as_int32(); } + int16_t as_int16() const { return slice().as_int16(); } + int8_t as_int8() const { return slice().as_int8(); } + /// \brief Returns a new buffer with a hexadecimal dump of the slice content. static buffer hex(const ::mdbx::slice &source, bool uppercase = false, unsigned wrap_width = 0, From d6a79a9c5fdd6b66923fbe0f31be28aa495fb70d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 17 Nov 2023 23:55:37 +0300 Subject: [PATCH 058/443] =?UTF-8?q?mdbx++:=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D0=B8=D0=BC=D0=B5=D0=BD=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20?= =?UTF-8?q?`slice::as=5FintXX()`=20=D0=B2=20`slice::as=5FintXX=5Fadapt()`?= =?UTF-8?q?=20=D0=B8=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=20`slice::as=5FintXX()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 86 ++++++++++++++++++++++++++++++++++++++++------------ src/mdbx.c++ | 36 +++++++++++----------- 2 files changed, 84 insertions(+), 38 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index af2aa246..486dfdeb 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -1058,20 +1058,40 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { } #ifdef MDBX_U128_TYPE - MDBX_U128_TYPE as_uint128() const; + MDBX_CXX14_CONSTEXPR MDBX_U128_TYPE as_uint128() const { + return as_pod(); + } #endif /* MDBX_U128_TYPE */ - uint64_t as_uint64() const; - uint32_t as_uint32() const; - uint16_t as_uint16() const; - uint8_t as_uint8() const; + MDBX_CXX14_CONSTEXPR uint64_t as_uint64() const { return as_pod(); } + MDBX_CXX14_CONSTEXPR uint32_t as_uint32() const { return 
as_pod(); } + MDBX_CXX14_CONSTEXPR uint16_t as_uint16() const { return as_pod(); } + MDBX_CXX14_CONSTEXPR uint8_t as_uint8() const { return as_pod(); } #ifdef MDBX_I128_TYPE - MDBX_I128_TYPE as_int128() const; + MDBX_CXX14_CONSTEXPR MDBX_I128_TYPE as_int128() const { + return as_pod(); + } #endif /* MDBX_I128_TYPE */ - int64_t as_int64() const; - int32_t as_int32() const; - int16_t as_int16() const; - int8_t as_int8() const; + MDBX_CXX14_CONSTEXPR int64_t as_int64() const { return as_pod(); } + MDBX_CXX14_CONSTEXPR int32_t as_int32() const { return as_pod(); } + MDBX_CXX14_CONSTEXPR int16_t as_int16() const { return as_pod(); } + MDBX_CXX14_CONSTEXPR int8_t as_int8() const { return as_pod(); } + +#ifdef MDBX_U128_TYPE + MDBX_U128_TYPE as_uint128_adapt() const; +#endif /* MDBX_U128_TYPE */ + uint64_t as_uint64_adapt() const; + uint32_t as_uint32_adapt() const; + uint16_t as_uint16_adapt() const; + uint8_t as_uint8_adapt() const; + +#ifdef MDBX_I128_TYPE + MDBX_I128_TYPE as_int128_adapt() const; +#endif /* MDBX_I128_TYPE */ + int64_t as_int64_adapt() const; + int32_t as_int32_adapt() const; + int16_t as_int16_adapt() const; + int8_t as_int8_adapt() const; protected: MDBX_CXX11_CONSTEXPR slice(size_t invalid_length) noexcept @@ -2341,20 +2361,46 @@ public: } #ifdef MDBX_U128_TYPE - MDBX_U128_TYPE as_uint128() const { return slice().as_uint128(); } + MDBX_CXX14_CONSTEXPR MDBX_U128_TYPE as_uint128() const { + return slice().as_uint128(); + } #endif /* MDBX_U128_TYPE */ - uint64_t as_uint64() const { return slice().as_uint64(); } - uint32_t as_uint32() const { return slice().as_uint32(); } - uint16_t as_uint16() const { return slice().as_uint16(); } - uint8_t as_uint8() const { return slice().as_uint8(); } + MDBX_CXX14_CONSTEXPR uint64_t as_uint64() const { + return slice().as_uint64(); + } + MDBX_CXX14_CONSTEXPR uint32_t as_uint32() const { + return slice().as_uint32(); + } + MDBX_CXX14_CONSTEXPR uint16_t as_uint16() const { + return slice().as_uint16(); + } + 
MDBX_CXX14_CONSTEXPR uint8_t as_uint8() const { return slice().as_uint8(); } #ifdef MDBX_I128_TYPE - MDBX_I128_TYPE as_int128() const { return slice().as_int128(); } + MDBX_CXX14_CONSTEXPR MDBX_I128_TYPE as_int128() const { + return slice().as_int128(); + } #endif /* MDBX_I128_TYPE */ - int64_t as_int64() const { return slice().as_int64(); } - int32_t as_int32() const { return slice().as_int32(); } - int16_t as_int16() const { return slice().as_int16(); } - int8_t as_int8() const { return slice().as_int8(); } + MDBX_CXX14_CONSTEXPR int64_t as_int64() const { return slice().as_int64(); } + MDBX_CXX14_CONSTEXPR int32_t as_int32() const { return slice().as_int32(); } + MDBX_CXX14_CONSTEXPR int16_t as_int16() const { return slice().as_int16(); } + MDBX_CXX14_CONSTEXPR int8_t as_int8() const { return slice().as_int8(); } + +#ifdef MDBX_U128_TYPE + MDBX_U128_TYPE as_uint128_adapt() const { return slice().as_uint128_adapt(); } +#endif /* MDBX_U128_TYPE */ + uint64_t as_uint64_adapt() const { return slice().as_uint64_adapt(); } + uint32_t as_uint32_adapt() const { return slice().as_uint32_adapt(); } + uint16_t as_uint16_adapt() const { return slice().as_uint16_adapt(); } + uint8_t as_uint8_adapt() const { return slice().as_uint8_adapt(); } + +#ifdef MDBX_I128_TYPE + MDBX_I128_TYPE as_int128_adapt() const { return slice().as_int128_adapt(); } +#endif /* MDBX_I128_TYPE */ + int64_t as_int64_adapt() const { return slice().as_int64_adapt(); } + int32_t as_int32_adapt() const { return slice().as_int32_adapt(); } + int16_t as_int16_adapt() const { return slice().as_int16_adapt(); } + int8_t as_int8_adapt() const { return slice().as_int8_adapt(); } /// \brief Returns a new buffer with a hexadecimal dump of the slice content. 
static buffer hex(const ::mdbx::slice &source, bool uppercase = false, diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 1b52739a..dd75aaa0 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -492,48 +492,48 @@ bool slice::is_printable(bool disable_utf8) const noexcept { } #ifdef MDBX_U128_TYPE -MDBX_U128_TYPE slice::as_uint128() const { +MDBX_U128_TYPE slice::as_uint128_adapt() const { static_assert(sizeof(MDBX_U128_TYPE) == 16, "WTF?"); if (size() == 16) { MDBX_U128_TYPE r; memcpy(&r, data(), sizeof(r)); return r; } else - return as_uint64(); + return as_uint64_adapt(); } #endif /* MDBX_U128_TYPE */ -uint64_t slice::as_uint64() const { +uint64_t slice::as_uint64_adapt() const { static_assert(sizeof(uint64_t) == 8, "WTF?"); if (size() == 8) { uint64_t r; memcpy(&r, data(), sizeof(r)); return r; } else - return as_uint32(); + return as_uint32_adapt(); } -uint32_t slice::as_uint32() const { +uint32_t slice::as_uint32_adapt() const { static_assert(sizeof(uint32_t) == 4, "WTF?"); if (size() == 4) { uint32_t r; memcpy(&r, data(), sizeof(r)); return r; } else - return as_uint16(); + return as_uint16_adapt(); } -uint16_t slice::as_uint16() const { +uint16_t slice::as_uint16_adapt() const { static_assert(sizeof(uint16_t) == 2, "WTF?"); if (size() == 2) { uint16_t r; memcpy(&r, data(), sizeof(r)); return r; } else - return as_uint8(); + return as_uint8_adapt(); } -uint8_t slice::as_uint8() const { +uint8_t slice::as_uint8_adapt() const { static_assert(sizeof(uint8_t) == 1, "WTF?"); if (size() == 1) return *static_cast(data()); @@ -544,48 +544,48 @@ uint8_t slice::as_uint8() const { } #ifdef MDBX_I128_TYPE -MDBX_I128_TYPE slice::as_int128() const { +MDBX_I128_TYPE slice::as_int128_adapt() const { static_assert(sizeof(MDBX_I128_TYPE) == 16, "WTF?"); if (size() == 16) { MDBX_I128_TYPE r; memcpy(&r, data(), sizeof(r)); return r; } else - return as_int64(); + return as_int64_adapt(); } #endif /* MDBX_I128_TYPE */ -int64_t slice::as_int64() const { +int64_t slice::as_int64_adapt() 
const { static_assert(sizeof(int64_t) == 8, "WTF?"); if (size() == 8) { uint64_t r; memcpy(&r, data(), sizeof(r)); return r; } else - return as_int32(); + return as_int32_adapt(); } -int32_t slice::as_int32() const { +int32_t slice::as_int32_adapt() const { static_assert(sizeof(int32_t) == 4, "WTF?"); if (size() == 4) { int32_t r; memcpy(&r, data(), sizeof(r)); return r; } else - return as_int16(); + return as_int16_adapt(); } -int16_t slice::as_int16() const { +int16_t slice::as_int16_adapt() const { static_assert(sizeof(int16_t) == 2, "WTF?"); if (size() == 2) { int16_t r; memcpy(&r, data(), sizeof(r)); return r; } else - return as_int8(); + return as_int8_adapt(); } -int8_t slice::as_int8() const { +int8_t slice::as_int8_adapt() const { if (size() == 1) return *static_cast(data()); else if (size() == 0) From 992eee4f0f6e8ad7ea2e5a6327573d3f447d453f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 18 Nov 2023 00:40:10 +0300 Subject: [PATCH 059/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`cursor::clone()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mdbx.h++ b/mdbx.h++ index 486dfdeb..aeeb2252 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4324,6 +4324,7 @@ public: inline cursor &operator=(cursor &&other) noexcept; inline cursor(cursor &&other) noexcept; inline ~cursor() noexcept; + inline cursor_managed clone(void *your_context = nullptr) const; MDBX_CXX14_CONSTEXPR operator bool() const noexcept; MDBX_CXX14_CONSTEXPR operator const MDBX_cursor *() const; MDBX_CXX14_CONSTEXPR operator MDBX_cursor *(); @@ -4495,7 +4496,8 @@ class LIBMDBX_API_TYPE cursor_managed : public cursor { public: /// \brief Creates a new managed cursor with underlying object. 
- cursor_managed() : cursor_managed(::mdbx_cursor_create(nullptr)) { + cursor_managed(void *your_context = nullptr) + : cursor_managed(::mdbx_cursor_create(your_context)) { if (MDBX_UNLIKELY(!handle_)) MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_ENOMEM); } @@ -6118,6 +6120,12 @@ inline ptrdiff_t txn::estimate_to_last(map_handle map, MDBX_CXX11_CONSTEXPR cursor::cursor(MDBX_cursor *ptr) noexcept : handle_(ptr) {} +inline cursor_managed cursor::clone(void *your_context) const { + cursor_managed clone(your_context); + error::success_or_throw(::mdbx_cursor_copy(handle_, clone.handle_)); + return clone; +} + inline cursor &cursor::operator=(cursor &&other) noexcept { handle_ = other.handle_; other.handle_ = nullptr; From 225f54833903d08e428c6f155b114d157c57653b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 18 Nov 2023 00:53:42 +0300 Subject: [PATCH 060/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20get=5F/set=5Fcontext=20?= =?UTF-8?q?=D0=B4=D0=BB=D1=8F=20=D1=82=D1=80=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA?= =?UTF-8?q?=D1=86=D0=B8=D0=B9=20=D0=B8=20=D0=BA=D1=83=D1=80=D1=81=D0=BE?= =?UTF-8?q?=D1=80=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/mdbx.h++ b/mdbx.h++ index aeeb2252..9be7e341 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3636,7 +3636,7 @@ public: inline void *get_context() const noexcept; /// \brief Sets the application context associated with the environment. - inline env &set_context(void *); + inline env &set_context(void *your_context); /// \brief Sets threshold to force flush the data buffers to disk, for /// non-sync durability modes. @@ -3994,6 +3994,12 @@ public: /// \brief Return the transaction's ID. 
inline uint64_t id() const; + /// \brief Returns the application context associated with the transaction. + inline void *get_context() const noexcept; + + /// \brief Sets the application context associated with the transaction. + inline txn &set_context(void *your_context); + /// \brief Checks whether the given data is on a dirty page. inline bool is_dirty(const void *ptr) const; @@ -4333,6 +4339,12 @@ public: friend MDBX_CXX11_CONSTEXPR bool operator!=(const cursor &a, const cursor &b) noexcept; + /// \brief Returns the application context associated with the cursor. + inline void *get_context() const noexcept; + + /// \brief Sets the application context associated with the cursor. + inline cursor &set_context(void *your_context); + enum move_operation { first = MDBX_FIRST, last = MDBX_LAST, @@ -5639,6 +5651,15 @@ MDBX_CXX11_CONSTEXPR bool operator!=(const txn &a, const txn &b) noexcept { return a.handle_ != b.handle_; } +inline void *txn::get_context() const noexcept { + return mdbx_txn_get_userctx(handle_); +} + +inline txn &txn::set_context(void *ptr) { + error::success_or_throw(::mdbx_txn_set_userctx(handle_, ptr)); + return *this; +} + inline bool txn::is_dirty(const void *ptr) const { int err = ::mdbx_is_dirty(handle_, ptr); switch (err) { @@ -6126,6 +6147,15 @@ inline cursor_managed cursor::clone(void *your_context) const { return clone; } +inline void *cursor::get_context() const noexcept { + return mdbx_cursor_get_userctx(handle_); +} + +inline cursor &cursor::set_context(void *ptr) { + error::success_or_throw(::mdbx_cursor_set_userctx(handle_, ptr)); + return *this; +} + inline cursor &cursor::operator=(cursor &&other) noexcept { handle_ = other.handle_; other.handle_ = nullptr; From 10abf731917e82a0c45c516653e9456abf4868bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 26 Nov 2023 01:11:12 +0300 Subject: [PATCH 061/443] 
=?UTF-8?q?mdbx:=20=D1=80=D0=B5=D1=84=D0=B0=D0=BA?= =?UTF-8?q?=D1=82=D0=BE=D1=80=D0=B8=D0=BD=D0=B3=20=D0=B8=20=D0=BC=D0=B8?= =?UTF-8?q?=D0=BA=D1=80=D0=BE-=D0=BE=D0=BF=D1=82=D0=B8=D0=BC=D0=B8=D0=B7?= =?UTF-8?q?=D0=B0=D1=86=D0=B8=D1=8F=20`cursor=5Fnext|=5Fprev()`=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20dupsort-=D1=83=D0=B7=D0=BB=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - меньше сравнений и переходов. - вложенный курсор всегда сбрасывается/очищается при переходе с dupsort-узла. --- src/core.c | 75 +++++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/src/core.c b/src/core.c index fec11370..eadfb491 100644 --- a/src/core.c +++ b/src/core.c @@ -3391,7 +3391,8 @@ static int __must_check_result cursor_last(MDBX_cursor *mc, MDBX_val *key, static int __must_check_result cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, size_t dbi); static int __must_check_result cursor_xinit0(MDBX_cursor *mc); -static int __must_check_result cursor_xinit1(MDBX_cursor *mc, MDBX_node *node, +static int __must_check_result cursor_xinit1(MDBX_cursor *mc, + const MDBX_node *node, const MDBX_page *mp); static int __must_check_result cursor_xinit2(MDBX_cursor *mc, MDBX_xcursor *src_mx, @@ -16616,39 +16617,41 @@ static int cursor_sibling(MDBX_cursor *mc, int dir) { /* Move the cursor to the next data item. 
*/ static int cursor_next(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { - MDBX_page *mp; - MDBX_node *node; + assert(op == MDBX_NEXT || op == MDBX_NEXT_DUP || op == MDBX_NEXT_NODUP); int rc; if (unlikely(mc->mc_flags & C_DEL) && op == MDBX_NEXT_DUP) return MDBX_NOTFOUND; - if (unlikely(!(mc->mc_flags & C_INITIALIZED))) + if (unlikely(!(mc->mc_flags & C_INITIALIZED))) { + if (unlikely(mc->mc_flags & C_SUB)) + return MDBX_NOTFOUND; return cursor_first(mc, key, data); + } - mp = mc->mc_pg[mc->mc_top]; + const MDBX_page *mp = mc->mc_pg[mc->mc_top]; if (unlikely(mc->mc_flags & C_EOF)) { if (mc->mc_ki[mc->mc_top] + (size_t)1 >= page_numkeys(mp)) return MDBX_NOTFOUND; mc->mc_flags ^= C_EOF; } - if (mc->mc_db->md_flags & MDBX_DUPSORT) { - node = page_node(mp, mc->mc_ki[mc->mc_top]); - if (node_flags(node) & F_DUPDATA) { - if (op == MDBX_NEXT || op == MDBX_NEXT_DUP) { + if (mc->mc_xcursor) { + if (op != MDBX_NEXT_NODUP) { + const MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); + if (node_flags(node) & F_DUPDATA) { rc = cursor_next(&mc->mc_xcursor->mx_cursor, data, NULL, MDBX_NEXT); - if (op != MDBX_NEXT || rc != MDBX_NOTFOUND) { - if (likely(rc == MDBX_SUCCESS)) - get_key_optional(node, key); - return rc; + if (likely(rc == MDBX_SUCCESS)) { + get_key_optional(node, key); + return MDBX_SUCCESS; } + if (unlikely(rc != MDBX_NOTFOUND)) + return rc; } - } else { - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); - if (op == MDBX_NEXT_DUP) + if (op != MDBX_NEXT) return MDBX_NOTFOUND; } + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); } DEBUG("cursor_next: top page is %" PRIaPGNO " in cursor %p", mp->mp_pgno, @@ -16692,7 +16695,7 @@ skip: return MDBX_SUCCESS; } - node = page_node(mp, mc->mc_ki[mc->mc_top]); + const MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); if (node_flags(node) & F_DUPDATA) { rc = cursor_xinit1(mc, node, mp); if (unlikely(rc != MDBX_SUCCESS)) @@ -16713,40 +16716,41 @@ skip: /* Move the 
cursor to the previous data item. */ static int cursor_prev(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { - MDBX_page *mp; - MDBX_node *node; + assert(op == MDBX_PREV || op == MDBX_PREV_DUP || op == MDBX_PREV_NODUP); int rc; if (unlikely(mc->mc_flags & C_DEL) && op == MDBX_PREV_DUP) return MDBX_NOTFOUND; if (unlikely(!(mc->mc_flags & C_INITIALIZED))) { + if (unlikely(mc->mc_flags & C_SUB)) + return MDBX_NOTFOUND; rc = cursor_last(mc, key, data); - if (unlikely(rc)) + if (unlikely(rc != MDBX_SUCCESS)) return rc; mc->mc_ki[mc->mc_top]++; } - mp = mc->mc_pg[mc->mc_top]; - if ((mc->mc_db->md_flags & MDBX_DUPSORT) && - mc->mc_ki[mc->mc_top] < page_numkeys(mp)) { - node = page_node(mp, mc->mc_ki[mc->mc_top]); - if (node_flags(node) & F_DUPDATA) { - if (op == MDBX_PREV || op == MDBX_PREV_DUP) { - rc = cursor_prev(&mc->mc_xcursor->mx_cursor, data, NULL, MDBX_PREV); - if (op != MDBX_PREV || rc != MDBX_NOTFOUND) { + const MDBX_page *mp = mc->mc_pg[mc->mc_top]; + if (mc->mc_xcursor) { + if (op != MDBX_PREV_NODUP) { + if (likely(mc->mc_ki[mc->mc_top] < page_numkeys(mp))) { + const MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); + if (node_flags(node) & F_DUPDATA) { + rc = cursor_prev(&mc->mc_xcursor->mx_cursor, data, NULL, MDBX_PREV); if (likely(rc == MDBX_SUCCESS)) { get_key_optional(node, key); mc->mc_flags &= ~C_EOF; + return MDBX_SUCCESS; } - return rc; + if (unlikely(rc != MDBX_NOTFOUND)) + return rc; } } - } else { - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); - if (op == MDBX_PREV_DUP) + if (op != MDBX_PREV) return MDBX_NOTFOUND; } + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); } DEBUG("cursor_prev: top page is %" PRIaPGNO " in cursor %p", mp->mp_pgno, @@ -16782,8 +16786,7 @@ static int cursor_prev(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, return MDBX_SUCCESS; } - node = page_node(mp, mc->mc_ki[mc->mc_top]); - + const MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); if (node_flags(node) & 
F_DUPDATA) { rc = cursor_xinit1(mc, node, mp); if (unlikely(rc != MDBX_SUCCESS)) @@ -17234,6 +17237,8 @@ static __hot int cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, return rc; } } else { + cASSERT(mc, !mc->mc_xcursor || !(mc->mc_xcursor->mx_cursor.mc_flags & + C_INITIALIZED)); rc = node_read(mc, node, data, mp); if (unlikely(rc)) return rc; @@ -19025,7 +19030,7 @@ static int cursor_xinit0(MDBX_cursor *mc) { * [in] mc The main cursor whose sorted-dups cursor is to be initialized. * [in] node The data containing the MDBX_db record for the sorted-dup database. */ -static int cursor_xinit1(MDBX_cursor *mc, MDBX_node *node, +static int cursor_xinit1(MDBX_cursor *mc, const MDBX_node *node, const MDBX_page *mp) { MDBX_xcursor *mx = mc->mc_xcursor; if (!MDBX_DISABLE_VALIDATION && unlikely(mx == nullptr)) { From adcbb393790fb3128910564c5b058d92319c2804 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 18 Nov 2023 01:55:29 +0300 Subject: [PATCH 062/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fcursor=5Fcompare()`=20?= =?UTF-8?q?=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 5 ++++ src/core.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/mdbx.h b/mdbx.h index eb8e4ff6..765f11e3 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4891,6 +4891,11 @@ LIBMDBX_API MDBX_dbi mdbx_cursor_dbi(const MDBX_cursor *cursor); * \returns A non-zero error value on failure and 0 on success. */ LIBMDBX_API int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest); +/** FIXME */ +LIBMDBX_API int mdbx_cursor_compare(const MDBX_cursor *left, + const MDBX_cursor *right, + bool ignore_nested); + /** \brief Retrieve by cursor. 
* \ingroup c_crud * diff --git a/src/core.c b/src/core.c index eadfb491..909b8469 100644 --- a/src/core.c +++ b/src/core.c @@ -19355,6 +19355,83 @@ int mdbx_cursor_renew(const MDBX_txn *txn, MDBX_cursor *mc) { return likely(mc) ? mdbx_cursor_bind(txn, mc, mc->mc_dbi) : MDBX_EINVAL; } +int mdbx_cursor_compare(const MDBX_cursor *l, const MDBX_cursor *r, + bool ignore_nested) { + const int incomparable = INT16_MAX + 1; + if (unlikely(!l)) + return r ? -incomparable * 9 : 0; + if (unlikely(!r)) + return l ? incomparable * 9 : 0; + + if (unlikely(l->mc_signature != MDBX_MC_LIVE)) + return (r->mc_signature == MDBX_MC_LIVE) ? -incomparable * 8 : 0; + if (unlikely(r->mc_signature != MDBX_MC_LIVE)) + return (l->mc_signature == MDBX_MC_LIVE) ? incomparable * 8 : 0; + + if (unlikely(l->mc_dbx != r->mc_dbx)) { + if (l->mc_txn->mt_env != r->mc_txn->mt_env) + return (l->mc_txn->mt_env > r->mc_txn->mt_env) ? incomparable * 7 + : -incomparable * 7; + if (l->mc_txn->mt_txnid != r->mc_txn->mt_txnid) + return (l->mc_txn->mt_txnid > r->mc_txn->mt_txnid) ? incomparable * 6 + : -incomparable * 6; + return (l->mc_dbx > r->mc_dbx) ? incomparable * 5 : -incomparable * 5; + } + assert(l->mc_dbi == r->mc_dbi); + + int diff = (l->mc_flags & C_INITIALIZED) - (l->mc_flags & C_INITIALIZED); + if (unlikely(diff)) + return (diff > 0) ? incomparable * 4 : -incomparable * 4; + if (unlikely((l->mc_flags & C_INITIALIZED) == 0)) + return 0; + + size_t detent = (l->mc_snum <= r->mc_snum) ? l->mc_snum : r->mc_snum; + for (size_t i = 0; i < detent; ++i) { + diff = l->mc_ki[i] - r->mc_ki[i]; + if (diff) + return diff; + } + if (unlikely(l->mc_snum != r->mc_snum)) + return (l->mc_snum > r->mc_snum) ? incomparable * 3 : -incomparable * 3; + + assert((l->mc_xcursor != nullptr) == (r->mc_xcursor != nullptr)); + if (unlikely((l->mc_xcursor != nullptr) != (r->mc_xcursor != nullptr))) + return l->mc_xcursor ? 
incomparable * 2 : -incomparable * 2; + if (ignore_nested || !l->mc_xcursor) + return 0; + +#if MDBX_DEBUG + if (l->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { + const MDBX_page *mp = l->mc_pg[l->mc_top]; + const MDBX_node *node = page_node(mp, l->mc_ki[l->mc_top]); + assert(node_flags(node) & F_DUPDATA); + } + if (l->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { + const MDBX_page *mp = r->mc_pg[r->mc_top]; + const MDBX_node *node = page_node(mp, r->mc_ki[r->mc_top]); + assert(node_flags(node) & F_DUPDATA); + } +#endif /* MDBX_DEBUG */ + + l = &l->mc_xcursor->mx_cursor; + r = &r->mc_xcursor->mx_cursor; + diff = (l->mc_flags & C_INITIALIZED) - (l->mc_flags & C_INITIALIZED); + if (unlikely(diff)) + return (diff > 0) ? incomparable * 2 : -incomparable * 2; + if (unlikely((l->mc_flags & C_INITIALIZED) == 0)) + return 0; + + detent = (l->mc_snum <= r->mc_snum) ? l->mc_snum : r->mc_snum; + for (size_t i = 0; i < detent; ++i) { + diff = l->mc_ki[i] - r->mc_ki[i]; + if (diff) + return diff; + } + if (unlikely(l->mc_snum != r->mc_snum)) + return (l->mc_snum > r->mc_snum) ? 
incomparable : -incomparable; + return 0; +} + int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest) { if (unlikely(!src)) return MDBX_EINVAL; From eee3e6eb6be31843efc8bf81f5345d8c81405d70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 18 Nov 2023 02:32:55 +0300 Subject: [PATCH 063/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`compare=5Fpositions()`?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20=D0=BA=D1=83=D1=80=D1=81=D0=BE=D1=80?= =?UTF-8?q?=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 44 ++++++++++++++++++++++++++++++++++++++++++++ src/mdbx.c++ | 5 +++++ 2 files changed, 49 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index 9be7e341..c973573b 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -567,6 +567,7 @@ MDBX_DECLARE_EXCEPTION(dangling_map_id); [[noreturn]] LIBMDBX_API void throw_out_range(); [[noreturn]] LIBMDBX_API void throw_allocators_mismatch(); [[noreturn]] LIBMDBX_API void throw_bad_value_size(); +[[noreturn]] LIBMDBX_API void throw_incomparable_cursors(); static MDBX_CXX14_CONSTEXPR size_t check_length(size_t bytes); static MDBX_CXX14_CONSTEXPR size_t check_length(size_t headroom, size_t payload); @@ -4339,6 +4340,34 @@ public: friend MDBX_CXX11_CONSTEXPR bool operator!=(const cursor &a, const cursor &b) noexcept; + friend inline int compare_position_nothrow(const cursor &left, + const cursor &right, + bool ignore_nested) noexcept; + friend inline int compare_position(const cursor &left, const cursor &right, + bool ignore_nested); + + bool is_before_than(const cursor &other, bool ignore_nested = false) const { + return compare_position(*this, other, ignore_nested) < 0; + } + + bool is_same_or_before_than(const cursor &other, + bool ignore_nested = false) const { + return compare_position(*this, other, ignore_nested) 
<= 0; + } + + bool is_same_position(const cursor &other, bool ignore_nested = false) const { + return compare_position(*this, other, ignore_nested) == 0; + } + + bool is_after_than(const cursor &other, bool ignore_nested = false) const { + return compare_position(*this, other, ignore_nested) > 0; + } + + bool is_same_or_after_than(const cursor &other, + bool ignore_nested = false) const { + return compare_position(*this, other, ignore_nested) >= 0; + } + /// \brief Returns the application context associated with the cursor. inline void *get_context() const noexcept; @@ -6192,6 +6221,21 @@ MDBX_CXX11_CONSTEXPR bool operator!=(const cursor &a, return a.handle_ != b.handle_; } +inline int compare_position_nothrow(const cursor &left, const cursor &right, + bool ignore_nested = false) noexcept { + return mdbx_cursor_compare(left.handle_, right.handle_, ignore_nested); +} + +inline int compare_position(const cursor &left, const cursor &right, + bool ignore_nested = false) { + const auto diff = compare_position_nothrow(left, right, ignore_nested); + assert(compare_position_nothrow(right, left, ignore_nested) == -diff); + if (MDBX_LIKELY(int16_t(diff) == diff)) + MDBX_CXX20_LIKELY + return int(diff); + throw_incomparable_cursors(); +} + inline cursor::move_result::move_result(const cursor &cursor, bool throw_notfound) : pair_result(slice(), slice(), false) { diff --git a/src/mdbx.c++ b/src/mdbx.c++ index dd75aaa0..4381b8e8 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -233,6 +233,11 @@ namespace mdbx { "into an incompatible memory allocation scheme."); } +[[noreturn]] __cold void throw_incomparable_cursors() { + throw std::logic_error( + "mdbx:: incomparable and/or invalid cursors to compare positions."); +} + [[noreturn]] __cold void throw_bad_value_size() { throw bad_value_size(MDBX_BAD_VALSIZE); } From ed59ad22c64f53d6af7ee46010605228059dd3fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= 
=?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 18 Nov 2023 02:34:07 +0300 Subject: [PATCH 064/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fcursor=5Fon=5Ffirst/la?= =?UTF-8?q?st=5Fdup()`=20=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 8 ++++++++ mdbx.h++ | 10 ++++++++++ src/core.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) diff --git a/mdbx.h b/mdbx.h index 765f11e3..0123a139 100644 --- a/mdbx.h +++ b/mdbx.h @@ -5138,6 +5138,10 @@ mdbx_cursor_eof(const MDBX_cursor *cursor); MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cursor_on_first(const MDBX_cursor *cursor); +/** FIXME */ +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int +mdbx_cursor_on_first_dup(const MDBX_cursor *cursor); + /** \brief Determines whether the cursor is pointed to the last key-value pair * or not. * \ingroup c_cursors @@ -5152,6 +5156,10 @@ mdbx_cursor_on_first(const MDBX_cursor *cursor); MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cursor_on_last(const MDBX_cursor *cursor); +/** FIXME */ +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int +mdbx_cursor_on_last_dup(const MDBX_cursor *cursor); + /** \addtogroup c_rqest * \details \note The estimation result varies greatly depending on the filling * of specific pages and the overall balance of the b-tree: diff --git a/mdbx.h++ b/mdbx.h++ index c973573b..eaa5279c 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4467,6 +4467,8 @@ public: inline bool eof() const; inline bool on_first() const; inline bool on_last() const; + inline bool on_first_multival() const; + inline bool on_last_multival() const; inline estimate_result estimate(const slice &key, const slice &value) const; inline estimate_result estimate(const slice &key) const; inline estimate_result estimate(move_operation operation) const; @@ -6396,6 +6398,14 @@ inline bool cursor::on_last() const { return 
error::boolean_or_throw(::mdbx_cursor_on_last(*this)); } +inline bool cursor::on_first_multival() const { + return error::boolean_or_throw(::mdbx_cursor_on_first_dup(*this)); +} + +inline bool cursor::on_last_multival() const { + return error::boolean_or_throw(::mdbx_cursor_on_last_dup(*this)); +} + inline cursor::estimate_result cursor::estimate(const slice &key, const slice &value) const { return estimate_result(*this, multi_exactkey_lowerboundvalue, key, value); diff --git a/src/core.c b/src/core.c index 909b8469..2e577a93 100644 --- a/src/core.c +++ b/src/core.c @@ -24533,6 +24533,29 @@ int mdbx_cursor_on_first(const MDBX_cursor *mc) { return MDBX_RESULT_TRUE; } +int mdbx_cursor_on_first_dup(const MDBX_cursor *mc) { + if (unlikely(mc == NULL)) + return MDBX_EINVAL; + + if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; + + if (!(mc->mc_flags & C_INITIALIZED)) + return mc->mc_db->md_entries ? MDBX_RESULT_FALSE : MDBX_RESULT_TRUE; + + if (!mc->mc_xcursor) + return MDBX_RESULT_TRUE; + + mc = &mc->mc_xcursor->mx_cursor; + for (size_t i = 0; i < mc->mc_snum; ++i) { + if (mc->mc_ki[i]) + return MDBX_RESULT_FALSE; + } + + return MDBX_RESULT_TRUE; +} + int mdbx_cursor_on_last(const MDBX_cursor *mc) { if (unlikely(mc == NULL)) return MDBX_EINVAL; @@ -24553,6 +24576,30 @@ int mdbx_cursor_on_last(const MDBX_cursor *mc) { return MDBX_RESULT_TRUE; } +int mdbx_cursor_on_last_dup(const MDBX_cursor *mc) { + if (unlikely(mc == NULL)) + return MDBX_EINVAL; + + if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; + + if (!(mc->mc_flags & C_INITIALIZED)) + return mc->mc_db->md_entries ? 
MDBX_RESULT_FALSE : MDBX_RESULT_TRUE; + + if (!mc->mc_xcursor) + return MDBX_RESULT_TRUE; + + mc = &mc->mc_xcursor->mx_cursor; + for (size_t i = 0; i < mc->mc_snum; ++i) { + size_t nkeys = page_numkeys(mc->mc_pg[i]); + if (mc->mc_ki[i] < nkeys - 1) + return MDBX_RESULT_FALSE; + } + + return MDBX_RESULT_TRUE; +} + int mdbx_cursor_eof(const MDBX_cursor *mc) { if (unlikely(mc == NULL)) return MDBX_EINVAL; From bc562d5c06a3c1efab0f0834d7a1fba0145b0adf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 18 Nov 2023 22:52:13 +0300 Subject: [PATCH 065/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fcursor=5Fscan()`=20?= =?UTF-8?q?=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 8 ++++++++ src/core.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/mdbx.h b/mdbx.h index 0123a139..c1fdd90e 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4929,6 +4929,14 @@ LIBMDBX_API int mdbx_cursor_compare(const MDBX_cursor *left, * \retval MDBX_EINVAL An invalid parameter was specified. */ LIBMDBX_API int mdbx_cursor_get(MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op); +/** FIXME */ +typedef int(MDBX_predicate_func)(void *context, MDBX_val *key, MDBX_val *value, + void *arg) MDBX_CXX17_NOEXCEPT; +/** FIXME */ +LIBMDBX_API int mdbx_cursor_scan(MDBX_cursor *cursor, + MDBX_predicate_func *predicate, void *context, + MDBX_cursor_op start_op, + MDBX_cursor_op turn_op, void *arg); /** \brief Retrieve multiple non-dupsort key/value pairs by cursor. 
* \ingroup c_crud diff --git a/src/core.c b/src/core.c index 2e577a93..23eaaa92 100644 --- a/src/core.c +++ b/src/core.c @@ -17433,6 +17433,39 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, return cursor_get(mc, key, data, op); } +int mdbx_cursor_scan(MDBX_cursor *mc, MDBX_predicate_func *predicate, + void *context, MDBX_cursor_op start_op, + MDBX_cursor_op turn_op, void *arg) { + if (unlikely(!predicate)) + return MDBX_EINVAL; + + const unsigned valid_start_mask = + 1 << MDBX_FIRST | 1 << MDBX_FIRST_DUP | 1 << MDBX_LAST | + 1 << MDBX_LAST_DUP | 1 << MDBX_GET_CURRENT | 1 << MDBX_GET_MULTIPLE; + if (unlikely(start_op > 30 || ((1 << start_op) & valid_start_mask) == 0)) + return MDBX_EINVAL; + + const unsigned valid_turn_mask = + 1 << MDBX_NEXT | 1 << MDBX_NEXT_DUP | 1 << MDBX_NEXT_NODUP | + 1 << MDBX_PREV | 1 << MDBX_PREV_DUP | 1 << MDBX_PREV_NODUP | + 1 << MDBX_NEXT_MULTIPLE | 1 << MDBX_PREV_MULTIPLE; + if (unlikely(turn_op > 30 || ((1 << turn_op) & valid_turn_mask) == 0)) + return MDBX_EINVAL; + + MDBX_val key, data; + int rc = mdbx_cursor_get(mc, &key, &data, start_op); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + for (;;) { + rc = predicate(context, &key, &data, arg); + if (rc != MDBX_RESULT_FALSE) + return rc; + rc = cursor_get(mc, &key, &data, turn_op); + if (rc != MDBX_SUCCESS) + return (rc == MDBX_NOTFOUND) ? 
MDBX_RESULT_FALSE : rc; + } +} + static int cursor_first_batch(MDBX_cursor *mc) { if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { int err = page_search(mc, NULL, MDBX_PS_FIRST); From 5cf6542fa002a1cd543ac524b5bfb8de43e00337 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 18 Nov 2023 23:20:53 +0300 Subject: [PATCH 066/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fcursor=5Fscan=5Ffrom()?= =?UTF-8?q?`=20=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 7 +++++++ src/core.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/mdbx.h b/mdbx.h index c1fdd90e..611efd1e 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4938,6 +4938,13 @@ LIBMDBX_API int mdbx_cursor_scan(MDBX_cursor *cursor, MDBX_cursor_op start_op, MDBX_cursor_op turn_op, void *arg); +/** FIXME */ +LIBMDBX_API int mdbx_cursor_scan_from(MDBX_cursor *cursor, + MDBX_predicate_func *predicate, + void *context, MDBX_cursor_op from_op, + MDBX_val *from_key, MDBX_val *from_value, + MDBX_cursor_op turn_op, void *arg); + /** \brief Retrieve multiple non-dupsort key/value pairs by cursor. 
* \ingroup c_crud * diff --git a/src/core.c b/src/core.c index 23eaaa92..861c266f 100644 --- a/src/core.c +++ b/src/core.c @@ -17466,6 +17466,50 @@ int mdbx_cursor_scan(MDBX_cursor *mc, MDBX_predicate_func *predicate, } } +int mdbx_cursor_scan_from(MDBX_cursor *mc, MDBX_predicate_func *predicate, + void *context, MDBX_cursor_op from_op, MDBX_val *key, + MDBX_val *value, MDBX_cursor_op turn_op, void *arg) { + if (unlikely(!predicate)) + return MDBX_EINVAL; + + const unsigned valid_start_mask = + 1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | 1 << MDBX_SET_KEY | + 1 << MDBX_GET_MULTIPLE | 1 << MDBX_SET_LOWERBOUND | + 1 << MDBX_SET_UPPERBOUND; + ; + if (unlikely(from_op < MDBX_TO_KEY_LESSER_THAN && + ((1 << from_op) & valid_start_mask) == 0)) + return MDBX_EINVAL; + + const unsigned valid_turn_mask = + 1 << MDBX_NEXT | 1 << MDBX_NEXT_DUP | 1 << MDBX_NEXT_NODUP | + 1 << MDBX_PREV | 1 << MDBX_PREV_DUP | 1 << MDBX_PREV_NODUP | + 1 << MDBX_NEXT_MULTIPLE | 1 << MDBX_PREV_MULTIPLE; + if (unlikely(turn_op > 30 || ((1 << turn_op) & valid_turn_mask) == 0)) + return MDBX_EINVAL; + + int rc = mdbx_cursor_get(mc, key, value, from_op); + if (unlikely(MDBX_IS_ERROR(rc))) + return rc; + + cASSERT(mc, key != nullptr); + MDBX_val stub; + if (!value) { + value = &stub; + rc = cursor_get(mc, key, value, MDBX_GET_CURRENT); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + for (;;) { + rc = predicate(context, key, value, arg); + if (rc != MDBX_RESULT_FALSE) + return rc; + rc = cursor_get(mc, key, value, turn_op); + if (rc != MDBX_SUCCESS) + return (rc == MDBX_NOTFOUND) ? 
MDBX_RESULT_FALSE : rc; + } +} + static int cursor_first_batch(MDBX_cursor *mc) { if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { int err = page_search(mc, NULL, MDBX_PS_FIRST); From 25015c54e136ef60c3328c197bf221faf83ccdb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 13 Nov 2023 12:52:17 +0300 Subject: [PATCH 067/443] =?UTF-8?q?mdbx:=20=D0=BE=D0=B1=D0=B5=D1=89=D0=B0?= =?UTF-8?q?=D0=BD=D0=BD=D0=BE=D0=B5=20"doubtless"=20API=20=D0=B4=D0=BB?= =?UTF-8?q?=D1=8F=20=D0=BF=D0=BE=D0=B7=D0=B8=D1=86=D0=B8=D0=BE=D0=BD=D0=B8?= =?UTF-8?q?=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=BA=D1=83=D1=80?= =?UTF-8?q?=D1=81=D0=BE=D1=80=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 25 +++++- mdbx.h++ | 256 +++++++++++++++++++++++++++++++++-------------------- src/core.c | 154 +++++++++++++++++++++++++++++++- 3 files changed, 336 insertions(+), 99 deletions(-) diff --git a/mdbx.h b/mdbx.h index 611efd1e..43d4eca3 100644 --- a/mdbx.h +++ b/mdbx.h @@ -1778,7 +1778,7 @@ enum MDBX_cursor_op { * return both key and data, and the return code depends on whether a * upper-bound was found. * - * For non DUPSORT-ed collections this work the same to \ref MDBX_SET_RANGE, + * For non DUPSORT-ed collections this work like \ref MDBX_SET_RANGE, * but returns \ref MDBX_SUCCESS if the greater key was found or * \ref MDBX_NOTFOUND otherwise. * @@ -1786,7 +1786,28 @@ enum MDBX_cursor_op { * i.e. for a pairs/tuples of a key and an each data value of duplicates. * Returns \ref MDBX_SUCCESS if the greater pair was returned or * \ref MDBX_NOTFOUND otherwise. */ - MDBX_SET_UPPERBOUND + MDBX_SET_UPPERBOUND, + + /* Doubtless cursor positioning at a specified key. 
*/ + MDBX_TO_KEY_LESSER_THAN, + MDBX_TO_KEY_LESSER_OR_EQUAL, + MDBX_TO_KEY_EQUAL, + MDBX_TO_KEY_GREATER_OR_EQUAL, + MDBX_TO_KEY_GREATER_THAN, + + /* Doubtless cursor positioning at a specified key-value pair + * for dupsort/multi-value hives. */ + MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN, + MDBX_TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL, + MDBX_TO_EXACT_KEY_VALUE_EQUAL, + MDBX_TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL, + MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN, + + MDBX_TO_PAIR_LESSER_THAN, + MDBX_TO_PAIR_LESSER_OR_EQUAL, + MDBX_TO_PAIR_EQUAL, + MDBX_TO_PAIR_GREATER_OR_EQUAL, + MDBX_TO_PAIR_GREATER_THAN }; #ifndef __cplusplus /** \ingroup c_cursors */ diff --git a/mdbx.h++ b/mdbx.h++ index eaa5279c..bdeeb05b 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4391,9 +4391,33 @@ public: multi_find_pair = MDBX_GET_BOTH, multi_exactkey_lowerboundvalue = MDBX_GET_BOTH_RANGE, - find_key = MDBX_SET, + seek_key = MDBX_SET, key_exact = MDBX_SET_KEY, - key_lowerbound = MDBX_SET_RANGE + key_lowerbound = MDBX_SET_RANGE, + + /* Doubtless cursor positioning at a specified key. */ + key_lesser_than = MDBX_TO_KEY_LESSER_THAN, + key_lesser_or_equal = MDBX_TO_KEY_LESSER_OR_EQUAL, + key_equal = MDBX_TO_KEY_EQUAL, + key_greater_or_equal = MDBX_TO_KEY_GREATER_OR_EQUAL, + key_greater_than = MDBX_TO_KEY_GREATER_THAN, + + /* Doubtless cursor positioning at a specified key-value pair + * for dupsort/multi-value hives. 
*/ + multi_exactkey_value_lesser_than = MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN, + multi_exactkey_value_lesser_or_equal = + MDBX_TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL, + multi_exactkey_value_equal = MDBX_TO_EXACT_KEY_VALUE_EQUAL, + multi_exactkey_value_greater_or_equal = + MDBX_TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL, + multi_exactkey_value_greater = MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN, + + pair_lesser_than = MDBX_TO_PAIR_LESSER_THAN, + pair_lesser_or_equal = MDBX_TO_PAIR_LESSER_OR_EQUAL, + pair_equal = MDBX_TO_PAIR_EQUAL, + pair_exact = pair_equal, + pair_greater_or_equal = MDBX_TO_PAIR_GREATER_OR_EQUAL, + pair_greater_than = MDBX_TO_PAIR_GREATER_THAN, }; struct move_result : public pair_result { @@ -4424,45 +4448,154 @@ public: }; protected: + /* fake const, i.e. for some move/get operations */ inline bool move(move_operation operation, MDBX_val *key, MDBX_val *value, - bool throw_notfound) const - /* fake const, i.e. for some operations */; + bool throw_notfound) const; + inline ptrdiff_t estimate(move_operation operation, MDBX_val *key, MDBX_val *value) const; public: - inline move_result move(move_operation operation, bool throw_notfound); - inline move_result to_first(bool throw_notfound = true); - inline move_result to_previous(bool throw_notfound = true); - inline move_result to_previous_last_multi(bool throw_notfound = true); - inline move_result to_current_first_multi(bool throw_notfound = true); - inline move_result to_current_prev_multi(bool throw_notfound = true); - inline move_result current(bool throw_notfound = true) const; - inline move_result to_current_next_multi(bool throw_notfound = true); - inline move_result to_current_last_multi(bool throw_notfound = true); - inline move_result to_next_first_multi(bool throw_notfound = true); - inline move_result to_next(bool throw_notfound = true); - inline move_result to_last(bool throw_notfound = true); + move_result move(move_operation operation, bool throw_notfound) { + return move_result(*this, operation, 
throw_notfound); + } + move_result move(move_operation operation, const slice &key, + bool throw_notfound) { + return move_result(*this, operation, key, throw_notfound); + } + move_result move(move_operation operation, const slice &key, + const slice &value, bool throw_notfound) { + return move_result(*this, operation, key, value, throw_notfound); + } + bool move(move_operation operation, slice &key, slice &value, + bool throw_notfound) { + return move(operation, &key, &value, throw_notfound); + } - inline move_result move(move_operation operation, const slice &key, - bool throw_notfound); + move_result to_first(bool throw_notfound = true) { + return move(first, throw_notfound); + } + move_result to_previous(bool throw_notfound = true) { + return move(previous, throw_notfound); + } + move_result to_previous_last_multi(bool throw_notfound = true) { + return move(multi_prevkey_lastvalue, throw_notfound); + } + move_result to_current_first_multi(bool throw_notfound = true) { + return move(multi_currentkey_firstvalue, throw_notfound); + } + move_result to_current_prev_multi(bool throw_notfound = true) { + return move(multi_currentkey_prevvalue, throw_notfound); + } + move_result current(bool throw_notfound = true) const { + return move_result(*this, throw_notfound); + } + move_result to_current_next_multi(bool throw_notfound = true) { + return move(multi_currentkey_nextvalue, throw_notfound); + } + move_result to_current_last_multi(bool throw_notfound = true) { + return move(multi_currentkey_lastvalue, throw_notfound); + } + move_result to_next_first_multi(bool throw_notfound = true) { + return move(multi_nextkey_firstvalue, throw_notfound); + } + move_result to_next(bool throw_notfound = true) { + return move(next, throw_notfound); + } + move_result to_last(bool throw_notfound = true) { + return move(last, throw_notfound); + } + + move_result to_key_lesser_than(const slice &key, bool throw_notfound = true) { + return move(key_lesser_than, key, throw_notfound); + } + 
move_result to_key_lesser_or_equal(const slice &key, + bool throw_notfound = true) { + return move(key_lesser_or_equal, key, throw_notfound); + } + move_result to_key_equal(const slice &key, bool throw_notfound = true) { + return move(key_equal, key, throw_notfound); + } + move_result to_key_exact(const slice &key, bool throw_notfound = true) { + return move(key_exact, key, throw_notfound); + } + move_result to_key_greater_or_equal(const slice &key, + bool throw_notfound = true) { + return move(key_greater_or_equal, key, throw_notfound); + } + move_result to_key_greater_than(const slice &key, + bool throw_notfound = true) { + return move(key_greater_than, key, throw_notfound); + } + + move_result to_exact_key_value_lesser_than(const slice &key, + const slice &value, + bool throw_notfound = true) { + return move(multi_exactkey_value_lesser_than, key, value, throw_notfound); + } + move_result to_exact_key_value_lesser_or_equal(const slice &key, + const slice &value, + bool throw_notfound = true) { + return move(multi_exactkey_value_lesser_or_equal, key, value, + throw_notfound); + } + move_result to_exact_key_value_equal(const slice &key, const slice &value, + bool throw_notfound = true) { + return move(multi_exactkey_value_equal, key, value, throw_notfound); + } + move_result to_exact_key_value_greater_or_equal(const slice &key, + const slice &value, + bool throw_notfound = true) { + return move(multi_exactkey_value_greater_or_equal, key, value, + throw_notfound); + } + move_result to_exact_key_value_greater_than(const slice &key, + const slice &value, + bool throw_notfound = true) { + return move(multi_exactkey_value_greater, key, value, throw_notfound); + } + + move_result to_pair_lesser_than(const slice &key, const slice &value, + bool throw_notfound = true) { + return move(pair_lesser_than, key, value, throw_notfound); + } + move_result to_pair_lesser_or_equal(const slice &key, const slice &value, + bool throw_notfound = true) { + return 
move(pair_lesser_or_equal, key, value, throw_notfound); + } + move_result to_pair_equal(const slice &key, const slice &value, + bool throw_notfound = true) { + return move(pair_equal, key, value, throw_notfound); + } + move_result to_pair_exact(const slice &key, const slice &value, + bool throw_notfound = true) { + return move(pair_exact, key, value, throw_notfound); + } + move_result to_pair_greater_or_equal(const slice &key, const slice &value, + bool throw_notfound = true) { + return move(pair_greater_or_equal, key, value, throw_notfound); + } + move_result to_pair_greater_than(const slice &key, const slice &value, + bool throw_notfound = true) { + return move(pair_greater_than, key, value, throw_notfound); + } + + inline bool seek(const slice &key); inline move_result find(const slice &key, bool throw_notfound = true); - inline move_result lower_bound(const slice &key, bool throw_notfound = true); + inline move_result lower_bound(const slice &key, bool throw_notfound = false); + inline move_result upper_bound(const slice &key, bool throw_notfound = false); + + /// \brief Return count of duplicates for current key. + inline size_t count_multivalue() const; - inline move_result move(move_operation operation, const slice &key, - const slice &value, bool throw_notfound); inline move_result find_multivalue(const slice &key, const slice &value, bool throw_notfound = true); inline move_result lower_bound_multivalue(const slice &key, const slice &value, bool throw_notfound = false); - - inline bool seek(const slice &key); - inline bool move(move_operation operation, slice &key, slice &value, - bool throw_notfound); - - /// \brief Return count of duplicates for current key. 
- inline size_t count_multivalue() const; + inline move_result upper_bound_multivalue(const slice &key, + const slice &value, + bool throw_notfound = false); inline bool eof() const; inline bool on_first() const; @@ -6290,60 +6423,6 @@ inline ptrdiff_t estimate(const cursor &from, const cursor &to) { return result; } -inline cursor::move_result cursor::move(move_operation operation, - bool throw_notfound) { - return move_result(*this, operation, throw_notfound); -} - -inline cursor::move_result cursor::to_first(bool throw_notfound) { - return move(first, throw_notfound); -} - -inline cursor::move_result cursor::to_previous(bool throw_notfound) { - return move(previous, throw_notfound); -} - -inline cursor::move_result cursor::to_previous_last_multi(bool throw_notfound) { - return move(multi_prevkey_lastvalue, throw_notfound); -} - -inline cursor::move_result cursor::to_current_first_multi(bool throw_notfound) { - return move(multi_currentkey_firstvalue, throw_notfound); -} - -inline cursor::move_result cursor::to_current_prev_multi(bool throw_notfound) { - return move(multi_currentkey_prevvalue, throw_notfound); -} - -inline cursor::move_result cursor::current(bool throw_notfound) const { - return move_result(*this, throw_notfound); -} - -inline cursor::move_result cursor::to_current_next_multi(bool throw_notfound) { - return move(multi_currentkey_nextvalue, throw_notfound); -} - -inline cursor::move_result cursor::to_current_last_multi(bool throw_notfound) { - return move(multi_currentkey_lastvalue, throw_notfound); -} - -inline cursor::move_result cursor::to_next_first_multi(bool throw_notfound) { - return move(multi_nextkey_firstvalue, throw_notfound); -} - -inline cursor::move_result cursor::to_next(bool throw_notfound) { - return move(next, throw_notfound); -} - -inline cursor::move_result cursor::to_last(bool throw_notfound) { - return move(last, throw_notfound); -} - -inline cursor::move_result cursor::move(move_operation operation, - const slice &key, bool 
throw_notfound) { - return move_result(*this, operation, key, throw_notfound); -} - inline cursor::move_result cursor::find(const slice &key, bool throw_notfound) { return move(key_exact, key, throw_notfound); } @@ -6353,12 +6432,6 @@ inline cursor::move_result cursor::lower_bound(const slice &key, return move(key_lowerbound, key, throw_notfound); } -inline cursor::move_result cursor::move(move_operation operation, - const slice &key, const slice &value, - bool throw_notfound) { - return move_result(*this, operation, key, value, throw_notfound); -} - inline cursor::move_result cursor::find_multivalue(const slice &key, const slice &value, bool throw_notfound) { @@ -6372,12 +6445,7 @@ inline cursor::move_result cursor::lower_bound_multivalue(const slice &key, } inline bool cursor::seek(const slice &key) { - return move(find_key, const_cast(&key), nullptr, false); -} - -inline bool cursor::move(move_operation operation, slice &key, slice &value, - bool throw_notfound) { - return move(operation, &key, &value, throw_notfound); + return move(seek_key, const_cast(&key), nullptr, false); } inline size_t cursor::count_multivalue() const { diff --git a/src/core.c b/src/core.c index 861c266f..bd955953 100644 --- a/src/core.c +++ b/src/core.c @@ -16961,7 +16961,7 @@ search_node:; node = nsr.node; ret.exact = nsr.exact; if (!ret.exact) { - if (op != MDBX_SET_RANGE) { + if (op < MDBX_SET_RANGE) { /* MDBX_SET specified and not an exact match. 
*/ if (unlikely(mc->mc_ki[mc->mc_top] >= page_numkeys(mc->mc_pg[mc->mc_top]))) @@ -17010,7 +17010,7 @@ got_node: ret.err = cursor_xinit1(mc, node, mp); if (unlikely(ret.err != MDBX_SUCCESS)) return ret; - if (op == MDBX_SET || op == MDBX_SET_KEY || op == MDBX_SET_RANGE) { + if (op >= MDBX_SET) { MDBX_ANALYSIS_ASSUME(mc->mc_xcursor != nullptr); ret.err = cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); if (unlikely(ret.err != MDBX_SUCCESS)) @@ -17026,7 +17026,7 @@ got_node: } } } else if (likely(data)) { - if (op == MDBX_GET_BOTH || op == MDBX_GET_BOTH_RANGE) { + if (op <= MDBX_GET_BOTH_RANGE) { if (unlikely(data->iov_len < mc->mc_dbx->md_vlen_min || data->iov_len > mc->mc_dbx->md_vlen_max)) { cASSERT(mc, !"Invalid data-size"); @@ -17365,6 +17365,7 @@ static __hot int cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, case MDBX_LAST_DUP: mfunc = cursor_last; goto move; + case MDBX_SET_UPPERBOUND: /* mostly same as MDBX_SET_LOWERBOUND */ case MDBX_SET_LOWERBOUND: { if (unlikely(key == NULL || data == NULL)) @@ -17408,6 +17409,153 @@ static __hot int cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, } break; } + + /* Doubtless API to positioning of the cursor at a specified key. 
*/ + case MDBX_TO_KEY_LESSER_THAN: + case MDBX_TO_KEY_LESSER_OR_EQUAL: + case MDBX_TO_KEY_EQUAL: + case MDBX_TO_KEY_GREATER_OR_EQUAL: + case MDBX_TO_KEY_GREATER_THAN: { + if (unlikely(key == NULL)) + return MDBX_EINVAL; + struct cursor_set_result csr = cursor_set(mc, key, data, MDBX_SET_RANGE); + rc = csr.err; + if (csr.exact) { + cASSERT(mc, csr.err == MDBX_SUCCESS); + if (op == MDBX_TO_KEY_LESSER_THAN) + rc = cursor_prev(mc, key, data, MDBX_PREV_NODUP); + else if (op == MDBX_TO_KEY_GREATER_THAN) + rc = cursor_next(mc, key, data, MDBX_NEXT_NODUP); + } else if (op < MDBX_TO_KEY_EQUAL && + (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) + rc = cursor_prev(mc, key, data, MDBX_PREV_NODUP); + else if (op == MDBX_TO_KEY_EQUAL && rc == MDBX_SUCCESS) + rc = MDBX_NOTFOUND; + break; + } + + /* Doubtless API to positioning of the cursor at a specified key-value pair + * for multi-value hives. */ + case MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN: + case MDBX_TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL: + case MDBX_TO_EXACT_KEY_VALUE_EQUAL: + case MDBX_TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL: + case MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN: { + if (unlikely(key == NULL || data == NULL)) + return MDBX_EINVAL; + MDBX_val save_data = *data; + struct cursor_set_result csr = cursor_set(mc, key, data, MDBX_SET_KEY); + rc = csr.err; + if (rc == MDBX_SUCCESS) { + cASSERT(mc, csr.exact); + MDBX_cursor *const mx = + (mc->mc_xcursor && + (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) + ? 
&mc->mc_xcursor->mx_cursor + : nullptr; + if (mx) { + csr = cursor_set(mx, &save_data, NULL, MDBX_SET_RANGE); + rc = csr.err; + if (csr.exact) { + cASSERT(mc, csr.err == MDBX_SUCCESS); + if (op == MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN) + rc = cursor_prev(mx, data, NULL, MDBX_PREV); + else if (op == MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN) + rc = cursor_next(mx, data, NULL, MDBX_NEXT); + } else if (op < MDBX_TO_EXACT_KEY_VALUE_EQUAL && + (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) + rc = cursor_prev(mx, data, NULL, MDBX_PREV); + else if (op == MDBX_TO_EXACT_KEY_VALUE_EQUAL && rc == MDBX_SUCCESS) + rc = MDBX_NOTFOUND; + } else { + int cmp = mc->mc_dbx->md_dcmp(data, &save_data); + switch (op) { + default: + __unreachable(); + case MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN: + rc = (cmp < 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; + break; + case MDBX_TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL: + rc = (cmp <= 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; + break; + case MDBX_TO_EXACT_KEY_VALUE_EQUAL: + rc = (cmp == 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; + break; + case MDBX_TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL: + rc = (cmp >= 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; + break; + case MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN: + rc = (cmp > 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; + break; + } + } + } + break; + } + case MDBX_TO_PAIR_LESSER_THAN: + case MDBX_TO_PAIR_LESSER_OR_EQUAL: + case MDBX_TO_PAIR_EQUAL: + case MDBX_TO_PAIR_GREATER_OR_EQUAL: + case MDBX_TO_PAIR_GREATER_THAN: { + if (unlikely(key == NULL || data == NULL)) + return MDBX_EINVAL; + MDBX_val save_data = *data; + struct cursor_set_result csr = cursor_set(mc, key, data, MDBX_SET_RANGE); + rc = csr.err; + if (csr.exact) { + cASSERT(mc, csr.err == MDBX_SUCCESS); + MDBX_cursor *const mx = + (mc->mc_xcursor && + (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) + ? 
&mc->mc_xcursor->mx_cursor + : nullptr; + if (mx) { + csr = cursor_set(mx, &save_data, NULL, MDBX_SET_RANGE); + rc = csr.err; + if (csr.exact) { + cASSERT(mc, csr.err == MDBX_SUCCESS); + if (op == MDBX_TO_PAIR_LESSER_THAN) + rc = cursor_prev(mc, key, data, MDBX_PREV); + else if (op == MDBX_TO_PAIR_GREATER_THAN) + rc = cursor_next(mc, key, data, MDBX_NEXT); + } else if (op < MDBX_TO_PAIR_EQUAL && + (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) + rc = cursor_prev(mc, key, data, MDBX_PREV); + else if (op == MDBX_TO_PAIR_EQUAL && rc == MDBX_SUCCESS) + rc = MDBX_NOTFOUND; + else if (op > MDBX_TO_PAIR_EQUAL && rc == MDBX_NOTFOUND) + rc = cursor_next(mc, key, data, MDBX_NEXT); + } else { + int cmp = mc->mc_dbx->md_dcmp(data, &save_data); + switch (op) { + default: + __unreachable(); + case MDBX_TO_PAIR_LESSER_THAN: + rc = (cmp < 0) ? MDBX_SUCCESS : cursor_prev(mc, key, data, MDBX_PREV); + break; + case MDBX_TO_PAIR_LESSER_OR_EQUAL: + rc = + (cmp <= 0) ? MDBX_SUCCESS : cursor_prev(mc, key, data, MDBX_PREV); + break; + case MDBX_TO_PAIR_EQUAL: + rc = (cmp == 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; + break; + case MDBX_TO_PAIR_GREATER_OR_EQUAL: + rc = + (cmp >= 0) ? MDBX_SUCCESS : cursor_next(mc, key, data, MDBX_NEXT); + break; + case MDBX_TO_PAIR_GREATER_THAN: + rc = (cmp > 0) ? 
MDBX_SUCCESS : cursor_next(mc, key, data, MDBX_NEXT); + break; + } + } + } else if (op < MDBX_TO_PAIR_EQUAL && + (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) + rc = cursor_prev(mc, key, data, MDBX_PREV_NODUP); + else if (op == MDBX_TO_PAIR_EQUAL && rc == MDBX_SUCCESS) + rc = MDBX_NOTFOUND; + break; + } default: DEBUG("unhandled/unimplemented cursor operation %u", op); return MDBX_EINVAL; From 4999264460a8a35cf277473e15d50dd4ad2928aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 20 Nov 2023 21:22:12 +0300 Subject: [PATCH 068/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`error::boolean=5For=5Ft?= =?UTF-8?q?hrow(exception=5Fthunk)`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/mdbx.h++ b/mdbx.h++ index bdeeb05b..e4bf7c15 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -485,6 +485,7 @@ public: static inline void throw_on_failure(int error_code); static inline bool boolean_or_throw(int error_code); static inline void success_or_throw(int error_code, const exception_thunk &); + static inline bool boolean_or_throw(int error_code, const exception_thunk &); static inline void panic_on_failure(int error_code, const char *context_where, const char *func_who) noexcept; static inline void success_or_panic(int error_code, const char *context_where, @@ -4883,7 +4884,8 @@ inline void error::success_or_throw() const { inline void error::success_or_throw(const exception_thunk &thunk) const { assert(thunk.is_clean() || code() != MDBX_SUCCESS); if (MDBX_UNLIKELY(!is_success())) { - MDBX_CXX20_UNLIKELY if (!thunk.is_clean()) thunk.rethrow_captured(); + MDBX_CXX20_UNLIKELY if (MDBX_UNLIKELY(!thunk.is_clean())) + thunk.rethrow_captured(); else throw_exception(); } } @@ -4944,6 +4946,13 @@ 
inline void error::success_or_panic(int error_code, const char *context_where, rc.success_or_panic(context_where, func_who); } +inline bool error::boolean_or_throw(int error_code, + const exception_thunk &thunk) { + if (MDBX_UNLIKELY(!thunk.is_clean())) + MDBX_CXX20_UNLIKELY thunk.rethrow_captured(); + return boolean_or_throw(error_code); +} + //------------------------------------------------------------------------------ MDBX_CXX11_CONSTEXPR slice::slice() noexcept : ::MDBX_val({nullptr, 0}) {} @@ -6392,6 +6401,8 @@ inline bool cursor::move(move_operation operation, MDBX_val *key, switch (err) { case MDBX_SUCCESS: MDBX_CXX20_LIKELY return true; + case MDBX_RESULT_TRUE: + return false; case MDBX_NOTFOUND: if (!throw_notfound) return false; From 0b68980489e8ded9cafa4be926d900e09b224709 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 20 Nov 2023 21:22:36 +0300 Subject: [PATCH 069/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`cursor::scan(predicate.?= =?UTF-8?q?..)`=20=D0=B8=20=D1=82.=D0=BF.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index e4bf7c15..d1acd860 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4457,6 +4457,89 @@ protected: MDBX_val *value) const; public: + template + bool scan(CALLABLE_PREDICATE predicate, move_operation start = first, + move_operation turn = next) { + struct wrapper : public exception_thunk { + static int probe(void *context, MDBX_val *key, MDBX_val *value, + void *arg) noexcept { + auto thunk = static_cast(context); + assert(thunk->is_clean()); + auto &predicate = *static_cast(arg); + try { + return predicate(pair(*key, *value)) ? MDBX_RESULT_TRUE + : MDBX_RESULT_FALSE; + } catch (... 
/* capture any exception to rethrow it over C code */) { + thunk->capture(); + return MDBX_RESULT_TRUE; + } + } + } thunk; + return error::boolean_or_throw( + ::mdbx_cursor_scan(handle_, wrapper::probe, &thunk, + MDBX_cursor_op(start), MDBX_cursor_op(turn), + &predicate), + thunk); + } + + template + bool fullscan(CALLABLE_PREDICATE predicate, bool backward = false) { + return scan(std::move(predicate), backward ? last : first, + backward ? previous : next); + } + + template + bool scan_from(CALLABLE_PREDICATE predicate, slice &from, + move_operation start = key_greater_or_equal, + move_operation turn = next) { + struct wrapper : public exception_thunk { + static int probe(void *context, MDBX_val *key, MDBX_val *value, + void *arg) noexcept { + auto thunk = static_cast(context); + assert(thunk->is_clean()); + auto &predicate = *static_cast(arg); + try { + return predicate(pair(*key, *value)) ? MDBX_RESULT_TRUE + : MDBX_RESULT_FALSE; + } catch (... /* capture any exception to rethrow it over C code */) { + thunk->capture(); + return MDBX_RESULT_TRUE; + } + } + } thunk; + return error::boolean_or_throw( + ::mdbx_cursor_scan_from(handle_, wrapper::probe, &thunk, + MDBX_cursor_op(start), &from, nullptr, + MDBX_cursor_op(turn), &predicate), + thunk); + } + + template + bool scan_from(CALLABLE_PREDICATE predicate, pair &from, + move_operation start = pair_greater_or_equal, + move_operation turn = next) { + struct wrapper : public exception_thunk { + static int probe(void *context, MDBX_val *key, MDBX_val *value, + void *arg) noexcept { + auto thunk = static_cast(context); + assert(thunk->is_clean()); + auto &predicate = *static_cast(arg); + try { + return predicate(pair(*key, *value)) ? MDBX_RESULT_TRUE + : MDBX_RESULT_FALSE; + } catch (... 
/* capture any exception to rethrow it over C code */) { + thunk->capture(); + return MDBX_RESULT_TRUE; + } + } + } thunk; + return error::boolean_or_throw( + ::mdbx_cursor_scan_from(handle_, wrapper::probe, &thunk, + MDBX_cursor_op(start), &from.key, &from.value, + MDBX_cursor_op(turn), &predicate), + thunk); + } + move_result move(move_operation operation, bool throw_notfound) { return move_result(*this, operation, throw_notfound); } From bf21ee7bde54317c27985b8482512d878479364c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 20 Nov 2023 22:18:44 +0300 Subject: [PATCH 070/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D0=B5=D1=80?= =?UTF-8?q?=D0=B0=D1=82=D0=BE=D1=80=D0=BE=D0=B2=20=D1=81=D1=80=D0=B0=D0=B2?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D1=8F=20=D0=B4=D0=BB=D1=8F=20`mdbx::pa?= =?UTF-8?q?ir`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index d1acd860..5dc57b52 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3044,6 +3044,26 @@ struct pair { assert(bool(key) == bool(value)); return key; } + + /// \brief Three-way fast non-lexicographically length-based comparison. + MDBX_NOTHROW_PURE_FUNCTION static MDBX_CXX14_CONSTEXPR intptr_t + compare_fast(const pair &a, const pair &b) noexcept; + + /// \brief Three-way lexicographically comparison. 
+ MDBX_NOTHROW_PURE_FUNCTION static MDBX_CXX14_CONSTEXPR intptr_t + compare_lexicographically(const pair &a, const pair &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator==(const pair &a, + const pair &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator<(const pair &a, + const pair &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator>(const pair &a, + const pair &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator<=(const pair &a, + const pair &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator>=(const pair &a, + const pair &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator!=(const pair &a, + const pair &b) noexcept; }; /// \brief Combines pair of slices for key and value with boolean flag to @@ -5410,6 +5430,56 @@ slice::is_base64(bool ignore_spaces) const noexcept { //------------------------------------------------------------------------------ +MDBX_CXX14_CONSTEXPR intptr_t pair::compare_fast(const pair &a, + const pair &b) noexcept { + const auto diff = slice::compare_fast(a.key, b.key); + return diff ? diff : slice::compare_fast(a.value, b.value); +} + +MDBX_CXX14_CONSTEXPR intptr_t +pair::compare_lexicographically(const pair &a, const pair &b) noexcept { + const auto diff = slice::compare_lexicographically(a.key, b.key); + return diff ? 
diff : slice::compare_lexicographically(a.value, b.value); +} + +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool +operator==(const pair &a, const pair &b) noexcept { + return a.key.length() == b.key.length() && + a.value.length() == b.value.length() && + memcmp(a.key.data(), b.key.data(), a.key.length()) == 0 && + memcmp(a.value.data(), b.value.data(), a.value.length()) == 0; +} + +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool +operator<(const pair &a, const pair &b) noexcept { + return pair::compare_lexicographically(a, b) < 0; +} + +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool +operator>(const pair &a, const pair &b) noexcept { + return pair::compare_lexicographically(a, b) > 0; +} + +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool +operator<=(const pair &a, const pair &b) noexcept { + return pair::compare_lexicographically(a, b) <= 0; +} + +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool +operator>=(const pair &a, const pair &b) noexcept { + return pair::compare_lexicographically(a, b) >= 0; +} + +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool +operator!=(const pair &a, const pair &b) noexcept { + return a.key.length() != b.key.length() || + a.value.length() != b.value.length() || + memcmp(a.key.data(), b.key.data(), a.key.length()) != 0 || + memcmp(a.value.data(), b.value.data(), a.value.length()) != 0; +} + +//------------------------------------------------------------------------------ + template inline buffer::buffer( const txn &txn, const struct slice &src, const allocator_type &allocator) From b9e2f6dc0907cf0f9e11ef4796c1b169226ce769 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 21 Nov 2023 15:14:55 +0300 Subject: [PATCH 071/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`MDBX=5FCXXnn=5FCONSTEXP?= =?UTF-8?q?R=5FENUM`.?= MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 5dc57b52..e8614e41 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -162,6 +162,20 @@ #define MDBX_CXX20_CONSTEXPR inline #endif /* MDBX_CXX20_CONSTEXPR */ +#if CONSTEXPR_ENUM_FLAGS_OPERATIONS || defined(DOXYGEN) +#define MDBX_CXX01_CONSTEXPR_ENUM MDBX_CXX01_CONSTEXPR +#define MDBX_CXX11_CONSTEXPR_ENUM MDBX_CXX11_CONSTEXPR +#define MDBX_CXX14_CONSTEXPR_ENUM MDBX_CXX14_CONSTEXPR +#define MDBX_CXX17_CONSTEXPR_ENUM MDBX_CXX17_CONSTEXPR +#define MDBX_CXX20_CONSTEXPR_ENUM MDBX_CXX20_CONSTEXPR +#else +#define MDBX_CXX01_CONSTEXPR_ENUM inline +#define MDBX_CXX11_CONSTEXPR_ENUM inline +#define MDBX_CXX14_CONSTEXPR_ENUM inline +#define MDBX_CXX17_CONSTEXPR_ENUM inline +#define MDBX_CXX20_CONSTEXPR_ENUM inline +#endif /* CONSTEXPR_ENUM_FLAGS_OPERATIONS */ + /** Workaround for old compilers without support assertion inside `constexpr` * functions. 
*/ #if defined(CONSTEXPR_ASSERT) @@ -3198,18 +3212,8 @@ struct LIBMDBX_API_TYPE map_handle { map_handle::state state) noexcept; info(const info &) noexcept = default; info &operator=(const info &) noexcept = default; -#if CONSTEXPR_ENUM_FLAGS_OPERATIONS - MDBX_CXX11_CONSTEXPR -#else - inline -#endif - ::mdbx::key_mode key_mode() const noexcept; -#if CONSTEXPR_ENUM_FLAGS_OPERATIONS - MDBX_CXX11_CONSTEXPR -#else - inline -#endif - ::mdbx::value_mode value_mode() const noexcept; + MDBX_CXX11_CONSTEXPR_ENUM mdbx::key_mode key_mode() const noexcept; + MDBX_CXX11_CONSTEXPR_ENUM mdbx::value_mode value_mode() const noexcept; }; }; @@ -5491,17 +5495,13 @@ MDBX_CXX11_CONSTEXPR map_handle::info::info(map_handle::flags flags, map_handle::state state) noexcept : flags(flags), state(state) {} -#if CONSTEXPR_ENUM_FLAGS_OPERATIONS -MDBX_CXX11_CONSTEXPR -#endif -::mdbx::key_mode map_handle::info::key_mode() const noexcept { +MDBX_CXX11_CONSTEXPR_ENUM mdbx::key_mode +map_handle::info::key_mode() const noexcept { return ::mdbx::key_mode(flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)); } -#if CONSTEXPR_ENUM_FLAGS_OPERATIONS -MDBX_CXX11_CONSTEXPR -#endif -::mdbx::value_mode map_handle::info::value_mode() const noexcept { +MDBX_CXX11_CONSTEXPR_ENUM mdbx::value_mode +map_handle::info::value_mode() const noexcept { return ::mdbx::value_mode(flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | MDBX_INTEGERDUP)); } From 55142d8d6f3a1b5649241872da6af4a9ec7f00cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 21 Nov 2023 15:17:07 +0300 Subject: [PATCH 072/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`txn::commit=5Fembark=5F?= =?UTF-8?q?read()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 4 ++++ src/mdbx.c++ | 7 +++++++ 2 files changed, 11 insertions(+) diff --git 
a/mdbx.h++ b/mdbx.h++ index e8614e41..27df86de 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4318,6 +4318,10 @@ public: /// \brief Commit all the operations of a transaction into the database. void commit(); + /// \brief Commit all the operations of a transaction into the database + /// and then start read transaction. + void commit_embark_read(); + using commit_latency = MDBX_commit_latency; /// \brief Commit all the operations of a transaction into the database diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 4381b8e8..621c2695 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -1555,6 +1555,13 @@ void txn_managed::commit(commit_latency *latency) { MDBX_CXX20_UNLIKELY err.throw_exception(); } +void txn_managed::commit_embark_read() { + auto env = this->env(); + commit(); + error::success_or_throw( + ::mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &handle_)); +} + //------------------------------------------------------------------------------ bool txn::drop_map(const char *name, bool throw_if_absent) { From 355090f02e9bc715b30c7142458daeb9ad7cc4ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 21 Nov 2023 15:18:16 +0300 Subject: [PATCH 073/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`is=5Fxyz()`=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20`key=5Fmode`=20=D0=B8=20`value=5Fmode`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 62 ++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 27df86de..6da418e3 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3119,6 +3119,26 @@ enum class key_mode { ///< \note Not yet implemented and PRs are welcome. 
}; +MDBX_CXX01_CONSTEXPR_ENUM bool is_usual(key_mode mode) noexcept { + return (MDBX_db_flags_t(mode) & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) == 0; +} + +MDBX_CXX01_CONSTEXPR_ENUM bool is_ordinal(key_mode mode) noexcept { + return (MDBX_db_flags_t(mode) & MDBX_INTEGERKEY) != 0; +} + +MDBX_CXX01_CONSTEXPR_ENUM bool is_samelength(key_mode mode) noexcept { + return (MDBX_db_flags_t(mode) & MDBX_INTEGERKEY) != 0; +} + +MDBX_CXX01_CONSTEXPR_ENUM bool is_reverse(key_mode mode) noexcept { + return (MDBX_db_flags_t(mode) & MDBX_REVERSEKEY) != 0; +} + +MDBX_CXX01_CONSTEXPR_ENUM bool is_msgpack(key_mode mode) noexcept { + return mode == key_mode::msgpack; +} + /// \brief Kind of the values and sorted multi-values with corresponding /// comparison. enum class value_mode { @@ -3171,6 +3191,15 @@ enum class value_mode { ///< end of the keys to the beginning. In terms of keys, ///< they are not unique, i.e. has duplicates which are ///< sorted by associated data values. +#else + multi_reverse = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_REVERSEDUP), + multi_samelength = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_DUPFIXED), + multi_ordinal = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_DUPFIXED) | + uint32_t(MDBX_INTEGERDUP), + multi_reverse_samelength = uint32_t(MDBX_DUPSORT) | + uint32_t(MDBX_REVERSEDUP) | + uint32_t(MDBX_DUPFIXED), +#endif msgpack = -1 ///< A more than one data value could be associated with each ///< key. Values are in [MessagePack](https://msgpack.org/) ///< format with appropriate comparison. Internally each key is @@ -3178,16 +3207,33 @@ enum class value_mode { ///< In terms of keys, they are not unique, i.e. has duplicates ///< which are sorted by associated data values. ///< \note Not yet implemented and PRs are welcome. 
-#else - multi_reverse = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_REVERSEDUP), - multi_samelength = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_DUPFIXED), - multi_ordinal = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_DUPFIXED) | - uint32_t(MDBX_INTEGERDUP), - multi_reverse_samelength = uint32_t(MDBX_DUPSORT) | - uint32_t(MDBX_REVERSEDUP) | uint32_t(MDBX_DUPFIXED) -#endif }; +MDBX_CXX01_CONSTEXPR_ENUM bool is_usual(value_mode mode) noexcept { + return (MDBX_db_flags_t(mode) & (MDBX_DUPSORT | MDBX_INTEGERDUP | + MDBX_DUPFIXED | MDBX_REVERSEDUP)) == 0; +} + +MDBX_CXX01_CONSTEXPR_ENUM bool is_multi(value_mode mode) noexcept { + return (MDBX_db_flags_t(mode) & MDBX_DUPSORT) != 0; +} + +MDBX_CXX01_CONSTEXPR_ENUM bool is_ordinal(value_mode mode) noexcept { + return (MDBX_db_flags_t(mode) & MDBX_INTEGERDUP) != 0; +} + +MDBX_CXX01_CONSTEXPR_ENUM bool is_samelength(value_mode mode) noexcept { + return (MDBX_db_flags_t(mode) & MDBX_DUPFIXED) != 0; +} + +MDBX_CXX01_CONSTEXPR_ENUM bool is_reverse(value_mode mode) noexcept { + return (MDBX_db_flags_t(mode) & MDBX_REVERSEDUP) != 0; +} + +MDBX_CXX01_CONSTEXPR_ENUM bool is_msgpack(value_mode mode) noexcept { + return mode == value_mode::msgpack; +} + /// \brief A handle for an individual database (key-value spaces) in the /// environment. 
/// \see txn::open_map() \see txn::create_map() From 0a58601cdff51d6d3815ccc78effc5158fc4dcf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 21 Nov 2023 17:35:42 +0300 Subject: [PATCH 074/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`pair::invalid()`=20?= =?UTF-8?q?=D0=B8=20=D0=B2=D0=B7=D0=B0=D0=B8=D0=BC=D0=BE=D0=B4=D0=B5=D0=B9?= =?UTF-8?q?=D1=81=D1=82=D0=B2=D0=B8=D1=8F=20=D1=81=20`std::pair<>`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 6da418e3..4be188bb 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3049,15 +3049,24 @@ struct value_result { /// \brief Combines pair of slices for key and value to represent result of /// certain operations. struct pair { + using stl_pair = std::pair; slice key, value; - pair(const slice &key, const slice &value) noexcept + MDBX_CXX11_CONSTEXPR pair(const slice &key, const slice &value) noexcept : key(key), value(value) {} + MDBX_CXX11_CONSTEXPR pair(const stl_pair &couple) noexcept + : key(couple.first), value(couple.second) {} + MDBX_CXX11_CONSTEXPR operator stl_pair() const noexcept { + return stl_pair(key, value); + } pair(const pair &) noexcept = default; pair &operator=(const pair &) noexcept = default; MDBX_CXX14_CONSTEXPR operator bool() const noexcept { assert(bool(key) == bool(value)); return key; } + MDBX_CXX14_CONSTEXPR static pair invalid() noexcept { + return pair(slice::invalid(), slice::invalid()); + } /// \brief Three-way fast non-lexicographically length-based comparison. MDBX_NOTHROW_PURE_FUNCTION static MDBX_CXX14_CONSTEXPR intptr_t @@ -3084,7 +3093,10 @@ struct pair { /// represent result of certain operations. 
struct pair_result : public pair { bool done; - pair_result(const slice &key, const slice &value, bool done) noexcept + MDBX_CXX11_CONSTEXPR pair_result() noexcept + : pair(pair::invalid()), done(false) {} + MDBX_CXX11_CONSTEXPR pair_result(const slice &key, const slice &value, + bool done) noexcept : pair(key, value), done(done) {} pair_result(const pair_result &) noexcept = default; pair_result &operator=(const pair_result &) noexcept = default; @@ -6585,7 +6597,7 @@ inline int compare_position(const cursor &left, const cursor &right, inline cursor::move_result::move_result(const cursor &cursor, bool throw_notfound) - : pair_result(slice(), slice(), false) { + : pair_result() { done = cursor.move(get_current, &this->key, &this->value, throw_notfound); } From 869cfb3fae41d5dde9f686c5453d0ac4eb3b43d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 21 Nov 2023 19:12:53 +0300 Subject: [PATCH 075/443] =?UTF-8?q?mdbx++:=20=D0=B8=D1=81=D0=BF=D1=80?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D0=B5?= =?UTF-8?q?=D1=87=D0=B0=D1=82=D0=BA=D0=B8=20=D0=B2=20doxygen-=D0=BE=D0=BF?= =?UTF-8?q?=D0=B8=D1=81=D0=B0=D0=BD=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdbx.h++ b/mdbx.h++ index 4be188bb..307e4175 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -376,7 +376,7 @@ using default_allocator = polymorphic_allocator; using default_allocator = legacy_allocator; #endif /* __cpp_lib_memory_resource >= 201603L */ -/// \brief Default singe-byte string. +/// \brief Default single-byte string. 
template using string = ::std::basic_string, ALLOCATOR>; From 304cf25149f6f741bf575aea2b8b1a23b15a6292 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 21 Nov 2023 19:14:26 +0300 Subject: [PATCH 076/443] =?UTF-8?q?mdbx++:=20=D0=B8=D1=81=D0=BF=D0=BE?= =?UTF-8?q?=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20`slice?= =?UTF-8?q?::invalid()`=20=D0=B4=D0=BB=D1=8F=20=D0=BF=D1=80=D0=B5=D0=B4?= =?UTF-8?q?=D0=BE=D1=82=D0=B2=D1=80=D0=B0=D1=89=D0=B5=D0=BD=D0=B8=D1=8F=20?= =?UTF-8?q?=D0=BD=D0=B5=D0=B7=D0=B0=D0=BC=D0=B5=D1=82=D0=BD=D0=BE=D0=B3?= =?UTF-8?q?=D0=BE=20=D0=BD=D0=B5=D0=B2=D0=B5=D1=80=D0=BD=D0=BE=D0=B3=D0=BE?= =?UTF-8?q?=20=D0=B8=D1=81=D0=BF=D0=BE=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0?= =?UTF-8?q?=D0=BD=D0=B8=D1=8F=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 307e4175..175ef2ca 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4510,10 +4510,12 @@ public: struct move_result : public pair_result { inline move_result(const cursor &cursor, bool throw_notfound); move_result(cursor &cursor, move_operation operation, bool throw_notfound) - : move_result(cursor, operation, slice(), slice(), throw_notfound) {} + : move_result(cursor, operation, slice::invalid(), slice::invalid(), + throw_notfound) {} move_result(cursor &cursor, move_operation operation, const slice &key, bool throw_notfound) - : move_result(cursor, operation, key, slice(), throw_notfound) {} + : move_result(cursor, operation, key, slice::invalid(), + throw_notfound) {} inline move_result(cursor &cursor, move_operation operation, const slice &key, const slice &value, bool throw_notfound); @@ -4524,10 +4526,11 @@ public: struct estimate_result : public pair { ptrdiff_t approximate_quantity; estimate_result(const cursor &cursor, 
move_operation operation) - : estimate_result(cursor, operation, slice(), slice()) {} + : estimate_result(cursor, operation, slice::invalid(), + slice::invalid()) {} estimate_result(const cursor &cursor, move_operation operation, const slice &key) - : estimate_result(cursor, operation, key, slice()) {} + : estimate_result(cursor, operation, key, slice::invalid()) {} inline estimate_result(const cursor &cursor, move_operation operation, const slice &key, const slice &value); estimate_result(const estimate_result &) noexcept = default; @@ -4631,7 +4634,7 @@ public: } move_result move(move_operation operation, const slice &key, bool throw_notfound) { - return move_result(*this, operation, key, throw_notfound); + return move_result(*this, operation, key, slice::invalid(), throw_notfound); } move_result move(move_operation operation, const slice &key, const slice &value, bool throw_notfound) { From 8a6bddef4474edb4126d6281bca92bd884990a0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 21 Nov 2023 19:34:38 +0300 Subject: [PATCH 077/443] =?UTF-8?q?mdbx++:=20=D0=BF=D0=BE=D0=BF=D1=80?= =?UTF-8?q?=D0=B0=D0=B2=D0=BA=D0=B0=20=D1=84=D0=BE=D1=80=D0=BC=D0=B0=D1=82?= =?UTF-8?q?=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=BF=D1=80?= =?UTF-8?q?=D0=BE=D1=82=D0=B8=D0=B2=20=D0=B1=D0=B0=D0=B3=D0=B0=20clang-for?= =?UTF-8?q?mat.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 175ef2ca..7b64eaf9 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -6593,9 +6593,9 @@ inline int compare_position(const cursor &left, const cursor &right, const auto diff = compare_position_nothrow(left, right, ignore_nested); assert(compare_position_nothrow(right, left, ignore_nested) == -diff); if (MDBX_LIKELY(int16_t(diff) == diff)) - 
MDBX_CXX20_LIKELY - return int(diff); - throw_incomparable_cursors(); + MDBX_CXX20_LIKELY return int(diff); + else + throw_incomparable_cursors(); } inline cursor::move_result::move_result(const cursor &cursor, From b412807fc10718ee6e32a5d824df7e366e807842 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 21 Nov 2023 19:35:46 +0300 Subject: [PATCH 078/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx::default=5Fbuffer`?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mdbx.h++ b/mdbx.h++ index 7b64eaf9..a056e279 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -376,8 +376,11 @@ using default_allocator = polymorphic_allocator; using default_allocator = legacy_allocator; #endif /* __cpp_lib_memory_resource >= 201603L */ +/// \brief Default buffer. +using default_buffer = buffer; + /// \brief Default single-byte string. 
-template +template using string = ::std::basic_string, ALLOCATOR>; using filehandle = ::mdbx_filehandle_t; From 1f2ff0779698493d79c806219cf51a304d6e029a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 22 Nov 2023 19:31:05 +0300 Subject: [PATCH 079/443] =?UTF-8?q?mdbx++:=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D0=BA=D0=B0/=D0=B8=D1=81=D0=BF?= =?UTF-8?q?=D1=80=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`to=5Fhex()`?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mdbx.c++ | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 621c2695..b3187e82 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -607,7 +607,7 @@ char *to_hex::write_bytes(char *__restrict const dest, size_t dest_size) const { auto ptr = dest; auto src = source.byte_ptr(); - const char alphabase = (uppercase ? 'A' : 'a') - 10; + const char alpha_shift = (uppercase ? 'A' : 'a') - '9' - 1; auto line = ptr; for (const auto end = source.end_byte_ptr(); src != end; ++src) { if (wrap_width && size_t(ptr - line) >= wrap_width) { @@ -616,8 +616,8 @@ char *to_hex::write_bytes(char *__restrict const dest, size_t dest_size) const { } const int8_t hi = *src >> 4; const int8_t lo = *src & 15; - ptr[0] = char(alphabase + hi + (((hi - 10) >> 7) & -7)); - ptr[1] = char(alphabase + lo + (((lo - 10) >> 7) & -7)); + ptr[0] = char('0' + hi + (((9 - hi) >> 7) & alpha_shift)); + ptr[1] = char('0' + lo + (((9 - lo) >> 7) & alpha_shift)); ptr += 2; assert(ptr <= dest + dest_size); } @@ -629,7 +629,7 @@ char *to_hex::write_bytes(char *__restrict const dest, size_t dest_size) const { MDBX_CXX20_LIKELY { ::std::ostream::sentry sentry(out); auto src = source.byte_ptr(); - const char alphabase = (uppercase ? 'A' : 'a') - 10; + const char alpha_shift = (uppercase ? 
'A' : 'a') - '9' - 1; unsigned width = 0; for (const auto end = source.end_byte_ptr(); src != end; ++src) { if (wrap_width && width >= wrap_width) { @@ -638,8 +638,8 @@ char *to_hex::write_bytes(char *__restrict const dest, size_t dest_size) const { } const int8_t hi = *src >> 4; const int8_t lo = *src & 15; - out.put(char(alphabase + hi + (((hi - 10) >> 7) & -7))); - out.put(char(alphabase + lo + (((lo - 10) >> 7) & -7))); + out.put(char('0' + hi + (((9 - hi) >> 7) & alpha_shift))); + out.put(char('0' + lo + (((9 - lo) >> 7) & alpha_shift))); width += 2; } } @@ -670,11 +670,11 @@ char *from_hex::write_bytes(char *__restrict const dest, int8_t hi = src[0]; hi = (hi | 0x20) - 'a'; - hi += 10 + ((hi >> 7) & 7); + hi += 10 + ((hi >> 7) & 39); int8_t lo = src[1]; lo = (lo | 0x20) - 'a'; - lo += 10 + ((lo >> 7) & 7); + lo += 10 + ((lo >> 7) & 39); *ptr++ = hi << 4 | lo; src += 2; From dd47f1bfd9077278bbc6442f0863280626a873e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 24 Nov 2023 08:47:55 +0300 Subject: [PATCH 080/443] =?UTF-8?q?mdbx++:=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D1=85=D0=BE=D0=B4=20=D0=BD=D0=B0=20=D0=B8=D1=81=D0=BF=D0=BE?= =?UTF-8?q?=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20=D0=BF?= =?UTF-8?q?=D0=BE-=D1=83=D0=BC=D0=BE=D0=BB=D1=87=D0=B0=D0=BD=D0=B8=D1=8E?= =?UTF-8?q?=20default=5Fallocator=20=D0=B2=D0=BC=D0=B5=D1=81=D1=82=D0=BE?= =?UTF-8?q?=20legacy=5Fallocator.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 76 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index a056e279..321550ed 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -354,18 +354,6 @@ static MDBX_CXX20_CONSTEXPR int memcmp(const void *a, const void *b, /// but it is recommended to use \ref polymorphic_allocator. 
using legacy_allocator = ::std::string::allocator_type; -struct slice; -struct default_capacity_policy; -template -class buffer; -class env; -class env_managed; -class txn; -class txn_managed; -class cursor; -class cursor_managed; - #if defined(DOXYGEN) || \ (defined(__cpp_lib_memory_resource) && \ __cpp_lib_memory_resource >= 201603L && _GLIBCXX_USE_CXX11_ABI) @@ -376,6 +364,18 @@ using default_allocator = polymorphic_allocator; using default_allocator = legacy_allocator; #endif /* __cpp_lib_memory_resource >= 201603L */ +struct slice; +struct default_capacity_policy; +template +class buffer; +class env; +class env_managed; +class txn; +class txn_managed; +class cursor; +class cursor_managed; + /// \brief Default buffer. using default_buffer = buffer; @@ -633,24 +633,24 @@ concept SliceTranscoder = #endif /* MDBX_HAVE_CXX20_CONCEPTS */ -template inline buffer make_buffer(PRODUCER &producer, const ALLOCATOR &allocator = ALLOCATOR()); -template inline buffer make_buffer(const PRODUCER &producer, const ALLOCATOR &allocator = ALLOCATOR()); -template inline string make_string(PRODUCER &producer, const ALLOCATOR &allocator = ALLOCATOR()); -template inline string make_string(const PRODUCER &producer, const ALLOCATOR &allocator = ALLOCATOR()); @@ -783,7 +783,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { #endif /* __cpp_lib_string_view >= 201606L */ template , - class ALLOCATOR = legacy_allocator> + class ALLOCATOR = default_allocator> MDBX_CXX20_CONSTEXPR ::std::basic_string as_string(const ALLOCATOR &allocator = ALLOCATOR()) const { static_assert(sizeof(CHAR) == 1, "Must be single byte characters"); @@ -798,27 +798,27 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { } /// \brief Returns a string with a hexadecimal dump of the slice content. 
- template + template inline string as_hex_string(bool uppercase = false, unsigned wrap_width = 0, const ALLOCATOR &allocator = ALLOCATOR()) const; /// \brief Returns a string with a /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of the slice content. - template + template inline string as_base58_string(unsigned wrap_width = 0, const ALLOCATOR &allocator = ALLOCATOR()) const; /// \brief Returns a string with a /// [Base58](https://en.wikipedia.org/wiki/Base64) dump of the slice content. - template + template inline string as_base64_string(unsigned wrap_width = 0, const ALLOCATOR &allocator = ALLOCATOR()) const; /// \brief Returns a buffer with a hexadecimal dump of the slice content. - template inline buffer encode_hex(bool uppercase = false, unsigned wrap_width = 0, @@ -826,7 +826,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { /// \brief Returns a buffer with a /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of the slice content. - template inline buffer encode_base58(unsigned wrap_width = 0, @@ -834,14 +834,14 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { /// \brief Returns a buffer with a /// [Base64](https://en.wikipedia.org/wiki/Base64) dump of the slice content. - template inline buffer encode_base64(unsigned wrap_width = 0, const ALLOCATOR &allocator = ALLOCATOR()) const; /// \brief Decodes hexadecimal dump from the slice content to returned buffer. - template inline buffer hex_decode(bool ignore_spaces = false, @@ -849,7 +849,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { /// \brief Decodes [Base58](https://en.wikipedia.org/wiki/Base58) dump /// from the slice content to returned buffer. - template inline buffer base58_decode(bool ignore_spaces = false, @@ -857,7 +857,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { /// \brief Decodes [Base64](https://en.wikipedia.org/wiki/Base64) dump /// from the slice content to returned buffer. 
- template inline buffer base64_decode(bool ignore_spaces = false, @@ -1294,13 +1294,13 @@ struct LIBMDBX_API to_hex { } /// \brief Returns a string with a hexadecimal dump of a passed slice. - template + template string as_string(const ALLOCATOR &allocator = ALLOCATOR()) const { return make_string(*this, allocator); } /// \brief Returns a buffer with a hexadecimal dump of a passed slice. - template buffer as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { @@ -1345,14 +1345,14 @@ struct LIBMDBX_API to_base58 { /// \brief Returns a string with a /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of a passed slice. - template + template string as_string(const ALLOCATOR &allocator = ALLOCATOR()) const { return make_string(*this, allocator); } /// \brief Returns a buffer with a /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of a passed slice. - template buffer as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { @@ -1400,14 +1400,14 @@ struct LIBMDBX_API to_base64 { /// \brief Returns a string with a /// [Base64](https://en.wikipedia.org/wiki/Base64) dump of a passed slice. - template + template string as_string(const ALLOCATOR &allocator = ALLOCATOR()) const { return make_string(*this, allocator); } /// \brief Returns a buffer with a /// [Base64](https://en.wikipedia.org/wiki/Base64) dump of a passed slice. - template buffer as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { @@ -1464,13 +1464,13 @@ struct LIBMDBX_API from_hex { } /// \brief Decodes hexadecimal dump from a passed slice to returned string. - template + template string as_string(const ALLOCATOR &allocator = ALLOCATOR()) const { return make_string(*this, allocator); } /// \brief Decodes hexadecimal dump from a passed slice to returned buffer. 
- template buffer as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { @@ -1510,14 +1510,14 @@ struct LIBMDBX_API from_base58 { /// \brief Decodes [Base58](https://en.wikipedia.org/wiki/Base58) dump from a /// passed slice to returned string. - template + template string as_string(const ALLOCATOR &allocator = ALLOCATOR()) const { return make_string(*this, allocator); } /// \brief Decodes [Base58](https://en.wikipedia.org/wiki/Base58) dump from a /// passed slice to returned buffer. - template buffer as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { @@ -1559,14 +1559,14 @@ struct LIBMDBX_API from_base64 { /// \brief Decodes [Base64](https://en.wikipedia.org/wiki/Base64) dump from a /// passed slice to returned string. - template + template string as_string(const ALLOCATOR &allocator = ALLOCATOR()) const { return make_string(*this, allocator); } /// \brief Decodes [Base64](https://en.wikipedia.org/wiki/Base64) dump from a /// passed slice to returned buffer. - template buffer as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { From be8428257d11c98c1961428c4f4212942ab76bc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 24 Nov 2023 08:49:43 +0300 Subject: [PATCH 081/443] =?UTF-8?q?mdbx++:=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D0=B4=D0=B5=D0=BB=D0=BA=D0=B0=20=D0=BF=D0=BE=D0=B4=D0=B4=D0=B5?= =?UTF-8?q?=D1=80=D0=B6=D0=BA=D0=B8=20base58=20=D0=BF=D0=BE=20RFC-draft.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 5 +- src/mdbx.c++ | 416 +++++++++++++++++++++++++-------------------------- 2 files changed, 204 insertions(+), 217 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 321550ed..f5e906c8 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -1362,8 +1362,7 @@ struct LIBMDBX_API to_base58 { /// \brief Returns the buffer size in bytes needed for /// 
[Base58](https://en.wikipedia.org/wiki/Base58) dump of passed slice. MDBX_CXX11_CONSTEXPR size_t envisage_result_length() const noexcept { - const size_t bytes = - source.length() / 8 * 11 + (source.length() % 8 * 43 + 31) / 32; + const size_t bytes = (source.length() * 11 + 7) / 8; return wrap_width ? bytes + bytes / wrap_width : bytes; } @@ -1528,7 +1527,7 @@ struct LIBMDBX_API from_base58 { /// [Base58](https://en.wikipedia.org/wiki/Base58) dump from a passed slice to /// decoded data. MDBX_CXX11_CONSTEXPR size_t envisage_result_length() const noexcept { - return source.length() / 11 * 8 + source.length() % 11 * 32 / 43; + return source.length() /* могут быть все нули кодируемые один-к-одному */; } /// \brief Fills the destination with data decoded from diff --git a/src/mdbx.c++ b/src/mdbx.c++ index b3187e82..823404b2 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -207,6 +207,44 @@ __cold bug::~bug() noexcept {} #endif /* Unused*/ +struct line_wrapper { + char *line, *ptr; + line_wrapper(char *buf) noexcept : line(buf), ptr(buf) {} + void put(char c, size_t wrap_width) noexcept { + *ptr++ = c; + if (wrap_width && ptr >= wrap_width + line) { + *ptr++ = '\n'; + line = ptr; + } + } + void put(const ::mdbx::slice &chunk, size_t wrap_width) noexcept { + if (!wrap_width || wrap_width > (ptr - line) + chunk.length()) { + memcpy(ptr, chunk.data(), chunk.length()); + ptr += chunk.length(); + } else { + for (size_t i = 0; i < chunk.length(); ++i) + put(chunk.char_ptr()[i], wrap_width); + } + } +}; + +template +struct temp_buffer { + TYPE inplace[(INPLACE_BYTES + sizeof(TYPE) - 1) / sizeof(TYPE)]; + const size_t size; + TYPE *const area; + temp_buffer(size_t bytes) + : size((bytes + sizeof(TYPE) - 1) / sizeof(TYPE)), + area((bytes > sizeof(inplace)) ? 
new TYPE[size] : inplace) { + memset(area, 0, sizeof(TYPE) * size); + } + ~temp_buffer() { + if (area != inplace) + delete[] area; + } + TYPE *end() const { return area + size; } +}; + } // namespace //------------------------------------------------------------------------------ @@ -717,156 +755,135 @@ enum : signed char { IL /* invalid */ = -1 }; -static const byte b58_alphabet[58] = { - '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', - 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', - 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', - 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'}; - -#ifndef bswap64 -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -static inline uint64_t bswap64(uint64_t v) noexcept { -#if __GNUC_PREREQ(4, 4) || __CLANG_PREREQ(4, 0) || \ - __has_builtin(__builtin_bswap64) - return __builtin_bswap64(v); -#elif defined(_MSC_VER) && !defined(__clang__) - return _byteswap_uint64(v); -#elif defined(__bswap_64) - return __bswap_64(v); -#elif defined(bswap_64) - return bswap_64(v); +#if MDBX_WORDBITS > 32 +using b58_uint = uint_fast64_t; #else - return v << 56 | v >> 56 | ((v << 40) & UINT64_C(0x00ff000000000000)) | - ((v << 24) & UINT64_C(0x0000ff0000000000)) | - ((v << 8) & UINT64_C(0x000000ff00000000)) | - ((v >> 8) & UINT64_C(0x00000000ff000000)) | - ((v >> 24) & UINT64_C(0x0000000000ff0000)) | - ((v >> 40) & UINT64_C(0x000000000000ff00)); +using b58_uint = uint_fast32_t; #endif -} -#endif /* __BYTE_ORDER__ */ -#endif /* ifndef bswap64 */ -static inline char b58_8to11(uint64_t &v) noexcept { - const unsigned i = unsigned(v % 58); +struct b58_buffer : public temp_buffer { + b58_buffer(size_t bytes, size_t estimation_ratio_numerator, + size_t estimation_ratio_denominator, size_t extra = 0) + : temp_buffer((/* пересчитываем по указанной пропорции */ + bytes = (bytes * estimation_ratio_numerator + + estimation_ratio_denominator - 1) / + 
estimation_ratio_denominator, + /* учитываем резервный старший байт в каждом слове */ + ((bytes + sizeof(b58_uint) - 2) / (sizeof(b58_uint) - 1) * + sizeof(b58_uint) + + extra) * + sizeof(b58_uint))) {} +}; + +static byte b58_8to11(b58_uint &v) noexcept { + static const char b58_alphabet[58] = { + '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', + 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', + 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'}; + + const auto i = size_t(v % 58); v /= 58; return b58_alphabet[i]; } +static slice b58_encode(b58_buffer &buf, const byte *begin, const byte *end) { + auto high = buf.end(); + const auto modulo = + b58_uint((sizeof(b58_uint) > 4) ? UINT64_C(0x1A636A90B07A00) /* 58^9 */ + : UINT32_C(0xACAD10) /* 58^4 */); + static_assert(sizeof(modulo) == 4 || sizeof(modulo) == 8, "WTF?"); + while (begin < end) { + b58_uint carry = *begin++; + auto ptr = buf.end(); + do { + assert(ptr > buf.area); + carry += *--ptr << CHAR_BIT; + *ptr = carry % modulo; + carry /= modulo; + } while (carry || ptr > high); + high = ptr; + } + + byte *output = static_cast(static_cast(buf.area)); + auto ptr = output; + for (auto porous = high; porous < buf.end();) { + auto chunk = *porous++; + static_assert(sizeof(chunk) == 4 || sizeof(chunk) == 8, "WTF?"); + assert(chunk < modulo); + if (sizeof(chunk) > 4) { + ptr[8] = b58_8to11(chunk); + ptr[7] = b58_8to11(chunk); + ptr[6] = b58_8to11(chunk); + ptr[5] = b58_8to11(chunk); + ptr[4] = b58_8to11(chunk); + ptr[3] = b58_8to11(chunk); + ptr[2] = b58_8to11(chunk); + ptr[1] = b58_8to11(chunk); + ptr[0] = b58_8to11(chunk); + ptr += 9; + } else { + ptr[3] = b58_8to11(chunk); + ptr[2] = b58_8to11(chunk); + ptr[1] = b58_8to11(chunk); + ptr[0] = b58_8to11(chunk); + ptr += 4; + } + assert(static_cast(ptr) < static_cast(porous)); + } + + while (output < ptr && *output == '1') + 
++output; + return slice(output, ptr); +} + char *to_base58::write_bytes(char *__restrict const dest, size_t dest_size) const { if (MDBX_UNLIKELY(envisage_result_length() > dest_size)) MDBX_CXX20_UNLIKELY throw_too_small_target_buffer(); - auto ptr = dest; - auto src = source.byte_ptr(); - size_t left = source.length(); - auto line = ptr; - while (MDBX_LIKELY(left > 7)) { - uint64_t v; - std::memcpy(&v, src, 8); - src += 8; -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - v = bswap64(v); -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#else -#error "FIXME: Unsupported byte order" -#endif /* __BYTE_ORDER__ */ - ptr[10] = b58_8to11(v); - ptr[9] = b58_8to11(v); - ptr[8] = b58_8to11(v); - ptr[7] = b58_8to11(v); - ptr[6] = b58_8to11(v); - ptr[5] = b58_8to11(v); - ptr[4] = b58_8to11(v); - ptr[3] = b58_8to11(v); - ptr[2] = b58_8to11(v); - ptr[1] = b58_8to11(v); - ptr[0] = b58_8to11(v); - assert(v == 0); - ptr += 11; - left -= 8; - if (wrap_width && size_t(ptr - line) >= wrap_width && left) { - *ptr = '\n'; - line = ++ptr; - } - assert(ptr <= dest + dest_size); + auto begin = source.byte_ptr(); + auto end = source.end_byte_ptr(); + line_wrapper wrapper(dest); + while (MDBX_LIKELY(begin < end) && *begin == 0) { + wrapper.put('1', wrap_width); + assert(wrapper.ptr <= dest + dest_size); + ++begin; } - if (left) { - uint64_t v = 0; - unsigned parrots = 31; - do { - v = (v << 8) + *src++; - parrots += 43; - } while (--left); - - auto tail = ptr += parrots >> 5; - assert(ptr <= dest + dest_size); - do { - *--tail = b58_8to11(v); - parrots -= 32; - } while (parrots > 31); - assert(v == 0); - } - - return ptr; + b58_buffer buf(end - begin, 11, 8); + wrapper.put(b58_encode(buf, begin, end), wrap_width); + return wrapper.ptr; } ::std::ostream &to_base58::output(::std::ostream &out) const { if (MDBX_LIKELY(!is_empty())) MDBX_CXX20_LIKELY { ::std::ostream::sentry sentry(out); - auto src = source.byte_ptr(); - size_t left = source.length(); + auto begin = source.byte_ptr(); + auto end = 
source.end_byte_ptr(); unsigned width = 0; - std::array buf; - - while (MDBX_LIKELY(left > 7)) { - uint64_t v; - std::memcpy(&v, src, 8); - src += 8; -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - v = bswap64(v); -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#else -#error "FIXME: Unsupported byte order" -#endif /* __BYTE_ORDER__ */ - buf[10] = b58_8to11(v); - buf[9] = b58_8to11(v); - buf[8] = b58_8to11(v); - buf[7] = b58_8to11(v); - buf[6] = b58_8to11(v); - buf[5] = b58_8to11(v); - buf[4] = b58_8to11(v); - buf[3] = b58_8to11(v); - buf[2] = b58_8to11(v); - buf[1] = b58_8to11(v); - buf[0] = b58_8to11(v); - assert(v == 0); - out.write(&buf.front(), 11); - left -= 8; - if (wrap_width && (width += 11) >= wrap_width && left) { + while (MDBX_LIKELY(begin < end) && *begin == 0) { + out.put('1'); + if (wrap_width && ++width >= wrap_width) { out << ::std::endl; width = 0; } + ++begin; } - if (left) { - uint64_t v = 0; - unsigned parrots = 31; - do { - v = (v << 8) + *src++; - parrots += 43; - } while (--left); - - auto ptr = buf.end(); - do { - *--ptr = b58_8to11(v); - parrots -= 32; - } while (parrots > 31); - assert(v == 0); - out.write(&*ptr, buf.end() - ptr); + b58_buffer buf(end - begin, 11, 8); + const auto chunk = b58_encode(buf, begin, end); + if (!wrap_width || wrap_width > width + chunk.length()) + out.write(chunk.char_ptr(), chunk.length()); + else { + for (size_t i = 0; i < chunk.length(); ++i) { + out.put(chunk.char_ptr()[i]); + if (wrap_width && ++width >= wrap_width) { + out << ::std::endl; + width = 0; + } + } } } return out; @@ -892,10 +909,46 @@ const signed char b58_map[256] = { IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL // f0 }; -static inline signed char b58_11to8(uint64_t &v, const byte c) noexcept { - const signed char m = b58_map[c]; - v = v * 58 + m; - return m; +static slice b58_decode(b58_buffer &buf, const byte *begin, const byte *end, + bool ignore_spaces) { + auto high = buf.end(); + while (begin < end) { + const auto c = 
b58_map[*begin++]; + if (MDBX_LIKELY(c >= 0)) { + b58_uint carry = c; + auto ptr = buf.end(); + do { + assert(ptr > buf.area); + carry += *--ptr * 58; + *ptr = carry & (~b58_uint(0) >> CHAR_BIT); + carry >>= CHAR_BIT * (sizeof(carry) - 1); + } while (carry || ptr > high); + high = ptr; + } else if (MDBX_UNLIKELY(!ignore_spaces || !isspace(begin[-1]))) + MDBX_CXX20_UNLIKELY + throw std::domain_error("mdbx::from_base58:: invalid base58 string"); + } + + byte *output = static_cast(static_cast(buf.area)); + auto ptr = output; + for (auto porous = high; porous < buf.end(); ++porous) { + auto chunk = *porous; + static_assert(sizeof(chunk) == 4 || sizeof(chunk) == 8, "WTF?"); + assert(chunk <= (~b58_uint(0) >> CHAR_BIT)); + if (sizeof(chunk) > 4) { + *ptr++ = byte(uint_fast64_t(chunk) >> CHAR_BIT * 6); + *ptr++ = byte(uint_fast64_t(chunk) >> CHAR_BIT * 5); + *ptr++ = byte(uint_fast64_t(chunk) >> CHAR_BIT * 4); + *ptr++ = byte(chunk >> CHAR_BIT * 3); + } + *ptr++ = byte(chunk >> CHAR_BIT * 2); + *ptr++ = byte(chunk >> CHAR_BIT * 1); + *ptr++ = byte(chunk >> CHAR_BIT * 0); + } + + while (output < ptr && *output == 0) + ++output; + return slice(output, ptr); } char *from_base58::write_bytes(char *__restrict const dest, @@ -904,98 +957,33 @@ char *from_base58::write_bytes(char *__restrict const dest, MDBX_CXX20_UNLIKELY throw_too_small_target_buffer(); auto ptr = dest; - auto src = source.byte_ptr(); - for (auto left = source.length(); left > 0;) { - if (MDBX_UNLIKELY(isspace(*src)) && ignore_spaces) { - ++src; - --left; - continue; - } - - if (MDBX_LIKELY(left > 10)) { - uint64_t v = 0; - if (MDBX_UNLIKELY((b58_11to8(v, src[0]) | b58_11to8(v, src[1]) | - b58_11to8(v, src[2]) | b58_11to8(v, src[3]) | - b58_11to8(v, src[4]) | b58_11to8(v, src[5]) | - b58_11to8(v, src[6]) | b58_11to8(v, src[7]) | - b58_11to8(v, src[8]) | b58_11to8(v, src[9]) | - b58_11to8(v, src[10])) < 0)) - MDBX_CXX20_UNLIKELY goto bailout; -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - v = bswap64(v); 
-#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#else -#error "FIXME: Unsupported byte order" -#endif /* __BYTE_ORDER__ */ - std::memcpy(ptr, &v, 8); - ptr += 8; - src += 11; - left -= 11; - assert(ptr <= dest + dest_size); - continue; - } - - constexpr unsigned invalid_length_mask = 1 << 1 | 1 << 4 | 1 << 8; - if (MDBX_UNLIKELY(invalid_length_mask & (1 << left))) - MDBX_CXX20_UNLIKELY goto bailout; - - uint64_t v = 1; - unsigned parrots = 0; - do { - if (MDBX_UNLIKELY(b58_11to8(v, *src++) < 0)) - MDBX_CXX20_UNLIKELY goto bailout; - parrots += 32; - } while (--left); - - auto tail = ptr += parrots / 43; - assert(ptr <= dest + dest_size); - do { - *--tail = byte(v); - v >>= 8; - } while (v > 255); - break; + auto begin = source.byte_ptr(); + auto const end = source.end_byte_ptr(); + while (begin < end && *begin <= '1') { + if (MDBX_LIKELY(*begin == '1')) + MDBX_CXX20_LIKELY *ptr++ = 0; + else if (MDBX_UNLIKELY(!ignore_spaces || !isspace(*begin))) + MDBX_CXX20_UNLIKELY + throw std::domain_error("mdbx::from_base58:: invalid base58 string"); + ++begin; } - return ptr; -bailout: - throw std::domain_error("mdbx::from_base58:: invalid base58 string"); + b58_buffer buf(end - begin, 47, 64); + auto slice = b58_decode(buf, begin, end, ignore_spaces); + memcpy(ptr, slice.data(), slice.length()); + return ptr + slice.length(); } bool from_base58::is_erroneous() const noexcept { - bool got = false; - auto src = source.byte_ptr(); - for (auto left = source.length(); left > 0;) { - if (MDBX_UNLIKELY(*src <= ' ') && - MDBX_LIKELY(ignore_spaces && isspace(*src))) { - ++src; - --left; - continue; - } - - if (MDBX_LIKELY(left > 10)) { - if (MDBX_UNLIKELY((b58_map[src[0]] | b58_map[src[1]] | b58_map[src[2]] | - b58_map[src[3]] | b58_map[src[4]] | b58_map[src[5]] | - b58_map[src[6]] | b58_map[src[7]] | b58_map[src[8]] | - b58_map[src[9]] | b58_map[src[10]]) < 0)) - MDBX_CXX20_UNLIKELY return true; - src += 11; - left -= 11; - got = true; - continue; - } - - constexpr unsigned 
invalid_length_mask = 1 << 1 | 1 << 4 | 1 << 8; - if (invalid_length_mask & (1 << left)) - return false; - - do - if (MDBX_UNLIKELY(b58_map[*src++] < 0)) - MDBX_CXX20_UNLIKELY return true; - while (--left); - got = true; - break; + auto begin = source.byte_ptr(); + auto const end = source.end_byte_ptr(); + while (begin < end) { + if (MDBX_UNLIKELY(b58_map[*begin] < 0 && + !(ignore_spaces && isspace(*begin)))) + return true; + ++begin; } - return !got; + return false; } //------------------------------------------------------------------------------ From 0e250a4457144090ea9248b6c2786424026727b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 24 Nov 2023 15:49:05 +0300 Subject: [PATCH 082/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=BE=D0=B4=D0=B4?= =?UTF-8?q?=D0=B5=D1=80=D0=B6=D0=BA=D0=B8=20`std::span<>`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index f5e906c8..d42b5942 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -85,6 +85,10 @@ #include #endif +#if defined(__cpp_lib_span) && __cpp_lib_span >= 202002L +#include +#endif + #if __cplusplus >= 201103L #include #include @@ -699,6 +703,47 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { MDBX_CXX14_CONSTEXPR slice(MDBX_val &&src); MDBX_CXX14_CONSTEXPR slice(slice &&src) noexcept; +#if defined(DOXYGEN) || (defined(__cpp_lib_span) && __cpp_lib_span >= 202002L) + template + MDBX_CXX14_CONSTEXPR slice(const ::std::span &span) + : slice(span.begin(), span.end()) { + static_assert(::std::is_standard_layout::value && + !::std::is_pointer::value, + "Must be a standard layout type!"); + } + + template + MDBX_CXX14_CONSTEXPR ::std::span as_span() const { + 
static_assert(::std::is_standard_layout::value && + !::std::is_pointer::value, + "Must be a standard layout type!"); + if (MDBX_LIKELY(size() % sizeof(POD) == 0)) + MDBX_CXX20_LIKELY + return ::std::span(static_cast(data()), + size() / sizeof(POD)); + throw_bad_value_size(); + } + + template MDBX_CXX14_CONSTEXPR ::std::span as_span() { + static_assert(::std::is_standard_layout::value && + !::std::is_pointer::value, + "Must be a standard layout type!"); + if (MDBX_LIKELY(size() % sizeof(POD) == 0)) + MDBX_CXX20_LIKELY + return ::std::span(static_cast(data()), size() / sizeof(POD)); + throw_bad_value_size(); + } + + MDBX_CXX14_CONSTEXPR ::std::span bytes() const { + return as_span(); + } + MDBX_CXX14_CONSTEXPR ::std::span bytes() { return as_span(); } + MDBX_CXX14_CONSTEXPR ::std::span chars() const { + return as_span(); + } + MDBX_CXX14_CONSTEXPR ::std::span chars() { return as_span(); } +#endif /* __cpp_lib_span >= 202002L */ + #if defined(DOXYGEN) || \ (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) /// \brief Create a slice that refers to the same contents as "string_view" @@ -2368,6 +2413,33 @@ public: return slice_; } +#if defined(DOXYGEN) || (defined(__cpp_lib_span) && __cpp_lib_span >= 202002L) + template + MDBX_CXX14_CONSTEXPR buffer(const ::std::span &span) + : buffer(span.begin(), span.end()) { + static_assert(::std::is_standard_layout::value && + !::std::is_pointer::value, + "Must be a standard layout type!"); + } + + template + MDBX_CXX14_CONSTEXPR ::std::span as_span() const { + return slice_.template as_span(); + } + template MDBX_CXX14_CONSTEXPR ::std::span as_span() { + return slice_.template as_span(); + } + + MDBX_CXX14_CONSTEXPR ::std::span bytes() const { + return as_span(); + } + MDBX_CXX14_CONSTEXPR ::std::span bytes() { return as_span(); } + MDBX_CXX14_CONSTEXPR ::std::span chars() const { + return as_span(); + } + MDBX_CXX14_CONSTEXPR ::std::span chars() { return as_span(); } +#endif /* __cpp_lib_span >= 202002L */ + 
template static buffer wrap(const POD &pod, bool make_reference = false, const allocator_type &allocator = allocator_type()) { From ef69336189b02ad3d48c4d952a25d19b805d54d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 24 Nov 2023 14:11:41 +0300 Subject: [PATCH 083/443] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=82=D0=B5=D1=81=D1=82?= =?UTF-8?q?=D0=B0=20=D0=B4=D0=BB=D1=8F=20=D1=82=D1=80=D0=B0=D0=BD=D1=81?= =?UTF-8?q?=D0=BA=D0=BE=D0=B4=D0=B5=D1=80=D0=BE=D0=B2=20hex/base64/base58.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 8 ++ test/extra/hex_base64_base58.c++ | 128 +++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 test/extra/hex_base64_base58.c++ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 23789be0..08866fe4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -88,6 +88,13 @@ if(UNIX AND NOT SUBPROJECT) set_target_properties(test_extra_dupfixed_multiple PROPERTIES CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) endif() + add_executable(test_extra_hex_base64_base58 extra/hex_base64_base58.c++) + target_include_directories(test_extra_hex_base64_base58 PRIVATE "${PROJECT_SOURCE_DIR}") + target_link_libraries(test_extra_hex_base64_base58 ${TOOL_MDBX_LIB}) + if(MDBX_CXX_STANDARD) + set_target_properties(test_extra_hex_base64_base58 PROPERTIES + CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) + endif() endif() endif() @@ -172,6 +179,7 @@ else() if(MDBX_BUILD_CXX) add_test(NAME extra_maindb_ordinal COMMAND test_extra_maindb_ordinal) add_test(NAME extra_dupfixed_multiple COMMAND test_extra_dupfixed_multiple) + add_test(NAME extra_hex_base64_base58 COMMAND test_extra_hex_base64_base58) endif() endif() diff --git 
a/test/extra/hex_base64_base58.c++ b/test/extra/hex_base64_base58.c++ new file mode 100644 index 00000000..879e8f7b --- /dev/null +++ b/test/extra/hex_base64_base58.c++ @@ -0,0 +1,128 @@ +#include "mdbx.h++" +#include +#include +#include + +#include +#include + +using buffer = mdbx::default_buffer; + +std::default_random_engine prng(42); + +static buffer random(size_t length) { + buffer result(length); +#if defined(__cpp_lib_span) && __cpp_lib_span >= 202002L + for (auto &i : result.bytes()) + i = prng(); +#else + for (auto p = result.byte_ptr(); p < result.end_byte_ptr(); ++p) + *p = mdbx::byte(prng()); +#endif + return result; +} + +static bool basic() { + bool ok = true; + const char *const hex_dump = "1D58fa\n2e46E3\nBd9c7A\nC0bF"; + const uint8_t native[] = {0x1D, 0x58, 0xfa, 0x2e, 0x46, 0xE3, + 0xBd, 0x9c, 0x7A, 0xC0, 0xbF}; + + if (mdbx::slice(hex_dump).hex_decode(true) != mdbx::slice::wrap(native)) + std::cerr << "hex_decode() failed\n"; + else if (mdbx::slice::wrap(native).encode_hex(true, 4).hex_decode(true) != + mdbx::slice::wrap(native)) + std::cerr << "hex_encode(UPPERCASE) failed\n"; + else if (mdbx::slice::wrap(native).encode_hex(false).hex_decode(true) != + mdbx::slice::wrap(native)) + std::cerr << "hex_encode(lowercase) failed\n"; + + if (mdbx::slice("").as_base64_string() != "" || + mdbx::slice(" ").encode_base64().as_string() != "IA==" || + mdbx::slice("~0").encode_base64().as_string() != "fjA=" || + mdbx::slice("A_z").encode_base64().as_string() != "QV96" || + mdbx::slice("Ka9q").encode_base64().as_string() != "S2E5cQ==" || + mdbx::slice("123456789").encode_base64().as_string() != "MTIzNDU2Nzg5") { + std::cerr << "encode_base64() failed\n"; + ok = false; + } + + const uint8_t base58_rfc[] = {0x00, 0x00, 0x28, 0x7f, 0xb4, 0xcd}; + if (mdbx::slice("").as_base58_string() != "" || + mdbx::slice(" ").encode_base58().as_string() != "Z" || + mdbx::slice("Hello World!").as_base58_string() != "2NEpo7TZRRrLZSi2U" || + mdbx::slice("The quick brown fox 
jumps over the lazy dog.") + .encode_base58() + .as_string() != + "USm3fpXnKG5EUBx2ndxBDMPVciP5hGey2Jh4NDv6gmeo1LkMeiKrLJUUBk6Z" || + mdbx::slice::wrap(base58_rfc).as_base58_string() != "11233QC4" || + mdbx::slice("~0").encode_base58().as_string() != "Aby" || + mdbx::slice("A_z").encode_base58().as_string() != "NxZw" || + mdbx::slice("Ka9q").encode_base58().as_string() != "2vkjDi" || + mdbx::slice("123456789").encode_base58().as_string() != "dKYWwnRHc7Ck") { + std::cerr << "encode_base58() failed\n"; + ok = false; + } + + if (mdbx::slice("").base58_decode() != mdbx::slice() || + mdbx::slice("Z").base58_decode() != mdbx::slice(" ") || + mdbx::slice("2NEpo7TZRRrLZSi2U").base58_decode() != "Hello World!" || + mdbx::slice( + "USm3fpXnKG5EUBx2ndxBDMPVciP5hGey2Jh4NDv6gmeo1LkMeiKrLJUUBk6Z") + .base58_decode() != + mdbx::slice("The quick brown fox jumps over the lazy dog.") || + mdbx::slice("11233QC4").base58_decode() != + mdbx::slice::wrap(base58_rfc) || + mdbx::slice("Aby").base58_decode() != mdbx::slice("~0") || + mdbx::slice("NxZw").base58_decode() != mdbx::slice("A_z") || + mdbx::slice("2vkjDi").base58_decode() != mdbx::slice("Ka9q") || + mdbx::slice("dKYWwnRHc7Ck").base58_decode() != mdbx::slice("123456789")) { + std::cerr << "decode_base58() failed\n"; + ok = false; + } + + return ok; +} + +int main(int argc, const char *argv[]) { + (void)argc; + (void)argv; + + auto ok = basic(); + for (size_t n = 0; n < 1000; ++n) { + for (size_t length = 0; ok && length < 111; ++length) { + const auto pattern = random(length); + if (pattern != pattern.encode_hex(bool(prng() & 1), prng() % 111) + .hex_decode(true) + .encode_hex() + .hex_decode(false)) { + std::cerr << "hex encode/decode failed: n " << n << ", length " + << length << std::endl; + ok = false; + } + if (pattern != pattern.encode_base64(unsigned(prng() % 111)) + .base64_decode(true) + .encode_base64() + .base64_decode(false)) { + std::cerr << "base64 encode/decode failed: n " << n << ", length " + << length << 
std::endl; + ok = false; + } + if (pattern != pattern.encode_base58(unsigned(prng() % 111)) + .base58_decode(true) + .encode_base58() + .base58_decode(false)) { + std::cerr << "base58 encode/decode failed: n " << n << ", length " + << length << std::endl; + ok = false; + } + } + } + + if (!ok) { + std::cerr << "Fail\n"; + return EXIT_FAILURE; + } + std::cout << "OK\n"; + return EXIT_SUCCESS; +} From e68771df18fe874be1b97baf06f3d6841b122141 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 26 Nov 2023 12:34:06 +0300 Subject: [PATCH 084/443] =?UTF-8?q?mdbx++:=20=D0=94=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`buffer=5Fpair<>`=20?= =?UTF-8?q?=D0=B8=20`buffer=5Fpair=5Fspec<>`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index d42b5942..74373039 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3180,6 +3180,92 @@ struct pair_result : public pair { } }; +template +struct buffer_pair_spec { + using buffer_type = buffer; + using allocator_type = typename buffer_type::allocator_type; + using allocator_traits = typename buffer_type::allocator_traits; + using reservation_policy = CAPACITY_POLICY; + using stl_pair = ::std::pair; + buffer_type key, value; + + MDBX_CXX20_CONSTEXPR buffer_pair_spec() noexcept = default; + MDBX_CXX20_CONSTEXPR + buffer_pair_spec(const allocator_type &allocator) noexcept + : key(allocator), value(allocator) {} + + buffer_pair_spec(const buffer_type &key, const buffer_type &value, + const allocator_type &allocator = allocator_type()) + : key(key, allocator), value(value, allocator) {} + buffer_pair_spec(const buffer_type &key, const buffer_type &value, + bool make_reference, + const allocator_type &allocator = allocator_type()) + : 
key(key, make_reference, allocator), + value(value, make_reference, allocator) {} + + buffer_pair_spec(const stl_pair &pair, + const allocator_type &allocator = allocator_type()) + : buffer_pair_spec(pair.first, pair.second, allocator) {} + buffer_pair_spec(const stl_pair &pair, bool make_reference, + const allocator_type &allocator = allocator_type()) + : buffer_pair_spec(pair.first, pair.second, make_reference, allocator) {} + + buffer_pair_spec(const slice &key, const slice &value, + const allocator_type &allocator = allocator_type()) + : key(key, allocator), value(value, allocator) {} + buffer_pair_spec(const slice &key, const slice &value, bool make_reference, + const allocator_type &allocator = allocator_type()) + : key(key, make_reference, allocator), + value(value, make_reference, allocator) {} + + buffer_pair_spec(const pair &pair, + const allocator_type &allocator = allocator_type()) + : buffer_pair_spec(pair.key, pair.value, allocator) {} + buffer_pair_spec(const pair &pair, bool make_reference, + const allocator_type &allocator = allocator_type()) + : buffer_pair_spec(pair.key, pair.value, make_reference, allocator) {} + + buffer_pair_spec(const txn &txn, const slice &key, const slice &value, + const allocator_type &allocator = allocator_type()) + : key(txn, key, allocator), value(txn, value, allocator) {} + buffer_pair_spec(const txn &txn, const pair &pair, + const allocator_type &allocator = allocator_type()) + : buffer_pair_spec(txn, pair.key, pair.value, allocator) {} + + buffer_pair_spec(buffer_type &&key, buffer_type &&value) noexcept( + buffer_type::move_assign_alloc::is_nothrow()) + : key(::std::move(key)), value(::std::move(value)) {} + buffer_pair_spec(buffer_pair_spec &&pair) noexcept( + buffer_type::move_assign_alloc::is_nothrow()) + : buffer_pair_spec(::std::move(pair.key), ::std::move(pair.value)) {} + + /// \brief Checks whether data chunk stored inside the buffers both, otherwise + /// at least one of buffers just refers to data located 
outside. + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR bool + is_freestanding() const noexcept { + return key.is_freestanding() && value.is_freestanding(); + } + /// \brief Checks whether one of the buffers just refers to data located + /// outside the buffer, rather than stores it. + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR bool + is_reference() const noexcept { + return key.is_reference() || value.is_reference(); + } + /// \brief Makes buffers owning the data. + /// \details If buffer refers to an external data, then makes it the owner + /// of clone by allocating storage and copying the data. + void make_freestanding() { + key.make_freestanding(); + value.make_freestanding(); + } + + operator pair() const noexcept { return pair(key, value); } +}; + +template +using buffer_pair = buffer_pair_spec; + /// end of cxx_data @} //------------------------------------------------------------------------------ From 100f07e89ac4e34ba990ab630916af1c5a8215b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 26 Nov 2023 21:51:41 +0300 Subject: [PATCH 085/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=BF=D0=BE?= =?UTF-8?q?=D0=BB=D0=BD=D0=B8=D1=82=D0=B5=D0=BB=D1=8C=D0=BD=D1=8B=D0=B5=20?= =?UTF-8?q?=D0=BE=D0=BF=D0=B5=D1=80=D0=B0=D1=82=D0=BE=D1=80=D1=8B=20=D0=BF?= =?UTF-8?q?=D1=80=D0=B5=D0=BE=D0=B1=D1=80=D0=B0=D0=B7=D0=BE=D0=B2=D0=B0?= =?UTF-8?q?=D0=BD=D0=B8=D1=8F=20=D0=BA=20=D1=82=D0=B8=D0=BF=D0=B0=D0=BC=20?= =?UTF-8?q?C=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index 74373039..c945545b 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -798,6 +798,8 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { slice &operator=(const slice &) noexcept = default; inline slice &operator=(slice &&src) noexcept; inline slice &operator=(::MDBX_val 
&&src); + operator MDBX_val *() noexcept { return this; } + operator const MDBX_val *() const noexcept { return this; } #if defined(DOXYGEN) || \ (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) @@ -3420,6 +3422,7 @@ struct LIBMDBX_API_TYPE map_handle { map_handle(const map_handle &) noexcept = default; map_handle &operator=(const map_handle &) noexcept = default; operator bool() const noexcept { return dbi != 0; } + operator MDBX_dbi() const { return dbi; } using flags = ::MDBX_db_flags_t; using state = ::MDBX_dbi_state_t; From f97c127455517f130b3206f2090acdceb3d644ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 15 Nov 2023 23:38:31 +0300 Subject: [PATCH 086/443] =?UTF-8?q?mdbx-test:=20=D1=82=D0=B5=D1=81=D1=82?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20doubtless-API=20=D0=BF=D0=BE=D0=B7?= =?UTF-8?q?=D0=B8=D1=86=D0=B8=D0=BE=D0=BD=D0=B8=D1=80=D0=BE=D0=B2=D0=B0?= =?UTF-8?q?=D0=BD=D0=B8=D1=8F=20=D0=BA=D1=83=D1=80=D1=81=D0=BE=D1=80=D0=BE?= =?UTF-8?q?=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 8 + test/extra/doubtless_positioning.c++ | 263 +++++++++++++++++++++++++++ 2 files changed, 271 insertions(+) create mode 100644 test/extra/doubtless_positioning.c++ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 08866fe4..59dc315c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -95,6 +95,13 @@ if(UNIX AND NOT SUBPROJECT) set_target_properties(test_extra_hex_base64_base58 PROPERTIES CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) endif() + add_executable(test_extra_doubtless_positioning extra/doubtless_positioning.c++) + target_include_directories(test_extra_doubtless_positioning PRIVATE "${PROJECT_SOURCE_DIR}") + target_link_libraries(test_extra_doubtless_positioning ${TOOL_MDBX_LIB}) + if(MDBX_CXX_STANDARD) + 
set_target_properties(test_extra_doubtless_positioning PROPERTIES + CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) + endif() endif() endif() @@ -180,6 +187,7 @@ else() add_test(NAME extra_maindb_ordinal COMMAND test_extra_maindb_ordinal) add_test(NAME extra_dupfixed_multiple COMMAND test_extra_dupfixed_multiple) add_test(NAME extra_hex_base64_base58 COMMAND test_extra_hex_base64_base58) + add_test(NAME extra_doubtless_positioning COMMAND test_extra_doubtless_positioning) endif() endif() diff --git a/test/extra/doubtless_positioning.c++ b/test/extra/doubtless_positioning.c++ new file mode 100644 index 00000000..aff30792 --- /dev/null +++ b/test/extra/doubtless_positioning.c++ @@ -0,0 +1,263 @@ +#include "mdbx.h++" +#include +#include +#include +#include +#include + +static ::std::ostream &operator<<(::std::ostream &out, + const mdbx::cursor::move_operation op) { + static const char *const str[] = {"FIRST", + "FIRST_DUP", + "GET_BOTH", + "GET_BOTH_RANGE", + "GET_CURRENT", + "GET_MULTIPLE", + "LAST", + "LAST_DUP", + "NEXT", + "NEXT_DUP", + "NEXT_MULTIPLE", + "NEXT_NODUP", + "PREV", + "PREV_DUP", + "PREV_NODUP", + "SET", + "SET_KEY", + "SET_RANGE", + "PREV_MULTIPLE", + "SET_LOWERBOUND", + "SET_UPPERBOUND", + "TO_KEY_LESSER_THAN", + "TO_KEY_LESSER_OR_EQUAL", + "TO_KEY_EQUAL", + "TO_KEY_GREATER_OR_EQUAL", + "TO_KEY_GREATER_THAN", + "TO_EXACT_KEY_VALUE_LESSER_THAN", + "TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL", + "TO_EXACT_KEY_VALUE_EQUAL", + "TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL", + "TO_EXACT_KEY_VALUE_GREATER_THAN", + "TO_PAIR_LESSER_THAN", + "TO_PAIR_LESSER_OR_EQUAL", + "TO_PAIR_EQUAL", + "TO_PAIR_GREATER_OR_EQUAL", + "TO_PAIR_GREATER_THAN"}; + return out << str[op]; +} + +using buffer = mdbx::default_buffer; +using buffer_pair = mdbx::buffer_pair; + +std::default_random_engine prng(42); + +static buffer random(const unsigned &value) { + switch (prng() % 3) { + default: + return buffer::hex(value); + case 1: + return buffer::base64(value); + case 2: + return 
buffer::base58(value); + } +} + +static buffer random_key() { return random(prng() % 10007); } + +static buffer random_value() { return random(prng() % 47); } + +using predicate = std::function; + +static bool probe(mdbx::txn txn, mdbx::map_handle dbi, + mdbx::cursor::move_operation op, predicate cmp, + const buffer_pair &pair) { + auto seeker = txn.open_cursor(dbi); + auto scanner = seeker.clone(); + + const bool scan_backward = + op == mdbx::cursor::key_lesser_than || + op == mdbx::cursor::key_lesser_or_equal || + op == mdbx::cursor::multi_exactkey_value_lesser_than || + op == mdbx::cursor::multi_exactkey_value_lesser_or_equal || + op == mdbx::cursor::pair_lesser_than || + op == mdbx::cursor::pair_lesser_or_equal; + + const bool is_multi = mdbx::is_multi(txn.get_handle_info(dbi).value_mode()); + + auto seek_result = seeker.move(op, pair.key, pair.value, false); + auto scan_result = scanner.fullscan( + [cmp, &pair](const mdbx::pair &scan) -> bool { return cmp(scan, pair); }, + scan_backward); + if (seek_result.done == scan_result && + (!scan_result || + seeker.is_same_position( + scanner, + op < mdbx::cursor::multi_exactkey_value_lesser_than && is_multi))) + return true; + + std::cerr << std::endl; + std::cerr << "bug:"; + std::cerr << std::endl; + std::cerr << std::string(is_multi ? "multi" : "single") << "-map, op " << op + << ", key " << pair.key << ", value " << pair.value; + std::cerr << std::endl; + std::cerr << "\tscanner: "; + if (scan_result) + std::cerr << " done, key " << scanner.current(false).key << ", value " + << scanner.current(false).value; + else + std::cerr << "not-found"; + std::cerr << std::endl; + std::cerr << "\t seeker: " << (seek_result.done ? 
" done" : "not-found") + << ", key " << seek_result.key << ", value " << seek_result.value; + std::cerr << std::endl; + return false; +} + +static bool probe(mdbx::txn txn, mdbx::map_handle dbi, + mdbx::cursor::move_operation op, predicate cmp) { + const auto pair = buffer_pair(random_key(), random_value()); + const bool ok = probe(txn, dbi, op, cmp, pair); +#if MDBX_DEBUG + if (!ok) + // повтор для отладки и поиска причин + probe(txn, dbi, op, cmp, pair); +#endif /* MDBX_DEBUG */ + return ok; +} + +static bool test(mdbx::txn txn, mdbx::map_handle dbi) { + bool ok = true; + + ok = probe(txn, dbi, mdbx::cursor::key_lesser_than, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) < 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::key_lesser_or_equal, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) <= 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::key_equal, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) == 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::key_greater_or_equal, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) >= 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::key_greater_than, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) > 0; + }) && + ok; + + ok = probe(txn, dbi, mdbx::cursor::multi_exactkey_value_lesser_than, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && + mdbx_dcmp(txn, dbi, l.value, r.value) < 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::multi_exactkey_value_lesser_or_equal, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && + mdbx_dcmp(txn, dbi, l.value, r.value) <= 0; + }) && + ok; + ok = 
probe(txn, dbi, mdbx::cursor::multi_exactkey_value_equal, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && + mdbx_dcmp(txn, dbi, l.value, r.value) == 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::multi_exactkey_value_greater_or_equal, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && + mdbx_dcmp(txn, dbi, l.value, r.value) >= 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::multi_exactkey_value_greater, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && + mdbx_dcmp(txn, dbi, l.value, r.value) > 0; + }) && + ok; + + ok = probe(txn, dbi, mdbx::cursor::pair_lesser_than, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + auto cmp = mdbx_cmp(txn, dbi, l.key, r.key); + if (cmp == 0) + cmp = mdbx_dcmp(txn, dbi, l.value, r.value); + return cmp < 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::pair_lesser_or_equal, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + auto cmp = mdbx_cmp(txn, dbi, l.key, r.key); + if (cmp == 0) + cmp = mdbx_dcmp(txn, dbi, l.value, r.value); + return cmp <= 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::pair_equal, + [](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return l == r; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::pair_greater_or_equal, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + auto cmp = mdbx_cmp(txn, dbi, l.key, r.key); + if (cmp == 0) + cmp = mdbx_dcmp(txn, dbi, l.value, r.value); + return cmp >= 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::pair_greater_than, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + auto cmp = mdbx_cmp(txn, dbi, l.key, r.key); + if (cmp == 0) + cmp = mdbx_dcmp(txn, dbi, l.value, r.value); + return cmp > 0; + }) && + ok; + return ok; +} + +int main(int argc, const char *argv[]) { + (void)argc; + 
(void)argv; + + unlink("." MDBX_DATANAME); + unlink("." MDBX_LOCKNAME); + mdbx::env_managed env(".", mdbx::env_managed::create_parameters(), + mdbx::env::operate_parameters(3)); + + auto txn = env.start_write(); + auto single = + txn.create_map("single", mdbx::key_mode::usual, mdbx::value_mode::single); + auto multi = + txn.create_map("multi", mdbx::key_mode::usual, mdbx::value_mode::multi); + for (size_t i = 0; i < 1000; ++i) { + auto key = random_key(); + txn.upsert(single, key, random_value()); + for (auto n = prng() % 5 + 1; n > 0; --n) + txn.upsert(multi, key, random_value()); + } + txn.commit_embark_read(); + + bool ok = true; + for (size_t i = 0; ok && i < 3333; ++i) { + ok = test(txn, single) && ok; + ok = test(txn, multi) && ok; + } + + if (!ok) { + std::cerr << "Fail\n"; + return EXIT_FAILURE; + } + std::cout << "OK\n"; + return EXIT_SUCCESS; +} From 1db44c7914406c9d525a4f3167584a32b2a7b1f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 26 Nov 2023 22:27:26 +0300 Subject: [PATCH 087/443] =?UTF-8?q?mdbx++:=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D0=BD=D0=BE=D1=81=20=D0=B2=20public=20=D1=82=D0=B8=D0=BF=D0=BE?= =?UTF-8?q?=D0=B2=20`buffer::move=5Fassign=5Falloc`=20=D0=B8=20`buffer::co?= =?UTF-8?q?py=5Fassign=5Falloc`=20=D0=B4=D0=BB=D1=8F=20=D1=81=D1=82=D0=B0?= =?UTF-8?q?=D1=80=D1=8B=D1=85=20=D1=81=D1=82=D0=B0=D0=BD=D0=B4=D0=B0=D1=80?= =?UTF-8?q?=D1=82=D0=BE=D0=B2=20C++.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index c945545b..9ab336d1 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -1662,10 +1662,6 @@ public: private: friend class txn; struct silo; - using move_assign_alloc = - allocation_aware_details::move_assign_alloc; - using copy_assign_alloc = - allocation_aware_details::copy_assign_alloc; 
using swap_alloc = allocation_aware_details::swap_alloc; struct silo /* Empty Base Class Optimization */ : public allocator_type { MDBX_CXX20_CONSTEXPR const allocator_type &get_allocator() const noexcept { @@ -2157,6 +2153,11 @@ public: /// \todo buffer& operator>>(buffer&, ...) for reading (delegated to slice) /// \todo template key(X) for encoding keys while writing + using move_assign_alloc = + allocation_aware_details::move_assign_alloc; + using copy_assign_alloc = + allocation_aware_details::copy_assign_alloc; + /// \brief Returns the associated allocator. MDBX_CXX20_CONSTEXPR allocator_type get_allocator() const { return silo_.get_allocator(); From 14558fa90bcc93fcf47c0b67087a177c5707ef66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 26 Nov 2023 23:18:05 +0300 Subject: [PATCH 088/443] =?UTF-8?q?mdbx-test:=20=D0=BF=D1=80=D0=B0=D0=B2?= =?UTF-8?q?=D0=BA=D0=B0=20=D1=82=D0=B5=D1=81=D1=82=D0=BE=D0=B2=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20=D1=81=D0=BE=D0=B2=D0=BC=D0=B5=D1=81=D1=82=D0=B8?= =?UTF-8?q?=D0=BC=D0=BE=D1=81=D1=82=D0=B8=20=D1=81=20=D0=BF=D1=80=D0=BE?= =?UTF-8?q?=D0=B1=D0=BB=D0=B5=D0=BC=D0=BD=D1=8B=D0=BC=D0=B8=20=D0=B2=D0=B5?= =?UTF-8?q?=D1=80=D1=81=D0=B8=D1=8F=D0=BC=D0=B8=20glibc=20=D0=B8=20glibc++?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/config.c++ | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/config.c++ b/test/config.c++ index 31cf9395..922f7b37 100644 --- a/test/config.c++ +++ b/test/config.c++ @@ -667,7 +667,10 @@ bool actor_config::deserialize(const char *str, actor_config &config) { } str = slash + 1; - uint64_t verify = std::stoull(std::string(str)); + uint64_t verify = 0; + while (*str >= '0' && *str <= '9') + verify = verify * 10 + *str++ - '0'; + if (checksum.value != verify) { TRACE("<< actor_config::deserialize: checksum 
mismatch\n"); return false; From 43dbf8ec4f953971f47adfa7e470745a53ca7c8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 27 Nov 2023 11:29:46 +0300 Subject: [PATCH 089/443] =?UTF-8?q?mdbx:=20=D0=BF=D1=80=D0=B0=D0=B2=D0=BA?= =?UTF-8?q?=D0=B0=20`txn=5Fvalgrind()`=20=D0=B4=D0=BB=D1=8F=20=D1=81=D0=BB?= =?UTF-8?q?=D1=83=D1=87=D0=B0=D1=8F=20resurrect-after-fork.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index bd955953..fc0318fe 100644 --- a/src/core.c +++ b/src/core.c @@ -8761,7 +8761,10 @@ static void txn_valgrind(MDBX_env *env, MDBX_txn *txn) { } else { /* transaction end */ bool should_unlock = false; pgno_t last = MAX_PAGENO + 1; - if (env->me_txn0 && env->me_txn0->mt_owner == osal_thread_self()) { + if (env->me_pid != osal_getpid()) { + /* resurrect after fork */ + return; + } else if (env->me_txn0 && env->me_txn0->mt_owner == osal_thread_self()) { /* inside write-txn */ last = meta_recent(env, &env->me_txn0->tw.troika).ptr_v->mm_geo.next; } else if (env->me_flags & MDBX_RDONLY) { From 349759648d65c98c1e8a7c49270af1c59f31432c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 27 Nov 2023 12:35:20 +0300 Subject: [PATCH 090/443] =?UTF-8?q?mdbx:=20=D0=BF=D1=80=D0=B0=D0=B2=D0=BA?= =?UTF-8?q?=D0=B0=20`rthc=5Fafterfork()`=20=D0=B4=D0=BB=D1=8F=20Valgrind.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/core.c b/src/core.c index fc0318fe..3eb8c028 100644 --- a/src/core.c +++ b/src/core.c @@ -26320,8 +26320,15 @@ __cold static void rthc_afterfork(void) { for 
(size_t i = 0; i < rthc_count; ++i) { MDBX_env *const env = rthc_table[i].env; NOTICE("drown env %p", __Wpedantic_format_voidptr(env)); - env->me_dxb_mmap.base = nullptr; - env->me_lck_mmap.base = nullptr; + if (env->me_lck_mmap.lck) + osal_munmap(&env->me_lck_mmap); + if (env->me_map) { + osal_munmap(&env->me_dxb_mmap); +#ifdef ENABLE_MEMCHECK + VALGRIND_DISCARD(env->me_valgrind_handle); + env->me_valgrind_handle = -1; +#endif /* ENABLE_MEMCHECK */ + } env->me_lck = lckless_stub(env); rthc_drown(env); } From 5a86afaac3a49eff0707914d48a0b4c126d7d801 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 27 Nov 2023 12:52:40 +0300 Subject: [PATCH 091/443] =?UTF-8?q?mdbx-test:=203-=D1=87=D0=B0=D1=81=D0=BE?= =?UTF-8?q?=D0=B2=D0=BE=D0=B9=20=D1=82=D0=B0=D0=B9=D0=BC=D0=B0=D1=83=D1=82?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20`extra=5Fdoubtless=5Fpositioning`=20?= =?UTF-8?q?=D0=BF=D1=80=D0=B8=20=D0=B8=D1=81=D0=BF=D0=BE=D0=BB=D1=8C=D0=B7?= =?UTF-8?q?=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B8=20Valgrind.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 59dc315c..bb3ab448 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -188,6 +188,9 @@ else() add_test(NAME extra_dupfixed_multiple COMMAND test_extra_dupfixed_multiple) add_test(NAME extra_hex_base64_base58 COMMAND test_extra_hex_base64_base58) add_test(NAME extra_doubtless_positioning COMMAND test_extra_doubtless_positioning) + if (ENABLE_MEMCHECK) + set_tests_properties(extra_doubtless_positioning PROPERTIES TIMEOUT 10800) + endif() endif() endif() From e316bc8b2912e0fbe083eada2d2c558883838dc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 27 
Nov 2023 13:23:06 +0300 Subject: [PATCH 092/443] =?UTF-8?q?mdbx-test:=20=D1=83=D0=B2=D0=B5=D0=BB?= =?UTF-8?q?=D0=B8=D1=87=D0=B5=D0=BD=D0=B8=D1=8F=20=D1=82=D0=B0=D0=B9=D0=BC?= =?UTF-8?q?=D0=B0=D1=83=D1=82=D0=B0=20=D0=B4=D0=BB=D1=8F=20smoke-=D1=82?= =?UTF-8?q?=D0=B5=D1=81=D1=82=D0=B0=20=D0=BD=D0=B0=20=D1=81=D0=BB=D1=83?= =?UTF-8?q?=D1=87=D0=B0=D0=B9=20=D0=BF=D0=B0=D1=80=D0=B0=D0=BB=D0=BB=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D0=BD=D0=BE=D0=B3=D0=BE=20=D0=B2=D1=8B=D0=BF=D0=BE?= =?UTF-8?q?=D0=BB=D0=BD=D0=B5=D0=BD=D0=B8=D1=8F=20=D0=BF=D0=BE=D0=B4=20Val?= =?UTF-8?q?grind.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bb3ab448..232ff2e6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -144,7 +144,7 @@ else() --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=2 --pathname=dupsort_writemap.db --dont-cleanup-after basic) set_tests_properties(dupsort_writemap PROPERTIES - TIMEOUT 600 + TIMEOUT 3600 RUN_SERIAL OFF) if(MDBX_BUILD_TOOLS) add_test(NAME dupsort_writemap_chk COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvvwc dupsort_writemap.db) From eb90ec61924ad68558ffb30997733ed60c6a404c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 28 Nov 2023 21:33:57 +0300 Subject: [PATCH 093/443] =?UTF-8?q?mdbx:=20=D0=BD=D0=BE=D0=B2=D1=8B=D0=B9?= =?UTF-8?q?=20=D1=80=D0=B0=D0=B7=D0=BC=D0=B5=D1=80=20MDBX=5Fopt=5Frp=5Faug?= =?UTF-8?q?ment=5Flimit=20=D0=BF=D0=BE=20=D1=83=D0=BC=D0=BE=D0=BB=D1=87?= =?UTF-8?q?=D0=B0=D0=BD=D0=B8=D1=8E=20=D0=B2=201/3=20=D0=BE=D1=82=20=D1=82?= =?UTF-8?q?=D0=B5=D0=BA=D1=83=D1=89=D0=B5=D0=B3=D0=BE=20=D0=BA=D0=BE=D0=BB?= =?UTF-8?q?-=D0=B2=D0=B0=20=D1=81=D1=82=D1=80=D0=B0=D0=BD=D0=B8=D1=86=20?= 
=?UTF-8?q?=D0=B2=20=D0=91=D0=94.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 3 ++- src/core.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/mdbx.h b/mdbx.h index 43d4eca3..b8f3eac5 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2146,7 +2146,8 @@ enum MDBX_option_t { * growth, or/and to the inability of put long values. * * The `MDBX_opt_rp_augment_limit` controls described limit for the current - * process. Default is 262144, it is usually enough for most cases. */ + * process. By default this limit adjusted dynamically to 1/3 of current + * quantity of DB pages, which is usually enough for most cases. */ MDBX_opt_rp_augment_limit, /** \brief Controls the in-process limit to grow a cache of dirty diff --git a/src/core.c b/src/core.c index 3eb8c028..24e0353c 100644 --- a/src/core.c +++ b/src/core.c @@ -6423,8 +6423,8 @@ __cold static void munlock_all(const MDBX_env *env) { } __cold static unsigned default_rp_augment_limit(const MDBX_env *env) { - /* default rp_augment_limit = ceil(npages / gold_ratio) */ - const size_t augment = (env->me_dbgeo.now >> (env->me_psize2log + 10)) * 633u; + /* default rp_augment_limit = npages / 3 */ + const size_t augment = env->me_dbgeo.now / 3 >> env->me_psize2log; eASSERT(env, augment < MDBX_PGL_LIMIT); return pnl_bytes2size(pnl_size2bytes( (augment > MDBX_PNL_INITIAL) ? 
augment : MDBX_PNL_INITIAL)); From 508cf83c321226aefc3f38519f6cfa3b4b1e4afb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 28 Nov 2023 21:40:21 +0300 Subject: [PATCH 094/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BD=D0=B5=D1=81=D1=83?= =?UTF-8?q?=D1=89=D0=B5=D1=81=D1=82=D0=B2=D0=B5=D0=BD=D0=BD=D0=BE=D0=B9=20?= =?UTF-8?q?=D0=BE=D0=BF=D0=B5=D1=87=D0=B0=D1=82=D0=BA=D0=B8=20=D0=B2=20?= =?UTF-8?q?=D0=BA=D0=BE=D0=BC=D0=BC=D0=B5=D0=BD=D1=82=D0=B0=D1=80=D0=B8?= =?UTF-8?q?=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index 24e0353c..670a61e3 100644 --- a/src/core.c +++ b/src/core.c @@ -7743,7 +7743,7 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *const mc, const size_t num, * простейших случаях (тривиальный бенчмарк) интегральная производительность * становится вдвое меньше. А на платформах без mincore() и с проблемной * подсистемой виртуальной памяти ситуация может быть многократно хуже. - * Поэтому избегаем затрат в ситуациях когда prefaukt-write скорее всего не + * Поэтому избегаем затрат в ситуациях когда prefault-write скорее всего не * нужна. 
*/ const bool readahead_enabled = env->me_lck->mti_readahead_anchor & 1; const pgno_t readahead_edge = env->me_lck->mti_readahead_anchor >> 1; From eeec44f56df34a01f23d94e76dc9651525220d4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 29 Nov 2023 00:35:25 +0300 Subject: [PATCH 095/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20MDBX=5Fopt=5Fgc=5Ftime=5Flimit?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 28 ++++++++++++++++++++++ src/core.c | 64 +++++++++++++++++++++++++++++++++++++++++++------ src/internals.h | 2 ++ 3 files changed, 87 insertions(+), 7 deletions(-) diff --git a/mdbx.h b/mdbx.h index b8f3eac5..bf5bc560 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2133,6 +2133,7 @@ enum MDBX_option_t { /** \brief Controls the in-process limit to grow a list of reclaimed/recycled * page's numbers for finding a sequence of contiguous pages for large data * items. + * \see MDBX_opt_gc_time_limit * * \details A long values requires allocation of contiguous database pages. * To find such sequences, it may be necessary to accumulate very large lists, @@ -2293,6 +2294,33 @@ enum MDBX_option_t { * in the \ref MDBX_WRITEMAP mode by clearing ones through file handle before * touching. */ MDBX_opt_prefault_write_enable, + + /** \brief Controls the in-process spending time limit of searching + * consecutive pages inside GC. + * \see MDBX_opt_rp_augment_limit + * + * \details Задаёт ограничение времени в 1/65536 долях секунды, которое может + * быть потрачено в ходе пишущей транзакции на поиск последовательностей + * страниц внутри GC/freelist после достижения ограничения задаваемого опцией + * \ref MDBX_opt_rp_augment_limit. 
Контроль по времени не выполняется при + * поиске/выделении одиночных страниц и выделении страниц под нужды GC (при + * обновлении GC в ходе фиксации транзакции). + * + * Задаваемый лимит времени исчисляется по "настенным часам" и контролируется + * в рамках транзакции, наследуется для вложенных транзакций и с + * аккумулированием в родительской при их фиксации. Контроль по времени + * производится только при достижении ограничения задаваемого опцией \ref + * MDBX_opt_rp_augment_limit. Это позволяет гибко управлять поведением + * используя обе опции. + * + * По умолчанию ограничение устанавливается в 0, что приводит к + * незамедлительной остановке поиска в GC при достижении \ref + * MDBX_opt_rp_augment_limit во внутреннем состоянии транзакции и + * соответствует поведению до появления опции `MDBX_opt_gc_time_limit`. + * С другой стороны, при минимальном значении (включая 0) + * `MDBX_opt_rp_augment_limit` переработка GC будет ограничиваться + * преимущественно затраченным временем. */ + MDBX_opt_gc_time_limit }; #ifndef __cplusplus /** \ingroup c_settings */ diff --git a/src/core.c b/src/core.c index 670a61e3..1e0a400b 100644 --- a/src/core.c +++ b/src/core.c @@ -7674,12 +7674,24 @@ bailout: return ret; } +struct monotime_cache { + uint64_t value; + int expire_countdown; +}; + +static __inline uint64_t monotime_since_cached(uint64_t begin_timestamp, + struct monotime_cache *cache) { + if (cache->expire_countdown) + cache->expire_countdown -= 1; + else { + cache->value = osal_monotime(); + cache->expire_countdown = 42 / 3; + } + return cache->value - begin_timestamp; +} + static pgr_t page_alloc_slowpath(const MDBX_cursor *const mc, const size_t num, uint8_t flags) { -#if MDBX_ENABLE_PROFGC - const uint64_t monotime_before = osal_monotime(); -#endif /* MDBX_ENABLE_PROFGC */ - pgr_t ret; MDBX_txn *const txn = mc->mc_txn; MDBX_env *const env = txn->mt_env; @@ -7694,8 +7706,19 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *const mc, const size_t num, 
eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->mt_next_pgno - MDBX_ENABLE_REFUND)); - pgno_t pgno = 0; size_t newnext; + const uint64_t monotime_begin = + (MDBX_ENABLE_PROFGC || (num > 1 && env->me_options.gc_time_limit)) + ? osal_monotime() + : 0; + struct monotime_cache now_cache; + now_cache.expire_countdown = + 1 /* старт с 1 позволяет избавиться как от лишних системных вызовов когда + лимит времени задан нулевой или уже исчерпан, так и от подсчета + времени при не-достижении rp_augment_limit */ + ; + now_cache.value = monotime_begin; + pgno_t pgno = 0; if (num > 1) { #if MDBX_ENABLE_PROFGC prof->xpages += 1; @@ -7871,7 +7894,10 @@ next_gc:; txn->tw.relist) >= env->me_options.rp_augment_limit) && ((/* not a slot-request from gc-update */ num && /* have enough unallocated space */ txn->mt_geo.upper >= - txn->mt_next_pgno + num) || + txn->mt_next_pgno + num && + monotime_since_cached(monotime_begin, &now_cache) + + txn->tw.gc_time_acc >= + env->me_options.gc_time_limit) || gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) >= MDBX_PGL_LIMIT)) { /* Stop reclaiming to avoid large/overflow the page list. 
This is a rare * case while search for a continuously multi-page region in a @@ -8173,6 +8199,8 @@ done: (size_t)txn->mt_dbs[FREE_DBI].md_entries); ret.page = NULL; } + if (num > 1) + txn->tw.gc_time_acc += monotime_since_cached(monotime_begin, &now_cache); } else { early_exit: DEBUG("return NULL for %zu pages for ALLOC_%s, rc %d", num, @@ -8181,7 +8209,7 @@ done: } #if MDBX_ENABLE_PROFGC - prof->rtime_monotonic += osal_monotime() - monotime_before; + prof->rtime_monotonic += osal_monotime() - monotime_begin; #endif /* MDBX_ENABLE_PROFGC */ return ret; } @@ -9352,6 +9380,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { MDBX_PNL_SETSIZE(txn->tw.retired_pages, 0); txn->tw.spilled.list = NULL; txn->tw.spilled.least_removed = 0; + txn->tw.gc_time_acc = 0; txn->tw.last_reclaimed = 0; if (txn->tw.lifo_reclaimed) MDBX_PNL_SETSIZE(txn->tw.lifo_reclaimed, 0); @@ -9800,6 +9829,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, = parent->mt_next_pgno) - MDBX_ENABLE_REFUND)); + txn->tw.gc_time_acc = parent->tw.gc_time_acc; txn->tw.last_reclaimed = parent->tw.last_reclaimed; if (parent->tw.lifo_reclaimed) { txn->tw.lifo_reclaimed = parent->tw.lifo_reclaimed; @@ -12037,6 +12067,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { pnl_free(parent->tw.relist); parent->tw.relist = txn->tw.relist; txn->tw.relist = NULL; + parent->tw.gc_time_acc = txn->tw.gc_time_acc; parent->tw.last_reclaimed = txn->tw.last_reclaimed; parent->mt_geo = txn->mt_geo; @@ -25875,6 +25906,21 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, } break; + case MDBX_opt_gc_time_limit: + if (value == /* default */ UINT64_MAX) + value = 0; + if (unlikely(value > UINT32_MAX)) + return MDBX_EINVAL; + if (unlikely(env->me_flags & MDBX_RDONLY)) + return MDBX_EACCESS; + value = osal_16dot16_to_monotime((uint32_t)value); + if (value != env->me_options.gc_time_limit) { + if (env->me_txn && env->me_txn0->mt_owner != 
osal_thread_self()) + return MDBX_EPERM; + env->me_options.gc_time_limit = value; + } + break; + case MDBX_opt_txn_dp_limit: case MDBX_opt_txn_dp_initial: if (value == /* default */ UINT64_MAX) @@ -26027,6 +26073,10 @@ __cold int mdbx_env_get_option(const MDBX_env *env, const MDBX_option_t option, *pvalue = env->me_options.rp_augment_limit; break; + case MDBX_opt_gc_time_limit: + *pvalue = osal_monotime_to_16dot16(env->me_options.gc_time_limit); + break; + case MDBX_opt_txn_dp_limit: *pvalue = env->me_options.dp_limit; break; diff --git a/src/internals.h b/src/internals.h index 7f9aedd0..f4e37ac3 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1260,6 +1260,7 @@ struct MDBX_txn { size_t writemap_dirty_npages; size_t writemap_spilled_npages; }; + uint64_t gc_time_acc; } tw; }; }; @@ -1422,6 +1423,7 @@ struct MDBX_env { unsigned rp_augment_limit; unsigned dp_limit; unsigned dp_initial; + uint64_t gc_time_limit; uint8_t dp_loose_limit; uint8_t spill_max_denominator; uint8_t spill_min_denominator; From 169e69c52e3f5389ae8b8729e02a279c48a919bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 30 Nov 2023 22:53:28 +0300 Subject: [PATCH 096/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=BE=D0=B4=D1=81=D1=82?= =?UTF-8?q?=D1=80=D0=BE=D0=B9=D0=BA=D0=B0=20rp=5Faugment=5Flimit=20=D0=B2?= =?UTF-8?q?=20=D0=B7=D0=B0=D0=B2=D0=B8=D1=81=D0=B8=D0=BC=D0=BE=D1=81=D1=82?= =?UTF-8?q?=D0=B8=20=D0=BE=D1=82=20gc=5Ftime=5Flimit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Когда rp_augment_limit не задан пользователем посредством `MDBX_opt_rp_augment_limit`, то как и ранее он подстраивается в зависимости от текущего размера БД (актуального кол-ва страниц). 
Теперь-же авто-устанавливаемое значение rp_augment_limit вычисляется обратно-пропорционально `MDBX_opt_gc_time_limit`: - Если gc_time_limit == 0, то rp_augment_limit устанавливается в 1/3 от общего кол-ва страниц БД, но не меньше рационального минимума. Это соответствует прежнему поведению и обеспечивает достаточно глубокую переработку GC во всех не-экстремальных сценариях. - При gc_time_limit >= 16_секунд rp_augment_limit устанавливается в минимальное значение. - Когда 0 < gc_time_limit < 16_секунд rp_augment_limit устанавливается между минимумом и 1/3 от размера БД пропорционально остатку gc_time_limit до 16 секунд. Соответственно, при больших значениях gc_time_limit, выбирается меньшее значение rp_augment_limit, и контроль глубины переработки GC ограничивается в основном по-времени. --- src/core.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/core.c b/src/core.c index 1e0a400b..c8f47537 100644 --- a/src/core.c +++ b/src/core.c @@ -6423,11 +6423,21 @@ __cold static void munlock_all(const MDBX_env *env) { } __cold static unsigned default_rp_augment_limit(const MDBX_env *env) { - /* default rp_augment_limit = npages / 3 */ - const size_t augment = env->me_dbgeo.now / 3 >> env->me_psize2log; - eASSERT(env, augment < MDBX_PGL_LIMIT); - return pnl_bytes2size(pnl_size2bytes( - (augment > MDBX_PNL_INITIAL) ? augment : MDBX_PNL_INITIAL)); + const size_t timeframe = 16 << 16; + const size_t remain_1sec = + (env->me_options.gc_time_limit < timeframe) + ? timeframe - (size_t)env->me_options.gc_time_limit + : 0; + const size_t minimum = (env->me_maxgc_ov1page * 2 > MDBX_PNL_INITIAL) + ? env->me_maxgc_ov1page * 2 + : MDBX_PNL_INITIAL; + const size_t one_third = env->me_dbgeo.now / 3 >> env->me_psize2log; + const size_t augment_limit = + (one_third > minimum) + ? 
minimum + (one_third - minimum) / timeframe * remain_1sec + : minimum; + eASSERT(env, augment_limit < MDBX_PGL_LIMIT); + return pnl_bytes2size(pnl_size2bytes(augment_limit)); } static bool default_prefault_write(const MDBX_env *env) { @@ -25918,6 +25928,8 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, if (env->me_txn && env->me_txn0->mt_owner != osal_thread_self()) return MDBX_EPERM; env->me_options.gc_time_limit = value; + if (!env->me_options.flags.non_auto.rp_augment_limit) + env->me_options.rp_augment_limit = default_rp_augment_limit(env); } break; From 4c139b36190b861dffef5c2d0c611c33c7243975 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 4 Dec 2023 21:39:29 +0300 Subject: [PATCH 097/443] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B2=D0=BD=D1=83=D1=82=D1=80?= =?UTF-8?q?=D0=B5=D0=BD=D0=BD=D0=B5=D0=B3=D0=BE=20=D1=80=D0=B5=D0=B3=D1=80?= =?UTF-8?q?=D0=B5=D1=81=D1=81=D0=B0=20=D0=BF=D1=80=D0=BE=D0=B2=D0=B5=D1=80?= =?UTF-8?q?=D0=BA=D0=B8=20=D1=81=D0=BE=D1=81=D1=82=D0=BE=D1=8F=D0=BD=D0=B8?= =?UTF-8?q?=D0=B5=20=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B8=20=D1=82=D1=80?= =?UTF-8?q?=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit После f0d523c507042cc70eeeb690778c9b2be6a8b33f, при использовании добавленного API блокировок, возможно ложно-положительное определение состояние "внутри транзакции". 
--- src/core.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/src/core.c b/src/core.c index c8f47537..774eeb0e 100644 --- a/src/core.c +++ b/src/core.c @@ -8581,7 +8581,8 @@ retry:; goto bailout; } - const bool inside_txn = (env->me_txn0->mt_owner == osal_thread_self()); + const bool inside_txn = + (!locked && env->me_txn0->mt_owner == osal_thread_self()); const meta_troika_t troika = (inside_txn | locked) ? env->me_txn0->tw.troika : meta_tap(env); const meta_ptr_t head = meta_recent(env, &troika); @@ -8594,7 +8595,7 @@ retry:; goto bailout; } - if (!inside_txn && locked && (env->me_flags & MDBX_WRITEMAP) && + if (locked && (env->me_flags & MDBX_WRITEMAP) && unlikely(head.ptr_c->mm_geo.next > bytes2pgno(env, env->me_dxb_mmap.current))) { @@ -8934,7 +8935,7 @@ __cold int mdbx_thread_register(const MDBX_env *env) { } const uintptr_t tid = osal_thread_self(); - if (env->me_txn0 && unlikely(env->me_txn0->mt_owner == tid)) + if (env->me_txn0 && unlikely(env->me_txn0->mt_owner == tid) && env->me_txn) return MDBX_TXN_OVERLAPPING; return bind_rslot((MDBX_env *)env, tid).err; } @@ -9726,7 +9727,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, flags |= parent->mt_flags & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_SPILLS); } else if (flags & MDBX_TXN_RDONLY) { if (env->me_txn0 && - unlikely(env->me_txn0->mt_owner == osal_thread_self()) && + unlikely(env->me_txn0->mt_owner == osal_thread_self()) && env->me_txn && (runtime_flags & MDBX_DBG_LEGACY_OVERLAP) == 0) return MDBX_TXN_OVERLAPPING; } else { @@ -13140,9 +13141,7 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending, const txnid_t txnid = safe64_txnid_next(pending->unsafe_txnid); NOTICE("force-forward pending-txn %" PRIaTXN " -> %" PRIaTXN, pending->unsafe_txnid, txnid); - ENSURE(env, !env->me_txn0 || - (env->me_txn0->mt_owner != osal_thread_self() && - !env->me_txn)); + ENSURE(env, !env->me_txn0 || 
!env->me_txn); if (unlikely(txnid > MAX_TXNID)) { rc = MDBX_TXN_FULL; ERROR("txnid overflow, raise %d", rc); @@ -13654,8 +13653,9 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, if (unlikely(rc != MDBX_SUCCESS)) return rc; - const bool inside_txn = - (env->me_txn0 && env->me_txn0->mt_owner == osal_thread_self()); + const bool need_lock = + !env->me_txn0 || env->me_txn0->mt_owner != osal_thread_self(); + const bool inside_txn = !need_lock && env->me_txn; #if MDBX_DEBUG if (growth_step < 0) { @@ -13666,17 +13666,17 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, #endif /* MDBX_DEBUG */ intptr_t reasonable_maxsize = 0; - bool need_unlock = false; + bool should_unlock = false; if (env->me_map) { /* env already mapped */ if (unlikely(env->me_flags & MDBX_RDONLY)) return MDBX_EACCESS; - if (!inside_txn) { + if (need_lock) { int err = osal_txn_lock(env, false); if (unlikely(err != MDBX_SUCCESS)) return err; - need_unlock = true; + should_unlock = true; env->me_txn0->tw.troika = meta_tap(env); eASSERT(env, !env->me_txn && !env->me_txn0->mt_child); env->me_txn0->mt_txnid = @@ -13902,7 +13902,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, MDBX_meta meta; memset(&meta, 0, sizeof(meta)); if (!inside_txn) { - eASSERT(env, need_unlock); + eASSERT(env, should_unlock); const meta_ptr_t head = meta_recent(env, &env->me_txn0->tw.troika); uint64_t timestamp = 0; @@ -14023,7 +14023,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, } bailout: - if (need_unlock) + if (should_unlock) osal_txn_unlock(env); return rc; } @@ -22737,10 +22737,6 @@ __cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, if (unlikely(env->me_flags & MDBX_RDONLY)) return MDBX_EACCESS; - if ((env->me_flags & MDBX_ENV_ACTIVE) && - unlikely(env->me_txn0->mt_owner == osal_thread_self())) - return MDBX_BUSY; - const bool lock_needed = (env->me_flags & MDBX_ENV_ACTIVE) && env->me_txn0->mt_owner != 
osal_thread_self(); bool should_unlock = false; @@ -22974,7 +22970,8 @@ __cold int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, if (unlikely(err != MDBX_SUCCESS)) return err; - if (env->me_txn0 && env->me_txn0->mt_owner == osal_thread_self()) + if (env->me_txn0 && env->me_txn0->mt_owner == osal_thread_self() && + env->me_txn) /* inside write-txn */ return stat_acc(env->me_txn, dest, bytes); From 6b72d88fde2eee67a00af5b4853c34f00882669a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 4 Dec 2023 22:32:25 +0300 Subject: [PATCH 098/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B2=D0=BD=D1=83=D1=82?= =?UTF-8?q?=D1=80=D0=B5=D0=BD=D0=BD=D0=B5=D0=B3=D0=BE=20=D1=80=D0=B5=D0=B3?= =?UTF-8?q?=D1=80=D0=B5=D1=81=D1=81=D0=B0=20lck=5Fserize=5Frc=20=D0=B2=20?= =?UTF-8?q?=D1=81=D1=86=D0=B5=D0=BD=D0=B0=D1=80=D0=B8=D0=B8=20=D0=BF=D0=BE?= =?UTF-8?q?=D0=B2=D1=82=D0=BE=D1=80=D0=BD=D0=BE=D0=B3=D0=BE=20=D0=BE=D1=82?= =?UTF-8?q?=D0=BA=D1=80=D1=8B=D1=82=D0=B8=D1=8F=20=D0=91=D0=94=20=D0=BE?= =?UTF-8?q?=D0=B4=D0=BD=D0=B8=D0=BC=20=D0=BF=D1=80=D0=BE=D1=86=D0=B5=D1=81?= =?UTF-8?q?=D1=81=D0=BE=D0=BC.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 295 ++++++++++++++++++++++++++--------------------------- 1 file changed, 146 insertions(+), 149 deletions(-) diff --git a/src/core.c b/src/core.c index 774eeb0e..1fdc8528 100644 --- a/src/core.c +++ b/src/core.c @@ -14579,6 +14579,150 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc, /******************************************************************************/ +__cold static int setup_lck_locked(MDBX_env *env) { + int err = rthc_register(env); + if (unlikely(err != MDBX_SUCCESS)) + return err; + + int lck_seize_rc = osal_lck_seize(env); + if (unlikely(MDBX_IS_ERROR(lck_seize_rc))) + return 
lck_seize_rc; + + if (env->me_lfd == INVALID_HANDLE_VALUE) { + env->me_lck = lckless_stub(env); + env->me_maxreaders = UINT_MAX; + DEBUG("lck-setup:%s%s%s", " lck-less", + (env->me_flags & MDBX_RDONLY) ? " readonly" : "", + (lck_seize_rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative"); + return lck_seize_rc; + } + + DEBUG("lck-setup:%s%s%s", " with-lck", + (env->me_flags & MDBX_RDONLY) ? " readonly" : "", + (lck_seize_rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative"); + + MDBX_env *inprocess_neighbor = nullptr; + err = rthc_uniq_check(&env->me_lck_mmap, &inprocess_neighbor); + if (unlikely(MDBX_IS_ERROR(err))) + return err; + if (inprocess_neighbor) { + if ((runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 || + (inprocess_neighbor->me_flags & MDBX_EXCLUSIVE) != 0) + return MDBX_BUSY; + if (lck_seize_rc == MDBX_RESULT_TRUE) { + err = osal_lck_downgrade(env); + if (unlikely(err != MDBX_SUCCESS)) + return err; + lck_seize_rc = MDBX_RESULT_FALSE; + } + } + + uint64_t size = 0; + err = osal_filesize(env->me_lfd, &size); + if (unlikely(err != MDBX_SUCCESS)) + return err; + + if (lck_seize_rc == MDBX_RESULT_TRUE) { + size = ceil_powerof2(env->me_maxreaders * sizeof(MDBX_reader) + + sizeof(MDBX_lockinfo), + env->me_os_psize); + jitter4testing(false); + } else { + if (env->me_flags & MDBX_EXCLUSIVE) + return MDBX_BUSY; + if (size > INT_MAX || (size & (env->me_os_psize - 1)) != 0 || + size < env->me_os_psize) { + ERROR("lck-file has invalid size %" PRIu64 " bytes", size); + return MDBX_PROBLEM; + } + } + + const size_t maxreaders = + ((size_t)size - sizeof(MDBX_lockinfo)) / sizeof(MDBX_reader); + if (maxreaders < 4) { + ERROR("lck-size too small (up to %" PRIuPTR " readers)", maxreaders); + return MDBX_PROBLEM; + } + env->me_maxreaders = (maxreaders <= MDBX_READERS_LIMIT) + ? (unsigned)maxreaders + : (unsigned)MDBX_READERS_LIMIT; + + err = osal_mmap((env->me_flags & MDBX_EXCLUSIVE) | MDBX_WRITEMAP, + &env->me_lck_mmap, (size_t)size, (size_t)size, + lck_seize_rc ? 
MMAP_OPTION_TRUNCATE | MMAP_OPTION_SEMAPHORE + : MMAP_OPTION_SEMAPHORE); + if (unlikely(err != MDBX_SUCCESS)) + return err; + +#if MDBX_ENABLE_MADVISE +#ifdef MADV_DODUMP + err = madvise(env->me_lck_mmap.lck, size, MADV_DODUMP) ? ignore_enosys(errno) + : MDBX_SUCCESS; + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#endif /* MADV_DODUMP */ + +#ifdef MADV_WILLNEED + err = madvise(env->me_lck_mmap.lck, size, MADV_WILLNEED) + ? ignore_enosys(errno) + : MDBX_SUCCESS; + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#elif defined(POSIX_MADV_WILLNEED) + err = ignore_enosys( + posix_madvise(env->me_lck_mmap.lck, size, POSIX_MADV_WILLNEED)); + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#endif /* MADV_WILLNEED */ +#endif /* MDBX_ENABLE_MADVISE */ + + struct MDBX_lockinfo *lck = env->me_lck_mmap.lck; + if (lck_seize_rc == MDBX_RESULT_TRUE) { + /* If we succeed got exclusive lock, then nobody is using the lock region + * and we should initialize it. */ + memset(lck, 0, (size_t)size); + jitter4testing(false); + lck->mti_magic_and_version = MDBX_LOCK_MAGIC; + lck->mti_os_and_format = MDBX_LOCK_FORMAT; +#if MDBX_ENABLE_PGOP_STAT + lck->mti_pgop_stat.wops.weak = 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + err = osal_msync(&env->me_lck_mmap, 0, (size_t)size, + MDBX_SYNC_DATA | MDBX_SYNC_SIZE); + if (unlikely(err != MDBX_SUCCESS)) { + ERROR("initial-%s for lck-file failed, err %d", "msync/fsync", err); + eASSERT(env, MDBX_IS_ERROR(err)); + return err; + } + } else { + if (lck->mti_magic_and_version != MDBX_LOCK_MAGIC) { + const bool invalid = (lck->mti_magic_and_version >> 8) != MDBX_MAGIC; + ERROR("lock region has %s", + invalid + ? "invalid magic" + : "incompatible version (only applications with nearly or the " + "same versions of libmdbx can share the same database)"); + return invalid ? 
MDBX_INVALID : MDBX_VERSION_MISMATCH; + } + if (lck->mti_os_and_format != MDBX_LOCK_FORMAT) { + ERROR("lock region has os/format signature 0x%" PRIx32 + ", expected 0x%" PRIx32, + lck->mti_os_and_format, MDBX_LOCK_FORMAT); + return MDBX_VERSION_MISMATCH; + } + } + + err = osal_lck_init(env, inprocess_neighbor, lck_seize_rc); + if (unlikely(err != MDBX_SUCCESS)) { + eASSERT(env, MDBX_IS_ERROR(err)); + return err; + } + + env->me_lck = lck; + eASSERT(env, !MDBX_IS_ERROR(lck_seize_rc)); + return lck_seize_rc; +} + /* Open and/or initialize the lock region for the environment. */ __cold static int setup_lck(MDBX_env *env, mdbx_mode_t mode) { eASSERT(env, env->me_lazy_fd != INVALID_HANDLE_VALUE); @@ -14615,157 +14759,10 @@ __cold static int setup_lck(MDBX_env *env, mdbx_mode_t mode) { env->me_lfd = INVALID_HANDLE_VALUE; } - /* beginning of a locked section ------------------------------------------ */ rthc_lock(); - err = rthc_register(env); - if (likely(err == MDBX_SUCCESS)) - err = osal_lck_seize(env); - - const int lck_seize_rc = err; - if (MDBX_IS_ERROR(err)) - goto bailout; - - struct MDBX_lockinfo *lck = nullptr; - if (env->me_lfd == INVALID_HANDLE_VALUE) { - lck = lckless_stub(env); - env->me_maxreaders = UINT_MAX; - DEBUG("lck-setup:%s%s%s", " lck-less", - (env->me_flags & MDBX_RDONLY) ? " readonly" : "", - (lck_seize_rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative"); - goto done; - } - - DEBUG("lck-setup:%s%s%s", " with-lck", - (env->me_flags & MDBX_RDONLY) ? " readonly" : "", - (lck_seize_rc == MDBX_RESULT_TRUE) ? 
" exclusive" : " cooperative"); - - uint64_t size = 0; - err = osal_filesize(env->me_lfd, &size); - if (unlikely(err != MDBX_SUCCESS)) - goto bailout; - - if (lck_seize_rc == MDBX_RESULT_TRUE) { - size = ceil_powerof2(env->me_maxreaders * sizeof(MDBX_reader) + - sizeof(MDBX_lockinfo), - env->me_os_psize); - jitter4testing(false); - } else { - if (env->me_flags & MDBX_EXCLUSIVE) { - err = MDBX_BUSY; - goto bailout; - } - if (size > INT_MAX || (size & (env->me_os_psize - 1)) != 0 || - size < env->me_os_psize) { - ERROR("lck-file has invalid size %" PRIu64 " bytes", size); - err = MDBX_PROBLEM; - goto bailout; - } - } - - const size_t maxreaders = - ((size_t)size - sizeof(MDBX_lockinfo)) / sizeof(MDBX_reader); - if (maxreaders < 4) { - ERROR("lck-size too small (up to %" PRIuPTR " readers)", maxreaders); - err = MDBX_PROBLEM; - goto bailout; - } - env->me_maxreaders = (maxreaders <= MDBX_READERS_LIMIT) - ? (unsigned)maxreaders - : (unsigned)MDBX_READERS_LIMIT; - - err = osal_mmap((env->me_flags & MDBX_EXCLUSIVE) | MDBX_WRITEMAP, - &env->me_lck_mmap, (size_t)size, (size_t)size, - lck_seize_rc ? MMAP_OPTION_TRUNCATE | MMAP_OPTION_SEMAPHORE - : MMAP_OPTION_SEMAPHORE); - if (unlikely(err != MDBX_SUCCESS)) - goto bailout; - -#if MDBX_ENABLE_MADVISE -#ifdef MADV_DODUMP - err = madvise(env->me_lck_mmap.lck, size, MADV_DODUMP) ? ignore_enosys(errno) - : MDBX_SUCCESS; - if (unlikely(MDBX_IS_ERROR(err))) - goto bailout; -#endif /* MADV_DODUMP */ - -#ifdef MADV_WILLNEED - err = madvise(env->me_lck_mmap.lck, size, MADV_WILLNEED) - ? 
ignore_enosys(errno) - : MDBX_SUCCESS; - if (unlikely(MDBX_IS_ERROR(err))) - goto bailout; -#elif defined(POSIX_MADV_WILLNEED) - err = ignore_enosys( - posix_madvise(env->me_lck_mmap.lck, size, POSIX_MADV_WILLNEED)); - if (unlikely(MDBX_IS_ERROR(err))) - goto bailout; -#endif /* MADV_WILLNEED */ -#endif /* MDBX_ENABLE_MADVISE */ - - lck = env->me_lck_mmap.lck; - if (lck_seize_rc == MDBX_RESULT_TRUE) { - /* If we succeed got exclusive lock, then nobody is using the lock region - * and we should initialize it. */ - memset(lck, 0, (size_t)size); - jitter4testing(false); - lck->mti_magic_and_version = MDBX_LOCK_MAGIC; - lck->mti_os_and_format = MDBX_LOCK_FORMAT; -#if MDBX_ENABLE_PGOP_STAT - lck->mti_pgop_stat.wops.weak = 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - err = osal_msync(&env->me_lck_mmap, 0, (size_t)size, - MDBX_SYNC_DATA | MDBX_SYNC_SIZE); - if (unlikely(err != MDBX_SUCCESS)) { - ERROR("initial-%s for lck-file failed, err %d", "msync/fsync", err); - goto bailout; - } - } else { - if (lck->mti_magic_and_version != MDBX_LOCK_MAGIC) { - const bool invalid = (lck->mti_magic_and_version >> 8) != MDBX_MAGIC; - ERROR("lock region has %s", - invalid - ? "invalid magic" - : "incompatible version (only applications with nearly or the " - "same versions of libmdbx can share the same database)"); - err = invalid ? 
MDBX_INVALID : MDBX_VERSION_MISMATCH; - goto bailout; - } - if (lck->mti_os_and_format != MDBX_LOCK_FORMAT) { - ERROR("lock region has os/format signature 0x%" PRIx32 - ", expected 0x%" PRIx32, - lck->mti_os_and_format, MDBX_LOCK_FORMAT); - err = MDBX_VERSION_MISMATCH; - goto bailout; - } - } - - MDBX_env *inprocess_neighbor = nullptr; - if (lck_seize_rc == MDBX_RESULT_TRUE) { - err = rthc_uniq_check(&env->me_lck_mmap, &inprocess_neighbor); - if (MDBX_IS_ERROR(err)) - goto bailout; - if (inprocess_neighbor && - ((runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 || - (inprocess_neighbor->me_flags & MDBX_EXCLUSIVE) != 0)) { - err = MDBX_BUSY; - goto bailout; - } - } - - err = osal_lck_init(env, inprocess_neighbor, lck_seize_rc); - if (MDBX_IS_ERROR(err)) - goto bailout; - -done: - env->me_lck = lck; - eASSERT(env, !MDBX_IS_ERROR(lck_seize_rc)); - -bailout: - /* Calling osal_lck_destroy() is required to restore POSIX-filelock - * and this job will be done by env_close(). */ + err = setup_lck_locked(env); rthc_unlock(); - /* end of a locked section ------------------------------------------------ */ - return lck_seize_rc; + return err; } __cold int mdbx_is_readahead_reasonable(size_t volume, intptr_t redundancy) { From 72332a8f9e8dae39b33ee4c9add1b0c26097fc83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 8 Dec 2023 15:07:37 +0300 Subject: [PATCH 099/443] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BD=D0=B5=D1=81=D1=83=D1=89?= =?UTF-8?q?=D0=B5=D1=81=D1=82=D0=B2=D0=B5=D0=BD=D0=BD=D0=BE=D0=B3=D0=BE=20?= =?UTF-8?q?=D0=B7=D0=B0=D0=BC=D0=B5=D1=87=D0=B0=D0=BD=D0=B8=D1=8F=20Coveri?= =?UTF-8?q?ty.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core.c b/src/core.c index 
1fdc8528..d588bb5c 100644 --- a/src/core.c +++ b/src/core.c @@ -19626,8 +19626,8 @@ int mdbx_cursor_compare(const MDBX_cursor *l, const MDBX_cursor *r, const int incomparable = INT16_MAX + 1; if (unlikely(!l)) return r ? -incomparable * 9 : 0; - if (unlikely(!r)) - return l ? incomparable * 9 : 0; + else if (unlikely(!r)) + return incomparable * 9; if (unlikely(l->mc_signature != MDBX_MC_LIVE)) return (r->mc_signature == MDBX_MC_LIVE) ? -incomparable * 8 : 0; From f16c4303bff02328da08f9bc82bc9b8cf2e62062 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 8 Dec 2023 16:47:40 +0300 Subject: [PATCH 100/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D0=B5=D1=87?= =?UTF-8?q?=D0=B0=D1=82=D0=BA=D0=B8=20=D0=B2=20=D0=BA=D0=BE=D0=BC=D0=BC?= =?UTF-8?q?=D0=B5=D0=BD=D1=82=D0=B0=D1=80=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdbx.h b/mdbx.h index bf5bc560..693cfee6 100644 --- a/mdbx.h +++ b/mdbx.h @@ -5868,7 +5868,7 @@ typedef struct MDBX_chk_context { problems_gc, problems_kv, total_problems; uint64_t steady_txnid, recent_txnid; /** Указатель на массив размером subdb_total с указателями на экземпляры - * структур MDBX_chk_subdb_t с информацией о всех таблицах ключ-значние, + * структур MDBX_chk_subdb_t с информацией о всех таблицах ключ-значение, * включая MainDB и GC/FreeDB. 
*/ const MDBX_chk_subdb_t *const *subdbs; } result; From fbc83dd069a5d955ba378733de7f8204aacb9ff8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 19 Feb 2024 01:20:27 +0300 Subject: [PATCH 101/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BB=D0=BE=D0=B6=D0=BD?= =?UTF-8?q?=D0=BE=D0=B9=20=D0=BE=D1=88=D0=B8=D0=B1=D0=BA=D0=B8=20`MDBX=5FC?= =?UTF-8?q?ORRUPTED=20(-30796)`=20=D0=B2=20=D1=81=D1=86=D0=B5=D0=BD=D0=B0?= =?UTF-8?q?=D1=80=D0=B8=D0=B8=20"odd=20dupfixed".?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Повреждение БД и/или потери данных не происходило, проблема лишь в возврате ложной ошибки. Благодарю пользователя/разработчика @Dvirsw (https://t.me/Dvirsw) за сообщения о проблеме и предоставление минимального/оптимального сценария воспроизведения. -- Проблема была из-за излишнего условия при контроле внутреннего поля mp_upper в ходе проверки структуры страниц БД. Поле mp_upper указывает на нижнюю границу заполнения страницы от конца к началу. Вследствие того, что значения ключей выравниваются на четную границу, это поле четно во всех случаях за исключением LEAF2-страницы (листовая страница вложенного дерева для множественных значений фиксированной/одинаковой длины одного ключа), на которой размещено нечетное количество значений нечетной длины. Ошибка не проявлялась в большинстве случаев (в том числе в стохастических тестах), так как штатно лишняя проверка производилась только при чтении страницы и перебалансировке ключей, но не при каждом добавлении значения. Тем не менее, сценарии тестов требуют доработки/расширения для явного добавления нечетных dupfixed-сценариев. 
--- src/core.c | 66 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/src/core.c b/src/core.c index d588bb5c..cba3f642 100644 --- a/src/core.c +++ b/src/core.c @@ -8416,7 +8416,7 @@ __hot static int page_touch(MDBX_cursor *mc) { np->mp_txnid = txn->mt_front; return MDBX_SUCCESS; } - tASSERT(txn, !IS_OVERFLOW(mp)); + tASSERT(txn, !IS_OVERFLOW(mp) && !IS_SUBP(mp)); if (IS_FROZEN(txn, mp)) { /* CoW the page */ @@ -16102,8 +16102,12 @@ __hot static __always_inline int page_get_checker_lite(const uint16_t ILL, if (((ILL & P_OVERFLOW) || !IS_OVERFLOW(page)) && (ILL & (P_BRANCH | P_LEAF | P_LEAF2)) == 0) { - if (unlikely(page->mp_upper < page->mp_lower || - ((page->mp_lower | page->mp_upper) & 1) || + /* Контроль четности page->mp_upper тут либо приводит к ложным ошибкам, + * либо слишком дорог по количеству операций. Заковырка в том, что mp_upper + * может быть нечетным на LEAF2-страницах, при нечетном количестве элементов + * нечетной длины. Поэтому четность page->mp_upper здесь не проверяется, но + * соответствующие полные проверки есть в page_check(). 
*/ + if (unlikely(page->mp_upper < page->mp_lower || (page->mp_lower & 1) || PAGEHDRSZ + page->mp_upper > txn->mt_env->me_psize)) return bad_page(page, "invalid page' lower(%u)/upper(%u) with limit %zu\n", @@ -18082,9 +18086,9 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, mc->mc_xcursor->mx_dbx.md_klen_min = mc->mc_xcursor->mx_dbx.md_klen_max = data->iov_len); + if (mc->mc_flags & C_SUB) + npr.page->mp_flags |= P_LEAF2; } - if ((mc->mc_db->md_flags & (MDBX_DUPSORT | MDBX_DUPFIXED)) == MDBX_DUPFIXED) - npr.page->mp_flags |= P_LEAF2; mc->mc_flags |= C_INITIALIZED; } @@ -18361,7 +18365,11 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if (unlikely(fp_flags & P_LEAF2)) { memcpy(page_data(mp), page_data(fp), page_numkeys(fp) * fp->mp_leaf2_ksize); + cASSERT(mc, + (((mp->mp_leaf2_ksize & page_numkeys(mp)) ^ mp->mp_upper) & + 1) == 0); } else { + cASSERT(mc, (mp->mp_upper & 1) == 0); memcpy(ptr_disp(mp, mp->mp_upper + PAGEHDRSZ), ptr_disp(fp, fp->mp_upper + PAGEHDRSZ), olddata.iov_len - fp->mp_upper - PAGEHDRSZ); @@ -18979,6 +18987,7 @@ __hot static int __must_check_result node_add_leaf2(MDBX_cursor *mc, const size_t ksize = mc->mc_db->md_xsize; cASSERT(mc, ksize == key->iov_len); const size_t nkeys = page_numkeys(mp); + cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->mp_upper) & 1) == 0); /* Just using these for counting */ const intptr_t lower = mp->mp_lower + sizeof(indx_t); @@ -18998,6 +19007,8 @@ __hot static int __must_check_result node_add_leaf2(MDBX_cursor *mc, memmove(ptr_disp(ptr, ksize), ptr, diff * ksize); /* insert new key */ memcpy(ptr, key->iov_base, ksize); + + cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->mp_upper) & 1) == 0); return MDBX_SUCCESS; } @@ -19164,6 +19175,7 @@ __hot static void node_del(MDBX_cursor *mc, size_t ksize) { mp->mp_lower -= sizeof(indx_t); cASSERT(mc, (size_t)UINT16_MAX - mp->mp_upper >= ksize - sizeof(indx_t)); mp->mp_upper += (indx_t)(ksize - sizeof(indx_t)); + 
cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->mp_upper) & 1) == 0); return; } @@ -20830,8 +20842,7 @@ __cold static int page_check(const MDBX_cursor *const mc, break; } - if (unlikely(mp->mp_upper < mp->mp_lower || - ((mp->mp_lower | mp->mp_upper) & 1) || + if (unlikely(mp->mp_upper < mp->mp_lower || (mp->mp_lower & 1) || PAGEHDRSZ + mp->mp_upper > env->me_psize)) rc = bad_page(mp, "invalid page lower(%u)/upper(%u) with limit %zu\n", mp->mp_lower, mp->mp_upper, page_space(env)); @@ -20847,11 +20858,6 @@ __cold static int page_check(const MDBX_cursor *const mc, bad_page(mp, "%s-page nkeys (%zu) < %u\n", IS_BRANCH(mp) ? "branch" : "leaf", nkeys, 1 + IS_BRANCH(mp)); } - if (!IS_LEAF2(mp) && unlikely(PAGEHDRSZ + mp->mp_upper + - nkeys * sizeof(MDBX_node) + nkeys - 1 > - env->me_psize)) - rc = bad_page(mp, "invalid page upper (%u) for nkeys %zu with limit %zu\n", - mp->mp_upper, nkeys, page_space(env)); const size_t ksize_max = keysize_max(env->me_psize, 0); const size_t leaf2_ksize = mp->mp_leaf2_ksize; @@ -20860,8 +20866,20 @@ __cold static int page_check(const MDBX_cursor *const mc, (mc->mc_db->md_flags & MDBX_DUPFIXED) == 0)) rc = bad_page(mp, "unexpected leaf2-page (db-flags 0x%x)\n", mc->mc_db->md_flags); - if (unlikely(leaf2_ksize < 1 || leaf2_ksize > ksize_max)) - rc = bad_page(mp, "invalid leaf2-key length (%zu)\n", leaf2_ksize); + else if (unlikely(leaf2_ksize != mc->mc_db->md_xsize)) + rc = bad_page(mp, "invalid leaf2_ksize %zu\n", leaf2_ksize); + else if (unlikely(((leaf2_ksize & nkeys) ^ mp->mp_upper) & 1)) + rc = bad_page( + mp, "invalid page upper (%u) for nkeys %zu with leaf2-length %zu\n", + mp->mp_upper, nkeys, leaf2_ksize); + } else { + if (unlikely((mp->mp_upper & 1) || PAGEHDRSZ + mp->mp_upper + + nkeys * sizeof(MDBX_node) + + nkeys - 1 > + env->me_psize)) + rc = + bad_page(mp, "invalid page upper (%u) for nkeys %zu with limit %zu\n", + mp->mp_upper, nkeys, page_space(env)); } MDBX_val here, prev = {0, 0}; @@ -20869,7 +20887,7 @@ __cold static int 
page_check(const MDBX_cursor *const mc, if (IS_LEAF2(mp)) { const char *const key = page_leaf2key(mp, i, leaf2_ksize); if (unlikely(end_of_page < key + leaf2_ksize)) { - rc = bad_page(mp, "leaf2-key beyond (%zu) page-end\n", + rc = bad_page(mp, "leaf2-item beyond (%zu) page-end\n", key + leaf2_ksize - end_of_page); continue; } @@ -20878,7 +20896,7 @@ __cold static int page_check(const MDBX_cursor *const mc, if (unlikely(leaf2_ksize < mc->mc_dbx->md_klen_min || leaf2_ksize > mc->mc_dbx->md_klen_max)) rc = bad_page( - mp, "leaf2-key size (%zu) <> min/max key-length (%zu/%zu)\n", + mp, "leaf2-item size (%zu) <> min/max length (%zu/%zu)\n", leaf2_ksize, mc->mc_dbx->md_klen_min, mc->mc_dbx->md_klen_max); else mc->mc_dbx->md_klen_min = mc->mc_dbx->md_klen_max = leaf2_ksize; @@ -20887,7 +20905,7 @@ __cold static int page_check(const MDBX_cursor *const mc, here.iov_base = (void *)key; here.iov_len = leaf2_ksize; if (prev.iov_base && unlikely(mc->mc_dbx->md_cmp(&prev, &here) >= 0)) - rc = bad_page(mp, "leaf2-key #%zu wrong order (%s >= %s)\n", i, + rc = bad_page(mp, "leaf2-item #%zu wrong order (%s >= %s)\n", i, DKEY(&prev), DVAL(&here)); prev = here; } @@ -21299,6 +21317,8 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, DKBUF; MDBX_page *const mp = mc->mc_pg[mc->mc_top]; + cASSERT(mc, (mp->mp_flags & P_ILL_BITS) == 0); + const size_t newindx = mc->mc_ki[mc->mc_top]; size_t nkeys = page_numkeys(mp); if (AUDIT_ENABLED()) { @@ -21414,6 +21434,15 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, if (page_room(mn.mc_pg[ptop]) < branch_size(env, &sepkey)) split_indx = minkeys; } + if (foliage) { + TRACE("pure-left: foliage %u, top %i, ptop %zu, split_indx %zi, " + "minkeys %zi, sepkey %s, parent-room %zu, need4split %zu", + foliage, mc->mc_top, ptop, split_indx, minkeys, + DKEY_DEBUG(&sepkey), page_room(mc->mc_pg[ptop]), + branch_size(env, &sepkey)); + TRACE("pure-left: newkey %s, newdata %s, newindx %zu", + DKEY_DEBUG(newkey), 
DVAL_DEBUG(newdata), newindx); + } } } @@ -21459,6 +21488,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, mp->mp_lower += sizeof(indx_t); cASSERT(mc, mp->mp_upper >= ksize - sizeof(indx_t)); mp->mp_upper -= (indx_t)(ksize - sizeof(indx_t)); + cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->mp_upper) & 1) == 0); } else { memcpy(sister->mp_ptrs, split, distance * ksize); void *const ins = page_leaf2key(sister, distance, ksize); @@ -21471,6 +21501,8 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, sister->mp_upper -= (indx_t)(ksize - sizeof(indx_t)); cASSERT(mc, distance <= (int)UINT16_MAX); mc->mc_ki[mc->mc_top] = (indx_t)distance; + cASSERT(mc, + (((ksize & page_numkeys(sister)) ^ sister->mp_upper) & 1) == 0); } if (AUDIT_ENABLED()) { From f0cfedc26f5557ab5283868765897fbe326a82f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 20 Feb 2024 11:20:09 +0300 Subject: [PATCH 102/443] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`extra/dupfixed=5Faddodd?= =?UTF-8?q?`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 5 ++ test/extra/dupfixed_addodd.c | 93 ++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 test/extra/dupfixed_addodd.c diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 232ff2e6..ac11ef63 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -73,6 +73,10 @@ if(UNIX AND NOT SUBPROJECT) target_include_directories(test_extra_upsert_alldups PRIVATE "${PROJECT_SOURCE_DIR}") target_link_libraries(test_extra_upsert_alldups ${TOOL_MDBX_LIB}) + add_executable(test_extra_dupfixed_addodd extra/dupfixed_addodd.c) + target_include_directories(test_extra_dupfixed_addodd PRIVATE "${PROJECT_SOURCE_DIR}") + 
target_link_libraries(test_extra_dupfixed_addodd ${TOOL_MDBX_LIB}) + if(MDBX_BUILD_CXX) add_executable(test_extra_maindb_ordinal extra/maindb_ordinal.c++) target_include_directories(test_extra_maindb_ordinal PRIVATE "${PROJECT_SOURCE_DIR}") @@ -183,6 +187,7 @@ else() if(UNIX AND NOT SUBPROJECT) add_test(NAME extra_upsert_alldups COMMAND test_extra_upsert_alldups) + add_test(NAME extra_dupfixed_addodd COMMAND test_extra_dupfixed_addodd) if(MDBX_BUILD_CXX) add_test(NAME extra_maindb_ordinal COMMAND test_extra_maindb_ordinal) add_test(NAME extra_dupfixed_multiple COMMAND test_extra_dupfixed_multiple) diff --git a/test/extra/dupfixed_addodd.c b/test/extra/dupfixed_addodd.c new file mode 100644 index 00000000..da9ba944 --- /dev/null +++ b/test/extra/dupfixed_addodd.c @@ -0,0 +1,93 @@ +/* + * @Dvirsw (https://t.me/Dvirsw) + * I think there is a bug with DUPFIXED. The following code fails. + * + * https://t.me/libmdbx/5368 + */ + +#include +#include + +#include "mdbx.h" +#include +#include +#include +#include +#include +#include +#include +#include + +int main() { + int rc; + MDBX_env *env = NULL; + MDBX_dbi dbi = 0; + MDBX_val key, data; + MDBX_txn *txn = NULL; + + rc = mdbx_env_create(&env); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_env_create: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } + + rc = mdbx_env_set_maxdbs(env, 1); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_env_create: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } + + rc = mdbx_env_open(env, "./example-db", + MDBX_NOSUBDIR | MDBX_COALESCE | MDBX_LIFORECLAIM, 0664); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_env_open: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } + + rc = mdbx_txn_begin(env, NULL, 0, &txn); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_txn_begin: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } + + rc = mdbx_dbi_open(txn, "test", MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_CREATE, + &dbi); + if (rc != 
MDBX_SUCCESS) { + fprintf(stderr, "mdbx_dbi_open: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } + + char key_bytes[32] = {0}; + key.iov_len = 32; + key.iov_base = key_bytes; + + // Another put after this will fail. + unsigned char idx; + for (idx = 0; idx < 129; idx++) { + char data_bytes[15] = {idx}; + data.iov_len = 15; + data.iov_base = data_bytes; + rc = mdbx_put(txn, dbi, &key, &data, 0); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_put: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } + } + + // This will fail and exit. + char data_bytes[15] = {idx}; + data.iov_len = 15; + data.iov_base = data_bytes; + rc = mdbx_put(txn, dbi, &key, &data, 0); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_put: (%d) %s\n", rc, mdbx_strerror(rc)); + fprintf(stderr, "expected failure\n"); + exit(EXIT_FAILURE); + } + + rc = mdbx_txn_commit(txn); + if (rc) { + fprintf(stderr, "mdbx_txn_commit: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } +} From ba719ef12a413f5dba1ddc4c3879627f859ab4ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 21 Feb 2024 01:28:51 +0300 Subject: [PATCH 103/443] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D1=80=D0=B0?= =?UTF-8?q?=D0=B1=D0=BE=D1=82=D0=BA=D0=B0=20after-fork=20=D1=81=D1=86?= =?UTF-8?q?=D0=B5=D0=BD=D0=B0=D1=80=D0=B8=D0=B5=D0=B2=20=D1=81=20=D1=83?= =?UTF-8?q?=D1=81=D1=82=D1=80=D0=B0=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=D0=BC=20?= =?UTF-8?q?=D0=BB=D0=BE=D0=B3=D0=B8=D1=87=D0=B5=D1=81=D0=BA=D0=BE=D0=B9=20?= =?UTF-8?q?=D0=BE=D1=88=D0=B8=D0=B1=D0=BA=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/fork.c++ | 93 +++++++++++++++++++++++++++++++++++++++++++++------ test/test.h++ | 5 +-- 2 files changed, 86 insertions(+), 12 deletions(-) diff --git a/test/fork.c++ b/test/fork.c++ index 7f1c9b19..81af98b4 100644 --- a/test/fork.c++ +++ 
b/test/fork.c++ @@ -22,13 +22,58 @@ class testcase_smoke4fork : public testcase { using inherited = testcase; +protected: + bool dbi_invalid{true}; + bool dbi_stable{false}; + unsigned dbi_state{0}; + public: testcase_smoke4fork(const actor_config &config, const mdbx_pid_t pid) : testcase(config, pid) {} + virtual void txn_end(bool abort) override; bool run() override; virtual bool smoke() = 0; + bool open_dbi(); }; +bool testcase_smoke4fork::open_dbi() { + if (!dbi || dbi_invalid) { + if (dbi_stable || + (mdbx_txn_flags(txn_guard.get()) & int(MDBX_TXN_RDONLY)) == 0) { + dbi = db_table_open(!dbi_stable); + dbi_invalid = false; + } + } + + dbi_state = 0; + if (dbi && !dbi_invalid) { + unsigned unused_dbi_flags; + int err = + mdbx_dbi_flags_ex(txn_guard.get(), dbi, &unused_dbi_flags, &dbi_state); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror("mdbx_dbi_flags_ex()", err); + if ((dbi_state & (MDBX_DBI_CREAT | MDBX_DBI_FRESH)) == 0) + dbi_stable = true; + } + return !dbi_invalid; +} + +void testcase_smoke4fork::txn_end(bool abort) { + if (dbi) { + if (abort) { + if (dbi_state & MDBX_DBI_CREAT) + dbi_stable = false; + if (dbi_state & MDBX_DBI_FRESH) + dbi_invalid = true; + } else { + if (dbi_state & (MDBX_DBI_CREAT | MDBX_DBI_FRESH)) + dbi_stable = true; + } + dbi_state = 0; + } + inherited::txn_end(abort); +} + bool testcase_smoke4fork::run() { static std::vector history; const pid_t current_pid = getpid(); @@ -52,6 +97,7 @@ bool testcase_smoke4fork::run() { current_pid, mdbx_strerror(err)); return false; } + open_dbi(); if (flipcoin()) { if (!smoke()) { @@ -65,11 +111,11 @@ bool testcase_smoke4fork::run() { log_verbose("%s[deep %d, pid %d] probe %s", "pre-fork", deep, current_pid, "skipped"); #ifdef __SANITIZE_ADDRESS__ - const bool abort_txn_to_avoid_memleak = true; + const bool commit_txn_to_avoid_memleak = true; #else - const bool abort_txn_to_avoid_memleak = !RUNNING_ON_VALGRIND && flipcoin(); + const bool commit_txn_to_avoid_memleak = !RUNNING_ON_VALGRIND 
&& flipcoin(); #endif - if (abort_txn_to_avoid_memleak && txn_guard) + if (commit_txn_to_avoid_memleak && txn_guard) txn_end(false); } @@ -90,8 +136,14 @@ bool testcase_smoke4fork::run() { log_flush(); if (err != MDBX_SUCCESS) failure_perror("mdbx_env_resurrect_after_fork()", err); - if (txn_guard) + if (txn_guard) { + if (dbi_state & MDBX_DBI_CREAT) + dbi_invalid = true; + // if (dbi_state & MDBX_DBI_FRESH) + // dbi_invalid = true; + dbi_state = 0; mdbx_txn_abort(txn_guard.release()); + } if (!smoke()) { log_notice("%s[deep %d, pid %d] probe %s", "fork-child", deep, new_pid, "failed"); @@ -182,9 +234,19 @@ bool testcase_forkread::smoke() { failure_perror("mdbx_env_info_ex()", err); uint64_t seq; - err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 0); - if (unlikely(err != MDBX_SUCCESS)) - failure_perror("mdbx_dbi_sequence(get)", err); + if (dbi_invalid) { + err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 0); + if (unlikely(err != (dbi ? MDBX_BAD_DBI : MDBX_SUCCESS))) + failure("unexpected '%s' from mdbx_dbi_sequence(get, bad_dbi %d)", + mdbx_strerror(err), dbi); + open_dbi(); + } + if (!dbi_invalid) { + err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 0); + if (unlikely(err != MDBX_SUCCESS)) + failure("unexpected '%s' from mdbx_dbi_sequence(get, dbi %d)", + mdbx_strerror(err), dbi); + } txn_end(false); return true; } @@ -210,10 +272,21 @@ bool testcase_forkwrite::smoke() { if (!txn_guard) txn_begin(false); + uint64_t seq; - int err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 1); - if (unlikely(err != MDBX_SUCCESS)) - failure_perror("mdbx_dbi_sequence(inc)", err); + if (dbi_invalid) { + int err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 0); + if (unlikely(err != (dbi ? 
MDBX_BAD_DBI : MDBX_EACCESS))) + failure("unexpected '%s' from mdbx_dbi_sequence(get, bad_dbi %d)", + mdbx_strerror(err), dbi); + open_dbi(); + } + if (!dbi_invalid) { + int err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 1); + if (unlikely(err != MDBX_SUCCESS)) + failure("unexpected '%s' from mdbx_dbi_sequence(inc, dbi %d)", + mdbx_strerror(err), dbi); + } txn_end(false); if (!firstly_read && !testcase_forkread::smoke()) diff --git a/test/test.h++ b/test/test.h++ index 96d93a7c..b03b80e1 100644 --- a/test/test.h++ +++ b/test/test.h++ @@ -248,9 +248,10 @@ protected: void db_prepare(); void db_open(); void db_close(); - void txn_begin(bool readonly, MDBX_txn_flags_t flags = MDBX_TXN_READWRITE); + virtual void txn_begin(bool readonly, + MDBX_txn_flags_t flags = MDBX_TXN_READWRITE); int breakable_commit(); - void txn_end(bool abort); + virtual void txn_end(bool abort); int breakable_restart(); void txn_restart(bool abort, bool readonly, MDBX_txn_flags_t flags = MDBX_TXN_READWRITE); From 603ce05435dddbf865ae69631c9a9eb3e31ad947 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 22 Feb 2024 16:35:44 +0300 Subject: [PATCH 104/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20vlen=5Fmin=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20=D1=80=D0=B5=D0=B6=D0=B8=D0=BC=D0=B0=20dupfixed.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index cba3f642..6abf0d06 100644 --- a/src/core.c +++ b/src/core.c @@ -16295,7 +16295,7 @@ static int setup_dbx(MDBX_dbx *const dbx, const MDBX_db *const db, dbx->md_vlen_min = (db->md_flags & MDBX_INTEGERDUP) ? 4 /* sizeof(uint32_t) */ - : ((db->md_flags & MDBX_DUPFIXED) ? 1 : 0); + : ((db->md_flags & MDBX_DUPFIXED) ? 
sizeof(indx_t) : 0); dbx->md_vlen_max = valsize_max(pagesize, db->md_flags); assert(dbx->md_vlen_max != (size_t)-1); From b1dcd07be475114ddc9fd9759d22b9fbb31bfdc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 21 Feb 2024 01:28:19 +0300 Subject: [PATCH 105/443] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D1=88=D0=B8=D0=B1=D0=BA?= =?UTF-8?q?=D0=B8=20=D0=B2=20=D0=BF=D0=BE=D0=B4=D0=B4=D0=B5=D1=80=D0=B6?= =?UTF-8?q?=D0=BA=D0=B5=20MDBX=5FENABLE=5FDBI=5FLOCKFREE.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core.c b/src/core.c index 6abf0d06..e6528c7c 100644 --- a/src/core.c +++ b/src/core.c @@ -15742,6 +15742,7 @@ __cold static int env_close(MDBX_env *env, bool resurrect_after_fork) { next = ptr->next; osal_free(ptr); } + env->me_defer_free = nullptr; #endif /* MDBX_ENABLE_DBI_LOCKFREE */ if (!(env->me_flags & MDBX_RDONLY)) From fb5dbbdf202f0d0a682326f8af92682f3ace8915 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 20 Feb 2024 13:45:46 +0300 Subject: [PATCH 106/443] =?UTF-8?q?mdbx-test:=20=D1=81=D0=BE=D0=BE=D1=82?= =?UTF-8?q?=D0=B2=D0=B5=D1=82=D1=81=D1=82=D0=B2=D0=B8=D0=B5=20=D0=BF=D1=80?= =?UTF-8?q?=D0=BE=D1=82=D0=BE=D0=BA=D0=BE=D0=BB=D0=B8=D1=80=D1=83=D0=B5?= =?UTF-8?q?=D0=BC=D1=8B=D1=85=20=D0=B8=D0=BC=D0=B5=D0=BD=20=D1=82=D0=B5?= =?UTF-8?q?=D1=81=D1=82=D0=BE=D0=B2=20=D0=BE=D0=BF=D1=86=D0=B8=D1=8F=D0=BC?= =?UTF-8?q?=20=D0=BA=D0=BE=D0=BC=D0=B0=D0=BD=D0=B4=D0=BD=D0=BE=D0=B9=20?= =?UTF-8?q?=D1=81=D1=82=D1=80=D0=BE=D0=BA=D0=B8=20(=D0=BA=D0=BE=D1=81?= =?UTF-8?q?=D0=BC=D0=B5=D1=82=D0=B8=D0=BA=D0=B0).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- test/test.c++ | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test.c++ b/test/test.c++ index 79ca8a43..b2d9da86 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -24,9 +24,9 @@ const char *testcase2str(const actor_testcase testcase) { case ac_hill: return "hill"; case ac_deadread: - return "deadread"; + return "dead.reader"; case ac_deadwrite: - return "deadwrite"; + return "dead.writer"; case ac_jitter: return "jitter"; case ac_try: @@ -41,9 +41,9 @@ const char *testcase2str(const actor_testcase testcase) { return "nested"; #if !defined(_WIN32) && !defined(_WIN64) case ac_forkread: - return "forkread"; + return "fork.reader"; case ac_forkwrite: - return "forkwrite"; + return "fork.writer"; #endif /* Windows */ } } From 164d1125075f16e0f74afa451bd6c9d966e4c7b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 20 Feb 2024 23:52:09 +0300 Subject: [PATCH 107/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20chk-=D1=84=D1=83=D0=BD=D0=BA=D1=86?= =?UTF-8?q?=D0=B8=D0=BE=D0=BD=D0=B0=D0=BB=D0=B0=20=D1=81=20=D1=83=D1=81?= =?UTF-8?q?=D1=82=D1=80=D0=B0=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=D0=BC=20=D0=BE?= =?UTF-8?q?=D1=88=D0=B8=D0=B1=D0=BE=D0=BA=20=D0=B8=20=D0=BD=D0=B5=D0=B4?= =?UTF-8?q?=D0=BE=D1=87=D0=B5=D1=82=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 100 ++++++++++++++++++++++++++++--------------------- src/mdbx_chk.c | 3 +- 2 files changed, 60 insertions(+), 43 deletions(-) diff --git a/src/core.c b/src/core.c index e6528c7c..1ba69ff2 100644 --- a/src/core.c +++ b/src/core.c @@ -24529,32 +24529,35 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, (mp ? 
page_room(mp) : pagesize - header_size) - payload_size; size_t align_bytes = 0; - for (size_t i = 0; err == MDBX_SUCCESS && i < nentries; - align_bytes += ((payload_size + align_bytes) & 1), ++i) { + for (size_t i = 0; err == MDBX_SUCCESS && i < nentries; ++i) { if (type == MDBX_page_dupfixed_leaf) { /* LEAF2 pages have no mp_ptrs[] or node headers */ payload_size += mp->mp_leaf2_ksize; continue; } - MDBX_node *node = page_node(mp, i); - payload_size += NODESIZE + node_ks(node); + const MDBX_node *node = page_node(mp, i); + header_size += NODESIZE; + const size_t node_key_size = node_ks(node); + payload_size += node_key_size; if (type == MDBX_page_branch) { assert(i > 0 || node_ks(node) == 0); + align_bytes += node_key_size & 1; continue; } + const size_t node_data_size = node_ds(node); assert(type == MDBX_page_leaf); switch (node_flags(node)) { case 0 /* usual node */: - payload_size += node_ds(node); + payload_size += node_data_size; + align_bytes += (node_key_size + node_data_size) & 1; break; case F_BIGDATA /* long data on the large/overflow page */: { - payload_size += sizeof(pgno_t); const pgno_t large_pgno = node_largedata_pgno(node); - const size_t over_payload = node_ds(node); + const size_t over_payload = node_data_size; const size_t over_header = PAGEHDRSZ; npages = 1; @@ -24573,27 +24576,31 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, over_payload, over_header, over_unused); if (unlikely(rc != MDBX_SUCCESS)) return (rc == MDBX_RESULT_TRUE) ? 
MDBX_SUCCESS : rc; + payload_size += sizeof(pgno_t); + align_bytes += node_key_size & 1; } break; case F_SUBDATA /* sub-db */: { - const size_t namelen = node_ks(node); - payload_size += node_ds(node); - if (unlikely(namelen == 0 || node_ds(node) != sizeof(MDBX_db))) { + const size_t namelen = node_key_size; + if (unlikely(namelen == 0 || node_data_size != sizeof(MDBX_db))) { assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } + header_size += node_data_size; + align_bytes += (node_key_size + node_data_size) & 1; } break; case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */: - payload_size += sizeof(MDBX_db); - if (unlikely(node_ds(node) != sizeof(MDBX_db))) { + if (unlikely(node_data_size != sizeof(MDBX_db))) { assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } + header_size += node_data_size; + align_bytes += (node_key_size + node_data_size) & 1; break; case F_DUPDATA /* short sub-page */: { - if (unlikely(node_ds(node) <= PAGEHDRSZ)) { + if (unlikely(node_data_size <= PAGEHDRSZ || (node_data_size & 1))) { assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; break; @@ -24621,16 +24628,17 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, err = MDBX_CORRUPTED; } - for (size_t j = 0; err == MDBX_SUCCESS && j < nsubkeys; - subalign_bytes += ((subpayload_size + subalign_bytes) & 1), ++j) { - + for (size_t j = 0; err == MDBX_SUCCESS && j < nsubkeys; ++j) { if (subtype == MDBX_subpage_dupfixed_leaf) { /* LEAF2 pages have no mp_ptrs[] or node headers */ subpayload_size += sp->mp_leaf2_ksize; } else { assert(subtype == MDBX_subpage_leaf); - MDBX_node *subnode = page_node(sp, j); - subpayload_size += NODESIZE + node_ks(subnode) + node_ds(subnode); + const MDBX_node *subnode = page_node(sp, j); + const size_t subnode_size = node_ks(subnode) + node_ds(subnode); + subheader_size += NODESIZE; + subpayload_size += subnode_size; + subalign_bytes += subnode_size & 1; if (unlikely(node_flags(subnode) != 0)) { assert(err == MDBX_CORRUPTED); err = 
MDBX_CORRUPTED; @@ -24639,7 +24647,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, } const int rc = - ctx->mw_visitor(pgno, 0, ctx->mw_user, deep + 1, sdb, node_ds(node), + ctx->mw_visitor(pgno, 0, ctx->mw_user, deep + 1, sdb, node_data_size, subtype, err, nsubkeys, subpayload_size, subheader_size, subunused_size + subalign_bytes); if (unlikely(rc != MDBX_SUCCESS)) @@ -24647,7 +24655,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, header_size += subheader_size; unused_size += subunused_size; payload_size += subpayload_size; - align_bytes += subalign_bytes; + align_bytes += subalign_bytes + (node_key_size & 1); } break; default: @@ -27581,19 +27589,12 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, pagetype_caption, sizeof(long), header_bytes, env->me_psize - sizeof(long)); } - if (payload_bytes < 1) { - if (nentries > 1) { - chk_object_issue(scope, "page", pgno, "zero size-of-entry", - "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE - " entries", - pagetype_caption, payload_bytes, nentries); - } else { - chk_object_issue(scope, "page", pgno, "empty", - "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE - " entries, deep %i", - pagetype_caption, payload_bytes, nentries, deep); - sdb->pages.empty += 1; - } + if (nentries < 1 || (pagetype == MDBX_page_branch && nentries < 2)) { + chk_object_issue(scope, "page", pgno, nentries ? 
"half-empty" : "empty", + "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE + " entries, deep %i", + pagetype_caption, payload_bytes, nentries, deep); + sdb->pages.empty += 1; } if (npages) { @@ -28402,13 +28403,28 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) { chk_line_end( chk_puts(chk_line_begin(inner, MDBX_chk_verbose), "performs full check recent-txn-id with meta-pages")); - if (prefer_steady_txnid != chk->envinfo.mi_recent_txnid) { - chk_scope_issue( - inner, - "steady meta-%d txn-id mismatch recent-txn-id (%" PRIi64 - " != %" PRIi64 ")", - prefer_steady_metanum, prefer_steady_txnid, - chk->envinfo.mi_recent_txnid); + eASSERT(env, recent_txnid == chk->envinfo.mi_recent_txnid); + if (prefer_steady_txnid != recent_txnid) { + if ((chk->flags & MDBX_CHK_READWRITE) != 0 && + (env->me_flags & MDBX_RDONLY) == 0 && + recent_txnid > prefer_steady_txnid && + (chk->envinfo.mi_bootid.current.x | + chk->envinfo.mi_bootid.current.y) != 0 && + chk->envinfo.mi_bootid.current.x == + chk->envinfo.mi_bootid.meta[recent_metanum].x && + chk->envinfo.mi_bootid.current.y == + chk->envinfo.mi_bootid.meta[recent_metanum].y) { + chk_line_end( + chk_print(chk_line_begin(inner, MDBX_chk_verbose), + "recent meta-%u is weak, but boot-id match current" + " (will synced upon successful check)", + recent_metanum)); + } else + chk_scope_issue( + inner, + "steady meta-%d txn-id mismatch recent-txn-id (%" PRIi64 + " != %" PRIi64 ")", + prefer_steady_metanum, prefer_steady_txnid, recent_txnid); } } else if (chk->write_locked) { chk_line_end( @@ -28441,7 +28457,6 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) { //-------------------------------------------------------------------------- - eASSERT(env, err == MDBX_SUCCESS); if (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), "Skipping %s traversal...", "b-tree")); @@ -28699,7 +28714,8 @@ __cold int mdbx_env_chk(MDBX_env *env, const struct MDBX_chk_callbacks 
*cb, rc = chk_scope_begin( chk, 0, MDBX_chk_lock, nullptr, nullptr, "Taking %slock...", (env->me_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) ? "" : "read "); - if (likely(!rc) && (env->me_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0) { + if (likely(!rc) && (env->me_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0 && + (flags & MDBX_CHK_READWRITE)) { rc = mdbx_txn_lock(env, false); if (unlikely(rc)) chk_error_rc(ctx->scope, rc, "mdbx_txn_lock"); diff --git a/src/mdbx_chk.c b/src/mdbx_chk.c index 55e6f98d..4f0790c1 100644 --- a/src/mdbx_chk.c +++ b/src/mdbx_chk.c @@ -691,7 +691,8 @@ int main(int argc, char *argv[]) { bailout: if (env) { - const bool dont_sync = rc != 0 || chk.result.total_problems; + const bool dont_sync = rc != 0 || chk.result.total_problems || + (chk_flags & MDBX_CHK_READWRITE) == 0; mdbx_env_close_ex(env, dont_sync); } flush(); From 544c6bc1e49b46d35c1d6a2b8eae64d3b2c11e7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 20 Feb 2024 13:45:12 +0300 Subject: [PATCH 108/443] =?UTF-8?q?mdbx-test:=20=D0=BF=D0=BE=D0=B4=D0=B4?= =?UTF-8?q?=D0=B5=D1=80=D0=B6=D0=BA=D0=B0=20rnd/rand/random=20=D0=B4=D0=BB?= =?UTF-8?q?=D1=8F=20=D0=BE=D0=BF=D1=86=D0=B8=D0=B9=20`--keylen`=20=D0=B8?= =?UTF-8?q?=20`--datalen`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/config.c++ | 14 +++++++++++++- test/config.h++ | 2 +- test/main.c++ | 50 +++++++++++++++++++++++++++++-------------------- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/test/config.c++ b/test/config.c++ index 922f7b37..acad8fb6 100644 --- a/test/config.c++ +++ b/test/config.c++ @@ -145,6 +145,16 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, return true; } + if (strcmp(value_cstr, "rnd") == 0 || strcmp(value_cstr, "rand") == 0 || + strcmp(value_cstr, "random") == 0) { + value = minval; + if (maxval > 
minval) + value += (prng32() + UINT64_C(44263400549519813)) % (maxval - minval); + if (scale == intkey) + value &= ~3u; + return true; + } + char *suffix = nullptr; errno = 0; unsigned long long raw = strtoull(value_cstr, &suffix, 0); @@ -159,7 +169,7 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, uint64_t multiplier = 1; if (suffix && *suffix) { - if (scale == no_scale) + if (scale == no_scale || scale == intkey) failure("Option '--%s' doesn't accepts suffixes, so '%s' is unexpected\n", option, suffix); if (strcmp(suffix, "K") == 0 || strcasecmp(suffix, "Kilo") == 0) @@ -203,6 +213,8 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, if (value < minval) failure("The minimal value for option '--%s' is %" PRIu64 "\n", option, minval); + if (scale == intkey) + value &= ~3u; return true; } diff --git a/test/config.h++ b/test/config.h++ index 80996157..be369171 100644 --- a/test/config.h++ +++ b/test/config.h++ @@ -63,7 +63,7 @@ const char *keygencase2str(const keygen_case); namespace config { -enum scale_mode { no_scale, decimal, binary, duration }; +enum scale_mode { no_scale, decimal, binary, duration, intkey }; bool parse_option(int argc, char *const argv[], int &narg, const char *option, const char **value, const char *default_value = nullptr); diff --git a/test/main.c++ b/test/main.c++ index 6242a05d..fe159142 100644 --- a/test/main.c++ +++ b/test/main.c++ @@ -468,49 +468,59 @@ int main(int argc, char *const argv[]) { keycase_setup(value, params); continue; } - if (config::parse_option(argc, argv, narg, "keylen.min", params.keylen_min, - config::no_scale, params.mdbx_keylen_min(), - params.mdbx_keylen_max())) { + if (config::parse_option( + argc, argv, narg, "keylen.min", params.keylen_min, + (params.table_flags & MDBX_INTEGERKEY) ? 
config::intkey + : config::no_scale, + params.mdbx_keylen_min(), params.mdbx_keylen_max())) { if ((params.table_flags & MDBX_INTEGERKEY) || params.keylen_max < params.keylen_min) params.keylen_max = params.keylen_min; continue; } - if (config::parse_option(argc, argv, narg, "keylen.max", params.keylen_max, - config::no_scale, params.mdbx_keylen_min(), - params.mdbx_keylen_max())) { + if (config::parse_option( + argc, argv, narg, "keylen.max", params.keylen_max, + (params.table_flags & MDBX_INTEGERKEY) ? config::intkey + : config::no_scale, + params.mdbx_keylen_min(), params.mdbx_keylen_max())) { if ((params.table_flags & MDBX_INTEGERKEY) || params.keylen_min > params.keylen_max) params.keylen_min = params.keylen_max; continue; } - if (config::parse_option(argc, argv, narg, "keylen", params.keylen_min, - config::no_scale, params.mdbx_keylen_min(), - params.mdbx_keylen_max())) { + if (config::parse_option( + argc, argv, narg, "keylen", params.keylen_min, + (params.table_flags & MDBX_INTEGERKEY) ? config::intkey + : config::no_scale, + params.mdbx_keylen_min(), params.mdbx_keylen_max())) { params.keylen_max = params.keylen_min; continue; } - if (config::parse_option(argc, argv, narg, "datalen.min", - params.datalen_min, config::no_scale, - params.mdbx_datalen_min(), - params.mdbx_datalen_max())) { + if (config::parse_option( + argc, argv, narg, "datalen.min", params.datalen_min, + (params.table_flags & MDBX_INTEGERDUP) ? config::intkey + : config::no_scale, + params.mdbx_datalen_min(), params.mdbx_datalen_max())) { if ((params.table_flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) || params.datalen_max < params.datalen_min) params.datalen_max = params.datalen_min; continue; } - if (config::parse_option(argc, argv, narg, "datalen.max", - params.datalen_max, config::no_scale, - params.mdbx_datalen_min(), - params.mdbx_datalen_max())) { + if (config::parse_option( + argc, argv, narg, "datalen.max", params.datalen_max, + (params.table_flags & MDBX_INTEGERDUP) ? 
config::intkey + : config::no_scale, + params.mdbx_datalen_min(), params.mdbx_datalen_max())) { if ((params.table_flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) || params.datalen_min > params.datalen_max) params.datalen_min = params.datalen_max; continue; } - if (config::parse_option(argc, argv, narg, "datalen", params.datalen_min, - config::no_scale, params.mdbx_datalen_min(), - params.mdbx_datalen_max())) { + if (config::parse_option( + argc, argv, narg, "datalen", params.datalen_min, + (params.table_flags & MDBX_INTEGERDUP) ? config::intkey + : config::no_scale, + params.mdbx_datalen_min(), params.mdbx_datalen_max())) { params.datalen_max = params.datalen_min; continue; } From c5ac7b25c90a722c13fdf1a26b340f7663100f45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 20 Feb 2024 22:51:35 +0300 Subject: [PATCH 109/443] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D1=80=D0=B0?= =?UTF-8?q?=D0=B1=D0=BE=D1=82=D0=BA=D0=B0=20=D0=B3=D0=B5=D0=BD=D0=B5=D1=80?= =?UTF-8?q?=D0=B0=D1=82=D0=BE=D1=80=D0=B0=20=D0=BF=D0=B0=D1=80=20key-value?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20=D0=BF=D0=BE=D0=B4=D0=B4=D0=B5=D1=80?= =?UTF-8?q?=D0=B6=D0=BA=D0=B8=20=D0=BA=D0=BE=D1=80=D0=BE=D1=82=D0=BA=D0=B8?= =?UTF-8?q?=D1=85=20=D0=BA=D0=BB=D1=8E=D1=87=D0=B5=D0=B9=20=D0=B2=20=D1=80?= =?UTF-8?q?=D0=B5=D0=B6=D0=B8=D0=BC=D0=B5=20`MDBX=5FDUPFIXED`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/keygen.c++ | 56 +++++++++++++++++++++++++++++++++---------------- test/keygen.h++ | 10 +++------ 2 files changed, 41 insertions(+), 25 deletions(-) diff --git a/test/keygen.c++ b/test/keygen.c++ index a6d20f33..1829e0db 100644 --- a/test/keygen.c++ +++ b/test/keygen.c++ @@ -79,7 +79,7 @@ void __hot maker::pair(serial_t serial, const buffer &key, buffer &value, unsigned(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT)))); assert(!(value_essentials.flags & 
~(essentials::prng_fill_flag | - unsigned(MDBX_INTEGERDUP | MDBX_REVERSEDUP)))); + unsigned(MDBX_INTEGERDUP | MDBX_REVERSEDUP | MDBX_DUPFIXED)))); log_trace("keygen-pair: serial %" PRIu64 ", data-age %" PRIu64, serial, value_age); @@ -126,15 +126,14 @@ void __hot maker::pair(serial_t serial, const buffer &key, buffer &value, actor_params::serial_mask(mapping.split); } - value_serial |= value_age << mapping.split; log_trace("keygen-pair: split@%u => k%" PRIu64 ", v%" PRIu64, mapping.split, key_serial, value_serial); } log_trace("keygen-pair: key %" PRIu64 ", value %" PRIu64, key_serial, value_serial); - mk_begin(key_serial, key_essentials, *key); - mk_begin(value_serial, value_essentials, *value); + key_serial = mk_begin(key_serial, key_essentials, *key); + value_serial = mk_begin(value_serial, value_essentials, *value); #if 0 /* unused for now */ if (key->value.iov_len + value->value.iov_len > pair_maxlen) { @@ -193,11 +192,13 @@ void maker::setup(const config::actor_params_pod &actor, unsigned actor_id, unsigned thread_number) { #if CONSTEXPR_ENUM_FLAGS_OPERATIONS static_assert(unsigned(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT | - MDBX_INTEGERDUP | MDBX_REVERSEDUP) < UINT16_MAX, + MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP) < + UINT16_MAX, "WTF?"); #else assert(unsigned(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT | - MDBX_INTEGERDUP | MDBX_REVERSEDUP) < UINT16_MAX); + MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP) < + UINT16_MAX); #endif key_essentials.flags = uint16_t( actor.table_flags & @@ -205,20 +206,19 @@ void maker::setup(const config::actor_params_pod &actor, unsigned actor_id, assert(actor.keylen_min <= UINT16_MAX); key_essentials.minlen = uint16_t(actor.keylen_min); assert(actor.keylen_max <= UINT32_MAX); - key_essentials.maxlen = - std::min(uint32_t(actor.keylen_max), - uint32_t(mdbx_limits_keysize_max( - actor.pagesize, MDBX_db_flags_t(key_essentials.flags)))); + key_essentials.maxlen = std::min( + uint32_t(actor.keylen_max), + 
uint32_t(mdbx_limits_keysize_max(actor.pagesize, actor.table_flags))); value_essentials.flags = uint16_t( - actor.table_flags & MDBX_db_flags_t(MDBX_INTEGERDUP | MDBX_REVERSEDUP)); + actor.table_flags & + MDBX_db_flags_t(MDBX_INTEGERDUP | MDBX_REVERSEDUP | MDBX_DUPFIXED)); assert(actor.datalen_min <= UINT16_MAX); value_essentials.minlen = uint16_t(actor.datalen_min); assert(actor.datalen_max <= UINT32_MAX); - value_essentials.maxlen = - std::min(uint32_t(actor.datalen_max), - uint32_t(mdbx_limits_valsize_max( - actor.pagesize, MDBX_db_flags_t(key_essentials.flags)))); + value_essentials.maxlen = std::min( + uint32_t(actor.datalen_max), + uint32_t(mdbx_limits_valsize_max(actor.pagesize, actor.table_flags))); if (!actor.keygen.zero_fill) { key_essentials.flags |= essentials::prng_fill_flag; @@ -227,6 +227,16 @@ void maker::setup(const config::actor_params_pod &actor, unsigned actor_id, (void)thread_number; mapping = actor.keygen; + while (mapping.split > + essentials::value_age_width + value_essentials.maxlen * CHAR_BIT || + mapping.split >= mapping.width) + mapping.split -= 1; + + while (unsigned((actor.table_flags & MDBX_DUPSORT) + ? mapping.width - mapping.split + : mapping.width) > key_essentials.maxlen * CHAR_BIT) + mapping.width -= 1; + salt = (actor.keygen.seed + uint64_t(actor_id)) * UINT64_C(14653293970879851569); @@ -307,11 +317,20 @@ buffer alloc(size_t limit) { return buffer(ptr); } -void __hot maker::mk_begin(const serial_t serial, const essentials ¶ms, - result &out) { +serial_t __hot maker::mk_begin(serial_t serial, const essentials ¶ms, + result &out) { assert(out.limit >= params.maxlen); assert(params.maxlen >= params.minlen); - assert(params.maxlen >= length(serial)); + if (params.maxlen < sizeof(serial_t)) { + const serial_t max = actor_params::serial_mask(params.maxlen * CHAR_BIT); + if (serial > max) { + serial ^= (serial >> max / 2) * serial_t((sizeof(serial_t) > 4) + ? 
UINT64_C(40719303417517073) + : UINT32_C(3708688457)); + serial &= max; + } + assert(params.maxlen >= length(serial)); + } out.value.iov_len = std::max(unsigned(params.minlen), length(serial)); const auto variation = params.maxlen - params.minlen; @@ -328,6 +347,7 @@ void __hot maker::mk_begin(const serial_t serial, const essentials ¶ms, assert(length(serial) <= out.value.iov_len); assert(out.value.iov_len >= params.minlen); assert(out.value.iov_len <= params.maxlen); + return serial; } void __hot maker::mk_continue(const serial_t serial, const essentials ¶ms, diff --git a/test/keygen.h++ b/test/keygen.h++ index 9e2410fd..8c49eabb 100644 --- a/test/keygen.h++ +++ b/test/keygen.h++ @@ -108,19 +108,15 @@ class maker { struct essentials { uint16_t minlen{0}; - enum { prng_fill_flag = 1 }; + enum { prng_fill_flag = 1, value_age_width = 8 }; uint16_t flags{0}; uint32_t maxlen{0}; } key_essentials, value_essentials; - static void mk_begin(const serial_t serial, const essentials ¶ms, - result &out); + static serial_t mk_begin(serial_t serial, const essentials ¶ms, + result &out); static void mk_continue(const serial_t serial, const essentials ¶ms, result &out); - static void mk(const serial_t serial, const essentials ¶ms, result &out) { - mk_begin(serial, params, out); - mk_continue(serial, params, out); - } public: void pair(serial_t serial, const buffer &key, buffer &value, From 3373631cffc0a20831553700eca90f665a1814e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 3 Mar 2024 17:38:23 +0300 Subject: [PATCH 110/443] =?UTF-8?q?mdbx-test:=20=D1=83=D0=BD=D0=B8=D1=84?= =?UTF-8?q?=D0=B8=D0=BA=D0=B0=D1=86=D0=B8=D1=8F=20PRNG=20=D0=B8=20=D0=B8?= =?UTF-8?q?=D0=B7=D0=BC=D0=B5=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF?= =?UTF-8?q?=D1=86=D0=B8=D0=B8=20=D0=BA=D0=BE=D0=BC=D0=B0=D0=BD=D0=B4=D0=BD?= =?UTF-8?q?=D0=BE=D0=B9=20=D1=81=D1=82=D1=80=D0=BE=D0=BA=D0=B8=20=D0=BD?= 
=?UTF-8?q?=D0=B0=20`--prng-seed`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 4 +- test/append.c++ | 6 +-- test/cases.c++ | 13 +++--- test/config.c++ | 2 +- test/config.h++ | 1 + test/hill.c++ | 2 +- test/keygen.c++ | 4 +- test/keygen.h++ | 3 +- test/long_stochastic.sh | 95 +++++++++++++++++------------------------ test/main.c++ | 10 +++-- test/nested.c++ | 5 +-- test/test.c++ | 4 +- test/test.h++ | 9 ++-- test/ttl.c++ | 5 +-- test/utils.c++ | 2 +- test/utils.h++ | 1 + 16 files changed, 75 insertions(+), 91 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ac11ef63..7098cfed 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -122,7 +122,7 @@ else() add_test(NAME smoke COMMAND ${MDBX_OUTPUT_DIR}/mdbx_test --loglevel=verbose - --keygen.seed=${test_seed} + --prng-seed=${test_seed} --progress --console=no --pathname=smoke.db --dont-cleanup-after basic) set_tests_properties(smoke PROPERTIES TIMEOUT 600 @@ -144,7 +144,7 @@ else() add_test(NAME dupsort_writemap COMMAND ${MDBX_OUTPUT_DIR}/mdbx_test --loglevel=notice - --keygen.seed=${test_seed} + --prng-seed=${test_seed} --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=2 --pathname=dupsort_writemap.db --dont-cleanup-after basic) set_tests_properties(dupsort_writemap PROPERTIES diff --git a/test/append.c++ b/test/append.c++ index d2486001..5c7adf3d 100644 --- a/test/append.c++ +++ b/test/append.c++ @@ -20,8 +20,8 @@ public: : testcase(config, pid) {} bool run() override; - static bool review_params(actor_params ¶ms) { - if (!testcase::review_params(params)) + static bool review_params(actor_params ¶ms, unsigned space_id) { + if (!testcase::review_params(params, space_id)) return false; const bool ordered = !flipcoin_x3(); log_notice("the '%s' key-generation mode is selected", @@ -45,7 +45,7 @@ bool testcase_append::run() { } cursor_open(dbi); - 
keyvalue_maker.setup(config.params, config.actor_id, 0 /* thread_number */); + keyvalue_maker.setup(config.params, 0 /* thread_number */); /* LY: тест наполнения таблиц в append-режиме, * при котором записи добавляются строго в конец (в порядке сортировки) */ const MDBX_put_flags_t flags = diff --git a/test/cases.c++ b/test/cases.c++ index 5ccb87ae..1c650a91 100644 --- a/test/cases.c++ +++ b/test/cases.c++ @@ -41,8 +41,9 @@ testcase *registry::create_actor(const actor_config &config, } bool registry::review_actor_params(const actor_testcase id, - actor_params ¶ms) { - return instance()->id2record.at(id)->review_params(params); + actor_params ¶ms, + const unsigned space_id) { + return instance()->id2record.at(id)->review_params(params, space_id); } //----------------------------------------------------------------------------- @@ -78,13 +79,13 @@ void configure_actor(unsigned &last_space_id, const actor_testcase testcase, failure("The '%s' is unexpected for space-id\n", end); } - if (!registry::review_actor_params(testcase, params)) - failure("Actor config-review failed for space-id %lu\n", space_id); - if (space_id > ACTOR_ID_MAX) failure("Invalid space-id %lu\n", space_id); - last_space_id = unsigned(space_id); + if (!registry::review_actor_params(testcase, params, unsigned(space_id))) + failure("Actor config-review failed for space-id %lu\n", space_id); + + last_space_id = unsigned(space_id); log_trace("configure_actor: space %lu for %s", space_id, testcase2str(testcase)); global::actors.emplace_back( diff --git a/test/config.c++ b/test/config.c++ index acad8fb6..f675dee1 100644 --- a/test/config.c++ +++ b/test/config.c++ @@ -434,6 +434,7 @@ void dump(const char *title) { log_verbose("#%u, testcase %s, space_id/table %u\n", i->actor_id, testcase2str(i->testcase), i->space_id); indent.push(); + log_verbose("prng-seed: %u\n", i->params.prng_seed); if (i->params.loglevel) { log_verbose("log: level %u, %s\n", i->params.loglevel, @@ -473,7 +474,6 @@ void dump(const 
char *title) { i->params.keygen.mesh, i->params.keygen.rotate, i->params.keygen.offset, i->params.keygen.split, i->params.keygen.width - i->params.keygen.split); - log_verbose("keygen.seed: %u\n", i->params.keygen.seed); log_verbose("keygen.zerofill: %s\n", i->params.keygen.zero_fill ? "Yes" : "No"); log_verbose("key: minlen %u, maxlen %u\n", i->params.keylen_min, diff --git a/test/config.h++ b/test/config.h++ index be369171..18617e34 100644 --- a/test/config.h++ +++ b/test/config.h++ @@ -274,6 +274,7 @@ struct actor_params_pod { unsigned batch_read{0}; unsigned batch_write{0}; + unsigned prng_seed{0}; unsigned delaystart{0}; unsigned waitfor_nops{0}; unsigned inject_writefaultn{0}; diff --git a/test/hill.c++ b/test/hill.c++ index 79234b7d..f5ca1026 100644 --- a/test/hill.c++ +++ b/test/hill.c++ @@ -52,7 +52,7 @@ bool testcase_hill::run() { speculum_committed.clear(); /* TODO: работа в несколько потоков */ - keyvalue_maker.setup(config.params, config.actor_id, 0 /* thread_number */); + keyvalue_maker.setup(config.params, 0 /* thread_number */); keygen::buffer a_key = keygen::alloc(config.params.keylen_max); keygen::buffer a_data_0 = keygen::alloc(config.params.datalen_max); diff --git a/test/keygen.c++ b/test/keygen.c++ index 1829e0db..46b64ecf 100644 --- a/test/keygen.c++ +++ b/test/keygen.c++ @@ -188,7 +188,7 @@ void __hot maker::pair(serial_t serial, const buffer &key, buffer &value, log_pair(logging::trace, "kv", key, value); } -void maker::setup(const config::actor_params_pod &actor, unsigned actor_id, +void maker::setup(const config::actor_params_pod &actor, unsigned thread_number) { #if CONSTEXPR_ENUM_FLAGS_OPERATIONS static_assert(unsigned(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT | @@ -238,7 +238,7 @@ void maker::setup(const config::actor_params_pod &actor, unsigned actor_id, mapping.width -= 1; salt = - (actor.keygen.seed + uint64_t(actor_id)) * UINT64_C(14653293970879851569); + (prng_state + uint64_t(thread_number)) * 
UINT64_C(14653293970879851569); base = actor.serial_base(); } diff --git a/test/keygen.h++ b/test/keygen.h++ index 8c49eabb..0ded8130 100644 --- a/test/keygen.h++ +++ b/test/keygen.h++ @@ -121,8 +121,7 @@ class maker { public: void pair(serial_t serial, const buffer &key, buffer &value, serial_t value_age, const bool keylen_changeable); - void setup(const config::actor_params_pod &actor, unsigned actor_id, - unsigned thread_number); + void setup(const config::actor_params_pod &actor, unsigned thread_number); bool is_unordered() const; void seek2end(serial_t &serial) const; diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index ff73726c..b508b5f6 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -453,91 +453,72 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 split=30 caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE 
--size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} split=24 caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - 
--keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} split=16 caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops 
--batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min 
--datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} if [ "$EXTRA" != "no" ]; then split=10 caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M 
--table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} fi split=4 caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE 
--size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} done # options loop=$((loop + 1)) if [ -n "$LOOPS" ] && [ $loop -ge 
"$LOOPS" ]; then break; fi diff --git a/test/main.c++ b/test/main.c++ index fe159142..84ab801c 100644 --- a/test/main.c++ +++ b/test/main.c++ @@ -37,6 +37,7 @@ MDBX_NORETURN void usage(void) { " --console[=yes/no] Enable/disable console-like output\n" " --cleanup-before[=YES/no] Cleanup/remove and re-create database\n" " --cleanup-after[=YES/no] Cleanup/remove database after completion\n" + " --prng-seed=N Seed PRNG\n" "Database size control:\n" " --pagesize=... Database page size: min, max, 256..65536\n" " --size-lower=N[K|M|G|T] Lower-bound of size in Kb/Mb/Gb/Tb\n" @@ -88,7 +89,6 @@ MDBX_NORETURN void usage(void) { " --datalen=N Set both min/max for data length\n" " --keygen.width=N TBD (see the source code)\n" " --keygen.mesh=N TBD (see the source code)\n" - " --keygen.seed=N TBD (see the source code)\n" " --keygen.zerofill=yes|NO TBD (see the source code)\n" " --keygen.split=N TBD (see the source code)\n" " --keygen.rotate=N TBD (see the source code)\n" @@ -144,7 +144,7 @@ void actor_params::set_defaults(const std::string &tmpdir) { growth_step = -1; pagesize = -1; - keygen.seed = 1; + prng_seed = 0; keygen.zero_fill = false; keygen.keycase = kc_random; keygen.width = (table_flags & MDBX_DUPSORT) ? 
32 : 64; @@ -449,9 +449,11 @@ int main(int argc, char *const argv[]) { if (config::parse_option(argc, argv, narg, "keygen.mesh", params.keygen.mesh, 0, 64)) continue; - if (config::parse_option(argc, argv, narg, "keygen.seed", - params.keygen.seed, config::no_scale)) + if (config::parse_option(argc, argv, narg, "prng-seed", params.prng_seed, + config::no_scale)) { + prng_seed(params.prng_seed); continue; + } if (config::parse_option(argc, argv, narg, "keygen.zerofill", params.keygen.zero_fill)) continue; diff --git a/test/nested.c++ b/test/nested.c++ index 48299c79..55c7ab11 100644 --- a/test/nested.c++ +++ b/test/nested.c++ @@ -74,7 +74,7 @@ bool testcase_nested::setup() { return false; } - keyvalue_maker.setup(config.params, config.actor_id, 0 /* thread_number */); + keyvalue_maker.setup(config.params, 0 /* thread_number */); key = keygen::alloc(config.params.keylen_max); data = keygen::alloc(config.params.datalen_max); serial = 0; @@ -292,8 +292,7 @@ retry: } bool testcase_nested::run() { - uint64_t seed = - prng64_map2_white(config.params.keygen.seed) + config.actor_id; + uint64_t seed = prng64_map2_white(prng_state) + config.space_id; clear_wholetable_passed = 0; clear_stepbystep_passed = 0; diff --git a/test/test.c++ b/test/test.c++ index b2d9da86..10107980 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -668,8 +668,8 @@ bool test_execute(const actor_config &config_const) { size_t(config.params.nrepeat)); else log_verbose("test successfully (iteration %zi)", iter); - config.params.keygen.seed += INT32_C(0xA4F4D37B); - log_verbose("turn keygen to %u", config.params.keygen.seed); + prng_seed(config.params.prng_seed += INT32_C(0xA4F4D37B)); + log_verbose("turn PRNG to %u", config.params.prng_seed); } } while (config.params.nrepeat == 0 || iter < config.params.nrepeat); diff --git a/test/test.h++ b/test/test.h++ index b03b80e1..9db1bc6c 100644 --- a/test/test.h++ +++ b/test/test.h++ @@ -103,7 +103,7 @@ class registry { struct record { actor_testcase id = 
ac_none; std::string name; - bool (*review_params)(actor_params &) = nullptr; + bool (*review_params)(actor_params &, unsigned space_id) = nullptr; testcase *(*constructor)(const actor_config &, const mdbx_pid_t) = nullptr; }; std::unordered_map name2id; @@ -124,8 +124,8 @@ public: add(this); } }; - static bool review_actor_params(const actor_testcase id, - actor_params ¶ms); + static bool review_actor_params(const actor_testcase id, actor_params ¶ms, + const unsigned space_id); static testcase *create_actor(const actor_config &config, const mdbx_pid_t pid); }; @@ -301,8 +301,9 @@ public: memset(&last, 0, sizeof(last)); } - static bool review_params(actor_params ¶ms) { + static bool review_params(actor_params ¶ms, unsigned space_id) { // silently fix key/data length for fixed-length modes + params.prng_seed += bleach32(space_id); if ((params.table_flags & MDBX_INTEGERKEY) && params.keylen_min != params.keylen_max) params.keylen_min = params.keylen_max; diff --git a/test/ttl.c++ b/test/ttl.c++ index a7049022..b3839357 100644 --- a/test/ttl.c++ +++ b/test/ttl.c++ @@ -119,9 +119,8 @@ bool testcase_ttl::run() { return false; } - uint64_t seed = - prng64_map2_white(config.params.keygen.seed) + config.actor_id; - keyvalue_maker.setup(config.params, config.actor_id, 0 /* thread_number */); + uint64_t seed = prng64_map2_white(prng_state) + config.space_id; + keyvalue_maker.setup(config.params, 0 /* thread_number */); key = keygen::alloc(config.params.keylen_max); data = keygen::alloc(config.params.datalen_max); const MDBX_put_flags_t insert_flags = diff --git a/test/utils.c++ b/test/utils.c++ index 71d56eb8..399ea472 100644 --- a/test/utils.c++ +++ b/test/utils.c++ @@ -136,7 +136,7 @@ void prng_fill(uint64_t &state, void *ptr, size_t bytes) { } } -static __thread uint64_t prng_state; +/* __thread */ uint64_t prng_state; void prng_seed(uint64_t seed) { prng_state = bleach64(seed); } diff --git a/test/utils.h++ b/test/utils.h++ index 4e91226d..f8083437 100644 --- 
a/test/utils.h++ +++ b/test/utils.h++ @@ -346,6 +346,7 @@ uint64_t prng64_white(uint64_t &state); uint32_t prng32(uint64_t &state); void prng_fill(uint64_t &state, void *ptr, size_t bytes); +extern uint64_t prng_state; void prng_seed(uint64_t seed); uint32_t prng32(void); uint64_t prng64(void); From 00c4e2636ec0febc549a9fea5499655155ced093 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 27 Feb 2024 20:46:27 +0300 Subject: [PATCH 111/443] =?UTF-8?q?mdbx-test:=20=D0=BE=D0=B1=D0=BD=D0=BE?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=93=D0=9F=D0=A1=D0=A7?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/utils.c++ | 14 +++++++++----- test/utils.h++ | 29 ++++++++++++++++------------- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/test/utils.c++ b/test/utils.c++ index 399ea472..fd32c689 100644 --- a/test/utils.c++ +++ b/test/utils.c++ @@ -107,18 +107,22 @@ uint64_t prng64_white(uint64_t &state) { return bleach64(state); } -uint32_t prng32(uint64_t &state) { - return (uint32_t)(prng64_careless(state) >> 32); +uint32_t prng32_fast(uint64_t &state) { + return uint32_t(prng64_careless(state) >> 32); +} + +uint32_t prng32_white(uint64_t &state) { + return bleach32(uint32_t(prng64_careless(state) >> 32)); } void prng_fill(uint64_t &state, void *ptr, size_t bytes) { - uint32_t u32 = prng32(state); + uint32_t u32 = prng32_fast(state); while (bytes >= 4) { memcpy(ptr, &u32, 4); ptr = (uint32_t *)ptr + 1; bytes -= 4; - u32 = prng32(state); + u32 = prng32_fast(state); } switch (bytes & 3) { @@ -140,7 +144,7 @@ void prng_fill(uint64_t &state, void *ptr, size_t bytes) { void prng_seed(uint64_t seed) { prng_state = bleach64(seed); } -uint32_t prng32(void) { return prng32(prng_state); } +uint32_t prng32(void) { return prng32_white(prng_state); } uint64_t prng64(void) { return 
prng64_white(prng_state); } diff --git a/test/utils.h++ b/test/utils.h++ index f8083437..055e7912 100644 --- a/test/utils.h++ +++ b/test/utils.h++ @@ -288,24 +288,26 @@ inline bool is_samedata(const MDBX_val &a, const MDBX_val &b) { } std::string format(const char *fmt, ...); -static inline uint64_t bleach64(uint64_t v) { - // Tommy Ettinger, https://www.blogger.com/profile/04953541827437796598 - // http://mostlymangling.blogspot.com/2019/01/better-stronger-mixer-and-test-procedure.html - v ^= rot64(v, 25) ^ rot64(v, 50); - v *= UINT64_C(0xA24BAED4963EE407); - v ^= rot64(v, 24) ^ rot64(v, 49); - v *= UINT64_C(0x9FB21C651E98DF25); - return v ^ v >> 28; +static inline uint64_t bleach64(uint64_t x) { + // NASAM from Tommy Ettinger, + // https://www.blogger.com/profile/04953541827437796598 + // http://mostlymangling.blogspot.com/2020/01/nasam-not-another-strange-acronym-mixer.html + x ^= rot64(x, 25) ^ rot64(x, 47); + x *= UINT64_C(0x9E6C63D0676A9A99); + x ^= x >> 23 ^ x >> 51; + x *= UINT64_C(0x9E6D62D06F6A9A9B); + x ^= x >> 23 ^ x >> 51; + return x; } static inline uint32_t bleach32(uint32_t x) { // https://github.com/skeeto/hash-prospector - // exact bias: 0.17353355999581582 + // exact bias: 0.10760229515479501 x ^= x >> 16; - x *= UINT32_C(0x7feb352d); + x *= UINT32_C(0x21f0aaad); x ^= 0x3027C563 ^ (x >> 15); - x *= UINT32_C(0x846ca68b); - x ^= x >> 16; + x *= UINT32_C(0x0d35a2d97); + x ^= x >> 15; return x; } @@ -343,7 +345,8 @@ static inline double u64_to_double1(uint64_t v) { } uint64_t prng64_white(uint64_t &state); -uint32_t prng32(uint64_t &state); +uint32_t prng32_white(uint64_t &state); +uint32_t prng32_fast(uint64_t &state); void prng_fill(uint64_t &state, void *ptr, size_t bytes); extern uint64_t prng_state; From 826441741def28251fe437fefd85b4663bb99697 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 23 Feb 2024 12:43:18 +0300 Subject: [PATCH 
112/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20keysize=5Fmin()=20=D0=B8=20val?= =?UTF-8?q?size=5Fmin()=20=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 12 ++++++++++++ src/core.c | 28 +++++++++++++++++++++++----- test/config.c++ | 4 ++-- 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/mdbx.h b/mdbx.h index 693cfee6..1fda47e2 100644 --- a/mdbx.h +++ b/mdbx.h @@ -3369,6 +3369,12 @@ mdbx_limits_dbsize_max(intptr_t pagesize); MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_keysize_max(intptr_t pagesize, MDBX_db_flags_t flags); +/** \brief Returns minimal key size in bytes for given database flags. + * \ingroup c_statinfo + * \see db_flags */ +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t +mdbx_limits_keysize_min(MDBX_db_flags_t flags); + /** \brief Returns maximal data size in bytes for given page size * and database flags, or -1 if pagesize is invalid. * \ingroup c_statinfo @@ -3376,6 +3382,12 @@ mdbx_limits_keysize_max(intptr_t pagesize, MDBX_db_flags_t flags); MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_valsize_max(intptr_t pagesize, MDBX_db_flags_t flags); +/** \brief Returns minimal data size in bytes for given database flags. + * \ingroup c_statinfo + * \see db_flags */ +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t +mdbx_limits_valsize_min(MDBX_db_flags_t flags); + /** \brief Returns maximal size of key-value pair to fit in a single page with * the given size and database flags, or -1 if pagesize is invalid. * \ingroup c_statinfo diff --git a/src/core.c b/src/core.c index 1ba69ff2..405da396 100644 --- a/src/core.c +++ b/src/core.c @@ -458,6 +458,19 @@ static __inline size_t keysize_max(size_t pagesize, MDBX_db_flags_t flags) { return max_branch_key; } +static __inline size_t keysize_min(MDBX_db_flags_t flags) { + return (flags & MDBX_INTEGERKEY) ? 
4 /* sizeof(uint32_t) */ : 0; +} + +static __inline size_t valsize_min(MDBX_db_flags_t flags) { + if (flags & MDBX_INTEGERDUP) + return 4 /* sizeof(uint32_t) */; + else if (flags & MDBX_DUPFIXED) + return sizeof(indx_t); + else + return 0; +} + static __inline size_t valsize_max(size_t pagesize, MDBX_db_flags_t flags) { assert(pagesize >= MIN_PAGESIZE && pagesize <= MAX_PAGESIZE && is_powerof2(pagesize)); @@ -510,6 +523,10 @@ __cold intptr_t mdbx_limits_keysize_max(intptr_t pagesize, return keysize_max(pagesize, flags); } +__cold intptr_t mdbx_limits_keysize_min(MDBX_db_flags_t flags) { + return keysize_min(flags); +} + __cold int mdbx_env_get_maxvalsize_ex(const MDBX_env *env, MDBX_db_flags_t flags) { if (unlikely(!env || env->me_signature.weak != MDBX_ME_SIGNATURE)) @@ -530,6 +547,10 @@ __cold intptr_t mdbx_limits_valsize_max(intptr_t pagesize, return valsize_max(pagesize, flags); } +__cold intptr_t mdbx_limits_valsize_min(MDBX_db_flags_t flags) { + return valsize_min(flags); +} + __cold intptr_t mdbx_limits_pairsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags) { if (pagesize < 1) @@ -16289,14 +16310,11 @@ static int setup_dbx(MDBX_dbx *const dbx, const MDBX_db *const db, dbx->md_dcmp = get_default_datacmp(db->md_flags); } - dbx->md_klen_min = - (db->md_flags & MDBX_INTEGERKEY) ? 4 /* sizeof(uint32_t) */ : 0; + dbx->md_klen_min = keysize_min(db->md_flags); dbx->md_klen_max = keysize_max(pagesize, db->md_flags); assert(dbx->md_klen_max != (unsigned)-1); - dbx->md_vlen_min = (db->md_flags & MDBX_INTEGERDUP) - ? 4 /* sizeof(uint32_t) */ - : ((db->md_flags & MDBX_DUPFIXED) ? 
sizeof(indx_t) : 0); + dbx->md_vlen_min = valsize_min(db->md_flags); dbx->md_vlen_max = valsize_max(pagesize, db->md_flags); assert(dbx->md_vlen_max != (size_t)-1); diff --git a/test/config.c++ b/test/config.c++ index f675dee1..1ac2101d 100644 --- a/test/config.c++ +++ b/test/config.c++ @@ -693,7 +693,7 @@ bool actor_config::deserialize(const char *str, actor_config &config) { } unsigned actor_params::mdbx_keylen_min() const { - return (table_flags & MDBX_INTEGERKEY) ? 4 : 0; + return unsigned(mdbx_limits_keysize_min(table_flags)); } unsigned actor_params::mdbx_keylen_max() const { @@ -701,7 +701,7 @@ unsigned actor_params::mdbx_keylen_max() const { } unsigned actor_params::mdbx_datalen_min() const { - return (table_flags & MDBX_INTEGERDUP) ? 4 : 0; + return unsigned(mdbx_limits_valsize_min(table_flags)); } unsigned actor_params::mdbx_datalen_max() const { From 2e863cf7e0fd9a35c71407fcd24d47b6048042a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 25 Feb 2024 19:32:16 +0300 Subject: [PATCH 113/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BD=D0=B5=D0=B4=D0=BE?= =?UTF-8?q?=D1=87=D0=B5=D1=82=D0=B0=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5=D0=BA?= =?UTF-8?q?=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B8=20=D1=81=D0=BE=D0=BF?= =?UTF-8?q?=D1=83=D1=82=D1=81=D1=82=D0=B2=D1=83=D1=8E=D1=89=D0=B8=D1=85=20?= =?UTF-8?q?=D0=BA=D1=83=D1=80=D1=81=D0=BE=D1=80=D0=BE=D0=B2=20=D0=BF=D1=80?= =?UTF-8?q?=D0=B8=20=D1=80=D0=B0=D0=B7=D0=B4=D0=B5=D0=BB=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D0=B8=20=D1=81=D1=82=D1=80=D0=B0=D0=BD=D0=B8=D1=86=D1=8B=20?= =?UTF-8?q?=D0=BF=D0=BE=20=D1=81=D1=86=D0=B5=D0=BD=D0=B0=D1=80=D0=B8=D1=8E?= =?UTF-8?q?=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D1=8F?= =?UTF-8?q?=20=D0=BF=D1=83=D1=81=D1=82=D0=BE=D0=B9=20=D1=81=D1=82=D1=80?= 
=?UTF-8?q?=D0=B0=D0=BD=D0=B8=D1=86=D1=8B=20=D1=81=D0=BB=D0=B5=D0=B2=D0=B0?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/core.c b/src/core.c index 405da396..0527e279 100644 --- a/src/core.c +++ b/src/core.c @@ -21429,7 +21429,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, /* It is reasonable and possible to split the page at the begin */ if (unlikely(newindx < minkeys)) { split_indx = minkeys; - if (newindx == 0 && foliage == 0 && !(naf & MDBX_SPLIT_REPLACE)) { + if (newindx == 0 && !(naf & MDBX_SPLIT_REPLACE)) { split_indx = 0; /* Checking for ability of splitting by the left-side insertion * of a pure page with the new key */ @@ -21449,8 +21449,8 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, } else get_key(page_node(mp, 0), &sepkey); cASSERT(mc, mc->mc_dbx->md_cmp(newkey, &sepkey) < 0); - /* Avoiding rare complex cases of split the parent page */ - if (page_room(mn.mc_pg[ptop]) < branch_size(env, &sepkey)) + /* Avoiding rare complex cases of nested split the parent page(s) */ + if (page_room(mc->mc_pg[ptop]) < branch_size(env, &sepkey)) split_indx = minkeys; } if (foliage) { @@ -21474,9 +21474,10 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, sepkey = *newkey; } else if (unlikely(pure_left)) { /* newindx == split_indx == 0 */ - TRACE("no-split, but add new pure page at the %s", "left/before"); + TRACE("pure-left: no-split, but add new pure page at the %s", + "left/before"); cASSERT(mc, newindx == 0 && split_indx == 0 && minkeys == 1); - TRACE("old-first-key is %s", DKEY_DEBUG(&sepkey)); + TRACE("pure-left: old-first-key is %s", DKEY_DEBUG(&sepkey)); } else { if (IS_LEAF2(sister)) { /* Move half of the keys to the right sibling */ @@ -21690,18 +21691,20 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const 
newkey, } } else if (unlikely(pure_left)) { MDBX_page *ptop_page = mc->mc_pg[ptop]; - DEBUG("adding to parent page %u node[%u] left-leaf page #%u key %s", + TRACE("pure-left: adding to parent page %u node[%u] left-leaf page #%u key " + "%s", ptop_page->mp_pgno, mc->mc_ki[ptop], sister->mp_pgno, DKEY(mc->mc_ki[ptop] ? newkey : NULL)); - mc->mc_top--; + assert(mc->mc_top == ptop + 1); + mc->mc_top = (uint8_t)ptop; rc = node_add_branch(mc, mc->mc_ki[ptop], mc->mc_ki[ptop] ? newkey : NULL, sister->mp_pgno); cASSERT(mc, mp == mc->mc_pg[ptop + 1] && newindx == mc->mc_ki[ptop + 1] && ptop == mc->mc_top); if (likely(rc == MDBX_SUCCESS) && mc->mc_ki[ptop] == 0) { - DEBUG("update prev-first key on parent %s", DKEY(&sepkey)); MDBX_node *node = page_node(mc->mc_pg[ptop], 1); + TRACE("pure-left: update prev-first key on parent to %s", DKEY(&sepkey)); cASSERT(mc, node_ks(node) == 0 && node_pgno(node) == mp->mp_pgno); cASSERT(mc, mc->mc_top == ptop && mc->mc_ki[ptop] == 0); mc->mc_ki[ptop] = 1; @@ -21709,6 +21712,9 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, cASSERT(mc, mc->mc_top == ptop && mc->mc_ki[ptop] == 1); cASSERT(mc, mp == mc->mc_pg[ptop + 1] && newindx == mc->mc_ki[ptop + 1]); mc->mc_ki[ptop] = 0; + } else { + TRACE("pure-left: no-need-update prev-first key on parent %s", + DKEY(&sepkey)); } mc->mc_top++; @@ -21757,7 +21763,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, &sepkey); if (mc->mc_dbx->md_cmp(newkey, &sepkey) < 0) { mc->mc_top -= (uint8_t)i; - DEBUG("update new-first on parent [%i] page %u key %s", + DEBUG("pure-left: update new-first on parent [%i] page %u key %s", mc->mc_ki[mc->mc_top], mc->mc_pg[mc->mc_top]->mp_pgno, DKEY(newkey)); rc = update_key(mc, newkey); @@ -21768,7 +21774,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, break; } } - } else if (tmp_ki_copy /* !IS_LEAF2(mp) */) { + } else if (tmp_ki_copy) { /* !IS_LEAF2(mp) */ /* Move nodes */ mc->mc_pg[mc->mc_top] = 
sister; i = split_indx; @@ -21887,7 +21893,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, m3->mc_ki[k + 1] = m3->mc_ki[k]; m3->mc_pg[k + 1] = m3->mc_pg[k]; } - m3->mc_ki[0] = m3->mc_ki[0] >= nkeys; + m3->mc_ki[0] = m3->mc_ki[0] >= nkeys + pure_left; m3->mc_pg[0] = mc->mc_pg[0]; m3->mc_snum++; m3->mc_top++; From d53dc4572c6ecc6a31383511d234bd3118039c8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 28 Feb 2024 02:21:59 +0300 Subject: [PATCH 114/443] =?UTF-8?q?mdbx:=20=D1=80=D0=B5=D1=84=D0=B0=D0=BA?= =?UTF-8?q?=D1=82=D0=BE=D1=80=D0=B8=D0=BD=D0=B3=20node=5Fshrink()=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20=D1=8F=D1=81=D0=BD=D0=BE=D1=81=D1=82=D0=B8=20?= =?UTF-8?q?=D0=B8=D1=81=D1=85=D0=BE=D0=B4=D0=BD=D0=BE=D0=B3=D0=BE=20=D0=BA?= =?UTF-8?q?=D0=BE=D0=B4=D0=B0.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 58 ++++++++++++++++++++++++------------------------------ 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/src/core.c b/src/core.c index 0527e279..f7a5e433 100644 --- a/src/core.c +++ b/src/core.c @@ -3357,7 +3357,7 @@ static int __must_check_result node_add_leaf2(MDBX_cursor *mc, size_t indx, const MDBX_val *key); static void node_del(MDBX_cursor *mc, size_t ksize); -static void node_shrink(MDBX_page *mp, size_t indx); +static MDBX_node *node_shrink(MDBX_page *mp, size_t indx, MDBX_node *node); static int __must_check_result node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, bool fromleft); static int __must_check_result node_read(MDBX_cursor *mc, const MDBX_node *leaf, @@ -18766,7 +18766,7 @@ static __hot int cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) { if (!(node_flags(node) & F_SUBDATA)) mc->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node); rc = cursor_del(&mc->mc_xcursor->mx_cursor, 0); - if (unlikely(rc)) + if (unlikely(rc != MDBX_SUCCESS)) return 
rc; /* If sub-DB still has entries, we're done */ if (mc->mc_xcursor->mx_db.md_entries) { @@ -18775,11 +18775,10 @@ static __hot int cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) { mc->mc_xcursor->mx_db.md_mod_txnid = mc->mc_txn->mt_txnid; memcpy(node_data(node), &mc->mc_xcursor->mx_db, sizeof(MDBX_db)); } else { - /* shrink fake page */ - node_shrink(mp, mc->mc_ki[mc->mc_top]); - node = page_node(mp, mc->mc_ki[mc->mc_top]); + /* shrink sub-page */ + node = node_shrink(mp, mc->mc_ki[mc->mc_top], node); mc->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node); - /* fix other sub-DB cursors pointed at fake pages on this page */ + /* fix other sub-DB cursors pointed at sub-pages on this page */ for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { if (m2 == mc || m2->mc_snum < mc->mc_snum) @@ -19234,35 +19233,28 @@ __hot static void node_del(MDBX_cursor *mc, size_t ksize) { /* Compact the main page after deleting a node on a subpage. * [in] mp The main page to operate on. * [in] indx The index of the subpage on the main page. 
*/ -static void node_shrink(MDBX_page *mp, size_t indx) { - MDBX_node *node; - MDBX_page *sp, *xp; - size_t nsize, delta, len, ptr; - intptr_t i; - - node = page_node(mp, indx); - sp = (MDBX_page *)node_data(node); - delta = page_room(sp); - assert(delta > 0); +static MDBX_node *node_shrink(MDBX_page *mp, size_t indx, MDBX_node *node) { + assert(node = page_node(mp, indx)); + MDBX_page *sp = (MDBX_page *)node_data(node); + assert(IS_SUBP(sp) && page_numkeys(sp) > 0); + const size_t delta = + EVEN_FLOOR(page_room(sp) /* avoid the node uneven-sized */); + if (unlikely(delta) == 0) + return node; /* Prepare to shift upward, set len = length(subpage part to shift) */ - if (IS_LEAF2(sp)) { - delta &= /* do not make the node uneven-sized */ ~(size_t)1; - if (unlikely(delta) == 0) - return; - nsize = node_ds(node) - delta; - assert(nsize % 1 == 0); - len = nsize; - } else { - xp = ptr_disp(sp, delta); /* destination subpage */ - for (i = page_numkeys(sp); --i >= 0;) { + size_t nsize = node_ds(node) - delta, len = nsize; + assert(nsize % 1 == 0); + if (!IS_LEAF2(sp)) { + len = PAGEHDRSZ; + MDBX_page *xp = ptr_disp(sp, delta); /* destination subpage */ + for (intptr_t i = page_numkeys(sp); --i >= 0;) { assert(sp->mp_ptrs[i] >= delta); xp->mp_ptrs[i] = (indx_t)(sp->mp_ptrs[i] - delta); } - nsize = node_ds(node) - delta; - len = PAGEHDRSZ; } - sp->mp_upper = sp->mp_lower; + assert(sp->mp_upper >= sp->mp_lower + delta); + sp->mp_upper -= (indx_t)delta; sp->mp_pgno = mp->mp_pgno; node_set_ds(node, nsize); @@ -19270,15 +19262,17 @@ static void node_shrink(MDBX_page *mp, size_t indx) { void *const base = ptr_disp(mp, mp->mp_upper + PAGEHDRSZ); memmove(ptr_disp(base, delta), base, ptr_dist(sp, base) + len); - ptr = mp->mp_ptrs[indx]; - for (i = page_numkeys(mp); --i >= 0;) { - if (mp->mp_ptrs[i] <= ptr) { + const size_t pivot = mp->mp_ptrs[indx]; + for (intptr_t i = page_numkeys(mp); --i >= 0;) { + if (mp->mp_ptrs[i] <= pivot) { assert((size_t)UINT16_MAX - mp->mp_ptrs[i] >= 
delta); mp->mp_ptrs[i] += (indx_t)delta; } } assert((size_t)UINT16_MAX - mp->mp_upper >= delta); mp->mp_upper += (indx_t)delta; + + return ptr_disp(node, delta); } /* Initial setup of a sorted-dups cursor. From d7f259110c408fdab8a4b33afbc4ab96078ef538 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 28 Feb 2024 02:24:53 +0300 Subject: [PATCH 115/443] =?UTF-8?q?mdbx-test:=20=D1=84=D0=B8=D0=BA=D1=81?= =?UTF-8?q?=D0=B0=D1=86=D0=B8=D1=8F=20=D1=82=D1=80=D0=B0=D0=BD=D0=B7=D0=B0?= =?UTF-8?q?=D0=BA=D1=86=D0=B8=D0=B8=20=D0=BF=D1=80=D0=B8=20=D0=BE=D1=88?= =?UTF-8?q?=D0=B8=D0=B1=D0=BA=D0=B0=D1=85=20=D1=82=D0=B5=D1=81=D1=82=D0=B0?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20=D0=BF=D0=BE=D1=81=D0=BB=D0=B5=D0=B4?= =?UTF-8?q?=D1=83=D1=8E=D1=89=D0=B5=D0=B3=D0=BE=20=D0=B0=D0=BD=D0=B0=D0=BB?= =?UTF-8?q?=D0=B8=D0=B7=D0=B0=20=D0=91=D0=94.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/test.c++ | 12 ++++++++++++ test/test.h++ | 1 + 2 files changed, 13 insertions(+) diff --git a/test/test.c++ b/test/test.c++ index 10107980..bb7bd818 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -753,6 +753,18 @@ void testcase::speculum_check_iterator(const char *where, const char *stage, mdbx_dump_val(&v, dump_value, sizeof(dump_value))); } +void testcase::failure(const char *fmt, ...) 
const { + va_list ap; + va_start(ap, fmt); + fflush(nullptr); + logging::output_nocheckloglevel_ap(logging::failure, fmt, ap); + va_end(ap); + fflush(nullptr); + if (txn_guard) + mdbx_txn_commit(const_cast(this)->txn_guard.release()); + exit(EXIT_FAILURE); +} + #if SPECULUM_CURSORS void testcase::speculum_check_cursor(const char *where, const char *stage, const testcase::SET::const_iterator &it, diff --git a/test/test.h++ b/test/test.h++ index 9db1bc6c..d99ba4f8 100644 --- a/test/test.h++ +++ b/test/test.h++ @@ -281,6 +281,7 @@ protected: void signal(); bool should_continue(bool check_timeout_only = false) const; + void failure(const char *fmt, ...) const; void generate_pair(const keygen::serial_t serial, keygen::buffer &out_key, keygen::buffer &out_value, keygen::serial_t data_age) { keyvalue_maker.pair(serial, out_key, out_value, data_age, false); From fa0017591d25caf5e13967d630270a1d0d323c31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 28 Feb 2024 02:32:43 +0300 Subject: [PATCH 116/443] =?UTF-8?q?mdbx:=20=D0=BF=D1=80=D0=BE=D0=B4=D0=BE?= =?UTF-8?q?=D0=BB=D0=B6=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D1=87=D0=B8=D1=81?= =?UTF-8?q?=D1=82=D0=BA=D0=B8/=D1=80=D0=B5=D1=84=D0=B0=D0=BA=D1=82=D0=BE?= =?UTF-8?q?=D1=80=D0=B8=D0=BD=D0=B3=D0=B0=20=D1=83=D0=BD=D0=B0=D1=81=D0=BB?= =?UTF-8?q?=D0=B5=D0=B4=D0=BE=D0=B2=D0=B0=D0=BD=D0=BD=D1=8B=D1=85=20=D1=80?= =?UTF-8?q?=D0=B5=D0=B1=D1=83=D1=81=D0=BE=D0=B2=20=D0=B2=20`cursor=5Fput?= =?UTF-8?q?=5Fnochecklen()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 225 ++++++++++++++++++++++++++--------------------------- 1 file changed, 111 insertions(+), 114 deletions(-) diff --git a/src/core.c b/src/core.c index f7a5e433..3e99a8dd 100644 --- a/src/core.c +++ b/src/core.c @@ -17997,11 +17997,11 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val 
*key, rc = MDBX_NO_ROOT; } else if ((flags & MDBX_CURRENT) == 0) { bool exact = false; - MDBX_val lastkey, olddata; + MDBX_val last_key, old_data; if ((flags & MDBX_APPEND) && mc->mc_db->md_entries > 0) { - rc = cursor_last(mc, &lastkey, &olddata); + rc = cursor_last(mc, &last_key, &old_data); if (likely(rc == MDBX_SUCCESS)) { - const int cmp = mc->mc_dbx->md_cmp(key, &lastkey); + const int cmp = mc->mc_dbx->md_cmp(key, &last_key); if (likely(cmp > 0)) { mc->mc_ki[mc->mc_top]++; /* step forward for appending */ rc = MDBX_NOTFOUND; @@ -18016,7 +18016,7 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, } else { struct cursor_set_result csr = /* olddata may not be updated in case LEAF2-page of dupfixed-subDB */ - cursor_set(mc, (MDBX_val *)key, &olddata, MDBX_SET); + cursor_set(mc, (MDBX_val *)key, &old_data, MDBX_SET); rc = csr.err; exact = csr.exact; } @@ -18024,14 +18024,14 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if (exact) { if (unlikely(flags & MDBX_NOOVERWRITE)) { DEBUG("duplicate key [%s]", DKEY_DEBUG(key)); - *data = olddata; + *data = old_data; return MDBX_KEYEXIST; } if (unlikely(mc->mc_flags & C_SUB)) { /* nested subtree of DUPSORT-database with the same key, * nothing to update */ eASSERT(env, data->iov_len == 0 && - (olddata.iov_len == 0 || + (old_data.iov_len == 0 || /* olddata may not be updated in case LEAF2-page of dupfixed-subDB */ (mc->mc_db->md_flags & MDBX_DUPFIXED))); @@ -18047,8 +18047,8 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, exact = false; } else if (!(flags & (MDBX_RESERVE | MDBX_MULTIPLE))) { /* checking for early exit without dirtying pages */ - if (unlikely(eq_fast(data, &olddata))) { - cASSERT(mc, mc->mc_dbx->md_dcmp(data, &olddata) == 0); + if (unlikely(eq_fast(data, &old_data))) { + cASSERT(mc, mc->mc_dbx->md_dcmp(data, &old_data) == 0); if (mc->mc_xcursor) { if (flags & MDBX_NODUPDATA) return MDBX_KEYEXIST; @@ -18058,7 +18058,7 
@@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, /* the same data, nothing to update */ return MDBX_SUCCESS; } - cASSERT(mc, mc->mc_dbx->md_dcmp(data, &olddata) != 0); + cASSERT(mc, mc->mc_dbx->md_dcmp(data, &old_data) != 0); } } } else if (unlikely(rc != MDBX_NOTFOUND)) @@ -18066,17 +18066,16 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, } mc->mc_flags &= ~C_DEL; - MDBX_val xdata, *rdata = data; - size_t mcount = 0, dcount = 0; + MDBX_val xdata, *ref_data = data; + size_t *batch_dupfixed_done = nullptr, batch_dupfixed_given = 0; if (unlikely(flags & MDBX_MULTIPLE)) { - dcount = data[1].iov_len; - data[1].iov_len = 0 /* reset done item counter */; - rdata = &xdata; - xdata.iov_len = data->iov_len * dcount; + batch_dupfixed_given = data[1].iov_len; + batch_dupfixed_done = &data[1].iov_len; + *batch_dupfixed_done = 0; } /* Cursor is positioned, check for room in the dirty list */ - err = cursor_touch(mc, key, rdata); + err = cursor_touch(mc, key, ref_data); if (unlikely(err)) return err; @@ -18111,7 +18110,7 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, mc->mc_flags |= C_INITIALIZED; } - MDBX_val dkey, olddata; + MDBX_val old_singledup, old_data; MDBX_db nested_dupdb; MDBX_page *sub_root = nullptr; bool insert_key, insert_data; @@ -18119,19 +18118,19 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, MDBX_page *fp = env->me_pbuf; fp->mp_txnid = mc->mc_txn->mt_front; insert_key = insert_data = (rc != MDBX_SUCCESS); - dkey.iov_base = nullptr; + old_singledup.iov_base = nullptr; if (insert_key) { /* The key does not exist */ DEBUG("inserting key at index %i", mc->mc_ki[mc->mc_top]); if ((mc->mc_db->md_flags & MDBX_DUPSORT) && node_size(key, data) > env->me_leaf_nodemax) { /* Too big for a node, insert in sub-DB. Set up an empty - * "old sub-page" for prep_subDB to expand to a full page. 
*/ + * "old sub-page" for convert_to_subtree to expand to a full page. */ fp->mp_leaf2_ksize = (mc->mc_db->md_flags & MDBX_DUPFIXED) ? (uint16_t)data->iov_len : 0; fp->mp_lower = fp->mp_upper = 0; - olddata.iov_len = PAGEHDRSZ; - goto prep_subDB; + old_data.iov_len = PAGEHDRSZ; + goto convert_to_subtree; } } else { /* there's only a key anyway, so this is a no-op */ @@ -18176,7 +18175,8 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if (unlikely(err != MDBX_SUCCESS)) return err; } - MDBX_node *node = page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + MDBX_node *const node = + page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); /* Large/Overflow page overwrites need special handling */ if (unlikely(node_flags(node) & F_BIGDATA)) { @@ -18250,19 +18250,18 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if ((err = page_retire(mc, lp.page)) != MDBX_SUCCESS) return err; } else { - olddata.iov_len = node_ds(node); - olddata.iov_base = node_data(node); - cASSERT(mc, ptr_disp(olddata.iov_base, olddata.iov_len) <= + old_data.iov_len = node_ds(node); + old_data.iov_base = node_data(node); + cASSERT(mc, ptr_disp(old_data.iov_base, old_data.iov_len) <= ptr_disp(mc->mc_pg[mc->mc_top], env->me_psize)); /* DB has dups? */ if (mc->mc_db->md_flags & MDBX_DUPSORT) { /* Prepare (sub-)page/sub-DB to accept the new item, if needed. * fp: old sub-page or a header faking it. - * mp: new (sub-)page. offset: growth in page size. - * xdata: node data with new page or DB. */ - size_t i; - size_t offset = 0; + * mp: new (sub-)page. + * xdata: node data with new sub-page or sub-DB. */ + size_t growth = 0; /* growth in page size.*/ MDBX_page *mp = fp = xdata.iov_base = env->me_pbuf; mp->mp_pgno = mc->mc_pg[mc->mc_top]->mp_pgno; @@ -18270,19 +18269,19 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if (!(node_flags(node) & F_DUPDATA)) { /* does data match? 
*/ if (flags & MDBX_APPENDDUP) { - const int cmp = mc->mc_dbx->md_dcmp(data, &olddata); - cASSERT(mc, cmp != 0 || eq_fast(data, &olddata)); + const int cmp = mc->mc_dbx->md_dcmp(data, &old_data); + cASSERT(mc, cmp != 0 || eq_fast(data, &old_data)); if (unlikely(cmp <= 0)) return MDBX_EKEYMISMATCH; - } else if (eq_fast(data, &olddata)) { - cASSERT(mc, mc->mc_dbx->md_dcmp(data, &olddata) == 0); + } else if (eq_fast(data, &old_data)) { + cASSERT(mc, mc->mc_dbx->md_dcmp(data, &old_data) == 0); if (flags & MDBX_NODUPDATA) return MDBX_KEYEXIST; /* data is match exactly byte-to-byte, nothing to update */ rc = MDBX_SUCCESS; - if (likely((flags & MDBX_MULTIPLE) == 0)) - return rc; - goto continue_multiple; + if (unlikely(batch_dupfixed_done)) + goto batch_dupfixed_continue; + return rc; } /* Just overwrite the current item */ @@ -18292,13 +18291,13 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, } /* Back up original data item */ - memcpy(dkey.iov_base = fp + 1, olddata.iov_base, - dkey.iov_len = olddata.iov_len); + memcpy(old_singledup.iov_base = fp + 1, old_data.iov_base, + old_singledup.iov_len = old_data.iov_len); /* Make sub-page header for the dup items, with dummy body */ fp->mp_flags = P_LEAF | P_SUBP; fp->mp_lower = 0; - xdata.iov_len = PAGEHDRSZ + dkey.iov_len + data->iov_len; + xdata.iov_len = PAGEHDRSZ + old_data.iov_len + data->iov_len; if (mc->mc_db->md_flags & MDBX_DUPFIXED) { fp->mp_flags |= P_LEAF2; fp->mp_leaf2_ksize = (uint16_t)data->iov_len; @@ -18306,27 +18305,26 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, cASSERT(mc, xdata.iov_len <= env->me_psize); } else { xdata.iov_len += 2 * (sizeof(indx_t) + NODESIZE) + - (dkey.iov_len & 1) + (data->iov_len & 1); - cASSERT(mc, xdata.iov_len <= env->me_psize); + (old_data.iov_len & 1) + (data->iov_len & 1); } fp->mp_upper = (uint16_t)(xdata.iov_len - PAGEHDRSZ); - olddata.iov_len = xdata.iov_len; /* pretend olddata is fp */ + old_data.iov_len = 
xdata.iov_len; /* pretend olddata is fp */ } else if (node_flags(node) & F_SUBDATA) { /* Data is on sub-DB, just store it */ flags |= F_DUPDATA | F_SUBDATA; - goto put_sub; + goto dupsort_put; } else { /* Data is on sub-page */ - fp = olddata.iov_base; + fp = old_data.iov_base; switch (flags) { default: if (!(mc->mc_db->md_flags & MDBX_DUPFIXED)) { - offset = node_size(data, nullptr) + sizeof(indx_t); + growth = node_size(data, nullptr) + sizeof(indx_t); break; } - offset = fp->mp_leaf2_ksize; - if (page_room(fp) < offset) { - offset *= 4; /* space for 4 more */ + growth = fp->mp_leaf2_ksize; + if (page_room(fp) < growth) { + growth *= 4; /* space for 4 more */ break; } /* FALLTHRU: Big enough MDBX_DUPFIXED sub-page */ @@ -18337,17 +18335,17 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, fp->mp_pgno = mp->mp_pgno; mc->mc_xcursor->mx_cursor.mc_pg[0] = fp; flags |= F_DUPDATA; - goto put_sub; + goto dupsort_put; } - xdata.iov_len = olddata.iov_len + offset; + xdata.iov_len = old_data.iov_len + growth; } fp_flags = fp->mp_flags; if (node_size_len(node_ks(node), xdata.iov_len) > env->me_leaf_nodemax) { /* Too big for a sub-page, convert to sub-DB */ + convert_to_subtree: fp_flags &= ~P_SUBP; - prep_subDB: nested_dupdb.md_xsize = 0; nested_dupdb.md_flags = flags_db2sub(mc->mc_db->md_flags); if (mc->mc_db->md_flags & MDBX_DUPFIXED) { @@ -18366,8 +18364,8 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if (unlikely(par.err != MDBX_SUCCESS)) return par.err; mc->mc_db->md_leaf_pages += 1; - cASSERT(mc, env->me_psize > olddata.iov_len); - offset = env->me_psize - (unsigned)olddata.iov_len; + cASSERT(mc, env->me_psize > old_data.iov_len); + growth = env->me_psize - (unsigned)old_data.iov_len; flags |= F_DUPDATA | F_SUBDATA; nested_dupdb.md_root = mp->mp_pgno; nested_dupdb.md_seq = 0; @@ -18379,8 +18377,8 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, mp->mp_txnid = 
mc->mc_txn->mt_front; mp->mp_leaf2_ksize = fp->mp_leaf2_ksize; mp->mp_lower = fp->mp_lower; - cASSERT(mc, fp->mp_upper + offset <= UINT16_MAX); - mp->mp_upper = (indx_t)(fp->mp_upper + offset); + cASSERT(mc, fp->mp_upper + growth < UINT16_MAX); + mp->mp_upper = fp->mp_upper + (indx_t)growth; if (unlikely(fp_flags & P_LEAF2)) { memcpy(page_data(mp), page_data(fp), page_numkeys(fp) * fp->mp_leaf2_ksize); @@ -18391,21 +18389,21 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, cASSERT(mc, (mp->mp_upper & 1) == 0); memcpy(ptr_disp(mp, mp->mp_upper + PAGEHDRSZ), ptr_disp(fp, fp->mp_upper + PAGEHDRSZ), - olddata.iov_len - fp->mp_upper - PAGEHDRSZ); + old_data.iov_len - fp->mp_upper - PAGEHDRSZ); memcpy(mp->mp_ptrs, fp->mp_ptrs, page_numkeys(fp) * sizeof(mp->mp_ptrs[0])); - for (i = 0; i < page_numkeys(fp); i++) { - cASSERT(mc, mp->mp_ptrs[i] + offset <= UINT16_MAX); - mp->mp_ptrs[i] += (indx_t)offset; + for (size_t i = 0; i < page_numkeys(fp); i++) { + cASSERT(mc, mp->mp_ptrs[i] + growth <= UINT16_MAX); + mp->mp_ptrs[i] += (indx_t)growth; } } } if (!insert_key) node_del(mc, 0); - rdata = &xdata; + ref_data = &xdata; flags |= F_DUPDATA; - goto new_sub; + goto insert_node; } /* MDBX passes F_SUBDATA in 'flags' to write a DB record */ @@ -18413,15 +18411,15 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, return MDBX_INCOMPATIBLE; current: - if (data->iov_len == olddata.iov_len) { + if (data->iov_len == old_data.iov_len) { cASSERT(mc, EVEN(key->iov_len) == EVEN(node_ks(node))); /* same size, just replace it. Note that we could * also reuse this node if the new data is smaller, * but instead we opt to shrink the node in that case. 
*/ if (flags & MDBX_RESERVE) - data->iov_base = olddata.iov_base; + data->iov_base = old_data.iov_base; else if (!(mc->mc_flags & C_SUB)) - memcpy(olddata.iov_base, data->iov_base, data->iov_len); + memcpy(old_data.iov_base, data->iov_base, data->iov_len); else { cASSERT(mc, page_numkeys(mc->mc_pg[mc->mc_top]) == 1); cASSERT(mc, PAGETYPE_COMPAT(mc->mc_pg[mc->mc_top]) == P_LEAF); @@ -18446,14 +18444,15 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, node_del(mc, 0); } - rdata = data; + ref_data = data; -new_sub:; +insert_node:; const unsigned naf = flags & NODE_ADD_FLAGS; - size_t nsize = IS_LEAF2(mc->mc_pg[mc->mc_top]) ? key->iov_len - : leaf_size(env, key, rdata); + size_t nsize = IS_LEAF2(mc->mc_pg[mc->mc_top]) + ? key->iov_len + : leaf_size(env, key, ref_data); if (page_room(mc->mc_pg[mc->mc_top]) < nsize) { - rc = page_split(mc, key, rdata, P_INVALID, + rc = page_split(mc, key, ref_data, P_INVALID, insert_key ? naf : naf | MDBX_SPLIT_REPLACE); if (rc == MDBX_SUCCESS && AUDIT_ENABLED()) rc = insert_key ? cursor_check(mc) : cursor_check_updating(mc); @@ -18461,25 +18460,25 @@ new_sub:; /* There is room already in this leaf page. */ if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { cASSERT(mc, !(naf & (F_BIGDATA | F_SUBDATA | F_DUPDATA)) && - rdata->iov_len == 0); + ref_data->iov_len == 0); rc = node_add_leaf2(mc, mc->mc_ki[mc->mc_top], key); } else - rc = node_add_leaf(mc, mc->mc_ki[mc->mc_top], key, rdata, naf); + rc = node_add_leaf(mc, mc->mc_ki[mc->mc_top], key, ref_data, naf); if (likely(rc == 0)) { /* Adjust other cursors pointing to mp */ const MDBX_dbi dbi = mc->mc_dbi; - const size_t i = mc->mc_top; - MDBX_page *const mp = mc->mc_pg[i]; + const size_t top = mc->mc_top; + MDBX_page *const mp = mc->mc_pg[top]; for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { MDBX_cursor *m3 = (mc->mc_flags & C_SUB) ? 
&m2->mc_xcursor->mx_cursor : m2; - if (m3 == mc || m3->mc_snum < mc->mc_snum || m3->mc_pg[i] != mp) + if (m3 == mc || m3->mc_snum < mc->mc_snum || m3->mc_pg[top] != mp) continue; - if (m3->mc_ki[i] >= mc->mc_ki[i]) - m3->mc_ki[i] += insert_key; + if (m3->mc_ki[top] >= mc->mc_ki[top]) + m3->mc_ki[top] += insert_key; if (XCURSOR_INITED(m3)) - XCURSOR_REFRESH(m3, mp, m3->mc_ki[i]); + XCURSOR_REFRESH(m3, mp, m3->mc_ki[top]); } } } @@ -18490,18 +18489,18 @@ new_sub:; * size limits on dupdata. The actual data fields of the child * DB are all zero size. */ if (flags & F_DUPDATA) { - unsigned xflags; - size_t ecount; - put_sub: - xdata.iov_len = 0; - xdata.iov_base = nullptr; + MDBX_val empty; + dupsort_put: + empty.iov_len = 0; + empty.iov_base = nullptr; MDBX_node *node = page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); #define SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE 1 STATIC_ASSERT( (MDBX_NODUPDATA >> SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE) == MDBX_NOOVERWRITE); - xflags = MDBX_CURRENT | ((flags & MDBX_NODUPDATA) >> - SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE); + unsigned xflags = + MDBX_CURRENT | ((flags & MDBX_NODUPDATA) >> + SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE); if ((flags & MDBX_CURRENT) == 0) { xflags -= MDBX_CURRENT; err = cursor_xinit1(mc, node, mc->mc_pg[mc->mc_top]); @@ -18511,80 +18510,78 @@ new_sub:; if (sub_root) mc->mc_xcursor->mx_cursor.mc_pg[0] = sub_root; /* converted, write the original data first */ - if (dkey.iov_base) { - rc = cursor_put_nochecklen(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, - xflags); + if (old_singledup.iov_base) { + rc = cursor_put_nochecklen(&mc->mc_xcursor->mx_cursor, &old_singledup, + &empty, xflags); if (unlikely(rc)) - goto bad_sub; + goto dupsort_error; } if (!(node_flags(node) & F_SUBDATA) || sub_root) { /* Adjust other cursors pointing to mp */ - MDBX_cursor *m2; - MDBX_xcursor *mx = mc->mc_xcursor; - size_t i = mc->mc_top; - MDBX_page *mp = mc->mc_pg[i]; + MDBX_xcursor *const mx = mc->mc_xcursor; + const 
size_t top = mc->mc_top; + MDBX_page *const mp = mc->mc_pg[top]; const intptr_t nkeys = page_numkeys(mp); - for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { + for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; + m2 = m2->mc_next) { if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; if (!(m2->mc_flags & C_INITIALIZED)) continue; - if (m2->mc_pg[i] == mp) { - if (m2->mc_ki[i] == mc->mc_ki[i]) { - err = cursor_xinit2(m2, mx, dkey.iov_base != nullptr); + if (m2->mc_pg[top] == mp) { + if (m2->mc_ki[top] == mc->mc_ki[top]) { + err = cursor_xinit2(m2, mx, old_singledup.iov_base != nullptr); if (unlikely(err != MDBX_SUCCESS)) return err; - } else if (!insert_key && m2->mc_ki[i] < nkeys) { - XCURSOR_REFRESH(m2, mp, m2->mc_ki[i]); + } else if (!insert_key && m2->mc_ki[top] < nkeys) { + XCURSOR_REFRESH(m2, mp, m2->mc_ki[top]); } } } } cASSERT(mc, mc->mc_xcursor->mx_db.md_entries < PTRDIFF_MAX); - ecount = (size_t)mc->mc_xcursor->mx_db.md_entries; + const size_t probe = (size_t)mc->mc_xcursor->mx_db.md_entries; #define SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND 1 STATIC_ASSERT((MDBX_APPENDDUP >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND) == MDBX_APPEND); xflags |= (flags & MDBX_APPENDDUP) >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND; - rc = cursor_put_nochecklen(&mc->mc_xcursor->mx_cursor, data, &xdata, + rc = cursor_put_nochecklen(&mc->mc_xcursor->mx_cursor, data, &empty, xflags); if (flags & F_SUBDATA) { void *db = node_data(node); mc->mc_xcursor->mx_db.md_mod_txnid = mc->mc_txn->mt_txnid; memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDBX_db)); } - insert_data = (ecount != (size_t)mc->mc_xcursor->mx_db.md_entries); + insert_data = (probe != (size_t)mc->mc_xcursor->mx_db.md_entries); } /* Increment count unless we just replaced an existing item. 
*/ if (insert_data) mc->mc_db->md_entries++; if (insert_key) { - /* Invalidate txn if we created an empty sub-DB */ - if (unlikely(rc)) - goto bad_sub; + if (unlikely(rc != MDBX_SUCCESS)) + goto dupsort_error; /* If we succeeded and the key didn't exist before, * make sure the cursor is marked valid. */ mc->mc_flags |= C_INITIALIZED; } - if (unlikely(flags & MDBX_MULTIPLE)) { - if (likely(rc == MDBX_SUCCESS)) { - continue_multiple: - mcount++; + if (likely(rc == MDBX_SUCCESS)) { + if (unlikely(batch_dupfixed_done)) { + batch_dupfixed_continue: /* let caller know how many succeeded, if any */ - data[1].iov_len = mcount; - if (mcount < dcount) { + if ((*batch_dupfixed_done += 1) < batch_dupfixed_given) { data[0].iov_base = ptr_disp(data[0].iov_base, data[0].iov_len); insert_key = insert_data = false; - dkey.iov_base = nullptr; + old_singledup.iov_base = nullptr; goto more; } } + if (AUDIT_ENABLED()) + rc = cursor_check(mc); } - if (rc == MDBX_SUCCESS && AUDIT_ENABLED()) - rc = cursor_check(mc); return rc; - bad_sub: + + dupsort_error: if (unlikely(rc == MDBX_KEYEXIST)) { /* should not happen, we deleted that item */ ERROR("Unexpected %i error while put to nested dupsort's hive", rc); From aa9d2387e59da96d527b7f4dc47f068ec6740964 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 28 Feb 2024 15:49:05 +0300 Subject: [PATCH 117/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`me=5Fdxb=5Fmmap.current?= =?UTF-8?q?=20>=20me=5Fdxb=5Fmmap.limit`=20=D0=B8=20=D1=81=D1=80=D0=B0?= =?UTF-8?q?=D0=B1=D0=B0=D1=82=D1=8B=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20=D1=81?= =?UTF-8?q?=D0=BE=D0=BE=D1=82=D0=B2=D0=B5=D1=82=D1=81=D1=82=D0=B2=D1=83?= =?UTF-8?q?=D1=8E=D1=89=D0=B5=D0=B9=20assert-=D0=BF=D1=80=D0=BE=D0=B2?= =?UTF-8?q?=D0=B5=D1=80=D0=BA=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Устранение упущения, приводящего к нелогичной ситуации `me_dxb_mmap.current > me_dxb_mmap.limit` при "дребезге" размера БД. В текущем понимании, последствий, кроме срабатывания assert-проверки, нет, а вероятность проявления близка к нулю. --- src/core.c | 11 ++++++++++- src/osal.c | 3 ++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/core.c b/src/core.c index 3e99a8dd..c163fe14 100644 --- a/src/core.c +++ b/src/core.c @@ -6670,6 +6670,7 @@ __cold static int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, #endif /* MDBX_ENABLE_MADVISE */ rc = osal_mresize(mresize_flags, &env->me_dxb_mmap, size_bytes, limit_bytes); + eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); #if MDBX_ENABLE_MADVISE if (rc == MDBX_SUCCESS) { @@ -6695,6 +6696,7 @@ __cold static int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, bailout: if (rc == MDBX_SUCCESS) { + eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); eASSERT(env, limit_bytes == env->me_dxb_mmap.limit); eASSERT(env, size_bytes <= env->me_dxb_mmap.filesize); if (mode == explicit_resize) @@ -6725,6 +6727,7 @@ bailout: "present %" PRIuPTR " -> %" PRIuPTR ", " "limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d", prev_size, size_bytes, prev_limit, limit_bytes, rc); + eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); } if (!env->me_dxb_mmap.base) { env->me_flags |= MDBX_FATAL_ERROR; @@ -9534,6 +9537,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { const size_t used_bytes = pgno2bytes(env, txn->mt_next_pgno); const size_t required_bytes = (txn->mt_flags & MDBX_TXN_RDONLY) ? 
used_bytes : size_bytes; + eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); if (unlikely(required_bytes > env->me_dxb_mmap.current)) { /* Размер БД (для пишущих транзакций) или используемых данных (для * читающих транзакций) больше предыдущего/текущего размера внутри @@ -9551,6 +9555,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { txn->mt_geo.upper, implicit_grow); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; + eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); } else if (unlikely(size_bytes < env->me_dxb_mmap.current)) { /* Размер БД меньше предыдущего/текущего размера внутри процесса, можно * уменьшить, но всё сложнее: @@ -9576,11 +9581,15 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { rc = osal_fastmutex_acquire(&env->me_remap_guard); #endif if (likely(rc == MDBX_SUCCESS)) { + eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); rc = osal_filesize(env->me_dxb_mmap.fd, &env->me_dxb_mmap.filesize); if (likely(rc == MDBX_SUCCESS)) { eASSERT(env, env->me_dxb_mmap.filesize >= required_bytes); if (env->me_dxb_mmap.current > env->me_dxb_mmap.filesize) - env->me_dxb_mmap.current = (size_t)env->me_dxb_mmap.filesize; + env->me_dxb_mmap.current = + (env->me_dxb_mmap.limit < env->me_dxb_mmap.filesize) + ? env->me_dxb_mmap.limit + : (size_t)env->me_dxb_mmap.filesize; } #if defined(_WIN32) || defined(_WIN64) osal_srwlock_ReleaseShared(&env->me_remap_guard); diff --git a/src/osal.c b/src/osal.c index 5559b204..3865025a 100644 --- a/src/osal.c +++ b/src/osal.c @@ -2581,7 +2581,7 @@ retry_mapview:; ptr_disp(map->base, size), ((map->current < map->limit) ? map->current : map->limit) - size); } - map->current = size; + map->current = (size < map->limit) ? 
size : map->limit; } if (limit == map->limit) @@ -2742,6 +2742,7 @@ retry_mapview:; map->base = ptr; } map->limit = limit; + map->current = size; #if MDBX_ENABLE_MADVISE #ifdef MADV_DONTFORK From 72e51ee370c1e6d1f012c367f5f8155597d1ad5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 28 Feb 2024 16:11:09 +0300 Subject: [PATCH 118/443] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20y=D0=BD=D0=B0=D1=81=D0=BB?= =?UTF-8?q?=D0=B5=D0=B4=D0=BE=D0=B2=D0=B0=D0=BD=D0=BD=D0=BE=D0=B9=20=D0=BE?= =?UTF-8?q?=D1=82=20LMDB=20=D0=BE=D1=88=D0=B8=D0=B1=D0=BA=D0=B8=20=D0=BF?= =?UTF-8?q?=D1=80=D0=B8=D0=B2=D0=BE=D0=B4=D1=8F=D1=89=D0=B5=D0=B9=20=D0=BA?= =?UTF-8?q?=20=D0=BF=D0=BE=D0=B2=D1=80=D0=B5=D0=B6=D0=B4=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D1=8E=20=D0=91=D0=94=20=D0=BF=D1=80=D0=B8=20=D0=B8=D1=81=D0=BF?= =?UTF-8?q?=D0=BE=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B8=20?= =?UTF-8?q?MDBX=5FDUPFIXED.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Тезисно: - Использование DUPFIXED (включая INTEGERDUP) могло приводить к повреждению БД и/или потере данных. Этот коммит устраняет эту угрозу. - Вероятность проявления существенно увеличивается с увеличением размера/длины мульти-значений/дубликатов (не ключей). - В MDBX проблема унаследована от LMDB, где существует более 11 лет, начиная с коммита https://github.com/LMDB/lmdb/commit/ccc4d23e749edc5ea461261427a0ee0a663fdfe5 и до настоящего времени. Для вложенных страниц типа LEAF2 (которые содержат только значения одинаковой длины, без таблицы смещений к ним), упомянутым выше коммитом, было добавлено резервирование места (что в целом спорно, но в некоторых сценариях позволяет уменьшить накладные расходы). 
Ошибка была в том, что в коде не исключалась возможность превышения размера страницы БД, что далее приводило к арифметическому переполнению, повреждению БД и/или порче памяти. --- src/core.c | 145 ++++++++++++++++++++++++++++++++++++++++++------ src/internals.h | 8 ++- 2 files changed, 135 insertions(+), 18 deletions(-) diff --git a/src/core.c b/src/core.c index c163fe14..af570585 100644 --- a/src/core.c +++ b/src/core.c @@ -13499,13 +13499,23 @@ __cold static void setup_pagesize(MDBX_env *env, const size_t pagesize) { leaf_nodemax > (intptr_t)(sizeof(MDBX_db) + NODESIZE + 42) && leaf_nodemax >= branch_nodemax && leaf_nodemax < (int)UINT16_MAX && leaf_nodemax % 2 == 0); - env->me_leaf_nodemax = (unsigned)leaf_nodemax; - env->me_branch_nodemax = (unsigned)branch_nodemax; + env->me_leaf_nodemax = (uint16_t)leaf_nodemax; + env->me_branch_nodemax = (uint16_t)branch_nodemax; env->me_psize2log = (uint8_t)log2n_powerof2(pagesize); eASSERT(env, pgno2bytes(env, 1) == pagesize); eASSERT(env, bytes2pgno(env, pagesize + pagesize) == 2); recalculate_merge_threshold(env); + /* TODO: recalculate me_subpage_xyz values from MDBX_opt_subpage_xyz. 
*/ + env->me_subpage_limit = env->me_leaf_nodemax - NODESIZE; + env->me_subpage_room_threshold = 0; + env->me_subpage_reserve_prereq = env->me_leaf_nodemax; + env->me_subpage_reserve_limit = env->me_subpage_limit / 42; + eASSERT(env, + env->me_subpage_reserve_prereq > + env->me_subpage_room_threshold + env->me_subpage_reserve_limit); + eASSERT(env, env->me_leaf_nodemax >= env->me_subpage_limit + NODESIZE); + const pgno_t max_pgno = bytes2pgno(env, MAX_MAPSIZE); if (!env->me_options.flags.non_auto.dp_limit) { /* auto-setup dp_limit by "The42" ;-) */ @@ -17939,6 +17949,26 @@ static __hot int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, return rc; } +static size_t leaf2_reserve(const MDBX_env *const env, size_t host_page_room, + size_t subpage_len, size_t item_len) { + eASSERT(env, (subpage_len & 1) == 0); + eASSERT(env, + env->me_subpage_reserve_prereq > env->me_subpage_room_threshold + + env->me_subpage_reserve_limit && + env->me_leaf_nodemax >= env->me_subpage_limit + NODESIZE); + size_t reserve = 0; + for (size_t n = 0; + n < 5 && reserve + item_len <= env->me_subpage_reserve_limit && + EVEN(subpage_len + item_len) <= env->me_subpage_limit && + host_page_room >= + env->me_subpage_reserve_prereq + EVEN(subpage_len + item_len); + ++n) { + subpage_len += item_len; + reserve += item_len; + } + return reserve + (subpage_len & 1); +} + static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, unsigned flags) { int err; @@ -18310,12 +18340,21 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if (mc->mc_db->md_flags & MDBX_DUPFIXED) { fp->mp_flags |= P_LEAF2; fp->mp_leaf2_ksize = (uint16_t)data->iov_len; - xdata.iov_len += 2 * data->iov_len; /* leave space for 2 more */ - cASSERT(mc, xdata.iov_len <= env->me_psize); + /* Будем создавать LEAF2-страницу, как минимум с двумя элементами. 
+ * При коротких значениях и наличии свободного места можно сделать + * некоторое резервирование места, чтобы при последующих добавлениях + * не сразу расширять созданную под-страницу. + * Резервирование в целом сомнительно (см ниже), но может сработать + * в плюс (а если в минус то несущественный) при коротких ключах. */ + xdata.iov_len += leaf2_reserve( + env, page_room(mc->mc_pg[mc->mc_top]) + old_data.iov_len, + xdata.iov_len, data->iov_len); + cASSERT(mc, (xdata.iov_len & 1) == 0); } else { xdata.iov_len += 2 * (sizeof(indx_t) + NODESIZE) + (old_data.iov_len & 1) + (data->iov_len & 1); } + cASSERT(mc, (xdata.iov_len & 1) == 0); fp->mp_upper = (uint16_t)(xdata.iov_len - PAGEHDRSZ); old_data.iov_len = xdata.iov_len; /* pretend olddata is fp */ } else if (node_flags(node) & F_SUBDATA) { @@ -18327,19 +18366,85 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, fp = old_data.iov_base; switch (flags) { default: - if (!(mc->mc_db->md_flags & MDBX_DUPFIXED)) { - growth = node_size(data, nullptr) + sizeof(indx_t); - break; + growth = IS_LEAF2(fp) ? fp->mp_leaf2_ksize + : (node_size(data, nullptr) + sizeof(indx_t)); + if (page_room(fp) >= growth) { + /* На текущей под-странице есть место для добавления элемента. + * Оптимальнее продолжить использовать эту страницу, ибо + * добавление вложенного дерева увеличит WAF на одну страницу. */ + goto continue_subpage; } - growth = fp->mp_leaf2_ksize; - if (page_room(fp) < growth) { - growth *= 4; /* space for 4 more */ - break; - } - /* FALLTHRU: Big enough MDBX_DUPFIXED sub-page */ - __fallthrough; + /* На текущей под-странице нет места для еще одного элемента. + * Можно либо увеличить эту под-страницу, либо вынести куст + * значений во вложенное дерево. + * + * Продолжать использовать текущую под-страницу возможно + * только пока и если размер после добавления элемента будет + * меньше me_leaf_nodemax. Соответственно, при превышении + * просто сразу переходим на вложенное дерево. 
*/ + xdata.iov_len = old_data.iov_len + (growth += growth & 1); + if (xdata.iov_len > env->me_subpage_limit) + goto convert_to_subtree; + + /* Можно либо увеличить под-страницу, в том числе с некоторым + * запасом, либо перейти на вложенное поддерево. + * + * Резервирование места на под-странице представляется сомнительным: + * - Резервирование увеличит рыхлость страниц, в том числе + * вероятность разделения основной/гнездовой страницы; + * - Сложно предсказать полезный размер резервирования, + * особенно для не-MDBX_DUPFIXED; + * - Наличие резерва позволяет съекономить только на перемещении + * части элементов основной/гнездовой страницы при последующих + * добавлениях в нее элементов. Причем после первого изменения + * размера под-страницы, её тело будет примыкать + * к неиспользуемому месту на основной/гнездовой странице, + * поэтому последующие последовательные добавления потребуют + * только передвижения в mp_ptrs[]. + * + * Соответственно, более важным/определяющим представляется + * своевременный переход к вложеному дереву, но тут достаточно + * сложный конфликт интересов: + * - При склонности к переходу к вложенным деревьям, суммарно + * в БД будет большее кол-во более рыхлых страниц. Это увеличит + * WAF, а также RAF при последовательных чтениях большой БД. + * Однако, при коротких ключах и большом кол-ве + * дубликатов/мультизначений, плотность ключей в листовых + * страницах основного дерева будет выше. Соответственно, будет + * пропорционально меньше branch-страниц. Поэтому будет выше + * вероятность оседания/не-вымывания страниц основного дерева из + * LRU-кэша, а также попадания в write-back кэш при записи. + * - Наоботот, при склонности к использованию под-страниц, будут + * наблюдаться обратные эффекты. Плюс некоторые накладные расходы + * на лишнее копирование данных под-страниц в сценариях + * нескольких обонвлений дубликатов одного куста в одной + * транзакции. 
+ * + * Суммарно наиболее рациональным представляется такая тактика: + * - Вводим три порога subpage_limit, subpage_room_threshold + * и subpage_reserve_prereq, которые могут быть + * заданы/скорректированы пользователем в ‰ от me_leaf_nodemax; + * - Используем под-страницу пока её размер меньше subpage_limit + * и на основной/гнездовой странице не-менее + * subpage_room_threshold свободного места; + * - Резервируем место только для 1-3 коротких dupfixed-элементов, + * расширяя размер под-страницы на размер кэш-линии ЦПУ, но + * только если на странице не менее subpage_reserve_prereq + * свободного места. + * - По-умолчанию устанавливаем: + * subpage_limit = me_leaf_nodemax (1000‰); + * subpage_room_threshold = 0; + * subpage_reserve_prereq = me_leaf_nodemax (1000‰). + */ + if (IS_LEAF2(fp)) + growth += leaf2_reserve( + env, page_room(mc->mc_pg[mc->mc_top]) + old_data.iov_len, + xdata.iov_len, data->iov_len); + break; + case MDBX_CURRENT | MDBX_NODUPDATA: case MDBX_CURRENT: + continue_subpage: fp->mp_txnid = mc->mc_txn->mt_front; fp->mp_pgno = mp->mp_pgno; mc->mc_xcursor->mx_cursor.mc_pg[0] = fp; @@ -18347,11 +18452,18 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, goto dupsort_put; } xdata.iov_len = old_data.iov_len + growth; + cASSERT(mc, (xdata.iov_len & 1) == 0); } fp_flags = fp->mp_flags; - if (node_size_len(node_ks(node), xdata.iov_len) > - env->me_leaf_nodemax) { + if (xdata.iov_len > env->me_subpage_limit || + node_size_len(node_ks(node), xdata.iov_len) > + env->me_leaf_nodemax || + (env->me_subpage_room_threshold && + page_room(mc->mc_pg[mc->mc_top]) + + node_size_len(node_ks(node), old_data.iov_len) < + env->me_subpage_room_threshold + + node_size_len(node_ks(node), xdata.iov_len))) { /* Too big for a sub-page, convert to sub-DB */ convert_to_subtree: fp_flags &= ~P_SUBP; @@ -18375,6 +18487,7 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, mc->mc_db->md_leaf_pages += 1; cASSERT(mc, 
env->me_psize > old_data.iov_len); growth = env->me_psize - (unsigned)old_data.iov_len; + cASSERT(mc, (growth & 1) == 0); flags |= F_DUPDATA | F_SUBDATA; nested_dupdb.md_root = mp->mp_pgno; nested_dupdb.md_seq = 0; diff --git a/src/internals.h b/src/internals.h index f4e37ac3..eaba18d0 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1388,8 +1388,12 @@ struct MDBX_env { #define me_lfd me_lck_mmap.fd struct MDBX_lockinfo *me_lck; - unsigned me_leaf_nodemax; /* max size of a leaf-node */ - unsigned me_branch_nodemax; /* max size of a branch-node */ + uint16_t me_leaf_nodemax; /* max size of a leaf-node */ + uint16_t me_branch_nodemax; /* max size of a branch-node */ + uint16_t me_subpage_limit; + uint16_t me_subpage_room_threshold; + uint16_t me_subpage_reserve_prereq; + uint16_t me_subpage_reserve_limit; atomic_pgno_t me_mlocked_pgno; uint8_t me_psize2log; /* log2 of DB page size */ int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */ From 9480599afa2fa89990cab5d5ce9eb931254321aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 29 Feb 2024 09:19:54 +0300 Subject: [PATCH 119/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20`rebalance()`=20=D1=80=D0=B0=D0=B4?= =?UTF-8?q?=D0=B8=20=D1=83=D0=BC=D0=B5=D0=BD=D1=8C=D1=88=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D1=8F=20WAF.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit После предыдущей серии доработок весной 2021 года, функция `rebalance()` обеспечивала слияние мало заполненной страницы с менее заполненной соседней, одновременно пытаясь не вовлекать соседних страниц, если те еще не были скопированы/клонированы/изменены в текущей транзакции. В целом, реализованная тактика представляется успешной. Однако, при обновлении GC она иногда приводила к исчерпанию подготовленного резерва извлеченных из GC страниц. 
Это не является проблемой, если не считать вероятность срабатывания `assert(txn->mt_flags & MDBX_TXN_DRAINED_GC)` в отладочных сборках. Тем не менее, из этой ситуации можно сделать вывод, что поведение `rebalance()`, как минимум, может быть обогащено опцией уменьшения WAF ценой меньшей сбалансированности дерева. Технически при этом слияние выполняется преимущественно с грязной страницей, если на ней достаточно места и соседняя страница с другой стороны еще чистая. Соответствующая опция в `enum MDBX_option_t` будет добавлена чуть позже. --- src/core.c | 59 +++++++++++++++++++++++++++++++------------------ src/internals.h | 2 ++ 2 files changed, 39 insertions(+), 22 deletions(-) diff --git a/src/core.c b/src/core.c index af570585..09095f8a 100644 --- a/src/core.c +++ b/src/core.c @@ -10642,7 +10642,9 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx) { const size_t for_all_before_touch = for_relist + for_tree_before_touch; const size_t for_all_after_touch = for_relist + for_tree_after_touch; - if (likely(for_relist < 2 && gcu_backlog_size(txn) > for_all_before_touch)) + if (likely(for_relist < 2 && gcu_backlog_size(txn) > for_all_before_touch) && + (ctx->cursor.mc_snum == 0 || + IS_MODIFIABLE(txn, ctx->cursor.mc_pg[ctx->cursor.mc_top]))) return MDBX_SUCCESS; TRACE(">> retired-stored %zu, left %zi, backlog %zu, need %zu (4list %zu, " @@ -18867,6 +18869,7 @@ static __hot int cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) { return rc; MDBX_page *mp = mc->mc_pg[mc->mc_top]; + cASSERT(mc, IS_MODIFIABLE(mc->mc_txn, mp)); if (!MDBX_DISABLE_VALIDATION && unlikely(!CHECK_LEAF_TYPE(mc, mp))) { ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", mp->mp_pgno, mp->mp_flags); @@ -20386,7 +20389,8 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { IS_LEAF(cdst->mc_pg[cdst->mc_db->md_depth - 1])); cASSERT(csrc, csrc->mc_snum < csrc->mc_db->md_depth || IS_LEAF(csrc->mc_pg[csrc->mc_db->md_depth - 1])); - cASSERT(cdst, 
page_room(pdst) >= page_used(cdst->mc_txn->mt_env, psrc)); + cASSERT(cdst, csrc->mc_txn->mt_env->me_options.prefer_waf_insteadof_balance || + page_room(pdst) >= page_used(cdst->mc_txn->mt_env, psrc)); const int pagetype = PAGETYPE_WHOLE(psrc); /* Move all nodes from src to dst */ @@ -20397,7 +20401,9 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { size_t j = dst_nkeys; if (unlikely(pagetype & P_LEAF2)) { /* Mark dst as dirty. */ - if (unlikely(rc = page_touch(cdst))) + rc = page_touch(cdst); + cASSERT(cdst, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) return rc; key.iov_len = csrc->mc_db->md_xsize; @@ -20405,6 +20411,7 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { size_t i = 0; do { rc = node_add_leaf2(cdst, j++, &key); + cASSERT(cdst, rc != MDBX_RESULT_TRUE); if (unlikely(rc != MDBX_SUCCESS)) return rc; key.iov_base = ptr_disp(key.iov_base, key.iov_len); @@ -20418,7 +20425,8 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { cursor_copy(csrc, &mn); /* must find the lowest key below src */ rc = page_search_lowest(&mn); - if (unlikely(rc)) + cASSERT(csrc, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) return rc; const MDBX_page *mp = mn.mc_pg[mn.mc_top]; @@ -20443,7 +20451,9 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { } /* Mark dst as dirty. 
*/ - if (unlikely(rc = page_touch(cdst))) + rc = page_touch(cdst); + cASSERT(cdst, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) return rc; size_t i = 0; @@ -20457,6 +20467,7 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { cASSERT(csrc, node_flags(srcnode) == 0); rc = node_add_branch(cdst, j++, &key, node_pgno(srcnode)); } + cASSERT(cdst, rc != MDBX_RESULT_TRUE); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -20483,7 +20494,8 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { if (csrc->mc_ki[csrc->mc_top] == 0) { const MDBX_val nullkey = {0, 0}; rc = update_key(csrc, &nullkey); - if (unlikely(rc)) { + cASSERT(csrc, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) { csrc->mc_top++; return rc; } @@ -20518,7 +20530,8 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { } rc = page_retire(csrc, (MDBX_page *)psrc); - if (unlikely(rc)) + cASSERT(csrc, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) return rc; cASSERT(cdst, cdst->mc_db->md_entries > 0); @@ -20531,7 +20544,7 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { const uint16_t save_depth = cdst->mc_db->md_depth; cursor_pop(cdst); rc = rebalance(cdst); - if (unlikely(rc)) + if (unlikely(rc != MDBX_SUCCESS)) return rc; cASSERT(cdst, cdst->mc_db->md_entries > 0); @@ -20719,11 +20732,9 @@ static int rebalance(MDBX_cursor *mc) { mc->mc_snum = 0; mc->mc_top = 0; mc->mc_flags &= ~C_INITIALIZED; - - rc = page_retire(mc, mp); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } else if (IS_BRANCH(mp) && nkeys == 1) { + return page_retire(mc, mp); + } + if (IS_BRANCH(mp) && nkeys == 1) { DEBUG("%s", "collapsing root page!"); mc->mc_db->md_root = node_pgno(page_node(mp, 0)); rc = page_get(mc, mc->mc_db->md_root, &mc->mc_pg[0], mp->mp_txnid); @@ -20756,15 +20767,10 @@ static int rebalance(MDBX_cursor *mc) { PAGETYPE_WHOLE(mc->mc_pg[mc->mc_top]) == pagetype); cASSERT(mc, mc->mc_snum < mc->mc_db->md_depth || 
IS_LEAF(mc->mc_pg[mc->mc_db->md_depth - 1])); - - rc = page_retire(mc, mp); - if (likely(rc == MDBX_SUCCESS)) - rc = page_touch(mc); - return rc; - } else { - DEBUG("root page %" PRIaPGNO " doesn't need rebalancing (flags 0x%x)", - mp->mp_pgno, mp->mp_flags); + return page_retire(mc, mp); } + DEBUG("root page %" PRIaPGNO " doesn't need rebalancing (flags 0x%x)", + mp->mp_pgno, mp->mp_flags); return MDBX_SUCCESS; } @@ -20813,6 +20819,7 @@ static int rebalance(MDBX_cursor *mc) { const size_t right_nkeys = right ? page_numkeys(right) : 0; bool involve = false; retry: + cASSERT(mc, mc->mc_snum > 1); if (left_room > room_threshold && left_room >= right_room && (IS_MODIFIABLE(mc->mc_txn, left) || involve)) { /* try merge with left */ @@ -20884,7 +20891,15 @@ retry: return MDBX_SUCCESS; } - if (likely(!involve)) { + if (mc->mc_txn->mt_env->me_options.prefer_waf_insteadof_balance && + likely(room_threshold > 0)) { + room_threshold = 0; + goto retry; + } + if (likely(!involve) && + (likely(mc->mc_dbi != FREE_DBI) || mc->mc_txn->tw.loose_pages || + MDBX_PNL_GETSIZE(mc->mc_txn->tw.relist) || (mc->mc_flags & C_GCU) || + (mc->mc_txn->mt_flags & MDBX_TXN_DRAINED_GC) || room_threshold)) { involve = true; goto retry; } diff --git a/src/internals.h b/src/internals.h index eaba18d0..483d545c 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1437,6 +1437,8 @@ struct MDBX_env { unsigned writethrough_threshold; #endif /* Windows */ bool prefault_write; + bool prefer_waf_insteadof_balance; /* Strive to minimize WAF instead of + balancing page fullness */ union { unsigned all; /* tracks options with non-auto values but tuned by user */ From e29cb076d379a1cab1367884e0c62a77252276db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 2 Mar 2024 01:08:22 +0300 Subject: [PATCH 120/443] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D1=80=D0=B0?=
=?UTF-8?q?=D0=B1=D0=BE=D1=82=D0=BA=D0=B0=20=D0=B3=D0=B5=D0=BD=D0=B5=D1=80?= =?UTF-8?q?=D0=B0=D1=82=D0=BE=D1=80=D0=B0=20=D0=BA=D0=BB=D1=8E=D1=87=D0=B5?= =?UTF-8?q?=D0=B9/=D0=B7=D0=BD=D0=B0=D1=87=D0=B5=D0=BD=D0=B8=D0=B9=20?= =?UTF-8?q?=D0=B4=D0=BB=D1=8F=20=D0=BD=D0=B0=D0=B4=D0=B5=D0=B6=D0=BD=D0=BE?= =?UTF-8?q?=D0=B9=20=D0=B3=D0=B5=D0=BD=D0=B5=D1=80=D0=B0=D1=86=D0=B8=D0=B8?= =?UTF-8?q?=20=D1=83=D0=BD=D0=B8=D0=BA=D0=B0=D0=BB=D1=8C=D0=BD=D1=8B=D1=85?= =?UTF-8?q?=20=D0=B7=D0=BD=D0=B0=D1=87=D0=B5=D0=BD=D0=B8=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit В текущем понимании коммитом этим устраняется застарелая проблема редких сбоев стохастического теста из-за вероятности ошибочной генерации повторяющихся пар key-value. --- test/hill.c++ | 4 +- test/keygen.c++ | 97 ++++++++++++++++++++++++++++++++++++++++--------- test/keygen.h++ | 11 +++++- 3 files changed, 91 insertions(+), 21 deletions(-) diff --git a/test/hill.c++ b/test/hill.c++ index f5ca1026..bbb3b3cf 100644 --- a/test/hill.c++ +++ b/test/hill.c++ @@ -90,7 +90,7 @@ bool testcase_hill::run() { assert(b_serial > a_serial); // создаем первую запись из пары - const keygen::serial_t age_shift = UINT64_C(1) << (a_serial % 31); + const keygen::serial_t age_shift = keyvalue_maker.remix_age(a_serial); log_trace("uphill: insert-a (age %" PRIu64 ") %" PRIu64, age_shift, a_serial); generate_pair(a_serial, a_key, a_data_1, age_shift); @@ -302,7 +302,7 @@ bool testcase_hill::run() { assert(b_serial > a_serial); // обновляем первую запись из пары - const keygen::serial_t age_shift = UINT64_C(1) << (a_serial % 31); + const keygen::serial_t age_shift = keyvalue_maker.remix_age(a_serial); log_trace("downhill: update-a (age 0->%" PRIu64 ") %" PRIu64, age_shift, a_serial); generate_pair(a_serial, a_key, a_data_0, 0); diff --git a/test/keygen.c++ b/test/keygen.c++ index 46b64ecf..69b0550c 100644 --- a/test/keygen.c++ +++ b/test/keygen.c++ @@ -14,6 +14,39 @@ #include "test.h++" +static 
const uint64_t primes[64] = { + /* */ + 0, 1, 3, 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, + /* */ + UINT64_C(32749), UINT64_C(65521), UINT64_C(131071), UINT64_C(262139), + UINT64_C(524287), UINT64_C(1048573), UINT64_C(2097143), UINT64_C(4194301), + UINT64_C(8388593), UINT64_C(16777213), UINT64_C(33554393), + UINT64_C(67108859), UINT64_C(134217689), UINT64_C(268435399), + UINT64_C(536870909), UINT64_C(1073741789), UINT64_C(2147483647), + UINT64_C(4294967291), UINT64_C(8589934583), UINT64_C(17179869143), + UINT64_C(34359738337), UINT64_C(68719476731), UINT64_C(137438953447), + UINT64_C(274877906899), UINT64_C(549755813881), UINT64_C(1099511627689), + UINT64_C(2199023255531), UINT64_C(4398046511093), UINT64_C(8796093022151), + UINT64_C(17592186044399), UINT64_C(35184372088777), + UINT64_C(70368744177643), UINT64_C(140737488355213), + UINT64_C(281474976710597), UINT64_C(562949953421231), + UINT64_C(1125899906842597), UINT64_C(2251799813685119), + UINT64_C(4503599627370449), UINT64_C(9007199254740881), + UINT64_C(18014398509481951), UINT64_C(36028797018963913), + UINT64_C(72057594037927931), UINT64_C(144115188075855859), + UINT64_C(288230376151711717), UINT64_C(576460752303423433), + UINT64_C(1152921504606846883), UINT64_C(2305843009213693951), + UINT64_C(4611686018427387847), UINT64_C(9223372036854775783)}; + +/* static unsigned supid_log2(uint64_t v) { + unsigned r = 0; + while (v > 1) { + v >>= 1; + r += 1; + } + return r; +} */ + namespace keygen { /* LY: https://en.wikipedia.org/wiki/Injective_function */ @@ -48,19 +81,19 @@ serial_t injective(const serial_t serial, 10, 14, 22, 19, 3, 21, 18, 19, 26, 24, 2, 21, 25, 29, 24, 10, 11, 14, 20, 19}; + const auto mask = actor_params::serial_mask(bits); const auto mult = m[bits - 8]; const auto shift = s[bits - 8]; serial_t result = serial * mult; if (salt) { const unsigned left = bits / 2; const unsigned right = bits - left; - result = (result << left) | - ((result & actor_params::serial_mask(bits)) >> 
right); + result = (result << left) | ((result & mask) >> right); result = (result ^ salt) * mult; } - result ^= result << shift; - result &= actor_params::serial_mask(bits); + result ^= (result & mask) >> shift; + result &= mask; log_trace("keygen-injective: serial %" PRIu64 "/%u @%" PRIx64 ",%u,%" PRIu64 " => %" PRIu64 "/%u", serial, bits, mult, shift, salt, result, bits); @@ -111,7 +144,7 @@ void __hot maker::pair(serial_t serial, const buffer &key, buffer &value, } serial_t key_serial = serial; - serial_t value_serial = value_age << mapping.split; + serial_t value_serial = (value_age & value_age_mask) << mapping.split; if (mapping.split) { if (MDBX_db_flags_t(key_essentials.flags) & MDBX_DUPSORT) { key_serial >>= mapping.split; @@ -200,6 +233,7 @@ void maker::setup(const config::actor_params_pod &actor, MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP) < UINT16_MAX); #endif + key_essentials.flags = uint16_t( actor.table_flags & MDBX_db_flags_t(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT)); @@ -209,6 +243,12 @@ void maker::setup(const config::actor_params_pod &actor, key_essentials.maxlen = std::min( uint32_t(actor.keylen_max), uint32_t(mdbx_limits_keysize_max(actor.pagesize, actor.table_flags))); + key_essentials.bits = (key_essentials.maxlen < sizeof(serial_t)) + ? key_essentials.maxlen * CHAR_BIT + : sizeof(serial_t) * CHAR_BIT; + key_essentials.mask = actor_params::serial_mask(key_essentials.bits); + assert(key_essentials.bits > 63 || + key_essentials.mask > primes[key_essentials.bits]); value_essentials.flags = uint16_t( actor.table_flags & @@ -219,27 +259,44 @@ void maker::setup(const config::actor_params_pod &actor, value_essentials.maxlen = std::min( uint32_t(actor.datalen_max), uint32_t(mdbx_limits_valsize_max(actor.pagesize, actor.table_flags))); + value_essentials.bits = (value_essentials.maxlen < sizeof(serial_t)) + ? 
value_essentials.maxlen * CHAR_BIT + : sizeof(serial_t) * CHAR_BIT; + value_essentials.mask = actor_params::serial_mask(value_essentials.bits); + assert(value_essentials.bits > 63 || + value_essentials.mask > primes[value_essentials.bits]); if (!actor.keygen.zero_fill) { key_essentials.flags |= essentials::prng_fill_flag; value_essentials.flags |= essentials::prng_fill_flag; } - (void)thread_number; mapping = actor.keygen; + const auto split = mapping.split; while (mapping.split > - essentials::value_age_width + value_essentials.maxlen * CHAR_BIT || + value_essentials.bits - essentials::value_age_minwidth || mapping.split >= mapping.width) mapping.split -= 1; + if (split != mapping.width) + log_notice("keygen: reduce mapping-split from %u to %u", split, + mapping.split); + const auto width = mapping.width; while (unsigned((actor.table_flags & MDBX_DUPSORT) ? mapping.width - mapping.split - : mapping.width) > key_essentials.maxlen * CHAR_BIT) + : mapping.width) > key_essentials.bits) mapping.width -= 1; + if (width != mapping.width) + log_notice("keygen: reduce mapping-width from %u to %u", width, + mapping.width); - salt = - (prng_state + uint64_t(thread_number)) * UINT64_C(14653293970879851569); + value_age_bits = value_essentials.bits - mapping.split; + value_age_mask = actor_params::serial_mask(value_age_bits); + assert(value_age_bits >= essentials::value_age_minwidth); + salt = (prng_state ^ + (thread_number * 1575554837) * UINT64_C(59386707711075671)) * + UINT64_C(14653293970879851569); base = actor.serial_base(); } @@ -321,14 +378,18 @@ serial_t __hot maker::mk_begin(serial_t serial, const essentials ¶ms, result &out) { assert(out.limit >= params.maxlen); assert(params.maxlen >= params.minlen); - if (params.maxlen < sizeof(serial_t)) { - const serial_t max = actor_params::serial_mask(params.maxlen * CHAR_BIT); - if (serial > max) { - serial ^= (serial >> max / 2) * serial_t((sizeof(serial_t) > 4) - ? 
UINT64_C(40719303417517073) - : UINT32_C(3708688457)); - serial &= max; - } + assert(serial <= params.mask); + if (unlikely(serial > params.mask)) { +#if 1 + serial %= primes[params.bits]; + assert(params.mask > primes[params.bits]); +#else + const serial_t maxbits = params.maxlen * CHAR_BIT; + serial ^= (serial >> maxbits / 2) * + serial_t((sizeof(serial_t) > 4) ? UINT64_C(40719303417517073) + : UINT32_C(3708688457)); + serial &= params.mask; +#endif assert(params.maxlen >= length(serial)); } diff --git a/test/keygen.h++ b/test/keygen.h++ index 0ded8130..8eb78118 100644 --- a/test/keygen.h++ +++ b/test/keygen.h++ @@ -108,10 +108,14 @@ class maker { struct essentials { uint16_t minlen{0}; - enum { prng_fill_flag = 1, value_age_width = 8 }; + enum { prng_fill_flag = 1, value_age_minwidth = 5 }; uint16_t flags{0}; uint32_t maxlen{0}; + serial_t mask{0}; + unsigned bits; } key_essentials, value_essentials; + unsigned value_age_bits; + serial_t value_age_mask{0}; static serial_t mk_begin(serial_t serial, const essentials ¶ms, result &out); @@ -136,6 +140,11 @@ public: } return increment(serial, int64_t(uint64_t(delta) << mapping.split)); } + + serial_t remix_age(serial_t serial) const { + return (UINT64_C(768097847591) * (serial ^ UINT64_C(768097847591))) & + value_age_mask; + } }; void log_pair(logging::loglevel level, const char *prefix, const buffer &key, From d8db63a67d151b29d20938cf2f81e1dc0492b653 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 2 Mar 2024 01:11:24 +0300 Subject: [PATCH 121/443] =?UTF-8?q?mdbx-test:=20=D1=87=D1=83=D1=82=D1=8C?= =?UTF-8?q?=20=D0=B1=D0=BE=D0=BB=D0=B5=D0=B5=20=D1=80=D0=B0=D0=B7=D1=83?= =?UTF-8?q?=D0=BC=D0=BD=D0=BE=D0=B5/=D1=83=D0=B4=D0=BE=D0=B1=D0=BD=D0=BE?= =?UTF-8?q?=D0=B5=20=D0=BF=D0=BE=D0=B2=D0=B5=D0=B4=D0=B5=D0=BD=D0=B8=D0=B5?= =?UTF-8?q?=20=D0=BF=D1=80=D0=B8=20=D0=BA=D0=BE=D0=BB=D0=BB=D0=B8=D0=B7?= 
=?UTF-8?q?=D0=B8=D0=B8=20=D0=B3=D0=B5=D0=BD=D0=B5=D1=80=D0=B0=D1=86=D0=B8?= =?UTF-8?q?=D0=B8=20=D0=BD=D0=B5-=D1=83=D0=BD=D0=B8=D0=BA=D0=B0=D0=BB?= =?UTF-8?q?=D1=8C=D0=BD=D1=8B=D1=85=20=D0=BF=D0=B0=D1=80=20=D0=BA=D0=BB?= =?UTF-8?q?=D1=8E=D1=87-=D0=B7=D0=BD=D0=B0=D1=87=D0=B5=D0=BD=D0=B8=D0=B5.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/test.c++ | 55 +++++++++++++++++++++++++++++++-------------------- test/test.h++ | 5 +++-- 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/test/test.c++ b/test/test.c++ index bb7bd818..05f1be60 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -599,9 +599,9 @@ void testcase::db_table_drop(MDBX_dbi handle) { void testcase::db_table_clear(MDBX_dbi handle, MDBX_txn *txn) { log_trace(">> testcase::db_table_clear, handle %u", handle); - int rc = mdbx_drop(txn ? txn : txn_guard.get(), handle, false); - if (unlikely(rc != MDBX_SUCCESS)) - failure_perror("mdbx_drop(delete=false)", rc); + int err = mdbx_drop(txn ? 
txn : txn_guard.get(), handle, false); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror("mdbx_drop(delete=false)", err); speculum.clear(); log_trace("<< testcase::db_table_clear"); } @@ -609,21 +609,25 @@ void testcase::db_table_clear(MDBX_dbi handle, MDBX_txn *txn) { void testcase::db_table_close(MDBX_dbi handle) { log_trace(">> testcase::db_table_close, handle %u", handle); assert(!txn_guard); - int rc = mdbx_dbi_close(db_guard.get(), handle); - if (unlikely(rc != MDBX_SUCCESS)) - failure_perror("mdbx_dbi_close()", rc); + int err = mdbx_dbi_close(db_guard.get(), handle); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror("mdbx_dbi_close()", err); log_trace("<< testcase::db_table_close"); } -void testcase::checkdata(const char *step, MDBX_dbi handle, MDBX_val key2check, +bool testcase::checkdata(const char *step, MDBX_dbi handle, MDBX_val key2check, MDBX_val expected_valued) { MDBX_val actual_value = expected_valued; - int rc = mdbx_get_equal_or_great(txn_guard.get(), handle, &key2check, - &actual_value); - if (unlikely(rc != MDBX_SUCCESS)) - failure_perror(step, rc); + int err = mdbx_get_equal_or_great(txn_guard.get(), handle, &key2check, + &actual_value); + if (unlikely(err != MDBX_SUCCESS)) { + if (!config.params.speculum || err != MDBX_RESULT_TRUE) + failure_perror(step, (err == MDBX_RESULT_TRUE) ? 
MDBX_NOTFOUND : err); + return false; + } if (!is_samedata(&actual_value, &expected_valued)) failure("%s data mismatch", step); + return true; } //----------------------------------------------------------------------------- @@ -988,7 +992,9 @@ int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, } auto it_lowerbound = insertion_result.first; - if (++it_lowerbound != speculum.end()) { + if (insertion_result.second) + ++it_lowerbound; + if (it_lowerbound != speculum.end()) { const auto cursor_lowerbound = speculum_cursors[lowerbound].get(); speculum_check_cursor("after-insert", "lowerbound", it_lowerbound, cursor_lowerbound, MDBX_GET_CURRENT); @@ -1015,30 +1021,37 @@ int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, int testcase::replace(const keygen::buffer &akey, const keygen::buffer &new_data, - const keygen::buffer &old_data, MDBX_put_flags_t flags) { + const keygen::buffer &old_data, MDBX_put_flags_t flags, + bool hush_keygen_mistakes) { + int expected_err = MDBX_SUCCESS; if (config.params.speculum) { const auto S_key = iov2dataview(akey); const auto S_old = iov2dataview(old_data); const auto S_new = iov2dataview(new_data); const auto removed = speculum.erase(SET::key_type(S_key, S_old)); - if (unlikely(removed != 1)) { + if (unlikely(!removed)) { char dump_key[128], dump_value[128]; log_error( - "speculum-%s: %s old value {%s, %s}", "replace", - (removed > 1) ? 
"multi" : "no", + "speculum-%s: no old pair {%s, %s} (keygen mistake)", "replace", mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), mdbx_dump_val(&old_data->value, dump_value, sizeof(dump_value))); - } - if (unlikely(!speculum.emplace(S_key, S_new).second)) { + expected_err = MDBX_NOTFOUND; + } else if (unlikely(!speculum.emplace(S_key, S_new).second)) { char dump_key[128], dump_value[128]; log_error( - "speculum-replace: new pair not inserted {%s, %s}", + "speculum-%s: %s {%s, %s}", "replace", "new pair not inserted", mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), mdbx_dump_val(&new_data->value, dump_value, sizeof(dump_value))); + expected_err = MDBX_KEYEXIST; } } - return mdbx_replace(txn_guard.get(), dbi, &akey->value, &new_data->value, - &old_data->value, flags); + int err = mdbx_replace(txn_guard.get(), dbi, &akey->value, &new_data->value, + &old_data->value, flags); + if (err && err == expected_err && hush_keygen_mistakes) { + log_notice("speculum-%s: %s %d", "replace", "hust keygen mistake", err); + err = MDBX_SUCCESS; + } + return err; } int testcase::remove(const keygen::buffer &akey, const keygen::buffer &adata) { diff --git a/test/test.h++ b/test/test.h++ index d99ba4f8..ef9ea0c1 100644 --- a/test/test.h++ +++ b/test/test.h++ @@ -232,7 +232,8 @@ protected: int insert(const keygen::buffer &akey, const keygen::buffer &adata, MDBX_put_flags_t flags); int replace(const keygen::buffer &akey, const keygen::buffer &new_value, - const keygen::buffer &old_value, MDBX_put_flags_t flags); + const keygen::buffer &old_value, MDBX_put_flags_t flags, + bool hush_keygen_mistakes = true); int remove(const keygen::buffer &akey, const keygen::buffer &adata); static int hsr_callback(const MDBX_env *env, const MDBX_txn *txn, @@ -262,7 +263,7 @@ protected: void txn_inject_writefault(MDBX_txn *txn); void fetch_canary(); void update_canary(uint64_t increment); - void checkdata(const char *step, MDBX_dbi handle, MDBX_val key2check, + bool checkdata(const char 
*step, MDBX_dbi handle, MDBX_val key2check, MDBX_val expected_valued); unsigned txn_underutilization_x256(MDBX_txn *txn) const; From 0c24b49bbf88486ef751d1ee37d5e4556a5b9da6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 20 Feb 2024 13:47:44 +0300 Subject: [PATCH 122/443] =?UTF-8?q?mdbx-test:=20=D1=80=D0=B0=D1=81=D1=88?= =?UTF-8?q?=D0=B8=D1=80=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=81=D1=82=D0=BE=D1=85?= =?UTF-8?q?=D0=B0=D1=81=D1=82=D0=B8=D1=87=D0=B5=D1=81=D0=BA=D0=BE=D0=B3?= =?UTF-8?q?=D0=BE=20=D1=82=D0=B5=D1=81=D1=82=D0=B0=20dupfixed-=D1=81=D1=86?= =?UTF-8?q?=D0=B5=D0=BD=D0=B0=D1=80=D0=B8=D1=8F=D0=BC=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/long_stochastic.sh | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index b508b5f6..12b493cb 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -461,6 +461,12 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of 
${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} split=24 caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ @@ -472,6 +478,13 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + split=16 caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ @@ -489,6 +502,12 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups 
--keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} if [ "$EXTRA" != "no" ]; then split=10 @@ -507,6 +526,12 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed 
--keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} fi split=4 @@ -519,6 +544,12 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 caption="Probe #$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} done # options loop=$((loop + 1)) if [ -n "$LOOPS" ] && [ $loop -ge "$LOOPS" ]; then break; fi From 4ed05689bcb39f57019ddb1263a88d811d737ed1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 29 Feb 2024 15:58:01 +0300 Subject: [PATCH 123/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=B5=D1=80=D0=B5=D0=B8?= =?UTF-8?q?=D0=BC=D0=B5=D0=BD=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20setup?= =?UTF-8?q?=5Fsdb()=20=D0=B4=D0=BB=D1=8F=20=D1=87=D0=B8=D1=82=D0=B0=D0=B5?= 
=?UTF-8?q?=D0=BC=D0=BE=D1=81=D1=82=D0=B8=20=D0=BA=D0=BE=D0=B4=D0=B0=20(?= =?UTF-8?q?=D0=BA=D0=BE=D1=81=D0=BC=D0=B5=D1=82=D0=B8=D0=BA=D0=B0).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/core.c b/src/core.c index 09095f8a..9d1f8f8e 100644 --- a/src/core.c +++ b/src/core.c @@ -3423,7 +3423,7 @@ static void cursor_copy(const MDBX_cursor *csrc, MDBX_cursor *cdst); static int __must_check_result drop_tree(MDBX_cursor *mc, const bool may_have_subDBs); static int __must_check_result fetch_sdb(MDBX_txn *txn, size_t dbi); -static int __must_check_result setup_dbx(MDBX_dbx *const dbx, +static int __must_check_result setup_sdb(MDBX_dbx *const dbx, const MDBX_db *const db, const unsigned pagesize); @@ -9493,7 +9493,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { txn->mt_dbs[MAIN_DBI].md_flags); env->me_db_flags[MAIN_DBI] = DB_POISON; atomic_store32(&env->me_dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease); - rc = setup_dbx(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], + rc = setup_sdb(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], env->me_psize); if (likely(rc == MDBX_SUCCESS)) { seq = dbi_seq_next(env, MAIN_DBI); @@ -16320,7 +16320,7 @@ __hot __noinline static int page_search_root(MDBX_cursor *mc, return MDBX_SUCCESS; } -static int setup_dbx(MDBX_dbx *const dbx, const MDBX_db *const db, +static int setup_sdb(MDBX_dbx *const dbx, const MDBX_db *const db, const unsigned pagesize) { if (unlikely(!db_check_flags(db->md_flags))) { ERROR("incompatible or invalid db.md_flags (%u) ", db->md_flags); @@ -16415,7 +16415,7 @@ static int fetch_sdb(MDBX_txn *txn, size_t dbi) { return MDBX_CORRUPTED; } #endif /* !MDBX_DISABLE_VALIDATION */ - rc = setup_dbx(dbx, db, txn->mt_env->me_psize); + rc = setup_sdb(dbx, db, txn->mt_env->me_psize); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -19596,7 +19596,7 @@ static __inline int 
couple_init(MDBX_cursor_couple *couple, const size_t dbi, rc = page_search(&couple->outer, NULL, MDBX_PS_ROOTONLY); rc = (rc != MDBX_NOTFOUND) ? rc : MDBX_SUCCESS; } else if (unlikely(dbx->md_klen_max == 0)) { - rc = setup_dbx(dbx, db, txn->mt_env->me_psize); + rc = setup_sdb(dbx, db, txn->mt_env->me_psize); } if (couple->outer.mc_db->md_flags & MDBX_DUPSORT) { @@ -23511,7 +23511,7 @@ static int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, datacmp ? datacmp : get_default_datacmp(user_flags); txn->mt_dbs[dbi].md_flags = db_flags; txn->mt_dbs[dbi].md_xsize = 0; - if (unlikely(setup_dbx(&env->me_dbxs[dbi], &txn->mt_dbs[dbi], + if (unlikely(setup_sdb(&env->me_dbxs[dbi], &txn->mt_dbs[dbi], env->me_psize))) { txn->mt_dbi_state[dbi] = DBI_LINDO; txn->mt_flags |= MDBX_TXN_ERROR; @@ -23588,7 +23588,7 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, env->me_dbxs[MAIN_DBI].md_dcmp = get_default_datacmp(main_flags); txn->mt_dbs[MAIN_DBI].md_flags = main_flags; txn->mt_dbs[MAIN_DBI].md_xsize = 0; - if (unlikely(setup_dbx(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], + if (unlikely(setup_sdb(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], env->me_psize) != MDBX_SUCCESS)) { txn->mt_dbi_state[MAIN_DBI] = DBI_LINDO; txn->mt_flags |= MDBX_TXN_ERROR; From fe498de323a2836ce7ef0b06a8026b2da5f92797 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 2 Mar 2024 01:06:05 +0300 Subject: [PATCH 124/443] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B2=20`cursor=5Fset()`=20?= =?UTF-8?q?=D0=BF=D0=BE=D0=B2=D1=82=D0=BE=D1=80=D0=BD=D0=BE=D0=B3=D0=BE=20?= =?UTF-8?q?=D1=81=D1=80=D0=B0=D0=B2=D0=BD=D0=B5=D0=BD=D0=B8=D1=8F=20=D1=81?= =?UTF-8?q?=20=D0=BD=D1=83=D0=BB=D0=B5=D0=B2=D1=8B=D0=BC=20=D1=8D=D0=BB?= =?UTF-8?q?=D0=B5=D0=BC=D0=B5=D0=BD=D1=82=D0=BE=D0=BC=20=D0=BD=D0=B0=20?= 
=?UTF-8?q?=D0=BB=D0=B8=D1=81=D1=82=D0=BE=D0=B2=D0=BE=D0=B9=20=D1=81=D1=82?= =?UTF-8?q?=D1=80=D0=B0=D0=BD=D0=B8=D1=86=D0=B5.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/core.c b/src/core.c index 9d1f8f8e..c8691237 100644 --- a/src/core.c +++ b/src/core.c @@ -16981,8 +16981,9 @@ cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { goto got_node; } if (cmp < 0) { - if (mc->mc_ki[mc->mc_top] < page_numkeys(mp)) { - /* This is definitely the right page, skip search_page */ + /* This is definitely the right page, skip search_page */ + if (mc->mc_ki[mc->mc_top] != 0 /* уже проверяли выше */ && + mc->mc_ki[mc->mc_top] < page_numkeys(mp)) { if (IS_LEAF2(mp)) { nodekey.iov_base = page_leaf2key(mp, mc->mc_ki[mc->mc_top], nodekey.iov_len); From eca0f463689def0092bd4ac20d4f1678df582976 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 3 Mar 2024 23:10:52 +0300 Subject: [PATCH 125/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20assert-=D0=BF=D1=80?= =?UTF-8?q?=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B8=20=D0=B2=D0=BD=D1=83=D1=82?= =?UTF-8?q?=D1=80=D0=B8=20`check=5Ftxn()`=20=D0=B4=D0=BB=D1=8F=20=D1=81?= =?UTF-8?q?=D0=BB=D1=83=D1=87=D0=B0=D1=8F=20=D0=B7=D0=B0=D0=B2=D0=B5=D1=80?= =?UTF-8?q?=D1=88=D0=B5=D0=BD=D0=BD=D1=8B=D1=85=20=D1=82=D1=80=D0=B0=D0=BD?= =?UTF-8?q?=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B9=20=D0=B2=20=D1=80=D0=B5?= =?UTF-8?q?=D0=B6=D0=B8=D0=BC=D0=B5=20`MDBX=5FNO=5FTLS`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit По сообщению о проблеме https://t.me/libmdbx/5424 --- src/core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/core.c b/src/core.c index c8691237..666998b4 
100644 --- a/src/core.c +++ b/src/core.c @@ -9646,10 +9646,11 @@ static __always_inline int check_txn(const MDBX_txn *txn, int bad_bits) { if (unlikely(txn->mt_flags & bad_bits)) return MDBX_BAD_TXN; - tASSERT(txn, (txn->mt_flags & MDBX_NOTLS) == - ((txn->mt_flags & MDBX_TXN_RDONLY) - ? txn->mt_env->me_flags & MDBX_NOTLS - : 0)); + tASSERT(txn, (txn->mt_flags & MDBX_TXN_FINISHED) || + (txn->mt_flags & MDBX_NOTLS) == + ((txn->mt_flags & MDBX_TXN_RDONLY) + ? txn->mt_env->me_flags & MDBX_NOTLS + : 0)); #if MDBX_TXN_CHECKOWNER STATIC_ASSERT(MDBX_NOTLS > MDBX_TXN_FINISHED + MDBX_TXN_RDONLY); if (unlikely(txn->mt_owner != osal_thread_self()) && From 0b87ddc6d4eda23fb8a7728e6c900a2fb2a9ff9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 3 Mar 2024 23:07:45 +0300 Subject: [PATCH 126/443] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B2=20jitter=20=D0=BF?= =?UTF-8?q?=D1=80=D0=BE=D1=81=D1=82=D0=BE=D0=B3=D0=BE=20=D1=82=D0=B5=D1=81?= =?UTF-8?q?=D1=82=D0=B0=20txn=5Freset+txn=5Frenew.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/jitter.c++ | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/test/jitter.c++ b/test/jitter.c++ index 993631e8..3e7a2b52 100644 --- a/test/jitter.c++ +++ b/test/jitter.c++ @@ -124,6 +124,37 @@ bool testcase_jitter::run() { jitter_delay(); txn_begin(true); fetch_canary(); + if (flipcoin()) { + MDBX_txn_info info; + err = mdbx_txn_reset(txn_guard.get()); + if (err) + failure_perror("mdbx_txn_reset()", err); + err = mdbx_txn_info(txn_guard.get(), &info, false); + if (err != MDBX_BAD_TXN) + failure_perror("mdbx_txn_info(MDBX_BAD_TXN)", err); + err = mdbx_txn_reset(txn_guard.get()); + if (err) + failure_perror("mdbx_txn_reset(again)", err); + err = mdbx_txn_break(txn_guard.get()); + if (err) + 
failure_perror("mdbx_txn_break()", err); + + err = mdbx_txn_abort(txn_guard.get()); + if (err) + failure_perror("mdbx_txn_abort()", err); + txn_guard.release(); + txn_begin(true); + err = mdbx_txn_reset(txn_guard.get()); + if (err) + failure_perror("mdbx_txn_reset()", err); + + err = mdbx_txn_renew(txn_guard.get()); + if (err) + failure_perror("mdbx_txn_renew()", err); + err = mdbx_txn_info(txn_guard.get(), &info, false); + if (err) + failure_perror("mdbx_txn_info()", err); + } jitter_delay(); txn_end(flipcoin()); } From 1c174e84c4e8d6a1a9b851b2132e07e5590fefb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 5 Mar 2024 01:56:04 +0300 Subject: [PATCH 127/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fpreopen=5Fsnapinfo()`?= =?UTF-8?q?=20=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://gitflic.ru/project/erthink/libmdbx/issue/15 --- mdbx.h | 12 ++++++++ src/core.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++ test/main.c++ | 8 +++++ 3 files changed, 103 insertions(+) diff --git a/mdbx.h b/mdbx.h index 1fda47e2..080b2509 100644 --- a/mdbx.h +++ b/mdbx.h @@ -5714,6 +5714,18 @@ LIBMDBX_API int mdbx_env_open_for_recoveryW(MDBX_env *env, * leg(s). */ LIBMDBX_API int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target_meta); +/** \brief FIXME + */ +LIBMDBX_API int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *arg, + size_t bytes); +#if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) +/** \copydoc mdbx_preopen_snapinfo() + * \note Available only on Windows. + * \see mdbx_preopen_snapinfo() */ +LIBMDBX_API int mdbx_preopen_snapinfoW(const wchar_t *pathname, + MDBX_envinfo *arg, size_t bytes); +#endif /* Windows */ + /** \brief Флаги/опции для проверки целостности БД. 
* \see mdbx_env_chk() */ enum MDBX_chk_flags_t { diff --git a/src/core.c b/src/core.c index 666998b4..eb217b0a 100644 --- a/src/core.c +++ b/src/core.c @@ -23396,6 +23396,89 @@ __cold int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, } } +__cold int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *out, + size_t bytes) { +#if defined(_WIN32) || defined(_WIN64) + wchar_t *pathnameW = nullptr; + int rc = osal_mb2w(pathname, &pathnameW); + if (likely(rc == MDBX_SUCCESS)) { + rc = mdbx_preopen_snapinfoW(pathnameW, out, bytes); + osal_free(pathnameW); + } + return rc; +} + +__cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, + size_t bytes) { +#endif /* Windows */ + if (unlikely(!out)) + return MDBX_EINVAL; + + const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); + const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); + if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && + bytes != size_before_pgop_stat) + return MDBX_EINVAL; + + memset(out, 0, bytes); + if (likely(bytes > size_before_bootid)) { + out->mi_bootid.current.x = bootid.x; + out->mi_bootid.current.y = bootid.y; + } + + MDBX_env env; + memset(&env, 0, sizeof(env)); + env.me_pid = osal_getpid(); + const size_t os_psize = osal_syspagesize(); + if (unlikely(!is_powerof2(os_psize) || os_psize < MIN_PAGESIZE)) { + ERROR("unsuitable system pagesize %" PRIuPTR, os_psize); + return MDBX_INCOMPATIBLE; + } + out->mi_sys_pagesize = env.me_os_psize = (unsigned)os_psize; + env.me_flags = MDBX_RDONLY | MDBX_NORDAHEAD | MDBX_ACCEDE | MDBX_VALIDATION; + env.me_stuck_meta = -1; + env.me_lfd = INVALID_HANDLE_VALUE; + env.me_lazy_fd = INVALID_HANDLE_VALUE; + env.me_dsync_fd = INVALID_HANDLE_VALUE; + env.me_fd4meta = INVALID_HANDLE_VALUE; +#if defined(_WIN32) || defined(_WIN64) + env.me_data_lock_event = INVALID_HANDLE_VALUE; + env.me_overlapped_fd = INVALID_HANDLE_VALUE; +#endif /* Windows */ + + int rc = 
env_handle_pathname(&env, pathname, 0); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + rc = osal_openfile(MDBX_OPEN_DXB_READ, &env, env.me_pathname.dxb, + &env.me_lazy_fd, 0); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + MDBX_meta header; + rc = read_header(&env, &header, 0, 0); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + setup_pagesize(&env, header.mm_psize); + out->mi_dxb_pagesize = env.me_psize; + out->mi_geo.lower = pgno2bytes(&env, header.mm_geo.lower); + out->mi_geo.upper = pgno2bytes(&env, header.mm_geo.upper); + out->mi_geo.shrink = pgno2bytes(&env, pv2pages(header.mm_geo.shrink_pv)); + out->mi_geo.grow = pgno2bytes(&env, pv2pages(header.mm_geo.grow_pv)); + out->mi_geo.current = pgno2bytes(&env, header.mm_geo.now); + out->mi_last_pgno = header.mm_geo.next - 1; + + const unsigned n = 0; + out->mi_recent_txnid = constmeta_txnid(&header); + out->mi_meta_sign[n] = unaligned_peek_u64(4, &header.mm_sign); + if (likely(bytes > size_before_bootid)) + memcpy(&out->mi_bootid.meta[n], &header.mm_bootid, 16); + +bailout: + env_close(&env, false); + return rc; +} + __cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *arg, size_t bytes) { if (unlikely((env == NULL && txn == NULL) || arg == NULL)) diff --git a/test/main.c++ b/test/main.c++ index 84ab801c..8732f5f7 100644 --- a/test/main.c++ +++ b/test/main.c++ @@ -743,6 +743,14 @@ int main(int argc, char *const argv[]) { log_trace("=== done..."); } + if (!failed) { + MDBX_envinfo info; + int err = + mdbx_preopen_snapinfo(params.pathname_db.c_str(), &info, sizeof(info)); + if (err != MDBX_SUCCESS) + failure_perror("mdbx_preopen_snapinfo()", err); + } + log_notice("RESULT: %s\n", failed ? 
"Failed" : "Successful"); if (global::config::cleanup_after) { if (failed) From 1549d3970c8b08873f57a318e2a8fa5c89138c3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 5 Mar 2024 15:07:00 +0300 Subject: [PATCH 128/443] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5?= =?UTF-8?q?=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20=D1=83=D1=81?= =?UTF-8?q?=D0=BB=D0=BE=D0=B2=D0=B8=D1=8F=20=D0=B2=20assert-=D0=BF=D1=80?= =?UTF-8?q?=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B5=20=D0=B4=D0=BB=D1=8F=20MDB?= =?UTF-8?q?X=5FTXN=5FDRAINED=5FGC.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index eb217b0a..0f8663ba 100644 --- a/src/core.c +++ b/src/core.c @@ -7772,7 +7772,7 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *const mc, const size_t num, //--------------------------------------------------------------------------- if (unlikely(!is_gc_usable(txn, mc, flags))) { - eASSERT(env, txn->mt_flags & MDBX_TXN_DRAINED_GC); + eASSERT(env, (txn->mt_flags & MDBX_TXN_DRAINED_GC) || num > 1); goto no_gc; } From 5df3eb6449867f97fd5c7492af38eba078dc51db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 6 Mar 2024 11:27:22 +0300 Subject: [PATCH 129/443] =?UTF-8?q?mdbx-test:=20=D1=83=D1=81=D0=B8=D0=BB?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=81=D1=86=D0=B5=D0=BD=D0=B0=D1=80?= =?UTF-8?q?=D0=B8=D0=B5=D0=B2=20=D1=82=D0=B5=D1=81=D1=82=D0=BE=D0=B2=D1=8B?= =?UTF-8?q?=D1=85=20=D1=86=D0=B5=D0=BB=D0=B5=D0=B9=20=D0=B2=20`GNUmakefile?= =?UTF-8?q?`=20=D0=B8=20`CMake`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GNUmakefile | 8 ++++---- test/CMakeLists.txt | 2 +- 2 files changed, 
5 insertions(+), 5 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index 0ddce68e..1694df6b 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -424,12 +424,12 @@ smoke-fault: build-test test: build-test @echo ' RUNNING `test/long_stochastic.sh --loops 2`...' - $(QUIET)test/long_stochastic.sh --dont-check-ram-size --loops 2 --db-upto-mb 256 --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) + $(QUIET)test/long_stochastic.sh --dont-check-ram-size --loops 2 --db-upto-mb 256 --extra --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) long-test: test-long test-long: build-test @echo ' RUNNING `test/long_stochastic.sh --loops 42`...' - $(QUIET)test/long_stochastic.sh --loops 42 --db-upto-mb 1024 --skip-make --taillog + $(QUIET)test/long_stochastic.sh --loops 42 --db-upto-mb 1024 --extra --skip-make --taillog test-singleprocess: build-test @echo ' RUNNING `test/long_stochastic.sh --single --loops 2`...' @@ -439,7 +439,7 @@ test-valgrind: test-memcheck test-memcheck: CFLAGS_EXTRA=-Ofast -DENABLE_MEMCHECK test-memcheck: build-test @echo ' RUNNING `test/long_stochastic.sh --with-valgrind --loops 2`...' - $(QUIET)test/long_stochastic.sh --with-valgrind --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false) + $(QUIET)test/long_stochastic.sh --with-valgrind --extra --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false) memcheck: smoke-memcheck smoke-memcheck: VALGRIND=valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt @@ -447,7 +447,7 @@ smoke-memcheck: CFLAGS_EXTRA=-Ofast -DENABLE_MEMCHECK smoke-memcheck: build-test @echo " SMOKE \`mdbx_test basic\` under Valgrind's memcheck..." 
$(QUIET)rm -f valgrind-*.log $(TEST_DB) $(TEST_LOG).gz && (set -o pipefail; ( \ - $(VALGRIND) ./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=2 --pathname=$(TEST_DB) --dont-cleanup-after $(MDBX_SMOKE_EXTRA) basic && \ + $(VALGRIND) ./mdbx_test --table=+data.fixed --keygen.split=29 --datalen=35 --progress --console=no --repeat=2 --pathname=$(TEST_DB) --dont-cleanup-after $(MDBX_SMOKE_EXTRA) basic && \ $(VALGRIND) ./mdbx_test --progress --console=no --pathname=$(TEST_DB) --dont-cleanup-before --dont-cleanup-after --copy && \ $(VALGRIND) ./mdbx_test --mode=-writemap,-nosync-safe,-lifo --progress --console=no --repeat=4 --pathname=$(TEST_DB) --dont-cleanup-after $(MDBX_SMOKE_EXTRA) basic && \ $(VALGRIND) ./mdbx_chk -vvn $(TEST_DB) && \ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 7098cfed..61531a57 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -145,7 +145,7 @@ else() add_test(NAME dupsort_writemap COMMAND ${MDBX_OUTPUT_DIR}/mdbx_test --loglevel=notice --prng-seed=${test_seed} - --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no + --table=+data.fixed --keygen.split=29 --datalen=rnd --progress --console=no --repeat=2 --pathname=dupsort_writemap.db --dont-cleanup-after basic) set_tests_properties(dupsort_writemap PROPERTIES TIMEOUT 3600 From a6f7d74a32a3cbcc310916a624a31302dbebfa07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 7 Mar 2024 19:25:19 +0300 Subject: [PATCH 130/443] =?UTF-8?q?mdbx:=20=D0=BC=D0=B8=D0=BA=D1=80=D0=BE?= =?UTF-8?q?=D0=BE=D0=BF=D1=82=D0=B8=D0=BC=D0=B8=D0=B7=D0=B0=D1=86=D0=B8?= =?UTF-8?q?=D1=8F=20`cursor=5Ftouch()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/src/core.c b/src/core.c index 0f8663ba..10718222 100644 --- a/src/core.c +++ b/src/core.c @@ -17940,7 +17940,8 @@ static __hot int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, } int rc = MDBX_SUCCESS; - if (likely(mc->mc_snum)) { + if (likely(mc->mc_snum) && + !IS_MODIFIABLE(mc->mc_txn, mc->mc_pg[mc->mc_snum - 1])) { mc->mc_top = 0; do { rc = page_touch(mc); From 471085788c427e5643ce1b7b8b41d2c413d78ad9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 10 Mar 2024 23:47:19 +0300 Subject: [PATCH 131/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D1=88=D0=B8=D0=B1?= =?UTF-8?q?=D0=BA=D0=B8=20=D0=BE=D1=82=D0=BA=D1=80=D1=8B=D1=82=D0=B8=D1=8F?= =?UTF-8?q?=20=D0=91=D0=94=20=D0=BD=D0=B0=20=D0=A4=D0=A1=20=D1=82=D0=BE?= =?UTF-8?q?=D0=BB=D1=8C=D0=BA=D0=BE-=D0=B4=D0=BB=D1=8F-=D1=87=D1=82=D0=B5?= =?UTF-8?q?=D0=BD=D0=B8=D1=8F.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 22 +++++++++++----------- src/osal.c | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/core.c b/src/core.c index 10718222..d7edca17 100644 --- a/src/core.c +++ b/src/core.c @@ -6054,6 +6054,14 @@ __cold static void meta_troika_dump(const MDBX_env *env, /*----------------------------------------------------------------------------*/ +static __inline MDBX_CONST_FUNCTION MDBX_lockinfo * +lckless_stub(const MDBX_env *env) { + uintptr_t stub = (uintptr_t)&env->x_lckless_stub; + /* align to avoid false-positive alarm from UndefinedBehaviorSanitizer */ + stub = (stub + MDBX_CACHELINE_SIZE - 1) & ~(MDBX_CACHELINE_SIZE - 1); + return (MDBX_lockinfo *)stub; +} + /* Find oldest txnid still referenced. 
*/ static txnid_t find_oldest_reader(MDBX_env *const env, const txnid_t steady) { const uint32_t nothing_changed = MDBX_STRING_TETRAD("None"); @@ -6061,7 +6069,7 @@ static txnid_t find_oldest_reader(MDBX_env *const env, const txnid_t steady) { MDBX_lockinfo *const lck = env->me_lck_mmap.lck; if (unlikely(lck == NULL /* exclusive without-lck mode */)) { - eASSERT(env, env->me_lck == (void *)&env->x_lckless_stub); + eASSERT(env, env->me_lck == lckless_stub(env)); env->me_lck->mti_readers_refresh_flag.weak = nothing_changed; return env->me_lck->mti_oldest_reader.weak = steady; } @@ -9296,8 +9304,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { mo_AcquireRelease); } else { /* exclusive mode without lck */ - eASSERT(env, !env->me_lck_mmap.lck && - env->me_lck == (void *)&env->x_lckless_stub); + eASSERT(env, !env->me_lck_mmap.lck && env->me_lck == lckless_stub(env)); } jitter4testing(true); @@ -13548,14 +13555,6 @@ __cold static void setup_pagesize(MDBX_env *env, const size_t pagesize) { env->me_options.dp_initial = env->me_options.dp_limit; } -static __inline MDBX_CONST_FUNCTION MDBX_lockinfo * -lckless_stub(const MDBX_env *env) { - uintptr_t stub = (uintptr_t)&env->x_lckless_stub; - /* align to avoid false-positive alarm from UndefinedBehaviorSanitizer */ - stub = (stub + MDBX_CACHELINE_SIZE - 1) & ~(MDBX_CACHELINE_SIZE - 1); - return (MDBX_lockinfo *)stub; -} - __cold int mdbx_env_create(MDBX_env **penv) { if (unlikely(!penv)) return MDBX_EINVAL; @@ -15559,6 +15558,7 @@ __cold static int env_open(MDBX_env *env, mdbx_mode_t mode) { if (rc == MDBX_RESULT_TRUE) { env->me_incore = true; NOTICE("%s", "in-core database"); + rc = MDBX_SUCCESS; } else if (unlikely(rc != MDBX_SUCCESS)) { ERROR("check_fs_incore(), err %d", rc); return rc; diff --git a/src/osal.c b/src/osal.c index 3865025a..161bed91 100644 --- a/src/osal.c +++ b/src/osal.c @@ -1836,8 +1836,8 @@ MDBX_INTERNAL_FUNC int osal_check_fs_rdonly(mdbx_filehandle_t handle, #else struct statvfs info; if 
(err != MDBX_ENOFILE) { - if (statvfs(pathname, &info) == 0 && (info.f_flag & ST_RDONLY) == 0) - return err; + if (statvfs(pathname, &info) == 0) + return (info.f_flag & ST_RDONLY) ? MDBX_SUCCESS : err; if (errno != MDBX_ENOFILE) return errno; } From aae6a0395acc2bd85349043f9a7202c425e7c0b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 14 Mar 2024 23:03:33 +0300 Subject: [PATCH 132/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D0=B5=D1=87?= =?UTF-8?q?=D0=B0=D1=82=D0=BA=D0=B8=20`=D1=80=D0=B0=D0=B2=D0=BD=D0=BE`/`?= =?UTF-8?q?=D0=BD=D0=B5=D1=80=D0=B0=D0=B2=D0=BD=D0=BE`=20=D0=B2=20=D1=83?= =?UTF-8?q?=D1=81=D0=BB=D0=BE=D0=B2=D0=B8=D0=B8=20=D0=B2=D0=BD=D1=83=D1=82?= =?UTF-8?q?=D1=80=D0=B8=20`update=5Fgc()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Существенных последствий ошибки не было (иначе бы давно было замечено). Но в определенных сценариях, сходимость требовала еще одного цикла повтора внутри update_gc(). 
--- src/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index d7edca17..31586aad 100644 --- a/src/core.c +++ b/src/core.c @@ -10967,7 +10967,7 @@ retry: if (unlikely(!ctx->retired_stored)) { /* Make sure last page of GC is touched and on retired-list */ rc = cursor_last(&ctx->cursor, nullptr, nullptr); - if (likely(rc != MDBX_SUCCESS)) + if (likely(rc == MDBX_SUCCESS)) rc = gcu_touch(ctx); if (unlikely(rc != MDBX_SUCCESS) && rc != MDBX_NOTFOUND) goto bailout; From 93f76f43ac5e3b9af11305fe70f23123d07fb8df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 12 Mar 2024 02:25:13 +0300 Subject: [PATCH 133/443] =?UTF-8?q?mdbx-chk:=20=D0=BD=D0=B5=20=D1=81=D1=87?= =?UTF-8?q?=D0=B8=D1=82=D0=B0=D0=B5=D0=BC=20=D0=BE=D1=88=D0=B8=D0=B1=D0=BE?= =?UTF-8?q?=D1=87=D0=BD=D1=8B=D0=BC=D0=B8/=D0=BF=D1=80=D0=BE=D0=B1=D0=BB?= =?UTF-8?q?=D0=B5=D0=BC=D0=BD=D1=8B=D0=BC=D0=B8=20=D0=B7=D0=B0=D0=BF=D0=B8?= =?UTF-8?q?=D1=81=D0=B8=20=D0=BD=D1=83=D0=BB=D0=B5=D0=B2=D0=BE=D0=B9=20?= =?UTF-8?q?=D0=B4=D0=BB=D0=B8=D0=BD=D1=8B=20=D0=B2=20GC.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Технически такие записи не являются проблемными, а образовываются в случае когда внутри update_gc() резервируется больше места, чем реально остается номеров свободных страниц для возврата в GC. Изначально такое избыточное резервирование считалось алгоритмическим недостатком update_gc(). Поэтому утилита mdbx_chk была временно доработана для выявления таких случаев в ходе стохастических тестов. Постепенно все реальные недочеты update_gc() (если не считать запутанности и неочевидности кода) были устранены, формирование пустых записей в GC не наблюдалось и излишне строгий контроль в mdbx_chk не создавал проблем. 
В ходе же последних точечных доработок была предпринята попытка еще немного уменьшить затраты ЦПУ внутри update_gc(), в частности уменьшить кол-во циклов/повторов посредством улучшения сходимости, а также уменьшить WAF. При этом образование пустых записей в GC стало возможным в достаточно редких ситуациях, когда (например) для возврата в GC остается только одна страница и добавление записи единичной длины приводит к перебалансировке или разделению листовой страницы по легковесному пути, без вовлечения других страниц дерева и без переработки дополнительных записей GC, но с поглощением остававшейся на возврат страницы. Проще говоря, в актуальной версии MDBX пустые записи в GC могут образовываться, когда это энергетически выгодно. Тогда как в предыдущих выпусках в таких ситуациях выполнялось более дорогое обновление GC с переработкой и возвратом дополнительных записей. --- src/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index 31586aad..e0b66d30 100644 --- a/src/core.c +++ b/src/core.c @@ -28387,7 +28387,7 @@ __cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, chk_object_issue(scope, "entry", txnid, "wrong idl size", "%" PRIuPTR, data->iov_len); size_t number = (data->iov_len >= sizeof(pgno_t)) ? 
*iptr++ : 0; - if (number < 1 || number > MDBX_PGL_LIMIT) + if (number > MDBX_PGL_LIMIT) chk_object_issue(scope, "entry", txnid, "wrong idl length", "%" PRIuPTR, number); else if ((number + 1) * sizeof(pgno_t) > data->iov_len) { From baaa26bb322799496d4fa32f9e59f750ea64c21c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 17 Mar 2024 00:25:08 +0300 Subject: [PATCH 134/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20`update=5Fgc()`=20=D0=B4=D0=BB=D1=8F?= =?UTF-8?q?=20=D1=83=D0=BB=D1=83=D1=87=D1=88=D0=B5=D0=BD=D0=B8=D1=8F=20?= =?UTF-8?q?=D1=81=D1=85=D0=BE=D0=B4=D0=B8=D0=BC=D0=BE=D1=81=D1=82=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 143 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 87 insertions(+), 56 deletions(-) diff --git a/src/core.c b/src/core.c index e0b66d30..81c768c1 100644 --- a/src/core.c +++ b/src/core.c @@ -10551,8 +10551,9 @@ __cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, } typedef struct gc_update_context { - size_t retired_stored, loop; - size_t settled, cleaned_slot, reused_slot, filled_slot; + size_t loop, reserve_adj; + size_t retired_stored; + size_t reserved, cleaned_slot, reused_slot, fill_idx; txnid_t cleaned_id, rid; bool lifo, dense; #if MDBX_ENABLE_BIGFOOT @@ -10597,7 +10598,8 @@ static int gcu_clean_stored_retired(MDBX_txn *txn, gcu_context_t *ctx) { err = cursor_del(gc, 0); TRACE("== clear-4linear, backlog %zu, err %d", gcu_backlog_size(txn), err); - } + } else + err = (err == MDBX_NOTFOUND) ? MDBX_SUCCESS : err; } #if MDBX_ENABLE_BIGFOOT while (!err && --ctx->bigfoot >= txn->mt_txnid); @@ -10736,7 +10738,8 @@ static int update_gc(MDBX_txn *txn, gcu_context_t *ctx) { /* txn->tw.relist[] can grow and shrink during this call. 
* txn->tw.last_reclaimed and txn->tw.retired_pages[] can only grow. * But page numbers cannot disappear from txn->tw.retired_pages[]. */ - +retry_clean_adj: + ctx->reserve_adj = 0; retry: if (ctx->loop++) TRACE("%s", " >> restart"); @@ -10756,10 +10759,10 @@ retry: goto bailout; } - ctx->settled = 0; + ctx->reserved = 0; ctx->cleaned_slot = 0; ctx->reused_slot = 0; - ctx->filled_slot = ~0u; + ctx->fill_idx = ~0u; ctx->cleaned_id = 0; ctx->rid = txn->tw.last_reclaimed; while (true) { @@ -10781,10 +10784,10 @@ retry: if (ctx->cleaned_slot < (txn->tw.lifo_reclaimed ? MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) : 0)) { - ctx->settled = 0; + ctx->reserved = 0; ctx->cleaned_slot = 0; ctx->reused_slot = 0; - ctx->filled_slot = ~0u; + ctx->fill_idx = ~0u; /* LY: cleanup reclaimed records. */ do { ctx->cleaned_id = txn->tw.lifo_reclaimed[++ctx->cleaned_slot]; @@ -10827,7 +10830,7 @@ retry: goto bailout; } ctx->rid = ctx->cleaned_id; - ctx->settled = 0; + ctx->reserved = 0; ctx->reused_slot = 0; ctx->cleaned_id = unaligned_peek_u64(4, key.iov_base); if (ctx->cleaned_id > txn->tw.last_reclaimed) @@ -11098,10 +11101,10 @@ retry: DEBUG_EXTRA_PRINT("%s\n", "."); } if (unlikely(amount != MDBX_PNL_GETSIZE(txn->tw.relist) && - ctx->settled)) { + ctx->reserved)) { TRACE("%s: reclaimed-list changed %zu -> %zu, retry", dbg_prefix_mode, amount, MDBX_PNL_GETSIZE(txn->tw.relist)); - goto retry /* rare case, but avoids GC fragmentation + goto retry_clean_adj /* rare case, but avoids GC fragmentation and one cycle. 
*/ ; } @@ -11119,10 +11122,11 @@ retry: if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } - const size_t left = amount - ctx->settled; - TRACE("%s: amount %zu, settled %zd, left %zd, lifo-reclaimed-slots %zu, " + const size_t left = amount - ctx->reserved - ctx->reserve_adj; + TRACE("%s: amount %zu, settled %zd, reserve_adj %zu, left %zd, " + "lifo-reclaimed-slots %zu, " "reused-gc-slots %zu", - dbg_prefix_mode, amount, ctx->settled, left, + dbg_prefix_mode, amount, ctx->reserved, ctx->reserve_adj, left, txn->tw.lifo_reclaimed ? MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) : 0, ctx->reused_slot); if (0 >= (intptr_t)left) @@ -11280,7 +11284,7 @@ retry: if (unlikely(ctx->rid == 0)) { ERROR("%s", "** no GC tail-space to store (going dense-mode)"); ctx->dense = true; - goto retry; + goto retry_clean_adj; } } else if (rc != MDBX_NOTFOUND) goto bailout; @@ -11369,7 +11373,7 @@ retry: key.iov_base = &reservation_gc_id; data.iov_len = (chunk + 1) * sizeof(pgno_t); TRACE("%s: reserve %zu [%zu...%zu) @%" PRIaTXN, dbg_prefix_mode, chunk, - ctx->settled + 1, ctx->settled + chunk + 1, reservation_gc_id); + ctx->reserved + 1, ctx->reserved + chunk + 1, reservation_gc_id); gcu_prepare_backlog(txn, ctx); rc = cursor_put_nochecklen(&ctx->cursor, &key, &data, MDBX_RESERVE | MDBX_NOOVERWRITE); @@ -11379,17 +11383,17 @@ retry: goto bailout; gcu_clean_reserved(env, data); - ctx->settled += chunk; - TRACE("%s: settled %zu (+%zu), continue", dbg_prefix_mode, ctx->settled, + ctx->reserved += chunk; + TRACE("%s: settled %zu (+%zu), continue", dbg_prefix_mode, ctx->reserved, chunk); if (txn->tw.lifo_reclaimed && unlikely(amount < MDBX_PNL_GETSIZE(txn->tw.relist)) && - (ctx->loop < 5 || - MDBX_PNL_GETSIZE(txn->tw.relist) - amount > env->me_maxgc_ov1page)) { + (ctx->loop < 5 || MDBX_PNL_GETSIZE(txn->tw.relist) - amount > + env->me_maxgc_ov1page / 2)) { NOTICE("** restart: reclaimed-list growth %zu -> %zu", amount, MDBX_PNL_GETSIZE(txn->tw.relist)); - goto retry; + goto retry_clean_adj; } 
continue; @@ -11402,7 +11406,8 @@ retry: TRACE("%s", " >> filling"); /* Fill in the reserved records */ - ctx->filled_slot = + size_t excess_slots = 0; + ctx->fill_idx = txn->tw.lifo_reclaimed ? MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot : ctx->reused_slot; @@ -11410,18 +11415,21 @@ retry: tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->mt_next_pgno - MDBX_ENABLE_REFUND)); tASSERT(txn, dirtylist_check(txn)); - if (MDBX_PNL_GETSIZE(txn->tw.relist)) { + if (ctx->reserved || MDBX_PNL_GETSIZE(txn->tw.relist)) { MDBX_val key, data; key.iov_len = data.iov_len = 0; /* avoid MSVC warning */ key.iov_base = data.iov_base = NULL; const size_t amount = MDBX_PNL_GETSIZE(txn->tw.relist); - size_t left = amount; + size_t left = amount, excess = 0; if (txn->tw.lifo_reclaimed == nullptr) { tASSERT(txn, ctx->lifo == 0); rc = cursor_first(&ctx->cursor, &key, &data); - if (unlikely(rc != MDBX_SUCCESS)) + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc == MDBX_NOTFOUND && ctx->reserve_adj) + goto retry_clean_adj; goto bailout; + } } else { tASSERT(txn, ctx->lifo != 0); } @@ -11433,24 +11441,33 @@ retry: if (txn->tw.lifo_reclaimed == nullptr) { tASSERT(txn, ctx->lifo == 0); fill_gc_id = unaligned_peek_u64(4, key.iov_base); - if (ctx->filled_slot-- == 0 || fill_gc_id > txn->tw.last_reclaimed) { - NOTICE( - "** restart: reserve depleted (filled_slot %zu, fill_id %" PRIaTXN - " > last_reclaimed %" PRIaTXN, - ctx->filled_slot, fill_gc_id, txn->tw.last_reclaimed); + if (ctx->fill_idx == 0 || fill_gc_id > txn->tw.last_reclaimed) { + if (!left) + break; + NOTICE("** restart: reserve depleted (fill_idx %zu, fill_id %" PRIaTXN + " > last_reclaimed %" PRIaTXN ", left %zu", + ctx->fill_idx, fill_gc_id, txn->tw.last_reclaimed, left); + ctx->reserve_adj = + (ctx->reserve_adj > left) ? 
ctx->reserve_adj - left : 0; goto retry; } + ctx->fill_idx -= 1; } else { tASSERT(txn, ctx->lifo != 0); - if (++ctx->filled_slot > MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed)) { - NOTICE("** restart: reserve depleted (filled_gc_slot %zu > " - "lifo_reclaimed %zu" PRIaTXN, - ctx->filled_slot, MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed)); + if (ctx->fill_idx >= MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed)) { + if (!left) + break; + NOTICE("** restart: reserve depleted (fill_idx %zu >= " + "lifo_reclaimed %zu, left %zu", + ctx->fill_idx, MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed), left); + ctx->reserve_adj = + (ctx->reserve_adj > left) ? ctx->reserve_adj - left : 0; goto retry; } - fill_gc_id = txn->tw.lifo_reclaimed[ctx->filled_slot]; + ctx->fill_idx += 1; + fill_gc_id = txn->tw.lifo_reclaimed[ctx->fill_idx]; TRACE("%s: seek-reservation @%" PRIaTXN " at lifo_reclaimed[%zu]", - dbg_prefix_mode, fill_gc_id, ctx->filled_slot); + dbg_prefix_mode, fill_gc_id, ctx->fill_idx); key.iov_base = &fill_gc_id; key.iov_len = sizeof(fill_gc_id); rc = cursor_set(&ctx->cursor, &key, &data, MDBX_SET_KEY).err; @@ -11469,12 +11486,17 @@ retry: tASSERT(txn, data.iov_len >= sizeof(pgno_t) * 2); size_t chunk = data.iov_len / sizeof(pgno_t) - 1; if (unlikely(chunk > left)) { + const size_t delta = chunk - left; + excess += delta; + if (!left) { + excess_slots += 1; + goto next; + } TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, dbg_prefix_mode, chunk, left, fill_gc_id); - if ((ctx->loop < 5 && chunk - left > ctx->loop / 2) || - chunk - left > env->me_maxgc_ov1page) { + if ((ctx->loop < 5 && delta > (ctx->loop / 2)) || + delta > env->me_maxgc_ov1page) data.iov_len = (left + 1) * sizeof(pgno_t); - } chunk = left; } rc = cursor_put_nochecklen(&ctx->cursor, &key, &data, @@ -11487,14 +11509,14 @@ retry: amount != MDBX_PNL_GETSIZE(txn->tw.relist))) { NOTICE("** restart: reclaimed-list growth (%zu -> %zu, loose +%zu)", amount, MDBX_PNL_GETSIZE(txn->tw.relist), txn->tw.loose_count); - goto retry; + goto 
retry_clean_adj; } if (unlikely(txn->tw.lifo_reclaimed ? ctx->cleaned_slot < MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) : ctx->cleaned_id < txn->tw.last_reclaimed)) { NOTICE("%s", "** restart: reclaimed-slots changed"); - goto retry; + goto retry_clean_adj; } if (unlikely(ctx->retired_stored != MDBX_PNL_GETSIZE(txn->tw.retired_pages))) { @@ -11502,7 +11524,7 @@ retry: ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)); NOTICE("** restart: retired-list growth (%zu -> %zu)", ctx->retired_stored, MDBX_PNL_GETSIZE(txn->tw.retired_pages)); - goto retry; + goto retry_clean_adj; } pgno_t *dst = data.iov_base; @@ -11520,35 +11542,44 @@ retry: if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } - if (left == 0) { - rc = MDBX_SUCCESS; - break; - } + next: if (txn->tw.lifo_reclaimed == nullptr) { tASSERT(txn, ctx->lifo == 0); rc = cursor_next(&ctx->cursor, &key, &data, MDBX_NEXT); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc != MDBX_NOTFOUND) + goto bailout; + rc = MDBX_SUCCESS; + break; + } } else { tASSERT(txn, ctx->lifo != 0); } } + + if (excess) { + size_t n = excess, adj = excess; + while (n >= env->me_maxgc_ov1page) + adj -= n /= env->me_maxgc_ov1page; + ctx->reserve_adj += adj; + TRACE("%s: extra %zu reserved space, adj +%zu (%zu)", dbg_prefix_mode, + excess, adj, ctx->reserve_adj); + } } tASSERT(txn, rc == MDBX_SUCCESS); if (unlikely(txn->tw.loose_count != 0)) { NOTICE("** restart: got %zu loose pages", txn->tw.loose_count); - goto retry; + goto retry_clean_adj; } - if (unlikely(ctx->filled_slot != - (txn->tw.lifo_reclaimed - ? MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - : 0))) { - const bool will_retry = ctx->loop < 9; - NOTICE("** %s: reserve excess (filled-slot %zu, loop %zu)", - will_retry ? 
"restart" : "ignore", ctx->filled_slot, ctx->loop); + if (unlikely(excess_slots)) { + const bool will_retry = ctx->loop < 5 || excess_slots > 1; + NOTICE("** %s: reserve excess (excess-slots %zu, filled-slot %zu, adj %zu, " + "loop %zu)", + will_retry ? "restart" : "ignore", excess_slots, ctx->fill_idx, + ctx->reserve_adj, ctx->loop); if (will_retry) goto retry; } From 94a6bc140dc5199c2578faa160fdea95710cfd99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 18 Mar 2024 21:57:38 +0300 Subject: [PATCH 135/443] =?UTF-8?q?mdbx-doc:=20=D0=B4=D0=BE=D0=BA=D1=83?= =?UTF-8?q?=D0=BC=D0=B5=D0=BD=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=20`mdbx=5Fenv=5Fresurrect=5Fafter=5Ffork()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/_restrictions.md | 6 ++++ mdbx.h | 76 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 80 insertions(+), 2 deletions(-) diff --git a/docs/_restrictions.md b/docs/_restrictions.md index 170924f7..d967cca7 100644 --- a/docs/_restrictions.md +++ b/docs/_restrictions.md @@ -106,6 +106,7 @@ reservation can deplete system resources (trigger ENOMEM error, etc) when setting an inadequately large upper DB size using \ref mdbx_env_set_geometry() or \ref mdbx::env::geometry. So just avoid this. + ## Remote filesystems Do not use MDBX databases on remote filesystems, even between processes on the same host. This breaks file locks on some platforms, possibly @@ -132,6 +133,11 @@ corruption in such cases. On the other hand, MDBX allow calling \ref mdbx_env_close() in such cases to release resources, but no more and in general this is a wrong way. 
+#### Since v0.13.1 and later +Начиная с версии 0.13.1 в API доступна функция \ref mdbx_env_resurrect_after_fork(), +которая позволяет пере-использовать в дочерних процессах уже открытую среду БД, +но строго без наследования транзакций от родительского процесса. + ## Read-only mode There is no pure read-only mode in a normal explicitly way, since diff --git a/mdbx.h b/mdbx.h index e91e8170..a7e78072 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2944,8 +2944,80 @@ LIBMDBX_INLINE_API(int, mdbx_env_close, (MDBX_env * env)) { return mdbx_env_close_ex(env, false); } -#if !(defined(_WIN32) || defined(_WIN64)) -/** FIXME */ +#if defined(DOXYGEN) || !(defined(_WIN32) || defined(_WIN64)) +/** \brief Восстанавливает экземпляр среды в дочернем процессе после ветвления + * родительского процесса посредством `fork()` и родственных системных вызовов. + * \ingroup c_extra + * + * Без вызова \ref mdbx_env_resurrect_after_fork() использование открытого + * экземпляра среды в дочернем процессе не возможно, включая все выполняющиеся + * на момент ветвления транзакции. + * + * Выполняемые функцией действия можно рассматривать как повторное открытие БД + * в дочернем процессе, с сохранением заданных опций и адресов уже созданных + * экземпляров объектов связанных с API. + * + * \note Функция не доступна в ОС семейства Windows по причине отсутствия + * функционала ветвления процесса в API операционной системы. + * + * Ветвление не оказывает влияния на состояние MDBX-среды в родительском + * процессе. Все транзакции, которые были в родительском процессе на момент + * ветвления, после ветвления в родительском процессе продолжат выполняться без + * помех. Но в дочернем процессе все соответствующие транзакции безальтернативно + * перестают быть валидными, а попытка их использования приведет к возврату + * ошибки или отправке `SIGSEGV`. 
+ * + * Использование экземпляра среды в дочернем процессе невозможно до вызова + * \ref mdbx_env_resurrect_after_fork(), так как в результате ветвления у + * процесса меняется PID, значение которого используется для организации + * совместной работы с БД, в том числе, для отслеживания процессов/потоков + * выполняющих читающие транзакции связанные с соответствующими снимками данных. + * Все активные на момент ветвления транзакции не могут продолжаться в дочернем + * процессе, так как не владеют какими-либо блокировками или каким-либо снимком + * данных и не удерживают его от переработки при сборке мусора. + * + * Функция \ref mdbx_env_resurrect_after_fork() восстанавливает переданный + * экземпляр среды в дочернем процессе после ветвления, а именно: обновляет + * используемые системные идентификаторы, повторно открывает дескрипторы файлов, + * производит захват необходимых блокировок связанных с LCK- и DXB-файлами БД, + * восстанавливает отображения в память страницы БД, таблицы читателей и + * служебных/вспомогательных данных в память. Однако унаследованные от + * родительского процесса транзакции не восстанавливаются, причем пишущие и + * читающие транзакции обрабатываются по-разному: + * + * - Пишущая транзакция, если таковая была на момент ветвления, + * прерывается в дочернем процессе с освобождением связанных с ней ресурсов, + * включая все вложенные транзакции. + * + * - Читающие же транзакции, если таковые были в родительском процессе, + * в дочернем процессе логически прерываются, но без освобождения ресурсов. + * Поэтому необходимо обеспечить вызов \ref mdbx_txn_abort() для каждой + * такой читающей транзакции в дочернем процессе, либо смириться с утечкой + * ресурсов до завершения дочернего процесса.
+ * + * Причина не-освобождения ресурсов читающих транзакций в том, что исторически + * MDBX не ведет какой-либо общий список экземпляров читающих, так как это не + * требуется для штатных режимов работы, но требует использования атомарных + * операций или дополнительных объектов синхронизации при создании/разрушении + * экземпляров \ref MDBX_txn. + * + * Вызов \ref mdbx_env_resurrect_after_fork() без ветвления, не в дочернем + * процессе, либо повторные вызовы не приводят к каким-либо действиям или + * изменениям. + * + * \returns Ненулевое значение ошибки при сбое и 0 при успешном выполнении, + * некоторые возможные ошибки таковы: + * + * \retval MDBX_BUSY В родительском процессе БД была открыта + * в режиме \ref MDBX_EXCLUSIVE. + * + * \retval MDBX_EBADSIGN При повреждении сигнатуры экземпляра объекта, а также + * в случае одновременного вызова \ref + * mdbx_env_resurrect_after_fork() из разных потоков. + * + * \retval MDBX_PANIC Произошла критическая ошибка при восстановлении + * экземпляра среды, либо такая ошибка уже была + * до вызова функции. 
*/ LIBMDBX_API int mdbx_env_resurrect_after_fork(MDBX_env *env); #endif /* Windows */ From 236afee80b39a35a46a6c60297aab3c47378d4a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 20 Mar 2024 03:09:33 +0300 Subject: [PATCH 136/443] =?UTF-8?q?mdbx:=20=D0=B1=D1=8B=D1=81=D1=82=D1=80?= =?UTF-8?q?=D0=B0=D1=8F=20=D0=BE=D0=B1=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D0=BA?= =?UTF-8?q?=D0=B0=20=D1=80=D0=B5=D0=B6=D0=B8=D0=BC=D0=B0=20`MDBX=5FEXCLUSI?= =?UTF-8?q?VE`=20=D0=B4=D0=BB=D1=8F=20`mdbx=5Fenv=5Fresurrect=5Fafter=5Ffo?= =?UTF-8?q?rk()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index 24a6d892..8f061af2 100644 --- a/src/core.c +++ b/src/core.c @@ -15926,7 +15926,7 @@ __cold int mdbx_env_resurrect_after_fork(MDBX_env *env) { int rc = env_close(env, true); env->me_signature.weak = MDBX_ME_SIGNATURE; if (likely(rc == MDBX_SUCCESS)) { - rc = env_open(env, 0); + rc = (env->me_flags & MDBX_EXCLUSIVE) ? 
MDBX_BUSY : env_open(env, 0); if (unlikely(rc != MDBX_SUCCESS && env_close(env, false) != MDBX_SUCCESS)) { rc = MDBX_PANIC; env->me_flags |= MDBX_FATAL_ERROR; From 20d6d39ab35ad09932a33ded9c9f3aad089051bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 21 Mar 2024 11:44:25 +0300 Subject: [PATCH 137/443] =?UTF-8?q?mdbx:=20=D0=BE=D0=B1=D0=BD=D0=BE=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/ChangeLog.md b/ChangeLog.md index 32143bf4..b637c62b 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -4,8 +4,42 @@ ChangeLog English version [by Google](https://gitflic-ru.translate.goog/project/erthink/libmdbx/blob?file=ChangeLog.md&_x_tr_sl=ru&_x_tr_tl=en) and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md). +## v0.13.1 (в процессе подготовки релиза) -## v0.13.0 at 2023-04-23 +Новая версия с существенным расширением API и добавлением функционала. + +Новое: + + - Управление основной блокировкой lock/unlock/upgrade/downgrade для координации пишущих транзакций. + - `mdbx_env_chk() `для проверка целостности структуры БД, с переработкой и переносом функционала утилиты `mdbx_chk` внутрь библиотеки. + - `mdbx_dbi_rename()` и `mdbx_dbi_rename()` для переименования таблиц. + - `mdbx_cursor_unbind()` и `mdbx_txn_release_all_cursors()` для управления курсорами. + - `mdbx_env_resurrect_after_fork()` для восстановление открытой среды работы с БД в дочернем процессе после ветвления/расщепления процесса. + - `mdbx_cursor_compare()` для сравнения позиций курсоров. 
+ - `mdbx_cursor_scan()` и `mdbx_cursor_scan_from()` для сканирования таблиц с использованием функционального предиката. + - `mdbx_cursor_on_first_dup()` и `mdbx_cursor_on_last_dup()` для проверки позиции курсора. + - `mdbx_preopen_snapinfo()` для получения информации о БД без её открытия. + + - Расширение и доработка C++ API: + + - добавлен тип `mdbx::cursor::estimation_result`, а поведение методов + `cursor::estimate()` унифицировано с `cursor::move()`; + - для предотвращения незаметного неверного использования API, для инициализации + возвращаемых по ссылке срезов, вместо пустых срезов задействован `slice::invalid()`; + - добавлены дополнительные C++ операторы преобразования к типам C API; + - для совместимости со старыми стандартами C++ и старыми версиями STL перенесены + в public классы `buffer::move_assign_alloc` и `buffer::copy_assign_alloc`; + - добавлен тип `mdbx::default_buffer`; + - для срезов и буферов добавлены методы `hex_decode()`, `base64_decode()`, `base58_decode()`; + - добавлен тип `mdbx::comparator` и функция `mdbx::default_comparator()`; + - добавлены статические методы `buffer::hex()`, `base64()`, `base58()`; + - для транзакций и курсоров добавлены методы `get_/set_context`; + - добавлен метод `cursor::clone()`; + - поддержка base58 переработана и приведена в соответствие с черновиком RFC, в текущем понимании теперь это одна из самых высокопроизводительных реализаций; + - переработка `to_hex()` и `from_hex()`. + + +## v0.13.0 от 2023-04-23 Не выпуск, а начало ветки `0.13` с новым функционалом и изменением API.
From 179d8d6d6bc31e98b7bb7d4572f5b8e7c6310cf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 21 Mar 2024 13:22:33 +0300 Subject: [PATCH 138/443] =?UTF-8?q?mdbx:=20=D0=BD=D0=B5=20=D0=B2=D0=B7?= =?UTF-8?q?=D0=B2=D0=BE=D0=B4=D0=B8=D0=BC=20`MDBX=5FFATAL=5FERROR`=20?= =?UTF-8?q?=D0=B4=D0=BB=D1=8F=20=D0=BD=D0=B5-=D0=B0=D0=BA=D1=82=D0=B8?= =?UTF-8?q?=D0=B2=D0=BD=D0=BE=D0=B9=20=D1=81=D1=80=D0=B5=D0=B4=D1=8B=20?= =?UTF-8?q?=D0=BF=D1=80=D0=B8=20=D0=BF=D1=80=D0=BE=D0=B2=D0=B5=D1=80=D0=BA?= =?UTF-8?q?=D0=B5=20`MDBX=5FENV=5FCHECKPID`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index 8f061af2..010184a3 100644 --- a/src/core.c +++ b/src/core.c @@ -8763,7 +8763,7 @@ static __inline int check_env(const MDBX_env *env, const bool wanna_active) { if (wanna_active) { #if MDBX_ENV_CHECKPID - if (unlikely(env->me_pid != osal_getpid())) { + if (unlikely(env->me_pid != osal_getpid()) && env->me_pid) { ((MDBX_env *)env)->me_flags |= MDBX_FATAL_ERROR; return MDBX_PANIC; } From 183d1e1a443e5513a7328a7c8c854c9ca0542c50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 21 Mar 2024 13:18:19 +0300 Subject: [PATCH 139/443] =?UTF-8?q?mdbx:=20=D0=B1=D1=8B=D1=81=D1=82=D1=80?= =?UTF-8?q?=D1=8B=D0=B9=20=D0=B2=D1=8B=D1=85=D0=BE=D0=B4=20=D0=B4=D0=BB?= =?UTF-8?q?=D1=8F=20=D0=BD=D0=B5-=D0=B0=D0=BA=D1=82=D0=B8=D0=B2=D0=BD?= =?UTF-8?q?=D0=BE=D0=B9=20=D1=81=D1=80=D0=B5=D0=B4=D1=8B.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core.c b/src/core.c index 010184a3..4fe17bfa 100644 --- a/src/core.c +++ b/src/core.c @@ 
-15913,6 +15913,9 @@ __cold int mdbx_env_resurrect_after_fork(MDBX_env *env) { if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) return MDBX_PANIC; + if (unlikely((env->me_flags & MDBX_ENV_ACTIVE) == 0)) + return MDBX_SUCCESS; + const uint32_t new_pid = osal_getpid(); if (unlikely(env->me_pid == new_pid)) return MDBX_SUCCESS; From f8836aefa06e62b83f8fb135479b50ee8cf907ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 21 Mar 2024 21:28:12 +0300 Subject: [PATCH 140/443] =?UTF-8?q?mdbx-doc:=20=D0=B4=D0=BE=D0=BA=D1=83?= =?UTF-8?q?=D0=BC=D0=B5=D0=BD=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=20`mdbx=5Fcursor=5Fcompare()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 24 ++++++++++++++++++++++-- src/core.c | 4 ++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/mdbx.h b/mdbx.h index a7e78072..2c21cbf4 100644 --- a/mdbx.h +++ b/mdbx.h @@ -5024,10 +5024,30 @@ LIBMDBX_API MDBX_dbi mdbx_cursor_dbi(const MDBX_cursor *cursor); * \returns A non-zero error value on failure and 0 on success. */ LIBMDBX_API int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest); -/** FIXME */ +/** \brief Сравнивает позицию курсоров. + * \ingroup c_cursors + * + * Функция предназначена для сравнения позиций двух + * инициализированных/установленных курсоров, связанных с одной транзакцией и + * одной таблицей (DBI-дескриптором). + * Если же курсоры связаны с разными транзакциями, либо с разными таблицами, + * либо один из них не инициализирован, то результат сравнения не определен + * (поведением может быть изменено в последующих версиях). + * + * \param [in] left Левый курсор для сравнения позиций. + * \param [in] right Правый курсор для сравнения позиций. 
+ * \param [in] ignore_multival Булевой флаг, влияющий на результат только при + * сравнении курсоров для таблиц с мульти-значениями, т.е. с флагом + * \ref MDBX_DUPSORT. В случае `true`, позиции курсоров сравниваются + * только по ключам, без учета позиционирования среди мульти-значений. + * Иначе, в случае `false`, при совпадении позиций по ключам, + * сравниваются также позиции по мульти-значениям. + * + * \retval Значение со знаком в семантике оператора `<=>` (меньше нуля, ноль, + * либо больше нуля) как результат сравнения позиций курсоров. */ LIBMDBX_API int mdbx_cursor_compare(const MDBX_cursor *left, const MDBX_cursor *right, - bool ignore_nested); + bool ignore_multival); /** \brief Retrieve by cursor. * \ingroup c_crud diff --git a/src/core.c b/src/core.c index 4fe17bfa..18465a64 100644 --- a/src/core.c +++ b/src/core.c @@ -19806,7 +19806,7 @@ int mdbx_cursor_renew(const MDBX_txn *txn, MDBX_cursor *mc) { } int mdbx_cursor_compare(const MDBX_cursor *l, const MDBX_cursor *r, - bool ignore_nested) { + bool ignore_multival) { const int incomparable = INT16_MAX + 1; if (unlikely(!l)) return r ? -incomparable * 9 : 0; @@ -19847,7 +19847,7 @@ int mdbx_cursor_compare(const MDBX_cursor *l, const MDBX_cursor *r, assert((l->mc_xcursor != nullptr) == (r->mc_xcursor != nullptr)); if (unlikely((l->mc_xcursor != nullptr) != (r->mc_xcursor != nullptr))) return l->mc_xcursor ? 
incomparable * 2 : -incomparable * 2; - if (ignore_nested || !l->mc_xcursor) + if (ignore_multival || !l->mc_xcursor) return 0; #if MDBX_DEBUG From 27893f52f147b42e24f8f61a71293d39576c9d66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 21 Mar 2024 22:01:07 +0300 Subject: [PATCH 141/443] =?UTF-8?q?mdbx-doc:=20=D0=B4=D0=BE=D0=BA=D1=83?= =?UTF-8?q?=D0=BC=D0=B5=D0=BD=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=20`mdbx=5Fdbi=5Frename()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/mdbx.h b/mdbx.h index 2c21cbf4..a01fd9a5 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4363,7 +4363,20 @@ MDBX_DEPRECATED LIBMDBX_API int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); -/** FIXME */ +/** \brief Переименовает таблицу по DBI-хендлу. + * \ingroup c_dbi + * + * Переименовывает пользовательскую именованную subDB связанную с передаваемым + * DBI-дескриптором. + * + * \param [in,out] txn Пишущая транзакция запущенная посредством + * \ref mdbx_txn_begin(). + * \param [in] dbi Дескриптор таблицы (именованной пользовательской subDB) + * открытый посредством \ref mdbx_dbi_open(). + * + * \param [in] name Новое имя для переименования. + * + * \returns Ненулевое значение ошибки при сбое и 0 при успешном выполнении. 
*/ LIBMDBX_API int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name); LIBMDBX_API int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *name); From 1d0ee509c2a61b7f7bcc92b5b6d3549f7faf806f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 21 Mar 2024 23:15:11 +0300 Subject: [PATCH 142/443] =?UTF-8?q?mdbx-doc:=20=D0=B8=D1=81=D0=BF=D1=80?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D0=B5?= =?UTF-8?q?=D1=87=D0=B0=D1=82=D0=BA=D0=B8=20=D0=B2=20`MDBX=5FENV=5FJUST=5F?= =?UTF-8?q?DELETE`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdbx.h b/mdbx.h index a01fd9a5..b839775a 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2444,7 +2444,7 @@ enum MDBX_env_delete_mode_t { /** \brief Just delete the environment's files and directory if any. * \note On POSIX systems, processes already working with the database will * continue to work without interference until it close the environment. - * \note On Windows, the behavior of `MDB_ENV_JUST_DELETE` is different + * \note On Windows, the behavior of `MDBX_ENV_JUST_DELETE` is different * because the system does not support deleting files that are currently * memory mapped. */ MDBX_ENV_JUST_DELETE = 0, From c153a343824ebc5da64787179b310a3547f0ca9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 22 Mar 2024 16:28:41 +0300 Subject: [PATCH 143/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20`mdbx=5Fcursor=5Fscan()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Упрощение и обеспечение возврата `MDBX_RESULT_FALSE`, как при отсутствии данных, так и при неуспешном поиске. 
- Инициализация внутренних переменных key и value для устойчивости в случае использования контекстно-некорректных операций позиционирования курсора. --- src/core.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/core.c b/src/core.c index 18465a64..04357264 100644 --- a/src/core.c +++ b/src/core.c @@ -17721,18 +17721,15 @@ int mdbx_cursor_scan(MDBX_cursor *mc, MDBX_predicate_func *predicate, if (unlikely(turn_op > 30 || ((1 << turn_op) & valid_turn_mask) == 0)) return MDBX_EINVAL; - MDBX_val key, data; + MDBX_val key = {nullptr, 0}, data = {nullptr, 0}; int rc = mdbx_cursor_get(mc, &key, &data, start_op); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - for (;;) { + while (likely(rc == MDBX_SUCCESS)) { rc = predicate(context, &key, &data, arg); if (rc != MDBX_RESULT_FALSE) return rc; rc = cursor_get(mc, &key, &data, turn_op); - if (rc != MDBX_SUCCESS) - return (rc == MDBX_NOTFOUND) ? MDBX_RESULT_FALSE : rc; } + return (rc == MDBX_NOTFOUND) ? MDBX_RESULT_FALSE : rc; } int mdbx_cursor_scan_from(MDBX_cursor *mc, MDBX_predicate_func *predicate, From fb17e8877c11635949fbe74006ba2658499b4b2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 22 Mar 2024 17:27:37 +0300 Subject: [PATCH 144/443] =?UTF-8?q?mdbx-doc:=20=D0=B4=D0=BE=D0=BA=D1=83?= =?UTF-8?q?=D0=BC=D0=B5=D0=BD=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=20`mdbx=5Fcursor=5Fscan()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 188 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 185 insertions(+), 3 deletions(-) diff --git a/mdbx.h b/mdbx.h index b839775a..6f7f5637 100644 --- a/mdbx.h +++ b/mdbx.h @@ -5095,16 +5095,198 @@ LIBMDBX_API int mdbx_cursor_compare(const MDBX_cursor *left, * \retval MDBX_EINVAL An invalid parameter was specified. 
*/ LIBMDBX_API int mdbx_cursor_get(MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op); -/** FIXME */ + +/** \brief Тип предикативных функций обратного вызова используемых + * \ref mdbx_cursor_scan() и \ref mdbx_cursor_scan_from() для пробирования + * пар ключ-значения. + * \ingroup c_crud + * + * \param [in,out] context Указатель на контекст с необходимой для оценки + * информацией, который полностью подготавливается + * и контролируется вами. + * \param [in] key Ключ для оценки пользовательской функцией. + * \param [in] value Значение для оценки пользовательской функцией. + * \param [in,out] arg Дополнительный аргумент предикативной функции, + * который полностью подготавливается + * и контролируется вами. + * + * \returns Результат проверки соответствия переданной пары ключ-значения + * искомой цели. Иначе код ошибки, который прерывает сканирование и возвращается + * без изменения в качестве результата из функций \ref mdbx_cursor_scan() + * или \ref mdbx_cursor_scan_from(). + * + * \retval MDBX_RESULT_TRUE если переданная пара ключ-значение соответствует + * искомой и следует завершить сканирование. + * \retval MDBX_RESULT_FALSE если переданная пара ключ-значение НЕ соответствует + * искомой и следует продолжать сканирование. + * \retval ИНАЧЕ любое другое значение, отличное от \ref MDBX_RESULT_TRUE + * и \ref MDBX_RESULT_FALSE, считается индикатором ошибки + * и возвращается без изменений в качестве результата сканирования. + * + * \see mdbx_cursor_scan() + * \see mdbx_cursor_scan_from() */ typedef int(MDBX_predicate_func)(void *context, MDBX_val *key, MDBX_val *value, void *arg) MDBX_CXX17_NOEXCEPT; -/** FIXME */ + +/** \brief Сканирует таблицу с использованием передаваемого предиката, + * с уменьшением сопутствующих накладных расходов. 
+ * \ingroup c_crud + * + * Реализует функционал сходный с шаблоном `std::find_if<>()` с использованием + * курсора и пользовательской предикативной функции, экономя при этом + * на сопутствующих накладных расходах, в том числе, не выполняя часть проверок + * внутри цикла итерации записей и потенциально уменьшая количество + * DSO-трансграничных вызовов. + * + * Функция принимает курсор, который должен быть привязан к некоторой транзакции + * и DBI-дескриптору таблицы (именованной пользовательской subDB), выполняет + * первоначальное позиционирование курсора определяемое аргументом `start_op`. + * Далее, производится оценка каждой пары ключ-значения посредством + * предоставляемой вами предикативной функции `predicate` и затем, при + * необходимости, переход к следующему элементу посредством операции `turn_op`, + * до наступления одного из четырех событий: + * - достигается конец данных; + * - возникнет ошибка при позиционировании курсора; + * - оценочная функция вернет \ref MDBX_RESULT_TRUE, сигнализируя + * о необходимости остановить дальнейшее сканирование; + * - оценочная функция возвратит значение отличное от \ref MDBX_RESULT_FALSE + * и \ref MDBX_RESULT_TRUE сигнализируя об ошибке. + * + * \param [in,out] cursor Курсор для выполнения операции сканирования, + * связанный с активной транзакцией и DBI-дескриптором + * таблицы. Например, курсор созданный + * посредством \ref mdbx_cursor_open(). + * \param [in] predicate Предикативная функция для оценки итерируемых + * пар ключ-значения, + * более подробно смотрите \ref MDBX_predicate_func. + * \param [in,out] context Указатель на контекст с необходимой для оценки + * информацией, который полностью подготавливается + * и контролируется вами. + * \param [in] start_op Стартовая операция позиционирования курсора, + * более подробно смотрите \ref MDBX_cursor_op. + * Для сканирования без изменения исходной позиции + * курсора используйте \ref MDBX_GET_CURRENT. 
+ * Допустимые значения \ref MDBX_FIRST, + * \ref MDBX_FIRST_DUP, \ref MDBX_LAST, + * \ref MDBX_LAST_DUP, \ref MDBX_GET_CURRENT, + * а также \ref MDBX_GET_MULTIPLE. + * \param [in] turn_op Операция позиционирования курсора для перехода + * к следующему элементу. Допустимые значения + * \ref MDBX_NEXT, \ref MDBX_NEXT_DUP, + * \ref MDBX_NEXT_NODUP, \ref MDBX_PREV, + * \ref MDBX_PREV_DUP, \ref MDBX_PREV_NODUP, а также + * \ref MDBX_NEXT_MULTIPLE и \ref MDBX_PREV_MULTIPLE. + * \param [in,out] arg Дополнительный аргумент предикативной функции, + * который полностью подготавливается + * и контролируется вами. + * + * \note При использовании \ref MDBX_GET_MULTIPLE, \ref MDBX_NEXT_MULTIPLE + * или \ref MDBX_PREV_MULTIPLE внимательно учитывайте пакетную специфику + * передачи значений через параметры предикативной функции. + * + * \see MDBX_predicate_func + * \see mdbx_cursor_scan_from + * + * \returns Результат операции сканирования, либо код ошибки. + * + * \retval MDBX_RESULT_TRUE если найдена пара ключ-значение, для которой + * предикативная функция вернула \ref MDBX_RESULT_TRUE. + * \retval MDBX_RESULT_FALSE если подходящая пара ключ-значения НЕ найдена, + * в процессе поиска достигнут конец данных, либо нет данных для поиска. + * \retval ИНАЧЕ любое другое значение, отличное от \ref MDBX_RESULT_TRUE + * и \ref MDBX_RESULT_FALSE, является кодом ошибки при позиционировании + * курсора, либо определяемым пользователем кодом остановки поиска + * или ошибочной ситуации. */ LIBMDBX_API int mdbx_cursor_scan(MDBX_cursor *cursor, MDBX_predicate_func *predicate, void *context, MDBX_cursor_op start_op, MDBX_cursor_op turn_op, void *arg); -/** FIXME */ +/** Сканирует таблицу с использованием передаваемого предиката, + * начиная с передаваемой пары ключ-значение, + * с уменьшением сопутствующих накладных расходов.
+ * \ingroup c_crud + * + * Функция принимает курсор, который должен быть привязан к некоторой транзакции + * и DBI-дескриптору таблицы (именованной пользовательской subDB), выполняет + * первоначальное позиционирование курсора определяемое аргументом `from_op`, + * а также аргументами `from_key` и `from_value`. + * Далее, производится оценка каждой пары ключ-значения посредством + * предоставляемой вами предикативной функции `predicate` и затем, при + * необходимости, переход к следующему элементу посредством операции `turn_op`, + * до наступления одного из четырех событий: + * - достигается конец данных; + * - возникнет ошибка при позиционировании курсора; + * - оценочная функция вернет \ref MDBX_RESULT_TRUE, сигнализируя + * о необходимости остановить дальнейшее сканирование; + * - оценочная функция возвратит значение отличное от \ref MDBX_RESULT_FALSE + * и \ref MDBX_RESULT_TRUE сигнализируя об ошибке. + * + * \param [in,out] cursor Курсор для выполнения операции сканирования, + * связанный с активной транзакцией и DBI-дескриптором + * таблицы. Например, курсор созданный + * посредством \ref mdbx_cursor_open(). + * \param [in] predicate Предикативная функция для оценки итерируемых + * пар ключ-значения, + * более подробно смотрите \ref MDBX_predicate_func. + * \param [in,out] context Указатель на контекст с необходимой для оценки + * информацией, который полностью подготавливается + * и контролируется вами. + * \param [in] from_op Операция позиционирования курсора к исходной + * позиции, более подробно смотрите + * \ref MDBX_cursor_op.
+ * Допустимые значения \ref MDBX_GET_BOTH, + * \ref MDBX_GET_BOTH_RANGE, \ref MDBX_SET_KEY, + * \ref MDBX_SET_LOWERBOUND, \ref MDBX_SET_UPPERBOUND, + * \ref MDBX_TO_KEY_LESSER_THAN, + * \ref MDBX_TO_KEY_LESSER_OR_EQUAL, + * \ref MDBX_TO_KEY_EQUAL, + * \ref MDBX_TO_KEY_GREATER_OR_EQUAL, + * \ref MDBX_TO_KEY_GREATER_THAN, + * \ref MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN, + * \ref MDBX_TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL, + * \ref MDBX_TO_EXACT_KEY_VALUE_EQUAL, + * \ref MDBX_TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL, + * \ref MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN, + * \ref MDBX_TO_PAIR_LESSER_THAN, + * \ref MDBX_TO_PAIR_LESSER_OR_EQUAL, + * \ref MDBX_TO_PAIR_EQUAL, + * \ref MDBX_TO_PAIR_GREATER_OR_EQUAL, + * \ref MDBX_TO_PAIR_GREATER_THAN, + * а также \ref MDBX_GET_MULTIPLE. + * \param [in,out] from_key Указатель на ключ используемый как для исходного + * позиционирования, так и для последующих итераций + * перехода. + * \param [in,out] from_value Указатель на значение используемое как для + * исходного позиционирования, так и для последующих + * итераций перехода. + * \param [in] turn_op Операция позиционирования курсора для перехода + * к следующему элементу. Допустимые значения + * \ref MDBX_NEXT, \ref MDBX_NEXT_DUP, + * \ref MDBX_NEXT_NODUP, \ref MDBX_PREV, + * \ref MDBX_PREV_DUP, \ref MDBX_PREV_NODUP, а также + * \ref MDBX_NEXT_MULTIPLE и \ref MDBX_PREV_MULTIPLE. + * \param [in,out] arg Дополнительный аргумент предикативной функции, + * который полностью подготавливается + * и контролируется вами. + * + * \note При использовании \ref MDBX_GET_MULTIPLE, \ref MDBX_NEXT_MULTIPLE + * или \ref MDBX_PREV_MULTIPLE внимательно учитывайте пакетную специфику + * передачи значений через параметры предикативной функции. + * + * \see MDBX_predicate_func + * \see mdbx_cursor_scan + * + * \returns Результат операции сканирования, либо код ошибки. + * + * \retval MDBX_RESULT_TRUE если найдена пара ключ-значение, для которой + * предикативная функция вернула \ref MDBX_RESULT_TRUE.
+ * \retval MDBX_RESULT_FALSE если подходящая пара ключ-значения НЕ найдена, + * в процессе поиска достигнут конец данных, либо нет данных для поиска. + * \retval ИНАЧЕ любое другое значение, отличное от \ref MDBX_RESULT_TRUE + * и \ref MDBX_RESULT_FALSE, является кодом ошибки при позиционировании + * курсора, либо определяемым пользователем кодом остановки поиска + * или ошибочной ситуации. */ LIBMDBX_API int mdbx_cursor_scan_from(MDBX_cursor *cursor, MDBX_predicate_func *predicate, void *context, MDBX_cursor_op from_op, From d0799fd373dabb86ad908bfd64dba6860a46a5bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 22 Mar 2024 22:29:16 +0300 Subject: [PATCH 145/443] =?UTF-8?q?mdbx-doc:=20=D0=B4=D0=BE=D0=BA=D1=83?= =?UTF-8?q?=D0=BC=D0=B5=D0=BD=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=20`mdbx=5Fcursor=5Fon=5Ffirst=5Fdup()`=20=D0=B8=20?= =?UTF-8?q?`mdbx=5Fcursor=5Fon=5Flast=5Fdup()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 24 ++++++++++++++++++++++-- src/core.c | 4 ++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/mdbx.h b/mdbx.h index 6f7f5637..fbea08f5 100644 --- a/mdbx.h +++ b/mdbx.h @@ -5501,7 +5501,17 @@ mdbx_cursor_eof(const MDBX_cursor *cursor); MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cursor_on_first(const MDBX_cursor *cursor); -/** FIXME */ +/** \brief Определяет стоит ли курсор на первом или единственном мульти-значении + * соответствующем ключу. + * \ingroup c_cursors + * \param [in] cursor Курсор созданный посредством \ref mdbx_cursor_open(). + * \returns Значение \ref MDBX_RESULT_TRUE, либо \ref MDBX_RESULT_FALSE, + * иначе код ошибки. + * \retval MDBX_RESULT_TRUE курсор установлен на первом или единственном + * мульти-значении соответствующем ключу.
+ * \retval MDBX_RESULT_FALSE курсор НЕ установлен на первом или единственном + * мульти-значении соответствующем ключу. + * \retval ИНАЧЕ код ошибки. */ MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cursor_on_first_dup(const MDBX_cursor *cursor); @@ -5519,7 +5529,17 @@ mdbx_cursor_on_first_dup(const MDBX_cursor *cursor); MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cursor_on_last(const MDBX_cursor *cursor); -/** FIXME */ +/** \brief Определяет стоит ли курсор на последнем или единственном мульти-значении + * соответствующем ключу. + * \ingroup c_cursors + * \param [in] cursor Курсор созданный посредством \ref mdbx_cursor_open(). + * \returns Значение \ref MDBX_RESULT_TRUE, либо \ref MDBX_RESULT_FALSE, + * иначе код ошибки. + * \retval MDBX_RESULT_TRUE курсор установлен на последнем или единственном + * мульти-значении соответствующем ключу. + * \retval MDBX_RESULT_FALSE курсор НЕ установлен на последнем или единственном + * мульти-значении соответствующем ключу. + * \retval ИНАЧЕ код ошибки. */ MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cursor_on_last_dup(const MDBX_cursor *cursor); diff --git a/src/core.c b/src/core.c index 04357264..e69b709b 100644 --- a/src/core.c +++ b/src/core.c @@ -25115,7 +25115,7 @@ int mdbx_cursor_on_first_dup(const MDBX_cursor *mc) { : MDBX_EBADSIGN; if (!(mc->mc_flags & C_INITIALIZED)) - return mc->mc_db->md_entries ? MDBX_RESULT_FALSE : MDBX_RESULT_TRUE; + return MDBX_RESULT_TRUE; if (!mc->mc_xcursor) return MDBX_RESULT_TRUE; @@ -25158,7 +25158,7 @@ int mdbx_cursor_on_last_dup(const MDBX_cursor *mc) { : MDBX_EBADSIGN; if (!(mc->mc_flags & C_INITIALIZED)) - return mc->mc_db->md_entries ?
MDBX_RESULT_FALSE : MDBX_RESULT_TRUE; + return MDBX_RESULT_FALSE; if (!mc->mc_xcursor) return MDBX_RESULT_TRUE; From 80e9667ead57da8b90a5215d36497a2629f6f514 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 23 Mar 2024 15:32:02 +0300 Subject: [PATCH 146/443] =?UTF-8?q?mdbx++:=20=D1=8F=D0=B2=D0=BD=D0=BE?= =?UTF-8?q?=D0=B5=20=D0=BF=D1=80=D0=B8=D0=B2=D0=B5=D0=B4=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D0=B5=20=D0=BA=20`int`=20=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B8?= =?UTF-8?q?=20`constexpr=20mdbx::memcmp()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 0507774f..90d981b8 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -5120,8 +5120,8 @@ static MDBX_CXX20_CONSTEXPR int memcmp(const void *a, const void *b, __cpp_lib_is_constant_evaluated >= 201811L if (::std::is_constant_evaluated()) { for (size_t i = 0; i < bytes; ++i) { - const int diff = - static_cast(a)[i] - static_cast(b)[i]; + const int diff = int(static_cast(a)[i]) - + int(static_cast(b)[i]); if (diff) return diff; } From dedcdd4c944e70017d2f449f75d4aaaea5c675bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 24 Mar 2024 11:11:19 +0300 Subject: [PATCH 147/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BA=D1=83=D0=BC?= =?UTF-8?q?=D0=B5=D0=BD=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8?= =?UTF-8?q?=D0=B5=20`mdbx=5Fpreopen=5Fsnapinfo()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/mdbx.h b/mdbx.h index fbea08f5..d15c3e1e 100644 --- a/mdbx.h +++ b/mdbx.h @@ -6020,16 +6020,45 @@ LIBMDBX_API int 
mdbx_env_open_for_recoveryW(MDBX_env *env, * leg(s). */ LIBMDBX_API int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target_meta); -/** \brief FIXME - */ -LIBMDBX_API int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *arg, +/** \brief Получает базовую информацию о БД, не открывая её. + * \ingroup c_opening + * + * Назначение функции в получении базовой информации без открытия БД и + * отображения данных в память (что может быть достаточно затратным действием + * для ядра ОС). Полученная таким образом информация может быть полезной для + * подстройки опций работы с БД перед её открытием, а также в сценариях файловых + * менеджеров и прочих вспомогательных утилит. + * + * \todo Добавить в API возможность установки обратного вызова для ревизии опций + * работы с БД в процессе её открытия (при удержании блокировок). + * + * \param [in] pathname Путь к директории или файлу БД. + * \param [out] info Указатель на структуру \ref MDBX_envinfo + * для получения информации. + * \param [in] bytes Актуальный размер структуры \ref MDBX_envinfo, это + * значение используется для обеспечения совместимости + * ABI. + * + * \note Заполняются только некоторые поля структуры \ref MDBX_envinfo, значения + * которых возможно получить без отображения файлов БД в память и без захвата + * блокировок: размер страницы БД, геометрия БД, размер распределенного места + * (номер последней распределенной страницы), номер последней транзакции и + * boot-id. + * + * \warning Полученная информация является снимком на время выполнения функции и + * может быть в любой момент изменена работающим с БД процессом. В том числе, + * нет препятствий к тому, чтобы другой процесс удалил БД и создал её заново с + * другим размером страницы и/или изменением любых других параметров. + * + * \returns Ненулевое значение ошибки при сбое и 0 при успешном выполнении.
*/ +LIBMDBX_API int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *info, size_t bytes); #if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) /** \copydoc mdbx_preopen_snapinfo() * \note Available only on Windows. * \see mdbx_preopen_snapinfo() */ LIBMDBX_API int mdbx_preopen_snapinfoW(const wchar_t *pathname, - MDBX_envinfo *arg, size_t bytes); + MDBX_envinfo *info, size_t bytes); #endif /* Windows */ /** \brief Флаги/опции для проверки целостности БД. From 31e8f290e73b1efd422920c27a77e6f7e17974b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 28 Mar 2024 11:32:58 +0300 Subject: [PATCH 148/443] =?UTF-8?q?mdbx:=20=D0=BE=D0=BF=D0=B5=D1=87=D0=B0?= =?UTF-8?q?=D1=82=D0=BA=D0=B8=20=D0=B8=20=D0=BE=D1=80=D1=84=D0=BE=D0=B3?= =?UTF-8?q?=D1=80=D0=B0=D1=84=D0=B8=D1=8F=20=D0=B2=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index b637c62b..19bdbd73 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -4,7 +4,7 @@ ChangeLog English version [by Google](https://gitflic-ru.translate.goog/project/erthink/libmdbx/blob?file=ChangeLog.md&_x_tr_sl=ru&_x_tr_tl=en) and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md). -## v0.13.1 (в процессе подготовки релиза) +## v0.13.1 (в процессе подготовки выпуска) Новая версия с существенным расширением API и добавлением функционала. @@ -96,7 +96,7 @@ Signed-off-by: Леонид Юрьев (Leonid Yuriev) вероятность проявления близка к нулю, а сценарий такого проявления найти не удалось. В MDBX ошибка присутствовала с момента отделения проекта от LMDB, - где эта ошибка присутствует более 11 лети, по настоящее время. + где эта ошибка присутствует более 11 лет, по настоящее время. 
- Исправление ложной ошибки `MDBX_CORRUPTED (-30796)` в сценарии работы в режиме `MDBX_DUPFIXED` и нечетной длиной мульти-значений. @@ -107,7 +107,7 @@ Signed-off-by: Леонид Юрьев (Leonid Yuriev) - Доработка `rebalance()` ради уменьшения WAF. Новый функционал, включая контролируемую пользователем опцию `enum MDBX_option_t`, будет доступен в выпусках ветки `0.13.x`, а в этом выпуске доработка сводится к тактике - не-вовленичения чистой страницы при нехватке запаса страниц в ходе обновления GC, + не-вовлечения чистой страницы при нехватке запаса страниц в ходе обновления GC, за счет ухудшения баланса дерева страниц. - Устранение упущения приводящего к нелогичной ситуации @@ -231,7 +231,7 @@ Signed-off-by: Леонид Юрьев (Leonid Yuriev) - Устранение регресса/ошибки в пути обработки `put(MDBX_MULTIPLE)` при пакетном/оптовом помещении в БД множественных значений одного ключа (aka multi-value или dupsort). - Проявление проблемы зависит от компилятора и опций оптимизации/кодогенерации, но с большой вероятностью возвращется + Проявление проблемы зависит от компилятора и опций оптимизации/кодогенерации, но с большой вероятностью возвращается ошибка `MDBX_BAD_VALSIZE` (`-30781`), а в отладочных сборках срабатывает проверка `cASSERT(mc, !"Invalid key-size")`. Сценарии приводящие к другим проявлениям на данный момент не известны. 
From f548f00d8eaaf3121ca7d28fd0abeb7c8c4a0c7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 25 Mar 2024 00:25:39 +0300 Subject: [PATCH 149/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=B8=D0=B0=D0=B3=D0=BD?= =?UTF-8?q?=D0=BE=D1=81=D1=82=D0=B8=D0=BA=D0=B0/=D0=BB=D0=BE=D0=B3=D0=B8?= =?UTF-8?q?=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20=D0=B4=D0=BB=D1=8F?= =?UTF-8?q?=20=D0=BA=D0=B0=D0=B6=D0=B4=D0=BE=D0=B3=D0=BE=20=D1=81=D0=BB?= =?UTF-8?q?=D1=83=D1=87=D0=B0=D1=8F=20=D0=B2=D0=BE=D0=B7=D0=B2=D1=80=D0=B0?= =?UTF-8?q?=D1=82=D0=B0=20`MDBX=5FCORRUPTED`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 74 insertions(+), 13 deletions(-) diff --git a/src/core.c b/src/core.c index e69b709b..df815706 100644 --- a/src/core.c +++ b/src/core.c @@ -7874,6 +7874,8 @@ next_gc:; goto depleted_gc; } if (unlikely(key.iov_len != sizeof(txnid_t))) { + ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid GC key-length"); ret.err = MDBX_CORRUPTED; goto fail; } @@ -7900,6 +7902,8 @@ next_gc:; if (unlikely(data.iov_len % sizeof(pgno_t) || data.iov_len < MDBX_PNL_SIZEOF(gc_pnl) || !pnl_check(gc_pnl, txn->mt_next_pgno))) { + ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid GC value-length"); ret.err = MDBX_CORRUPTED; goto fail; } @@ -7983,6 +7987,8 @@ next_gc:; flags |= MDBX_ALLOC_SHOULD_SCAN; if (AUDIT_ENABLED()) { if (unlikely(!pnl_check(txn->tw.relist, txn->mt_next_pgno))) { + ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid txn retired-list"); ret.err = MDBX_CORRUPTED; goto fail; } @@ -10442,8 +10448,11 @@ __cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, MDBX_val key, data; while ((rc = cursor_get(&cx.outer, &key, &data, MDBX_NEXT)) == 0) { if (!dont_filter_gc) { - if (unlikely(key.iov_len != 
sizeof(txnid_t))) + if (unlikely(key.iov_len != sizeof(txnid_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid GC-key size", (unsigned)key.iov_len); return MDBX_CORRUPTED; + } txnid_t id = unaligned_peek_u64(4, key.iov_base); if (txn->tw.lifo_reclaimed) { for (size_t i = 1; i <= MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed); ++i) @@ -10484,8 +10493,11 @@ __cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, MDBX_node *node = page_node(mp, k); if (node_flags(node) != F_SUBDATA) continue; - if (unlikely(node_ds(node) != sizeof(MDBX_db))) + if (unlikely(node_ds(node) != sizeof(MDBX_db))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid dupsort sub-tree node size", (unsigned)node_ds(node)); return MDBX_CORRUPTED; + } MDBX_db reside; const MDBX_db *db = memcpy(&reside, node_data(node), sizeof(reside)); @@ -10826,6 +10838,8 @@ retry: goto bailout; if (!MDBX_DISABLE_VALIDATION && unlikely(key.iov_len != sizeof(txnid_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid GC-key size", (unsigned)key.iov_len); rc = MDBX_CORRUPTED; goto bailout; } @@ -11220,6 +11234,8 @@ retry: rc = cursor_first(&ctx->cursor, &key, nullptr); if (unlikely(rc != MDBX_SUCCESS || key.iov_len != sizeof(txnid_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid GC-key size", (unsigned)key.iov_len); rc = MDBX_CORRUPTED; goto bailout; } @@ -11275,6 +11291,8 @@ retry: rc = cursor_first(&ctx->cursor, &key, nullptr); if (likely(rc == MDBX_SUCCESS)) { if (unlikely(key.iov_len != sizeof(txnid_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid GC-key size", (unsigned)key.iov_len); rc = MDBX_CORRUPTED; goto bailout; } @@ -22371,6 +22389,9 @@ __cold static int compacting_walk_tree(mdbx_compacting_ctx *ctx, } else if (node_flags(node) & F_SUBDATA) { if (!MDBX_DISABLE_VALIDATION && unlikely(node_ds(node) != sizeof(MDBX_db))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", 
MDBX_CORRUPTED, + "invalid dupsort sub-tree node size", + (unsigned)node_ds(node)); rc = MDBX_CORRUPTED; goto done; } @@ -22568,9 +22589,16 @@ __cold static int env_compact(MDBX_env *env, MDBX_txn *read_txn, MDBX_SUCCESS) { const MDBX_PNL pnl = data.iov_base; if (unlikely(data.iov_len % sizeof(pgno_t) || - data.iov_len < MDBX_PNL_SIZEOF(pnl) || - !(pnl_check(pnl, read_txn->mt_next_pgno)))) + data.iov_len < MDBX_PNL_SIZEOF(pnl))) { + ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid GC-record length", data.iov_len); return MDBX_CORRUPTED; + } + if (unlikely(!pnl_check(pnl, read_txn->mt_next_pgno))) { + ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid GC-record content"); + return MDBX_CORRUPTED; + } gc += MDBX_PNL_GETSIZE(pnl); } if (unlikely(rc != MDBX_NOTFOUND)) @@ -23139,8 +23167,11 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { const MDBX_node *node = page_node(mp, i); if (node_flags(node) != F_SUBDATA) continue; - if (unlikely(node_ds(node) != sizeof(MDBX_db))) + if (unlikely(node_ds(node) != sizeof(MDBX_db))) { + ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid subDb node size", node_ds(node)); return MDBX_CORRUPTED; + } /* skip opened and already accounted */ const MDBX_val name = {node_key(node), node_ks(node)}; @@ -23242,7 +23273,8 @@ __cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, *mask |= 1 << UNALIGNED_PEEK_16(db, MDBX_db, md_depth); break; default: - ERROR("wrong node-flags %u", flags); + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid node-size", flags); return MDBX_CORRUPTED; } rc = cursor_next(&cx.outer, &key, &data, MDBX_NEXT_NODUP); @@ -23788,8 +23820,11 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, cx.outer.mc_ki[cx.outer.mc_top]); if (unlikely((node_flags(node) & (F_DUPDATA | F_SUBDATA)) != F_SUBDATA)) return MDBX_INCOMPATIBLE; - if (!MDBX_DISABLE_VALIDATION && unlikely(body.iov_len 
!= sizeof(MDBX_db))) + if (!MDBX_DISABLE_VALIDATION && unlikely(body.iov_len != sizeof(MDBX_db))) { + ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid subDb node size", body.iov_len); return MDBX_CORRUPTED; + } memcpy(&txn->mt_dbs[slot], body.iov_base, sizeof(MDBX_db)); } @@ -24850,8 +24885,9 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, } break; case F_SUBDATA /* sub-db */: { - const size_t namelen = node_key_size; - if (unlikely(namelen == 0 || node_data_size != sizeof(MDBX_db))) { + if (unlikely(node_data_size != sizeof(MDBX_db))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid subDb node size", (unsigned)node_data_size); assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } @@ -24861,6 +24897,8 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */: if (unlikely(node_data_size != sizeof(MDBX_db))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid sub-tree node size", (unsigned)node_data_size); assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } @@ -24870,6 +24908,8 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, case F_DUPDATA /* short sub-page */: { if (unlikely(node_data_size <= PAGEHDRSZ || (node_data_size & 1))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid sub-page node size", (unsigned)node_data_size); assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; break; @@ -24892,6 +24932,8 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, subtype = MDBX_subpage_dupfixed_leaf; break; default: + ERROR("%s/%d: %s 0x%x", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid sub-page flags", sp->mp_flags); assert(err == MDBX_CORRUPTED); subtype = MDBX_subpage_broken; err = MDBX_CORRUPTED; @@ -24909,6 +24951,8 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, subpayload_size += subnode_size; subalign_bytes += subnode_size & 1; 
if (unlikely(node_flags(subnode) != 0)) { + ERROR("%s/%d: %s 0x%x", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "unexpected sub-node flags", node_flags(subnode)); assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } @@ -24928,6 +24972,8 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, } break; default: + ERROR("%s/%d: %s 0x%x", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid node flags", node_flags(node)); assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } @@ -24962,6 +25008,8 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, case F_SUBDATA /* sub-db */: if (unlikely(node_ds(node) != sizeof(MDBX_db))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid sub-tree node size", (unsigned)node_ds(node)); assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } else { @@ -24976,8 +25024,14 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, break; case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */: - if (unlikely(node_ds(node) != sizeof(MDBX_db) || - ctx->mw_cursor->mc_xcursor == NULL)) { + if (unlikely(node_ds(node) != sizeof(MDBX_db))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid dupsort sub-tree node size", (unsigned)node_ds(node)); + assert(err == MDBX_CORRUPTED); + err = MDBX_CORRUPTED; + } else if (unlikely(!ctx->mw_cursor->mc_xcursor)) { + ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "unexpected dupsort sub-tree node for non-dupsort subDB"); assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } else { @@ -28558,9 +28612,16 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) { if (unlikely(usr->result.backed_pages < NUM_METAS)) chk_scope_issue(inner, "backed-pages %zu < %u", usr->result.backed_pages, NUM_METAS); - if (unlikely(usr->result.backed_pages < NUM_METAS || - dxbfile_pages < NUM_METAS)) + if (unlikely(usr->result.backed_pages < NUM_METAS)) { + chk_scope_issue(inner, "backed-pages %zu < num-metas %u", + usr->result.backed_pages, 
NUM_METAS); return MDBX_CORRUPTED; + } + if (unlikely(dxbfile_pages < NUM_METAS)) { + chk_scope_issue(inner, "backed-pages %zu < num-metas %u", + usr->result.backed_pages, NUM_METAS); + return MDBX_CORRUPTED; + } if (unlikely(usr->result.backed_pages > (size_t)MAX_PAGENO + 1)) { chk_scope_issue(inner, "backed-pages %zu > max-pages %zu", usr->result.backed_pages, (size_t)MAX_PAGENO + 1); From af060b496011c873e7ef74ac23ad0efd66db8c03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 25 Mar 2024 18:45:42 +0300 Subject: [PATCH 150/443] =?UTF-8?q?mdbx:=20=D0=B2=D1=8B=D0=BD=D0=B5=D1=81?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=81=D1=82=D0=B0=D1=82=D0=B8=D1=87?= =?UTF-8?q?=D0=B5=D1=81=D0=BA=D0=B8=D1=85=20=D0=BF=D0=B5=D1=80=D0=B5=D0=BC?= =?UTF-8?q?=D0=B5=D0=BD=D0=BD=D1=8B=D1=85=20=D0=B2=20=D1=81=D1=82=D1=80?= =?UTF-8?q?=D1=83=D0=BA=D1=82=D1=83=D1=80=D1=83=20`mdbx=5Fstatic`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 31 +++++++++++++++---------------- src/internals.h | 26 ++++++++++++++++---------- src/lck-posix.c | 9 ++++----- src/osal.c | 10 ++++------ src/osal.h | 33 ++++++++++++++++++--------------- test/base.h++ | 3 ++- 6 files changed, 59 insertions(+), 53 deletions(-) diff --git a/src/core.c b/src/core.c index df815706..09d6ae84 100644 --- a/src/core.c +++ b/src/core.c @@ -3233,9 +3233,8 @@ static __always_inline int __must_check_result dpl_append(MDBX_txn *txn, /*----------------------------------------------------------------------------*/ -uint8_t runtime_flags = MDBX_RUNTIME_FLAGS_INIT; -uint8_t loglevel = MDBX_LOG_FATAL; -MDBX_debug_func *debug_logger; +MDBX_INTERNAL_VAR_INSTA struct mdbx_static mdbx_static = { + MDBX_RUNTIME_FLAGS_INIT, MDBX_LOG_FATAL, nullptr, 0, nullptr}; static __must_check_result __inline int page_retire(MDBX_cursor *mc, MDBX_page *mp); @@ -3588,8 +3587,8 @@ const 
char *mdbx_strerror_ANSI2OEM(int errnum) { __cold void debug_log_va(int level, const char *function, int line, const char *fmt, va_list args) { - if (debug_logger) - debug_logger(level, function, line, fmt, args); + if (mdbx_static.logger) + mdbx_static.logger(level, function, line, fmt, args); else { #if defined(_WIN32) || defined(_WIN64) if (IsDebuggerPresent()) { @@ -9243,7 +9242,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { r = thread_rthc_get(env->me_txkey); if (likely(r)) { if (unlikely(!r->mr_pid.weak) && - (runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN)) { + (mdbx_static.flags & MDBX_DBG_LEGACY_MULTIOPEN)) { thread_rthc_set(env->me_txkey, nullptr); r = nullptr; } else { @@ -9367,7 +9366,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { return MDBX_BUSY; MDBX_lockinfo *const lck = env->me_lck_mmap.lck; if (lck && (env->me_flags & MDBX_NOTLS) == 0 && - (runtime_flags & MDBX_DBG_LEGACY_OVERLAP) == 0) { + (mdbx_static.flags & MDBX_DBG_LEGACY_OVERLAP) == 0) { const size_t snap_nreaders = atomic_load32(&lck->mti_numreaders, mo_AcquireRelease); for (size_t i = 0; i < snap_nreaders; ++i) { @@ -9772,7 +9771,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, } else if (flags & MDBX_TXN_RDONLY) { if (env->me_txn0 && unlikely(env->me_txn0->mt_owner == osal_thread_self()) && env->me_txn && - (runtime_flags & MDBX_DBG_LEGACY_OVERLAP) == 0) + (mdbx_static.flags & MDBX_DBG_LEGACY_OVERLAP) == 0) return MDBX_TXN_OVERLAPPING; } else { /* Reuse preallocated write txn. However, do not touch it until @@ -14339,7 +14338,7 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc, return err; #endif /* MADV_DONTDUMP */ #if defined(MADV_DODUMP) - if (runtime_flags & MDBX_DBG_DUMP) { + if (mdbx_static.flags & MDBX_DBG_DUMP) { const size_t meta_length_aligned2os = pgno_align2os_bytes(env, NUM_METAS); err = madvise(env->me_map, meta_length_aligned2os, MADV_DODUMP) ? 
ignore_enosys(errno) @@ -14589,7 +14588,7 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc, bytes2pgno(env, used_aligned2os_bytes), mo_Relaxed); if ((env->me_flags & MDBX_RDONLY) == 0 && env->me_stuck_meta < 0 && - (runtime_flags & MDBX_DBG_DONT_UPGRADE) == 0) { + (mdbx_static.flags & MDBX_DBG_DONT_UPGRADE) == 0) { for (int n = 0; n < NUM_METAS; ++n) { MDBX_meta *const meta = METAPAGE(env, n); if (unlikely(unaligned_peek_u64(4, &meta->mm_magic_and_version) != @@ -14697,7 +14696,7 @@ __cold static int setup_lck_locked(MDBX_env *env) { if (unlikely(MDBX_IS_ERROR(err))) return err; if (inprocess_neighbor) { - if ((runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 || + if ((mdbx_static.flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 || (inprocess_neighbor->me_flags & MDBX_EXCLUSIVE) != 0) return MDBX_BUSY; if (lck_seize_rc == MDBX_RESULT_TRUE) { @@ -14929,7 +14928,7 @@ __cold static int __must_check_result override_meta(MDBX_env *env, target, "pre", constmeta_txnid(shape)); return MDBX_PROBLEM; } - if (runtime_flags & MDBX_DBG_DONT_UPGRADE) + if (mdbx_static.flags & MDBX_DBG_DONT_UPGRADE) memcpy(&model->mm_magic_and_version, &shape->mm_magic_and_version, sizeof(model->mm_magic_and_version)); model->mm_extra_flags = shape->mm_extra_flags; @@ -24624,10 +24623,10 @@ __cold MDBX_INTERNAL_FUNC int cleanup_dead_readers(MDBX_env *env, __cold int mdbx_setup_debug(MDBX_log_level_t level, MDBX_debug_flags_t flags, MDBX_debug_func *logger) { - const int rc = runtime_flags | (loglevel << 16); + const int rc = mdbx_static.flags | (mdbx_static.loglevel << 16); if (level != MDBX_LOG_DONTCHANGE) - loglevel = (uint8_t)level; + mdbx_static.loglevel = (uint8_t)level; if (flags != MDBX_DBG_DONTCHANGE) { flags &= @@ -24636,11 +24635,11 @@ __cold int mdbx_setup_debug(MDBX_log_level_t level, MDBX_debug_flags_t flags, #endif MDBX_DBG_DUMP | MDBX_DBG_LEGACY_MULTIOPEN | MDBX_DBG_LEGACY_OVERLAP | MDBX_DBG_DONT_UPGRADE; - runtime_flags = (uint8_t)flags; + mdbx_static.flags = (uint8_t)flags; } 
if (logger != MDBX_LOGGER_DONTCHANGE) - debug_logger = logger; + mdbx_static.logger = logger; return rc; } diff --git a/src/internals.h b/src/internals.h index d6ce1758..98005f99 100644 --- a/src/internals.h +++ b/src/internals.h @@ -24,11 +24,13 @@ #ifdef xMDBX_ALLOY /* Amalgamated build */ #define MDBX_INTERNAL_FUNC static -#define MDBX_INTERNAL_VAR static +#define MDBX_INTERNAL_VAR_PROTO static +#define MDBX_INTERNAL_VAR_INSTA static #else /* Non-amalgamated build */ #define MDBX_INTERNAL_FUNC -#define MDBX_INTERNAL_VAR extern +#define MDBX_INTERNAL_VAR_PROTO extern +#define MDBX_INTERNAL_VAR_INSTA #endif /* xMDBX_ALLOY */ /*----------------------------------------------------------------------------*/ @@ -242,13 +244,17 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor; #define MDBX_RUNTIME_FLAGS_INIT \ ((MDBX_DEBUG) > 0) * MDBX_DBG_ASSERT + ((MDBX_DEBUG) > 1) * MDBX_DBG_AUDIT -extern uint8_t runtime_flags; -extern uint8_t loglevel; -extern MDBX_debug_func *debug_logger; +MDBX_INTERNAL_VAR_PROTO struct mdbx_static { + uint8_t flags; + uint8_t loglevel; + MDBX_debug_func *logger; + size_t logger_buffer_size; + char *logger_buffer; +} mdbx_static; MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny) { #if MDBX_DEBUG - if (MDBX_DBG_JITTER & runtime_flags) + if (MDBX_DBG_JITTER & mdbx_static.flags) osal_jitter(tiny); #else (void)tiny; @@ -262,17 +268,17 @@ MDBX_INTERNAL_FUNC void debug_log_va(int level, const char *function, int line, const char *fmt, va_list args); #if MDBX_DEBUG -#define LOG_ENABLED(msg) unlikely(msg <= loglevel) -#define AUDIT_ENABLED() unlikely((runtime_flags & MDBX_DBG_AUDIT)) +#define LOG_ENABLED(msg) unlikely(msg <= mdbx_static.loglevel) +#define AUDIT_ENABLED() unlikely((mdbx_static.flags & MDBX_DBG_AUDIT)) #else /* MDBX_DEBUG */ -#define LOG_ENABLED(msg) (msg < MDBX_LOG_VERBOSE && msg <= loglevel) +#define LOG_ENABLED(msg) (msg < MDBX_LOG_VERBOSE && msg <= mdbx_static.loglevel) #define AUDIT_ENABLED() (0) #endif /* 
MDBX_DEBUG */ #if MDBX_FORCE_ASSERTIONS #define ASSERT_ENABLED() (1) #elif MDBX_DEBUG -#define ASSERT_ENABLED() likely((runtime_flags & MDBX_DBG_ASSERT)) +#define ASSERT_ENABLED() likely((mdbx_static.flags & MDBX_DBG_ASSERT)) #else #define ASSERT_ENABLED() (0) #endif /* assertions */ diff --git a/src/lck-posix.c b/src/lck-posix.c index c0c0909b..7108106d 100644 --- a/src/lck-posix.c +++ b/src/lck-posix.c @@ -27,10 +27,9 @@ #include -#ifndef xMDBX_ALLOY -uint32_t linux_kernel_version; -bool mdbx_RunningOnWSL1; -#endif /* xMDBX_ALLOY */ +MDBX_INTERNAL_VAR_INSTA uint32_t linux_kernel_version; +MDBX_INTERNAL_VAR_INSTA bool + mdbx_RunningOnWSL1 /* Windows Subsystem 1 for Linux */; MDBX_EXCLUDE_FOR_GPROF __cold static uint8_t probe_for_WSL(const char *tag) { @@ -170,7 +169,7 @@ mdbx_global_destructor(void) { static int op_setlk, op_setlkw, op_getlk; __cold static void choice_fcntl(void) { assert(!op_setlk && !op_setlkw && !op_getlk); - if ((runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 + if ((mdbx_static.flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 #if defined(__linux__) || defined(__gnu_linux__) && linux_kernel_version > 0x030f0000 /* OFD locks are available since 3.15, but engages here diff --git a/src/osal.c b/src/osal.c index 5a0ae633..8109c749 100644 --- a/src/osal.c +++ b/src/osal.c @@ -244,7 +244,7 @@ MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func, unsigned line) { #endif /* MDBX_DEBUG */ - if (debug_logger) + if (mdbx_static.logger) debug_log(MDBX_LOG_FATAL, func, line, "assert: %s\n", msg); else { #if defined(_WIN32) || defined(_WIN64) @@ -287,7 +287,7 @@ __cold void mdbx_panic(const char *fmt, ...) { ? 
"" : message; - if (debug_logger) + if (mdbx_static.logger) debug_log(MDBX_LOG_FATAL, "panic", 0, "%s", const_message); while (1) { @@ -3488,10 +3488,8 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, return MDBX_SUCCESS; } -#ifndef xMDBX_ALLOY -unsigned sys_pagesize; -MDBX_MAYBE_UNUSED unsigned sys_pagesize_ln2, sys_allocation_granularity; -#endif /* xMDBX_ALLOY */ +MDBX_INTERNAL_VAR_INSTA unsigned sys_pagesize, sys_pagesize_ln2, + sys_allocation_granularity; void osal_ctor(void) { #if MDBX_HAVE_PWRITEV && defined(_SC_IOV_MAX) diff --git a/src/osal.h b/src/osal.h index fcdb2370..bb1651fa 100644 --- a/src/osal.h +++ b/src/osal.h @@ -210,8 +210,8 @@ typedef pthread_mutex_t osal_fastmutex_t; /*----------------------------------------------------------------------------*/ /* OS abstraction layer stuff */ -MDBX_INTERNAL_VAR unsigned sys_pagesize; -MDBX_MAYBE_UNUSED MDBX_INTERNAL_VAR unsigned sys_pagesize_ln2, +MDBX_INTERNAL_VAR_PROTO unsigned sys_pagesize; +MDBX_MAYBE_UNUSED MDBX_INTERNAL_VAR_PROTO unsigned sys_pagesize_ln2, sys_allocation_granularity; /* Get the size of a memory page for the system. 
@@ -475,8 +475,9 @@ MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny); #endif #if defined(__linux__) || defined(__gnu_linux__) -MDBX_INTERNAL_VAR uint32_t linux_kernel_version; -MDBX_INTERNAL_VAR bool mdbx_RunningOnWSL1 /* Windows Subsystem 1 for Linux */; +MDBX_INTERNAL_VAR_PROTO uint32_t linux_kernel_version; +MDBX_INTERNAL_VAR_PROTO bool + mdbx_RunningOnWSL1 /* Windows Subsystem 1 for Linux */; #endif /* Linux */ #ifndef osal_strdup @@ -763,7 +764,7 @@ MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid); MDBX_INTERNAL_FUNC int osal_mb2w(const char *const src, wchar_t **const pdst); typedef void(WINAPI *osal_srwlock_t_function)(osal_srwlock_t *); -MDBX_INTERNAL_VAR osal_srwlock_t_function osal_srwlock_Init, +MDBX_INTERNAL_VAR_PROTO osal_srwlock_t_function osal_srwlock_Init, osal_srwlock_AcquireShared, osal_srwlock_ReleaseShared, osal_srwlock_AcquireExclusive, osal_srwlock_ReleaseExclusive; @@ -816,7 +817,7 @@ typedef struct _FILE_REMOTE_PROTOCOL_INFO { typedef BOOL(WINAPI *MDBX_GetFileInformationByHandleEx)( _In_ HANDLE hFile, _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass, _Out_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize); -MDBX_INTERNAL_VAR MDBX_GetFileInformationByHandleEx +MDBX_INTERNAL_VAR_PROTO MDBX_GetFileInformationByHandleEx mdbx_GetFileInformationByHandleEx; typedef BOOL(WINAPI *MDBX_GetVolumeInformationByHandleW)( @@ -825,19 +826,20 @@ typedef BOOL(WINAPI *MDBX_GetVolumeInformationByHandleW)( _Out_opt_ LPDWORD lpMaximumComponentLength, _Out_opt_ LPDWORD lpFileSystemFlags, _Out_opt_ LPWSTR lpFileSystemNameBuffer, _In_ DWORD nFileSystemNameSize); -MDBX_INTERNAL_VAR MDBX_GetVolumeInformationByHandleW +MDBX_INTERNAL_VAR_PROTO MDBX_GetVolumeInformationByHandleW mdbx_GetVolumeInformationByHandleW; typedef DWORD(WINAPI *MDBX_GetFinalPathNameByHandleW)(_In_ HANDLE hFile, _Out_ LPWSTR lpszFilePath, _In_ DWORD cchFilePath, _In_ DWORD dwFlags); -MDBX_INTERNAL_VAR MDBX_GetFinalPathNameByHandleW 
mdbx_GetFinalPathNameByHandleW; +MDBX_INTERNAL_VAR_PROTO MDBX_GetFinalPathNameByHandleW + mdbx_GetFinalPathNameByHandleW; typedef BOOL(WINAPI *MDBX_SetFileInformationByHandle)( _In_ HANDLE hFile, _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass, _Out_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize); -MDBX_INTERNAL_VAR MDBX_SetFileInformationByHandle +MDBX_INTERNAL_VAR_PROTO MDBX_SetFileInformationByHandle mdbx_SetFileInformationByHandle; typedef NTSTATUS(NTAPI *MDBX_NtFsControlFile)( @@ -846,10 +848,10 @@ typedef NTSTATUS(NTAPI *MDBX_NtFsControlFile)( OUT PIO_STATUS_BLOCK IoStatusBlock, IN ULONG FsControlCode, IN OUT PVOID InputBuffer, IN ULONG InputBufferLength, OUT OPTIONAL PVOID OutputBuffer, IN ULONG OutputBufferLength); -MDBX_INTERNAL_VAR MDBX_NtFsControlFile mdbx_NtFsControlFile; +MDBX_INTERNAL_VAR_PROTO MDBX_NtFsControlFile mdbx_NtFsControlFile; typedef uint64_t(WINAPI *MDBX_GetTickCount64)(void); -MDBX_INTERNAL_VAR MDBX_GetTickCount64 mdbx_GetTickCount64; +MDBX_INTERNAL_VAR_PROTO MDBX_GetTickCount64 mdbx_GetTickCount64; #if !defined(_WIN32_WINNT_WIN8) || _WIN32_WINNT < _WIN32_WINNT_WIN8 typedef struct _WIN32_MEMORY_RANGE_ENTRY { @@ -861,13 +863,13 @@ typedef struct _WIN32_MEMORY_RANGE_ENTRY { typedef BOOL(WINAPI *MDBX_PrefetchVirtualMemory)( HANDLE hProcess, ULONG_PTR NumberOfEntries, PWIN32_MEMORY_RANGE_ENTRY VirtualAddresses, ULONG Flags); -MDBX_INTERNAL_VAR MDBX_PrefetchVirtualMemory mdbx_PrefetchVirtualMemory; +MDBX_INTERNAL_VAR_PROTO MDBX_PrefetchVirtualMemory mdbx_PrefetchVirtualMemory; typedef enum _SECTION_INHERIT { ViewShare = 1, ViewUnmap = 2 } SECTION_INHERIT; typedef NTSTATUS(NTAPI *MDBX_NtExtendSection)(IN HANDLE SectionHandle, IN PLARGE_INTEGER NewSectionSize); -MDBX_INTERNAL_VAR MDBX_NtExtendSection mdbx_NtExtendSection; +MDBX_INTERNAL_VAR_PROTO MDBX_NtExtendSection mdbx_NtExtendSection; static __inline bool mdbx_RunningUnderWine(void) { return !mdbx_NtExtendSection; @@ -877,14 +879,15 @@ typedef LSTATUS(WINAPI *MDBX_RegGetValueA)(HKEY 
hkey, LPCSTR lpSubKey, LPCSTR lpValue, DWORD dwFlags, LPDWORD pdwType, PVOID pvData, LPDWORD pcbData); -MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA; +MDBX_INTERNAL_VAR_PROTO MDBX_RegGetValueA mdbx_RegGetValueA; NTSYSAPI ULONG RtlRandomEx(PULONG Seed); typedef BOOL(WINAPI *MDBX_SetFileIoOverlappedRange)(HANDLE FileHandle, PUCHAR OverlappedRangeStart, ULONG Length); -MDBX_INTERNAL_VAR MDBX_SetFileIoOverlappedRange mdbx_SetFileIoOverlappedRange; +MDBX_INTERNAL_VAR_PROTO MDBX_SetFileIoOverlappedRange + mdbx_SetFileIoOverlappedRange; #endif /* Windows */ diff --git a/test/base.h++ b/test/base.h++ index 7f605ba1..5cc3beb2 100644 --- a/test/base.h++ +++ b/test/base.h++ @@ -97,7 +97,8 @@ #include #define MDBX_INTERNAL_FUNC -#define MDBX_INTERNAL_VAR extern +#define MDBX_INTERNAL_VAR_PROTO extern +#define MDBX_INTERNAL_VAR_INSTA #define xMDBX_TOOLS /* Avoid using internal eASSERT() */ #include "../mdbx.h++" #include "../src/base.h" From 5c84c405ace1640b0d79fc6363838175d24bace9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 25 Mar 2024 18:39:56 +0300 Subject: [PATCH 151/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fsetup=5Fdebug=5Fnofmt(?= =?UTF-8?q?)`=20=D0=B8=20=D0=B2=D0=BE=D0=B7=D0=BC=D0=BE=D0=B6=D0=BD=D0=BE?= =?UTF-8?q?=D1=81=D1=82=D0=B8=20=D1=83=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2?= =?UTF-8?q?=D0=BA=D0=B8=20=D0=BB=D0=BE=D0=B3=D0=B5=D1=80=D0=B0=20=D0=B1?= =?UTF-8?q?=D0=B5=D0=B7=20=D1=84=D1=83=D0=BD=D0=BA=D1=86=D0=B8=D0=BE=D0=BD?= =?UTF-8?q?=D0=B0=D0=BB=D0=B0=20`printf()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 12 ++++++++++ src/core.c | 60 ++++++++++++++++++++++++++++++++++++++++--------- src/internals.h | 8 ++++++- src/osal.c | 4 ++-- 4 files changed, 70 insertions(+), 14 deletions(-) diff --git a/mdbx.h b/mdbx.h index 
d15c3e1e..9c721b47 100644 --- a/mdbx.h +++ b/mdbx.h @@ -1012,6 +1012,7 @@ typedef void MDBX_debug_func(MDBX_log_level_t loglevel, const char *function, /** \brief The "don't change `logger`" value for mdbx_setup_debug() */ #define MDBX_LOGGER_DONTCHANGE ((MDBX_debug_func *)(intptr_t)-1) +#define MDBX_LOGGER_NOFMT_DONTCHANGE ((MDBX_debug_func_nofmt *)(intptr_t)-1) /** \brief Setup global log-level, debug options and debug logger. * \returns The previously `debug_flags` in the 0-15 bits @@ -1020,6 +1021,17 @@ LIBMDBX_API int mdbx_setup_debug(MDBX_log_level_t log_level, MDBX_debug_flags_t debug_flags, MDBX_debug_func *logger); +typedef void MDBX_debug_func_nofmt(MDBX_log_level_t loglevel, + const char *function, int line, + const char *msg, + unsigned length) MDBX_CXX17_NOEXCEPT; + +LIBMDBX_API int mdbx_setup_debug_nofmt(MDBX_log_level_t log_level, + MDBX_debug_flags_t debug_flags, + MDBX_debug_func_nofmt *logger, + char *logger_buffer, + size_t logger_buffer_size); + /** \brief A callback function for most MDBX assert() failures, * called before printing the message and aborting. 
* \see mdbx_env_set_assert() diff --git a/src/core.c b/src/core.c index 09d6ae84..2fbf0bf2 100644 --- a/src/core.c +++ b/src/core.c @@ -1445,6 +1445,10 @@ __cold void thread_dtor(void *rthc) { #endif } +MDBX_INTERNAL_VAR_INSTA struct mdbx_static mdbx_static = { + MDBX_RUNTIME_FLAGS_INIT, MDBX_LOG_FATAL, {nullptr}, 0, nullptr}; +static osal_fastmutex_t debug_lock; + MDBX_EXCLUDE_FOR_GPROF __cold void global_dtor(void) { const uint32_t current_pid = osal_getpid(); @@ -1547,6 +1551,7 @@ __cold void global_dtor(void) { osal_dtor(); TRACE("<< pid %d\n", current_pid); + ENSURE(nullptr, osal_fastmutex_destroy(&debug_lock) == 0); } __cold int rthc_register(MDBX_env *const env) { @@ -3233,9 +3238,6 @@ static __always_inline int __must_check_result dpl_append(MDBX_txn *txn, /*----------------------------------------------------------------------------*/ -MDBX_INTERNAL_VAR_INSTA struct mdbx_static mdbx_static = { - MDBX_RUNTIME_FLAGS_INIT, MDBX_LOG_FATAL, nullptr, 0, nullptr}; - static __must_check_result __inline int page_retire(MDBX_cursor *mc, MDBX_page *mp); @@ -3587,9 +3589,18 @@ const char *mdbx_strerror_ANSI2OEM(int errnum) { __cold void debug_log_va(int level, const char *function, int line, const char *fmt, va_list args) { - if (mdbx_static.logger) - mdbx_static.logger(level, function, line, fmt, args); - else { + ENSURE(nullptr, osal_fastmutex_acquire(&debug_lock) == 0); + if (mdbx_static.logger.ptr) { + if (mdbx_static.logger_buffer == nullptr) + mdbx_static.logger.fmt(level, function, line, fmt, args); + else { + const int len = vsnprintf(mdbx_static.logger_buffer, + mdbx_static.logger_buffer_size, fmt, args); + if (len > 0) + mdbx_static.logger.nofmt(level, function, line, + mdbx_static.logger_buffer, len); + } + } else { #if defined(_WIN32) || defined(_WIN64) if (IsDebuggerPresent()) { int prefix_len = 0; @@ -3622,6 +3633,7 @@ __cold void debug_log_va(int level, const char *function, int line, fflush(stderr); #endif } + ENSURE(nullptr, 
osal_fastmutex_release(&debug_lock) == 0); } __cold void debug_log(int level, const char *function, int line, @@ -24621,10 +24633,12 @@ __cold MDBX_INTERNAL_FUNC int cleanup_dead_readers(MDBX_env *env, return rc; } -__cold int mdbx_setup_debug(MDBX_log_level_t level, MDBX_debug_flags_t flags, - MDBX_debug_func *logger) { - const int rc = mdbx_static.flags | (mdbx_static.loglevel << 16); +__cold static int setup_debug(MDBX_log_level_t level, MDBX_debug_flags_t flags, + union logger_union logger, char *buffer, + size_t buffer_size) { + ENSURE(nullptr, osal_fastmutex_acquire(&debug_lock) == 0); + const int rc = mdbx_static.flags | (mdbx_static.loglevel << 16); if (level != MDBX_LOG_DONTCHANGE) mdbx_static.loglevel = (uint8_t)level; @@ -24638,11 +24652,34 @@ __cold int mdbx_setup_debug(MDBX_log_level_t level, MDBX_debug_flags_t flags, mdbx_static.flags = (uint8_t)flags; } - if (logger != MDBX_LOGGER_DONTCHANGE) - mdbx_static.logger = logger; + assert(MDBX_LOGGER_DONTCHANGE == ((MDBX_debug_func *)(intptr_t)-1)); + if (logger.ptr != (void *)((intptr_t)-1)) { + mdbx_static.logger.ptr = logger.ptr; + mdbx_static.logger_buffer = buffer; + mdbx_static.logger_buffer_size = buffer_size; + } + + ENSURE(nullptr, osal_fastmutex_release(&debug_lock) == 0); return rc; } +__cold int mdbx_setup_debug_nofmt(MDBX_log_level_t level, + MDBX_debug_flags_t flags, + MDBX_debug_func_nofmt *logger, char *buffer, + size_t buffer_size) { + union logger_union thunk; + thunk.nofmt = + (logger && buffer && buffer_size) ? 
logger : MDBX_LOGGER_NOFMT_DONTCHANGE; + return setup_debug(level, flags, thunk, buffer, buffer_size); +} + +__cold int mdbx_setup_debug(MDBX_log_level_t level, MDBX_debug_flags_t flags, + MDBX_debug_func *logger) { + union logger_union thunk; + thunk.fmt = logger; + return setup_debug(level, flags, thunk, nullptr, 0); +} + __cold static txnid_t kick_longlived_readers(MDBX_env *env, const txnid_t laggard) { DEBUG("DB size maxed out by reading #%" PRIaTXN, laggard); @@ -26761,6 +26798,7 @@ __cold static void rthc_afterfork(void) { #endif /* ! Windows */ __cold void global_ctor(void) { + ENSURE(nullptr, osal_fastmutex_init(&debug_lock) == 0); osal_ctor(); rthc_limit = RTHC_INITIAL_LIMIT; rthc_table = rthc_table_static; diff --git a/src/internals.h b/src/internals.h index 98005f99..bb6b5e76 100644 --- a/src/internals.h +++ b/src/internals.h @@ -244,10 +244,16 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor; #define MDBX_RUNTIME_FLAGS_INIT \ ((MDBX_DEBUG) > 0) * MDBX_DBG_ASSERT + ((MDBX_DEBUG) > 1) * MDBX_DBG_AUDIT +union logger_union { + void *ptr; + MDBX_debug_func *fmt; + MDBX_debug_func_nofmt *nofmt; +}; + MDBX_INTERNAL_VAR_PROTO struct mdbx_static { uint8_t flags; uint8_t loglevel; - MDBX_debug_func *logger; + union logger_union logger; size_t logger_buffer_size; char *logger_buffer; } mdbx_static; diff --git a/src/osal.c b/src/osal.c index 8109c749..86f93d44 100644 --- a/src/osal.c +++ b/src/osal.c @@ -244,7 +244,7 @@ MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func, unsigned line) { #endif /* MDBX_DEBUG */ - if (mdbx_static.logger) + if (mdbx_static.logger.ptr) debug_log(MDBX_LOG_FATAL, func, line, "assert: %s\n", msg); else { #if defined(_WIN32) || defined(_WIN64) @@ -287,7 +287,7 @@ __cold void mdbx_panic(const char *fmt, ...) { ? 
"" : message; - if (mdbx_static.logger) + if (mdbx_static.logger.ptr) debug_log(MDBX_LOG_FATAL, "panic", 0, "%s", const_message); while (1) { From 7b1f8ba642d0dfba7c4c3d4b891ed8f17b0f1cbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 30 Mar 2024 17:04:14 +0300 Subject: [PATCH 152/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B2=20C++=20API=20?= =?UTF-8?q?=D0=BC=D0=B5=D1=82=D0=BE=D0=B4=D0=BE=D0=B2=20`txn::rename=5Fmap?= =?UTF-8?q?()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 23 +++++++++++++++++++++++ src/mdbx.c++ | 23 +++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index 90d981b8..fd07c123 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4338,6 +4338,21 @@ public: inline bool clear_map(const ::std::string &name, bool throw_if_absent = false); + /// \brief Переименовывает таблицу ключ-значение. + inline void rename_map(map_handle map, const char *new_name); + /// \brief Переименовывает таблицу ключ-значение. + inline void rename_map(map_handle map, const ::std::string &new_name); + /// \brief Переименовывает таблицу ключ-значение. + /// \return `True` если таблица существует и была переименована, либо + /// `false` в случае отсутствия исходной таблицы. + bool rename_map(const char *old_name, const char *new_name, + bool throw_if_absent = false); + /// \brief Переименовывает таблицу ключ-значение. + /// \return `True` если таблица существует и была переименована, либо + /// `false` в случае отсутствия исходной таблицы. + bool rename_map(const ::std::string &old_name, const ::std::string &new_name, + bool throw_if_absent = false); + using map_stat = ::MDBX_stat; /// \brief Returns statistics for a sub-database. 
inline map_stat get_map_stat(map_handle map) const; @@ -6319,6 +6334,14 @@ inline bool txn::clear_map(const ::std::string &name, bool throw_if_absent) { return clear_map(name.c_str(), throw_if_absent); } +inline void txn::rename_map(map_handle map, const char *new_name) { + error::success_or_throw(::mdbx_dbi_rename(handle_, map, new_name)); +} + +inline void txn::rename_map(map_handle map, const ::std::string &new_name) { + return rename_map(map, new_name.c_str()); +} + inline txn::map_stat txn::get_map_stat(map_handle map) const { txn::map_stat r; error::success_or_throw(::mdbx_dbi_stat(handle_, map.dbi, &r, sizeof(r))); diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 60ef7ead..45372996 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -1586,6 +1586,29 @@ bool txn::clear_map(const char *name, bool throw_if_absent) { } } +bool txn::rename_map(const char *old_name, const char *new_name, + bool throw_if_absent) { + map_handle map; + const int err = ::mdbx_dbi_open(handle_, old_name, MDBX_DB_ACCEDE, &map.dbi); + switch (err) { + case MDBX_SUCCESS: + rename_map(map, new_name); + return true; + case MDBX_NOTFOUND: + case MDBX_BAD_DBI: + if (!throw_if_absent) + return false; + MDBX_CXX17_FALLTHROUGH /* fallthrough */; + default: + MDBX_CXX20_UNLIKELY error::throw_exception(err); + } +} + +bool txn::rename_map(const ::std::string &old_name, + const ::std::string &new_name, bool throw_if_absent) { + return rename_map(old_name.c_str(), new_name.c_str(), throw_if_absent); +} + //------------------------------------------------------------------------------ void cursor_managed::close() { From cce5c8249cf52202138bead9182d8c951a1e774d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 30 Mar 2024 17:26:52 +0300 Subject: [PATCH 153/443] =?UTF-8?q?mdbx++:=20=D0=B1=D0=BE=D0=BB=D1=8C?= =?UTF-8?q?=D1=88=D0=B5=20`=5F=5Fcold`=20=D0=B4=D0=BB=D1=8F=20=D1=80=D0=B5?= 
=?UTF-8?q?=D0=B4=D0=BA=D0=BE-=D0=B8=D1=81=D0=BF=D0=BE=D0=BB=D1=8C=D0=B7?= =?UTF-8?q?=D1=83=D0=B5=D0=BC=D1=8B=D1=85=20=D1=84=D1=83=D0=BD=D0=BA=D1=86?= =?UTF-8?q?=D0=B8=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mdbx.c++ | 50 ++++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 45372996..01334c5c 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -1291,7 +1291,7 @@ bool env::is_pristine() const { bool env::is_empty() const { return get_stat().ms_leaf_pages == 0; } -env &env::copy(filehandle fd, bool compactify, bool force_dynamic_size) { +__cold env &env::copy(filehandle fd, bool compactify, bool force_dynamic_size) { error::success_or_throw( ::mdbx_env_copy2fd(handle_, fd, (compactify ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS) | @@ -1300,8 +1300,8 @@ env &env::copy(filehandle fd, bool compactify, bool force_dynamic_size) { return *this; } -env &env::copy(const char *destination, bool compactify, - bool force_dynamic_size) { +__cold env &env::copy(const char *destination, bool compactify, + bool force_dynamic_size) { error::success_or_throw( ::mdbx_env_copy(handle_, destination, (compactify ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS) | @@ -1310,14 +1310,14 @@ env &env::copy(const char *destination, bool compactify, return *this; } -env &env::copy(const ::std::string &destination, bool compactify, - bool force_dynamic_size) { +__cold env &env::copy(const ::std::string &destination, bool compactify, + bool force_dynamic_size) { return copy(destination.c_str(), compactify, force_dynamic_size); } #if defined(_WIN32) || defined(_WIN64) -env &env::copy(const wchar_t *destination, bool compactify, - bool force_dynamic_size) { +__cold env &env::copy(const wchar_t *destination, bool compactify, + bool force_dynamic_size) { error::success_or_throw( ::mdbx_env_copyW(handle_, destination, (compactify ? 
MDBX_CP_COMPACT : MDBX_CP_DEFAULTS) | @@ -1333,13 +1333,13 @@ env &env::copy(const ::std::wstring &destination, bool compactify, #endif /* Windows */ #ifdef MDBX_STD_FILESYSTEM_PATH -env &env::copy(const MDBX_STD_FILESYSTEM_PATH &destination, bool compactify, - bool force_dynamic_size) { +__cold env &env::copy(const MDBX_STD_FILESYSTEM_PATH &destination, + bool compactify, bool force_dynamic_size) { return copy(destination.native(), compactify, force_dynamic_size); } #endif /* MDBX_STD_FILESYSTEM_PATH */ -path env::get_path() const { +__cold path env::get_path() const { #if defined(_WIN32) || defined(_WIN64) const wchar_t *c_wstr; error::success_or_throw(::mdbx_env_get_pathW(handle_, &c_wstr)); @@ -1353,29 +1353,30 @@ path env::get_path() const { #endif } -bool env::remove(const char *pathname, const remove_mode mode) { +__cold bool env::remove(const char *pathname, const remove_mode mode) { return error::boolean_or_throw( ::mdbx_env_delete(pathname, MDBX_env_delete_mode_t(mode))); } -bool env::remove(const ::std::string &pathname, const remove_mode mode) { +__cold bool env::remove(const ::std::string &pathname, const remove_mode mode) { return remove(pathname.c_str(), mode); } #if defined(_WIN32) || defined(_WIN64) -bool env::remove(const wchar_t *pathname, const remove_mode mode) { +__cold bool env::remove(const wchar_t *pathname, const remove_mode mode) { return error::boolean_or_throw( ::mdbx_env_deleteW(pathname, MDBX_env_delete_mode_t(mode))); } -bool env::remove(const ::std::wstring &pathname, const remove_mode mode) { +__cold bool env::remove(const ::std::wstring &pathname, + const remove_mode mode) { return remove(pathname.c_str(), mode); } #endif /* Windows */ #ifdef MDBX_STD_FILESYSTEM_PATH -bool env::remove(const MDBX_STD_FILESYSTEM_PATH &pathname, - const remove_mode mode) { +__cold bool env::remove(const MDBX_STD_FILESYSTEM_PATH &pathname, + const remove_mode mode) { return remove(pathname.native(), mode); } #endif /* MDBX_STD_FILESYSTEM_PATH */ @@ 
-1389,13 +1390,13 @@ static inline MDBX_env *create_env() { return ptr; } -env_managed::~env_managed() noexcept { +__cold env_managed::~env_managed() noexcept { if (MDBX_UNLIKELY(handle_)) MDBX_CXX20_UNLIKELY error::success_or_panic( ::mdbx_env_close(handle_), "mdbx::~env()", "mdbx_env_close"); } -void env_managed::close(bool dont_sync) { +__cold void env_managed::close(bool dont_sync) { const error rc = static_cast(::mdbx_env_close_ex(handle_, dont_sync)); switch (rc.code()) { @@ -1552,7 +1553,7 @@ void txn_managed::commit_embark_read() { //------------------------------------------------------------------------------ -bool txn::drop_map(const char *name, bool throw_if_absent) { +__cold bool txn::drop_map(const char *name, bool throw_if_absent) { map_handle map; const int err = ::mdbx_dbi_open(handle_, name, MDBX_DB_ACCEDE, &map.dbi); switch (err) { @@ -1569,7 +1570,7 @@ bool txn::drop_map(const char *name, bool throw_if_absent) { } } -bool txn::clear_map(const char *name, bool throw_if_absent) { +__cold bool txn::clear_map(const char *name, bool throw_if_absent) { map_handle map; const int err = ::mdbx_dbi_open(handle_, name, MDBX_DB_ACCEDE, &map.dbi); switch (err) { @@ -1586,8 +1587,8 @@ bool txn::clear_map(const char *name, bool throw_if_absent) { } } -bool txn::rename_map(const char *old_name, const char *new_name, - bool throw_if_absent) { +__cold bool txn::rename_map(const char *old_name, const char *new_name, + bool throw_if_absent) { map_handle map; const int err = ::mdbx_dbi_open(handle_, old_name, MDBX_DB_ACCEDE, &map.dbi); switch (err) { @@ -1604,8 +1605,9 @@ bool txn::rename_map(const char *old_name, const char *new_name, } } -bool txn::rename_map(const ::std::string &old_name, - const ::std::string &new_name, bool throw_if_absent) { +__cold bool txn::rename_map(const ::std::string &old_name, + const ::std::string &new_name, + bool throw_if_absent) { return rename_map(old_name.c_str(), new_name.c_str(), throw_if_absent); } From 
e9a49e3715ebef55e16199126ac765159ea1e32d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 30 Mar 2024 17:38:53 +0300 Subject: [PATCH 154/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D0=B3=D1=80=D1=83=D0=B7=D0=BE=D0=BA=20=D1=81=D0=BE=20`std::str?= =?UTF-8?q?ing=5Fview`=20=D0=B4=D0=BB=D1=8F=20=D0=BC=D0=B5=D1=82=D0=BE?= =?UTF-8?q?=D0=B4=D0=BE=D0=B2=20open=5Fmap/create=5Fmap=5F/drop=5Fmap/clea?= =?UTF-8?q?r=5Fmap/rename=5Fmap().?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 131 +++++++++++++++++++++++++++++++++++++++++++-------- src/mdbx.c++ | 71 ++++++++++++++++++++++++++++ 2 files changed, 182 insertions(+), 20 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index fd07c123..e8a58bfe 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4353,6 +4353,37 @@ public: bool rename_map(const ::std::string &old_name, const ::std::string &new_name, bool throw_if_absent = false); +#if defined(DOXYGEN) || \ + (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) + + /// \brief Open existing key-value map. + inline map_handle open_map( + const ::std::string_view &name, + const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, + const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single) const; + /// \brief Create new or open existing key-value map. + inline map_handle + create_map(const ::std::string_view &name, + const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, + const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single); + /// \brief Drop key-value map. + /// \return `True` if the key-value map existed and was deleted, either + /// `false` if the key-value map did not exist and there is nothing to delete. 
+ bool drop_map(const ::std::string_view &name, bool throw_if_absent = false); + /// \return `True` if the key-value map existed and was cleared, either + /// `false` if the key-value map did not exist and there is nothing to clear. + bool clear_map(const ::std::string_view &name, bool throw_if_absent = false); + /// \brief Переименовывает таблицу ключ-значение. + inline void rename_map(map_handle map, const ::std::string_view &new_name); + /// \brief Переименовывает таблицу ключ-значение. + /// \return `True` если таблица существует и была переименована, либо + /// `false` в случае отсутствия исходной таблицы. + bool rename_map(const ::std::string_view &old_name, + const ::std::string_view &new_name, + bool throw_if_absent = false); + +#endif /* __cpp_lib_string_view >= 201606L */ + using map_stat = ::MDBX_stat; /// \brief Returns statistics for a sub-database. inline map_stat get_map_stat(map_handle map) const; @@ -6294,12 +6325,6 @@ txn::open_map(const char *name, const ::mdbx::key_mode key_mode, return map; } -inline ::mdbx::map_handle -txn::open_map(const ::std::string &name, const ::mdbx::key_mode key_mode, - const ::mdbx::value_mode value_mode) const { - return open_map(name.c_str(), key_mode, value_mode); -} - inline ::mdbx::map_handle txn::create_map(const char *name, const ::mdbx::key_mode key_mode, const ::mdbx::value_mode value_mode) { @@ -6312,36 +6337,102 @@ inline ::mdbx::map_handle txn::create_map(const char *name, return map; } -inline ::mdbx::map_handle txn::create_map(const ::std::string &name, - const ::mdbx::key_mode key_mode, - const ::mdbx::value_mode value_mode) { - return create_map(name.c_str(), key_mode, value_mode); -} - inline void txn::drop_map(map_handle map) { error::success_or_throw(::mdbx_drop(handle_, map.dbi, true)); } -inline bool txn::drop_map(const ::std::string &name, bool throw_if_absent) { - return drop_map(name.c_str(), throw_if_absent); -} - inline void txn::clear_map(map_handle map) { 
error::success_or_throw(::mdbx_drop(handle_, map.dbi, false)); } -inline bool txn::clear_map(const ::std::string &name, bool throw_if_absent) { - return clear_map(name.c_str(), throw_if_absent); -} - inline void txn::rename_map(map_handle map, const char *new_name) { error::success_or_throw(::mdbx_dbi_rename(handle_, map, new_name)); } +#if defined(DOXYGEN) || \ + (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) + +inline ::mdbx::map_handle +txn::open_map(const ::std::string_view &name, const ::mdbx::key_mode key_mode, + const ::mdbx::value_mode value_mode) const { + ::mdbx::map_handle map; + error::success_or_throw(::mdbx_dbi_open2( + handle_, ::mdbx::slice(name), + MDBX_db_flags_t(key_mode) | MDBX_db_flags_t(value_mode), &map.dbi)); + assert(map.dbi != 0); + return map; +} + +inline ::mdbx::map_handle txn::create_map(const ::std::string_view &name, + const ::mdbx::key_mode key_mode, + const ::mdbx::value_mode value_mode) { + ::mdbx::map_handle map; + error::success_or_throw(::mdbx_dbi_open2( + handle_, ::mdbx::slice(name), + MDBX_CREATE | MDBX_db_flags_t(key_mode) | MDBX_db_flags_t(value_mode), + &map.dbi)); + assert(map.dbi != 0); + return map; +} + +inline void txn::rename_map(map_handle map, + const ::std::string_view &new_name) { + error::success_or_throw( + ::mdbx_dbi_rename2(handle_, map, ::mdbx::slice(new_name))); +} + +inline ::mdbx::map_handle +txn::open_map(const ::std::string &name, const ::mdbx::key_mode key_mode, + const ::mdbx::value_mode value_mode) const { + return open_map(::std::string_view(name), key_mode, value_mode); +} + +inline ::mdbx::map_handle txn::create_map(const ::std::string &name, + const ::mdbx::key_mode key_mode, + const ::mdbx::value_mode value_mode) { + return create_map(::std::string_view(name), key_mode, value_mode); +} + +inline bool txn::drop_map(const ::std::string &name, bool throw_if_absent) { + return drop_map(::std::string_view(name), throw_if_absent); +} + +inline bool txn::clear_map(const 
::std::string &name, bool throw_if_absent) { + return clear_map(::std::string_view(name), throw_if_absent); +} + +inline void txn::rename_map(map_handle map, const ::std::string &new_name) { + return rename_map(map, ::std::string_view(new_name)); +} + +#else + +inline ::mdbx::map_handle +txn::open_map(const ::std::string &name, const ::mdbx::key_mode key_mode, + const ::mdbx::value_mode value_mode) const { + return open_map(name.c_str(), key_mode, value_mode); +} + +inline ::mdbx::map_handle txn::create_map(const ::std::string &name, + const ::mdbx::key_mode key_mode, + const ::mdbx::value_mode value_mode) { + return create_map(name.c_str(), key_mode, value_mode); +} + +inline bool txn::drop_map(const ::std::string &name, bool throw_if_absent) { + return drop_map(name.c_str(), throw_if_absent); +} + +inline bool txn::clear_map(const ::std::string &name, bool throw_if_absent) { + return clear_map(name.c_str(), throw_if_absent); +} + inline void txn::rename_map(map_handle map, const ::std::string &new_name) { return rename_map(map, new_name.c_str()); } +#endif /* __cpp_lib_string_view >= 201606L */ + inline txn::map_stat txn::get_map_stat(map_handle map) const { txn::map_stat r; error::success_or_throw(::mdbx_dbi_stat(handle_, map.dbi, &r, sizeof(r))); diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 01334c5c..28977e42 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -1605,12 +1605,83 @@ __cold bool txn::rename_map(const char *old_name, const char *new_name, } } +#if defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L + +__cold bool txn::drop_map(const ::std::string_view &name, + bool throw_if_absent) { + map_handle map; + const int err = + ::mdbx_dbi_open2(handle_, mdbx::slice(name), MDBX_DB_ACCEDE, &map.dbi); + switch (err) { + case MDBX_SUCCESS: + drop_map(map); + return true; + case MDBX_NOTFOUND: + case MDBX_BAD_DBI: + if (!throw_if_absent) + return false; + MDBX_CXX17_FALLTHROUGH /* fallthrough */; + default: + MDBX_CXX20_UNLIKELY 
error::throw_exception(err); + } +} + +__cold bool txn::clear_map(const ::std::string_view &name, + bool throw_if_absent) { + map_handle map; + const int err = + ::mdbx_dbi_open2(handle_, mdbx::slice(name), MDBX_DB_ACCEDE, &map.dbi); + switch (err) { + case MDBX_SUCCESS: + clear_map(map); + return true; + case MDBX_NOTFOUND: + case MDBX_BAD_DBI: + if (!throw_if_absent) + return false; + MDBX_CXX17_FALLTHROUGH /* fallthrough */; + default: + MDBX_CXX20_UNLIKELY error::throw_exception(err); + } +} + +__cold bool txn::rename_map(const ::std::string_view &old_name, + const ::std::string_view &new_name, + bool throw_if_absent) { + map_handle map; + const int err = ::mdbx_dbi_open2(handle_, mdbx::slice(old_name), + MDBX_DB_ACCEDE, &map.dbi); + switch (err) { + case MDBX_SUCCESS: + rename_map(map, new_name); + return true; + case MDBX_NOTFOUND: + case MDBX_BAD_DBI: + if (!throw_if_absent) + return false; + MDBX_CXX17_FALLTHROUGH /* fallthrough */; + default: + MDBX_CXX20_UNLIKELY error::throw_exception(err); + } +} + +__cold bool txn::rename_map(const ::std::string &old_name, + const ::std::string &new_name, + bool throw_if_absent) { + return rename_map(::std::string_view(old_name), ::std::string_view(new_name), + throw_if_absent); +} + +#else + __cold bool txn::rename_map(const ::std::string &old_name, const ::std::string &new_name, bool throw_if_absent) { return rename_map(old_name.c_str(), new_name.c_str(), throw_if_absent); } +#endif /* __cpp_lib_string_view >= 201606L */ + //------------------------------------------------------------------------------ void cursor_managed::close() { From d4f7b4114bdaf5e9889686e77e9bc2d9c0aa588c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 30 Mar 2024 23:30:06 +0300 Subject: [PATCH 155/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`buffer::clear=5Fand=5Fr?= 
=?UTF-8?q?eserve()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index e8a58bfe..8ef9b44b 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -1907,7 +1907,6 @@ private: const size_t old_capacity = bin_.capacity(); const size_t new_capacity = bin::advise_capacity(old_capacity, wanna_capacity); - assert(new_capacity >= wanna_capacity); if (MDBX_LIKELY(new_capacity == old_capacity)) MDBX_CXX20_LIKELY { assert(bin_.is_inplace() == @@ -2073,7 +2072,13 @@ private: return *this; } - MDBX_CXX20_CONSTEXPR void clear() { reshape(0, 0, nullptr, 0); } + MDBX_CXX20_CONSTEXPR void *clear() { + return reshape(0, 0, nullptr, 0); + } + MDBX_CXX20_CONSTEXPR void *clear_and_reserve(size_t whole_capacity, + size_t headroom) { + return reshape(whole_capacity, headroom, nullptr, 0); + } MDBX_CXX20_CONSTEXPR void resize(size_t capacity, size_t headroom, slice &content) { content.iov_base = @@ -2803,9 +2808,11 @@ public: } /// \brief Clears the contents and storage. - void clear() noexcept { - slice_.clear(); - silo_.clear(); + void clear() noexcept { slice_.assign(silo_.clear(), size_t(0)); } + + /// \brief Clears the contents and reserve storage. + void clear_and_reserve(size_t whole_capacity, size_t headroom = 0) noexcept { + slice_.assign(silo_.clear_and_reserve(whole_capacity, headroom), size_t(0)); } /// \brief Reduces memory usage by freeing unused storage space. From b36679ddcbf75d3fb99f344e32c8a9d7b94001b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 30 Mar 2024 23:30:30 +0300 Subject: [PATCH 156/443] mdbx++: buffer::append_bytes(). 
--- mdbx.h++ | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index 8ef9b44b..008dd808 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -2966,6 +2966,79 @@ public: return append_producer(from_base64(data, ignore_spaces)); } + buffer &append_u8(uint_fast8_t u8) { + if (MDBX_UNLIKELY(tailroom() < 1)) + MDBX_CXX20_UNLIKELY reserve_tailroom(1); + *slice_.byte_ptr() = u8; + slice_.iov_len += 1; + return *this; + } + + buffer &append_byte(uint_fast8_t byte) { return append_u8(byte); } + + buffer &append_u16(uint_fast16_t u16) { + if (MDBX_UNLIKELY(tailroom() < 2)) + MDBX_CXX20_UNLIKELY reserve_tailroom(2); + const auto ptr = slice_.byte_ptr(); + ptr[0] = uint8_t(u16); + ptr[1] = uint8_t(u16 >> 8); + slice_.iov_len += 2; + return *this; + } + + buffer &append_u24(uint_fast32_t u24) { + if (MDBX_UNLIKELY(tailroom() < 3)) + MDBX_CXX20_UNLIKELY reserve_tailroom(3); + const auto ptr = slice_.byte_ptr(); + ptr[0] = uint8_t(u24); + ptr[1] = uint8_t(u24 >> 8); + ptr[2] = uint8_t(u24 >> 16); + slice_.iov_len += 3; + return *this; + } + + buffer &append_u32(uint_fast32_t u32) { + if (MDBX_UNLIKELY(tailroom() < 4)) + MDBX_CXX20_UNLIKELY reserve_tailroom(4); + const auto ptr = slice_.byte_ptr(); + ptr[0] = uint8_t(u32); + ptr[1] = uint8_t(u32 >> 8); + ptr[2] = uint8_t(u32 >> 16); + ptr[3] = uint8_t(u32 >> 24); + slice_.iov_len += 4; + return *this; + } + + buffer &append_u48(uint_fast64_t u48) { + if (MDBX_UNLIKELY(tailroom() < 6)) + MDBX_CXX20_UNLIKELY reserve_tailroom(6); + const auto ptr = slice_.byte_ptr(); + ptr[0] = uint8_t(u48); + ptr[1] = uint8_t(u48 >> 8); + ptr[2] = uint8_t(u48 >> 16); + ptr[3] = uint8_t(u48 >> 24); + ptr[4] = uint8_t(u48 >> 32); + ptr[5] = uint8_t(u48 >> 40); + slice_.iov_len += 6; + return *this; + } + + buffer &append_u64(uint_fast64_t u64) { + if (MDBX_UNLIKELY(tailroom() < 8)) + MDBX_CXX20_UNLIKELY reserve_tailroom(8); + const auto ptr = slice_.byte_ptr(); + ptr[0] = uint8_t(u64); + 
ptr[1] = uint8_t(u64 >> 8); + ptr[2] = uint8_t(u64 >> 16); + ptr[3] = uint8_t(u64 >> 24); + ptr[4] = uint8_t(u64 >> 32); + ptr[5] = uint8_t(u64 >> 40); + ptr[6] = uint8_t(u64 >> 48); + ptr[7] = uint8_t(u64 >> 56); + slice_.iov_len += 8; + return *this; + } + //---------------------------------------------------------------------------- template From 5c3c7b92926a32e96fb17e321a0f25cb8e556610 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 30 Mar 2024 23:32:00 +0300 Subject: [PATCH 157/443] =?UTF-8?q?mdbx++:=20=D0=B8=D1=81=D0=BF=D1=80?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B8=D0=BD=D0=B2?= =?UTF-8?q?=D0=B5=D1=80=D1=81=D0=B8=D0=B8=20bool-=D1=80=D0=B5=D0=B7=D1=83?= =?UTF-8?q?=D0=BB=D1=8C=D1=82=D0=B0=D1=82=D0=B0=20env::remove().?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mdbx.c++ | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 28977e42..74690740 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -1354,7 +1354,7 @@ __cold path env::get_path() const { } __cold bool env::remove(const char *pathname, const remove_mode mode) { - return error::boolean_or_throw( + return !error::boolean_or_throw( ::mdbx_env_delete(pathname, MDBX_env_delete_mode_t(mode))); } @@ -1364,7 +1364,7 @@ __cold bool env::remove(const ::std::string &pathname, const remove_mode mode) { #if defined(_WIN32) || defined(_WIN64) __cold bool env::remove(const wchar_t *pathname, const remove_mode mode) { - return error::boolean_or_throw( + return !error::boolean_or_throw( ::mdbx_env_deleteW(pathname, MDBX_env_delete_mode_t(mode))); } From 2ce6ed33fa52b62931777a9d2c56133d73e85615 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 31 Mar 2024 14:20:21 +0300 Subject: 
[PATCH 158/443] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20`MDBX=5FEINVAL`=20=D0=B2=20?= =?UTF-8?q?=D1=81=D0=BB=D1=83=D1=87=D0=B0=D0=B5=20`mdbx=5Fenv=5Fremove("."?= =?UTF-8?q?)`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index 2fbf0bf2..65da0b2e 100644 --- a/src/core.c +++ b/src/core.c @@ -15321,7 +15321,10 @@ __cold int mdbx_env_deleteW(const wchar_t *pathname, err = MDBX_SUCCESS; } - if (err == MDBX_SUCCESS && !(dummy_env->me_flags & MDBX_NOSUBDIR)) { + if (err == MDBX_SUCCESS && !(dummy_env->me_flags & MDBX_NOSUBDIR) && + (/* pathname != "." */ pathname[0] != '.' || pathname[1] != 0) && + (/* pathname != ".." */ pathname[0] != '.' || pathname[1] != '.' || + pathname[2] != 0)) { err = osal_removedirectory(pathname); if (err == MDBX_SUCCESS) rc = MDBX_SUCCESS; From 2cc6d68c07e8256c96d01db395469114a1c12d80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 31 Mar 2024 14:23:23 +0300 Subject: [PATCH 159/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`txn::open=5Fmap=5Facced?= =?UTF-8?q?e()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index 008dd808..884fb216 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4386,6 +4386,11 @@ public: const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single) const; + /// \brief Open existing key-value map. + inline map_handle open_map_accede(const char *name) const; + /// \brief Open existing key-value map. 
+ inline map_handle open_map_accede(const ::std::string &name) const; + /// \brief Create new or open existing key-value map. inline map_handle create_map(const char *name, @@ -4441,6 +4446,8 @@ public: const ::std::string_view &name, const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single) const; + /// \brief Open existing key-value map. + inline map_handle open_map_accede(const ::std::string_view &name) const; /// \brief Create new or open existing key-value map. inline map_handle create_map(const ::std::string_view &name, @@ -6405,6 +6412,14 @@ txn::open_map(const char *name, const ::mdbx::key_mode key_mode, return map; } +inline ::mdbx::map_handle txn::open_map_accede(const char *name) const { + ::mdbx::map_handle map; + error::success_or_throw( + ::mdbx_dbi_open(handle_, name, MDBX_DB_ACCEDE, &map.dbi)); + assert(map.dbi != 0); + return map; +} + inline ::mdbx::map_handle txn::create_map(const char *name, const ::mdbx::key_mode key_mode, const ::mdbx::value_mode value_mode) { @@ -6443,6 +6458,15 @@ txn::open_map(const ::std::string_view &name, const ::mdbx::key_mode key_mode, return map; } +inline ::mdbx::map_handle +txn::open_map_accede(const ::std::string_view &name) const { + ::mdbx::map_handle map; + error::success_or_throw( + ::mdbx_dbi_open2(handle_, ::mdbx::slice(name), MDBX_DB_ACCEDE, &map.dbi)); + assert(map.dbi != 0); + return map; +} + inline ::mdbx::map_handle txn::create_map(const ::std::string_view &name, const ::mdbx::key_mode key_mode, const ::mdbx::value_mode value_mode) { @@ -6467,6 +6491,11 @@ txn::open_map(const ::std::string &name, const ::mdbx::key_mode key_mode, return open_map(::std::string_view(name), key_mode, value_mode); } +inline ::mdbx::map_handle +txn::open_map_accede(const ::std::string &name) const { + return open_map_accede(::std::string_view(name)); +} + inline ::mdbx::map_handle txn::create_map(const ::std::string &name, const ::mdbx::key_mode key_mode, 
const ::mdbx::value_mode value_mode) { @@ -6493,6 +6522,11 @@ txn::open_map(const ::std::string &name, const ::mdbx::key_mode key_mode, return open_map(name.c_str(), key_mode, value_mode); } +inline ::mdbx::map_handle +txn::open_map_accede(const ::std::string &name) const { + return open_map_accede(name.c_str()); +} + inline ::mdbx::map_handle txn::create_map(const ::std::string &name, const ::mdbx::key_mode key_mode, const ::mdbx::value_mode value_mode) { From 639ba8b7a5b4a9c2f07c895775370e542342dbe2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 1 Apr 2024 12:46:23 +0300 Subject: [PATCH 160/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=B5=D1=80=D0=B5=D0=B8?= =?UTF-8?q?=D0=BC=D0=B5=D0=BD=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20`mdbx?= =?UTF-8?q?=5Fenv=5Fchk=5Fencount=5Fproblem()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 2 +- src/core.c | 8 ++++---- src/mdbx_chk.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/mdbx.h b/mdbx.h index 9c721b47..aa9de079 100644 --- a/mdbx.h +++ b/mdbx.h @@ -6288,7 +6288,7 @@ LIBMDBX_API int mdbx_env_chk(MDBX_env *env, const MDBX_chk_callbacks_t *cb, enum MDBX_chk_severity verbosity, unsigned timeout_seconds_16dot16); /** FIXME */ -LIBMDBX_API int mdbx_env_chk_problem(MDBX_chk_context_t *ctx); +LIBMDBX_API int mdbx_env_chk_encount_problem(MDBX_chk_context_t *ctx); /** end of chk @} */ diff --git a/src/core.c b/src/core.c index 65da0b2e..6d000e9a 100644 --- a/src/core.c +++ b/src/core.c @@ -27075,7 +27075,7 @@ __cold __must_check_result static MDBX_chk_line_t * chk_line_begin(MDBX_chk_scope_t *const scope, enum MDBX_chk_severity severity) { MDBX_chk_internal_t *const chk = scope->internal; if (severity < MDBX_chk_warning) - mdbx_env_chk_problem(chk->usr); + mdbx_env_chk_encount_problem(chk->usr); MDBX_chk_line_t *line = nullptr; if 
(likely(chk->cb->print_begin)) { line = chk->cb->print_begin(chk->usr, severity); @@ -27270,7 +27270,7 @@ __cold static void MDBX_PRINTF_ARGS(5, 6) va_list args; va_start(args, extra_fmt); if (chk->cb->issue) { - mdbx_env_chk_problem(chk->usr); + mdbx_env_chk_encount_problem(chk->usr); chk->cb->issue(chk->usr, object, entry_number, caption, extra_fmt, args); } else { MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_error); @@ -27291,7 +27291,7 @@ __cold static void MDBX_PRINTF_ARGS(2, 3) va_list args; va_start(args, fmt); if (likely(chk->cb->issue)) { - mdbx_env_chk_problem(chk->usr); + mdbx_env_chk_encount_problem(chk->usr); chk->cb->issue(chk->usr, nullptr, 0, nullptr, fmt, args); } else chk_line_end( @@ -29025,7 +29025,7 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) { nullptr, nullptr)); } -__cold int mdbx_env_chk_problem(MDBX_chk_context_t *ctx) { +__cold int mdbx_env_chk_encount_problem(MDBX_chk_context_t *ctx) { if (likely(ctx && ctx->internal && ctx->internal->usr == ctx && ctx->internal->problem_counter && ctx->scope)) { *ctx->internal->problem_counter += 1; diff --git a/src/mdbx_chk.c b/src/mdbx_chk.c index a66e86d1..f84a0ad6 100644 --- a/src/mdbx_chk.c +++ b/src/mdbx_chk.c @@ -196,7 +196,7 @@ static FILE *MDBX_PRINTF_ARGS(2, 3) static void logger(MDBX_log_level_t level, const char *function, int line, const char *fmt, va_list args) { if (level <= MDBX_LOG_ERROR) - mdbx_env_chk_problem(&chk); + mdbx_env_chk_encount_problem(&chk); const unsigned kind = (level > MDBX_LOG_NOTICE) ? 
level - MDBX_LOG_NOTICE + From 01458065c4dee53dc316f8dd6dbccdc1c731602a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 1 Apr 2024 14:29:52 +0300 Subject: [PATCH 161/443] =?UTF-8?q?mdbx-doc:=20=D0=B1=D0=B0=D0=B7=D0=BE?= =?UTF-8?q?=D0=B2=D0=BE=D0=B5/=D0=BC=D0=B8=D0=BD=D0=B8=D0=BC=D0=B0=D0=BB?= =?UTF-8?q?=D1=8C=D0=BD=D0=BE=D0=B5=20=D0=BE=D0=BF=D0=B8=D1=81=D0=B0=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=20`mdbx=5Fenv=5Fchk()`=20=D0=B8=20=D1=81=D0=B2?= =?UTF-8?q?=D1=8F=D0=B7=D0=B0=D0=BD=D0=BD=D1=8B=D1=85=20=D1=8D=D0=BB=D0=B5?= =?UTF-8?q?=D0=BC=D0=B5=D0=BD=D1=82=D0=BE=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 89 ++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 68 insertions(+), 21 deletions(-) diff --git a/mdbx.h b/mdbx.h index aa9de079..25ff6030 100644 --- a/mdbx.h +++ b/mdbx.h @@ -5541,13 +5541,11 @@ mdbx_cursor_on_first_dup(const MDBX_cursor *cursor); MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cursor_on_last(const MDBX_cursor *cursor); -/** \brief Определяет стоит ли курсор на последнем или единственном мульти-значении - * соответствующем ключу. - * \ingroup c_cursors - * \param [in] cursor Курсор созданный посредством \ref mdbx_cursor_open(). - * \returns Значание \ref MDBX_RESULT_TRUE, либо \ref MDBX_RESULT_FALSE, - * иначе код ошибки. - * \retval MDBX_RESULT_TRUE курсор установлен на последнем или единственном +/** \brief Определяет стоит ли курсор на последнем или единственном + * мульти-значении соответствующем ключу. \ingroup c_cursors \param [in] cursor + * Курсор созданный посредством \ref mdbx_cursor_open(). \returns Значание \ref + * MDBX_RESULT_TRUE, либо \ref MDBX_RESULT_FALSE, иначе код ошибки. \retval + * MDBX_RESULT_TRUE курсор установлен на последнем или единственном * мульти-значении соответствующем ключу. 
* \retval MDBX_RESULT_FALSE курсор НЕ установлен на последнем или единственном * мульти-значении соответствующем ключу. @@ -6073,7 +6071,9 @@ LIBMDBX_API int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *info, size_t bytes); #endif /* Windows */ -/** \brief Флаги/опции для проверки целостности БД. +/** \brief Флаги/опции для проверки целостности базы данных. + * \note Данный API еще не зафиксирован, в последующих версиях могут быть + * незначительные доработки и изменения. * \see mdbx_env_chk() */ enum MDBX_chk_flags_t { /** Режим проверки по-умолчанию, в том числе в режиме только-чтения. */ @@ -6102,7 +6102,7 @@ DEFINE_ENUM_FLAG_OPERATORS(MDBX_chk_flags_t) #endif /** \brief Уровни логирование/детализации информации, - * поставляемой через обратные вызовы при проверке целостности БД. + * поставляемой через обратные вызовы при проверке целостности базы данных. * \see mdbx_env_chk() */ enum MDBX_chk_severity { MDBX_chk_severity_prio_shift = 4, @@ -6121,7 +6121,7 @@ enum MDBX_chk_severity { }; /** \brief Стадии проверки, - * сообщаемые через обратные вызовы при проверке целостности БД. + * сообщаемые через обратные вызовы при проверке целостности базы данных. * \see mdbx_env_chk() */ enum MDBX_chk_stage { MDBX_chk_none, @@ -6138,15 +6138,15 @@ enum MDBX_chk_stage { MDBX_chk_finalize }; -/** \brief Виртуальная строка отчета, формируемого при проверке целостности БД. - * \see mdbx_env_chk() */ +/** \brief Виртуальная строка отчета, формируемого при проверке целостности базы + * данных. \see mdbx_env_chk() */ typedef struct MDBX_chk_line { struct MDBX_chk_context *ctx; uint8_t severity, scope_depth, empty; char *begin, *end, *out; } MDBX_chk_line_t; -/** \brief Проблема обнаруженная при проверке целостности БД. +/** \brief Проблема обнаруженная при проверке целостности базы данных. 
* \see mdbx_env_chk() */ typedef struct MDBX_chk_issue { struct MDBX_chk_issue *next; @@ -6154,7 +6154,7 @@ typedef struct MDBX_chk_issue { const char *caption; } MDBX_chk_issue_t; -/** \brief Иерархический контекст при проверке целостности БД. +/** \brief Иерархический контекст при проверке целостности базы данных. * \see mdbx_env_chk() */ typedef struct MDBX_chk_scope { MDBX_chk_issue_t *issues; @@ -6170,8 +6170,8 @@ typedef struct MDBX_chk_scope { } MDBX_chk_scope_t; /** \brief Пользовательский тип для привязки дополнительных данных, - * связанных с некоторой таблицей ключ-значение, при проверке целостности БД. - * \see mdbx_env_chk() */ + * связанных с некоторой таблицей ключ-значение, при проверке целостности базы + * данных. \see mdbx_env_chk() */ typedef struct MDBX_chk_user_subdb_cookie MDBX_chk_user_subdb_cookie_t; /** \brief Гистограмма с некоторой статистической информацией, @@ -6185,7 +6185,7 @@ struct MDBX_chk_histogram { }; /** \brief Информация о некоторой таблицей ключ-значение, - * при проверке целостности БД. + * при проверке целостности базы данных. * \see mdbx_env_chk() */ typedef struct MDBX_chk_subdb { MDBX_chk_user_subdb_cookie_t *cookie; @@ -6221,7 +6221,7 @@ typedef struct MDBX_chk_subdb { } histogram; } MDBX_chk_subdb_t; -/** \brief Контекст проверки целостности БД. +/** \brief Контекст проверки целостности базы данных. * \see mdbx_env_chk() */ typedef struct MDBX_chk_context { struct MDBX_chk_internal *internal; @@ -6245,7 +6245,21 @@ typedef struct MDBX_chk_context { } result; } MDBX_chk_context_t; -/** FIXME */ +/** \brief Набор функций обратного вызова используемых при проверке целостности + * базы данных. + * + * Функции обратного вызова предназначены для организации взаимодействия с кодом + * приложения. В том числе, для интеграции логики приложения проверяющей + * целостность стуктуры данных выше уровня ключ-значение, подготовки и + * структурированного вывода информации как о ходе, так и результатов проверки. 
+ * + * Все функции обратного вызова опциональны, неиспользуемые указатели должны + * быть установлены в `nullptr`. + * + * \note Данный API еще не зафиксирован, в последующих версиях могут быть + * незначительные доработки и изменения. + * + * \see mdbx_env_chk() */ typedef struct MDBX_chk_callbacks { bool (*check_break)(MDBX_chk_context_t *ctx); int (*scope_push)(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *outer, @@ -6281,13 +6295,46 @@ typedef struct MDBX_chk_callbacks { const uint64_t value, const char *suffix); } MDBX_chk_callbacks_t; -/** FIXME */ +/** \brief Проверяет целостность базы данных. + * + * Взаимодействие с кодом приложения реализуется через функции обратного вызова, + * предоставляемые приложением посредством параметра `cb`. В ходе такого + * взаимодействия приложение может контролировать ход проверки, в том числе, + * пропускать/фильтровать обработку отдельных элементов, а также реализовать + * дополнительную верификацию структуры и/или информации с учетом назначения и + * семантической значимости для приложения. Например, приложение может выполнить + * проверку собственных индексов и корректность записей в БД. Именно с этой + * целью функционал проверки целостности был доработан для интенсивного + * использования обратных вызовов и перенесен из утилиты `mdbx_chk` в основную + * библиотеку. + * + * Проверка выполняется в несколько стадий, начиная с инициализации и до + * завершения, более подробно см \ref enum MDBX_chk_stage. О начале и завершении + * каждой стадии код приложения уведомляется через соответствующие функции + * обратного вызова, более подробно см \ref MDBX_chk_callbacks_t. + * + * \param [in] env Указатель на экземпляр среды. + * \param [in] cb Набор функций обратного вызова. + * \param [in,out] ctx Контекст проверки целостности базы данных, + * где будут формироваться результаты проверки. + * \param [in] flags Флаги/опции проверки целостности базы данных. 
+ * \param [in] verbosity Необходимый уровень детализации информации о ходе + * и результатах проверки. + * \param [in] timeout_seconds_16dot16 Ограничение длительности в 1/65536 долях + * секунды для выполнения проверки, + * либо 0 при отсутствии ограничения. + * \returns Нулевое значение в случае успеха, иначе код ошибки. */ LIBMDBX_API int mdbx_env_chk(MDBX_env *env, const MDBX_chk_callbacks_t *cb, MDBX_chk_context_t *ctx, const enum MDBX_chk_flags_t flags, enum MDBX_chk_severity verbosity, unsigned timeout_seconds_16dot16); -/** FIXME */ + +/** \brief Вспомогательная функция для подсчета проблем детектируемых + * приложением, в том числе, поступающим к приложению через логирование. + * \see mdbx_env_chk() + * \see MDBX_debug_func + * \returns Нулевое значение в случае успеха, иначе код ошибки. */ LIBMDBX_API int mdbx_env_chk_encount_problem(MDBX_chk_context_t *ctx); /** end of chk @} */ From 3670a30c001391150bb4cc686fbc2a8ae9d6d0ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 1 Apr 2024 14:35:21 +0300 Subject: [PATCH 162/443] =?UTF-8?q?mdbx-doc:=20=D0=B4=D0=BE=D1=80=D0=B0?= =?UTF-8?q?=D0=B1=D0=BE=D1=82=D0=BA=D0=B0=20doxygen=20=D0=BA=D0=BE=D0=BC?= =?UTF-8?q?=D0=BC=D0=B5=D0=BD=D1=82=D0=B0=D1=80=D0=B8=D0=B5=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/mdbx.h b/mdbx.h index 25ff6030..9476d0dc 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2721,7 +2721,8 @@ typedef struct MDBX_envinfo MDBX_envinfo; * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin() * \param [out] info The address of an \ref MDBX_envinfo structure * where the information will be copied - * \param [in] bytes The size of \ref MDBX_envinfo. 
+ * \param [in] bytes The actual size of \ref MDBX_envinfo, + * this value is used to provide ABI compatibility. * * \returns A non-zero error value on failure and 0 on success. */ LIBMDBX_API int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, @@ -3017,6 +3018,9 @@ LIBMDBX_INLINE_API(int, mdbx_env_close, (MDBX_env * env)) { * процессе, либо повторные вызовы не приводят к каким-либо действиям или * изменениям. * + * \param [in,out] env Экземпляр среды созданный функцией + * \ref mdbx_env_create(). + * * \returns Ненулевое значение ошибки при сбое и 0 при успешном выполнении, * некоторые возможные ошибки таковы: * @@ -3410,7 +3414,7 @@ MDBX_DEPRECATED LIBMDBX_INLINE_API(int, mdbx_env_set_mapsize, * value. * * \returns A \ref MDBX_RESULT_TRUE or \ref MDBX_RESULT_FALSE value, - * otherwise the error code: + * otherwise the error code. * \retval MDBX_RESULT_TRUE Readahead is reasonable. * \retval MDBX_RESULT_FALSE Readahead is NOT reasonable, * i.e. \ref MDBX_NORDAHEAD is useful to @@ -4350,6 +4354,7 @@ typedef int(MDBX_cmp_func)(const MDBX_val *a, * by current thread. */ LIBMDBX_API int mdbx_dbi_open(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi); +/** \copydoc mdbx_dbi_open() */ LIBMDBX_API int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi); @@ -4371,6 +4376,7 @@ LIBMDBX_API int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, MDBX_DEPRECATED LIBMDBX_API int mdbx_dbi_open_ex(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); +/** \copydoc mdbx_dbi_open_ex() */ MDBX_DEPRECATED LIBMDBX_API int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); @@ -4390,6 +4396,7 @@ mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, * * \returns Ненулевое значение ошибки при сбое и 0 при успешном выполнении. 
*/ LIBMDBX_API int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name); +/** \copydoc mdbx_dbi_rename() */ LIBMDBX_API int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *name); @@ -5491,7 +5498,7 @@ LIBMDBX_API int mdbx_cursor_count(const MDBX_cursor *cursor, size_t *pcount); * \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open(). * * \returns A \ref MDBX_RESULT_TRUE or \ref MDBX_RESULT_FALSE value, - * otherwise the error code: + * otherwise the error code. * \retval MDBX_RESULT_TRUE No more data available or cursor not * positioned * \retval MDBX_RESULT_FALSE A data is available @@ -5506,15 +5513,15 @@ mdbx_cursor_eof(const MDBX_cursor *cursor); * \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open(). * * \returns A MDBX_RESULT_TRUE or MDBX_RESULT_FALSE value, - * otherwise the error code: + * otherwise the error code. * \retval MDBX_RESULT_TRUE Cursor positioned to the first key-value pair * \retval MDBX_RESULT_FALSE Cursor NOT positioned to the first key-value * pair \retval Otherwise the error code */ MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cursor_on_first(const MDBX_cursor *cursor); -/** \brief Определяет стоит ли курсор на первом или единственном мульти-значении - * соответствующем ключу. +/** \brief Определяет стоит ли курсор на первом или единственном + * мульти-значении соответствующем ключу. * \ingroup c_cursors * \param [in] cursor Курсор созданный посредством \ref mdbx_cursor_open(). * \returns Значание \ref MDBX_RESULT_TRUE, либо \ref MDBX_RESULT_FALSE, @@ -5534,7 +5541,7 @@ mdbx_cursor_on_first_dup(const MDBX_cursor *cursor); * \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open(). * * \returns A \ref MDBX_RESULT_TRUE or \ref MDBX_RESULT_FALSE value, - * otherwise the error code: + * otherwise the error code. 
* \retval MDBX_RESULT_TRUE Cursor positioned to the last key-value pair * \retval MDBX_RESULT_FALSE Cursor NOT positioned to the last key-value pair * \retval Otherwise the error code */ @@ -5542,10 +5549,12 @@ MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cursor_on_last(const MDBX_cursor *cursor); /** \brief Определяет стоит ли курсор на последнем или единственном - * мульти-значении соответствующем ключу. \ingroup c_cursors \param [in] cursor - * Курсор созданный посредством \ref mdbx_cursor_open(). \returns Значание \ref - * MDBX_RESULT_TRUE, либо \ref MDBX_RESULT_FALSE, иначе код ошибки. \retval - * MDBX_RESULT_TRUE курсор установлен на последнем или единственном + * мульти-значении соответствующем ключу. + * \ingroup c_cursors + * \param [in] cursor Курсор созданный посредством \ref mdbx_cursor_open(). + * \returns Значение \ref MDBX_RESULT_TRUE, либо \ref MDBX_RESULT_FALSE, + * иначе код ошибки. + * \retval MDBX_RESULT_TRUE курсор установлен на последнем или единственном * мульти-значении соответствующем ключу. * \retval MDBX_RESULT_FALSE курсор НЕ установлен на последнем или единственном * мульти-значении соответствующем ключу. @@ -5689,7 +5698,7 @@ LIBMDBX_API int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, * \param [in] ptr The address of data to check. * * \returns A MDBX_RESULT_TRUE or MDBX_RESULT_FALSE value, - * otherwise the error code: + * otherwise the error code. * \retval MDBX_RESULT_TRUE Given address is on the dirty page. * \retval MDBX_RESULT_FALSE Given address is NOT on the dirty page. * \retval Otherwise the error code.
*/ From bdff60e6a743ab66b28fe76b1ef66416893fd958 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 2 Apr 2024 00:54:41 +0300 Subject: [PATCH 163/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=84=D0=BE=D1=80=D0=BC?= =?UTF-8?q?=D0=B0=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20?= =?UTF-8?q?(=D0=BA=D0=BE=D1=81=D0=BC=D0=B5=D1=82=D0=B8=D0=BA=D0=B0).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mdbx_chk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mdbx_chk.c b/src/mdbx_chk.c index f84a0ad6..12431b10 100644 --- a/src/mdbx_chk.c +++ b/src/mdbx_chk.c @@ -196,7 +196,7 @@ static FILE *MDBX_PRINTF_ARGS(2, 3) static void logger(MDBX_log_level_t level, const char *function, int line, const char *fmt, va_list args) { if (level <= MDBX_LOG_ERROR) - mdbx_env_chk_encount_problem(&chk); + mdbx_env_chk_encount_problem(&chk); const unsigned kind = (level > MDBX_LOG_NOTICE) ? level - MDBX_LOG_NOTICE + From d603de4a87f1c3632fdf57e3fe5e49c557ad94ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 3 Apr 2024 12:51:15 +0300 Subject: [PATCH 164/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BA=D1=80=D0=B8=D1=82?= =?UTF-8?q?=D0=B8=D1=87=D0=B5=D1=81=D0=BA=D0=BE=D0=B9=20=D0=BE=D1=88=D0=B8?= =?UTF-8?q?=D0=B1=D0=BA=D0=B8=20=D0=B2=20`TXN=5FFOREACH=5FDBI=5FFROM`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Какие-либо выпуски и стабильные ветки не были затронуты проблемой. Ошибка была внесена 2023-11-05 коммитом e6af7d7c53428ca2892bcbf7eec1c2acee06fd44 в ветку `devel`. 
Большое спасибо команде Erigon и особенно Алексею Шарову за помощь в поиске причины проблемы. --- src/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index 6d000e9a..9038b8ef 100644 --- a/src/core.c +++ b/src/core.c @@ -3924,8 +3924,10 @@ static __inline size_t dbi_bitmap_ctz(const MDBX_txn *txn, intptr_t bmi) { bitmap_item = TXN->mt_dbi_sparse[0] >> FROM, I = FROM; \ I < TXN->mt_numdbs; ++I) \ if (bitmap_item == 0) { \ - I |= bitmap_chunk - 1; \ + I = (I - 1) | (bitmap_chunk - 1); \ bitmap_item = TXN->mt_dbi_sparse[(1 + I) / bitmap_chunk]; \ + if (!bitmap_item) \ + I += bitmap_chunk; \ continue; \ } else if ((bitmap_item & 1) == 0) { \ size_t bitmap_skip = dbi_bitmap_ctz(txn, bitmap_item); \ From 1727b697a0d8a88ca019ace74a1678832cdb7b81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 3 Apr 2024 19:05:34 +0300 Subject: [PATCH 165/443] =?UTF-8?q?mdbx-doc:=20=D1=83=D1=81=D1=82=D1=80?= =?UTF-8?q?=D0=B0=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D1=82=D0=BB=D0=BE?= =?UTF-8?q?=D0=B6=D0=B5=D0=BD=D0=BD=D1=8B=D1=85=20=D0=BD=D0=B5=D0=B4=D0=BE?= =?UTF-8?q?=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D0=BE=D0=BA=20=D0=B2=20=D0=B4?= =?UTF-8?q?=D0=BE=D0=BA=D1=83=D0=BC=D0=B5=D0=BD=D1=82=D0=B0=D1=86=D0=B8?= =?UTF-8?q?=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 4 +++- src/options.h | 8 +++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 884fb216..5bc42809 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -354,7 +354,7 @@ static MDBX_CXX20_CONSTEXPR void *memcpy(void *dest, const void *src, static MDBX_CXX20_CONSTEXPR int memcmp(const void *a, const void *b, size_t bytes) noexcept; -/// \brief Legacy default allocator +/// \brief Legacy allocator /// but it is recommended to use \ref polymorphic_allocator. 
using legacy_allocator = ::std::string::allocator_type; @@ -3681,6 +3681,8 @@ public: struct LIBMDBX_API_TYPE operate_options { /// \copydoc MDBX_NOTLS bool orphan_read_transactions{false}; + /// \brief Разрешает вложенные транзакции ценой отключения + /// \ref MDBX_WRITEMAP и увеличением накладных расходов. bool nested_write_transactions{false}; /// \copydoc MDBX_EXCLUSIVE bool exclusive{false}; diff --git a/src/options.h b/src/options.h index 21dd57bc..73d892af 100644 --- a/src/options.h +++ b/src/options.h @@ -147,7 +147,7 @@ #error MDBX_DPL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1 #endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */ -/** Controls dirty pages tracking, spilling and persisting in MDBX_WRITEMAP +/** Controls dirty pages tracking, spilling and persisting in `MDBX_WRITEMAP` * mode. 0/OFF = Don't track dirty pages at all, don't spill ones, and use * msync() to persist data. This is by-default on Linux and other systems where * kernel provides properly LRU tracking and effective flushing on-demand. 1/ON @@ -164,14 +164,16 @@ #error MDBX_AVOID_MSYNC must be defined as 0 or 1 #endif /* MDBX_AVOID_MSYNC */ -/** FIXME */ +/** Управляет механизмом поддержки разреженных наборов DBI-хендлов для снижения + * накладных расходов при запуске и обработке транзакций. */ #ifndef MDBX_ENABLE_DBI_SPARSE #define MDBX_ENABLE_DBI_SPARSE 1 #elif !(MDBX_ENABLE_DBI_SPARSE == 0 || MDBX_ENABLE_DBI_SPARSE == 1) #error MDBX_ENABLE_DBI_SPARSE must be defined as 0 or 1 #endif /* MDBX_ENABLE_DBI_SPARSE */ -/** FIXME */ +/** Управляет механизмом отложенного освобождения и поддержки пути быстрого + * открытия DBI-хендлов без захвата блокировок. 
*/ #ifndef MDBX_ENABLE_DBI_LOCKFREE #define MDBX_ENABLE_DBI_LOCKFREE 1 #elif !(MDBX_ENABLE_DBI_LOCKFREE == 0 || MDBX_ENABLE_DBI_LOCKFREE == 1) From e56c73b4e6815be6ec56f37bae98ac3221be6e1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 2 Apr 2024 00:22:09 +0300 Subject: [PATCH 166/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=80=D0=B5=D0=B6=D0=B8=D0=BC?= =?UTF-8?q?=D0=B0=20`MDBX=5FNOSTICKYTHREADS`=20=D0=B2=D0=BC=D0=B5=D1=81?= =?UTF-8?q?=D1=82=D0=BE=20`MDBX=5FNOTLS`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TODO.md | 4 +- docs/_restrictions.md | 24 ++-- docs/_starting.md | 29 +++-- mdbx.h | 128 ++++++++++++++------ mdbx.h++ | 4 +- src/bits.md | 2 +- src/core.c | 253 +++++++++++++++++++++------------------ src/internals.h | 9 +- src/lck-windows.c | 2 +- src/mdbx.c++ | 15 +-- test/config.c++ | 3 +- test/long_stochastic.sh | 4 +- test/main.c++ | 2 +- test/stochastic_small.sh | 2 +- 14 files changed, 283 insertions(+), 198 deletions(-) diff --git a/TODO.md b/TODO.md index d8e2d0b7..b79a5824 100644 --- a/TODO.md +++ b/TODO.md @@ -11,8 +11,7 @@ For the same reason ~~Github~~ is blacklisted forever. So currently most of the links are broken due to noted malicious ~~Github~~ sabotage. - - [Replace SRW-lock on Windows to allow shrink DB with `MDBX_NOTLS` option](https://libmdbx.dqdkfa.ru/dead-github/issues/210). - - [More flexible support of asynchronous runtime/framework(s)](https://libmdbx.dqdkfa.ru/dead-github/issues/200). + - [Replace SRW-lock on Windows to allow shrink DB with `MDBX_NOSTICKYTHREADS` option](https://libmdbx.dqdkfa.ru/dead-github/issues/210). - [Migration guide from LMDB to MDBX](https://libmdbx.dqdkfa.ru/dead-github/issues/199). - [Support for RAW devices](https://libmdbx.dqdkfa.ru/dead-github/issues/124). 
- [Support MessagePack for Keys & Values](https://libmdbx.dqdkfa.ru/dead-github/issues/115). @@ -22,6 +21,7 @@ So currently most of the links are broken due to noted malicious ~~Github~~ sabo Done ---- + - [More flexible support of asynchronous runtime/framework(s)](https://libmdbx.dqdkfa.ru/dead-github/issues/200). - [Move most of `mdbx_chk` functional to the library API](https://libmdbx.dqdkfa.ru/dead-github/issues/204). - [Simple careful mode for working with corrupted DB](https://libmdbx.dqdkfa.ru/dead-github/issues/223). - [Engage an "overlapped I/O" on Windows](https://libmdbx.dqdkfa.ru/dead-github/issues/224). diff --git a/docs/_restrictions.md b/docs/_restrictions.md index d967cca7..64c54de6 100644 --- a/docs/_restrictions.md +++ b/docs/_restrictions.md @@ -190,18 +190,20 @@ readers without writer" case. ## One thread - One transaction - A thread can only use one transaction at a time, plus any nested - read-write transactions in the non-writemap mode. Each transaction - belongs to one thread. The \ref MDBX_NOTLS flag changes this for read-only - transactions. See below. +A thread can only use one transaction at a time, plus any nested +read-write transactions in the non-writemap mode. Each transaction +belongs to one thread. The \ref MDBX_NOSTICKYTHREADS flag changes this, +see below. - Do not start more than one transaction for a one thread. If you think - about this, it's really strange to do something with two data snapshots - at once, which may be different. MDBX checks and preventing this by - returning corresponding error code (\ref MDBX_TXN_OVERLAPPING, \ref MDBX_BAD_RSLOT, - \ref MDBX_BUSY) unless you using \ref MDBX_NOTLS option on the environment. - Nonetheless, with the `MDBX_NOTLS` option, you must know exactly what you - are doing, otherwise you will get deadlocks or reading an alien data. +Do not start more than one transaction for a one thread. 
If you think +about this, it's really strange to do something with two data snapshots +at once, which may be different. MDBX checks for and prevents this by +returning the corresponding error code (\ref MDBX_TXN_OVERLAPPING, +\ref MDBX_BAD_RSLOT, \ref MDBX_BUSY) unless you are using the +\ref MDBX_NOSTICKYTHREADS option on the environment. +Nonetheless, with the `MDBX_NOSTICKYTHREADS` option, you must know +exactly what you are doing, otherwise you will get deadlocks or read +alien data. ## Do not open twice diff --git a/docs/_starting.md b/docs/_starting.md index 30336857..f030ecbf 100644 --- a/docs/_starting.md +++ b/docs/_starting.md @@ -129,20 +129,23 @@ no open MDBX-instance(s) during fork(), or at least close it immediately after necessary) in a child process would be both extreme complicated and so fragile. -Do not start more than one transaction for a one thread. If you think about -this, it's really strange to do something with two data snapshots at once, -which may be different. MDBX checks and preventing this by returning -corresponding error code (\ref MDBX_TXN_OVERLAPPING, \ref MDBX_BAD_RSLOT, \ref MDBX_BUSY) -unless you using \ref MDBX_NOTLS option on the environment. Nonetheless, with the -\ref MDBX_NOTLS option, you must know exactly what you are doing, otherwise you -will get deadlocks or reading an alien data. +Do not start more than one transaction in a single thread. If you think +about this, it's really strange to do something with two data snapshots +at once, which may be different. MDBX checks for and prevents this by +returning the corresponding error code (\ref MDBX_TXN_OVERLAPPING, +\ref MDBX_BAD_RSLOT, \ref MDBX_BUSY) unless you are using the +\ref MDBX_NOSTICKYTHREADS option on the environment. Nonetheless, +with the \ref MDBX_NOSTICKYTHREADS option, you must know exactly what +you are doing, otherwise you will get deadlocks or read alien +data. -Also note that a transaction is tied to one thread by default using Thread -Local Storage.
If you want to pass read-only transactions across threads, -you can use the \ref MDBX_NOTLS option on the environment. Nevertheless, a write -transaction entirely should only be used in one thread from start to finish. -MDBX checks this in a reasonable manner and return the \ref MDBX_THREAD_MISMATCH -error in rules violation. +Also note that a transaction is tied to one thread by default using +Thread Local Storage. If you want to pass transactions across threads, +you can use the \ref MDBX_NOSTICKYTHREADS option on the environment. +Nevertheless, a write transaction must be committed or aborted in the +same thread which it was started. MDBX checks this in a reasonable +manner and return the \ref MDBX_THREAD_MISMATCH error in rules +violation. ## Transactions, rollbacks etc diff --git a/mdbx.h b/mdbx.h index 9476d0dc..d259277f 100644 --- a/mdbx.h +++ b/mdbx.h @@ -1207,28 +1207,80 @@ enum MDBX_env_flags_t { */ MDBX_WRITEMAP = UINT32_C(0x80000), - /** Tie reader locktable slots to read-only transactions - * instead of to threads. + /** Отвязывает транзакции от потоков/threads насколько это возможно. * - * Don't use Thread-Local Storage, instead tie reader locktable slots to - * \ref MDBX_txn objects instead of to threads. So, \ref mdbx_txn_reset() - * keeps the slot reserved for the \ref MDBX_txn object. A thread may use - * parallel read-only transactions. And a read-only transaction may span - * threads if you synchronizes its use. + * Эта опция предназначена для приложений, которые мультиплексируют множество + * пользовательских легковесных потоков выполнения по отдельным потокам + * операционной системы, например как это происходит в средах выполнения + * GoLang и Rust. Таким приложениям также рекомендуется сериализовать + * транзакции записи в одном потоке операционной системы, поскольку блокировка + * записи MDBX использует базовые системные примитивы синхронизации и ничего + * не знает о пользовательских потоках и/или легковесных потоков среды + * выполнения. 
Как минимум, обязательно требуется обеспечить завершение каждой + * пишущей транзакции строго в том же потоке операционной системы, где она была + * запущена. * - * Applications that multiplex many user threads over individual OS threads - * need this option. Such an application must also serialize the write - * transactions in an OS thread, since MDBX's write locking is unaware of - * the user threads. + * \note Начиная с версии v0.13 опция `MDBX_NOSTICKYTHREADS` полностью + * заменяет опцию \ref MDBX_NOTLS. * - * \note Regardless to `MDBX_NOTLS` flag a write transaction entirely should - * always be used in one thread from start to finish. MDBX checks this in a - * reasonable manner and return the \ref MDBX_THREAD_MISMATCH error in rules - * violation. + * При использовании `MDBX_NOSTICKYTHREADS` транзакции становятся не + * ассоциированными с создавшими их потоками выполнения. Поэтому в функциях + * API не выполняется проверка соответствия транзакции и текущего потока + * выполнения. Большинство функций работающих с транзакциями и курсорами + * становится возможным вызывать из любых потоков выполнения. Однако, также + * становится невозможно обнаружить ошибки одновременного использования + * транзакций и/или курсоров в разных потоках. * - * This flag affects only at environment opening but can't be changed after. + * Использование `MDBX_NOSTICKYTHREADS` также сужает возможности по изменению + * размера БД, так как теряется возможность отслеживать работающие с БД потоки + * выполнения и приостанавливать их на время снятия отображения БД в ОЗУ. В + * частности, по этой причине на Windows уменьшение файла БД невозможно до + * закрытия БД последним работающим с ней процессом или до последующего + * открытия БД в режиме чтения-записи. + * + * \warning Вне зависимости от \ref MDBX_NOSTICKYTHREADS и \ref MDBX_NOTLS не + * допускается одновременное использование объектов API из разных потоков + * выполнения! Обеспечение всех мер для исключения одновременного
Обеспечение всех мер для исключения одновременного + * использования объектов API из разных потоков выполнения целиком ложится на + * вас! + * + * \warning Транзакции записи могут быть завершены только в том же потоке + * выполнения где они были запущены. Это ограничение следует из требований + * большинства операционных систем о том, что захваченный примитив + * синхронизации (мьютекс, семафор, критическая секция) должен освобождаться + * только захватившим его потоком выполнения. + * + * \warning Создание курсора в контексте транзакции, привязка курсора к + * транзакции, отвязка курсора от транзакции и закрытие привязанного к + * транзакции курсора, являются операциями использующими как сам курсор так и + * соответствующую транзакцию. Аналогично, завершение или прерывание + * транзакции является операцией использующей как саму транзакцию, так и все + * привязанные к ней курсоры. Во избежание повреждения внутренних структур + * данных, непредсказуемого поведения, разрушение БД и потери данных следует + * не допускать возможности одновременного использования каких-либо курсора + * или транзакций из разных потоков выполнения. + * + * Читающие транзакции при использовании `MDBX_NOSTICKYTHREADS` перестают + * использовать TLS (Thread Local Storage), а слоты блокировок MVCC-снимков в + * таблице читателей привязываются только к транзакциям. Завершение каких-либо + * потоков не приводит к снятию блокировок MVCC-снимков до явного завершения + * транзакций, либо до завершения соответствующего процесса в целом. + * + * Для пишущих транзакций не выполняется проверка соответствия текущего потока + * выполнения и потока создавшего транзакцию. 
Однако, фиксация или прерывание + * пишущих транзакций должны выполняться строго в потоке запустившим + * транзакцию, так как эти операции связаны с захватом и освобождением + * примитивов синхронизации (мьютексов, критических секций), для которых + * большинство операционных систем требует освобождение только потоком + * захватившим ресурс. + * + * Этот флаг вступает в силу при открытии среды и не может быть изменен после. */ - MDBX_NOTLS = UINT32_C(0x200000), + MDBX_NOSTICKYTHREADS = UINT32_C(0x200000), +#ifndef _MSC_VER /* avoid madness MSVC */ + /** \deprecated Please use \ref MDBX_NOSTICKYTHREADS instead. */ + MDBX_NOTLS MDBX_DEPRECATED = MDBX_NOSTICKYTHREADS, +#endif /* avoid madness MSVC */ /** Don't do readahead. * @@ -2121,11 +2173,12 @@ enum MDBX_option_t { * track readers in the the environment. The default is about 100 for 4K * system page size. Starting a read-only transaction normally ties a lock * table slot to the current thread until the environment closes or the thread - * exits. If \ref MDBX_NOTLS is in use, \ref mdbx_txn_begin() instead ties the - * slot to the \ref MDBX_txn object until it or the \ref MDBX_env object is - * destroyed. This option may only set after \ref mdbx_env_create() and before - * \ref mdbx_env_open(), and has an effect only when the database is opened by - * the first process interacts with the database. + * exits. If \ref MDBX_NOSTICKYTHREADS is in use, \ref mdbx_txn_begin() + * instead ties the slot to the \ref MDBX_txn object until it or the \ref + * MDBX_env object is destroyed. This option may only set after \ref + * mdbx_env_create() and before \ref mdbx_env_open(), and has an effect only + * when the database is opened by the first process interacts with the + * database. 
* * \see mdbx_env_set_maxreaders() \see mdbx_env_get_maxreaders() */ MDBX_opt_max_readers, @@ -2389,7 +2442,7 @@ LIBMDBX_API int mdbx_env_get_option(const MDBX_env *env, * * Flags set by mdbx_env_set_flags() are also used: * - \ref MDBX_ENV_DEFAULTS, \ref MDBX_NOSUBDIR, \ref MDBX_RDONLY, - * \ref MDBX_EXCLUSIVE, \ref MDBX_WRITEMAP, \ref MDBX_NOTLS, + * \ref MDBX_EXCLUSIVE, \ref MDBX_WRITEMAP, \ref MDBX_NOSTICKYTHREADS, * \ref MDBX_NORDAHEAD, \ref MDBX_NOMEMINIT, \ref MDBX_COALESCE, * \ref MDBX_LIFORECLAIM. See \ref env_flags section. * @@ -3385,7 +3438,7 @@ LIBMDBX_API int mdbx_env_get_fd(const MDBX_env *env, mdbx_filehandle_t *fd); * 2) Temporary close memory mapped is required to change * geometry, but there read transaction(s) is running * and no corresponding thread(s) could be suspended - * since the \ref MDBX_NOTLS mode is used. + * since the \ref MDBX_NOSTICKYTHREADS mode is used. * \retval MDBX_EACCESS The environment opened in read-only. * \retval MDBX_MAP_FULL Specified size smaller than the space already * consumed by the environment. @@ -3504,11 +3557,11 @@ mdbx_limits_txnsize_max(intptr_t pagesize); * track readers in the the environment. The default is about 100 for 4K system * page size. Starting a read-only transaction normally ties a lock table slot * to the current thread until the environment closes or the thread exits. If - * \ref MDBX_NOTLS is in use, \ref mdbx_txn_begin() instead ties the slot to the - * \ref MDBX_txn object until it or the \ref MDBX_env object is destroyed. - * This function may only be called after \ref mdbx_env_create() and before - * \ref mdbx_env_open(), and has an effect only when the database is opened by - * the first process interacts with the database. + * \ref MDBX_NOSTICKYTHREADS is in use, \ref mdbx_txn_begin() instead ties the + * slot to the \ref MDBX_txn object until it or the \ref MDBX_env object is + * destroyed. 
This function may only be called after \ref mdbx_env_create() and + * before \ref mdbx_env_open(), and has an effect only when the database is + * opened by the first process interacts with the database. * \see mdbx_env_get_maxreaders() * * \param [in] env An environment handle returned @@ -3702,8 +3755,8 @@ mdbx_env_get_userctx(const MDBX_env *env); * \see mdbx_txn_begin() * * \note A transaction and its cursors must only be used by a single thread, - * and a thread may only have a single transaction at a time. If \ref MDBX_NOTLS - * is in use, this does not apply to read-only transactions. + * and a thread may only have a single transaction at a time unless + * the \ref MDBX_NOSTICKYTHREADS is used. * * \note Cursors may not span transactions. * @@ -3764,8 +3817,8 @@ LIBMDBX_API int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, * \see mdbx_txn_begin_ex() * * \note A transaction and its cursors must only be used by a single thread, - * and a thread may only have a single transaction at a time. If \ref MDBX_NOTLS - * is in use, this does not apply to read-only transactions. + * and a thread may only have a single transaction at a time unless + * the \ref MDBX_NOSTICKYTHREADS is used. * * \note Cursors may not span transactions. * @@ -4140,10 +4193,11 @@ LIBMDBX_API int mdbx_txn_break(MDBX_txn *txn); * Abort the read-only transaction like \ref mdbx_txn_abort(), but keep the * transaction handle. Therefore \ref mdbx_txn_renew() may reuse the handle. * This saves allocation overhead if the process will start a new read-only - * transaction soon, and also locking overhead if \ref MDBX_NOTLS is in use. The - * reader table lock is released, but the table slot stays tied to its thread - * or \ref MDBX_txn. Use \ref mdbx_txn_abort() to discard a reset handle, and to - * free its lock table slot if \ref MDBX_NOTLS is in use. + * transaction soon, and also locking overhead if \ref MDBX_NOSTICKYTHREADS is + * in use. 
The reader table lock is released, but the table slot stays tied to + * its thread or \ref MDBX_txn. Use \ref mdbx_txn_abort() to discard a reset + * handle, and to free its lock table slot if \ref MDBX_NOSTICKYTHREADS is in + * use. * * Cursors opened within the transaction must not be used again after this * call, except with \ref mdbx_cursor_renew() and \ref mdbx_cursor_close(). diff --git a/mdbx.h++ b/mdbx.h++ index 5bc42809..f8f6df4c 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3679,8 +3679,8 @@ public: /// \brief Operate options. struct LIBMDBX_API_TYPE operate_options { - /// \copydoc MDBX_NOTLS - bool orphan_read_transactions{false}; + /// \copydoc MDBX_NOSTICKYTHREADS + bool no_sticky_threads{false}; /// \brief Разрешает вложенные транзакции ценой отключения /// \ref MDBX_WRITEMAP и увеличением накладных расходов. bool nested_write_transactions{false}; diff --git a/src/bits.md b/src/bits.md index d8166d16..abcedf8b 100644 --- a/src/bits.md +++ b/src/bits.md @@ -21,7 +21,7 @@ N | MASK | ENV | TXN | DB | PUT | DBI | NOD 18|0004 0000|NOMETASYNC |TXN_NOMETASYNC|CREATE |APPENDDUP | | | | | 19|0008 0000|WRITEMAP |<= | |MULTIPLE | | | | <= | 20|0010 0000|UTTERLY | | | | | | | <= | -21|0020 0000|NOTLS |<= | | | | | | | +21|0020 0000|NOSTICKYTHR|<= | | | | | | | 22|0040 0000|EXCLUSIVE | | | | | | | | 23|0080 0000|NORDAHEAD | | | | | | | | 24|0100 0000|NOMEMINIT |TXN_PREPARE | | | | | | | diff --git a/src/core.c b/src/core.c index 9038b8ef..2f5a7a2f 100644 --- a/src/core.c +++ b/src/core.c @@ -1580,7 +1580,7 @@ __cold int rthc_register(MDBX_env *const env) { rthc_limit *= 2; } - if ((env->me_flags & MDBX_NOTLS) == 0) { + if ((env->me_flags & MDBX_NOSTICKYTHREADS) == 0) { rc = thread_key_create(&env->me_txkey); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; @@ -3275,7 +3275,7 @@ enum { #define TXN_END_UPDATE 0x10 /* update env state (DBIs) */ #define TXN_END_FREE 0x20 /* free txn unless it is MDBX_env.me_txn0 */ #define TXN_END_EOTDONE 0x40 /* txn's cursors already 
closed */ -#define TXN_END_SLOT 0x80 /* release any reader slot if MDBX_NOTLS */ +#define TXN_END_SLOT 0x80 /* release any reader slot if NOSTICKYTHREADS */ static int txn_end(MDBX_txn *txn, const unsigned mode); static __always_inline pgr_t page_get_inline(const uint16_t ILL, @@ -6562,60 +6562,63 @@ __cold static int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, size_bytes == env->me_dxb_mmap.filesize) goto bailout; + /* При использовании MDBX_NOSTICKYTHREADS с транзакциями могут работать любые + * потоки и у нас нет информации о том, какие именно. Поэтому нет возможности + * выполнить remap-действия требующие приостановки работающих с БД потоков. */ + if ((env->me_flags & MDBX_NOSTICKYTHREADS) == 0) { #if defined(_WIN32) || defined(_WIN64) - if ((env->me_flags & MDBX_NOTLS) == 0 && - ((size_bytes < env->me_dxb_mmap.current && mode > implicit_grow) || - limit_bytes != env->me_dxb_mmap.limit)) { - /* 1) Windows allows only extending a read-write section, but not a - * corresponding mapped view. Therefore in other cases we must suspend - * the local threads for safe remap. - * 2) At least on Windows 10 1803 the entire mapped section is unavailable - * for short time during NtExtendSection() or VirtualAlloc() execution. - * 3) Under Wine runtime environment on Linux a section extending is not - * supported. - * - * THEREFORE LOCAL THREADS SUSPENDING IS ALWAYS REQUIRED! */ - array_onstack.limit = ARRAY_LENGTH(array_onstack.handles); - array_onstack.count = 0; - suspended = &array_onstack; - rc = osal_suspend_threads_before_remap(env, &suspended); - if (rc != MDBX_SUCCESS) { - ERROR("failed suspend-for-remap: errcode %d", rc); - goto bailout; - } - mresize_flags |= (mode < explicit_resize) - ? 
MDBX_MRESIZE_MAY_UNMAP - : MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE; - } -#else /* Windows */ - MDBX_lockinfo *const lck = env->me_lck_mmap.lck; - if (mode == explicit_resize && limit_bytes != env->me_dxb_mmap.limit && - !(env->me_flags & MDBX_NOTLS)) { - mresize_flags |= MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE; - if (lck) { - int err = osal_rdt_lock(env) /* lock readers table until remap done */; - if (unlikely(MDBX_IS_ERROR(err))) { - rc = err; + if ((size_bytes < env->me_dxb_mmap.current && mode > implicit_grow) || + limit_bytes != env->me_dxb_mmap.limit) { + /* 1) Windows allows only extending a read-write section, but not a + * corresponding mapped view. Therefore in other cases we must suspend + * the local threads for safe remap. + * 2) At least on Windows 10 1803 the entire mapped section is unavailable + * for short time during NtExtendSection() or VirtualAlloc() execution. + * 3) Under Wine runtime environment on Linux a section extending is not + * supported. + * + * THEREFORE LOCAL THREADS SUSPENDING IS ALWAYS REQUIRED! */ + array_onstack.limit = ARRAY_LENGTH(array_onstack.handles); + array_onstack.count = 0; + suspended = &array_onstack; + rc = osal_suspend_threads_before_remap(env, &suspended); + if (rc != MDBX_SUCCESS) { + ERROR("failed suspend-for-remap: errcode %d", rc); goto bailout; } + mresize_flags |= (mode < explicit_resize) + ? 
MDBX_MRESIZE_MAY_UNMAP + : MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE; + } +#else /* Windows */ + MDBX_lockinfo *const lck = env->me_lck_mmap.lck; + if (mode == explicit_resize && limit_bytes != env->me_dxb_mmap.limit) { + mresize_flags |= MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE; + if (lck) { + int err = osal_rdt_lock(env) /* lock readers table until remap done */; + if (unlikely(MDBX_IS_ERROR(err))) { + rc = err; + goto bailout; + } - /* looking for readers from this process */ - const size_t snap_nreaders = - atomic_load32(&lck->mti_numreaders, mo_AcquireRelease); - eASSERT(env, mode == explicit_resize); - for (size_t i = 0; i < snap_nreaders; ++i) { - if (lck->mti_readers[i].mr_pid.weak == env->me_pid && - lck->mti_readers[i].mr_tid.weak != osal_thread_self()) { - /* the base address of the mapping can't be changed since - * the other reader thread from this process exists. */ - osal_rdt_unlock(env); - mresize_flags &= ~(MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE); - break; + /* looking for readers from this process */ + const size_t snap_nreaders = + atomic_load32(&lck->mti_numreaders, mo_AcquireRelease); + eASSERT(env, mode == explicit_resize); + for (size_t i = 0; i < snap_nreaders; ++i) { + if (lck->mti_readers[i].mr_pid.weak == env->me_pid && + lck->mti_readers[i].mr_tid.weak != osal_thread_self()) { + /* the base address of the mapping can't be changed since + * the other reader thread from this process exists. */ + osal_rdt_unlock(env); + mresize_flags &= ~(MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE); + break; + } } } } - } #endif /* ! Windows */ + } const pgno_t aligned_munlock_pgno = (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) @@ -8616,26 +8619,30 @@ static int meta_sync(const MDBX_env *env, const meta_ptr_t head) { return rc; } +static __inline bool env_txn0_owned(const MDBX_env *env) { + return (env->me_flags & MDBX_NOSTICKYTHREADS) + ? 
(env->me_txn0->mt_owner != 0) + : (env->me_txn0->mt_owner == osal_thread_self()); +} + __cold static int env_sync(MDBX_env *env, bool force, bool nonblock) { - bool locked = false; + if (unlikely(env->me_flags & MDBX_RDONLY)) + return MDBX_EACCESS; + + const bool txn0_owned = env_txn0_owned(env); + bool should_unlock = false; int rc = MDBX_RESULT_TRUE /* means "nothing to sync" */; retry:; unsigned flags = env->me_flags & ~(MDBX_NOMETASYNC | MDBX_SHRINK_ALLOWED); - if (unlikely((flags & (MDBX_RDONLY | MDBX_FATAL_ERROR | MDBX_ENV_ACTIVE)) != + if (unlikely((flags & (MDBX_FATAL_ERROR | MDBX_ENV_ACTIVE)) != MDBX_ENV_ACTIVE)) { - rc = MDBX_EACCESS; - if (!(flags & MDBX_ENV_ACTIVE)) - rc = MDBX_EPERM; - if (flags & MDBX_FATAL_ERROR) - rc = MDBX_PANIC; + rc = (flags & MDBX_FATAL_ERROR) ? MDBX_PANIC : MDBX_EPERM; goto bailout; } - const bool inside_txn = - (!locked && env->me_txn0->mt_owner == osal_thread_self()); const meta_troika_t troika = - (inside_txn | locked) ? env->me_txn0->tw.troika : meta_tap(env); + (txn0_owned | should_unlock) ? 
env->me_txn0->tw.troika : meta_tap(env); const meta_ptr_t head = meta_recent(env, &troika); const uint64_t unsynced_pages = atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed); @@ -8646,7 +8653,7 @@ retry:; goto bailout; } - if (locked && (env->me_flags & MDBX_WRITEMAP) && + if (should_unlock && (env->me_flags & MDBX_WRITEMAP) && unlikely(head.ptr_c->mm_geo.next > bytes2pgno(env, env->me_dxb_mmap.current))) { @@ -8676,8 +8683,8 @@ retry:; osal_monotime() - eoos_timestamp >= autosync_period)) flags &= MDBX_WRITEMAP /* clear flags for full steady sync */; - if (!inside_txn) { - if (!locked) { + if (!txn0_owned) { + if (!should_unlock) { #if MDBX_ENABLE_PGOP_STAT unsigned wops = 0; #endif /* MDBX_ENABLE_PGOP_STAT */ @@ -8723,7 +8730,7 @@ retry:; if (unlikely(err != MDBX_SUCCESS)) return err; - locked = true; + should_unlock = true; #if MDBX_ENABLE_PGOP_STAT env->me_lck->mti_pgop_stat.wops.weak += wops; #endif /* MDBX_ENABLE_PGOP_STAT */ @@ -8737,8 +8744,8 @@ retry:; flags |= MDBX_SHRINK_ALLOWED; } - eASSERT(env, inside_txn || locked); - eASSERT(env, !inside_txn || (flags & MDBX_SHRINK_ALLOWED) == 0); + eASSERT(env, txn0_owned || should_unlock); + eASSERT(env, !txn0_owned || (flags & MDBX_SHRINK_ALLOWED) == 0); if (!head.is_steady && unlikely(env->me_stuck_meta >= 0) && troika.recent != (uint8_t)env->me_stuck_meta) { @@ -8765,7 +8772,7 @@ retry:; rc = meta_sync(env, head); bailout: - if (locked) + if (should_unlock) osal_txn_unlock(env); return rc; } @@ -8854,7 +8861,7 @@ static void txn_valgrind(MDBX_env *env, MDBX_txn *txn) { if (env->me_pid != osal_getpid()) { /* resurrect after fork */ return; - } else if (env->me_txn0 && env->me_txn0->mt_owner == osal_thread_self()) { + } else if (env->me_txn && env_txn0_owned(env)) { /* inside write-txn */ last = meta_recent(env, &env->me_txn0->tw.troika).ptr_v->mm_geo.next; } else if (env->me_flags & MDBX_RDONLY) { @@ -8950,7 +8957,7 @@ static bind_rslot_result bind_rslot(MDBX_env *env, const uintptr_t tid) { 
safe64_reset(&result.rslot->mr_txnid, true); if (slot == nreaders) env->me_lck->mti_numreaders.weak = (uint32_t)++nreaders; - result.rslot->mr_tid.weak = (env->me_flags & MDBX_NOTLS) ? 0 : tid; + result.rslot->mr_tid.weak = (env->me_flags & MDBX_NOSTICKYTHREADS) ? 0 : tid; atomic_store32(&result.rslot->mr_pid, env->me_pid, mo_AcquireRelease); osal_rdt_unlock(env); @@ -8970,12 +8977,12 @@ __cold int mdbx_thread_register(const MDBX_env *env) { return (env->me_flags & MDBX_EXCLUSIVE) ? MDBX_EINVAL : MDBX_EPERM; if (unlikely((env->me_flags & MDBX_ENV_TXKEY) == 0)) { - eASSERT(env, !env->me_lck_mmap.lck || (env->me_flags & MDBX_NOTLS)); - return MDBX_EINVAL /* MDBX_NOTLS mode */; + eASSERT(env, env->me_flags & MDBX_NOSTICKYTHREADS); + return MDBX_EINVAL /* MDBX_NOSTICKYTHREADS mode */; } - eASSERT(env, (env->me_flags & (MDBX_NOTLS | MDBX_ENV_TXKEY | - MDBX_EXCLUSIVE)) == MDBX_ENV_TXKEY); + eASSERT(env, (env->me_flags & (MDBX_NOSTICKYTHREADS | MDBX_ENV_TXKEY)) == + MDBX_ENV_TXKEY); MDBX_reader *r = thread_rthc_get(env->me_txkey); if (unlikely(r != NULL)) { eASSERT(env, r->mr_pid.weak == env->me_pid); @@ -8986,7 +8993,7 @@ __cold int mdbx_thread_register(const MDBX_env *env) { } const uintptr_t tid = osal_thread_self(); - if (env->me_txn0 && unlikely(env->me_txn0->mt_owner == tid) && env->me_txn) + if (env->me_txn && unlikely(env->me_txn0->mt_owner == tid)) return MDBX_TXN_OVERLAPPING; return bind_rslot((MDBX_env *)env, tid).err; } @@ -9000,12 +9007,12 @@ __cold int mdbx_thread_unregister(const MDBX_env *env) { return MDBX_RESULT_TRUE; if (unlikely((env->me_flags & MDBX_ENV_TXKEY) == 0)) { - eASSERT(env, !env->me_lck_mmap.lck || (env->me_flags & MDBX_NOTLS)); - return MDBX_RESULT_TRUE /* MDBX_NOTLS mode */; + eASSERT(env, env->me_flags & MDBX_NOSTICKYTHREADS); + return MDBX_RESULT_TRUE /* MDBX_NOSTICKYTHREADS mode */; } - eASSERT(env, (env->me_flags & (MDBX_NOTLS | MDBX_ENV_TXKEY | - MDBX_EXCLUSIVE)) == MDBX_ENV_TXKEY); + eASSERT(env, (env->me_flags & 
(MDBX_NOSTICKYTHREADS | MDBX_ENV_TXKEY)) == + MDBX_ENV_TXKEY); MDBX_reader *r = thread_rthc_get(env->me_txkey); if (unlikely(r == NULL)) return MDBX_RESULT_TRUE /* not registered */; @@ -9220,7 +9227,7 @@ static bool check_meta_coherency(const MDBX_env *env, } /* Common code for mdbx_txn_begin() and mdbx_txn_renew(). */ -static int txn_renew(MDBX_txn *txn, const unsigned flags) { +static int txn_renew(MDBX_txn *txn, unsigned flags) { MDBX_env *env = txn->mt_env; int rc; @@ -9245,14 +9252,15 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { 0); const uintptr_t tid = osal_thread_self(); + flags |= env->me_flags & (MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP); if (flags & MDBX_TXN_RDONLY) { - eASSERT(env, (flags & ~(MDBX_TXN_RO_BEGIN_FLAGS | MDBX_WRITEMAP)) == 0); - txn->mt_flags = - MDBX_TXN_RDONLY | (env->me_flags & (MDBX_NOTLS | MDBX_WRITEMAP)); + eASSERT(env, (flags & ~(MDBX_TXN_RO_BEGIN_FLAGS | MDBX_WRITEMAP | + MDBX_NOSTICKYTHREADS)) == 0); + txn->mt_flags = flags; MDBX_reader *r = txn->to.reader; STATIC_ASSERT(sizeof(uintptr_t) <= sizeof(r->mr_tid)); if (likely(env->me_flags & MDBX_ENV_TXKEY)) { - eASSERT(env, !(env->me_flags & MDBX_NOTLS)); + eASSERT(env, !(env->me_flags & MDBX_NOSTICKYTHREADS)); r = thread_rthc_get(env->me_txkey); if (likely(r)) { if (unlikely(!r->mr_pid.weak) && @@ -9265,7 +9273,8 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { } } } else { - eASSERT(env, !env->me_lck_mmap.lck || (env->me_flags & MDBX_NOTLS)); + eASSERT(env, + !env->me_lck_mmap.lck || (env->me_flags & MDBX_NOSTICKYTHREADS)); } if (likely(r)) { @@ -9313,9 +9322,9 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { mo_Relaxed); safe64_write(&r->mr_txnid, head.txnid); eASSERT(env, r->mr_pid.weak == osal_getpid()); - eASSERT(env, - r->mr_tid.weak == - ((env->me_flags & MDBX_NOTLS) ? 0 : osal_thread_self())); + eASSERT(env, r->mr_tid.weak == ((env->me_flags & MDBX_NOSTICKYTHREADS) + ? 
0 + : osal_thread_self())); eASSERT(env, r->mr_txnid.weak == head.txnid || (r->mr_txnid.weak >= SAFE64_INVALID_THRESHOLD && head.txnid < env->me_lck->mti_oldest_reader.weak)); @@ -9374,12 +9383,12 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { tASSERT(txn, db_check_flags(txn->mt_dbs[MAIN_DBI].md_flags)); } else { eASSERT(env, (flags & ~(MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_SPILLS | - MDBX_WRITEMAP)) == 0); + MDBX_WRITEMAP | MDBX_NOSTICKYTHREADS)) == 0); if (unlikely(txn->mt_owner == tid || /* not recovery mode */ env->me_stuck_meta >= 0)) return MDBX_BUSY; MDBX_lockinfo *const lck = env->me_lck_mmap.lck; - if (lck && (env->me_flags & MDBX_NOTLS) == 0 && + if (lck && (env->me_flags & MDBX_NOSTICKYTHREADS) == 0 && (mdbx_static.flags & MDBX_DBG_LEGACY_OVERLAP) == 0) { const size_t snap_nreaders = atomic_load32(&lck->mti_numreaders, mo_AcquireRelease); @@ -9639,7 +9648,8 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { * since Wine don't support section extending, * i.e. in both cases unmap+map are required. */ used_bytes < env->me_dbgeo.upper && env->me_dbgeo.grow)) && - /* avoid recursive use SRW */ (txn->mt_flags & MDBX_NOTLS) == 0) { + /* avoid recursive use SRW */ (txn->mt_flags & + MDBX_NOSTICKYTHREADS) == 0) { txn->mt_flags |= MDBX_SHRINK_ALLOWED; osal_srwlock_AcquireShared(&env->me_remap_guard); } @@ -9673,15 +9683,13 @@ static __always_inline int check_txn(const MDBX_txn *txn, int bad_bits) { return MDBX_BAD_TXN; tASSERT(txn, (txn->mt_flags & MDBX_TXN_FINISHED) || - (txn->mt_flags & MDBX_NOTLS) == - ((txn->mt_flags & MDBX_TXN_RDONLY) - ? 
txn->mt_env->me_flags & MDBX_NOTLS - : 0)); + (txn->mt_flags & MDBX_NOSTICKYTHREADS) == + (txn->mt_env->me_flags & MDBX_NOSTICKYTHREADS)); #if MDBX_TXN_CHECKOWNER - STATIC_ASSERT(MDBX_NOTLS > MDBX_TXN_FINISHED + MDBX_TXN_RDONLY); - if (unlikely(txn->mt_owner != osal_thread_self()) && - (txn->mt_flags & (MDBX_NOTLS | MDBX_TXN_FINISHED | MDBX_TXN_RDONLY)) < - (MDBX_TXN_FINISHED | MDBX_TXN_RDONLY)) + STATIC_ASSERT((long)MDBX_NOSTICKYTHREADS > (long)MDBX_TXN_FINISHED); + if ((txn->mt_flags & (MDBX_NOSTICKYTHREADS | MDBX_TXN_FINISHED)) < + MDBX_TXN_FINISHED && + unlikely(txn->mt_owner != osal_thread_self())) return txn->mt_owner ? MDBX_THREAD_MISMATCH : MDBX_BAD_TXN; #endif /* MDBX_TXN_CHECKOWNER */ @@ -9762,7 +9770,6 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, ~flags)) /* write txn in RDONLY env */ return MDBX_EACCESS; - flags |= env->me_flags & MDBX_WRITEMAP; MDBX_txn *txn = nullptr; if (parent) { /* Nested transactions: Max 1 child, write txns only, no writemap */ @@ -9781,10 +9788,11 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, } tASSERT(parent, audit_ex(parent, 0, false) == 0); - flags |= parent->mt_flags & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_SPILLS); + flags |= parent->mt_flags & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_SPILLS | + MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP); } else if (flags & MDBX_TXN_RDONLY) { - if (env->me_txn0 && - unlikely(env->me_txn0->mt_owner == osal_thread_self()) && env->me_txn && + if ((env->me_flags & MDBX_NOSTICKYTHREADS) == 0 && env->me_txn && + unlikely(env->me_txn0->mt_owner == osal_thread_self()) && (mdbx_static.flags & MDBX_DBG_LEGACY_OVERLAP) == 0) return MDBX_TXN_OVERLAPPING; } else { @@ -9967,12 +9975,13 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, eASSERT(env, txn->mt_flags == (MDBX_TXN_RDONLY | MDBX_TXN_FINISHED)); else if (flags & MDBX_TXN_RDONLY) eASSERT(env, (txn->mt_flags & - ~(MDBX_NOTLS | MDBX_TXN_RDONLY | MDBX_WRITEMAP 
| + ~(MDBX_NOSTICKYTHREADS | MDBX_TXN_RDONLY | MDBX_WRITEMAP | /* Win32: SRWL flag */ MDBX_SHRINK_ALLOWED)) == 0); else { - eASSERT(env, (txn->mt_flags & - ~(MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED | MDBX_NOMETASYNC | - MDBX_SAFE_NOSYNC | MDBX_TXN_SPILLS)) == 0); + eASSERT(env, + (txn->mt_flags & + ~(MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED | + MDBX_NOMETASYNC | MDBX_SAFE_NOSYNC | MDBX_TXN_SPILLS)) == 0); assert(!txn->tw.spilled.list && !txn->tw.spilled.least_removed); } txn->mt_signature = MDBX_MT_SIGNATURE; @@ -10409,6 +10418,13 @@ int mdbx_txn_abort(MDBX_txn *txn) { if (unlikely(rc != MDBX_SUCCESS)) return rc; + if ((txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_NOSTICKYTHREADS)) == + MDBX_NOSTICKYTHREADS && + unlikely(txn->mt_owner != osal_thread_self())) { + mdbx_txn_break(txn); + return MDBX_THREAD_MISMATCH; + } + return txn_abort(txn); } @@ -12093,6 +12109,12 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { if (unlikely(txn->mt_flags & MDBX_TXN_RDONLY)) goto done; + if ((txn->mt_flags & MDBX_NOSTICKYTHREADS) && + unlikely(txn->mt_owner != osal_thread_self())) { + rc = MDBX_THREAD_MISMATCH; + goto fail; + } + if (txn->mt_child) { rc = mdbx_txn_commit_ex(txn->mt_child, NULL); tASSERT(txn, txn->mt_child == NULL); @@ -13757,9 +13779,9 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, if (unlikely(rc != MDBX_SUCCESS)) return rc; - const bool need_lock = - !env->me_txn0 || env->me_txn0->mt_owner != osal_thread_self(); - const bool inside_txn = !need_lock && env->me_txn; + const bool txn0_owned = env->me_txn0 && env_txn0_owned(env); + const bool inside_txn = txn0_owned && env->me_txn; + bool should_unlock = false; #if MDBX_DEBUG if (growth_step < 0) { @@ -13770,13 +13792,12 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, #endif /* MDBX_DEBUG */ intptr_t reasonable_maxsize = 0; - bool should_unlock = false; if (env->me_map) { /* env already mapped */ if (unlikely(env->me_flags & 
MDBX_RDONLY)) return MDBX_EACCESS; - if (need_lock) { + if (!txn0_owned) { int err = osal_txn_lock(env, false); if (unlikely(err != MDBX_SUCCESS)) return err; @@ -16024,6 +16045,9 @@ __cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) { #endif /* Windows */ } + if (env->me_txn0 && env->me_txn0->mt_owner == osal_thread_self()) + osal_txn_unlock(env); + eASSERT(env, env->me_signature.weak == 0); rc = env_close(env, false) ? MDBX_PANIC : rc; ENSURE(env, osal_fastmutex_destroy(&env->me_dbi_lock) == MDBX_SUCCESS); @@ -22997,8 +23021,8 @@ __cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, if (unlikely(env->me_flags & MDBX_RDONLY)) return MDBX_EACCESS; - const bool lock_needed = (env->me_flags & MDBX_ENV_ACTIVE) && - env->me_txn0->mt_owner != osal_thread_self(); + const bool lock_needed = + (env->me_flags & MDBX_ENV_ACTIVE) && !env_txn0_owned(env); bool should_unlock = false; if (lock_needed) { rc = osal_txn_lock(env, false); @@ -23233,8 +23257,7 @@ __cold int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, if (unlikely(err != MDBX_SUCCESS)) return err; - if (env->me_txn0 && env->me_txn0->mt_owner == osal_thread_self() && - env->me_txn) + if (env->me_txn && env_txn0_owned(env)) /* inside write-txn */ return stat_acc(env->me_txn, dest, bytes); @@ -26209,7 +26232,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, return err; const bool lock_needed = ((env->me_flags & MDBX_ENV_ACTIVE) && env->me_txn0 && - env->me_txn0->mt_owner != osal_thread_self()); + !env_txn0_owned(env)); bool should_unlock = false; switch (option) { case MDBX_opt_sync_bytes: @@ -26324,7 +26347,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, return MDBX_EACCESS; value = osal_16dot16_to_monotime((uint32_t)value); if (value != env->me_options.gc_time_limit) { - if (env->me_txn && env->me_txn0->mt_owner != osal_thread_self()) + if (env->me_txn && lock_needed) return MDBX_EPERM; env->me_options.gc_time_limit = 
value; if (!env->me_options.flags.non_auto.rp_augment_limit) diff --git a/src/internals.h b/src/internals.h index bb6b5e76..f5213d85 100644 --- a/src/internals.h +++ b/src/internals.h @@ -842,8 +842,9 @@ MDBX_INTERNAL_FUNC int osal_ipclock_destroy(osal_ipclock_t *ipc); * read transactions started by the same thread need no further locking to * proceed. * - * If MDBX_NOTLS is set, the slot address is not saved in thread-specific data. - * No reader table is used if the database is on a read-only filesystem. + * If MDBX_NOSTICKYTHREADS is set, the slot address is not saved in + * thread-specific data. No reader table is used if the database is on a + * read-only filesystem. * * Since the database uses multi-version concurrency control, readers don't * actually need any locking. This table is used to keep track of which @@ -1786,8 +1787,8 @@ log2n_powerof2(size_t value_uintptr) { MDBX_NOMEMINIT | MDBX_COALESCE | MDBX_PAGEPERTURB | MDBX_ACCEDE | \ MDBX_VALIDATION) #define ENV_CHANGELESS_FLAGS \ - (MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOTLS | MDBX_NORDAHEAD | \ - MDBX_LIFORECLAIM | MDBX_EXCLUSIVE) + (MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOSTICKYTHREADS | \ + MDBX_NORDAHEAD | MDBX_LIFORECLAIM | MDBX_EXCLUSIVE) #define ENV_USABLE_FLAGS (ENV_CHANGEABLE_FLAGS | ENV_CHANGELESS_FLAGS) #if !defined(__cplusplus) || CONSTEXPR_ENUM_FLAGS_OPERATIONS diff --git a/src/lck-windows.c b/src/lck-windows.c index bc63170d..241800aa 100644 --- a/src/lck-windows.c +++ b/src/lck-windows.c @@ -326,7 +326,7 @@ static int suspend_and_append(mdbx_handle_array_t **array, MDBX_INTERNAL_FUNC int osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) { - eASSERT(env, (env->me_flags & MDBX_NOTLS) == 0); + eASSERT(env, (env->me_flags & MDBX_NOSTICKYTHREADS) == 0); const uintptr_t CurrentTid = GetCurrentThreadId(); int rc; if (env->me_lck_mmap.lck) { diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 74690740..51a0f35f 100644 --- a/src/mdbx.c++ +++ 
b/src/mdbx.c++ @@ -1216,8 +1216,8 @@ env::operate_parameters::make_flags(bool accede, bool use_subdirectory) const { flags |= MDBX_NOSUBDIR; if (options.exclusive) flags |= MDBX_EXCLUSIVE; - if (options.orphan_read_transactions) - flags |= MDBX_NOTLS; + if (options.no_sticky_threads) + flags |= MDBX_NOSTICKYTHREADS; if (options.disable_readahead) flags |= MDBX_NORDAHEAD; if (options.disable_clear_memory) @@ -1275,9 +1275,10 @@ env::reclaiming_options::reclaiming_options(MDBX_env_flags_t flags) noexcept coalesce((flags & MDBX_COALESCE) ? true : false) {} env::operate_options::operate_options(MDBX_env_flags_t flags) noexcept - : orphan_read_transactions( - ((flags & (MDBX_NOTLS | MDBX_EXCLUSIVE)) == MDBX_NOTLS) ? true - : false), + : no_sticky_threads(((flags & (MDBX_NOSTICKYTHREADS | MDBX_EXCLUSIVE)) == + MDBX_NOSTICKYTHREADS) + ? true + : false), nested_write_transactions((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) ? false : true), exclusive((flags & MDBX_EXCLUSIVE) ? true : false), @@ -1831,8 +1832,8 @@ __cold ::std::ostream &operator<<(::std::ostream &out, static const char comma[] = ", "; const char *delimiter = ""; out << "{"; - if (it.orphan_read_transactions) { - out << delimiter << "orphan_read_transactions"; + if (it.no_sticky_threads) { + out << delimiter << "no_sticky_threads"; delimiter = comma; } if (it.nested_write_transactions) { diff --git a/test/config.c++ b/test/config.c++ index 99577f87..d0e14e86 100644 --- a/test/config.c++ +++ b/test/config.c++ @@ -378,7 +378,8 @@ const struct option_verb mode_bits[] = { {"nosync-safe", unsigned(MDBX_SAFE_NOSYNC)}, {"nometasync", unsigned(MDBX_NOMETASYNC)}, {"writemap", unsigned(MDBX_WRITEMAP)}, - {"notls", unsigned(MDBX_NOTLS)}, + {"nostickythreads", unsigned(MDBX_NOSTICKYTHREADS)}, + {"no-sticky-threads", unsigned(MDBX_NOSTICKYTHREADS)}, {"nordahead", unsigned(MDBX_NORDAHEAD)}, {"nomeminit", unsigned(MDBX_NOMEMINIT)}, {"lifo", unsigned(MDBX_LIFORECLAIM)}, diff --git a/test/long_stochastic.sh 
b/test/long_stochastic.sh index 12b493cb..c74623d5 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -385,9 +385,9 @@ else fi if [ "$EXTRA" != "no" ]; then - options=(writemap lifo notls perturb nomeminit nordahead) + options=(writemap lifo nostickythreads perturb nomeminit nordahead) else - options=(writemap lifo notls) + options=(writemap lifo nostickythreads) fi syncmodes=("" ,+nosync-safe ,+nosync-utterly ,+nometasync) function join { local IFS="$1"; shift; echo "$*"; } diff --git a/test/main.c++ b/test/main.c++ index 16664a2c..e4081b7b 100644 --- a/test/main.c++ +++ b/test/main.c++ @@ -106,7 +106,7 @@ MDBX_NORETURN void usage(void) { " writemap == MDBX_WRITEMAP\n" " nosync-utterly == MDBX_UTTERLY_NOSYNC\n" " perturb == MDBX_PAGEPERTURB\n" - " notls == MDBX_NOTLS\n" + " nostickythreads== MDBX_NOSTICKYTHREADS\n" " nordahead == MDBX_NORDAHEAD\n" " nomeminit == MDBX_NOMEMINIT\n" " --random-writemap[=YES|no] Toggle MDBX_WRITEMAP randomly\n" diff --git a/test/stochastic_small.sh b/test/stochastic_small.sh index 20785a22..136fc7a7 100755 --- a/test/stochastic_small.sh +++ b/test/stochastic_small.sh @@ -351,7 +351,7 @@ else fi syncmodes=("" ,+nosync-safe ,+nosync-utterly) -options=(writemap lifo notls perturb) +options=(writemap lifo nostickythreads perturb) function join { local IFS="$1"; shift; echo "$*"; } From ae5d541efbbafd409b7e2ad2610aef8e539228bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 4 Apr 2024 00:40:04 +0300 Subject: [PATCH 167/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=83=D1=81=D0=BB=D0=BE?= =?UTF-8?q?=D0=B2=D0=B8=D0=B9=20=D0=B4=D0=BB=D1=8F=20`MDBX=5FDEPRECATED`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/mdbx.h 
b/mdbx.h index d259277f..eac303fa 100644 --- a/mdbx.h +++ b/mdbx.h @@ -343,13 +343,14 @@ typedef mode_t mdbx_mode_t; #ifdef __deprecated #define MDBX_DEPRECATED __deprecated #elif defined(DOXYGEN) || \ - (defined(__cplusplus) && __cplusplus >= 201603L && \ - __has_cpp_attribute(maybe_unused) && \ - __has_cpp_attribute(maybe_unused) >= 201603L) || \ + (defined(__cplusplus) && __cplusplus >= 201403L && \ + __has_cpp_attribute(deprecated) && \ + __has_cpp_attribute(deprecated) >= 201309L) || \ (!defined(__cplusplus) && defined(__STDC_VERSION__) && \ - __STDC_VERSION__ > 202005L) + __STDC_VERSION__ >= 202304L) #define MDBX_DEPRECATED [[deprecated]] -#elif defined(__GNUC__) || __has_attribute(__deprecated__) +#elif (defined(__GNUC__) && __GNUC__ > 5) || \ + (__has_attribute(__deprecated__) && !defined(__GNUC__)) #define MDBX_DEPRECATED __attribute__((__deprecated__)) #elif defined(_MSC_VER) #define MDBX_DEPRECATED __declspec(deprecated) From b9fd42b9b3471b988b74c8b76b794b551b5a675e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 4 Apr 2024 11:59:39 +0300 Subject: [PATCH 168/443] =?UTF-8?q?mdbx:=20=D0=BE=D0=B1=D1=8A=D1=8F=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D1=86=D0=B8=D0=B8?= =?UTF-8?q?=20`MDBX=5FCOALESCE`=20=D1=83=D1=81=D1=82=D0=B0=D1=80=D0=B5?= =?UTF-8?q?=D0=B2=D1=88=D0=B5=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- example/example-mdbx.c | 4 ++-- mdbx.h | 6 ++++-- src/internals.h | 4 ++-- src/mdbx.c++ | 4 ++-- test/extra/dupfixed_addodd.c | 4 ++-- test/extra/upsert_alldups.c | 3 +-- 6 files changed, 13 insertions(+), 12 deletions(-) diff --git a/example/example-mdbx.c b/example/example-mdbx.c index 0e6148d9..215a0fca 100644 --- a/example/example-mdbx.c +++ b/example/example-mdbx.c @@ -94,8 +94,8 @@ int main(int argc, char *argv[]) { fprintf(stderr, "mdbx_env_create: (%d) 
%s\n", rc, mdbx_strerror(rc)); goto bailout; } - rc = mdbx_env_open(env, "./example-db", - MDBX_NOSUBDIR | MDBX_COALESCE | MDBX_LIFORECLAIM, 0664); + rc = mdbx_env_open(env, "./example-db", MDBX_NOSUBDIR | MDBX_LIFORECLAIM, + 0664); if (rc != MDBX_SUCCESS) { fprintf(stderr, "mdbx_env_open: (%d) %s\n", rc, mdbx_strerror(rc)); goto bailout; diff --git a/mdbx.h b/mdbx.h index eac303fa..79cf025f 100644 --- a/mdbx.h +++ b/mdbx.h @@ -1327,8 +1327,9 @@ enum MDBX_env_flags_t { * This flag may be changed at any time using `mdbx_env_set_flags()`. */ MDBX_NOMEMINIT = UINT32_C(0x1000000), +#ifndef _MSC_VER /* avoid madness MSVC */ /** Aims to coalesce a Garbage Collection items. - * \note Always enabled since v0.12 + * \deprecated Always enabled since v0.12 and deprecated since v0.13. * * With `MDBX_COALESCE` flag MDBX will aims to coalesce items while recycling * a Garbage Collection. Technically, when possible short lists of pages @@ -1338,7 +1339,8 @@ enum MDBX_env_flags_t { * Unallocated space and reducing the database file. * * This flag may be changed at any time using mdbx_env_set_flags(). */ - MDBX_COALESCE = UINT32_C(0x2000000), + MDBX_COALESCE MDBX_DEPRECATED = UINT32_C(0x2000000), +#endif /* avoid madness MSVC */ /** LIFO policy for recycling a Garbage Collection items. * diff --git a/src/internals.h b/src/internals.h index f5213d85..64bf7355 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1784,8 +1784,8 @@ log2n_powerof2(size_t value_uintptr) { * environment and re-opening it with the new flags. 
*/ #define ENV_CHANGEABLE_FLAGS \ (MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_DEPRECATED_MAPASYNC | \ - MDBX_NOMEMINIT | MDBX_COALESCE | MDBX_PAGEPERTURB | MDBX_ACCEDE | \ - MDBX_VALIDATION) + MDBX_NOMEMINIT | MDBX_DEPRECATED_COALESCE | MDBX_PAGEPERTURB | \ + MDBX_ACCEDE | MDBX_VALIDATION) #define ENV_CHANGELESS_FLAGS \ (MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOSTICKYTHREADS | \ MDBX_NORDAHEAD | MDBX_LIFORECLAIM | MDBX_EXCLUSIVE) diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 51a0f35f..8f4e740d 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -1227,7 +1227,7 @@ env::operate_parameters::make_flags(bool accede, bool use_subdirectory) const { if (options.nested_write_transactions) flags &= ~MDBX_WRITEMAP; if (reclaiming.coalesce) - flags |= MDBX_COALESCE; + flags |= MDBX_env_flags_t(MDBX_DEPRECATED_COALESCE); if (reclaiming.lifo) flags |= MDBX_LIFORECLAIM; switch (durability) { @@ -1272,7 +1272,7 @@ env::durability env::operate_parameters::durability_from_flags( env::reclaiming_options::reclaiming_options(MDBX_env_flags_t flags) noexcept : lifo((flags & MDBX_LIFORECLAIM) ? true : false), - coalesce((flags & MDBX_COALESCE) ? true : false) {} + coalesce((flags & MDBX_DEPRECATED_COALESCE) ? 
true : false) {} env::operate_options::operate_options(MDBX_env_flags_t flags) noexcept : no_sticky_threads(((flags & (MDBX_NOSTICKYTHREADS | MDBX_EXCLUSIVE)) == diff --git a/test/extra/dupfixed_addodd.c b/test/extra/dupfixed_addodd.c index da9ba944..5b666af9 100644 --- a/test/extra/dupfixed_addodd.c +++ b/test/extra/dupfixed_addodd.c @@ -37,8 +37,8 @@ int main() { exit(EXIT_FAILURE); } - rc = mdbx_env_open(env, "./example-db", - MDBX_NOSUBDIR | MDBX_COALESCE | MDBX_LIFORECLAIM, 0664); + rc = mdbx_env_open(env, "./example-db", MDBX_NOSUBDIR | MDBX_LIFORECLAIM, + 0664); if (rc != MDBX_SUCCESS) { fprintf(stderr, "mdbx_env_open: (%d) %s\n", rc, mdbx_strerror(rc)); exit(EXIT_FAILURE); diff --git a/test/extra/upsert_alldups.c b/test/extra/upsert_alldups.c index 702cdb82..7999f081 100644 --- a/test/extra/upsert_alldups.c +++ b/test/extra/upsert_alldups.c @@ -62,8 +62,7 @@ int main(int argc, const char *argv[]) { errmsg = "failed to mdbx_env_create: %s\n"; goto Fail; } - if ((rc = mdbx_env_open( - env, ".", MDBX_NOSUBDIR | MDBX_COALESCE | MDBX_LIFORECLAIM, 0644))) { + if ((rc = mdbx_env_open(env, ".", MDBX_NOSUBDIR | MDBX_LIFORECLAIM, 0644))) { errmsg = "failed to mdbx_env_open: %s\n"; goto Fail; } From e34d4de760744d5937dad075811a954d97e4e07a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 4 Apr 2024 12:53:13 +0300 Subject: [PATCH 169/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 78 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 70 insertions(+), 8 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 19bdbd73..51269f92 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -7,18 +7,73 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic 
## v0.13.1 (в процессе подготовки выпуска) Новая версия с существенным расширением API и добавлением функционала. +В том числе, с незначительным нарушением обратной совместимости API +библиотеки. Новое: + - Перенос функционала утилиты `mdbx_chk` внутрь библиотеки в виде + функции `mdbx_env_chk()` для проверки целостности структуры БД, в том + числе с вовлечением логики приложения. + + - Опция `MDBX_opt_gc_time_limit` для более гибкого контроля времени + расходуемого на поиск последовательностей соседствующих свободных + страниц в GC. + + - Снижение накладных расходов на запуск транзакций в сценариях с + большим количеством DBI-хендов, за счет отложенной/ленивой инициализации + элементов служебных таблиц. В том числе, механизм поддержки разреженных + наборов DBI-хендов, управляемый опцией сборки `MDBX_ENABLE_DBI_SPARSE`, + которая включена по-умолчанию. + + - Снижение накладных расходов на открытие DBI-хендов. В том числе, + механизм отложенного освобождения и поддержки быстрого пути открытия без + использования блокировок, управляемый опцией сборки + `MDBX_ENABLE_DBI_LOCKFREE`, которая включена по-умолчанию. + + - Расширение API позиционирования курсоров более удобными и очевидными + операциями по аналогии с условиями `<`, `<=`, `==`, `>=`, `>` как для + ключей, так и для пар ключ-значение. + + - Функции `mdbx_dbi_rename()` и `mdbx_dbi_rename2()` для переименования таблиц. + + - Функции `mdbx_cursor_unbind()` и `mdbx_txn_release_all_cursors()` для + гибкого управления курсорами в сценариях повторного использования для + уменьшения накладных расходов. + + - Функция `mdbx_env_resurrect_after_fork()` для восстановления открытой + среды работы с БД в дочернем процессе после ветвления/расщепления + процесса. + + - Функция `mdbx_cursor_compare()` для сравнения позиций курсоров + аналогично оператору `<=>`.
+ + - Функции `mdbx_cursor_scan()` и `mdbx_cursor_scan_from()` для + сканирования таблиц с использованием функционального предиката и + уменьшением сопутствующих накладных расходов. + + - Функции `mdbx_cursor_on_first_dup()` и `mdbx_cursor_on_last_dup()` + для оценки позиции курсора. + + - Функция `mdbx_preopen_snapinfo()` для получения информации о БД без + её открытия. + + - Поддержка функций логирования обратного вызова без функционала + `vprintf()`, что существенно облегчает использование логирования в + привязках к другим языкам программирования. + + - Режим работы `MDBX_NOSTICKYTHREADS` вместо `MDBX_NOTLS` для упрощения + интеграции с легковесными потоками/нитями их мультиплексирования вместе + с транзакциями по потокам операционной системы. + + - TODO: Опция `MDBX_opt_prefer_waf_insteadof_balance`. + + - TODO: Опции `MDBX_opt_subpage_limit`, `MDBX_opt_subpage_room_threshold`, `MDBX_opt_subpage_reserve_prereq`, `MDBX_opt_subpage_reserve_limit`. + - Управление основной блокировкой lock/unlock/upgrade/downgrade для координации пишущих транзакций. - - `mdbx_env_chk() `для проверка целостности структуры БД, с переработкой и переносом функционала утилиты `mdbx_chk` внутрь библиотеки. - - `mdbx_dbi_rename()` и `mdbx_dbi_rename()` для переименования таблиц. - - `mdbx_cursor_unbind()` и `mdbx_txn_release_all_cursors()` для управления курсорами. - - `mdbx_env_resurrect_after_fork()` для восстановление открытой среды работы с БД в дочернем процессе после ветвления/расщепления процесса. - - `mdbx_cursor_compare()` для сравнения позиций курсоров. - - `mdbx_cursor_scan()` и `mdbx_cursor_scan_from()` для сканирования таблиц с использованием функционального предиката. - - `mdbx_cursor_on_first_dup()` и `mdbx_cursor_on_last_dup()` для проверки позиции курсора. - - `mdbx_preopen_snapinfo()` для получения информации о БД без её открытия. + + - Функции `mdbx_limits_keysize_min()` и `mdbx_limits_valsize_min()` для + получения нижней границы длины ключей и значений. 
- Расширение и доработка C++ API: @@ -38,6 +93,13 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - поддержка base58 переработана и приведена в соответствии с черновиком RFC, в текущем понимании теперь это одна из самых высокопроизводительных реализаций; - переработка `to_hex()` и `from_hex()`. +Нарушение совместимости: + - Опция `MDBX_COALESCE` объявлена устаревшей, так как соответствующий функционал всегда включен начиная с предыдущей версии 0.12. + - Опция `MDBX_NOTLS` объявлена устаревшей и заменена на `MDBX_NOSTICKYTHREADS`. + - Опция сборки `MDBX_USE_VALGRIND` заменена на общепринятую `ENABLE_MEMCHECK`. + - В структуре `MDBX_envinfo` серии полей вида `meta1`, `meta2` и `meta3` заменены на массивы вида `meta[3]`. + - В шаблонных классах и функциях С++ API по-умолчанию вместо `mdbx::legacy_buffer` использован тип `mdbx::default_buffer` использующий полиморфные аллокаторы С++ 17. + ## v0.13.0 от 2023-04-23 From 4dccc4ab6d930b2be4f788779f1d20833e6fefd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 4 Apr 2024 17:32:49 +0300 Subject: [PATCH 170/443] =?UTF-8?q?mdbx-doc:=20=D0=B8=D1=81=D0=BF=D1=80?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D0=B5?= =?UTF-8?q?=D1=87=D0=B0=D1=82=D0=BE=D0=BA=20=D0=B2=20doxygen-=D0=BA=D0=BE?= =?UTF-8?q?=D0=BC=D0=BC=D0=B5=D0=BD=D1=82=D0=B0=D1=80=D0=B8=D1=8F=D1=85.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mdbx.h b/mdbx.h index 79cf025f..98f9a866 100644 --- a/mdbx.h +++ b/mdbx.h @@ -6109,9 +6109,9 @@ LIBMDBX_API int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target_meta); * работы с БД в процессе её открытия (при удержании блокировок). * * \param [in] pathname Путь к директории или файлу БД. 
- * \param [out] into Указатель на структуру \ref MDBX_envinfo + * \param [out] info Указатель на структуру \ref MDBX_envinfo * для получения информации. - * \param [int] bytes Актуальный размер структуры \ref MDBX_envinfo, это + * \param [in] bytes Актуальный размер структуры \ref MDBX_envinfo, это * значение используется для обеспечения совместимости * ABI. * @@ -6375,7 +6375,7 @@ typedef struct MDBX_chk_callbacks { * библиотеку. * * Проверка выполняется в несколько стадий, начиная с инициализации и до - * завершения, более подробно см \ref enum MDBX_chk_stage. О начале и завершении + * завершения, более подробно см \ref MDBX_chk_stage. О начале и завершении * каждой стадии код приложения уведомляется через соответствующие функции * обратного вызова, более подробно см \ref MDBX_chk_callbacks_t. * From f19753636d2364c43125f972b8d3f29dc9e244b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 4 Apr 2024 22:31:03 +0300 Subject: [PATCH 171/443] =?UTF-8?q?mdbx:=20=D0=BE=D0=B1=D0=BD=D0=BE=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20TODO.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TODO.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/TODO.md b/TODO.md index b79a5824..bf40f6f4 100644 --- a/TODO.md +++ b/TODO.md @@ -11,7 +11,6 @@ For the same reason ~~Github~~ is blacklisted forever. So currently most of the links are broken due to noted malicious ~~Github~~ sabotage. - - [Replace SRW-lock on Windows to allow shrink DB with `MDBX_NOSTICKYTHREADS` option](https://libmdbx.dqdkfa.ru/dead-github/issues/210). - [Migration guide from LMDB to MDBX](https://libmdbx.dqdkfa.ru/dead-github/issues/199). - [Support for RAW devices](https://libmdbx.dqdkfa.ru/dead-github/issues/124). - [Support MessagePack for Keys & Values](https://libmdbx.dqdkfa.ru/dead-github/issues/115). 
@@ -27,3 +26,17 @@ Done - [Engage an "overlapped I/O" on Windows](https://libmdbx.dqdkfa.ru/dead-github/issues/224). - [Large/Overflow pages accounting for dirty-room](https://libmdbx.dqdkfa.ru/dead-github/issues/192). - [Get rid of dirty-pages list in MDBX_WRITEMAP mode](https://libmdbx.dqdkfa.ru/dead-github/issues/193). + +Canceled +-------- + + - [Replace SRW-lock on Windows to allow shrink DB with `MDBX_NOSTICKYTHREADS` option](https://libmdbx.dqdkfa.ru/dead-github/issues/210). + Доработка не может быть реализована, так как замена SRW-блокировки + решает лишь предварительную проблему, но не главную. На Windows + уменьшение размера отображенного в память файла не поддерживается ядром + ОС. Для этого необходимо снять отображение, изменить размер файла и + затем отобразить обратно. В свою очередь, для этого необходимо + приостановить работающие с БД потоки выполняющие транзакции чтения, либо + готовые к такому выполнению. Но в режиме MDBX_NOSTICKYTHREADS нет + возможности отслеживать работающие с БД потоки, а приостановка всех + потоков неприемлема для большинства приложений. 
From 7c6949347317e60b8c06951e529e14f08a210548 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 5 Apr 2024 00:08:09 +0300 Subject: [PATCH 172/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`MDBX=5Fopt=5Fprefer=5Fwaf=5Fi?= =?UTF-8?q?nsteadof=5Fbalance`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 2 +- mdbx.h | 31 ++++++++++++++++++++++++++++--- src/core.c | 21 +++++++++++++++++++++ 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 51269f92..b65bf5ff 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -66,7 +66,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic интеграции с легковесными потоками/нитями их мультиплексирования вместе с транзакциями по потокам операционной системы. - - TODO: Опция `MDBX_opt_prefer_waf_insteadof_balance`. + - Опция `MDBX_opt_prefer_waf_insteadof_balance`. - TODO: Опции `MDBX_opt_subpage_limit`, `MDBX_opt_subpage_room_threshold`, `MDBX_opt_subpage_reserve_prereq`, `MDBX_opt_subpage_reserve_limit`. diff --git a/mdbx.h b/mdbx.h index 98f9a866..e19e47c8 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2320,14 +2320,14 @@ enum MDBX_option_t { MDBX_opt_spill_parent4child_denominator, /** \brief Controls the in-process threshold of semi-empty pages merge. - * \warning This is experimental option and subject for change or removal. * \details This option controls the in-process threshold of minimum page * fill, as used space of percentage of a page. Neighbour pages emptier than * this value are candidates for merging. The threshold value is specified * in 1/65536 of percent, which is equivalent to the 16-dot-16 fixed point * format. 
The specified value must be in the range from 12.5% (almost empty) * to 50% (half empty) which corresponds to the range from 8192 and to 32768 - * in units respectively. */ + * in units respectively. + * \see MDBX_opt_prefer_waf_insteadof_balance */ MDBX_opt_merge_threshold_16dot16_percent, /** \brief Controls the choosing between use write-through disk writes and @@ -2388,7 +2388,32 @@ enum MDBX_option_t { * С другой стороны, при минимальном значении (включая 0) * `MDBX_opt_rp_augment_limit` переработка GC будет ограничиваться * преимущественно затраченным временем. */ - MDBX_opt_gc_time_limit + MDBX_opt_gc_time_limit, + + /** \brief Управляет выбором между стремлением к равномерности наполнения + * страниц, либо уменьшением количества измененных и записанных страниц. + * + * \details После операций удаления страницы содержащие меньше минимума + * ключей, либо опустошенные до \ref MDBX_opt_merge_threshold_16dot16_percent + * подлежат слиянию с одной из соседних. Если страницы справа и слева от + * текущей обе «грязные» (были изменены в ходе транзакции и должны быть + * записаны на диск), либо обе «чисты» (не изменялись в текущей транзакции), + * то целью для слияния всегда выбирается менее заполненная страница. + * Когда же только одна из соседствующих является «грязной», а другая + * «чистой», то возможны две тактики выбора цели для слияния: + * + * - Если `MDBX_opt_prefer_waf_insteadof_balance = True`, то будет выбрана + * уже измененная страница, что НЕ УВЕЛИЧИТ количество измененных страниц + * и объем записи на диск при фиксации текущей транзакции, но в среднем + * будет УВЕЛИЧИВАТЬ неравномерность заполнения страниц. + * + * - Если `MDBX_opt_prefer_waf_insteadof_balance = False`, то будет выбрана + * менее заполненная страница, что УВЕЛИЧИТ количество измененных страниц + * и объем записи на диск при фиксации текущей транзакции, но в среднем + * будет УМЕНЬШАТЬ неравномерность заполнения страниц. 
+ * + * \see MDBX_opt_merge_threshold_16dot16_percent */ + MDBX_opt_prefer_waf_insteadof_balance }; #ifndef __cplusplus /** \ingroup c_settings */ diff --git a/src/core.c b/src/core.c index 2f5a7a2f..ad304319 100644 --- a/src/core.c +++ b/src/core.c @@ -6487,6 +6487,11 @@ static bool default_prefault_write(const MDBX_env *env) { (env->me_flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == MDBX_WRITEMAP; } +static bool default_prefer_waf_insteadof_balance(const MDBX_env *env) { + (void)env; + return false; +} + static void adjust_defaults(MDBX_env *env) { if (!env->me_options.flags.non_auto.rp_augment_limit) env->me_options.rp_augment_limit = default_rp_augment_limit(env); @@ -13692,6 +13697,8 @@ __cold int mdbx_env_create(MDBX_env **penv) { env->me_options.spill_parent4child_denominator = 0; env->me_options.dp_loose_limit = 64; env->me_options.merge_threshold_16dot16_percent = 65536 / 4 /* 25% */; + if (default_prefer_waf_insteadof_balance(env)) + env->me_options.prefer_waf_insteadof_balance = true; #if !(defined(_WIN32) || defined(_WIN64)) env->me_options.writethrough_threshold = @@ -26459,6 +26466,16 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, } break; + case MDBX_opt_prefer_waf_insteadof_balance: + if (value == /* default */ UINT64_MAX) + env->me_options.prefer_waf_insteadof_balance = + default_prefer_waf_insteadof_balance(env); + else if (value > 1) + err = MDBX_EINVAL; + else + env->me_options.prefer_waf_insteadof_balance = value != 0; + break; + default: return MDBX_EINVAL; } @@ -26548,6 +26565,10 @@ __cold int mdbx_env_get_option(const MDBX_env *env, const MDBX_option_t option, *pvalue = env->me_options.prefault_write; break; + case MDBX_opt_prefer_waf_insteadof_balance: + *pvalue = env->me_options.prefer_waf_insteadof_balance; + break; + default: return MDBX_EINVAL; } From 45377f20c5cb3608a6bf61562b207ca936894673 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= 
=?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 10 Apr 2024 21:09:10 +0300 Subject: [PATCH 173/443] =?UTF-8?q?mdbx:=20=D0=BE=D1=82=D0=BA=D0=BB=D1=8E?= =?UTF-8?q?=D1=87=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=83=D0=B6=D0=B5=20=D0=BD?= =?UTF-8?q?=D0=B5=D0=BD=D1=83=D0=B6=D0=BD=D0=BE=D0=B9=20=D0=BE=D1=82=D0=BB?= =?UTF-8?q?=D0=B0=D0=B4=D0=BA=D0=B8=20=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B8?= =?UTF-8?q?=20`txn=5Fmerge()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core.c b/src/core.c index ad304319..20b3588d 100644 --- a/src/core.c +++ b/src/core.c @@ -11940,7 +11940,7 @@ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, } ++w; } - NOTICE("squash to begin for extending-merge %zu -> %zu", d, w - 1); + VERBOSE("squash to begin for extending-merge %zu -> %zu", d, w - 1); d = w - 1; continue; } @@ -11982,7 +11982,7 @@ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, } --w; } - NOTICE("squash to end for shrinking-merge %zu -> %zu", d, w + 1); + VERBOSE("squash to end for shrinking-merge %zu -> %zu", d, w + 1); d = w + 1; continue; } From 95bc96dda370ed009c5678ed3f6e6eceb9f625e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 17 Apr 2024 21:33:30 +0300 Subject: [PATCH 174/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20`update=5Fgc()`=20=D0=B4=D0=BB=D1=8F?= =?UTF-8?q?=20=D1=83=D0=BB=D1=83=D1=87=D1=88=D0=B5=D0=BD=D0=B8=D1=8F=20?= =?UTF-8?q?=D1=81=D1=85=D0=BE=D0=B4=D0=B8=D0=BC=D0=BE=D1=81=D1=82=D0=B8=20?= =?UTF-8?q?=D1=81=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=D0=BC=20=D0=B2=D0=B5=D1=80=D0=BE=D1=8F=D1=82=D0=BD?= =?UTF-8?q?=D0=BE=D1=81=D1=82=D0=B8=20`MDBX=5FPROBLEM`.?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 980 ++++++++++++++++++++++++++++------------------------- 1 file changed, 521 insertions(+), 459 deletions(-) diff --git a/src/core.c b/src/core.c index 20b3588d..62fdf162 100644 --- a/src/core.c +++ b/src/core.c @@ -4341,8 +4341,8 @@ static void dpage_free(MDBX_env *env, MDBX_page *dp, size_t npages) { MDBX_ASAN_UNPOISON_MEMORY_REGION(dp, pgno2bytes(env, npages)); if (unlikely(env->me_flags & MDBX_PAGEPERTURB)) memset(dp, -1, pgno2bytes(env, npages)); - if (npages == 1 && - env->me_dp_reserve_len < env->me_options.dp_reserve_limit) { + if (likely(npages == 1 && + env->me_dp_reserve_len < env->me_options.dp_reserve_limit)) { MDBX_ASAN_POISON_MEMORY_REGION(dp, env->me_psize); MDBX_ASAN_UNPOISON_MEMORY_REGION(&mp_next(dp), sizeof(MDBX_page *)); mp_next(dp) = env->me_dp_reserve; @@ -10599,7 +10599,7 @@ __cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, typedef struct gc_update_context { size_t loop, reserve_adj; size_t retired_stored; - size_t reserved, cleaned_slot, reused_slot, fill_idx; + size_t amount, reserved, cleaned_slot, reused_slot, fill_idx; txnid_t cleaned_id, rid; bool lifo, dense; #if MDBX_ENABLE_BIGFOOT @@ -10617,6 +10617,11 @@ static __inline int gcu_context_init(MDBX_txn *txn, gcu_context_t *ctx) { return cursor_init(&ctx->cursor, txn, FREE_DBI); } +MDBX_MAYBE_UNUSED static __inline const char * +gcu_dbg_prefix(gcu_context_t *ctx) { + return ctx->lifo ? " lifo" : " fifo"; +} + static __always_inline size_t gcu_backlog_size(MDBX_txn *txn) { return MDBX_PNL_GETSIZE(txn->tw.relist) + txn->tw.loose_count; } @@ -10746,7 +10751,7 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx) { return (err != MDBX_NOTFOUND) ? 
err : MDBX_SUCCESS; } -static __inline void gcu_clean_reserved(MDBX_env *env, MDBX_val pnl) { +static __inline void gcu_zeroize_reserved(MDBX_env *env, MDBX_val pnl) { #if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)) /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() * вызванное через макрос DVAL_DEBUG() на выходе @@ -10764,6 +10769,425 @@ static __inline void gcu_clean_reserved(MDBX_env *env, MDBX_val pnl) { memset(pnl.iov_base, 0, pnl.iov_len); } +static int gcu_loose(MDBX_txn *txn, gcu_context_t *ctx) { + tASSERT(txn, txn->tw.loose_count > 0); + /* Return loose page numbers to tw.relist, + * though usually none are left at this point. + * The pages themselves remain in dirtylist. */ + if (unlikely(!txn->tw.lifo_reclaimed && txn->tw.last_reclaimed < 1)) { + TRACE("%s: try allocate gc-slot for %zu loose-pages", gcu_dbg_prefix(ctx), + txn->tw.loose_count); + int err = page_alloc_slowpath(&ctx->cursor, 0, MDBX_ALLOC_RESERVE).err; + if (err == MDBX_SUCCESS) { + TRACE("%s: retry since gc-slot for %zu loose-pages available", + gcu_dbg_prefix(ctx), txn->tw.loose_count); + return MDBX_SUCCESS; + } + + /* Put loose page numbers in tw.retired_pages, + * since unable to return ones to tw.relist. 
*/ + err = pnl_need(&txn->tw.retired_pages, txn->tw.loose_count); + if (unlikely(err != MDBX_SUCCESS)) + return err; + for (MDBX_page *lp = txn->tw.loose_pages; lp; lp = mp_next(lp)) { + pnl_xappend(txn->tw.retired_pages, lp->mp_pgno); + MDBX_ASAN_UNPOISON_MEMORY_REGION(&mp_next(lp), sizeof(MDBX_page *)); + VALGRIND_MAKE_MEM_DEFINED(&mp_next(lp), sizeof(MDBX_page *)); + } + TRACE("%s: append %zu loose-pages to retired-pages", gcu_dbg_prefix(ctx), + txn->tw.loose_count); + } else { + /* Room for loose pages + temp PNL with same */ + int err = pnl_need(&txn->tw.relist, 2 * txn->tw.loose_count + 2); + if (unlikely(err != MDBX_SUCCESS)) + return err; + MDBX_PNL loose = txn->tw.relist + MDBX_PNL_ALLOCLEN(txn->tw.relist) - + txn->tw.loose_count - 1; + size_t count = 0; + for (MDBX_page *lp = txn->tw.loose_pages; lp; lp = mp_next(lp)) { + tASSERT(txn, lp->mp_flags == P_LOOSE); + loose[++count] = lp->mp_pgno; + MDBX_ASAN_UNPOISON_MEMORY_REGION(&mp_next(lp), sizeof(MDBX_page *)); + VALGRIND_MAKE_MEM_DEFINED(&mp_next(lp), sizeof(MDBX_page *)); + } + tASSERT(txn, count == txn->tw.loose_count); + MDBX_PNL_SETSIZE(loose, count); + pnl_sort(loose, txn->mt_next_pgno); + pnl_merge(txn->tw.relist, loose); + TRACE("%s: append %zu loose-pages to reclaimed-pages", gcu_dbg_prefix(ctx), + txn->tw.loose_count); + } + + /* filter-out list of dirty-pages from loose-pages */ + MDBX_dpl *const dl = txn->tw.dirtylist; + if (dl) { + tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + tASSERT(txn, dl->sorted <= dl->length); + size_t w = 0, sorted_out = 0; + for (size_t r = w; ++r <= dl->length;) { + MDBX_page *dp = dl->items[r].ptr; + tASSERT(txn, dp->mp_flags == P_LOOSE || IS_MODIFIABLE(txn, dp)); + tASSERT(txn, dpl_endpgno(dl, r) <= txn->mt_next_pgno); + if ((dp->mp_flags & P_LOOSE) == 0) { + if (++w != r) + dl->items[w] = dl->items[r]; + } else { + tASSERT(txn, dp->mp_flags == P_LOOSE); + sorted_out += dl->sorted >= r; + if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & 
MDBX_WRITEMAP)) + dpage_free(txn->mt_env, dp, 1); + } + } + TRACE("%s: filtered-out loose-pages from %zu -> %zu dirty-pages", + gcu_dbg_prefix(ctx), dl->length, w); + tASSERT(txn, txn->tw.loose_count == dl->length - w); + dl->sorted -= sorted_out; + tASSERT(txn, dl->sorted <= w); + dpl_setlen(dl, w); + dl->pages_including_loose -= txn->tw.loose_count; + txn->tw.dirtyroom += txn->tw.loose_count; + tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == + (txn->mt_parent ? txn->mt_parent->tw.dirtyroom + : txn->mt_env->me_options.dp_limit)); + } else { + tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); + } + txn->tw.loose_pages = NULL; + txn->tw.loose_count = 0; +#if MDBX_ENABLE_REFUND + txn->tw.loose_refund_wl = 0; +#endif /* MDBX_ENABLE_REFUND */ + return MDBX_SUCCESS; +} + +static int gcu_retired(MDBX_txn *txn, gcu_context_t *ctx) { + int err; + if (unlikely(!ctx->retired_stored)) { + /* Make sure last page of GC is touched and on retired-list */ + err = cursor_last(&ctx->cursor, nullptr, nullptr); + if (likely(err == MDBX_SUCCESS)) + err = gcu_touch(ctx); + if (unlikely(err != MDBX_SUCCESS) && err != MDBX_NOTFOUND) + return err; + } + + MDBX_val key, data; +#if MDBX_ENABLE_BIGFOOT + size_t retired_pages_before; + do { + if (ctx->bigfoot > txn->mt_txnid) { + err = gcu_clean_stored_retired(txn, ctx); + if (unlikely(err != MDBX_SUCCESS)) + return err; + tASSERT(txn, ctx->bigfoot <= txn->mt_txnid); + } + + retired_pages_before = MDBX_PNL_GETSIZE(txn->tw.retired_pages); + err = gcu_prepare_backlog(txn, ctx); + if (unlikely(err != MDBX_SUCCESS)) + return err; + if (retired_pages_before != MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { + TRACE("%s: retired-list changed (%zu -> %zu), retry", gcu_dbg_prefix(ctx), + retired_pages_before, MDBX_PNL_GETSIZE(txn->tw.retired_pages)); + break; + } + + pnl_sort(txn->tw.retired_pages, txn->mt_next_pgno); + ctx->retired_stored = 0; + ctx->bigfoot = txn->mt_txnid; + do { + if (ctx->retired_stored) { + err = 
gcu_prepare_backlog(txn, ctx); + if (unlikely(err != MDBX_SUCCESS)) + return err; + if (ctx->retired_stored >= MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { + TRACE("%s: retired-list changed (%zu -> %zu), retry", + gcu_dbg_prefix(ctx), retired_pages_before, + MDBX_PNL_GETSIZE(txn->tw.retired_pages)); + break; + } + } + key.iov_len = sizeof(txnid_t); + key.iov_base = &ctx->bigfoot; + const size_t left = + MDBX_PNL_GETSIZE(txn->tw.retired_pages) - ctx->retired_stored; + const size_t chunk = + (left > txn->mt_env->me_maxgc_ov1page && ctx->bigfoot < MAX_TXNID) + ? txn->mt_env->me_maxgc_ov1page + : left; + data.iov_len = (chunk + 1) * sizeof(pgno_t); + err = cursor_put_nochecklen(&ctx->cursor, &key, &data, MDBX_RESERVE); + if (unlikely(err != MDBX_SUCCESS)) + return err; + +#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)) + /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() + * вызванное через макрос DVAL_DEBUG() на выходе + * из cursor_set(MDBX_SET_KEY), которая вызывается как выше в цикле + * очистки, так и ниже в цикле заполнения зарезервированных элементов. + */ + memset(data.iov_base, 0xBB, data.iov_len); +#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */ + + if (retired_pages_before == MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { + const size_t at = (ctx->lifo == MDBX_PNL_ASCENDING) + ? left - chunk + : ctx->retired_stored; + pgno_t *const begin = txn->tw.retired_pages + at; + /* MDBX_PNL_ASCENDING == false && LIFO == false: + * - the larger pgno is at the beginning of retired list + * and should be placed with the larger txnid. + * MDBX_PNL_ASCENDING == true && LIFO == true: + * - the larger pgno is at the ending of retired list + * and should be placed with the smaller txnid. 
*/ + const pgno_t save = *begin; + *begin = (pgno_t)chunk; + memcpy(data.iov_base, begin, data.iov_len); + *begin = save; + TRACE("%s: put-retired/bigfoot @ %" PRIaTXN + " (slice #%u) #%zu [%zu..%zu] of %zu", + gcu_dbg_prefix(ctx), ctx->bigfoot, + (unsigned)(ctx->bigfoot - txn->mt_txnid), chunk, at, at + chunk, + retired_pages_before); + } + ctx->retired_stored += chunk; + } while (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages) && + (++ctx->bigfoot, true)); + } while (retired_pages_before != MDBX_PNL_GETSIZE(txn->tw.retired_pages)); +#else + /* Write to last page of GC */ + key.iov_len = sizeof(txnid_t); + key.iov_base = &txn->mt_txnid; + do { + gcu_prepare_backlog(txn, ctx); + data.iov_len = MDBX_PNL_SIZEOF(txn->tw.retired_pages); + err = cursor_put_nochecklen(&ctx->cursor, &key, &data, MDBX_RESERVE); + if (unlikely(err != MDBX_SUCCESS)) + return err; + +#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)) + /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() + * вызванное через макрос DVAL_DEBUG() на выходе + * из cursor_set(MDBX_SET_KEY), которая вызывается как выше в цикле + * очистки, так и ниже в цикле заполнения зарезервированных элементов. 
*/ + memset(data.iov_base, 0xBB, data.iov_len); +#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */ + + /* Retry if tw.retired_pages[] grew during the Put() */ + } while (data.iov_len < MDBX_PNL_SIZEOF(txn->tw.retired_pages)); + + ctx->retired_stored = MDBX_PNL_GETSIZE(txn->tw.retired_pages); + pnl_sort(txn->tw.retired_pages, txn->mt_next_pgno); + eASSERT(env, data.iov_len == MDBX_PNL_SIZEOF(txn->tw.retired_pages)); + memcpy(data.iov_base, txn->tw.retired_pages, data.iov_len); + + TRACE("%s: put-retired #%zu @ %" PRIaTXN, gcu_dbg_prefix(ctx), + ctx->retired_stored, txn->mt_txnid); +#endif /* MDBX_ENABLE_BIGFOOT */ + if (LOG_ENABLED(MDBX_LOG_EXTRA)) { + size_t i = ctx->retired_stored; + DEBUG_EXTRA("txn %" PRIaTXN " root %" PRIaPGNO " num %zu, retired-PNL", + txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, i); + for (; i; i--) + DEBUG_EXTRA_PRINT(" %" PRIaPGNO, txn->tw.retired_pages[i]); + DEBUG_EXTRA_PRINT("%s\n", "."); + } + return MDBX_SUCCESS; +} + +typedef struct gcu_rid_result +{ + int err; + txnid_t rid; +} gcu_rid_result; + +static gcu_rid_result gcu_get_rid_for_reclaimed(MDBX_txn *txn, gcu_context_t *ctx, const size_t left) { + gcu_rid_result r; + if (ctx->lifo) { + if (txn->tw.lifo_reclaimed == nullptr) { + txn->tw.lifo_reclaimed = txl_alloc(); + if (unlikely(!txn->tw.lifo_reclaimed)) { + r.err = MDBX_ENOMEM; + goto return_error; + } + } + if (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) < MDBX_TXL_MAX && + left > (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot) * + txn->mt_env->me_maxgc_ov1page && + !ctx->dense) { + /* Hужен свободный для для сохранения списка страниц. 
*/ + bool need_cleanup = false; + txnid_t snap_oldest = 0; + retry_rid: + do { + r.err = page_alloc_slowpath(&ctx->cursor, 0, MDBX_ALLOC_RESERVE).err; + snap_oldest = txn->mt_env->me_lck->mti_oldest_reader.weak; + if (likely(r.err == MDBX_SUCCESS)) { + TRACE("%s: took @%" PRIaTXN " from GC", gcu_dbg_prefix(ctx), + MDBX_PNL_LAST(txn->tw.lifo_reclaimed)); + need_cleanup = true; + } + } while ( + r.err == MDBX_SUCCESS && + MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) < MDBX_TXL_MAX && + left > + (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot) * + txn->mt_env->me_maxgc_ov1page); + + if (likely(r.err == MDBX_SUCCESS)) { + TRACE("%s: got enough from GC.", gcu_dbg_prefix(ctx)); + goto return_continue; + } else if (unlikely(r.err != MDBX_NOTFOUND)) + /* LY: some troubles... */ + goto return_error; + + if (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed)) { + if (need_cleanup) { + txl_sort(txn->tw.lifo_reclaimed); + ctx->cleaned_slot = 0; + } + ctx->rid = MDBX_PNL_LAST(txn->tw.lifo_reclaimed); + } else { + tASSERT(txn, txn->tw.last_reclaimed == 0); + if (unlikely(txn_oldest_reader(txn) != snap_oldest)) + /* should retry page_alloc_slowpath() + * if the oldest reader changes since the last attempt */ + goto retry_rid; + /* no reclaimable GC entries, + * therefore no entries with ID < mdbx_find_oldest(txn) */ + txn->tw.last_reclaimed = ctx->rid = snap_oldest; + TRACE("%s: none recycled yet, set rid to @%" PRIaTXN, + gcu_dbg_prefix(ctx), ctx->rid); + } + + /* В GC нет годных к переработке записей, + * будем использовать свободные id в обратном порядке. 
*/ + while (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) < MDBX_TXL_MAX && + left > (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - + ctx->reused_slot) * + txn->mt_env->me_maxgc_ov1page) { + if (unlikely(ctx->rid <= MIN_TXNID)) { + if (unlikely(MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) <= + ctx->reused_slot)) { + NOTICE("** restart: reserve depleted (reused_gc_slot %zu >= " + "lifo_reclaimed %zu)", + ctx->reused_slot, + MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed)); + goto return_restart; + } + break; + } + + tASSERT(txn, ctx->rid >= MIN_TXNID && ctx->rid <= MAX_TXNID); + ctx->rid -= 1; + MDBX_val key = {&ctx->rid, sizeof(ctx->rid)}, data; + r.err = cursor_set(&ctx->cursor, &key, &data, MDBX_SET_KEY).err; + if (unlikely(r.err == MDBX_SUCCESS)) { + DEBUG("%s: GC's id %" PRIaTXN " is present, going to first", + gcu_dbg_prefix(ctx), ctx->rid); + r.err = cursor_first(&ctx->cursor, &key, nullptr); + if (unlikely(r.err != MDBX_SUCCESS || + key.iov_len != sizeof(txnid_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid GC-key size", (unsigned)key.iov_len); + r.err = MDBX_CORRUPTED; + goto return_error; + } + const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base); + if (unlikely(gc_first <= MIN_TXNID)) { + DEBUG("%s: no free GC's id(s) less than %" PRIaTXN + " (going dense-mode)", + gcu_dbg_prefix(ctx), ctx->rid); + ctx->dense = true; + goto return_restart; + } + ctx->rid = gc_first - 1; + } + + tASSERT(txn, !ctx->dense); + r.err = txl_append(&txn->tw.lifo_reclaimed, ctx->rid); + if (unlikely(r.err != MDBX_SUCCESS)) + goto return_error; + + if (ctx->reused_slot) + /* rare case, but it is better to clear and re-create GC entries + * with less fragmentation. */ + need_cleanup = true; + else + ctx->cleaned_slot += + 1 /* mark cleanup is not needed for added slot. 
*/; + + TRACE("%s: append @%" PRIaTXN + " to lifo-reclaimed, cleaned-gc-slot = %zu", + gcu_dbg_prefix(ctx), ctx->rid, ctx->cleaned_slot); + } + + if (need_cleanup) { + if (ctx->cleaned_slot) { + TRACE("%s: restart to clear and re-create GC entries", + gcu_dbg_prefix(ctx)); + goto return_restart; + } + goto return_continue; + } + } + + const size_t i = + MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot; + tASSERT(txn, i > 0 && i <= MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed)); + r.rid = txn->tw.lifo_reclaimed[i]; + TRACE("%s: take @%" PRIaTXN " from lifo-reclaimed[%zu]", + gcu_dbg_prefix(ctx), r.rid, i); + } else { + tASSERT(txn, txn->tw.lifo_reclaimed == NULL); + if (unlikely(ctx->rid == 0)) { + ctx->rid = txn_oldest_reader(txn); + MDBX_val key; + r.err = cursor_first(&ctx->cursor, &key, nullptr); + if (likely(r.err == MDBX_SUCCESS)) { + if (unlikely(key.iov_len != sizeof(txnid_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid GC-key size", (unsigned)key.iov_len); + r.err = MDBX_CORRUPTED; + goto return_error; + } + const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base); + if (ctx->rid >= gc_first) + ctx->rid = gc_first - 1; + if (unlikely(ctx->rid == 0)) { + ERROR("%s", "** no GC tail-space to store (going dense-mode)"); + ctx->dense = true; + goto return_restart; + } + } else if (r.err != MDBX_NOTFOUND) + return r; + txn->tw.last_reclaimed = ctx->rid; + ctx->cleaned_id = ctx->rid + 1; + } + r.rid = ctx->rid--; + TRACE("%s: take @%" PRIaTXN " from GC", gcu_dbg_prefix(ctx), + r.rid); + } + ++ctx->reused_slot; + r.err = MDBX_SUCCESS; + return r; + +return_continue: + r.err = MDBX_SUCCESS; + r.rid = 0; + return r; + +return_restart: + r.err = MDBX_RESULT_TRUE; + r.rid = 0; + return r; + +return_error: + tASSERT(txn, r.err != MDBX_SUCCESS); + r.rid = 0; + return r; +} + /* Cleanups reclaimed GC (aka freeDB) records, saves the retired-list (aka * freelist) of current transaction to GC, puts back into GC leftover of the * 
reclaimed pages with chunking. This recursive changes the reclaimed-list, @@ -10776,18 +11200,20 @@ static __inline void gcu_clean_reserved(MDBX_env *env, MDBX_val pnl) { static int update_gc(MDBX_txn *txn, gcu_context_t *ctx) { TRACE("\n>>> @%" PRIaTXN, txn->mt_txnid); MDBX_env *const env = txn->mt_env; - const char *const dbg_prefix_mode = ctx->lifo ? " lifo" : " fifo"; - (void)dbg_prefix_mode; ctx->cursor.mc_next = txn->mt_cursors[FREE_DBI]; txn->mt_cursors[FREE_DBI] = &ctx->cursor; + pgno_t prev_next_pgno = 0; /* txn->tw.relist[] can grow and shrink during this call. * txn->tw.last_reclaimed and txn->tw.retired_pages[] can only grow. * But page numbers cannot disappear from txn->tw.retired_pages[]. */ retry_clean_adj: ctx->reserve_adj = 0; retry: - if (ctx->loop++) + ctx->loop += prev_next_pgno == txn->mt_next_pgno; + prev_next_pgno = txn->mt_next_pgno; + + if (ctx->loop) TRACE("%s", " >> restart"); int rc = MDBX_SUCCESS; tASSERT(txn, pnl_check_allocated(txn->tw.relist, @@ -10808,7 +11234,7 @@ retry: ctx->reserved = 0; ctx->cleaned_slot = 0; ctx->reused_slot = 0; - ctx->fill_idx = ~0u; + ctx->amount = ctx->fill_idx = ~0u; ctx->cleaned_id = 0; ctx->rid = txn->tw.last_reclaimed; while (true) { @@ -10853,7 +11279,7 @@ retry: goto bailout; } tASSERT(txn, ctx->cleaned_id <= env->me_lck->mti_oldest_reader.weak); - TRACE("%s: cleanup-reclaimed-id [%zu]%" PRIaTXN, dbg_prefix_mode, + TRACE("%s: cleanup-reclaimed-id [%zu]%" PRIaTXN, gcu_dbg_prefix(ctx), ctx->cleaned_slot, ctx->cleaned_id); tASSERT(txn, *txn->mt_cursors == &ctx->cursor); rc = cursor_del(&ctx->cursor, 0); @@ -10877,9 +11303,11 @@ retry: rc = MDBX_CORRUPTED; goto bailout; } - ctx->rid = ctx->cleaned_id; - ctx->reserved = 0; - ctx->reused_slot = 0; + if (ctx->rid != ctx->cleaned_id) { + ctx->rid = ctx->cleaned_id; + ctx->reserved = 0; + ctx->reused_slot = 0; + } ctx->cleaned_id = unaligned_peek_u64(4, key.iov_base); if (ctx->cleaned_id > txn->tw.last_reclaimed) break; @@ -10890,7 +11318,7 @@ retry: } 
tASSERT(txn, ctx->cleaned_id <= txn->tw.last_reclaimed); tASSERT(txn, ctx->cleaned_id <= env->me_lck->mti_oldest_reader.weak); - TRACE("%s: cleanup-reclaimed-id %" PRIaTXN, dbg_prefix_mode, + TRACE("%s: cleanup-reclaimed-id %" PRIaTXN, gcu_dbg_prefix(ctx), ctx->cleaned_id); tASSERT(txn, *txn->mt_cursors == &ctx->cursor); rc = cursor_del(&ctx->cursor, 0); @@ -10919,247 +11347,33 @@ retry: } } - /* handle loose pages - put ones into the reclaimed- or retired-list */ if (txn->tw.loose_pages) { - tASSERT(txn, txn->tw.loose_count > 0); - /* Return loose page numbers to tw.relist, - * though usually none are left at this point. - * The pages themselves remain in dirtylist. */ - if (unlikely(!txn->tw.lifo_reclaimed && txn->tw.last_reclaimed < 1)) { - TRACE("%s: try allocate gc-slot for %zu loose-pages", dbg_prefix_mode, - txn->tw.loose_count); - rc = page_alloc_slowpath(&ctx->cursor, 0, MDBX_ALLOC_RESERVE).err; - if (rc == MDBX_SUCCESS) { - TRACE("%s: retry since gc-slot for %zu loose-pages available", - dbg_prefix_mode, txn->tw.loose_count); - continue; - } - - /* Put loose page numbers in tw.retired_pages, - * since unable to return them to tw.relist. 
*/ - if (unlikely((rc = pnl_need(&txn->tw.retired_pages, - txn->tw.loose_count)) != 0)) - goto bailout; - for (MDBX_page *lp = txn->tw.loose_pages; lp; lp = mp_next(lp)) { - pnl_xappend(txn->tw.retired_pages, lp->mp_pgno); - MDBX_ASAN_UNPOISON_MEMORY_REGION(&mp_next(lp), sizeof(MDBX_page *)); - VALGRIND_MAKE_MEM_DEFINED(&mp_next(lp), sizeof(MDBX_page *)); - } - TRACE("%s: append %zu loose-pages to retired-pages", dbg_prefix_mode, - txn->tw.loose_count); - } else { - /* Room for loose pages + temp PNL with same */ - rc = pnl_need(&txn->tw.relist, 2 * txn->tw.loose_count + 2); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - MDBX_PNL loose = txn->tw.relist + MDBX_PNL_ALLOCLEN(txn->tw.relist) - - txn->tw.loose_count - 1; - size_t count = 0; - for (MDBX_page *lp = txn->tw.loose_pages; lp; lp = mp_next(lp)) { - tASSERT(txn, lp->mp_flags == P_LOOSE); - loose[++count] = lp->mp_pgno; - MDBX_ASAN_UNPOISON_MEMORY_REGION(&mp_next(lp), sizeof(MDBX_page *)); - VALGRIND_MAKE_MEM_DEFINED(&mp_next(lp), sizeof(MDBX_page *)); - } - tASSERT(txn, count == txn->tw.loose_count); - MDBX_PNL_SETSIZE(loose, count); - pnl_sort(loose, txn->mt_next_pgno); - pnl_merge(txn->tw.relist, loose); - TRACE("%s: append %zu loose-pages to reclaimed-pages", dbg_prefix_mode, - txn->tw.loose_count); - } - - /* filter-out list of dirty-pages from loose-pages */ - MDBX_dpl *const dl = txn->tw.dirtylist; - if (dl) { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - tASSERT(txn, dl->sorted <= dl->length); - size_t w = 0, sorted_out = 0; - for (size_t r = w; ++r <= dl->length;) { - MDBX_page *dp = dl->items[r].ptr; - tASSERT(txn, dp->mp_flags == P_LOOSE || IS_MODIFIABLE(txn, dp)); - tASSERT(txn, dpl_endpgno(dl, r) <= txn->mt_next_pgno); - if ((dp->mp_flags & P_LOOSE) == 0) { - if (++w != r) - dl->items[w] = dl->items[r]; - } else { - tASSERT(txn, dp->mp_flags == P_LOOSE); - sorted_out += dl->sorted >= r; - if (!MDBX_AVOID_MSYNC || !(env->me_flags & MDBX_WRITEMAP)) { - 
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0); - dpage_free(env, dp, 1); - } - } - } - TRACE("%s: filtered-out loose-pages from %zu -> %zu dirty-pages", - dbg_prefix_mode, dl->length, w); - tASSERT(txn, txn->tw.loose_count == dl->length - w); - dl->sorted -= sorted_out; - tASSERT(txn, dl->sorted <= w); - dpl_setlen(dl, w); - dl->pages_including_loose -= txn->tw.loose_count; - txn->tw.dirtyroom += txn->tw.loose_count; - tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == - (txn->mt_parent ? txn->mt_parent->tw.dirtyroom - : txn->mt_env->me_options.dp_limit)); - } else { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); - } - txn->tw.loose_pages = NULL; - txn->tw.loose_count = 0; -#if MDBX_ENABLE_REFUND - txn->tw.loose_refund_wl = 0; -#endif /* MDBX_ENABLE_REFUND */ + /* put loose pages into the reclaimed- or retired-list */ + rc = gcu_loose(txn, ctx); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + if (unlikely(txn->tw.loose_pages)) + continue; } - const size_t amount = MDBX_PNL_GETSIZE(txn->tw.relist); - /* handle retired-list - store ones into single gc-record */ + if (unlikely(ctx->reserved > MDBX_PNL_GETSIZE(txn->tw.relist)) && + (ctx->loop < 5 || ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.relist) > + env->me_maxgc_ov1page / 2)) { + TRACE("%s: reclaimed-list changed %zu -> %zu, retry", gcu_dbg_prefix(ctx), + ctx->amount, MDBX_PNL_GETSIZE(txn->tw.relist)); + ctx->reserve_adj += ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.relist); + goto retry; + } + ctx->amount = MDBX_PNL_GETSIZE(txn->tw.relist); + if (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { - if (unlikely(!ctx->retired_stored)) { - /* Make sure last page of GC is touched and on retired-list */ - rc = cursor_last(&ctx->cursor, nullptr, nullptr); - if (likely(rc == MDBX_SUCCESS)) - rc = gcu_touch(ctx); - if (unlikely(rc != MDBX_SUCCESS) && rc != MDBX_NOTFOUND) - goto bailout; - } - -#if MDBX_ENABLE_BIGFOOT - size_t retired_pages_before; - do { - if 
(ctx->bigfoot > txn->mt_txnid) { - rc = gcu_clean_stored_retired(txn, ctx); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - tASSERT(txn, ctx->bigfoot <= txn->mt_txnid); - } - - retired_pages_before = MDBX_PNL_GETSIZE(txn->tw.retired_pages); - rc = gcu_prepare_backlog(txn, ctx); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - if (retired_pages_before != MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { - TRACE("%s: retired-list changed (%zu -> %zu), retry", dbg_prefix_mode, - retired_pages_before, MDBX_PNL_GETSIZE(txn->tw.retired_pages)); - break; - } - - pnl_sort(txn->tw.retired_pages, txn->mt_next_pgno); - ctx->retired_stored = 0; - ctx->bigfoot = txn->mt_txnid; - do { - if (ctx->retired_stored) { - rc = gcu_prepare_backlog(txn, ctx); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - if (ctx->retired_stored >= - MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { - TRACE("%s: retired-list changed (%zu -> %zu), retry", - dbg_prefix_mode, retired_pages_before, - MDBX_PNL_GETSIZE(txn->tw.retired_pages)); - break; - } - } - key.iov_len = sizeof(txnid_t); - key.iov_base = &ctx->bigfoot; - const size_t left = - MDBX_PNL_GETSIZE(txn->tw.retired_pages) - ctx->retired_stored; - const size_t chunk = - (left > env->me_maxgc_ov1page && ctx->bigfoot < MAX_TXNID) - ? env->me_maxgc_ov1page - : left; - data.iov_len = (chunk + 1) * sizeof(pgno_t); - rc = cursor_put_nochecklen(&ctx->cursor, &key, &data, MDBX_RESERVE); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - -#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)) - /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() - * вызванное через макрос DVAL_DEBUG() на выходе - * из cursor_set(MDBX_SET_KEY), которая вызывается как выше в цикле - * очистки, так и ниже в цикле заполнения зарезервированных элементов. 
- */ - memset(data.iov_base, 0xBB, data.iov_len); -#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */ - - if (retired_pages_before == MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { - const size_t at = (ctx->lifo == MDBX_PNL_ASCENDING) - ? left - chunk - : ctx->retired_stored; - pgno_t *const begin = txn->tw.retired_pages + at; - /* MDBX_PNL_ASCENDING == false && LIFO == false: - * - the larger pgno is at the beginning of retired list - * and should be placed with the larger txnid. - * MDBX_PNL_ASCENDING == true && LIFO == true: - * - the larger pgno is at the ending of retired list - * and should be placed with the smaller txnid. - */ - const pgno_t save = *begin; - *begin = (pgno_t)chunk; - memcpy(data.iov_base, begin, data.iov_len); - *begin = save; - TRACE("%s: put-retired/bigfoot @ %" PRIaTXN - " (slice #%u) #%zu [%zu..%zu] of %zu", - dbg_prefix_mode, ctx->bigfoot, - (unsigned)(ctx->bigfoot - txn->mt_txnid), chunk, at, - at + chunk, retired_pages_before); - } - ctx->retired_stored += chunk; - } while (ctx->retired_stored < - MDBX_PNL_GETSIZE(txn->tw.retired_pages) && - (++ctx->bigfoot, true)); - } while (retired_pages_before != MDBX_PNL_GETSIZE(txn->tw.retired_pages)); -#else - /* Write to last page of GC */ - key.iov_len = sizeof(txnid_t); - key.iov_base = &txn->mt_txnid; - do { - gcu_prepare_backlog(txn, ctx); - data.iov_len = MDBX_PNL_SIZEOF(txn->tw.retired_pages); - rc = cursor_put_nochecklen(&ctx->cursor, &key, &data, MDBX_RESERVE); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - -#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)) - /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() - * вызванное через макрос DVAL_DEBUG() на выходе - * из cursor_set(MDBX_SET_KEY), которая вызывается как выше в цикле - * очистки, так и ниже в цикле заполнения зарезервированных элементов. 
- */ - memset(data.iov_base, 0xBB, data.iov_len); -#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */ - - /* Retry if tw.retired_pages[] grew during the Put() */ - } while (data.iov_len < MDBX_PNL_SIZEOF(txn->tw.retired_pages)); - - ctx->retired_stored = MDBX_PNL_GETSIZE(txn->tw.retired_pages); - pnl_sort(txn->tw.retired_pages, txn->mt_next_pgno); - eASSERT(env, data.iov_len == MDBX_PNL_SIZEOF(txn->tw.retired_pages)); - memcpy(data.iov_base, txn->tw.retired_pages, data.iov_len); - - TRACE("%s: put-retired #%zu @ %" PRIaTXN, dbg_prefix_mode, - ctx->retired_stored, txn->mt_txnid); -#endif /* MDBX_ENABLE_BIGFOOT */ - if (LOG_ENABLED(MDBX_LOG_EXTRA)) { - size_t i = ctx->retired_stored; - DEBUG_EXTRA("txn %" PRIaTXN " root %" PRIaPGNO " num %zu, retired-PNL", - txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, i); - for (; i; i--) - DEBUG_EXTRA_PRINT(" %" PRIaPGNO, txn->tw.retired_pages[i]); - DEBUG_EXTRA_PRINT("%s\n", "."); - } - if (unlikely(amount != MDBX_PNL_GETSIZE(txn->tw.relist) && - ctx->reserved)) { - TRACE("%s: reclaimed-list changed %zu -> %zu, retry", dbg_prefix_mode, - amount, MDBX_PNL_GETSIZE(txn->tw.relist)); - goto retry_clean_adj /* rare case, but avoids GC fragmentation - and one cycle. 
*/ - ; - } + /* store retired-list into GC */ + rc = gcu_retired(txn, ctx); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; continue; } - /* handle reclaimed and lost pages - merge and store both into gc */ tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->mt_next_pgno - MDBX_ENABLE_REFUND)); tASSERT(txn, txn->tw.loose_count == 0); @@ -11170,197 +11384,49 @@ retry: if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } - const size_t left = amount - ctx->reserved - ctx->reserve_adj; - TRACE("%s: amount %zu, settled %zd, reserve_adj %zu, left %zd, " + const size_t left = ctx->amount - ctx->reserved - ctx->reserve_adj; + TRACE("%s: amount %zu, reserved %zd, reserve_adj %zu, left %zd, " "lifo-reclaimed-slots %zu, " "reused-gc-slots %zu", - dbg_prefix_mode, amount, ctx->reserved, ctx->reserve_adj, left, + gcu_dbg_prefix(ctx), ctx->amount, ctx->reserved, ctx->reserve_adj, + left, txn->tw.lifo_reclaimed ? MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) : 0, ctx->reused_slot); if (0 >= (intptr_t)left) break; - const size_t prefer_max_scatter = MDBX_ENABLE_BIGFOOT ? MDBX_TXL_MAX : 257; - txnid_t reservation_gc_id; - if (ctx->lifo) { - if (txn->tw.lifo_reclaimed == nullptr) { - txn->tw.lifo_reclaimed = txl_alloc(); - if (unlikely(!txn->tw.lifo_reclaimed)) { - rc = MDBX_ENOMEM; - goto bailout; - } - } - if (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) < prefer_max_scatter && - left > (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot) * - env->me_maxgc_ov1page && - !ctx->dense) { - /* Hужен свободный для для сохранения списка страниц. 
*/ - bool need_cleanup = false; - txnid_t snap_oldest = 0; - retry_rid: - do { - rc = page_alloc_slowpath(&ctx->cursor, 0, MDBX_ALLOC_RESERVE).err; - snap_oldest = env->me_lck->mti_oldest_reader.weak; - if (likely(rc == MDBX_SUCCESS)) { - TRACE("%s: took @%" PRIaTXN " from GC", dbg_prefix_mode, - MDBX_PNL_LAST(txn->tw.lifo_reclaimed)); - need_cleanup = true; - } - } while ( - rc == MDBX_SUCCESS && - MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) < prefer_max_scatter && - left > - (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot) * - env->me_maxgc_ov1page); - - if (likely(rc == MDBX_SUCCESS)) { - TRACE("%s: got enough from GC.", dbg_prefix_mode); - continue; - } else if (unlikely(rc != MDBX_NOTFOUND)) - /* LY: some troubles... */ - goto bailout; - - if (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed)) { - if (need_cleanup) { - txl_sort(txn->tw.lifo_reclaimed); - ctx->cleaned_slot = 0; - } - ctx->rid = MDBX_PNL_LAST(txn->tw.lifo_reclaimed); - } else { - tASSERT(txn, txn->tw.last_reclaimed == 0); - if (unlikely(txn_oldest_reader(txn) != snap_oldest)) - /* should retry page_alloc_slowpath() - * if the oldest reader changes since the last attempt */ - goto retry_rid; - /* no reclaimable GC entries, - * therefore no entries with ID < mdbx_find_oldest(txn) */ - txn->tw.last_reclaimed = ctx->rid = snap_oldest; - TRACE("%s: none recycled yet, set rid to @%" PRIaTXN, dbg_prefix_mode, - ctx->rid); - } - - /* В GC нет годных к переработке записей, - * будем использовать свободные id в обратном порядке. 
*/ - while (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) < prefer_max_scatter && - left > (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - - ctx->reused_slot) * - env->me_maxgc_ov1page) { - if (unlikely(ctx->rid <= MIN_TXNID)) { - if (unlikely(MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) <= - ctx->reused_slot)) { - NOTICE("** restart: reserve depleted (reused_gc_slot %zu >= " - "lifo_reclaimed %zu" PRIaTXN, - ctx->reused_slot, - MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed)); - goto retry; - } - break; - } - - tASSERT(txn, ctx->rid >= MIN_TXNID && ctx->rid <= MAX_TXNID); - ctx->rid -= 1; - key.iov_base = &ctx->rid; - key.iov_len = sizeof(ctx->rid); - rc = cursor_set(&ctx->cursor, &key, &data, MDBX_SET_KEY).err; - if (unlikely(rc == MDBX_SUCCESS)) { - DEBUG("%s: GC's id %" PRIaTXN " is present, going to first", - dbg_prefix_mode, ctx->rid); - rc = cursor_first(&ctx->cursor, &key, nullptr); - if (unlikely(rc != MDBX_SUCCESS || - key.iov_len != sizeof(txnid_t))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid GC-key size", (unsigned)key.iov_len); - rc = MDBX_CORRUPTED; - goto bailout; - } - const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base); - if (gc_first <= MIN_TXNID) { - DEBUG("%s: no free GC's id(s) less than %" PRIaTXN - " (going dense-mode)", - dbg_prefix_mode, ctx->rid); - ctx->dense = true; - break; - } - ctx->rid = gc_first - 1; - } - - eASSERT(env, !ctx->dense); - rc = txl_append(&txn->tw.lifo_reclaimed, ctx->rid); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - - if (ctx->reused_slot) - /* rare case, but it is better to clear and re-create GC entries - * with less fragmentation. */ - need_cleanup = true; - else - ctx->cleaned_slot += - 1 /* mark cleanup is not needed for added slot. 
*/; - - TRACE("%s: append @%" PRIaTXN - " to lifo-reclaimed, cleaned-gc-slot = %zu", - dbg_prefix_mode, ctx->rid, ctx->cleaned_slot); - } - - if (need_cleanup || ctx->dense) { - if (ctx->cleaned_slot) { - TRACE("%s: restart to clear and re-create GC entries", - dbg_prefix_mode); - goto retry; - } - continue; - } - } - - const size_t i = - MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot; - tASSERT(txn, i > 0 && i <= MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed)); - reservation_gc_id = txn->tw.lifo_reclaimed[i]; - TRACE("%s: take @%" PRIaTXN " from lifo-reclaimed[%zu]", dbg_prefix_mode, - reservation_gc_id, i); - } else { - tASSERT(txn, txn->tw.lifo_reclaimed == NULL); - if (unlikely(ctx->rid == 0)) { - ctx->rid = txn_oldest_reader(txn); - rc = cursor_first(&ctx->cursor, &key, nullptr); - if (likely(rc == MDBX_SUCCESS)) { - if (unlikely(key.iov_len != sizeof(txnid_t))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid GC-key size", (unsigned)key.iov_len); - rc = MDBX_CORRUPTED; - goto bailout; - } - const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base); - if (ctx->rid >= gc_first) - ctx->rid = gc_first - 1; - if (unlikely(ctx->rid == 0)) { - ERROR("%s", "** no GC tail-space to store (going dense-mode)"); - ctx->dense = true; - goto retry_clean_adj; - } - } else if (rc != MDBX_NOTFOUND) - goto bailout; - txn->tw.last_reclaimed = ctx->rid; - ctx->cleaned_id = ctx->rid + 1; - } - reservation_gc_id = ctx->rid--; - TRACE("%s: take @%" PRIaTXN " from head-gc-id", dbg_prefix_mode, - reservation_gc_id); + const gcu_rid_result rid_result = gcu_get_rid_for_reclaimed(txn, ctx, left); + if (unlikely(!rid_result.rid)) { + rc = rid_result.err; + if (likely(rc == MDBX_SUCCESS)) + continue; + if (likely(rc == MDBX_RESULT_TRUE)) + goto retry; + goto bailout; } - ++ctx->reused_slot; + tASSERT(txn, rid_result.err == MDBX_SUCCESS); + const txnid_t reservation_gc_id = rid_result.rid; + // const size_t prefer_max_scatter = MDBX_ENABLE_BIGFOOT ? 
MDBX_TXL_MAX : 257; size_t chunk = left; - if (unlikely(chunk > env->me_maxgc_ov1page)) { + if (unlikely(left > env->me_maxgc_ov1page)) { const size_t avail_gc_slots = txn->tw.lifo_reclaimed ? MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot + 1 : (ctx->rid < INT16_MAX) ? (size_t)ctx->rid : INT16_MAX; - if (avail_gc_slots > 1) { + if (likely(avail_gc_slots > 1)) { #if MDBX_ENABLE_BIGFOOT - chunk = (chunk < env->me_maxgc_ov1page * (size_t)2) - ? chunk / 2 - : env->me_maxgc_ov1page; + chunk = env->me_maxgc_ov1page; + if (avail_gc_slots < INT16_MAX && + unlikely(left > env->me_maxgc_ov1page * avail_gc_slots)) + /* TODO: Можно смотреть последовательности какой длины есть в relist + * и пробовать нарезать куски соответствующего размера. + * Смысл в том, чтобы не дробить последовательности страниц, + * а использовать целиком. */ + chunk = env->me_maxgc_ov1page + + left / (env->me_maxgc_ov1page * avail_gc_slots) * + env->me_maxgc_ov1page; #else if (chunk < env->me_maxgc_ov1page * 2) chunk /= 2; @@ -11377,7 +11443,7 @@ retry: size_t avail = ((pgno2bytes(env, span) - PAGEHDRSZ) / sizeof(pgno_t)) /* - 1 + span */; if (tail > avail) { - for (size_t i = amount - span; i > 0; --i) { + for (size_t i = ctx->amount - span; i > 0; --i) { if (MDBX_PNL_ASCENDING ? 
(txn->tw.relist[i] + span) : (txn->tw.relist[i] - span) == txn->tw.relist[i + span]) { @@ -11405,9 +11471,9 @@ retry: TRACE("%s: gc_rid %" PRIaTXN ", reused_gc_slot %zu, reservation-id " "%" PRIaTXN, - dbg_prefix_mode, ctx->rid, ctx->reused_slot, reservation_gc_id); + gcu_dbg_prefix(ctx), ctx->rid, ctx->reused_slot, reservation_gc_id); - TRACE("%s: chunk %zu, gc-per-ovpage %u", dbg_prefix_mode, chunk, + TRACE("%s: chunk %zu, gc-per-ovpage %u", gcu_dbg_prefix(ctx), chunk, env->me_maxgc_ov1page); tASSERT(txn, reservation_gc_id <= env->me_lck->mti_oldest_reader.weak); @@ -11422,9 +11488,9 @@ retry: } key.iov_len = sizeof(reservation_gc_id); - key.iov_base = &reservation_gc_id; + key.iov_base = (void*)&reservation_gc_id; data.iov_len = (chunk + 1) * sizeof(pgno_t); - TRACE("%s: reserve %zu [%zu...%zu) @%" PRIaTXN, dbg_prefix_mode, chunk, + TRACE("%s: reserve %zu [%zu...%zu) @%" PRIaTXN, gcu_dbg_prefix(ctx), chunk, ctx->reserved + 1, ctx->reserved + chunk + 1, reservation_gc_id); gcu_prepare_backlog(txn, ctx); rc = cursor_put_nochecklen(&ctx->cursor, &key, &data, @@ -11434,19 +11500,10 @@ retry: if (unlikely(rc != MDBX_SUCCESS)) goto bailout; - gcu_clean_reserved(env, data); + gcu_zeroize_reserved(env, data); ctx->reserved += chunk; - TRACE("%s: settled %zu (+%zu), continue", dbg_prefix_mode, ctx->reserved, - chunk); - - if (txn->tw.lifo_reclaimed && - unlikely(amount < MDBX_PNL_GETSIZE(txn->tw.relist)) && - (ctx->loop < 5 || MDBX_PNL_GETSIZE(txn->tw.relist) - amount > - env->me_maxgc_ov1page / 2)) { - NOTICE("** restart: reclaimed-list growth %zu -> %zu", amount, - MDBX_PNL_GETSIZE(txn->tw.relist)); - goto retry_clean_adj; - } + TRACE("%s: reserved %zu (+%zu), continue", gcu_dbg_prefix(ctx), + ctx->reserved, chunk); continue; } @@ -11467,13 +11524,12 @@ retry: tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->mt_next_pgno - MDBX_ENABLE_REFUND)); tASSERT(txn, dirtylist_check(txn)); - if (ctx->reserved || MDBX_PNL_GETSIZE(txn->tw.relist)) { + if (ctx->amount) { 
MDBX_val key, data; key.iov_len = data.iov_len = 0; /* avoid MSVC warning */ key.iov_base = data.iov_base = NULL; - const size_t amount = MDBX_PNL_GETSIZE(txn->tw.relist); - size_t left = amount, excess = 0; + size_t left = ctx->amount, excess = 0; if (txn->tw.lifo_reclaimed == nullptr) { tASSERT(txn, ctx->lifo == 0); rc = cursor_first(&ctx->cursor, &key, &data); @@ -11488,7 +11544,7 @@ retry: while (true) { txnid_t fill_gc_id; - TRACE("%s: left %zu of %zu", dbg_prefix_mode, left, + TRACE("%s: left %zu of %zu", gcu_dbg_prefix(ctx), left, MDBX_PNL_GETSIZE(txn->tw.relist)); if (txn->tw.lifo_reclaimed == nullptr) { tASSERT(txn, ctx->lifo == 0); @@ -11519,7 +11575,7 @@ retry: ctx->fill_idx += 1; fill_gc_id = txn->tw.lifo_reclaimed[ctx->fill_idx]; TRACE("%s: seek-reservation @%" PRIaTXN " at lifo_reclaimed[%zu]", - dbg_prefix_mode, fill_gc_id, ctx->fill_idx); + gcu_dbg_prefix(ctx), fill_gc_id, ctx->fill_idx); key.iov_base = &fill_gc_id; key.iov_len = sizeof(fill_gc_id); rc = cursor_set(&ctx->cursor, &key, &data, MDBX_SET_KEY).err; @@ -11544,8 +11600,8 @@ retry: excess_slots += 1; goto next; } - TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, dbg_prefix_mode, chunk, - left, fill_gc_id); + TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, gcu_dbg_prefix(ctx), + chunk, left, fill_gc_id); if ((ctx->loop < 5 && delta > (ctx->loop / 2)) || delta > env->me_maxgc_ov1page) data.iov_len = (left + 1) * sizeof(pgno_t); @@ -11555,20 +11611,24 @@ retry: MDBX_CURRENT | MDBX_RESERVE); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; - gcu_clean_reserved(env, data); + gcu_zeroize_reserved(env, data); if (unlikely(txn->tw.loose_count || - amount != MDBX_PNL_GETSIZE(txn->tw.relist))) { - NOTICE("** restart: reclaimed-list growth (%zu -> %zu, loose +%zu)", - amount, MDBX_PNL_GETSIZE(txn->tw.relist), txn->tw.loose_count); - goto retry_clean_adj; + ctx->amount != MDBX_PNL_GETSIZE(txn->tw.relist))) { + NOTICE("** restart: reclaimed-list changed (%zu -> %zu, loose +%zu)", + ctx->amount, 
MDBX_PNL_GETSIZE(txn->tw.relist), + txn->tw.loose_count); + if (ctx->loop < 5 || (ctx->loop > 10 && (ctx->loop & 1))) + goto retry_clean_adj; + goto retry; } + if (unlikely(txn->tw.lifo_reclaimed ? ctx->cleaned_slot < MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) : ctx->cleaned_id < txn->tw.last_reclaimed)) { NOTICE("%s", "** restart: reclaimed-slots changed"); - goto retry_clean_adj; + goto retry; } if (unlikely(ctx->retired_stored != MDBX_PNL_GETSIZE(txn->tw.retired_pages))) { @@ -11576,7 +11636,7 @@ retry: ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)); NOTICE("** restart: retired-list growth (%zu -> %zu)", ctx->retired_stored, MDBX_PNL_GETSIZE(txn->tw.retired_pages)); - goto retry_clean_adj; + goto retry; } pgno_t *dst = data.iov_base; @@ -11585,12 +11645,12 @@ retry: memcpy(dst, src, chunk * sizeof(pgno_t)); pgno_t *from = src, *to = src + chunk; TRACE("%s: fill %zu [ %zu:%" PRIaPGNO "...%zu:%" PRIaPGNO "] @%" PRIaTXN, - dbg_prefix_mode, chunk, from - txn->tw.relist, from[0], + gcu_dbg_prefix(ctx), chunk, from - txn->tw.relist, from[0], to - txn->tw.relist, to[-1], fill_gc_id); left -= chunk; if (AUDIT_ENABLED()) { - rc = audit_ex(txn, ctx->retired_stored + amount - left, true); + rc = audit_ex(txn, ctx->retired_stored + ctx->amount - left, true); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } @@ -11615,15 +11675,17 @@ retry: while (n >= env->me_maxgc_ov1page) adj -= n /= env->me_maxgc_ov1page; ctx->reserve_adj += adj; - TRACE("%s: extra %zu reserved space, adj +%zu (%zu)", dbg_prefix_mode, + TRACE("%s: extra %zu reserved space, adj +%zu (%zu)", gcu_dbg_prefix(ctx), excess, adj, ctx->reserve_adj); } } tASSERT(txn, rc == MDBX_SUCCESS); - if (unlikely(txn->tw.loose_count != 0)) { - NOTICE("** restart: got %zu loose pages", txn->tw.loose_count); - goto retry_clean_adj; + if (unlikely(txn->tw.loose_count != 0 || + ctx->amount != MDBX_PNL_GETSIZE(txn->tw.relist))) { + NOTICE("** restart: got %zu loose pages (reclaimed-list %zu -> %zu)", + 
txn->tw.loose_count, ctx->amount, MDBX_PNL_GETSIZE(txn->tw.relist)); + goto retry; } if (unlikely(excess_slots)) { From e9f5c0c3085afec602edf17144727e95d0f64f54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 4 May 2024 21:42:53 +0300 Subject: [PATCH 175/443] =?UTF-8?q?mdbx++:=20=D1=83=D0=BF=D1=80=D0=BE?= =?UTF-8?q?=D1=89=D0=B5=D0=BD=D0=B8=D0=B5=20`buffer:silo::inplace=5Fsignat?= =?UTF-8?q?ure`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index f8f6df4c..19668308 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -1734,24 +1734,13 @@ private: return capacity_bytes < sizeof(bin); } - enum : byte { - /* Little Endian: - * last byte is the most significant byte of u_.allocated.cap, - * so use higher bit of capacity as the inplace-flag */ - le_lastbyte_mask = 0x80, - /* Big Endian: - * last byte is the least significant byte of u_.allocated.cap, - * so use lower bit of capacity as the inplace-flag. */ - be_lastbyte_mask = 0x01 + enum : byte { lastbyte_inplace_signature = byte(~0u) }; + enum : size_t { + inplace_signature_limit = + size_t(lastbyte_inplace_signature) + << (sizeof(size_t /* allocated::capacity_bytes_ */) - 1) * CHAR_BIT }; - static constexpr byte inplace_lastbyte_mask() noexcept { - static_assert( - endian::native == endian::little || endian::native == endian::big, - "Only the little-endian or big-endian bytes order are supported"); - return (endian::native == endian::little) ? 
le_lastbyte_mask - : be_lastbyte_mask; - } constexpr byte lastbyte() const noexcept { return inplace_[sizeof(bin) - 1]; } @@ -1760,7 +1749,14 @@ private: } constexpr bool is_inplace() const noexcept { - return (lastbyte() & inplace_lastbyte_mask()) != 0; + static_assert(size_t(inplace_signature_limit) > size_t(max_capacity), + "WTF?"); + static_assert( + std::numeric_limits::max() - + (std::numeric_limits::max() >> CHAR_BIT) == + inplace_signature_limit, + "WTF?"); + return lastbyte() == lastbyte_inplace_signature; } constexpr bool is_allocated() const noexcept { return !is_inplace(); } @@ -1773,8 +1769,8 @@ private: } if (::std::is_trivial::value) /* workaround for "uninitialized" warning from some compilers */ - ::std::memset(&allocated_.ptr_, 0, sizeof(allocated_.ptr_)); - lastbyte() = inplace_lastbyte_mask(); + memset(&allocated_.ptr_, 0, sizeof(allocated_.ptr_)); + lastbyte() = lastbyte_inplace_signature; MDBX_CONSTEXPR_ASSERT(is_inplace() && address() == inplace_ && is_suitable_for_inplace(capacity())); return address(); @@ -1783,11 +1779,7 @@ private: template MDBX_CXX17_CONSTEXPR byte * make_allocated(allocator_pointer ptr, size_t capacity_bytes) noexcept { - MDBX_CONSTEXPR_ASSERT( - (capacity_bytes & be_lastbyte_mask) == 0 && - ((capacity_bytes >> - (sizeof(allocated_.capacity_bytes_) - 1) * CHAR_BIT) & - le_lastbyte_mask) == 0); + MDBX_CONSTEXPR_ASSERT(inplace_signature_limit > capacity_bytes); if (construct_ptr) /* properly construct allocator::pointer */ new (&allocated_) allocated(ptr, capacity_bytes); From 3de3d425a128a3c6f7866503f5f93b80c09dbe41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 19 May 2024 22:07:58 +0300 Subject: [PATCH 176/443] =?UTF-8?q?mdbx:=20=D0=B8=D0=B7=D0=BC=D0=B5=D0=BD?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BB=D0=B8=D1=86=D0=B5=D0=BD=D0=B7?= =?UTF-8?q?=D0=B8=D0=B8=20=D0=B8=20=D1=80=D0=B5=D1=81=D1=82=D1=80=D1=83?= 
=?UTF-8?q?=D0=BA=D1=82=D1=83=D1=80=D0=B8=D0=B7=D0=B0=D1=86=D0=B8=D1=8F=20?= =?UTF-8?q?=D0=B8=D1=81=D1=85=D0=BE=D0=B4=D0=BD=D0=BE=D0=B3=D0=BE=20=D0=BA?= =?UTF-8?q?=D0=BE=D0=B4=D0=B0.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- AUTHORS | 34 - CMakeLists.txt | 248 +- COPYRIGHT | 158 +- GNUmakefile | 196 +- LICENSE | 206 +- NOTICE | 23 + README.md | 75 +- cmake/compiler.cmake | 16 +- cmake/profile.cmake | 16 +- cmake/utils.cmake | 16 +- mdbx.h | 313 +- mdbx.h++ | 41 +- packages/rpm.obsolete/CMakeLists.txt | 184 - packages/rpm.obsolete/build.sh | 18 - packages/rpm.obsolete/package.sh | 25 - src/alloy.c | 67 +- src/api-cursor.c | 797 + src/api-env.c | 1399 + src/api-extra.c | 117 + src/api-key-transform.c | 225 + src/api-txn.c | 508 + src/atomics-ops.h | 390 + src/atomics-types.h | 99 + src/audit.c | 164 + src/bits.md | 12 +- src/chk.c | 2097 ++ src/cogs.c | 353 + src/cogs.h | 558 + src/coherency.c | 198 + src/cold.c | 768 + src/config.h.in | 5 + src/copy.c | 781 + src/core.c | 29506 ---------------- src/cursor.c | 2451 ++ src/cursor.h | 398 + src/dbi.c | 954 + src/dbi.h | 133 + src/dpl.c | 520 + src/dpl.h | 145 + src/dxb.c | 1553 + src/env-opts.c | 419 + src/env.c | 679 + src/essentials.h | 136 + src/gc-get.c | 1460 + src/gc-put.c | 1094 + src/gc.h | 39 + src/global.c | 476 + src/internals.h | 2083 +- src/layout-dxb.h | 306 + src/layout-lck.h | 285 + src/lck-posix.c | 409 +- src/lck-windows.c | 484 +- src/lck.c | 193 + src/lck.h | 112 + src/logging_and_debug.c | 261 + src/logging_and_debug.h | 160 + src/mdbx.c++ | 56 +- src/meta.c | 746 + src/meta.h | 203 + src/misc.c | 252 + src/mvcc-readers.c | 477 + src/node.c | 395 + src/node.h | 125 + src/options.h | 25 +- src/osal.c | 494 +- src/osal.h | 539 +- src/page-get.c | 579 + src/page-iov.c | 198 + src/page-iov.h | 38 + src/page-ops.c | 772 + src/page-ops.h | 179 + src/page-search.c | 147 + src/pnl.c | 254 + src/pnl.h | 161 + src/{base.h => preface.h} | 380 +- src/proto.h 
| 119 + src/range-estimate.c | 394 + src/refund.c | 229 + src/sort.h | 485 + src/spill.c | 484 + src/spill.h | 86 + src/subdb.c | 104 + src/tls.c | 610 + src/tls.h | 43 + src/{mdbx_chk.c => tools/chk.c} | 32 +- src/{mdbx_copy.c => tools/copy.c} | 27 +- src/{mdbx_drop.c => tools/drop.c} | 27 +- src/{mdbx_dump.c => tools/dump.c} | 145 +- src/{mdbx_load.c => tools/load.c} | 27 +- src/{mdbx_stat.c => tools/stat.c} | 23 +- src/{ => tools}/wingetopt.c | 0 src/{ => tools}/wingetopt.h | 0 src/tree.c | 1645 + src/txl.c | 102 + src/txl.h | 26 + src/txn.c | 1947 + src/unaligned.h | 242 + src/utils.c | 35 + src/utils.h | 87 + src/walk.c | 314 + src/walk.h | 23 + src/windows-import.c | 158 + src/windows-import.h | 136 + test/CMakeLists.txt | 29 +- test/append.c++ | 15 +- test/base.h++ | 51 +- test/cases.c++ | 15 +- test/chrono.c++ | 15 +- test/chrono.h++ | 15 +- test/config.c++ | 15 +- test/config.h++ | 15 +- test/copy.c++ | 3 + test/dead.c++ | 15 +- test/extra/doubtless_positioning.c++ | 3 + .../{dupfixed_addodd.c => dupfix_addodd.c} | 0 ...fixed_multiple.c++ => dupfix_multiple.c++} | 3 + test/extra/hex_base64_base58.c++ | 3 + test/extra/maindb_ordinal.c++ | 3 + test/fork.c++ | 15 +- test/hill.c++ | 15 +- test/jitter.c++ | 15 +- test/keygen.c++ | 15 +- test/keygen.h++ | 15 +- test/log.c++ | 15 +- test/log.h++ | 16 +- test/long_stochastic.sh | 3 + test/main.c++ | 15 +- test/nested.c++ | 15 +- test/osal-unix.c++ | 21 +- test/osal-windows.c++ | 19 +- test/osal.h++ | 16 +- test/stochastic_small.sh | 3 + test/test.c++ | 38 +- test/test.h++ | 15 +- test/try.c++ | 3 + test/ttl.c++ | 15 +- test/utils.c++ | 15 +- test/utils.h++ | 37 +- test/valgrind_suppress.txt | 4 +- 139 files changed, 34551 insertions(+), 33907 deletions(-) delete mode 100644 AUTHORS create mode 100644 NOTICE delete mode 100644 packages/rpm.obsolete/CMakeLists.txt delete mode 100755 packages/rpm.obsolete/build.sh delete mode 100755 packages/rpm.obsolete/package.sh create mode 100644 src/api-cursor.c create mode 
100644 src/api-env.c create mode 100644 src/api-extra.c create mode 100644 src/api-key-transform.c create mode 100644 src/api-txn.c create mode 100644 src/atomics-ops.h create mode 100644 src/atomics-types.h create mode 100644 src/audit.c create mode 100644 src/chk.c create mode 100644 src/cogs.c create mode 100644 src/cogs.h create mode 100644 src/coherency.c create mode 100644 src/cold.c create mode 100644 src/copy.c delete mode 100644 src/core.c create mode 100644 src/cursor.c create mode 100644 src/cursor.h create mode 100644 src/dbi.c create mode 100644 src/dbi.h create mode 100644 src/dpl.c create mode 100644 src/dpl.h create mode 100644 src/dxb.c create mode 100644 src/env-opts.c create mode 100644 src/env.c create mode 100644 src/essentials.h create mode 100644 src/gc-get.c create mode 100644 src/gc-put.c create mode 100644 src/gc.h create mode 100644 src/global.c create mode 100644 src/layout-dxb.h create mode 100644 src/layout-lck.h create mode 100644 src/lck.c create mode 100644 src/lck.h create mode 100644 src/logging_and_debug.c create mode 100644 src/logging_and_debug.h create mode 100644 src/meta.c create mode 100644 src/meta.h create mode 100644 src/misc.c create mode 100644 src/mvcc-readers.c create mode 100644 src/node.c create mode 100644 src/node.h create mode 100644 src/page-get.c create mode 100644 src/page-iov.c create mode 100644 src/page-iov.h create mode 100644 src/page-ops.c create mode 100644 src/page-ops.h create mode 100644 src/page-search.c create mode 100644 src/pnl.c create mode 100644 src/pnl.h rename src/{base.h => preface.h} (70%) create mode 100644 src/proto.h create mode 100644 src/range-estimate.c create mode 100644 src/refund.c create mode 100644 src/sort.h create mode 100644 src/spill.c create mode 100644 src/spill.h create mode 100644 src/subdb.c create mode 100644 src/tls.c create mode 100644 src/tls.h rename src/{mdbx_chk.c => tools/chk.c} (95%) rename src/{mdbx_copy.c => tools/copy.c} (87%) rename src/{mdbx_drop.c => 
tools/drop.c} (87%) rename src/{mdbx_dump.c => tools/dump.c} (79%) rename src/{mdbx_load.c => tools/load.c} (97%) rename src/{mdbx_stat.c => tools/stat.c} (96%) rename src/{ => tools}/wingetopt.c (100%) rename src/{ => tools}/wingetopt.h (100%) create mode 100644 src/tree.c create mode 100644 src/txl.c create mode 100644 src/txl.h create mode 100644 src/txn.c create mode 100644 src/unaligned.h create mode 100644 src/utils.c create mode 100644 src/utils.h create mode 100644 src/walk.c create mode 100644 src/walk.h create mode 100644 src/windows-import.c create mode 100644 src/windows-import.h rename test/extra/{dupfixed_addodd.c => dupfix_addodd.c} (100%) rename test/extra/{dupfixed_multiple.c++ => dupfix_multiple.c++} (98%) diff --git a/AUTHORS b/AUTHORS deleted file mode 100644 index beb01868..00000000 --- a/AUTHORS +++ /dev/null @@ -1,34 +0,0 @@ -Contributors -============ - -- Alexey Naumov -- Andrew Ashikhmin -- Chris Mikkelson -- Claude Brisson -- David Barbour -- David Wilson -- dreamsxin -- Hallvard Furuseth , -- Heiko Becker -- Howard Chu , -- Ignacio Casal Quinteiro -- James Rouzier -- Jean-Christophe DUBOIS -- John Hewson -- Klaus Malorny -- Kurt Zeilenga -- Leonid Yuriev , -- Lorenz Bauer -- Luke Yeager -- Martin Hedenfalk -- Ondrej Kuznik -- Orivej Desh -- Oskari Timperi -- Pavel Medvedev -- Philipp Storz -- Quanah Gibson-Mount -- Salvador Ortiz -- Sebastien Launay -- Vladimir Romanov -- Zano Foundation -- 장세연 diff --git a/CMakeLists.txt b/CMakeLists.txt index 2fc9e8ca..d533fa40 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,16 +1,5 @@ -## -## Copyright 2020-2024 Leonid Yuriev -## and other libmdbx authors: please see AUTHORS file. -## All rights reserved. -## -## Redistribution and use in source and binary forms, with or without -## modification, are permitted only as authorized by the OpenLDAP -## Public License. 
-## -## A copy of this license is available in the file LICENSE in the -## top-level directory of the distribution or, alternatively, at -## . -## +## Copyright (c) 2020-2024 Леонид Юрьев aka Leonid Yuriev +## SPDX-License-Identifier: Apache-2.0 ## ## libmdbx = { Revised and extended descendant of Symas LMDB. } @@ -69,14 +58,109 @@ else() endif() if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/COPYRIGHT" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/NOTICE" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/README.md" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.h++" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/CMakeLists.txt" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/core.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/alloy.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-cursor.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-env.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-extra.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-key-transform.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-txn.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/atomics-ops.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/atomics-types.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/audit.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/chk.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cogs.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cogs.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/coherency.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cold.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/config.h.in" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/copy.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cursor.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cursor.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dbi.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dbi.h" AND + EXISTS 
"${CMAKE_CURRENT_SOURCE_DIR}/src/debug_begin.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/debug_end.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dpl.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dpl.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dxb.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/env-opts.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/env.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/essentials.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/gc-get.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/gc-put.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/gc.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/global.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/internals.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/layout-dxb.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/layout-lck.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/lck-posix.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/lck-windows.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/lck.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/lck.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/logging_and_debug.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/logging_and_debug.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1/mdbx_chk.1" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1/mdbx_copy.1" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1/mdbx_drop.1" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1/mdbx_dump.1" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1/mdbx_load.1" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1/mdbx_stat.1" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/mdbx.c++" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/meta.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/meta.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/misc.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/mvcc-readers.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/node.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/node.h" AND + EXISTS 
"${CMAKE_CURRENT_SOURCE_DIR}/src/ntdll.def" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/options.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/osal.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/osal.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-get.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-iov.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-iov.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-ops.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-ops.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-search.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/pnl.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/pnl.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/preface.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/proto.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/range-estimate.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/refund.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/sort.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/spill.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/spill.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/subdb.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tls.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tls.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/chk.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/copy.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/drop.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/dump.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/load.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/stat.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/wingetopt.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/wingetopt.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tree.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/txl.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/txl.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/txn.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/unaligned.h" AND + EXISTS 
"${CMAKE_CURRENT_SOURCE_DIR}/src/utils.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/utils.h" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/version.c.in" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/mdbx_chk.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/mdbx.c++") + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/walk.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/walk.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/windows-import.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/windows-import.h") set(MDBX_AMALGAMATED_SOURCE FALSE) find_program(GIT git) if(NOT GIT) @@ -84,21 +168,27 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND endif() set(MDBX_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src") elseif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/VERSION.txt" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/NOTICE" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.c++" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/config.h.in" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/man1" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_chk.c") + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.h" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.h++" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_chk.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_copy.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_dump.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_load.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_stat.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_drop.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/ntdll.def" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/config.h.in") set(MDBX_AMALGAMATED_SOURCE TRUE) set(MDBX_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") else() message(FATAL_ERROR "\n" - "Please don't use tarballs nor zips which are automatically provided by Github! " - "These archives do not contain version information and thus are unfit to build libmdbx. 
" - "You can vote for ability of disabling auto-creation such unsuitable archives at https://github.community/t/disable-tarball\n" - "Instead of above, just clone the git repository, either download a tarball or zip with the properly amalgamated source core. " - "For embedding libmdbx use a git-submodule or the amalgamated source code.\n" - "Please, avoid using any other techniques.") + "The set of libmdbx source code files is incomplete! " + "Instead just follow the https://libmdbx.dqdkfa.ru/usage.html " + "PLEASE, AVOID USING ANY OTHER TECHNIQUES.") endif() if(DEFINED PROJECT_NAME) @@ -600,13 +690,88 @@ else() include_directories("${MDBX_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}") else() list(APPEND LIBMDBX_SOURCES + "${MDBX_SOURCE_DIR}/api-cursor.c" + "${MDBX_SOURCE_DIR}/api-env.c" + "${MDBX_SOURCE_DIR}/api-extra.c" + "${MDBX_SOURCE_DIR}/api-key-transform.c" + "${MDBX_SOURCE_DIR}/api-txn.c" + "${MDBX_SOURCE_DIR}/atomics-ops.h" + "${MDBX_SOURCE_DIR}/atomics-types.h" + "${MDBX_SOURCE_DIR}/audit.c" + "${MDBX_SOURCE_DIR}/chk.c" + "${MDBX_SOURCE_DIR}/cogs.c" + "${MDBX_SOURCE_DIR}/cogs.h" + "${MDBX_SOURCE_DIR}/coherency.c" + "${MDBX_SOURCE_DIR}/cold.c" + "${MDBX_SOURCE_DIR}/copy.c" + "${MDBX_SOURCE_DIR}/cursor.c" + "${MDBX_SOURCE_DIR}/cursor.h" + "${MDBX_SOURCE_DIR}/dbi.c" + "${MDBX_SOURCE_DIR}/dbi.h" + "${MDBX_SOURCE_DIR}/dpl.c" + "${MDBX_SOURCE_DIR}/dpl.h" + "${MDBX_SOURCE_DIR}/dxb.c" + "${MDBX_SOURCE_DIR}/env-opts.c" + "${MDBX_SOURCE_DIR}/env.c" + "${MDBX_SOURCE_DIR}/essentials.h" + "${MDBX_SOURCE_DIR}/gc-get.c" + "${MDBX_SOURCE_DIR}/gc-put.c" + "${MDBX_SOURCE_DIR}/gc.h" + "${MDBX_SOURCE_DIR}/global.c" + "${MDBX_SOURCE_DIR}/internals.h" + "${MDBX_SOURCE_DIR}/layout-dxb.h" + "${MDBX_SOURCE_DIR}/layout-lck.h" + "${MDBX_SOURCE_DIR}/lck.c" + "${MDBX_SOURCE_DIR}/lck.h" + "${MDBX_SOURCE_DIR}/logging_and_debug.c" + "${MDBX_SOURCE_DIR}/logging_and_debug.h" + "${MDBX_SOURCE_DIR}/meta.c" + "${MDBX_SOURCE_DIR}/meta.h" + "${MDBX_SOURCE_DIR}/misc.c" + 
"${MDBX_SOURCE_DIR}/mvcc-readers.c" + "${MDBX_SOURCE_DIR}/node.c" + "${MDBX_SOURCE_DIR}/node.h" + "${MDBX_SOURCE_DIR}/options.h" + "${MDBX_SOURCE_DIR}/osal.c" + "${MDBX_SOURCE_DIR}/osal.h" + "${MDBX_SOURCE_DIR}/page-get.c" + "${MDBX_SOURCE_DIR}/page-iov.c" + "${MDBX_SOURCE_DIR}/page-iov.h" + "${MDBX_SOURCE_DIR}/page-ops.c" + "${MDBX_SOURCE_DIR}/page-ops.h" + "${MDBX_SOURCE_DIR}/page-search.c" + "${MDBX_SOURCE_DIR}/pnl.c" + "${MDBX_SOURCE_DIR}/pnl.h" + "${MDBX_SOURCE_DIR}/preface.h" + "${MDBX_SOURCE_DIR}/proto.h" + "${MDBX_SOURCE_DIR}/range-estimate.c" + "${MDBX_SOURCE_DIR}/refund.c" + "${MDBX_SOURCE_DIR}/sort.h" + "${MDBX_SOURCE_DIR}/spill.c" + "${MDBX_SOURCE_DIR}/spill.h" + "${MDBX_SOURCE_DIR}/subdb.c" + "${MDBX_SOURCE_DIR}/tls.c" + "${MDBX_SOURCE_DIR}/tls.h" + "${MDBX_SOURCE_DIR}/tree.c" + "${MDBX_SOURCE_DIR}/txl.c" + "${MDBX_SOURCE_DIR}/txl.h" + "${MDBX_SOURCE_DIR}/txn.c" + "${MDBX_SOURCE_DIR}/unaligned.h" + "${MDBX_SOURCE_DIR}/utils.c" + "${MDBX_SOURCE_DIR}/utils.h" + "${MDBX_SOURCE_DIR}/walk.c" + "${MDBX_SOURCE_DIR}/walk.h" "${CMAKE_CURRENT_BINARY_DIR}/version.c" - "${MDBX_SOURCE_DIR}/options.h" "${MDBX_SOURCE_DIR}/base.h" - "${MDBX_SOURCE_DIR}/internals.h" "${MDBX_SOURCE_DIR}/osal.h" - "${MDBX_SOURCE_DIR}/core.c" "${MDBX_SOURCE_DIR}/osal.c" - "${MDBX_SOURCE_DIR}/lck-posix.c") + ) + if(NOT MSVC) + list(APPEND LIBMDBX_SOURCES "${MDBX_SOURCE_DIR}/lck-posix.c") + endif() if(NOT APPLE) - list(APPEND LIBMDBX_SOURCES "${MDBX_SOURCE_DIR}/lck-windows.c") + list(APPEND LIBMDBX_SOURCES + "${MDBX_SOURCE_DIR}/windows-import.h" + "${MDBX_SOURCE_DIR}/windows-import.c" + "${MDBX_SOURCE_DIR}/lck-windows.c" + ) endif() include_directories("${MDBX_SOURCE_DIR}") endif() @@ -747,20 +912,23 @@ endif() # build mdbx-tools if(MDBX_BUILD_TOOLS) - if(NOT MDBX_AMALGAMATED_SOURCE AND ${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - set(WINGETOPT_SRC ${MDBX_SOURCE_DIR}/wingetopt.c ${MDBX_SOURCE_DIR}/wingetopt.h) - else() - set(WINGETOPT_SRC "") + set(WINGETOPT_SRC "") + if(${CMAKE_SYSTEM_NAME} 
STREQUAL "Windows") + set(WINGETOPT_SRC ${MDBX_SOURCE_DIR}/tools/wingetopt.c ${MDBX_SOURCE_DIR}/tools/wingetopt.h) endif() - foreach(TOOL mdbx_chk mdbx_copy mdbx_stat mdbx_dump mdbx_load mdbx_drop) - add_executable(${TOOL} mdbx.h ${MDBX_SOURCE_DIR}/${TOOL}.c ${WINGETOPT_SRC}) + foreach(TOOL chk copy stat dump load drop) + if(MDBX_AMALGAMATED_SOURCE) + add_executable(mdbx_${TOOL} mdbx.h ${MDBX_SOURCE_DIR}/mdbx_${TOOL}.c) + else() + add_executable(mdbx_${TOOL} mdbx.h ${MDBX_SOURCE_DIR}/tools/${TOOL}.c ${WINGETOPT_SRC}) + endif() if(NOT C_FALLBACK_GNU11 AND NOT C_FALLBACK_11) - set_target_properties(${TOOL} PROPERTIES + set_target_properties(mdbx_${TOOL} PROPERTIES C_STANDARD ${MDBX_C_STANDARD} C_STANDARD_REQUIRED ON) endif() - target_setup_options(${TOOL}) - target_link_libraries(${TOOL} ${TOOL_MDBX_LIB}) + target_setup_options(mdbx_${TOOL}) + target_link_libraries(mdbx_${TOOL} ${TOOL_MDBX_LIB}) endforeach() if(LIB_MATH) target_link_libraries(mdbx_chk ${LIB_MATH}) diff --git a/COPYRIGHT b/COPYRIGHT index d6111220..23614d29 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -1,7 +1,138 @@ -Copyright 2015-2024 Leonid Yuriev . -Copyright 2011-2015 Howard Chu, Symas Corp. -Copyright 2015,2016 Peter-Service R&D LLC. -All rights reserved. +Copyright (c) 2015-2024 Леонид Юрьев aka Leonid Yuriev + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +----------------------------------------------------------------------- + +СМЕНА ЛИЦЕНЗИИ (THE LICENSE CHANGE) + +OpenLDAP Public License → Apache 2.0 + +Briefly: + Historically, in 2015 an early MDBX source code was derived from the + "LMDB engine" created by Howard Chu in 2011-2015, + which based on btree.c written by Martin Hedenfalk . + + By 2024, MDBX source code has actually been rewritten and has so + little in common with the original LMDB that I thought it admissible + to change the license. Below are more detailed explanations. + +Кратко: + Исторически в 2015 году ранний исходный MDBX был заимствован из «LMDB + engine», созданной Howard Chu в 2011-2015, на основе + btree.c созданного Martin Hedenfalk в 2009-2010. + + К 2024 году исходный код MDBX фактически переписан и имеет настолько + мало общего с первоначальным заимствованием из LMDB, что я счел + уместным сменить лицензию. Ниже более подробные пояснения. + +--- + +Первоисточник текста формулирован на Русском языке, который является +родным для автора. Предполагается что все заинтересованные могут легко +воспользоваться машинным переводом, который при всех недостатках сможет +донести суть, намерения и местами даже передать тональность. + +The original source of this text is in Russian, which is the author's +native language. It is assumed that all concerned can easily use machine +translation, which, with all the disadvantages, will be able to convey +the essence, intentions and, in some places, even convey the tonality of +a wording. + +1. Причины + +1.1. Лицензия Apache-2.0 является одной из самых популярных, так как +содержит ряд уточнений, проясняющих и упрощающих использование исходного +кода в производных работах и больших проектах. Эти особенности лицензии +Apache-2.0 я нахожу достаточно ценными и удобными. Соответственно, +переход на лицензию Apache-2.0 полезным в целом. + +1.2. 
Проект OpenLDAP имеет определенную известность, в том числе, к +сожалению, среди специалистов славится крайне плохим качеством кода и +сбоями при отходе от простых/базовых сценариев использования. Поэтому +использование лицензии OpenLDAP, в глазах части аудитории, бросает тень +на качества кода libmdbx, несмотря на то, что исходный код библиотеки +переписан, в том числе, с целью повышения качества, надежности, +стабильности и пригодности к тестированию. + +Отмечу, что здесь не место для обсуждения объективности подобных мнений +и причин, равно как и не место для оценки компетентности специалистов +высказывающих такие суждения. Однако, здесь необходимо озвучить сам факт +наличия такой негативной коннотации качества кода при упоминании +OpenLDAP, совершенно без намерения как-либо задеть или обидеть +контрибьюторов OpenLDAP. + +1.3. С точки зрения исходного кода, к настоящему времени libmdbx стала +совсем другим продуктом, о котором сейчас правильнее сказать что +разработка вдохновлена LMDB, нежели основывается на заимствовании кода. +Смена лицензии на переписанный код подчеркивает, что это действительно +новый исходный код. + +2. Легитимность + +2.1. Исходная лицензия OpenLDAP 2.8 и актуальная лицензия Apache 2.0 +совпадают по базовым условиям. При этом лицензия Apache 2.0 уточняет, +определяет и проясняет многие аспекты. Поэтому смену лицензии я склонен +трактовать как уточнение, но не как принципиальное изменение, которое +могло-бы нарушить чьи-либо права. + +2.2. С процедурной точки зрения, у меня есть право сменить лицензию на +новый, написанный мной, исходный код. При этом объективно существует как +техническая, так и юридическая проблемы отделения «нового кода» от +«заимствованного», а также выделение/классификация кода, который +является общественным достоянием и/или общеупотребительным воплощением +«математических моделей и других публичных знаний». 
+ +Основываясь на собственной субъективной оценке кодовой базы, включая +соотношения «нового», «заимствованного» и «общеупотребительного» +исходного кода, я считаю что смена лицензии допустима. Одновременно с +этим, я понимаю и признаю, что можно найти повод, чтобы трактовать +ситуацию как «стакан наполовину полон/пуст». Поэтому декларирую +готовность принимать претензии и устранять их путем полного +переписывания оставшегося исходного кода, который попадает под критерии +«заимствованного» и кто-то из контрибьюторов которого будет против +изменения лицензии. + +2.3. Вне зависимости от истории происхождения каждой строки исходного +кода и её буквального авторства, прошу не считать производимую смену +лицензии, и связанных с этим технических действий, как попытку плагиата, +присвоения чужого труда, присвоения авторства или принижения вклада +других авторов/контрибьюторов. Безусловно проект MDBX/libmdbx не появился +бы без LMDB и всех участников проекта LMDB, в особенности Говарда Чу +(Howard Chu), Холлварда Фурусет (Hallvard Furuseth) и Мартина Хеденфок +(Martin Hedenfalk). Как-бы исходный код не переписывался он всё равно +будет основываться на базовых идеях и включать основные концепции LMDB. + +3. Последствия и актуальные требования + +Всё очень просто. Потребуется обеспечить требования новой лицензии в +соответствии с 4-м пунктом лицензии Apache 2.0. + +В частности, при использовании/распространении libmdbx потребуется +обеспечить наличие файлов с текстом лицензии и файла NOTICE, а также +обеспечить пользователям возможность ознакомиться с их содержимым в +работах/продуктах использующих libmdbx. + +----------------------------------------------------------------------- + +Далее в справочных целях приведены уведомления об авторских правах из +первоначально заимствованного кода. + +--- + +Original source code was derived from LMDB in 2015, +and later evolutionarily rewritten in 2015-2024: +Copyright (c) 2011-2015 Howard Chu, Symas Corp. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted only as authorized by the OpenLDAP @@ -11,12 +142,17 @@ A copy of this license is available in the file LICENSE in the top-level directory of the distribution or, alternatively, at . -OpenLDAP is a registered trademark of the OpenLDAP Foundation. +LMDB itself derived code from btree.c written by Martin Hedenfalk: +Copyright (c) 2009, 2010 Martin Hedenfalk -Individual files and/or contributed packages may be copyright by -other parties and/or subject to additional restrictions. +Permission to use, copy, modify, and distribute this software for any +purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. -This work also contains materials derived from public sources. - -Additional information about OpenLDAP can be obtained at -. +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
diff --git a/GNUmakefile b/GNUmakefile index 1694df6b..2b404988 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -61,7 +61,7 @@ MDBX_BUILD_CXX ?= YES CFLAGS ?= $(strip $(eval CFLAGS := -std=gnu11 -O2 -g -Wall -Werror -Wextra -Wpedantic -ffunction-sections -fPIC -fvisibility=hidden -pthread -Wno-error=attributes $$(shell for opt in -fno-semantic-interposition -Wno-unused-command-line-argument -Wno-tautological-compare; do [ -z "$$$$($(CC) '-DMDBX_BUILD_FLAGS="probe"' $$$${opt} -c $(SRC_PROBE_C) -o /dev/null >/dev/null 2>&1 || echo failed)" ] && echo "$$$${opt} "; done)$(CFLAGS_EXTRA))$(CFLAGS)) # choosing C++ standard with variable expansion trick (seems this work two times per session for GNU Make 3.81) -CXXSTD ?= $(eval CXXSTD := $$(shell for std in gnu++23 c++23 gnu++2b c++2b gnu++20 c++20 gnu++2a c++2a gnu++17 c++17 gnu++1z c++1z gnu++14 c++14 gnu++1y c++1y gnu+11 c++11 gnu++0x c++0x; do $(CXX) -std=$$$${std} -c $(SRC_PROBE_CXX) -o /dev/null 2>probe4std-$$$${std}.err >/dev/null && echo "-std=$$$${std}" && exit; done))$(CXXSTD) +CXXSTD ?= $(eval CXXSTD := $$(shell for std in gnu++23 c++23 gnu++2b c++2b gnu++20 c++20 gnu++2a c++2a gnu++17 c++17 gnu++1z c++1z gnu++14 c++14 gnu++1y c++1y gnu+11 c++11 gnu++0x c++0x; do $(CXX) -std=$$$${std} -DMDBX_BUILD_CXX=1 -c $(SRC_PROBE_CXX) -o /dev/null 2>probe4std-$$$${std}.err >/dev/null && echo "-std=$$$${std}" && exit; done))$(CXXSTD) CXXFLAGS ?= $(strip $(CXXSTD) $(filter-out -std=gnu11,$(CFLAGS))) # libraries and options for linking @@ -121,7 +121,8 @@ endef SO_SUFFIX := $(shell $(uname2sosuffix)) HEADERS := mdbx.h mdbx.h++ LIBRARIES := libmdbx.a libmdbx.$(SO_SUFFIX) -TOOLS := mdbx_stat mdbx_copy mdbx_dump mdbx_load mdbx_chk mdbx_drop +TOOLS := chk copy drop dump load stat +MDBX_TOOLS := $(addprefix mdbx_,$(TOOLS)) MANPAGES := mdbx_stat.1 mdbx_copy.1 mdbx_dump.1 mdbx_load.1 mdbx_chk.1 mdbx_drop.1 TIP := // TIP: @@ -148,7 +149,7 @@ else $(info $(TIP) Use `make V=1` for verbose.) 
endif -all: show-options $(LIBRARIES) $(TOOLS) +all: show-options $(LIBRARIES) $(MDBX_TOOLS) help: @echo " make all - build libraries and tools" @@ -234,26 +235,26 @@ options: ifeq ($(wildcard mdbx.c),mdbx.c) #< dist-cutoff-end @echo "## in README and source code (see mdbx.c) if you do." - @grep -h '#ifndef MDBX_' mdbx.c | grep -v BUILD | uniq | sed 's/#ifndef / /' + @grep -h '#ifndef MDBX_' mdbx.c | grep -v BUILD | sort -u | sed 's/#ifndef / /' #> dist-cutoff-begin else @echo "## in README and source code (see src/options.h) if you do." - @grep -h '#ifndef MDBX_' src/internals.h src/options.h | grep -v BUILD | uniq | sed 's/#ifndef / /' + @grep -h '#ifndef MDBX_' src/*.h | grep -v BUILD | sort -u | sed 's/#ifndef / /' endif #< dist-cutoff-end lib libs libmdbx mdbx: libmdbx.a libmdbx.$(SO_SUFFIX) -tools: $(TOOLS) -tools-static: $(addsuffix .static,$(TOOLS)) $(addsuffix .static-lto,$(TOOLS)) +tools: $(MDBX_TOOLS) +tools-static: $(addsuffix .static,$(MDBX_TOOLS)) $(addsuffix .static-lto,$(MDBX_TOOLS)) strip: all - @echo ' STRIP libmdbx.$(SO_SUFFIX) $(TOOLS)' - $(TRACE )strip libmdbx.$(SO_SUFFIX) $(TOOLS) + @echo ' STRIP libmdbx.$(SO_SUFFIX) $(MDBX_TOOLS)' + $(TRACE )strip libmdbx.$(SO_SUFFIX) $(MDBX_TOOLS) clean: @echo ' REMOVE ...' - $(QUIET)rm -rf $(TOOLS) mdbx_test @* *.[ao] *.[ls]o *.$(SO_SUFFIX) *.dSYM *~ tmp.db/* \ + $(QUIET)rm -rf $(MDBX_TOOLS) mdbx_test @* *.[ao] *.[ls]o *.$(SO_SUFFIX) *.dSYM *~ tmp.db/* \ *.gcov *.log *.err src/*.o test/*.o mdbx_example dist \ config.h src/config.h src/version.c *.tar* buildflags.tag \ mdbx_*.static mdbx_*.static-lto @@ -284,27 +285,28 @@ ifeq ($(wildcard mdbx.c),mdbx.c) # Amalgamated source code, i.e. 
distributed after `make dist` MAN_SRCDIR := man1/ -config.h: buildflags.tag mdbx.c $(lastword $(MAKEFILE_LIST)) +config.h: buildflags.tag mdbx.c $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE @echo ' MAKE $@' $(QUIET)(echo '#define MDBX_BUILD_TIMESTAMP "$(MDBX_BUILD_TIMESTAMP)"' \ && echo "#define MDBX_BUILD_FLAGS \"$$(cat buildflags.tag)\"" \ && echo '#define MDBX_BUILD_COMPILER "$(shell (LC_ALL=C $(CC) --version || echo 'Please use GCC or CLANG compatible compiler') | head -1)"' \ && echo '#define MDBX_BUILD_TARGET "$(shell set -o pipefail; (LC_ALL=C $(CC) -v 2>&1 | grep -i '^Target:' | cut -d ' ' -f 2- || (LC_ALL=C $(CC) --version | grep -qi e2k && echo E2K) || echo 'Please use GCC or CLANG compatible compiler') | head -1)"' \ + && echo '#define MDBX_BUILD_CXX $(call select_by,MDBX_BUILD_CXX,1,0)' \ ) >$@ -mdbx-dylib.o: config.h mdbx.c mdbx.h $(lastword $(MAKEFILE_LIST)) +mdbx-dylib.o: config.h mdbx.c mdbx.h $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE @echo ' CC $@' $(QUIET)$(CC) $(CFLAGS) $(MDBX_BUILD_OPTIONS) '-DMDBX_CONFIG_H="config.h"' -DLIBMDBX_EXPORTS=1 -c mdbx.c -o $@ -mdbx-static.o: config.h mdbx.c mdbx.h $(lastword $(MAKEFILE_LIST)) +mdbx-static.o: config.h mdbx.c mdbx.h $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE @echo ' CC $@' $(QUIET)$(CC) $(CFLAGS) $(MDBX_BUILD_OPTIONS) '-DMDBX_CONFIG_H="config.h"' -ULIBMDBX_EXPORTS -c mdbx.c -o $@ -mdbx++-dylib.o: config.h mdbx.c++ mdbx.h mdbx.h++ $(lastword $(MAKEFILE_LIST)) +mdbx++-dylib.o: config.h mdbx.c++ mdbx.h mdbx.h++ $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE @echo ' CC $@' $(QUIET)$(CXX) $(CXXFLAGS) $(MDBX_BUILD_OPTIONS) '-DMDBX_CONFIG_H="config.h"' -DLIBMDBX_EXPORTS=1 -c mdbx.c++ -o $@ -mdbx++-static.o: config.h mdbx.c++ mdbx.h mdbx.h++ $(lastword $(MAKEFILE_LIST)) +mdbx++-static.o: config.h mdbx.c++ mdbx.h mdbx.h++ $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE @echo ' CC $@' $(QUIET)$(CXX) $(CXXFLAGS) $(MDBX_BUILD_OPTIONS) '-DMDBX_CONFIG_H="config.h"' -ULIBMDBX_EXPORTS -c mdbx.c++ -o $@ @@ -351,9 +353,9 @@ 
define uname2titer esac endef -DIST_EXTRA := LICENSE README.md CMakeLists.txt GNUmakefile Makefile ChangeLog.md VERSION.txt config.h.in ntdll.def \ +DIST_EXTRA := LICENSE NOTICE README.md CMakeLists.txt GNUmakefile Makefile ChangeLog.md VERSION.txt config.h.in ntdll.def \ $(addprefix man1/, $(MANPAGES)) cmake/compiler.cmake cmake/profile.cmake cmake/utils.cmake -DIST_SRC := mdbx.h mdbx.h++ mdbx.c mdbx.c++ $(addsuffix .c, $(TOOLS)) +DIST_SRC := mdbx.h mdbx.h++ mdbx.c mdbx.c++ $(addsuffix .c, $(MDBX_TOOLS)) TEST_DB ?= $(shell [ -d /dev/shm ] && echo /dev/shm || echo /tmp)/mdbx-test.db TEST_LOG ?= $(shell [ -d /dev/shm ] && echo /dev/shm || echo /tmp)/mdbx-test.log @@ -362,20 +364,20 @@ TEST_ITER := $(shell $(uname2titer)) TEST_SRC := test/osal-$(TEST_OSAL).c++ $(filter-out $(wildcard test/osal-*.c++),$(wildcard test/*.c++)) $(call select_by,MDBX_BUILD_CXX,,src/mdbx.c++) TEST_INC := $(wildcard test/*.h++) TEST_OBJ := $(patsubst %.c++,%.o,$(TEST_SRC)) -TAR ?= $(shell which gnu-tar || echo tar) +TAR ?= $(shell which gnu-tar 2>&- || echo tar) ZIP ?= $(shell which zip || echo "echo 'Please install zip'") -CLANG_FORMAT ?= $(shell (which clang-format-14 || which clang-format-13 || which clang-format) 2>/dev/null) +CLANG_FORMAT ?= $(shell (which clang-format-19 || which clang-format) 2>/dev/null) reformat: @echo ' RUNNING clang-format...' 
$(QUIET)if [ -n "$(CLANG_FORMAT)" ]; then \ git ls-files | grep -E '\.(c|c++|h|h++)(\.in)?$$' | xargs -r $(CLANG_FORMAT) -i --style=file; \ else \ - echo "clang-format version 13..14 not found for 'reformat'"; \ + echo "clang-format version 19 not found for 'reformat'"; \ fi MAN_SRCDIR := src/man1/ -ALLOY_DEPS := $(shell git ls-files src/) +ALLOY_DEPS := $(shell git ls-files src/ | grep -e /tools -e /man -v) git_DIR := $(shell if [ -d .git ]; then echo .git; elif [ -s .git -a -f .git ]; then grep '^gitdir: ' .git | cut -d ':' -f 2; else echo git_directory_is_absent; fi) MDBX_GIT_VERSION = $(shell set -o pipefail; git describe --tags '--match=v[0-9]*' 2>&- | sed -n 's|^v*\([0-9]\{1,\}\.[0-9]\{1,\}\.[0-9]\{1,\}\)\(.*\)|\1|p' || echo 'Please fetch tags and/or use non-obsolete git version') MDBX_GIT_REVISION = $(shell set -o pipefail; git rev-list `git describe --tags --abbrev=0`..HEAD --count 2>&- || echo 'Please fetch tags and/or use non-obsolete git version') @@ -392,11 +394,11 @@ MDBX_SMOKE_EXTRA ?= check: DESTDIR = $(shell pwd)/@check-install check: test dist install -smoke-assertion: MDBX_BUILD_OPTIONS:=$(strip $(MDBX_BUILD_OPTIONS) -DMDBX_FORCE_ASSERTIONS=1) +smoke-assertion: MDBX_BUILD_OPTIONS:=$(strip $(MDBX_BUILD_OPTIONS) -DMDBX_FORCE_ASSERTIONS=1 -UNDEBUG -DMDBX_DEBUG=0) smoke-assertion: smoke -test-assertion: MDBX_BUILD_OPTIONS:=$(strip $(MDBX_BUILD_OPTIONS) -DMDBX_FORCE_ASSERTIONS=1) +test-assertion: MDBX_BUILD_OPTIONS:=$(strip $(MDBX_BUILD_OPTIONS) -DMDBX_FORCE_ASSERTIONS=1 -UNDEBUG -DMDBX_DEBUG=0) test-assertion: smoke -long-test-assertion: MDBX_BUILD_OPTIONS:=$(strip $(MDBX_BUILD_OPTIONS) -DMDBX_FORCE_ASSERTIONS=1) +long-test-assertion: MDBX_BUILD_OPTIONS:=$(strip $(MDBX_BUILD_OPTIONS) -DMDBX_FORCE_ASSERTIONS=1 -UNDEBUG -DMDBX_DEBUG=0) long-test-assertion: smoke smoke: build-test @@ -424,7 +426,7 @@ smoke-fault: build-test test: build-test @echo ' RUNNING `test/long_stochastic.sh --loops 2`...' 
- $(QUIET)test/long_stochastic.sh --dont-check-ram-size --loops 2 --db-upto-mb 256 --extra --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) + $(QUIET)test/long_stochastic.sh --dont-check-ram-size --loops 2 --db-upto-mb 256 --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) long-test: test-long test-long: build-test @@ -439,7 +441,7 @@ test-valgrind: test-memcheck test-memcheck: CFLAGS_EXTRA=-Ofast -DENABLE_MEMCHECK test-memcheck: build-test @echo ' RUNNING `test/long_stochastic.sh --with-valgrind --loops 2`...' - $(QUIET)test/long_stochastic.sh --with-valgrind --extra --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false) + $(QUIET)test/long_stochastic.sh --with-valgrind --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false) memcheck: smoke-memcheck smoke-memcheck: VALGRIND=valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt @@ -480,23 +482,27 @@ build-test: all mdbx_example mdbx_test define test-rule $(patsubst %.c++,%.o,$(1)): $(1) $(TEST_INC) $(HEADERS) $(lastword $(MAKEFILE_LIST)) @echo ' CC $$@' - $(QUIET)$$(CXX) $$(CXXFLAGS) $$(MDBX_BUILD_OPTIONS) -c $(1) -o $$@ + $(QUIET)$$(CXX) $$(CXXFLAGS) $$(MDBX_BUILD_OPTIONS) -DMDBX_BUILD_CXX=1 -DMDBX_WITHOUT_MSVC_CRT=0 -c $(1) -o $$@ endef $(foreach file,$(TEST_SRC),$(eval $(call test-rule,$(file)))) -mdbx_%: src/mdbx_%.c libmdbx.a - @echo ' CC+LD $@' - $(QUIET)$(CC) $(CFLAGS) $(MDBX_BUILD_OPTIONS) '-DMDBX_CONFIG_H="config.h"' $^ $(EXE_LDFLAGS) $(LIBS) -o $@ +define tool-rule +mdbx_$(1): src/tools/$(1).c libmdbx.a + @echo ' CC+LD $$@' + $(QUIET)$$(CC) $$(CFLAGS) $$(MDBX_BUILD_OPTIONS) -Isrc '-DMDBX_CONFIG_H="config.h"' $$^ $$(EXE_LDFLAGS) $$(LIBS) -o $$@ -mdbx_%.static: src/mdbx_%.c mdbx-static.o - @echo ' CC+LD $@' - $(QUIET)$(CC) $(CFLAGS) $(MDBX_BUILD_OPTIONS) '-DMDBX_CONFIG_H="config.h"' $^ $(EXE_LDFLAGS) 
$(LIBS) -static -Wl,--strip-all -o $@ +mdbx_$(1).static: src/tools/$(1).c mdbx-static.o + @echo ' CC+LD $$@' + $(QUIET)$$(CC) $$(CFLAGS) $$(MDBX_BUILD_OPTIONS) -Isrc '-DMDBX_CONFIG_H="config.h"' $$^ $$(EXE_LDFLAGS) $$(LIBS) -static -Wl,--strip-all -o $$@ -mdbx_%.static-lto: src/mdbx_%.c src/config.h src/version.c src/alloy.c $(ALLOY_DEPS) - @echo ' CC+LD $@' - $(QUIET)$(CC) $(CFLAGS) -Os -flto $(MDBX_BUILD_OPTIONS) '-DLIBMDBX_API=' '-DMDBX_CONFIG_H="config.h"' \ - $< src/alloy.c $(EXE_LDFLAGS) $(LIBS) -static -Wl,--strip-all -o $@ +mdbx_$(1).static-lto: src/tools/$(1).c src/config.h src/version.c src/alloy.c $(ALLOY_DEPS) + @echo ' CC+LD $$@' + $(QUIET)$$(CC) $$(CFLAGS) -Os -flto $$(MDBX_BUILD_OPTIONS) -Isrc '-DLIBMDBX_API=' '-DMDBX_CONFIG_H="config.h"' \ + $$< src/alloy.c $$(EXE_LDFLAGS) $$(LIBS) -static -Wl,--strip-all -o $$@ + +endef +$(foreach file,$(TOOLS),$(eval $(call tool-rule,$(file)))) mdbx_test: $(TEST_OBJ) libmdbx.$(SO_SUFFIX) @echo ' LD $@' @@ -506,16 +512,13 @@ $(git_DIR)/HEAD $(git_DIR)/index $(git_DIR)/refs/tags: @echo '*** ' >&2 @echo '*** Please don''t use tarballs nor zips which are automatically provided by Github !' >&2 @echo '*** These archives do not contain version information and thus are unfit to build libmdbx.' >&2 - @echo '*** You can vote for ability of disabling auto-creation such unsuitable archives at https://github.community/t/disable-tarball' >&2 @echo '*** ' >&2 - @echo '*** Instead of above, just clone the git repository, either download a tarball or zip with the properly amalgamated source core.' >&2 - @echo '*** For embedding libmdbx use a git-submodule or the amalgamated source code.' >&2 - @echo '*** ' >&2 - @echo '*** Please, avoid using any other techniques.' >&2 + @echo '*** Instead just follow the https://libmdbx.dqdkfa.ru/usage.html' >&2 + @echo '*** PLEASE, AVOID USING ANY OTHER TECHNIQUES.' 
>&2 @echo '*** ' >&2 @false -src/version.c: src/version.c.in $(lastword $(MAKEFILE_LIST)) $(git_DIR)/HEAD $(git_DIR)/index $(git_DIR)/refs/tags +src/version.c: src/version.c.in $(lastword $(MAKEFILE_LIST)) $(git_DIR)/HEAD $(git_DIR)/index $(git_DIR)/refs/tags LICENSE NOTICE @echo ' MAKE $@' $(QUIET)sed \ -e "s|@MDBX_GIT_TIMESTAMP@|$(MDBX_GIT_TIMESTAMP)|" \ @@ -528,20 +531,21 @@ src/version.c: src/version.c.in $(lastword $(MAKEFILE_LIST)) $(git_DIR)/HEAD $(g -e "s|\$${MDBX_VERSION_REVISION}|$(MDBX_GIT_REVISION)|" \ src/version.c.in >$@ -src/config.h: buildflags.tag src/version.c $(lastword $(MAKEFILE_LIST)) +src/config.h: buildflags.tag src/version.c $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE @echo ' MAKE $@' $(QUIET)(echo '#define MDBX_BUILD_TIMESTAMP "$(MDBX_BUILD_TIMESTAMP)"' \ && echo "#define MDBX_BUILD_FLAGS \"$$(cat buildflags.tag)\"" \ && echo '#define MDBX_BUILD_COMPILER "$(shell (LC_ALL=C $(CC) --version || echo 'Please use GCC or CLANG compatible compiler') | head -1)"' \ && echo '#define MDBX_BUILD_TARGET "$(shell set -o pipefail; (LC_ALL=C $(CC) -v 2>&1 | grep -i '^Target:' | cut -d ' ' -f 2- || (LC_ALL=C $(CC) --version | grep -qi e2k && echo E2K) || echo 'Please use GCC or CLANG compatible compiler') | head -1)"' \ && echo '#define MDBX_BUILD_SOURCERY $(MDBX_BUILD_SOURCERY)' \ + && echo '#define MDBX_BUILD_CXX $(call select_by,MDBX_BUILD_CXX,1,0)' \ ) >$@ -mdbx-dylib.o: src/config.h src/version.c src/alloy.c $(ALLOY_DEPS) $(lastword $(MAKEFILE_LIST)) +mdbx-dylib.o: src/config.h src/version.c src/alloy.c $(ALLOY_DEPS) $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE @echo ' CC $@' $(QUIET)$(CC) $(CFLAGS) $(MDBX_BUILD_OPTIONS) '-DMDBX_CONFIG_H="config.h"' -DLIBMDBX_EXPORTS=1 -c src/alloy.c -o $@ -mdbx-static.o: src/config.h src/version.c src/alloy.c $(ALLOY_DEPS) $(lastword $(MAKEFILE_LIST)) +mdbx-static.o: src/config.h src/version.c src/alloy.c $(ALLOY_DEPS) $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE @echo ' CC $@' $(QUIET)$(CC) $(CFLAGS) 
$(MDBX_BUILD_OPTIONS) '-DMDBX_CONFIG_H="config.h"' -ULIBMDBX_EXPORTS -c src/alloy.c -o $@ @@ -570,9 +574,9 @@ docs/contrib.fame: src/version.c $(lastword $(MAKEFILE_LIST)) @echo ' MAKE $@' $(QUIET)echo "" > $@ && git fame --show-email --format=md --silent-progress -w -M -C | grep '^|' >> $@ -docs/overall.md: docs/__overview.md docs/_toc.md docs/__mithril.md docs/__history.md AUTHORS docs/contrib.fame LICENSE $(lastword $(MAKEFILE_LIST)) +docs/overall.md: docs/__overview.md docs/_toc.md docs/__mithril.md docs/__history.md COPYRIGHT LICENSE NOTICE $(lastword $(MAKEFILE_LIST)) @echo ' MAKE $@' - $(QUIET)echo -e "\\mainpage Overall\n\\section brief Brief" | cat - $(filter %.md, $^) >$@ && echo -e "\n\n\nLicense\n=======\n" | cat AUTHORS docs/contrib.fame - LICENSE >>$@ + $(QUIET)echo -e "\\mainpage Overall\n\\section brief Brief" | cat - $(filter %.md, $^) >$@ && echo -e "\n\n\nLicense\n=======\n" | cat - LICENSE >>$@ docs/intro.md: docs/_preface.md docs/__characteristics.md docs/__improvements.md docs/_restrictions.md docs/__performance.md @echo ' MAKE $@' @@ -582,11 +586,11 @@ docs/usage.md: docs/__usage.md docs/_starting.md docs/__bindings.md @echo ' MAKE $@' $(QUIET)echo -e "\\page usage Usage\n\\section getting Building & Embedding" | cat - $^ | sed 's/^Bindings$$/Bindings {#bindings}/' >$@ -doxygen: docs/Doxyfile docs/overall.md docs/intro.md docs/usage.md mdbx.h mdbx.h++ src/options.h ChangeLog.md AUTHORS LICENSE $(lastword $(MAKEFILE_LIST)) +doxygen: docs/Doxyfile docs/overall.md docs/intro.md docs/usage.md mdbx.h mdbx.h++ src/options.h ChangeLog.md COPYRIGHT LICENSE NOTICE $(lastword $(MAKEFILE_LIST)) @echo ' RUNNING doxygen...' 
$(QUIET)rm -rf docs/html && \ cat mdbx.h | tr '\n' '\r' | sed -e 's/LIBMDBX_INLINE_API\s*(\s*\([^,]\+\),\s*\([^,]\+\),\s*(\s*\([^)]\+\)\s*)\s*)\s*{/inline \1 \2(\3) {/g' | tr '\r' '\n' >docs/mdbx.h && \ - cp mdbx.h++ src/options.h ChangeLog.md docs/ && (cd docs && doxygen Doxyfile $(HUSH)) && cp AUTHORS LICENSE docs/html/ + cp mdbx.h++ src/options.h ChangeLog.md docs/ && (cd docs && doxygen Doxyfile $(HUSH)) && cp COPYRIGHT LICENSE NOTICE docs/html/ mdbx++-dylib.o: src/config.h src/mdbx.c++ mdbx.h mdbx.h++ $(lastword $(MAKEFILE_LIST)) @echo ' CC $@' @@ -617,7 +621,7 @@ release-assets: libmdbx-amalgamated-$(MDBX_GIT_VERSION).zpaq \ dist-checked.tag: $(addprefix dist/, $(DIST_SRC) $(DIST_EXTRA)) @echo -n ' VERIFY amalgamated sources...' - $(QUIET)rm -rf $@ dist/@tmp-shared_internals.inc \ + $(QUIET)rm -rf $@ dist/@tmp-essentials.inc dist/@tmp-internals.inc \ && if grep -R "define xMDBX_ALLOY" dist | grep -q MDBX_BUILD_SOURCERY; then echo "sed output is WRONG!" >&2; exit 2; fi \ && rm -rf dist-check && cp -r -p dist dist-check && ($(MAKE) IOARENA=false CXXSTD=$(CXXSTD) -C dist-check >dist-check.log 2>dist-check.err || (cat dist-check.err && exit 1)) \ && touch $@ || (echo " FAILED! 
See dist-check.log and dist-check.err" >&2; exit 2) && echo " Ok" @@ -634,7 +638,6 @@ dist-checked.tag: $(addprefix dist/, $(DIST_SRC) $(DIST_EXTRA)) @echo ' CREATE $@' $(QUIET)$(TAR) -c $(shell LC_ALL=C $(TAR) --help | grep -q -- '--owner' && echo '--owner=0 --group=0') -f - -C dist $(DIST_SRC) $(DIST_EXTRA) | bzip2 -9 -z >$@ - %.zip: dist-checked.tag @echo ' CREATE $@' $(QUIET)rm -rf $@ && (cd dist && $(ZIP) -9 ../$@ $(DIST_SRC) $(DIST_EXTRA)) &>zip.log @@ -643,52 +646,81 @@ dist-checked.tag: $(addprefix dist/, $(DIST_SRC) $(DIST_EXTRA)) @echo ' CREATE $@' $(QUIET)rm -rf $@ && (cd dist && zpaq a ../$@ $(DIST_SRC) $(DIST_EXTRA) -m59) &>zpaq.log -dist/mdbx.h: mdbx.h src/version.c $(lastword $(MAKEFILE_LIST)) - @echo ' COPY $@' - $(QUIET)mkdir -p dist && cp $< $@ - -dist/mdbx.h++: mdbx.h++ src/version.c $(lastword $(MAKEFILE_LIST)) - @echo ' COPY $@' - $(QUIET)mkdir -p dist && cp $< $@ - -dist/@tmp-shared_internals.inc: src/version.c $(ALLOY_DEPS) $(lastword $(MAKEFILE_LIST)) +dist/@tmp-essentials.inc: src/version.c $(ALLOY_DEPS) $(lastword $(MAKEFILE_LIST)) @echo ' ALLOYING...' 
$(QUIET)mkdir -p dist \ - && echo '#define xMDBX_ALLOY 1' >dist/@tmp-sed.inc && echo '#define MDBX_BUILD_SOURCERY $(MDBX_BUILD_SOURCERY)' >>dist/@tmp-sed.inc \ + && (grep -v '#include ' src/alloy.c && echo '#define MDBX_BUILD_SOURCERY $(MDBX_BUILD_SOURCERY)' \ && sed \ - -e '/#pragma once/r dist/@tmp-sed.inc' \ -e 's|#include "../mdbx.h"|@INCLUDE "mdbx.h"|' \ - -e '/#include "base.h"/r src/base.h' \ + -e '/#include "preface.h"/r src/preface.h' \ -e '/#include "osal.h"/r src/osal.h' \ -e '/#include "options.h"/r src/options.h' \ + -e '/#include "atomics-types.h"/r src/atomics-types.h' \ + -e '/#include "layout-dxb.h"/r src/layout-dxb.h' \ + -e '/#include "layout-lck.h"/r src/layout-lck.h' \ + -e '/#include "logging_and_debug.h"/r src/logging_and_debug.h' \ + -e '/#include "utils.h"/r src/utils.h' \ + -e '/#include "pnl.h"/r src/pnl.h' \ + src/essentials.h \ + | sed \ + -e '/#pragma once/d' -e '/#include "/d' \ -e '/ clang-format o/d' -e '/ \*INDENT-O/d' \ - src/internals.h >$@ \ - && rm -rf dist/@tmp-sed.inc + | grep -v '^/// ') >$@ -dist/mdbx.c: dist/@tmp-shared_internals.inc $(lastword $(MAKEFILE_LIST)) +dist/@tmp-internals.inc: dist/@tmp-essentials.inc src/version.c $(ALLOY_DEPS) $(lastword $(MAKEFILE_LIST)) + $(QUIET)(cat dist/@tmp-essentials.inc \ + && sed \ + -e '/#include "essentials.h"/d' \ + -e '/#include "atomics-ops.h"/r src/atomics-ops.h' \ + -e '/#include "proto.h"/r src/proto.h' \ + -e '/#include "txl.h"/r src/txl.h' \ + -e '/#include "unaligned.h"/r src/unaligned.h' \ + -e '/#include "cogs.h"/r src/cogs.h' \ + -e '/#include "cursor.h"/r src/cursor.h' \ + -e '/#include "dbi.h"/r src/dbi.h' \ + -e '/#include "dpl.h"/r src/dpl.h' \ + -e '/#include "gc.h"/r src/gc.h' \ + -e '/#include "lck.h"/r src/lck.h' \ + -e '/#include "meta.h"/r src/meta.h' \ + -e '/#include "node.h"/r src/node.h' \ + -e '/#include "page-iov.h"/r src/page-iov.h' \ + -e '/#include "page-ops.h"/r src/page-ops.h' \ + -e '/#include "spill.h"/r src/spill.h' \ + -e '/#include "sort.h"/r 
src/sort.h' \ + -e '/#include "tls.h"/r src/tls.h' \ + -e '/#include "walk.h"/r src/walk.h' \ + -e '/#include "windows-import.h"/r src/windows-import.h' \ + src/internals.h \ + | sed \ + -e '/#pragma once/d' -e '/#include "/d' \ + -e '/ clang-format o/d' -e '/ \*INDENT-O/d' \ + | grep -v '^/// ') >$@ + +dist/mdbx.c: dist/@tmp-internals.inc $(lastword $(MAKEFILE_LIST)) @echo ' MAKE $@' - $(QUIET)mkdir -p dist && (cat dist/@tmp-shared_internals.inc \ - && cat src/core.c src/osal.c src/version.c src/lck-windows.c src/lck-posix.c | sed \ + $(QUIET)(cat dist/@tmp-internals.inc $(shell git ls-files src/*.c | grep -v alloy) src/version.c | sed \ -e '/#include "debug_begin.h"/r src/debug_begin.h' \ -e '/#include "debug_end.h"/r src/debug_end.h' \ ) | sed -e '/#include "/d;/#pragma once/d' -e 's|@INCLUDE|#include|' \ -e '/ clang-format o/d;/ \*INDENT-O/d' >$@ -dist/mdbx.c++: dist/@tmp-shared_internals.inc src/mdbx.c++ $(lastword $(MAKEFILE_LIST)) +dist/mdbx.c++: dist/@tmp-essentials.inc src/mdbx.c++ $(lastword $(MAKEFILE_LIST)) @echo ' MAKE $@' - $(QUIET)mkdir -p dist && (cat dist/@tmp-shared_internals.inc && cat src/mdbx.c++) \ - | sed -e '/#include "/d;/#pragma once/d' -e 's|@INCLUDE|#include|;s|"mdbx.h"|"mdbx.h++"|' \ + $(QUIET)cat dist/@tmp-essentials.inc src/mdbx.c++ | sed \ + -e '/#define xMDBX_ALLOY/d' \ + -e '/#include "/d;/#pragma once/d' \ + -e 's|@INCLUDE|#include|;s|"mdbx.h"|"mdbx.h++"|' \ -e '/ clang-format o/d;/ \*INDENT-O/d' >$@ define dist-tool-rule -dist/$(1).c: src/$(1).c src/wingetopt.h src/wingetopt.c \ - dist/@tmp-shared_internals.inc $(lastword $(MAKEFILE_LIST)) +dist/mdbx_$(1).c: src/tools/$(1).c src/tools/wingetopt.h src/tools/wingetopt.c \ + dist/@tmp-internals.inc $(lastword $(MAKEFILE_LIST)) @echo ' MAKE $$@' $(QUIET)mkdir -p dist && sed \ - -e '/#include "internals.h"/r dist/@tmp-shared_internals.inc' \ - -e '/#include "wingetopt.h"/r src/wingetopt.c' \ + -e '/#include "essentials.h"/r dist/@tmp-essentials.inc' \ + -e '/#include "wingetopt.h"/r 
src/tools/wingetopt.c' \ -e '/ clang-format o/d' -e '/ \*INDENT-O/d' \ - src/$(1).c \ + src/tools/$(1).c \ | sed -e '/#include "/d;/#pragma once/d;/#define xMDBX_ALLOY/d' -e 's|@INCLUDE|#include|' \ -e '/ clang-format o/d;/ \*INDENT-O/d' >$$@ @@ -696,12 +728,12 @@ endef $(foreach file,$(TOOLS),$(eval $(call dist-tool-rule,$(file)))) define dist-extra-rule -dist/$(1): $(1) +dist/$(1): $(1) src/version.c $(lastword $(MAKEFILE_LIST)) @echo ' REFINE $$@' $(QUIET)mkdir -p $$(dir $$@) && sed -e '/^#> dist-cutoff-begin/,/^#< dist-cutoff-end/d' $$< >$$@ endef -$(foreach file,$(filter-out man1/% VERSION.txt %.in ntdll.def,$(DIST_EXTRA)),$(eval $(call dist-extra-rule,$(file)))) +$(foreach file,mdbx.h mdbx.h++ $(filter-out man1/% VERSION.txt %.in ntdll.def,$(DIST_EXTRA)),$(eval $(call dist-extra-rule,$(file)))) dist/VERSION.txt: src/version.c @echo ' MAKE $@' @@ -763,10 +795,10 @@ cross-qemu: #< dist-cutoff-end -install: $(LIBRARIES) $(TOOLS) $(HEADERS) +install: $(LIBRARIES) $(MDBX_TOOLS) $(HEADERS) @echo ' INSTALLING...' $(QUIET)mkdir -p $(DESTDIR)$(prefix)/bin$(suffix) && \ - $(INSTALL) -p $(EXE_INSTALL_FLAGS) $(TOOLS) $(DESTDIR)$(prefix)/bin$(suffix)/ && \ + $(INSTALL) -p $(EXE_INSTALL_FLAGS) $(MDBX_TOOLS) $(DESTDIR)$(prefix)/bin$(suffix)/ && \ mkdir -p $(DESTDIR)$(prefix)/lib$(suffix)/ && \ $(INSTALL) -p $(EXE_INSTALL_FLAGS) $(filter-out libmdbx.a,$(LIBRARIES)) $(DESTDIR)$(prefix)/lib$(suffix)/ && \ mkdir -p $(DESTDIR)$(prefix)/lib$(suffix)/ && \ @@ -784,7 +816,7 @@ install-no-strip: install uninstall: @echo ' UNINSTALLING/REMOVE...' 
- $(QUIET)rm -f $(addprefix $(DESTDIR)$(prefix)/bin$(suffix)/,$(TOOLS)) \ + $(QUIET)rm -f $(addprefix $(DESTDIR)$(prefix)/bin$(suffix)/,$(MDBX_TOOLS)) \ $(addprefix $(DESTDIR)$(prefix)/lib$(suffix)/,$(LIBRARIES)) \ $(addprefix $(DESTDIR)$(prefix)/include/,$(HEADERS)) \ $(addprefix $(DESTDIR)$(mandir)/man1/,$(MANPAGES)) diff --git a/LICENSE b/LICENSE index 05ad7571..f433b1a5 100644 --- a/LICENSE +++ b/LICENSE @@ -1,47 +1,177 @@ -The OpenLDAP Public License - Version 2.8, 17 August 2003 -Redistribution and use of this software and associated documentation -("Software"), with or without modification, are permitted provided -that the following conditions are met: + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ -1. Redistributions in source form must retain copyright statements - and notices, + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION -2. Redistributions in binary form must reproduce applicable copyright - statements and notices, this list of conditions, and the following - disclaimer in the documentation and/or other materials provided - with the distribution, and + 1. Definitions. -3. Redistributions must contain a verbatim copy of this document. + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. -The OpenLDAP Foundation may revise this license from time to time. -Each revision is distinguished by a version number. You may use -this Software under terms of this license revision or under the -terms of any subsequent revision of the license. + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. -THIS SOFTWARE IS PROVIDED BY THE OPENLDAP FOUNDATION AND ITS -CONTRIBUTORS ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, -INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY -AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT -SHALL THE OPENLDAP FOUNDATION, ITS CONTRIBUTORS, OR THE AUTHOR(S) -OR OWNER(S) OF THE SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. -The names of the authors and copyright holders must not be used in -advertising or otherwise to promote the sale, use or other dealing -in this Software without specific, written prior permission. Title -to copyright in this Software shall at all times remain with copyright -holders. + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. -OpenLDAP is a registered trademark of the OpenLDAP Foundation. + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. -Copyright 1999-2003 The OpenLDAP Foundation, Redwood City, -California, USA. All Rights Reserved. Permission to copy and -distribute verbatim copies of this document is granted. 
+ "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS diff --git a/NOTICE b/NOTICE new file mode 100644 index 00000000..cfc8dd45 --- /dev/null +++ b/NOTICE @@ -0,0 +1,23 @@ +libmdbx (aka MDBX) is an extremely fast, compact, powerful, embeddedable, +transactional key-value storage engine with open-source code. MDBX has a +specific set of properties and capabilities, focused on creating unique +lightweight solutions. + +Please visit https://libmdbx.dqdkfa.ru for more information, changelog, +documentation, C++ API description and links to the original git repo +with the source code. 
Questions, feedback and suggestions are welcome +to the Telegram' group https://t.me/libmdbx. + +Since 2017 _libmdbx_ development is funded by [Positive Technologies](https://www.ptsecurity.com) +and used inside company products. Всё будет хорошо! + +Copyright 2015-2024 Леонид Юрьев aka Leonid Yuriev +SPDX-License-Identifier: Apache-2.0 +For notes about the license change, credits and acknowledgments, +please refer to the COPYRIGHT file within original libmdbx source code +repository https://gitflic.ru/project/erthink/libmdbx + +On 2022-04-15 the Github administration, without any warning nor +explanation, deleted _libmdbx_ along with a lot of other projects, +simultaneously blocking access for many developers. +For the same reason ~~Github~~ is blacklisted forever. diff --git a/README.md b/README.md index c0c21c13..2fc4f514 100644 --- a/README.md +++ b/README.md @@ -1,36 +1,5 @@ -### Время учить Русский - -Начиная с 2021 года наблюдается устойчивые тенденции к распространению -недостоверной информации о _libmdbx_ в странах ~~НАТО~~, -политизированной критика, а также отказу от использования библиотеки в -пользу LMDB, несмотря на явные проблемы с одной стороны и преимущества с -другой. Поэтому начиная с 17 марта 2024 года прекращается -документирование и сопровождение проекта на английском языке. Новый -функционал будет документироваться только на русском языке, однако, -целенаправленного переписывания/перевода документации пока не -планируется. - -### The origin has been migrated to [GitFlic](https://gitflic.ru/project/erthink/libmdbx) -Since on 2022-04-15 the Github administration, without any warning -nor explanation, deleted _libmdbx_ along with a lot of other projects, -simultaneously blocking access for many developers. -For the same reason ~~Github~~ is blacklisted forever. - -GitFlic's developers plan to support other languages, -including English 和 中文, in the near future. 
- -### Основной репозиторий перемещен на [GitFlic](https://gitflic.ru/project/erthink/libmdbx) -Так как 15 апреля 2022 администрация Github без предупреждения и -объяснения причин удалила _libmdbx_ вместе с массой других проектов, -одновременно заблокировав доступ многим разработчикам. -По этой же причине ~~Github~~ навсегда занесен в черный список. - --------------------------------------------------------------------------------- - -*The Future will (be) [Positive](https://www.ptsecurity.com). Всё будет хорошо.* - > Please refer to the online [documentation](https://libmdbx.dqdkfa.ru) > with [`C` API description](https://libmdbx.dqdkfa.ru/group__c__api.html) > and pay attention to the [`C++` API](https://gitflic.ru/project/erthink/libmdbx/blob?file=mdbx.h%2B%2B#line-num-1). @@ -40,6 +9,8 @@ including English 和 中文, in the near future. > For NEWS take a look to the [ChangeLog](https://gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md) > or the [TODO](https://gitflic.ru/project/erthink/libmdbx/blob?file=TODO.md). +*The Future will (be) [Positive](https://www.ptsecurity.com). Всё будет хорошо.* + libmdbx ======== @@ -48,7 +19,7 @@ libmdbx _libmdbx_ is an extremely fast, compact, powerful, embedded, transactional [key-value database](https://en.wikipedia.org/wiki/Key-value_database), -with [permissive license](https://gitflic.ru/project/erthink/libmdbx/blob?file=LICENSE). +with [Apache 2.0 license](https://gitflic.ru/project/erthink/libmdbx/blob?file=LICENSE). _libmdbx_ has a specific set of properties and capabilities, focused on creating unique lightweight solutions. @@ -144,15 +115,14 @@ $ objdump -f -h -j .text libmdbx.so libmdbx.so: формат файла elf64-e2k архитектура: elbrus-v6:64, флаги 0x00000150: HAS_SYMS, DYNAMIC, D_PAGED - начальный адрес 0x0000000000021680 + начальный адрес 0x00000000??????00 Разделы: - Idx Name Разм VMA LMA Фа смещ. Выр. 
- 10 .text 000ddd28 0000000000021680 0000000000021680 00021680 2**3 - CONTENTS, ALLOC, LOAD, READONLY, CODE + Idx Name Разм VMA LMA Фа смещ. Выр. Флаги + 10 .text 000e7460 0000000000025c00 0000000000025c00 00025c00 2**10 CONTENTS, ALLOC, LOAD, READONLY, CODE $ cc --version - lcc:1.26.12:Jun-05-2022:e2k-v6-linux + lcc:1.27.14:Jan-31-2024:e2k-v6-linux gcc (GCC) 9.3.0 compatible ``` @@ -276,7 +246,7 @@ out-of-the-box, not silently and catastrophically break down. The list below is pruned down to the improvements most notable and obvious from the user's point of view. -## Added Features +## Some Added Features 1. Keys could be more than 2 times longer than _LMDB_. > For DB with default page size _libmdbx_ support keys up to 2022 bytes @@ -319,8 +289,7 @@ be found between a `KEY1` and a `KEY2`. This is a prerequisite for build and/or optimize query execution plans. > _libmdbx_ performs a rough estimate based on common B-tree pages of the paths from root to corresponding keys. -8. `mdbx_chk` utility for database integrity check. -Since version 0.9.1, the utility supports checking the database using any of the three meta pages and the ability to switch to it. +8. Database integrity check API both with standalone `mdbx_chk` utility. 9. Support for opening databases in the exclusive mode, including on a network share. @@ -410,12 +379,26 @@ The origin for now is at [GitFlic](https://gitflic.ru/project/erthink/libmdbx) with backup at [ABF by ROSA Лаб](https://abf.rosalinux.ru/erthink/libmdbx). For the same reason ~~Github~~ is blacklisted forever. +Начиная с 2021 года наблюдаются устойчивые тенденции к распространению +недостоверной информации о libmdbx в странах НАТО, политизированной +критики, а также отказу от использования библиотеки в пользу LMDB, +несмотря на явные проблемы с одной стороны и преимущества с другой. +Поэтому, начиная с 17 марта 2024 года, прекращается документирование и +сопровождение проекта на английском языке. 
Новая функциональность будет +документироваться только на русском языке, однако, целенаправленного +переписывания/перевода документации пока не планируется. + +Since May 2024 and version v0.13 _libmdbx_ was re-licensed under Apache-2.0 license. +Please refer to the `COPYRIGHT` file for license change explanations. + + ## Acknowledgments -Howard Chu is the author of LMDB, from which -originated the _libmdbx_ in 2015. +Howard Chu and Hallvard Furuseth + are the authors of _LMDB_, from which _libmdbx_ +was forked in 2015. Martin Hedenfalk is the author of `btree.c` code, which -was used to begin development of LMDB. +was used to begin development of _LMDB_. @@ -523,8 +506,10 @@ There are no special traits nor quirks if you use libmdbx ONLY inside the single But in a cross-container cases or with a host-container(s) mix the two major things MUST be guaranteed: -1. Coherence of memory mapping content and unified page cache inside OS kernel for host and all container(s) operated with a DB. -Basically this means must be only a single physical copy of each memory mapped DB' page in the system memory. +1. Coherence of memory mapping content and unified page cache inside OS +kernel for host and all container(s) operated with a DB. Basically this +means must be only a single physical copy of each memory mapped DB' page +in the system memory. 2. Uniqueness of [PID](https://en.wikipedia.org/wiki/Process_identifier) values and/or a common space for ones: - for POSIX systems: PID uniqueness for all processes operated with a DB. diff --git a/cmake/compiler.cmake b/cmake/compiler.cmake index 73cd3502..bd50b9d7 100644 --- a/cmake/compiler.cmake +++ b/cmake/compiler.cmake @@ -1,17 +1,5 @@ -## Copyright (c) 2012-2024 Leonid Yuriev . -## -## Licensed under the Apache License, Version 2.0 (the "License"); -## you may not use this file except in compliance with the License. 
-## You may obtain a copy of the License at -## -## http://www.apache.org/licenses/LICENSE-2.0 -## -## Unless required by applicable law or agreed to in writing, software -## distributed under the License is distributed on an "AS IS" BASIS, -## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -## See the License for the specific language governing permissions and -## limitations under the License. -## +## Copyright (c) 2010-2024 Леонид Юрьев aka Leonid Yuriev +## SPDX-License-Identifier: Apache-2.0 if(CMAKE_VERSION VERSION_LESS 3.8.2) cmake_minimum_required(VERSION 3.0.2) diff --git a/cmake/profile.cmake b/cmake/profile.cmake index a77b7dcb..9331a0bb 100644 --- a/cmake/profile.cmake +++ b/cmake/profile.cmake @@ -1,17 +1,5 @@ -## Copyright (c) 2012-2024 Leonid Yuriev . -## -## Licensed under the Apache License, Version 2.0 (the "License"); -## you may not use this file except in compliance with the License. -## You may obtain a copy of the License at -## -## http://www.apache.org/licenses/LICENSE-2.0 -## -## Unless required by applicable law or agreed to in writing, software -## distributed under the License is distributed on an "AS IS" BASIS, -## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -## See the License for the specific language governing permissions and -## limitations under the License. -## +## Copyright (c) 2012-2024 Леонид Юрьев aka Leonid Yuriev +## SPDX-License-Identifier: Apache-2.0 if(CMAKE_VERSION VERSION_LESS 3.8.2) cmake_minimum_required(VERSION 3.0.2) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 0fa57845..164ce8eb 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -1,17 +1,5 @@ -## Copyright (c) 2012-2024 Leonid Yuriev . -## -## Licensed under the Apache License, Version 2.0 (the "License"); -## you may not use this file except in compliance with the License. 
-## You may obtain a copy of the License at -## -## http://www.apache.org/licenses/LICENSE-2.0 -## -## Unless required by applicable law or agreed to in writing, software -## distributed under the License is distributed on an "AS IS" BASIS, -## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -## See the License for the specific language governing permissions and -## limitations under the License. -## +## Copyright (c) 2012-2024 Леонид Юрьев aka Leonid Yuriev +## SPDX-License-Identifier: Apache-2.0 if(CMAKE_VERSION VERSION_LESS 3.8.2) cmake_minimum_required(VERSION 3.0.2) diff --git a/mdbx.h b/mdbx.h index e19e47c8..37cfd10f 100644 --- a/mdbx.h +++ b/mdbx.h @@ -1,11 +1,10 @@ /** -_libmdbx_ is an extremely fast, compact, powerful, embedded, +_libmdbx_ (aka MDBX) is an extremely fast, compact, powerful, embeddable, transactional [key-value -store](https://en.wikipedia.org/wiki/Key-value_database) database, with -[permissive license](./LICENSE). _MDBX_ has a specific set of properties and -capabilities, focused on creating unique lightweight solutions with -extraordinary performance. +store](https://en.wikipedia.org/wiki/Key-value_database), with [Apache 2.0 +license](./LICENSE). _MDBX_ has a specific set of properties and capabilities, +focused on creating unique lightweight solutions with extraordinary performance. _libmdbx_ is superior to [LMDB](https://bit.ly/26ts7tL) in terms of features and reliability, not inferior in performance. In comparison to LMDB, _libmdbx_ @@ -14,60 +13,24 @@ break down. _libmdbx_ supports Linux, Windows, MacOS, OSX, iOS, Android, FreeBSD, DragonFly, Solaris, OpenSolaris, OpenIndiana, NetBSD, OpenBSD and other systems compliant with POSIX.1-2008. 
-The origin has been migrated to -[GitFlic](https://gitflic.ru/project/erthink/libmdbx) since on 2022-04-15 -the Github administration, without any warning nor explanation, deleted libmdbx -along with a lot of other projects, simultaneously blocking access for many -developers. For the same reason ~~Github~~ is blacklisted forever. +Please visit https://libmdbx.dqdkfa.ru for more information, documentation, +C++ API description and links to the origin git repo with the source code. +Questions, feedback and suggestions are welcome to the Telegram group +https://t.me/libmdbx. _The Future will (be) [Positive](https://www.ptsecurity.com). Всё будет хорошо._ +\note The origin has been migrated to +[GitFlic](https://gitflic.ru/project/erthink/libmdbx) since on 2022-04-15 the +Github administration, without any warning nor explanation, deleted libmdbx +along with a lot of other projects, simultaneously blocking access for many +developers. For the same reason ~~Github~~ is blacklisted forever. \section copyright LICENSE & COPYRIGHT - -\authors Copyright (c) 2015-2024, Leonid Yuriev -and other _libmdbx_ authors: please see [AUTHORS](./AUTHORS) file. - -\copyright Redistribution and use in source and binary forms, with or without -modification, are permitted only as authorized by the OpenLDAP Public License. - -A copy of this license is available in the file LICENSE in the -top-level directory of the distribution or, alternatively, at -. - - --- - -This code is derived from "LMDB engine" written by -Howard Chu (Symas Corporation), which itself derived from btree.c -written by Martin Hedenfalk. - - --- - -Portions Copyright 2011-2015 Howard Chu, Symas Corp. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted only as authorized by the OpenLDAP -Public License. - -A copy of this license is available in the file LICENSE in the -top-level directory of the distribution or, alternatively, at -.
- - --- - -Portions Copyright (c) 2009, 2010 Martin Hedenfalk - -Permission to use, copy, modify, and distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +\copyright SPDX-License-Identifier: Apache-2.0 +\note Please refer to the COPYRIGHT file for explanations of the license change, +credits and acknowledgments. +\author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 *******************************************************************************/ @@ -98,7 +61,7 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. /* clang-format off */ /** \file mdbx.h - \brief The libmdbx C API header file + \brief The libmdbx C API header file.
\defgroup c_api C API @{ @@ -359,6 +322,14 @@ typedef mode_t mdbx_mode_t; #endif #endif /* MDBX_DEPRECATED */ +#ifndef MDBX_DEPRECATED_ENUM +#if !defined(DOXYGEN) && (!defined(_MSC_VER) || _MSC_VER >= 1930) +#define MDBX_DEPRECATED_ENUM MDBX_DEPRECATED +#else +#define MDBX_DEPRECATED_ENUM /* avoid madness MSVC */ +#endif +#endif /* MDBX_DEPRECATED_ENUM */ + #ifndef __dll_export #if defined(_WIN32) || defined(_WIN64) || defined(__CYGWIN__) || \ defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__) @@ -393,7 +364,8 @@ typedef mode_t mdbx_mode_t; /** \brief Auxiliary macro for robustly define the both inline version of API * function and non-inline fallback dll-exported version for applications linked - * with old version of libmdbx, with a strictly ODR-common implementation. */ + * with old version of libmdbx, with a strictly ODR-common implementation. Thus, + * we emulate __extern_inline for all compilers, including non-GNU ones. */ #if defined(LIBMDBX_INTERNALS) && !defined(LIBMDBX_NO_EXPORTS_LEGACY_API) #define LIBMDBX_INLINE_API(TYPE, NAME, ARGS) \ /* proto of exported which uses common impl */ LIBMDBX_API TYPE NAME ARGS; \ @@ -888,7 +860,7 @@ enum MDBX_constants { /** Log level * \note Levels detailed than (great than) \ref MDBX_LOG_NOTICE * requires build libmdbx with \ref MDBX_DEBUG option. */ -enum MDBX_log_level_t { +typedef enum MDBX_log_level { /** Critical conditions, i.e. assertion failures. * \note libmdbx always produces such messages regardless * of \ref MDBX_DEBUG build option. 
*/ @@ -938,17 +910,14 @@ enum MDBX_log_level_t { /** for \ref mdbx_setup_debug() only: Don't change current settings */ MDBX_LOG_DONTCHANGE = -1 -}; -#ifndef __cplusplus -typedef enum MDBX_log_level_t MDBX_log_level_t; -#endif +} MDBX_log_level_t; /** \brief Runtime debug flags * * \details `MDBX_DBG_DUMP` and `MDBX_DBG_LEGACY_MULTIOPEN` always have an * effect, but `MDBX_DBG_ASSERT`, `MDBX_DBG_AUDIT` and `MDBX_DBG_JITTER` only if * libmdbx built with \ref MDBX_DEBUG. */ -enum MDBX_debug_flags_t { +typedef enum MDBX_debug_flags { MDBX_DBG_NONE = 0, /** Enable assertion checks. @@ -986,12 +955,8 @@ enum MDBX_debug_flags_t { /** for mdbx_setup_debug() only: Don't change current settings */ MDBX_DBG_DONTCHANGE = -1 -}; -#ifndef __cplusplus -typedef enum MDBX_debug_flags_t MDBX_debug_flags_t; -#else -DEFINE_ENUM_FLAG_OPERATORS(MDBX_debug_flags_t) -#endif +} MDBX_debug_flags_t; +DEFINE_ENUM_FLAG_OPERATORS(MDBX_debug_flags) /** \brief A debug-logger callback function, * called before printing the message and aborting. @@ -1086,7 +1051,7 @@ MDBX_NORETURN LIBMDBX_API void mdbx_assert_fail(const MDBX_env *env, * \ingroup c_opening * \anchor env_flags * \see mdbx_env_open() \see mdbx_env_set_flags() */ -enum MDBX_env_flags_t { +typedef enum MDBX_env_flags { MDBX_ENV_DEFAULTS = 0, /** Extra validation of DB structure and pages content. @@ -1210,7 +1175,7 @@ enum MDBX_env_flags_t { /** Отвязывает транзакции от потоков/threads насколько это возможно. * - * Эта опция предназначена для приложений, которые мультиплексируют множество + * Опция предназначена для приложений, которые мультиплексируют множество * пользовательских легковесных потоков выполнения по отдельным потокам * операционной системы, например как это происходит в средах выполнения * GoLang и Rust. Таким приложениям также рекомендуется сериализовать @@ -1278,10 +1243,9 @@ enum MDBX_env_flags_t { * Этот флаг вступает в силу при открытии среды и не может быть изменен после. 
*/ MDBX_NOSTICKYTHREADS = UINT32_C(0x200000), -#ifndef _MSC_VER /* avoid madness MSVC */ + /** \deprecated Please use \ref MDBX_NOSTICKYTHREADS instead. */ - MDBX_NOTLS MDBX_DEPRECATED = MDBX_NOSTICKYTHREADS, -#endif /* avoid madness MSVC */ + MDBX_NOTLS MDBX_DEPRECATED_ENUM = MDBX_NOSTICKYTHREADS, /** Don't do readahead. * @@ -1327,7 +1291,6 @@ enum MDBX_env_flags_t { * This flag may be changed at any time using `mdbx_env_set_flags()`. */ MDBX_NOMEMINIT = UINT32_C(0x1000000), -#ifndef _MSC_VER /* avoid madness MSVC */ /** Aims to coalesce a Garbage Collection items. * \deprecated Always enabled since v0.12 and deprecated since v0.13. * @@ -1339,8 +1302,7 @@ enum MDBX_env_flags_t { * Unallocated space and reducing the database file. * * This flag may be changed at any time using mdbx_env_set_flags(). */ - MDBX_COALESCE MDBX_DEPRECATED = UINT32_C(0x2000000), -#endif /* avoid madness MSVC */ + MDBX_COALESCE MDBX_DEPRECATED_ENUM = UINT32_C(0x2000000), /** LIFO policy for recycling a Garbage Collection items. * @@ -1543,19 +1505,14 @@ enum MDBX_env_flags_t { MDBX_UTTERLY_NOSYNC = MDBX_SAFE_NOSYNC | UINT32_C(0x100000), /** end of sync_modes @} */ -}; -#ifndef __cplusplus -/** \ingroup c_opening */ -typedef enum MDBX_env_flags_t MDBX_env_flags_t; -#else -DEFINE_ENUM_FLAG_OPERATORS(MDBX_env_flags_t) -#endif +} MDBX_env_flags_t; +DEFINE_ENUM_FLAG_OPERATORS(MDBX_env_flags) /** Transaction flags * \ingroup c_transactions * \anchor txn_flags * \see mdbx_txn_begin() \see mdbx_txn_flags() */ -enum MDBX_txn_flags_t { +typedef enum MDBX_txn_flags { /** Start read-write transaction. * * Only one write transaction may be active at a time. Writes are fully @@ -1627,18 +1584,14 @@ enum MDBX_txn_flags_t { * \note Transaction state flag. Returned from \ref mdbx_txn_flags() * but can't be used with \ref mdbx_txn_begin(). 
*/ MDBX_TXN_BLOCKED = MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_HAS_CHILD -}; -#ifndef __cplusplus -typedef enum MDBX_txn_flags_t MDBX_txn_flags_t; -#else -DEFINE_ENUM_FLAG_OPERATORS(MDBX_txn_flags_t) -#endif +} MDBX_txn_flags_t; +DEFINE_ENUM_FLAG_OPERATORS(MDBX_txn_flags) /** \brief Database flags * \ingroup c_dbi * \anchor db_flags * \see mdbx_dbi_open() */ -enum MDBX_db_flags_t { +typedef enum MDBX_db_flags { /** Variable length unique keys with usual byte-by-byte string comparison. */ MDBX_DB_DEFAULTS = 0, @@ -1681,19 +1634,14 @@ enum MDBX_db_flags_t { * sub-database will be opened with flags which it was created, and then an * application could determine the actual flags by \ref mdbx_dbi_flags(). */ MDBX_DB_ACCEDE = MDBX_ACCEDE -}; -#ifndef __cplusplus -/** \ingroup c_dbi */ -typedef enum MDBX_db_flags_t MDBX_db_flags_t; -#else +} MDBX_db_flags_t; DEFINE_ENUM_FLAG_OPERATORS(MDBX_db_flags_t) -#endif /** \brief Data changing flags * \ingroup c_crud * \see \ref c_crud_hints "Quick reference for Insert/Update/Delete operations" * \see mdbx_put() \see mdbx_cursor_put() \see mdbx_replace() */ -enum MDBX_put_flags_t { +typedef enum MDBX_put_flags { /** Upsertion by default (without any other flags) */ MDBX_UPSERT = 0, @@ -1731,18 +1679,13 @@ enum MDBX_put_flags_t { /** Only for \ref MDBX_DUPFIXED. * Store multiple data items in one call. */ MDBX_MULTIPLE = UINT32_C(0x80000) -}; -#ifndef __cplusplus -/** \ingroup c_crud */ -typedef enum MDBX_put_flags_t MDBX_put_flags_t; -#else -DEFINE_ENUM_FLAG_OPERATORS(MDBX_put_flags_t) -#endif +} MDBX_put_flags_t; +DEFINE_ENUM_FLAG_OPERATORS(MDBX_put_flags) /** \brief Environment copy flags * \ingroup c_extra * \see mdbx_env_copy() \see mdbx_env_copy2fd() */ -enum MDBX_copy_flags_t { +typedef enum MDBX_copy_flags { MDBX_CP_DEFAULTS = 0, /** Copy with compactification: Omit free space from copy and renumber all @@ -1751,19 +1694,14 @@ enum MDBX_copy_flags_t { /** Force to make resizable copy, i.e. 
dynamic size instead of fixed */ MDBX_CP_FORCE_DYNAMIC_SIZE = 2u -}; -#ifndef __cplusplus -/** \ingroup c_extra */ -typedef enum MDBX_copy_flags_t MDBX_copy_flags_t; -#else -DEFINE_ENUM_FLAG_OPERATORS(MDBX_copy_flags_t) -#endif +} MDBX_copy_flags_t; +DEFINE_ENUM_FLAG_OPERATORS(MDBX_copy_flags) /** \brief Cursor operations * \ingroup c_cursors * This is the set of all operations for retrieving data using a cursor. * \see mdbx_cursor_get() */ -enum MDBX_cursor_op { +typedef enum MDBX_cursor_op { /** Position at first key/data item */ MDBX_FIRST, @@ -1875,18 +1813,14 @@ enum MDBX_cursor_op { MDBX_TO_PAIR_EQUAL, MDBX_TO_PAIR_GREATER_OR_EQUAL, MDBX_TO_PAIR_GREATER_THAN -}; -#ifndef __cplusplus -/** \ingroup c_cursors */ -typedef enum MDBX_cursor_op MDBX_cursor_op; -#endif +} MDBX_cursor_op; /** \brief Errors and return codes * \ingroup c_err * * BerkeleyDB uses -30800 to -30999, we'll go under them * \see mdbx_strerror() \see mdbx_strerror_r() \see mdbx_liberr2str() */ -enum MDBX_error_t { +typedef enum MDBX_error { /** Successful result */ MDBX_SUCCESS = 0, @@ -2062,11 +1996,7 @@ enum MDBX_error_t { MDBX_EREMOTE = ENOTBLK, MDBX_EDEADLK = EDEADLK #endif /* !Windows */ -}; -#ifndef __cplusplus -/** \ingroup c_err */ -typedef enum MDBX_error_t MDBX_error_t; -#endif +} MDBX_error_t; /** MDBX_MAP_RESIZED * \ingroup c_err @@ -2158,7 +2088,7 @@ LIBMDBX_API int mdbx_env_create(MDBX_env **penv); /** \brief MDBX environment extra runtime options. * \ingroup c_settings * \see mdbx_env_set_option() \see mdbx_env_get_option() */ -enum MDBX_option_t { +typedef enum MDBX_option { /** \brief Controls the maximum number of named databases for the environment. * * \details By default only unnamed key-value database could used and @@ -2323,10 +2253,11 @@ enum MDBX_option_t { * \details This option controls the in-process threshold of minimum page * fill, as used space of percentage of a page. Neighbour pages emptier than * this value are candidates for merging. 
The threshold value is specified - * in 1/65536 of percent, which is equivalent to the 16-dot-16 fixed point - * format. The specified value must be in the range from 12.5% (almost empty) - * to 50% (half empty) which corresponds to the range from 8192 and to 32768 - * in units respectively. + * in 1/65536 points of a whole page, which is equivalent to the 16-dot-16 + * fixed point format. + * The specified value must be in the range from 12.5% (almost empty page) + * to 50% (half empty page) which corresponds to the range from 8192 and + * to 32768 in units respectively. * \see MDBX_opt_prefer_waf_insteadof_balance */ MDBX_opt_merge_threshold_16dot16_percent, @@ -2414,11 +2345,7 @@ enum MDBX_option_t { * * \see MDBX_opt_merge_threshold_16dot16_percent */ MDBX_opt_prefer_waf_insteadof_balance -}; -#ifndef __cplusplus -/** \ingroup c_settings */ -typedef enum MDBX_option_t MDBX_option_t; -#endif +} MDBX_option_t; /** \brief Sets the value of a extra runtime options for an environment. * \ingroup c_settings @@ -2533,7 +2460,7 @@ LIBMDBX_API int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, /** \brief Deletion modes for \ref mdbx_env_delete(). * \ingroup c_extra * \see mdbx_env_delete() */ -enum MDBX_env_delete_mode_t { +typedef enum MDBX_env_delete_mode { /** \brief Just delete the environment's files and directory if any. * \note On POSIX systems, processes already working with the database will * continue to work without interference until it close the environment. @@ -2547,11 +2474,7 @@ enum MDBX_env_delete_mode_t { /** \brief Wait until other processes closes the environment before deletion. */ MDBX_ENV_WAIT_FOR_UNUSED = 2, -}; -#ifndef __cplusplus -/** \ingroup c_extra */ -typedef enum MDBX_env_delete_mode_t MDBX_env_delete_mode_t; -#endif +} MDBX_env_delete_mode_t; /** \brief Delete the environment's files in a proper and multiprocess-safe way. 
* \ingroup c_extra @@ -2662,7 +2585,7 @@ struct MDBX_stat { uint32_t ms_depth; /**< Depth (height) of the B-tree */ uint64_t ms_branch_pages; /**< Number of internal (non-leaf) pages */ uint64_t ms_leaf_pages; /**< Number of leaf pages */ - uint64_t ms_overflow_pages; /**< Number of overflow pages */ + uint64_t ms_overflow_pages; /**< Number of large/overflow pages */ uint64_t ms_entries; /**< Number of data items */ uint64_t ms_mod_txnid; /**< Transaction ID of committed last modification */ }; @@ -3122,7 +3045,7 @@ LIBMDBX_API int mdbx_env_resurrect_after_fork(MDBX_env *env); * \ingroup c_settings * \anchor warmup_flags * \see mdbx_env_warmup() */ -enum MDBX_warmup_flags_t { +typedef enum MDBX_warmup_flags { /** By default \ref mdbx_env_warmup() just ask OS kernel to asynchronously * prefetch database pages. */ MDBX_warmup_default = 0, @@ -3165,12 +3088,8 @@ enum MDBX_warmup_flags_t { /** Release the lock that was performed before by \ref MDBX_warmup_lock. */ MDBX_warmup_release = 16, -}; -#ifndef __cplusplus -typedef enum MDBX_warmup_flags_t MDBX_warmup_flags_t; -#else -DEFINE_ENUM_FLAG_OPERATORS(MDBX_warmup_flags_t) -#endif +} MDBX_warmup_flags_t; +DEFINE_ENUM_FLAG_OPERATORS(MDBX_warmup_flags) /** \brief Warms up the database by loading pages into memory, optionally lock * ones. \ingroup c_settings @@ -3564,7 +3483,7 @@ MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_pairsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags); /** \brief Returns maximal data size in bytes to fit in a leaf-page or - * single overflow/large-page with the given page size and database flags, + * single large/overflow-page with the given page size and database flags, * or -1 if pagesize is invalid. 
* \ingroup c_statinfo * \see db_flags */ @@ -3740,7 +3659,7 @@ MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_env_get_pairsize4page_max(const MDBX_env *env, MDBX_db_flags_t flags); /** \brief Returns maximal data size in bytes to fit in a leaf-page or - * single overflow/large-page for specified database flags. + * single large/overflow-page for specified database flags. * \ingroup c_statinfo * * \param [in] env An environment handle returned by \ref mdbx_env_create(). @@ -4578,7 +4497,7 @@ LIBMDBX_API int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, /** \brief DBI state bits returted by \ref mdbx_dbi_flags_ex() * \ingroup c_statinfo * \see mdbx_dbi_flags_ex() */ -enum MDBX_dbi_state_t { +typedef enum MDBX_dbi_state { /** DB was written in this txn */ MDBX_DBI_DIRTY = 0x01, /** Cached Named-DB record is older than txnID */ @@ -4587,13 +4506,8 @@ enum MDBX_dbi_state_t { MDBX_DBI_FRESH = 0x04, /** Named-DB handle created in this txn */ MDBX_DBI_CREAT = 0x08, -}; -#ifndef __cplusplus -/** \ingroup c_statinfo */ -typedef enum MDBX_dbi_state_t MDBX_dbi_state_t; -#else -DEFINE_ENUM_FLAG_OPERATORS(MDBX_dbi_state_t) -#endif +} MDBX_dbi_state_t; +DEFINE_ENUM_FLAG_OPERATORS(MDBX_dbi_state) /** \brief Retrieve the DB flags and status for a database handle. * \ingroup c_statinfo @@ -5005,6 +4919,7 @@ LIBMDBX_API int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *cursor, * \see mdbx_cursor_renew() * \see mdbx_cursor_bind() * \see mdbx_cursor_close() + * \see mdbx_cursor_reset() * * \note In contrast to LMDB, the MDBX required that any opened cursors can be * reused and must be freed explicitly, regardless ones was opened in a @@ -5017,6 +4932,20 @@ LIBMDBX_API int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *cursor, * \returns A non-zero error value on failure and 0 on success. */ LIBMDBX_API int mdbx_cursor_unbind(MDBX_cursor *cursor); +/** \brief Сбрасывает состояние курсора. 
+ * \ingroup c_cursors + * + * В результате сброса курсор становится неустановленным и не позволяет + * выполнять операции относительного позиционирования, получения или изменения + * данных, до установки на позицию не зависящую от текущей. Что позволяет + * приложению пресекать дальнейшие операции без предварительного + * позиционирования курсора. + * + * \param [in] cursor Указатель на курсор. + * + * \returns Нулевое значение в случае успеха, иначе код ошибки. */ +LIBMDBX_API int mdbx_cursor_reset(MDBX_cursor *cursor); + /** \brief Create a cursor handle for the specified transaction and DBI handle. * \ingroup c_cursors * @@ -5197,6 +5126,21 @@ LIBMDBX_API int mdbx_cursor_compare(const MDBX_cursor *left, LIBMDBX_API int mdbx_cursor_get(MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op); +/** \brief Служебная функция для использования в утилитах. + * \ingroup c_extra + * + * При использовании определяемых пользователем функций сравнения (aka custom + * comparison functions) проверка порядка ключей может приводить к неверным + * результатам и возврату ошибки \ref MDBX_CORRUPTED. + * + * Эта функция отключает контроль порядка следования ключей на страницах при + * чтении страниц БД для этого курсора, и таким образом, позволяет прочитать + * данные при отсутствии/недоступности использованных функций сравнения. + * \see avoid_custom_comparators + * + * \returns Нулевое значение в случае успеха, иначе код ошибки. */ +LIBMDBX_API int mdbx_cursor_ignord(MDBX_cursor *cursor); + /** \brief Тип предикативных функций обратного вызова используемых * \ref mdbx_cursor_scan() и \ref mdbx_cursor_scan_from() для пробирования * пар ключ-значения. @@ -5424,18 +5368,16 @@ LIBMDBX_API int mdbx_cursor_scan_from(MDBX_cursor *cursor, * \param [in] limit The size of pairs buffer as the number of items, * but not a pairs. * \param [in] op A cursor operation \ref MDBX_cursor_op (only - * \ref MDBX_FIRST, \ref MDBX_NEXT, \ref MDBX_GET_CURRENT - * are supported).
+ * \ref MDBX_FIRST and \ref MDBX_NEXT are supported). * * \returns A non-zero error value on failure and 0 on success, * some possible errors are: * \retval MDBX_THREAD_MISMATCH Given transaction is not owned * by current thread. - * \retval MDBX_NOTFOUND No more key-value pairs are available. + * \retval MDBX_NOTFOUND No any key-value pairs are available. * \retval MDBX_ENODATA The cursor is already at the end of data. - * \retval MDBX_RESULT_TRUE The specified limit is less than the available - * key-value pairs on the current page/position - * that the cursor points to. + * \retval MDBX_RESULT_TRUE The returned chunk is the last one, + * and there are no pairs left. * \retval MDBX_EINVAL An invalid parameter was specified. */ LIBMDBX_API int mdbx_cursor_get_batch(MDBX_cursor *cursor, size_t *count, MDBX_val *pairs, size_t limit, @@ -6166,7 +6108,7 @@ LIBMDBX_API int mdbx_preopen_snapinfoW(const wchar_t *pathname, * \note Данный API еще не зафиксирован, в последующих версиях могут быть * незначительные доработки и изменения. * \see mdbx_env_chk() */ -enum MDBX_chk_flags_t { +typedef enum MDBX_chk_flags { /** Режим проверки по-умолчанию, в том числе в режиме только-чтения. */ MDBX_CHK_DEFAULTS = 0, @@ -6184,18 +6126,13 @@ enum MDBX_chk_flags_t { * \note Требуется при проверке унаследованных БД созданных с использованием * нестандартных (пользовательских) функций сравнения ключей или значений. */ MDBX_CHK_IGNORE_ORDER = 8 -}; -#ifndef __cplusplus -/** \ingroup c_opening */ -typedef enum MDBX_chk_flags_t MDBX_chk_flags_t; -#else -DEFINE_ENUM_FLAG_OPERATORS(MDBX_chk_flags_t) -#endif +} MDBX_chk_flags_t; +DEFINE_ENUM_FLAG_OPERATORS(MDBX_chk_flags) /** \brief Уровни логирование/детализации информации, * поставляемой через обратные вызовы при проверке целостности базы данных. 
* \see mdbx_env_chk() */ -enum MDBX_chk_severity { +typedef enum MDBX_chk_severity { MDBX_chk_severity_prio_shift = 4, MDBX_chk_severity_kind_mask = 0xF, MDBX_chk_fatal = 0x00u, @@ -6209,25 +6146,25 @@ enum MDBX_chk_severity { MDBX_chk_verbose = 0x78u, MDBX_chk_details = 0x89u, MDBX_chk_extra = 0x9Au -}; +} MDBX_chk_severity_t; /** \brief Стадии проверки, * сообщаемые через обратные вызовы при проверке целостности базы данных. * \see mdbx_env_chk() */ -enum MDBX_chk_stage { +typedef enum MDBX_chk_stage { MDBX_chk_none, MDBX_chk_init, MDBX_chk_lock, MDBX_chk_meta, - MDBX_chk_traversal_tree, - MDBX_chk_traversal_freedb, + MDBX_chk_tree, + MDBX_chk_gc, MDBX_chk_space, - MDBX_chk_traversal_maindb, - MDBX_chk_traversal_subdbs, + MDBX_chk_maindb, + MDBX_chk_subdbs, MDBX_chk_conclude, MDBX_chk_unlock, MDBX_chk_finalize -}; +} MDBX_chk_stage_t; /** \brief Виртуальная строка отчета, формируемого при проверке целостности базы * данных. \see mdbx_env_chk() */ @@ -6251,8 +6188,8 @@ typedef struct MDBX_chk_scope { MDBX_chk_issue_t *issues; struct MDBX_chk_internal *internal; const void *object; - enum MDBX_chk_stage stage; - enum MDBX_chk_severity verbosity; + MDBX_chk_stage_t stage; + MDBX_chk_severity_t verbosity; size_t subtotal_issues; union { void *ptr; @@ -6373,11 +6310,11 @@ typedef struct MDBX_chk_callbacks { size_t entry_number, const MDBX_val *key, const MDBX_val *value); - int (*stage_begin)(MDBX_chk_context_t *ctx, enum MDBX_chk_stage); - int (*stage_end)(MDBX_chk_context_t *ctx, enum MDBX_chk_stage, int err); + int (*stage_begin)(MDBX_chk_context_t *ctx, MDBX_chk_stage_t); + int (*stage_end)(MDBX_chk_context_t *ctx, MDBX_chk_stage_t, int err); MDBX_chk_line_t *(*print_begin)(MDBX_chk_context_t *ctx, - enum MDBX_chk_severity severity); + MDBX_chk_severity_t severity); void (*print_flush)(MDBX_chk_line_t *); void (*print_done)(MDBX_chk_line_t *); void (*print_chars)(MDBX_chk_line_t *, const char *str, size_t len); @@ -6417,8 +6354,8 @@ typedef struct 
MDBX_chk_callbacks { * \returns Нулевое значение в случае успеха, иначе код ошибки. */ LIBMDBX_API int mdbx_env_chk(MDBX_env *env, const MDBX_chk_callbacks_t *cb, MDBX_chk_context_t *ctx, - const enum MDBX_chk_flags_t flags, - enum MDBX_chk_severity verbosity, + const MDBX_chk_flags_t flags, + MDBX_chk_severity_t verbosity, unsigned timeout_seconds_16dot16); /** \brief Вспомогательная функция для подсчета проблем детектируемых diff --git a/mdbx.h++ b/mdbx.h++ index 19668308..f6912342 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -1,8 +1,8 @@ -/// \file mdbx.h++ -/// \brief The libmdbx C++ API header file. +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2020-2024 /// -/// \author Copyright (c) 2020-2024, Leonid Yuriev . -/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \file mdbx.h++ +/// \brief The libmdbx C++ API header file. /// /// Tested with: /// - Elbrus LCC >= 1.23 (http://www.mcst.ru/lcc); @@ -2329,14 +2329,16 @@ public: buffer(const char *c_str, bool make_reference, const allocator_type &allocator = allocator_type()) - : buffer(::mdbx::slice(c_str), make_reference, allocator) {} + : buffer(::mdbx::slice(c_str), make_reference, allocator){} #if defined(DOXYGEN) || \ (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) - template - buffer(const ::std::basic_string_view &view, bool make_reference, - const allocator_type &allocator = allocator_type()) - : buffer(::mdbx::slice(view), make_reference, allocator) {} + template + buffer(const ::std::basic_string_view &view, + bool make_reference, + const allocator_type &allocator = allocator_type()) + : buffer(::mdbx::slice(view), make_reference, allocator) { + } #endif /* __cpp_lib_string_view >= 201606L */ MDBX_CXX20_CONSTEXPR @@ -2362,15 +2364,16 @@ public: MDBX_CXX20_CONSTEXPR buffer(const char *c_str, const allocator_type &allocator = allocator_type()) - : buffer(::mdbx::slice(c_str), allocator) {} + : buffer(::mdbx::slice(c_str), 
allocator){} #if defined(DOXYGEN) || \ (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) - template - MDBX_CXX20_CONSTEXPR - buffer(const ::std::basic_string_view &view, - const allocator_type &allocator = allocator_type()) - : buffer(::mdbx::slice(view), allocator) {} + template + MDBX_CXX20_CONSTEXPR + buffer(const ::std::basic_string_view &view, + const allocator_type &allocator = allocator_type()) + : buffer(::mdbx::slice(view), allocator) { + } #endif /* __cpp_lib_string_view >= 201606L */ buffer(size_t head_room, size_t tail_room, @@ -3819,17 +3822,17 @@ public: static inline size_t pairsize4page_max(const env &, value_mode); /// \brief Returns maximal data size in bytes to fit in a leaf-page or - /// single overflow/large-page for specified size and database flags. + /// single large/overflow-page for specified size and database flags. static inline size_t valsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags); /// \brief Returns maximal data size in bytes to fit in a leaf-page or - /// single overflow/large-page for specified page size and values mode. + /// single large/overflow-page for specified page size and values mode. static inline size_t valsize4page_max(intptr_t pagesize, value_mode); /// \brief Returns maximal data size in bytes to fit in a leaf-page or - /// single overflow/large-page for given environment and database flags. + /// single large/overflow-page for given environment and database flags. static inline size_t valsize4page_max(const env &, MDBX_db_flags_t flags); /// \brief Returns maximal data size in bytes to fit in a leaf-page or - /// single overflow/large-page for specified page size and values mode. + /// single large/overflow-page for specified page size and values mode. static inline size_t valsize4page_max(const env &, value_mode); /// \brief Returns the maximal write transaction size (i.e. 
limit for diff --git a/packages/rpm.obsolete/CMakeLists.txt b/packages/rpm.obsolete/CMakeLists.txt deleted file mode 100644 index 5949e9f0..00000000 --- a/packages/rpm.obsolete/CMakeLists.txt +++ /dev/null @@ -1,184 +0,0 @@ -cmake_minimum_required(VERSION 2.8.7) -set(TARGET mdbx) -project(${TARGET}) - -set(MDBX_VERSION_MAJOR 0) -set(MDBX_VERSION_MINOR 3) -set(MDBX_VERSION_RELEASE 1) -set(MDBX_VERSION_REVISION 0) - -set(MDBX_VERSION_STRING ${MDBX_VERSION_MAJOR}.${MDBX_VERSION_MINOR}.${MDBX_VERSION_RELEASE}) - -enable_language(C) -enable_language(CXX) - -set(CMAKE_CXX_STANDARD 11) -set(CMAKE_CXX_STANDARD_REQUIRED on) - -add_definitions(-DNDEBUG=1 -DMDBX_DEBUG=0 -DLIBMDBX_EXPORTS=1 -D_GNU_SOURCE=1) - -find_package(Threads REQUIRED) - -get_directory_property(hasParent PARENT_DIRECTORY) -if(hasParent) - set(STANDALONE_BUILD 0) -else() - set(STANDALONE_BUILD 1) - enable_testing() - - if (CMAKE_C_COMPILER_ID MATCHES GNU) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g3") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wextra") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffunction-sections") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu11") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread") - endif() - - if (CMAKE_CXX_COMPILER_ID MATCHES GNU) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wpointer-arith") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sign-compare") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat-security") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Woverloaded-virtual") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wwrite-strings") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmax-errors=20") - 
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -Wunused-function -Wunused-variable -Wunused-value -Wmissing-declarations") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-field-initializers") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wcast-qual") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-strict-aliasing") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -finline-functions-called-once") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-packed-bitfield-compat") - - set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g3") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g3") - endif() - - if (COVERAGE) - if (NOT "${CMAKE_BUILD_TYPE}" STREQUAL "Debug") - message(FATAL_ERROR "Coverage requires -DCMAKE_BUILD_TYPE=Debug Current value=${CMAKE_BUILD_TYPE}") - endif() - - message(STATUS "Setting coverage compiler flags") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -ggdb3 -O0 --coverage -fprofile-arcs -ftest-coverage") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -ggdb3 -O0 --coverage -fprofile-arcs -ftest-coverage") - add_definitions(-DCOVERAGE_TEST) - endif() - - if (NOT TRAVIS) - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address -fsanitize=leak -fstack-protector-strong -static-libasan") - endif() -endif() - -set(${TARGET}_SRC - mdbx.h - src/bits.h - src/defs.h - src/lck-linux.c - src/mdbx.c - src/osal.c - src/osal.h - src/version.c - ) - -add_library(${TARGET}_STATIC STATIC - ${${TARGET}_SRC} - ) - -add_library(${TARGET} ALIAS ${TARGET}_STATIC) - -add_library(${TARGET}_SHARED SHARED - ${${TARGET}_SRC} - ) - -set_target_properties(${TARGET}_SHARED PROPERTIES - VERSION ${MDBX_VERSION_STRING} - SOVERSION ${MDBX_VERSION_MAJOR}.${MDBX_VERSION_MINOR} - OUTPUT_NAME ${TARGET} - CLEAN_DIRECT_OUTPUT 1 - ) - -set_target_properties(${TARGET}_STATIC PROPERTIES - VERSION ${MDBX_VERSION_STRING} - 
SOVERSION ${MDBX_VERSION_MAJOR}.${MDBX_VERSION_MINOR} - OUTPUT_NAME ${TARGET} - CLEAN_DIRECT_OUTPUT 1 - ) - -target_include_directories(${TARGET}_STATIC PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR}) -target_include_directories(${TARGET}_SHARED PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR}) - -target_link_libraries(${TARGET}_STATIC ${CMAKE_THREAD_LIBS_INIT}) -target_link_libraries(${TARGET}_SHARED ${CMAKE_THREAD_LIBS_INIT}) -if(UNIX AND NOT APPLE) - target_link_libraries(${TARGET}_STATIC rt) - target_link_libraries(${TARGET}_SHARED rt) -endif() - -install(TARGETS ${TARGET}_STATIC DESTINATION ${CMAKE_INSTALL_PREFIX}/lib64 COMPONENT mdbx) -install(TARGETS ${TARGET}_SHARED DESTINATION ${CMAKE_INSTALL_PREFIX}/lib64 COMPONENT mdbx) -install(FILES mdbx.h DESTINATION ${CMAKE_INSTALL_PREFIX}/include COMPONENT mdbx-devel) - -add_subdirectory(src/tools) -add_subdirectory(test) -add_subdirectory(test/pcrf) -add_subdirectory(tutorial) - -############################################################################## - -set(CPACK_GENERATOR "RPM") -set(CPACK_RPM_COMPONENT_INSTALL ON) - -# Version -if (NOT "$ENV{BUILD_NUMBER}" STREQUAL "") - set(CPACK_PACKAGE_RELEASE $ENV{BUILD_NUMBER}) -else() - if (NOT "$ENV{CI_PIPELINE_ID}" STREQUAL "") - set(CPACK_PACKAGE_RELEASE $ENV{CI_PIPELINE_ID}) - else() - set(CPACK_PACKAGE_RELEASE 1) - endif() -endif() -set(CPACK_RPM_PACKAGE_RELEASE ${CPACK_PACKAGE_RELEASE}) - -set(CPACK_PACKAGE_VERSION ${MDBX_VERSION_STRING}) -set(CPACK_PACKAGE_VERSION_FULL ${CPACK_PACKAGE_VERSION}-${CPACK_PACKAGE_RELEASE}) - -set(CPACK_RPM_mdbx-devel_PACKAGE_REQUIRES "mdbx = ${CPACK_PACKAGE_VERSION}") - -set(CPACK_RPM_SPEC_INSTALL_POST "/bin/true") -set(CPACK_RPM_mdbx_PACKAGE_NAME mdbx) -set(CPACK_RPM_mdbx-devel_PACKAGE_NAME mdbx-devel) -set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "The revised and extended descendant of Symas LMDB") - -set(CPACK_PACKAGE_VENDOR "???") -set(CPACK_PACKAGE_CONTACT "Vladimir Romanov") -set(CPACK_PACKAGE_RELOCATABLE false) -set(CPACK_RPM_PACKAGE_ARCHITECTURE 
"x86_64") -set(CPACK_RPM_PACKAGE_REQUIRES "") -set(CPACK_RPM_PACKAGE_GROUP "Applications/Database") - -set(CPACK_RPM_mdbx_FILE_NAME "${CPACK_RPM_mdbx_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_FULL}.${CPACK_RPM_PACKAGE_ARCHITECTURE}.rpm") -set(CPACK_RPM_mdbx-devel_FILE_NAME "${CPACK_RPM_mdbx-devel_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_FULL}.${CPACK_RPM_PACKAGE_ARCHITECTURE}.rpm") - -set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION - /usr/local - /usr/local/bin - /usr/local/lib64 - /usr/local/include - /usr/local/man - /usr/local/man/man1 - ) - -include(CPack) diff --git a/packages/rpm.obsolete/build.sh b/packages/rpm.obsolete/build.sh deleted file mode 100755 index 51708822..00000000 --- a/packages/rpm.obsolete/build.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -set -e -CONFIG=$1 - -if [[ -z "${CONFIG}" ]]; then - CONFIG=Debug -fi -if [[ -r /opt/rh/devtoolset-6/enable ]]; then - source /opt/rh/devtoolset-6/enable -fi -#rm -f -r build || true -mkdir -p cmake-build-${CONFIG} -pushd cmake-build-${CONFIG} &> /dev/null -if [[ ! -r Makefile ]]; then - cmake .. -DCMAKE_BUILD_TYPE=${CONFIG} -fi -make -j8 || exit 1 -popd &> /dev/null diff --git a/packages/rpm.obsolete/package.sh b/packages/rpm.obsolete/package.sh deleted file mode 100755 index d7f9ab29..00000000 --- a/packages/rpm.obsolete/package.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -set -e - -CONFIG=$1 - -if [[ -z "${CONFIG}" ]]; then - CONFIG=Debug -fi - -DIRNAME=`dirname ${BASH_SOURCE[0]}` -DIRNAME=`readlink --canonicalize ${DIRNAME}` - -if [[ -r /opt/rh/devtoolset-6/enable ]]; then - source /opt/rh/devtoolset-6/enable -fi - -mkdir -p cmake-build-${CONFIG} -pushd cmake-build-${CONFIG} &> /dev/null -if [[ ! -r Makefile ]]; then - cmake .. 
-DCMAKE_BUILD_TYPE=${CONFIG} -fi -rm -f *.rpm -make -j8 package || exit 1 -rm -f *-Unspecified.rpm -popd &> /dev/null diff --git a/src/alloy.c b/src/alloy.c index d79f269c..7ca5f07b 100644 --- a/src/alloy.c +++ b/src/alloy.c @@ -1,25 +1,52 @@ -/* - * Copyright 2015-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . */ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 #define xMDBX_ALLOY 1 /* alloyed build */ #include "internals.h" /* must be included first */ -#include "core.c" -#include "osal.c" -#include "version.c" - -#if defined(_WIN32) || defined(_WIN64) -#include "lck-windows.c" -#else +#include "api-cursor.c" +#include "api-env.c" +#include "api-extra.c" +#include "api-key-transform.c" +#include "api-txn.c" +#include "audit.c" +#include "chk.c" +#include "cogs.c" +#include "coherency.c" +#include "cold.c" +#include "copy.c" +#include "cursor.c" +#include "dbi.c" +#include "dpl.c" +#include "dxb.c" +#include "env-opts.c" +#include "env.c" +#include "gc-get.c" +#include "gc-put.c" +#include "global.c" #include "lck-posix.c" -#endif +#include "lck-windows.c" +#include "lck.c" +#include "logging_and_debug.c" +#include "meta.c" +#include "misc.c" +#include "mvcc-readers.c" +#include "node.c" +#include "osal.c" +#include "page-get.c" +#include "page-iov.c" +#include "page-ops.c" +#include "page-search.c" +#include "pnl.c" +#include "range-estimate.c" +#include "refund.c" +#include "spill.c" +#include "subdb.c" +#include "tls.c" +#include "tree.c" +#include "txl.c" +#include "txn.c" +#include "utils.c" +#include "version.c" +#include "walk.c" +#include 
"windows-import.c" diff --git a/src/api-cursor.c b/src/api-cursor.c new file mode 100644 index 00000000..2f2506e5 --- /dev/null +++ b/src/api-cursor.c @@ -0,0 +1,797 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +MDBX_cursor *mdbx_cursor_create(void *context) { + cursor_couple_t *couple = osal_calloc(1, sizeof(cursor_couple_t)); + if (unlikely(!couple)) + return nullptr; + + VALGRIND_MAKE_MEM_UNDEFINED(couple, sizeof(cursor_couple_t)); + couple->outer.signature = cur_signature_ready4dispose; + couple->outer.next = &couple->outer; + couple->userctx = context; + couple->outer.top_and_flags = z_poor_mark; + couple->inner.cursor.top_and_flags = z_poor_mark | z_inner; + VALGRIND_MAKE_MEM_DEFINED(&couple->outer.backup, + sizeof(couple->outer.backup)); + VALGRIND_MAKE_MEM_DEFINED(&couple->outer.tree, sizeof(couple->outer.tree)); + VALGRIND_MAKE_MEM_DEFINED(&couple->outer.clc, sizeof(couple->outer.clc)); + VALGRIND_MAKE_MEM_DEFINED(&couple->outer.dbi_state, + sizeof(couple->outer.dbi_state)); + VALGRIND_MAKE_MEM_DEFINED(&couple->outer.subcur, + sizeof(couple->outer.subcur)); + VALGRIND_MAKE_MEM_DEFINED(&couple->outer.txn, sizeof(couple->outer.txn)); + return &couple->outer; +} + +int mdbx_cursor_renew(const MDBX_txn *txn, MDBX_cursor *mc) { + return likely(mc) + ? 
mdbx_cursor_bind(txn, mc, (kvx_t *)mc->clc - txn->env->kvs) + : MDBX_EINVAL; +} + +int mdbx_cursor_reset(MDBX_cursor *mc) { + if (unlikely(!mc)) + return MDBX_EINVAL; + + if (unlikely(mc->signature != cur_signature_ready4dispose && + mc->signature != cur_signature_live)) + return MDBX_EBADSIGN; + + cursor_couple_t *couple = (cursor_couple_t *)mc; + couple->outer.top_and_flags = z_poor_mark; + couple->inner.cursor.top_and_flags = z_poor_mark | z_inner; + return MDBX_SUCCESS; +} + +int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { + if (unlikely(!mc)) + return MDBX_EINVAL; + + if (unlikely(mc->signature != cur_signature_ready4dispose && + mc->signature != cur_signature_live)) + return MDBX_EBADSIGN; + + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(dbi == FREE_DBI && !(txn->flags & MDBX_TXN_RDONLY))) + return MDBX_EACCESS; + + if (unlikely(mc->backup)) /* Cursor from parent transaction */ { + cASSERT(mc, mc->signature == cur_signature_live); + if (unlikely(cursor_dbi(mc) != dbi || + /* paranoia */ mc->signature != cur_signature_live || + mc->txn != txn)) + return MDBX_EINVAL; + + cASSERT(mc, mc->tree == &txn->dbs[dbi]); + cASSERT(mc, mc->clc == &txn->env->kvs[dbi].clc); + cASSERT(mc, cursor_dbi(mc) == dbi); + return likely(cursor_dbi(mc) == dbi && + /* paranoia */ mc->signature == cur_signature_live && + mc->txn == txn) + ? 
MDBX_SUCCESS + : MDBX_EINVAL /* Disallow change DBI in nested transactions */; + } + + if (mc->signature == cur_signature_live) { + rc = mdbx_cursor_unbind(mc); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + cASSERT(mc, mc->next == mc); + + rc = cursor_init(mc, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + mc->next = txn->cursors[dbi]; + txn->cursors[dbi] = mc; + return MDBX_SUCCESS; +} + +int mdbx_cursor_unbind(MDBX_cursor *mc) { + if (unlikely(!mc)) + return MDBX_EINVAL; + + if (unlikely(mc->signature != cur_signature_live)) + return (mc->signature == cur_signature_ready4dispose) ? MDBX_SUCCESS + : MDBX_EBADSIGN; + + if (unlikely(mc->backup)) /* Cursor from parent transaction */ + return MDBX_EINVAL; + + eASSERT(nullptr, mc->txn && mc->txn->signature == txn_signature); + cASSERT(mc, mc->signature == cur_signature_live); + cASSERT(mc, !mc->backup); + if (unlikely(!mc->txn || mc->txn->signature != txn_signature)) { + ERROR("Wrong cursor's transaction %p 0x%x", + __Wpedantic_format_voidptr(mc->txn), + mc->txn ? 
mc->txn->signature : 0); + return MDBX_PROBLEM; + } + if (mc->next != mc) { + const size_t dbi = (kvx_t *)mc->clc - mc->txn->env->kvs; + cASSERT(mc, cursor_dbi(mc) == dbi); + cASSERT(mc, dbi < mc->txn->n_dbi); + if (dbi < mc->txn->n_dbi) { + MDBX_cursor **prev = &mc->txn->cursors[dbi]; + while (*prev && *prev != mc) + prev = &(*prev)->next; + cASSERT(mc, *prev == mc); + *prev = mc->next; + } + mc->next = mc; + } + mc->signature = cur_signature_ready4dispose; + mc->flags = 0; + return MDBX_SUCCESS; +} + +int mdbx_cursor_open(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **ret) { + if (unlikely(!ret)) + return MDBX_EINVAL; + *ret = nullptr; + + MDBX_cursor *const mc = mdbx_cursor_create(nullptr); + if (unlikely(!mc)) + return MDBX_ENOMEM; + + int rc = mdbx_cursor_bind(txn, mc, dbi); + if (unlikely(rc != MDBX_SUCCESS)) { + mdbx_cursor_close(mc); + return rc; + } + + *ret = mc; + return MDBX_SUCCESS; +} + +void mdbx_cursor_close(MDBX_cursor *mc) { + if (likely(mc)) { + ENSURE(nullptr, mc->signature == cur_signature_live || + mc->signature == cur_signature_ready4dispose); + MDBX_txn *const txn = mc->txn; + if (!mc->backup) { + mc->txn = nullptr; + /* Unlink from txn, if tracked. 
*/ + if (mc->next != mc) { + ENSURE(txn->env, check_txn(txn, 0) == MDBX_SUCCESS); + const size_t dbi = (kvx_t *)mc->clc - txn->env->kvs; + tASSERT(txn, dbi < txn->n_dbi); + if (dbi < txn->n_dbi) { + MDBX_cursor **prev = &txn->cursors[dbi]; + while (*prev && *prev != mc) + prev = &(*prev)->next; + tASSERT(txn, *prev == mc); + *prev = mc->next; + } + mc->next = mc; + } + mc->signature = 0; + osal_free(mc); + } else { + /* Cursor closed before nested txn ends */ + tASSERT(txn, mc->signature == cur_signature_live); + ENSURE(txn->env, check_txn_rw(txn, 0) == MDBX_SUCCESS); + mc->signature = cur_signature_wait4eot; + } + } +} + +int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest) { + if (unlikely(!src)) + return MDBX_EINVAL; + if (unlikely(src->signature != cur_signature_live)) + return (src->signature == cur_signature_ready4dispose) ? MDBX_EINVAL + : MDBX_EBADSIGN; + + int rc = mdbx_cursor_bind(src->txn, dest, cursor_dbi(src)); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + assert(dest->tree == src->tree); + assert(cursor_dbi(dest) == cursor_dbi(src)); +again: + assert(dest->clc == src->clc); + assert(dest->txn == src->txn); + dest->top_and_flags = src->top_and_flags; + for (intptr_t i = 0; i <= src->top; ++i) { + dest->ki[i] = src->ki[i]; + dest->pg[i] = src->pg[i]; + } + + if (src->subcur) { + dest->subcur->nested_tree = src->subcur->nested_tree; + src = &src->subcur->cursor; + dest = &dest->subcur->cursor; + goto again; + } + + return MDBX_SUCCESS; +} + +int mdbx_txn_release_all_cursors(const MDBX_txn *txn, bool unbind) { + int rc = check_txn(txn, MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD); + if (likely(rc == MDBX_SUCCESS)) { + TXN_FOREACH_DBI_FROM(txn, i, MAIN_DBI) { + while (txn->cursors[i]) { + MDBX_cursor *mc = txn->cursors[i]; + ENSURE(nullptr, mc->signature == cur_signature_live && + (mc->next != mc) && !mc->backup); + rc = likely(rc < INT_MAX) ? 
rc + 1 : rc; + txn->cursors[i] = mc->next; + mc->next = mc; + if (unbind) { + mc->signature = cur_signature_ready4dispose; + mc->flags = 0; + } else { + mc->signature = 0; + osal_free(mc); + } + } + } + } else { + eASSERT(nullptr, rc < 0); + } + return rc; +} + +int mdbx_cursor_compare(const MDBX_cursor *l, const MDBX_cursor *r, + bool ignore_multival) { + const int incomparable = INT16_MAX + 1; + if (unlikely(!l)) + return r ? -incomparable * 9 : 0; + else if (unlikely(!r)) + return incomparable * 9; + + if (unlikely(l->signature != cur_signature_live)) + return (r->signature == cur_signature_live) ? -incomparable * 8 : 0; + if (unlikely(r->signature != cur_signature_live)) + return (l->signature == cur_signature_live) ? incomparable * 8 : 0; + + if (unlikely(l->clc != r->clc)) { + if (l->txn->env != r->txn->env) + return (l->txn->env > r->txn->env) ? incomparable * 7 : -incomparable * 7; + if (l->txn->txnid != r->txn->txnid) + return (l->txn->txnid > r->txn->txnid) ? incomparable * 6 + : -incomparable * 6; + return (l->clc > r->clc) ? incomparable * 5 : -incomparable * 5; + } + assert(cursor_dbi(l) == cursor_dbi(r)); + + int diff = is_pointed(l) - is_pointed(r); + if (unlikely(diff)) + return (diff > 0) ? incomparable * 4 : -incomparable * 4; + if (unlikely(!is_pointed(l))) + return 0; + + intptr_t detent = (l->top <= r->top) ? l->top : r->top; + for (intptr_t i = 0; i <= detent; ++i) { + diff = l->ki[i] - r->ki[i]; + if (diff) + return diff; + } + if (unlikely(l->top != r->top)) + return (l->top > r->top) ? incomparable * 3 : -incomparable * 3; + + assert((l->subcur != nullptr) == (r->subcur != nullptr)); + if (unlikely((l->subcur != nullptr) != (r->subcur != nullptr))) + return l->subcur ? 
incomparable * 2 : -incomparable * 2; + if (ignore_multival || !l->subcur) + return 0; + +#if MDBX_DEBUG + if (is_pointed(&l->subcur->cursor)) { + const page_t *mp = l->pg[l->top]; + const node_t *node = page_node(mp, l->ki[l->top]); + assert(node_flags(node) & N_DUPDATA); + } + if (is_pointed(&r->subcur->cursor)) { + const page_t *mp = r->pg[r->top]; + const node_t *node = page_node(mp, r->ki[r->top]); + assert(node_flags(node) & N_DUPDATA); + } +#endif /* MDBX_DEBUG */ + + l = &l->subcur->cursor; + r = &r->subcur->cursor; + diff = is_pointed(l) - is_pointed(r); + if (unlikely(diff)) + return (diff > 0) ? incomparable * 2 : -incomparable * 2; + if (unlikely(!is_pointed(l))) + return 0; + + detent = (l->top <= r->top) ? l->top : r->top; + for (intptr_t i = 0; i <= detent; ++i) { + diff = l->ki[i] - r->ki[i]; + if (diff) + return diff; + } + if (unlikely(l->top != r->top)) + return (l->top > r->top) ? incomparable : -incomparable; + + return (l->flags & z_eof_hard) - (r->flags & z_eof_hard); +} + +/* Return the count of duplicate data items for the current key */ +int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) { + if (unlikely(mc == nullptr)) + return MDBX_EINVAL; + + if (unlikely(mc->signature != cur_signature_live)) + return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL + : MDBX_EBADSIGN; + + int rc = check_txn(mc->txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(countp == nullptr)) + return MDBX_EINVAL; + + if ((*countp = is_filled(mc)) > 0) { + if (!inner_hollow(mc)) { + const page_t *mp = mc->pg[mc->top]; + const node_t *node = page_node(mp, mc->ki[mc->top]); + cASSERT(mc, node_flags(node) & N_DUPDATA); + *countp = unlikely(mc->subcur->nested_tree.items > PTRDIFF_MAX) + ? 
PTRDIFF_MAX + : (size_t)mc->subcur->nested_tree.items; + } + } + return MDBX_SUCCESS; +} + +int mdbx_cursor_on_first(const MDBX_cursor *mc) { + if (unlikely(mc == nullptr)) + return MDBX_EINVAL; + + if (unlikely(mc->signature != cur_signature_live)) + return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL + : MDBX_EBADSIGN; + + for (intptr_t i = 0; i <= mc->top; ++i) { + if (mc->ki[i]) + return MDBX_RESULT_FALSE; + } + + return MDBX_RESULT_TRUE; +} + +int mdbx_cursor_on_first_dup(const MDBX_cursor *mc) { + if (unlikely(mc == nullptr)) + return MDBX_EINVAL; + + if (unlikely(mc->signature != cur_signature_live)) + return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL + : MDBX_EBADSIGN; + + if (is_filled(mc) && mc->subcur) { + mc = &mc->subcur->cursor; + for (intptr_t i = 0; i <= mc->top; ++i) { + if (mc->ki[i]) + return MDBX_RESULT_FALSE; + } + } + + return MDBX_RESULT_TRUE; +} + +int mdbx_cursor_on_last(const MDBX_cursor *mc) { + if (unlikely(mc == nullptr)) + return MDBX_EINVAL; + + if (unlikely(mc->signature != cur_signature_live)) + return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL + : MDBX_EBADSIGN; + + for (intptr_t i = 0; i <= mc->top; ++i) { + size_t nkeys = page_numkeys(mc->pg[i]); + if (mc->ki[i] < nkeys - 1) + return MDBX_RESULT_FALSE; + } + + return MDBX_RESULT_TRUE; +} + +int mdbx_cursor_on_last_dup(const MDBX_cursor *mc) { + if (unlikely(mc == nullptr)) + return MDBX_EINVAL; + + if (unlikely(mc->signature != cur_signature_live)) + return (mc->signature == cur_signature_ready4dispose) ? 
MDBX_EINVAL + : MDBX_EBADSIGN; + + if (is_filled(mc) && mc->subcur) { + mc = &mc->subcur->cursor; + for (intptr_t i = 0; i <= mc->top; ++i) { + size_t nkeys = page_numkeys(mc->pg[i]); + if (mc->ki[i] < nkeys - 1) + return MDBX_RESULT_FALSE; + } + } + + return MDBX_RESULT_TRUE; +} + +int mdbx_cursor_eof(const MDBX_cursor *mc) { + if (unlikely(mc == nullptr)) + return MDBX_EINVAL; + + if (unlikely(mc->signature != cur_signature_live)) + return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL + : MDBX_EBADSIGN; + + return is_eof(mc) ? MDBX_RESULT_TRUE : MDBX_RESULT_FALSE; +} + +int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, + MDBX_cursor_op op) { + if (unlikely(mc == nullptr)) + return MDBX_EINVAL; + + if (unlikely(mc->signature != cur_signature_live)) + return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL + : MDBX_EBADSIGN; + + int rc = check_txn(mc->txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(cursor_dbi_changed(mc))) + return MDBX_BAD_DBI; + + return cursor_ops(mc, key, data, op); +} + +__hot static int scan_confinue(MDBX_cursor *mc, MDBX_predicate_func *predicate, + void *context, void *arg, MDBX_val *key, + MDBX_val *value, MDBX_cursor_op turn_op) { + int rc; + switch (turn_op) { + case MDBX_NEXT: + case MDBX_NEXT_NODUP: + for (;;) { + rc = predicate(context, key, value, arg); + if (rc != MDBX_RESULT_FALSE) + return rc; + rc = outer_next(mc, key, value, turn_op); + if (unlikely(rc != MDBX_SUCCESS)) + return (rc == MDBX_NOTFOUND) ? MDBX_RESULT_FALSE : rc; + } + + case MDBX_PREV: + case MDBX_PREV_NODUP: + for (;;) { + rc = predicate(context, key, value, arg); + if (rc != MDBX_RESULT_FALSE) + return rc; + rc = outer_prev(mc, key, value, turn_op); + if (unlikely(rc != MDBX_SUCCESS)) + return (rc == MDBX_NOTFOUND) ? 
MDBX_RESULT_FALSE : rc; + } + + case MDBX_NEXT_DUP: + if (mc->subcur) + for (;;) { + rc = predicate(context, key, value, arg); + if (rc != MDBX_RESULT_FALSE) + return rc; + rc = inner_next(&mc->subcur->cursor, value); + if (unlikely(rc != MDBX_SUCCESS)) + return (rc == MDBX_NOTFOUND) ? MDBX_RESULT_FALSE : rc; + } + return MDBX_NOTFOUND; + + case MDBX_PREV_DUP: + if (mc->subcur) + for (;;) { + rc = predicate(context, key, value, arg); + if (rc != MDBX_RESULT_FALSE) + return rc; + rc = inner_prev(&mc->subcur->cursor, value); + if (unlikely(rc != MDBX_SUCCESS)) + return (rc == MDBX_NOTFOUND) ? MDBX_RESULT_FALSE : rc; + } + return MDBX_NOTFOUND; + + default: + for (;;) { + rc = predicate(context, key, value, arg); + if (rc != MDBX_RESULT_FALSE) + return rc; + rc = cursor_ops(mc, key, value, turn_op); + if (unlikely(rc != MDBX_SUCCESS)) + return (rc == MDBX_NOTFOUND) ? MDBX_RESULT_FALSE : rc; + } + } +} + +int mdbx_cursor_scan(MDBX_cursor *mc, MDBX_predicate_func *predicate, + void *context, MDBX_cursor_op start_op, + MDBX_cursor_op turn_op, void *arg) { + if (unlikely(!predicate)) + return MDBX_EINVAL; + + const unsigned valid_start_mask = + 1 << MDBX_FIRST | 1 << MDBX_FIRST_DUP | 1 << MDBX_LAST | + 1 << MDBX_LAST_DUP | 1 << MDBX_GET_CURRENT | 1 << MDBX_GET_MULTIPLE; + if (unlikely(start_op > 30 || ((1 << start_op) & valid_start_mask) == 0)) + return MDBX_EINVAL; + + const unsigned valid_turn_mask = + 1 << MDBX_NEXT | 1 << MDBX_NEXT_DUP | 1 << MDBX_NEXT_NODUP | + 1 << MDBX_PREV | 1 << MDBX_PREV_DUP | 1 << MDBX_PREV_NODUP | + 1 << MDBX_NEXT_MULTIPLE | 1 << MDBX_PREV_MULTIPLE; + if (unlikely(turn_op > 30 || ((1 << turn_op) & valid_turn_mask) == 0)) + return MDBX_EINVAL; + + MDBX_val key = {nullptr, 0}, value = {nullptr, 0}; + int rc = mdbx_cursor_get(mc, &key, &value, start_op); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + return scan_confinue(mc, predicate, context, arg, &key, &value, turn_op); +} + +int mdbx_cursor_scan_from(MDBX_cursor *mc, MDBX_predicate_func 
*predicate, + void *context, MDBX_cursor_op from_op, MDBX_val *key, + MDBX_val *value, MDBX_cursor_op turn_op, void *arg) { + if (unlikely(!predicate || !key)) + return MDBX_EINVAL; + + const unsigned valid_start_mask = + 1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | 1 << MDBX_SET_KEY | + 1 << MDBX_GET_MULTIPLE | 1 << MDBX_SET_LOWERBOUND | + 1 << MDBX_SET_UPPERBOUND; + if (unlikely(from_op < MDBX_TO_KEY_LESSER_THAN && + ((1 << from_op) & valid_start_mask) == 0)) + return MDBX_EINVAL; + + const unsigned valid_turn_mask = + 1 << MDBX_NEXT | 1 << MDBX_NEXT_DUP | 1 << MDBX_NEXT_NODUP | + 1 << MDBX_PREV | 1 << MDBX_PREV_DUP | 1 << MDBX_PREV_NODUP | + 1 << MDBX_NEXT_MULTIPLE | 1 << MDBX_PREV_MULTIPLE; + if (unlikely(turn_op > 30 || ((1 << turn_op) & valid_turn_mask) == 0)) + return MDBX_EINVAL; + + int rc = mdbx_cursor_get(mc, key, value, from_op); + if (unlikely(MDBX_IS_ERROR(rc))) + return rc; + + cASSERT(mc, key != nullptr); + MDBX_val stub; + if (!value) { + value = &stub; + rc = cursor_ops(mc, key, value, MDBX_GET_CURRENT); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + return scan_confinue(mc, predicate, context, arg, key, value, turn_op); +} + +int mdbx_cursor_get_batch(MDBX_cursor *mc, size_t *count, MDBX_val *pairs, + size_t limit, MDBX_cursor_op op) { + if (unlikely(!count)) + return MDBX_EINVAL; + + *count = 0; + if (unlikely(mc == nullptr || limit < 4 || limit > INTPTR_MAX - 2)) + return MDBX_EINVAL; + + if (unlikely(mc->signature != cur_signature_live)) + return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL + : MDBX_EBADSIGN; + + int rc = check_txn(mc->txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(cursor_dbi_changed(mc))) + return MDBX_BAD_DBI; + + if (unlikely(mc->subcur)) + return MDBX_INCOMPATIBLE /* must be a non-dupsort subDB */; + + switch (op) { + case MDBX_NEXT: + if (unlikely(is_eof(mc))) + return is_pointed(mc) ? 
MDBX_NOTFOUND : MDBX_ENODATA; + break; + + case MDBX_FIRST: + if (!is_filled(mc)) { + rc = outer_first(mc, nullptr, nullptr); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + break; + + default: + DEBUG("unhandled/unimplemented cursor operation %u", op); + return MDBX_EINVAL; + } + + const page_t *mp = mc->pg[mc->top]; + size_t nkeys = page_numkeys(mp); + size_t ki = mc->ki[mc->top]; + size_t n = 0; + while (n + 2 <= limit) { + cASSERT(mc, ki < nkeys); + if (unlikely(ki >= nkeys)) + goto sibling; + + const node_t *leaf = page_node(mp, ki); + pairs[n] = get_key(leaf); + rc = node_read(mc, leaf, &pairs[n + 1], mp); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + n += 2; + if (++ki == nkeys) { + sibling: + rc = cursor_sibling_right(mc); + if (rc != MDBX_SUCCESS) { + if (rc == MDBX_NOTFOUND) + rc = MDBX_RESULT_TRUE; + goto bailout; + } + + mp = mc->pg[mc->top]; + DEBUG("next page is %" PRIaPGNO ", key index %u", mp->pgno, + mc->ki[mc->top]); + if (!MDBX_DISABLE_VALIDATION && unlikely(!check_leaf_type(mc, mp))) { + ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", + mp->pgno, mp->flags); + rc = MDBX_CORRUPTED; + goto bailout; + } + nkeys = page_numkeys(mp); + ki = 0; + } + } + mc->ki[mc->top] = (indx_t)ki; + +bailout: + *count = n; + return rc; +} + +/*----------------------------------------------------------------------------*/ + +int mdbx_cursor_set_userctx(MDBX_cursor *mc, void *ctx) { + if (unlikely(!mc)) + return MDBX_EINVAL; + + if (unlikely(mc->signature != cur_signature_ready4dispose && + mc->signature != cur_signature_live)) + return MDBX_EBADSIGN; + + cursor_couple_t *couple = container_of(mc, cursor_couple_t, outer); + couple->userctx = ctx; + return MDBX_SUCCESS; +} + +void *mdbx_cursor_get_userctx(const MDBX_cursor *mc) { + if (unlikely(!mc)) + return nullptr; + + if (unlikely(mc->signature != cur_signature_ready4dispose && + mc->signature != cur_signature_live)) + return nullptr; + + cursor_couple_t *couple = 
container_of(mc, cursor_couple_t, outer); + return couple->userctx; +} + +MDBX_txn *mdbx_cursor_txn(const MDBX_cursor *mc) { + if (unlikely(!mc || mc->signature != cur_signature_live)) + return nullptr; + MDBX_txn *txn = mc->txn; + if (unlikely(!txn || txn->signature != txn_signature)) + return nullptr; + if (unlikely(txn->flags & MDBX_TXN_FINISHED)) + return nullptr; + return txn; +} + +MDBX_dbi mdbx_cursor_dbi(const MDBX_cursor *mc) { + if (unlikely(!mc || mc->signature != cur_signature_live)) + return UINT_MAX; + return cursor_dbi(mc); +} + +/*----------------------------------------------------------------------------*/ + +int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, + MDBX_put_flags_t flags) { + if (unlikely(mc == nullptr || key == nullptr || data == nullptr)) + return MDBX_EINVAL; + + if (unlikely(mc->signature != cur_signature_live)) + return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL + : MDBX_EBADSIGN; + + int rc = check_txn_rw(mc->txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(cursor_dbi_changed(mc))) + return MDBX_BAD_DBI; + + cASSERT(mc, cursor_is_tracked(mc)); + + /* Check this first so counter will always be zero on any early failures. 
*/ + if (unlikely(flags & MDBX_MULTIPLE)) { + if (unlikely(flags & MDBX_RESERVE)) + return MDBX_EINVAL; + if (unlikely(!(mc->tree->flags & MDBX_DUPFIXED))) + return MDBX_INCOMPATIBLE; + const size_t dcount = data[1].iov_len; + if (unlikely(dcount < 2 || data->iov_len == 0)) + return MDBX_BAD_VALSIZE; + if (unlikely(mc->tree->dupfix_size != data->iov_len) && + mc->tree->dupfix_size) + return MDBX_BAD_VALSIZE; + if (unlikely(dcount > + MAX_MAPSIZE / 2 / + (BRANCH_NODE_MAX(MDBX_MAX_PAGESIZE) - NODESIZE))) { + /* checking for multiplication overflow */ + if (unlikely(dcount > MAX_MAPSIZE / 2 / data->iov_len)) + return MDBX_TOO_LARGE; + } + } + + if (flags & MDBX_RESERVE) { + if (unlikely(mc->tree->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | + MDBX_INTEGERDUP | MDBX_DUPFIXED))) + return MDBX_INCOMPATIBLE; + data->iov_base = nullptr; + } + + if (unlikely(mc->txn->flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) + return (mc->txn->flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS : MDBX_BAD_TXN; + + return cursor_put_checklen(mc, key, data, flags); +} + +int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) { + if (unlikely(!mc)) + return MDBX_EINVAL; + + if (unlikely(mc->signature != cur_signature_live)) + return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL + : MDBX_EBADSIGN; + + int rc = check_txn_rw(mc->txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(cursor_dbi_changed(mc))) + return MDBX_BAD_DBI; + + return cursor_del(mc, flags); +} + +__cold int mdbx_cursor_ignord(MDBX_cursor *mc) { + if (unlikely(!mc)) + return MDBX_EINVAL; + + if (unlikely(mc->signature != cur_signature_live)) + return (mc->signature == cur_signature_ready4dispose) ? 
MDBX_EINVAL + : MDBX_EBADSIGN; + + mc->checking |= z_ignord; + if (mc->subcur) + mc->subcur->cursor.checking |= z_ignord; + + return MDBX_SUCCESS; +} diff --git a/src/api-env.c b/src/api-env.c new file mode 100644 index 00000000..9115370e --- /dev/null +++ b/src/api-env.c @@ -0,0 +1,1399 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +__cold static intptr_t reasonable_db_maxsize(intptr_t *cached_result) { + if (*cached_result == 0) { + intptr_t pagesize, total_ram_pages; + if (unlikely(mdbx_get_sysraminfo(&pagesize, &total_ram_pages, nullptr) != + MDBX_SUCCESS)) + return *cached_result = MAX_MAPSIZE32 /* the 32-bit limit is good enough + for fallback */ + ; + + if (unlikely((size_t)total_ram_pages * 2 > MAX_MAPSIZE / (size_t)pagesize)) + return *cached_result = MAX_MAPSIZE; + assert(MAX_MAPSIZE >= (size_t)(total_ram_pages * pagesize * 2)); + + /* Suggesting should not be more than golden ratio of the size of RAM. */ + *cached_result = (intptr_t)((size_t)total_ram_pages * 207 >> 7) * pagesize; + + /* Round to the nearest human-readable granulation. */ + for (size_t unit = MEGABYTE; unit; unit <<= 5) { + const size_t floor = floor_powerof2(*cached_result, unit); + const size_t ceil = ceil_powerof2(*cached_result, unit); + const size_t threshold = (size_t)*cached_result >> 4; + const bool down = + *cached_result - floor < ceil - *cached_result || ceil > MAX_MAPSIZE; + if (threshold < (down ? *cached_result - floor : ceil - *cached_result)) + break; + *cached_result = down ? 
floor : ceil; + } + } + return *cached_result; +} + +__cold static int check_alternative_lck_absent(const pathchar_t *lck_pathname) { + int err = osal_fileexists(lck_pathname); + if (unlikely(err != MDBX_RESULT_FALSE)) { + if (err == MDBX_RESULT_TRUE) + err = MDBX_DUPLICATED_CLK; + ERROR("Alternative/Duplicate LCK-file '%" MDBX_PRIsPATH "' error %d", + lck_pathname, err); + } + return err; +} + +__cold static int env_handle_pathname(MDBX_env *env, const pathchar_t *pathname, + const mdbx_mode_t mode) { + memset(&env->pathname, 0, sizeof(env->pathname)); + if (unlikely(!pathname || !*pathname)) + return MDBX_EINVAL; + + int rc; +#if defined(_WIN32) || defined(_WIN64) + const DWORD dwAttrib = GetFileAttributesW(pathname); + if (dwAttrib == INVALID_FILE_ATTRIBUTES) { + rc = GetLastError(); + if (rc != MDBX_ENOFILE) + return rc; + if (mode == 0 || (env->flags & MDBX_RDONLY) != 0) + /* can't open existing */ + return rc; + + /* auto-create directory if requested */ + if ((env->flags & MDBX_NOSUBDIR) == 0 && + !CreateDirectoryW(pathname, nullptr)) { + rc = GetLastError(); + if (rc != ERROR_ALREADY_EXISTS) + return rc; + } + } else { + /* ignore passed MDBX_NOSUBDIR flag and set it automatically */ + env->flags |= MDBX_NOSUBDIR; + if (dwAttrib & FILE_ATTRIBUTE_DIRECTORY) + env->flags -= MDBX_NOSUBDIR; + } +#else + struct stat st; + if (stat(pathname, &st) != 0) { + rc = errno; + if (rc != MDBX_ENOFILE) + return rc; + if (mode == 0 || (env->flags & MDBX_RDONLY) != 0) + /* can't open non-existing */ + return rc /* MDBX_ENOFILE */; + + /* auto-create directory if requested */ + const mdbx_mode_t dir_mode = + (/* inherit read/write permissions for group and others */ mode & + (S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) | + /* always add read/write/search for owner */ S_IRWXU | + ((mode & S_IRGRP) ? /* +search if readable by group */ S_IXGRP : 0) | + ((mode & S_IROTH) ? 
/* +search if readable by others */ S_IXOTH : 0); + if ((env->flags & MDBX_NOSUBDIR) == 0 && mkdir(pathname, dir_mode)) { + rc = errno; + if (rc != EEXIST) + return rc; + } + } else { + /* ignore passed MDBX_NOSUBDIR flag and set it automatically */ + env->flags |= MDBX_NOSUBDIR; + if (S_ISDIR(st.st_mode)) + env->flags -= MDBX_NOSUBDIR; + } +#endif + + static const pathchar_t dxb_name[] = MDBX_DATANAME; + static const pathchar_t lck_name[] = MDBX_LOCKNAME; + static const pathchar_t lock_suffix[] = MDBX_LOCK_SUFFIX; + +#if defined(_WIN32) || defined(_WIN64) + assert(dxb_name[0] == '\\' && lck_name[0] == '\\'); + const size_t pathname_len = wcslen(pathname); +#else + assert(dxb_name[0] == '/' && lck_name[0] == '/'); + const size_t pathname_len = strlen(pathname); +#endif + assert(!osal_isdirsep(lock_suffix[0])); + size_t base_len = pathname_len; + static const size_t dxb_name_len = ARRAY_LENGTH(dxb_name) - 1; + if (env->flags & MDBX_NOSUBDIR) { + if (base_len > dxb_name_len && + osal_pathequal(pathname + base_len - dxb_name_len, dxb_name, + dxb_name_len)) { + env->flags -= MDBX_NOSUBDIR; + base_len -= dxb_name_len; + } else if (base_len == dxb_name_len - 1 && osal_isdirsep(dxb_name[0]) && + osal_isdirsep(lck_name[0]) && + osal_pathequal(pathname + base_len - dxb_name_len + 1, + dxb_name + 1, dxb_name_len - 1)) { + env->flags -= MDBX_NOSUBDIR; + base_len -= dxb_name_len - 1; + } + } + + const size_t suflen_with_NOSUBDIR = sizeof(lock_suffix) + sizeof(pathchar_t); + const size_t suflen_without_NOSUBDIR = sizeof(lck_name) + sizeof(dxb_name); + const size_t enough4any = (suflen_with_NOSUBDIR > suflen_without_NOSUBDIR) + ? 
suflen_with_NOSUBDIR + : suflen_without_NOSUBDIR; + const size_t bytes_needed = + sizeof(pathchar_t) * (base_len * 2 + pathname_len + 1) + enough4any; + env->pathname.buffer = osal_malloc(bytes_needed); + if (!env->pathname.buffer) + return MDBX_ENOMEM; + + env->pathname.specified = env->pathname.buffer; + env->pathname.dxb = env->pathname.specified + pathname_len + 1; + env->pathname.lck = env->pathname.dxb + base_len + dxb_name_len + 1; + rc = MDBX_SUCCESS; + pathchar_t *const buf = env->pathname.buffer; + if (base_len) { + memcpy(buf, pathname, sizeof(pathchar_t) * pathname_len); + if (env->flags & MDBX_NOSUBDIR) { + const pathchar_t *const lck_ext = + osal_fileext(lck_name, ARRAY_LENGTH(lck_name)); + if (lck_ext) { + pathchar_t *pathname_ext = osal_fileext(buf, pathname_len); + memcpy(pathname_ext ? pathname_ext : buf + pathname_len, lck_ext, + sizeof(pathchar_t) * (ARRAY_END(lck_name) - lck_ext)); + rc = check_alternative_lck_absent(buf); + } + } else { + memcpy(buf + base_len, dxb_name, sizeof(dxb_name)); + memcpy(buf + base_len + dxb_name_len, lock_suffix, sizeof(lock_suffix)); + rc = check_alternative_lck_absent(buf); + } + + memcpy(env->pathname.dxb, pathname, sizeof(pathchar_t) * (base_len + 1)); + memcpy(env->pathname.lck, pathname, sizeof(pathchar_t) * base_len); + if (env->flags & MDBX_NOSUBDIR) { + memcpy(env->pathname.lck + base_len, lock_suffix, sizeof(lock_suffix)); + } else { + memcpy(env->pathname.dxb + base_len, dxb_name, sizeof(dxb_name)); + memcpy(env->pathname.lck + base_len, lck_name, sizeof(lck_name)); + } + } else { + assert(!(env->flags & MDBX_NOSUBDIR)); + memcpy(buf, dxb_name + 1, sizeof(dxb_name) - sizeof(pathchar_t)); + memcpy(buf + dxb_name_len - 1, lock_suffix, sizeof(lock_suffix)); + rc = check_alternative_lck_absent(buf); + + memcpy(env->pathname.dxb, dxb_name + 1, + sizeof(dxb_name) - sizeof(pathchar_t)); + memcpy(env->pathname.lck, lck_name + 1, + sizeof(lck_name) - sizeof(pathchar_t)); + } + + memcpy(env->pathname.specified, 
pathname, + sizeof(pathchar_t) * (pathname_len + 1)); + return rc; +} + +/*----------------------------------------------------------------------------*/ + +__cold int mdbx_env_create(MDBX_env **penv) { + if (unlikely(!penv)) + return MDBX_EINVAL; + *penv = nullptr; + +#ifdef MDBX_HAVE_C11ATOMICS + if (unlikely(!atomic_is_lock_free((const volatile uint32_t *)penv))) { + ERROR("lock-free atomic ops for %u-bit types is required", 32); + return MDBX_INCOMPATIBLE; + } +#if MDBX_64BIT_ATOMIC + if (unlikely(!atomic_is_lock_free((const volatile uint64_t *)penv))) { + ERROR("lock-free atomic ops for %u-bit types is required", 64); + return MDBX_INCOMPATIBLE; + } +#endif /* MDBX_64BIT_ATOMIC */ +#endif /* MDBX_HAVE_C11ATOMICS */ + + if (unlikely(!is_powerof2(globals.sys_pagesize) || + globals.sys_pagesize < MDBX_MIN_PAGESIZE)) { + ERROR("unsuitable system pagesize %u", globals.sys_pagesize); + return MDBX_INCOMPATIBLE; + } + +#if defined(__linux__) || defined(__gnu_linux__) + if (unlikely(globals.linux_kernel_version < 0x04000000)) { + /* 2022-09-01: Прошло уже больше двух после окончания какой-либо поддержки + * самого "долгоиграющего" ядра 3.16.85 ветки 3.x */ + ERROR("too old linux kernel %u.%u.%u.%u, the >= 4.0.0 is required", + globals.linux_kernel_version >> 24, + (globals.linux_kernel_version >> 16) & 255, + (globals.linux_kernel_version >> 8) & 255, + globals.linux_kernel_version & 255); + return MDBX_INCOMPATIBLE; + } +#endif /* Linux */ + + MDBX_env *env = osal_calloc(1, sizeof(MDBX_env)); + if (unlikely(!env)) + return MDBX_ENOMEM; + + env->max_readers = DEFAULT_READERS; + env->max_dbi = env->n_dbi = CORE_DBS; + env->lazy_fd = env->dsync_fd = env->fd4meta = env->lck_mmap.fd = + INVALID_HANDLE_VALUE; + env->stuck_meta = -1; + + env_options_init(env); + env_setup_pagesize(env, (globals.sys_pagesize < MDBX_MAX_PAGESIZE) + ? 
globals.sys_pagesize + : MDBX_MAX_PAGESIZE); + + int rc = osal_fastmutex_init(&env->dbi_lock); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + +#if defined(_WIN32) || defined(_WIN64) + imports.srwl_Init(&env->remap_guard); + InitializeCriticalSection(&env->windowsbug_lock); +#else + rc = osal_fastmutex_init(&env->remap_guard); + if (unlikely(rc != MDBX_SUCCESS)) { + osal_fastmutex_destroy(&env->dbi_lock); + goto bailout; + } + +#if MDBX_LOCKING > MDBX_LOCKING_SYSV + lck_t *const stub = lckless_stub(env); + rc = lck_ipclock_stubinit(&stub->wrt_lock); +#endif /* MDBX_LOCKING */ + if (unlikely(rc != MDBX_SUCCESS)) { + osal_fastmutex_destroy(&env->remap_guard); + osal_fastmutex_destroy(&env->dbi_lock); + goto bailout; + } +#endif /* Windows */ + + VALGRIND_CREATE_MEMPOOL(env, 0, 0); + env->signature.weak = env_signature; + *penv = env; + return MDBX_SUCCESS; + +bailout: + osal_free(env); + return rc; +} + +__cold int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target) { + if (unlikely(target >= NUM_METAS)) + return MDBX_EINVAL; + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely((env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_EXCLUSIVE)) + return MDBX_EPERM; + + const meta_t *const target_meta = METAPAGE(env, target); + txnid_t new_txnid = constmeta_txnid(target_meta); + if (new_txnid < MIN_TXNID) + new_txnid = MIN_TXNID; + for (unsigned n = 0; n < NUM_METAS; ++n) { + if (n == target) + continue; + page_t *const page = pgno2page(env, n); + meta_t meta = *page_meta(page); + if (meta_validate(env, &meta, page, n, nullptr) != MDBX_SUCCESS) { + int err = meta_override(env, n, 0, nullptr); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } else { + txnid_t txnid = constmeta_txnid(&meta); + if (new_txnid <= txnid) + new_txnid = safe64_txnid_next(txnid); + } + } + + if (unlikely(new_txnid > MAX_TXNID)) { + ERROR("txnid overflow, raise %d", MDBX_TXN_FULL); + return MDBX_TXN_FULL; + } + return 
meta_override(env, target, new_txnid, target_meta); +} + +__cold int mdbx_env_open_for_recovery(MDBX_env *env, const char *pathname, + unsigned target_meta, bool writeable) { +#if defined(_WIN32) || defined(_WIN64) + wchar_t *pathnameW = nullptr; + int rc = osal_mb2w(pathname, &pathnameW); + if (likely(rc == MDBX_SUCCESS)) { + rc = mdbx_env_open_for_recoveryW(env, pathnameW, target_meta, writeable); + osal_free(pathnameW); + } + return rc; +} + +__cold int mdbx_env_open_for_recoveryW(MDBX_env *env, const wchar_t *pathname, + unsigned target_meta, bool writeable) { +#endif /* Windows */ + + if (unlikely(target_meta >= NUM_METAS)) + return MDBX_EINVAL; + int rc = check_env(env, false); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + if (unlikely(env->dxb_mmap.base)) + return MDBX_EPERM; + + env->stuck_meta = (int8_t)target_meta; + return +#if defined(_WIN32) || defined(_WIN64) + mdbx_env_openW +#else + mdbx_env_open +#endif /* Windows */ + (env, pathname, writeable ? MDBX_EXCLUSIVE : MDBX_EXCLUSIVE | MDBX_RDONLY, + 0); +} + +__cold int mdbx_env_delete(const char *pathname, MDBX_env_delete_mode_t mode) { +#if defined(_WIN32) || defined(_WIN64) + wchar_t *pathnameW = nullptr; + int rc = osal_mb2w(pathname, &pathnameW); + if (likely(rc == MDBX_SUCCESS)) { + rc = mdbx_env_deleteW(pathnameW, mode); + osal_free(pathnameW); + } + return rc; +} + +__cold int mdbx_env_deleteW(const wchar_t *pathname, + MDBX_env_delete_mode_t mode) { +#endif /* Windows */ + + switch (mode) { + default: + return MDBX_EINVAL; + case MDBX_ENV_JUST_DELETE: + case MDBX_ENV_ENSURE_UNUSED: + case MDBX_ENV_WAIT_FOR_UNUSED: + break; + } + +#ifdef __e2k__ /* https://bugs.mcst.ru/bugzilla/show_bug.cgi?id=6011 */ + MDBX_env *const dummy_env = alloca(sizeof(MDBX_env)); +#else + MDBX_env dummy_env_silo, *const dummy_env = &dummy_env_silo; +#endif + memset(dummy_env, 0, sizeof(*dummy_env)); + dummy_env->flags = + (mode == MDBX_ENV_ENSURE_UNUSED) ? 
MDBX_EXCLUSIVE : MDBX_ENV_DEFAULTS; + dummy_env->ps = (unsigned)mdbx_default_pagesize(); + + STATIC_ASSERT(sizeof(dummy_env->flags) == sizeof(MDBX_env_flags_t)); + int rc = MDBX_RESULT_TRUE, err = env_handle_pathname(dummy_env, pathname, 0); + if (likely(err == MDBX_SUCCESS)) { + mdbx_filehandle_t clk_handle = INVALID_HANDLE_VALUE, + dxb_handle = INVALID_HANDLE_VALUE; + if (mode > MDBX_ENV_JUST_DELETE) { + err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, dummy_env->pathname.dxb, + &dxb_handle, 0); + err = (err == MDBX_ENOFILE) ? MDBX_SUCCESS : err; + if (err == MDBX_SUCCESS) { + err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, + dummy_env->pathname.lck, &clk_handle, 0); + err = (err == MDBX_ENOFILE) ? MDBX_SUCCESS : err; + } + if (err == MDBX_SUCCESS && clk_handle != INVALID_HANDLE_VALUE) + err = osal_lockfile(clk_handle, mode == MDBX_ENV_WAIT_FOR_UNUSED); + if (err == MDBX_SUCCESS && dxb_handle != INVALID_HANDLE_VALUE) + err = osal_lockfile(dxb_handle, mode == MDBX_ENV_WAIT_FOR_UNUSED); + } + + if (err == MDBX_SUCCESS) { + err = osal_removefile(dummy_env->pathname.dxb); + if (err == MDBX_SUCCESS) + rc = MDBX_SUCCESS; + else if (err == MDBX_ENOFILE) + err = MDBX_SUCCESS; + } + + if (err == MDBX_SUCCESS) { + err = osal_removefile(dummy_env->pathname.lck); + if (err == MDBX_SUCCESS) + rc = MDBX_SUCCESS; + else if (err == MDBX_ENOFILE) + err = MDBX_SUCCESS; + } + + if (err == MDBX_SUCCESS && !(dummy_env->flags & MDBX_NOSUBDIR) && + (/* pathname != "." */ pathname[0] != '.' || pathname[1] != 0) && + (/* pathname != ".." */ pathname[0] != '.' || pathname[1] != '.' 
|| + pathname[2] != 0)) { + err = osal_removedirectory(pathname); + if (err == MDBX_SUCCESS) + rc = MDBX_SUCCESS; + else if (err == MDBX_ENOFILE) + err = MDBX_SUCCESS; + } + + if (dxb_handle != INVALID_HANDLE_VALUE) + osal_closefile(dxb_handle); + if (clk_handle != INVALID_HANDLE_VALUE) + osal_closefile(clk_handle); + } else if (err == MDBX_ENOFILE) + err = MDBX_SUCCESS; + + osal_free(dummy_env->pathname.buffer); + return (err == MDBX_SUCCESS) ? rc : err; +} + +__cold int mdbx_env_open(MDBX_env *env, const char *pathname, + MDBX_env_flags_t flags, mdbx_mode_t mode) { +#if defined(_WIN32) || defined(_WIN64) + wchar_t *pathnameW = nullptr; + int rc = osal_mb2w(pathname, &pathnameW); + if (likely(rc == MDBX_SUCCESS)) { + rc = mdbx_env_openW(env, pathnameW, flags, mode); + osal_free(pathnameW); + if (rc == MDBX_SUCCESS) + /* force to make cache of the multi-byte pathname representation */ + mdbx_env_get_path(env, &pathname); + } + return rc; +} + +__cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, + MDBX_env_flags_t flags, mdbx_mode_t mode) { +#endif /* Windows */ + + int rc = check_env(env, false); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(flags & ~ENV_USABLE_FLAGS)) + return MDBX_EINVAL; + + if (unlikely(env->lazy_fd != INVALID_HANDLE_VALUE || + (env->flags & ENV_ACTIVE) != 0 || env->dxb_mmap.base)) + return MDBX_EPERM; + + /* Pickup previously mdbx_env_set_flags(), + * but avoid MDBX_UTTERLY_NOSYNC by disjunction */ + const uint32_t saved_me_flags = env->flags; + flags = combine_durability_flags(flags | DEPRECATED_COALESCE, env->flags); + + if (flags & MDBX_RDONLY) { + /* Silently ignore irrelevant flags when we're only getting read access */ + flags &= ~(MDBX_WRITEMAP | DEPRECATED_MAPASYNC | MDBX_SAFE_NOSYNC | + MDBX_NOMETASYNC | DEPRECATED_COALESCE | MDBX_LIFORECLAIM | + MDBX_NOMEMINIT | MDBX_ACCEDE); + mode = 0; + } else { +#if MDBX_MMAP_INCOHERENT_FILE_WRITE + /* Temporary `workaround` for OpenBSD kernel's flaw. 
+ * See https://libmdbx.dqdkfa.ru/dead-github/issues/67 */ + if ((flags & MDBX_WRITEMAP) == 0) { + if (flags & MDBX_ACCEDE) + flags |= MDBX_WRITEMAP; + else { + debug_log(MDBX_LOG_ERROR, __func__, __LINE__, + "System (i.e. OpenBSD) requires MDBX_WRITEMAP because " + "of an internal flaw(s) in a file/buffer/page cache.\n"); + return 42 /* ENOPROTOOPT */; + } + } +#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ + } + + env->flags = (flags & ~ENV_FATAL_ERROR); + rc = env_handle_pathname(env, pathname, mode); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + env->kvs = osal_calloc(env->max_dbi, sizeof(env->kvs[0])); + env->dbs_flags = osal_calloc(env->max_dbi, sizeof(env->dbs_flags[0])); + env->dbi_seqs = osal_calloc(env->max_dbi, sizeof(env->dbi_seqs[0])); + if (unlikely(!(env->kvs && env->dbs_flags && env->dbi_seqs))) { + rc = MDBX_ENOMEM; + goto bailout; + } + + if ((flags & MDBX_RDONLY) == 0) { + MDBX_txn *txn = nullptr; + const intptr_t bitmap_bytes = +#if MDBX_ENABLE_DBI_SPARSE + ceil_powerof2(env->max_dbi, CHAR_BIT * sizeof(txn->dbi_sparse[0])) / + CHAR_BIT; +#else + 0; +#endif /* MDBX_ENABLE_DBI_SPARSE */ + const size_t base = sizeof(MDBX_txn) + sizeof(cursor_couple_t); + const size_t size = + base + bitmap_bytes + + env->max_dbi * (sizeof(txn->dbs[0]) + sizeof(txn->cursors[0]) + + sizeof(txn->dbi_seqs[0]) + sizeof(txn->dbi_state[0])); + rc = env_page_auxbuffer(env); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + memset(env->page_auxbuf, -1, env->ps * (size_t)2); + memset(ptr_disp(env->page_auxbuf, env->ps * (size_t)2), 0, env->ps); + txn = osal_calloc(1, size); + if (unlikely(!txn)) { + rc = MDBX_ENOMEM; + goto bailout; + } + txn->dbs = ptr_disp(txn, base); + txn->cursors = ptr_disp(txn->dbs, env->max_dbi * sizeof(txn->dbs[0])); + txn->dbi_seqs = + ptr_disp(txn->cursors, env->max_dbi * sizeof(txn->cursors[0])); + txn->dbi_state = + ptr_disp(txn, size - env->max_dbi * sizeof(txn->dbi_state[0])); +#if MDBX_ENABLE_DBI_SPARSE + txn->dbi_sparse = 
ptr_disp(txn->dbi_state, -bitmap_bytes); +#endif /* MDBX_ENABLE_DBI_SPARSE */ + txn->env = env; + txn->flags = MDBX_TXN_FINISHED; + env->basal_txn = txn; + txn->tw.retired_pages = pnl_alloc(MDBX_PNL_INITIAL); + txn->tw.relist = pnl_alloc(MDBX_PNL_INITIAL); + if (unlikely(!txn->tw.retired_pages || !txn->tw.relist)) { + rc = MDBX_ENOMEM; + goto bailout; + } + env_options_adjust_defaults(env); + } + + rc = env_open(env, mode); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + +#if MDBX_DEBUG + const troika_t troika = meta_tap(env); + const meta_ptr_t head = meta_recent(env, &troika); + const tree_t *db = &head.ptr_c->trees.main; + + DEBUG("opened database version %u, pagesize %u", + (uint8_t)unaligned_peek_u64(4, head.ptr_c->magic_and_version), env->ps); + DEBUG("using meta page %" PRIaPGNO ", txn %" PRIaTXN, + data_page(head.ptr_c)->pgno, head.txnid); + DEBUG("depth: %u", db->height); + DEBUG("entries: %" PRIu64, db->items); + DEBUG("branch pages: %" PRIaPGNO, db->branch_pages); + DEBUG("leaf pages: %" PRIaPGNO, db->leaf_pages); + DEBUG("large/overflow pages: %" PRIaPGNO, db->large_pages); + DEBUG("root: %" PRIaPGNO, db->root); + DEBUG("schema_altered: %" PRIaTXN, db->mod_txnid); +#endif /* MDBX_DEBUG */ + + if (likely(rc == MDBX_SUCCESS)) { + dxb_sanitize_tail(env, nullptr); + } else { + bailout: + if (likely(env_close(env, false) == MDBX_SUCCESS)) { + env->flags = saved_me_flags; + } else { + rc = MDBX_PANIC; + env->flags = saved_me_flags | ENV_FATAL_ERROR; + } + } + return rc; +} + +/*----------------------------------------------------------------------------*/ + +#if !(defined(_WIN32) || defined(_WIN64)) +__cold int mdbx_env_resurrect_after_fork(MDBX_env *env) { + if (unlikely(!env)) + return MDBX_EINVAL; + + if (unlikely(env->signature.weak != env_signature)) + return MDBX_EBADSIGN; + + if (unlikely(env->flags & ENV_FATAL_ERROR)) + return MDBX_PANIC; + + if (unlikely((env->flags & ENV_ACTIVE) == 0)) + return MDBX_SUCCESS; + + const uint32_t new_pid = 
osal_getpid(); + if (unlikely(env->pid == new_pid)) + return MDBX_SUCCESS; + + if (!atomic_cas32(&env->signature, env_signature, ~env_signature)) + return MDBX_EBADSIGN; + + if (env->txn) + txn_abort(env->basal_txn); + env->registered_reader_pid = 0; + int rc = env_close(env, true); + env->signature.weak = env_signature; + if (likely(rc == MDBX_SUCCESS)) { + rc = (env->flags & MDBX_EXCLUSIVE) ? MDBX_BUSY : env_open(env, 0); + if (unlikely(rc != MDBX_SUCCESS && env_close(env, false) != MDBX_SUCCESS)) { + rc = MDBX_PANIC; + env->flags |= ENV_FATAL_ERROR; + } + } + return rc; +} +#endif /* Windows */ + +__cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) { + page_t *dp; + int rc = MDBX_SUCCESS; + + if (unlikely(!env)) + return MDBX_EINVAL; + + if (unlikely(env->signature.weak != env_signature)) + return MDBX_EBADSIGN; + +#if MDBX_ENV_CHECKPID || !(defined(_WIN32) || defined(_WIN64)) + /* Check the PID even if MDBX_ENV_CHECKPID=0 on non-Windows + * platforms (i.e. where fork() is available). + * This is required to legitimize a call after fork() + * from a child process, that should be allowed to free resources. */ + if (unlikely(env->pid != osal_getpid())) + env->flags |= ENV_FATAL_ERROR; +#endif /* MDBX_ENV_CHECKPID */ + + if (env->dxb_mmap.base && + (env->flags & (MDBX_RDONLY | ENV_FATAL_ERROR)) == 0 && env->basal_txn) { + if (env->basal_txn->owner && env->basal_txn->owner != osal_thread_self()) + return MDBX_BUSY; + } else + dont_sync = true; + + if (!atomic_cas32(&env->signature, env_signature, 0)) + return MDBX_EBADSIGN; + + if (!dont_sync) { +#if defined(_WIN32) || defined(_WIN64) + /* On windows, without blocking is impossible to determine whether another + * process is running a writing transaction or not. + * Because in the "owner died" condition kernel don't release + * file lock immediately. */ + rc = env_sync(env, true, false); + rc = (rc == MDBX_RESULT_TRUE) ? 
MDBX_SUCCESS : rc; +#else + struct stat st; + if (unlikely(fstat(env->lazy_fd, &st))) + rc = errno; + else if (st.st_nlink > 0 /* don't sync deleted files */) { + rc = env_sync(env, true, true); + rc = (rc == MDBX_BUSY || rc == EAGAIN || rc == EACCES || rc == EBUSY || + rc == EWOULDBLOCK || rc == MDBX_RESULT_TRUE) + ? MDBX_SUCCESS + : rc; + } +#endif /* Windows */ + } + + if (env->basal_txn && env->basal_txn->owner == osal_thread_self()) + lck_txn_unlock(env); + + eASSERT(env, env->signature.weak == 0); + rc = env_close(env, false) ? MDBX_PANIC : rc; + ENSURE(env, osal_fastmutex_destroy(&env->dbi_lock) == MDBX_SUCCESS); +#if defined(_WIN32) || defined(_WIN64) + /* remap_guard don't have destructor (Slim Reader/Writer Lock) */ + DeleteCriticalSection(&env->windowsbug_lock); +#else + ENSURE(env, osal_fastmutex_destroy(&env->remap_guard) == MDBX_SUCCESS); +#endif /* Windows */ + +#if MDBX_LOCKING > MDBX_LOCKING_SYSV + lck_t *const stub = lckless_stub(env); + /* может вернуть ошибку в дочернем процессе после fork() */ + lck_ipclock_destroy(&stub->wrt_lock); +#endif /* MDBX_LOCKING */ + + while ((dp = env->shadow_reserve) != nullptr) { + MDBX_ASAN_UNPOISON_MEMORY_REGION(dp, env->ps); + VALGRIND_MAKE_MEM_DEFINED(&page_next(dp), sizeof(page_t *)); + env->shadow_reserve = page_next(dp); + void *const ptr = ptr_disp(dp, -(ptrdiff_t)sizeof(size_t)); + osal_free(ptr); + } + VALGRIND_DESTROY_MEMPOOL(env); + osal_free(env); + + return rc; +} + +/*----------------------------------------------------------------------------*/ + +static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, + MDBX_envinfo *out, const size_t bytes, + troika_t *const troika) { + const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); + const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); + if (unlikely(env->flags & ENV_FATAL_ERROR)) + return MDBX_PANIC; + + /* is the environment open? 
+ * (https://libmdbx.dqdkfa.ru/dead-github/issues/171) */ + if (unlikely(!env->dxb_mmap.base)) { + /* environment not yet opened */ +#if 1 + /* default behavior: returns the available info but zeroed the rest */ + memset(out, 0, bytes); + out->mi_geo.lower = env->geo_in_bytes.lower; + out->mi_geo.upper = env->geo_in_bytes.upper; + out->mi_geo.shrink = env->geo_in_bytes.shrink; + out->mi_geo.grow = env->geo_in_bytes.grow; + out->mi_geo.current = env->geo_in_bytes.now; + out->mi_maxreaders = env->max_readers; + out->mi_dxb_pagesize = env->ps; + out->mi_sys_pagesize = globals.sys_pagesize; + if (likely(bytes > size_before_bootid)) { + out->mi_bootid.current.x = globals.bootid.x; + out->mi_bootid.current.y = globals.bootid.y; + } + return MDBX_SUCCESS; +#else + /* some users may prefer this behavior: return appropriate error */ + return MDBX_EPERM; +#endif + } + + *troika = + (txn && !(txn->flags & MDBX_TXN_RDONLY)) ? txn->tw.troika : meta_tap(env); + const meta_ptr_t head = meta_recent(env, troika); + const meta_t *const meta0 = METAPAGE(env, 0); + const meta_t *const meta1 = METAPAGE(env, 1); + const meta_t *const meta2 = METAPAGE(env, 2); + out->mi_recent_txnid = head.txnid; + out->mi_meta_txnid[0] = troika->txnid[0]; + out->mi_meta_sign[0] = unaligned_peek_u64(4, meta0->sign); + out->mi_meta_txnid[1] = troika->txnid[1]; + out->mi_meta_sign[1] = unaligned_peek_u64(4, meta1->sign); + out->mi_meta_txnid[2] = troika->txnid[2]; + out->mi_meta_sign[2] = unaligned_peek_u64(4, meta2->sign); + if (likely(bytes > size_before_bootid)) { + memcpy(&out->mi_bootid.meta[0], &meta0->bootid, 16); + memcpy(&out->mi_bootid.meta[1], &meta1->bootid, 16); + memcpy(&out->mi_bootid.meta[2], &meta2->bootid, 16); + } + + const volatile meta_t *txn_meta = head.ptr_v; + out->mi_last_pgno = txn_meta->geometry.first_unallocated - 1; + out->mi_geo.current = pgno2bytes(env, txn_meta->geometry.now); + if (txn) { + out->mi_last_pgno = txn->geo.first_unallocated - 1; + out->mi_geo.current = 
pgno2bytes(env, txn->geo.end_pgno); + + const txnid_t wanna_meta_txnid = (txn->flags & MDBX_TXN_RDONLY) + ? txn->txnid + : txn->txnid - xMDBX_TXNID_STEP; + txn_meta = (out->mi_meta_txnid[0] == wanna_meta_txnid) ? meta0 : txn_meta; + txn_meta = (out->mi_meta_txnid[1] == wanna_meta_txnid) ? meta1 : txn_meta; + txn_meta = (out->mi_meta_txnid[2] == wanna_meta_txnid) ? meta2 : txn_meta; + } + out->mi_geo.lower = pgno2bytes(env, txn_meta->geometry.lower); + out->mi_geo.upper = pgno2bytes(env, txn_meta->geometry.upper); + out->mi_geo.shrink = pgno2bytes(env, pv2pages(txn_meta->geometry.shrink_pv)); + out->mi_geo.grow = pgno2bytes(env, pv2pages(txn_meta->geometry.grow_pv)); + out->mi_mapsize = env->dxb_mmap.limit; + + const lck_t *const lck = env->lck; + out->mi_maxreaders = env->max_readers; + out->mi_numreaders = env->lck_mmap.lck + ? atomic_load32(&lck->rdt_length, mo_Relaxed) + : INT32_MAX; + out->mi_dxb_pagesize = env->ps; + out->mi_sys_pagesize = globals.sys_pagesize; + + if (likely(bytes > size_before_bootid)) { + const uint64_t unsynced_pages = + atomic_load64(&lck->unsynced_pages, mo_Relaxed) + + ((uint32_t)out->mi_recent_txnid != + atomic_load32(&lck->meta_sync_txnid, mo_Relaxed)); + out->mi_unsync_volume = pgno2bytes(env, (size_t)unsynced_pages); + const uint64_t monotime_now = osal_monotime(); + uint64_t ts = atomic_load64(&lck->eoos_timestamp, mo_Relaxed); + out->mi_since_sync_seconds16dot16 = + ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0; + ts = atomic_load64(&lck->readers_check_timestamp, mo_Relaxed); + out->mi_since_reader_check_seconds16dot16 = + ts ? 
osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0; + out->mi_autosync_threshold = + pgno2bytes(env, atomic_load32(&lck->autosync_threshold, mo_Relaxed)); + out->mi_autosync_period_seconds16dot16 = + osal_monotime_to_16dot16_noUnderflow( + atomic_load64(&lck->autosync_period, mo_Relaxed)); + out->mi_bootid.current.x = globals.bootid.x; + out->mi_bootid.current.y = globals.bootid.y; + out->mi_mode = env->lck_mmap.lck ? lck->envmode.weak : env->flags; + } + + if (likely(bytes > size_before_pgop_stat)) { +#if MDBX_ENABLE_PGOP_STAT + out->mi_pgop_stat.newly = atomic_load64(&lck->pgops.newly, mo_Relaxed); + out->mi_pgop_stat.cow = atomic_load64(&lck->pgops.cow, mo_Relaxed); + out->mi_pgop_stat.clone = atomic_load64(&lck->pgops.clone, mo_Relaxed); + out->mi_pgop_stat.split = atomic_load64(&lck->pgops.split, mo_Relaxed); + out->mi_pgop_stat.merge = atomic_load64(&lck->pgops.merge, mo_Relaxed); + out->mi_pgop_stat.spill = atomic_load64(&lck->pgops.spill, mo_Relaxed); + out->mi_pgop_stat.unspill = atomic_load64(&lck->pgops.unspill, mo_Relaxed); + out->mi_pgop_stat.wops = atomic_load64(&lck->pgops.wops, mo_Relaxed); + out->mi_pgop_stat.prefault = + atomic_load64(&lck->pgops.prefault, mo_Relaxed); + out->mi_pgop_stat.mincore = atomic_load64(&lck->pgops.mincore, mo_Relaxed); + out->mi_pgop_stat.msync = atomic_load64(&lck->pgops.msync, mo_Relaxed); + out->mi_pgop_stat.fsync = atomic_load64(&lck->pgops.fsync, mo_Relaxed); +#else + memset(&out->mi_pgop_stat, 0, sizeof(out->mi_pgop_stat)); +#endif /* MDBX_ENABLE_PGOP_STAT*/ + } + + txnid_t overall_latter_reader_txnid = out->mi_recent_txnid; + txnid_t self_latter_reader_txnid = overall_latter_reader_txnid; + if (env->lck_mmap.lck) { + for (size_t i = 0; i < out->mi_numreaders; ++i) { + const uint32_t pid = atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease); + if (pid) { + const txnid_t txnid = safe64_read(&lck->rdt[i].txnid); + if (overall_latter_reader_txnid > txnid) + overall_latter_reader_txnid = txnid; + if (pid == 
env->pid && self_latter_reader_txnid > txnid) + self_latter_reader_txnid = txnid; + } + } + } + out->mi_self_latter_reader_txnid = self_latter_reader_txnid; + out->mi_latter_reader_txnid = overall_latter_reader_txnid; + + osal_compiler_barrier(); + return MDBX_SUCCESS; +} + +__cold int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, + size_t bytes, troika_t *troika) { + MDBX_envinfo snap; + int rc = env_info_snap(env, txn, &snap, sizeof(snap), troika); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + eASSERT(env, sizeof(snap) >= bytes); + while (1) { + rc = env_info_snap(env, txn, out, bytes, troika); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + snap.mi_since_sync_seconds16dot16 = out->mi_since_sync_seconds16dot16; + snap.mi_since_reader_check_seconds16dot16 = + out->mi_since_reader_check_seconds16dot16; + if (likely(memcmp(&snap, out, bytes) == 0)) + return MDBX_SUCCESS; + memcpy(&snap, out, bytes); + } +} + +__cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, + MDBX_envinfo *arg, size_t bytes) { + if (unlikely((env == nullptr && txn == nullptr) || arg == nullptr)) + return MDBX_EINVAL; + + const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); + const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); + if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && + bytes != size_before_pgop_stat) + return MDBX_EINVAL; + + if (txn) { + int err = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + if (env) { + int err = check_env(env, false); + if (unlikely(err != MDBX_SUCCESS)) + return err; + if (txn && unlikely(txn->env != env)) + return MDBX_EINVAL; + } else { + env = txn->env; + } + + troika_t troika; + return env_info(env, txn, arg, bytes, &troika); +} + +__cold int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *out, + size_t bytes) { +#if defined(_WIN32) || defined(_WIN64) + wchar_t *pathnameW = 
nullptr; + int rc = osal_mb2w(pathname, &pathnameW); + if (likely(rc == MDBX_SUCCESS)) { + rc = mdbx_preopen_snapinfoW(pathnameW, out, bytes); + osal_free(pathnameW); + } + return rc; +} + +__cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, + size_t bytes) { +#endif /* Windows */ + if (unlikely(!out)) + return MDBX_EINVAL; + + const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); + const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); + if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && + bytes != size_before_pgop_stat) + return MDBX_EINVAL; + + memset(out, 0, bytes); + if (likely(bytes > size_before_bootid)) { + out->mi_bootid.current.x = globals.bootid.x; + out->mi_bootid.current.y = globals.bootid.y; + } + + MDBX_env env; + memset(&env, 0, sizeof(env)); + env.pid = osal_getpid(); + if (unlikely(!is_powerof2(globals.sys_pagesize) || + globals.sys_pagesize < MDBX_MIN_PAGESIZE)) { + ERROR("unsuitable system pagesize %u", globals.sys_pagesize); + return MDBX_INCOMPATIBLE; + } + out->mi_sys_pagesize = globals.sys_pagesize; + env.flags = MDBX_RDONLY | MDBX_NORDAHEAD | MDBX_ACCEDE | MDBX_VALIDATION; + env.stuck_meta = -1; + env.lck_mmap.fd = INVALID_HANDLE_VALUE; + env.lazy_fd = INVALID_HANDLE_VALUE; + env.dsync_fd = INVALID_HANDLE_VALUE; + env.fd4meta = INVALID_HANDLE_VALUE; +#if defined(_WIN32) || defined(_WIN64) + env.dxb_lock_event = INVALID_HANDLE_VALUE; + env.ioring.overlapped_fd = INVALID_HANDLE_VALUE; +#endif /* Windows */ + env_options_init(&env); + + int rc = env_handle_pathname(&env, pathname, 0); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + rc = osal_openfile(MDBX_OPEN_DXB_READ, &env, env.pathname.dxb, &env.lazy_fd, + 0); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + meta_t header; + rc = dxb_read_header(&env, &header, 0, 0); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + out->mi_dxb_pagesize = env_setup_pagesize(&env, header.pagesize); + 
out->mi_geo.lower = pgno2bytes(&env, header.geometry.lower); + out->mi_geo.upper = pgno2bytes(&env, header.geometry.upper); + out->mi_geo.shrink = pgno2bytes(&env, pv2pages(header.geometry.shrink_pv)); + out->mi_geo.grow = pgno2bytes(&env, pv2pages(header.geometry.grow_pv)); + out->mi_geo.current = pgno2bytes(&env, header.geometry.now); + out->mi_last_pgno = header.geometry.first_unallocated - 1; + + const unsigned n = 0; + out->mi_recent_txnid = constmeta_txnid(&header); + out->mi_meta_sign[n] = unaligned_peek_u64(4, &header.sign); + if (likely(bytes > size_before_bootid)) + memcpy(&out->mi_bootid.meta[n], &header.bootid, 16); + +bailout: + env_close(&env, false); + return rc; +} + +/*----------------------------------------------------------------------------*/ + +__cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, + intptr_t size_now, intptr_t size_upper, + intptr_t growth_step, + intptr_t shrink_threshold, intptr_t pagesize) { + int rc = check_env(env, false); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + const bool txn0_owned = env->basal_txn && env_txn0_owned(env); + const bool inside_txn = txn0_owned && env->txn; + bool should_unlock = false; + +#if MDBX_DEBUG + if (growth_step < 0) { + growth_step = 1; + if (shrink_threshold < 0) + shrink_threshold = 1; + } +#endif /* MDBX_DEBUG */ + + intptr_t reasonable_maxsize_cache = 0; + if (env->dxb_mmap.base) { + /* env already mapped */ + if (unlikely(env->flags & MDBX_RDONLY)) + return MDBX_EACCESS; + + if (!txn0_owned) { + int err = lck_txn_lock(env, false); + if (unlikely(err != MDBX_SUCCESS)) + return err; + should_unlock = true; + env->basal_txn->tw.troika = meta_tap(env); + eASSERT(env, !env->txn && !env->basal_txn->nested); + env->basal_txn->txnid = + env->basal_txn->tw.troika.txnid[env->basal_txn->tw.troika.recent]; + txn_snapshot_oldest(env->basal_txn); + } + + /* get untouched params from current TXN or DB */ + if (pagesize <= 0 || pagesize >= INT_MAX) + pagesize = env->ps; + const 
geo_t *const geo = + inside_txn + ? &env->txn->geo + : &meta_recent(env, &env->basal_txn->tw.troika).ptr_c->geometry; + if (size_lower < 0) + size_lower = pgno2bytes(env, geo->lower); + if (size_now < 0) + size_now = pgno2bytes(env, geo->now); + if (size_upper < 0) + size_upper = pgno2bytes(env, geo->upper); + if (growth_step < 0) + growth_step = pgno2bytes(env, pv2pages(geo->grow_pv)); + if (shrink_threshold < 0) + shrink_threshold = pgno2bytes(env, pv2pages(geo->shrink_pv)); + + if (pagesize != (intptr_t)env->ps) { + rc = MDBX_EINVAL; + goto bailout; + } + const size_t usedbytes = + pgno2bytes(env, mvcc_snapshot_largest(env, geo->first_unallocated)); + if ((size_t)size_upper < usedbytes) { + rc = MDBX_MAP_FULL; + goto bailout; + } + if ((size_t)size_now < usedbytes) + size_now = usedbytes; + } else { + /* env NOT yet mapped */ + if (unlikely(inside_txn)) + return MDBX_PANIC; + + /* is requested some auto-value for pagesize ? */ + if (pagesize >= INT_MAX /* maximal */) + pagesize = MDBX_MAX_PAGESIZE; + else if (pagesize <= 0) { + if (pagesize < 0 /* default */) { + pagesize = globals.sys_pagesize; + if ((uintptr_t)pagesize > MDBX_MAX_PAGESIZE) + pagesize = MDBX_MAX_PAGESIZE; + eASSERT(env, (uintptr_t)pagesize >= MDBX_MIN_PAGESIZE); + } else if (pagesize == 0 /* minimal */) + pagesize = MDBX_MIN_PAGESIZE; + + /* choose pagesize */ + intptr_t max_size = (size_now > size_lower) ? size_now : size_lower; + max_size = (size_upper > max_size) ? 
size_upper : max_size; + if (max_size < 0 /* default */) + max_size = DEFAULT_MAPSIZE; + else if (max_size == 0 /* minimal */) + max_size = MIN_MAPSIZE; + else if (max_size >= (intptr_t)MAX_MAPSIZE /* maximal */) + max_size = reasonable_db_maxsize(&reasonable_maxsize_cache); + + while (max_size > pagesize * (int64_t)(MAX_PAGENO + 1) && + pagesize < MDBX_MAX_PAGESIZE) + pagesize <<= 1; + } + } + + if (pagesize < (intptr_t)MDBX_MIN_PAGESIZE || + pagesize > (intptr_t)MDBX_MAX_PAGESIZE || !is_powerof2(pagesize)) { + rc = MDBX_EINVAL; + goto bailout; + } + + if (size_lower <= 0) { + size_lower = MIN_MAPSIZE; + if (MIN_MAPSIZE / pagesize < MIN_PAGENO) + size_lower = MIN_PAGENO * pagesize; + } + if (size_lower >= INTPTR_MAX) { + size_lower = reasonable_db_maxsize(&reasonable_maxsize_cache); + if ((size_t)size_lower / pagesize > MAX_PAGENO + 1) + size_lower = pagesize * (MAX_PAGENO + 1); + } + + if (size_now <= 0) { + size_now = size_lower; + if (size_upper >= size_lower && size_now > size_upper) + size_now = size_upper; + } + if (size_now >= INTPTR_MAX) { + size_now = reasonable_db_maxsize(&reasonable_maxsize_cache); + if ((size_t)size_now / pagesize > MAX_PAGENO + 1) + size_now = pagesize * (MAX_PAGENO + 1); + } + + if (size_upper <= 0) { + if (size_now >= reasonable_db_maxsize(&reasonable_maxsize_cache) / 2) + size_upper = reasonable_db_maxsize(&reasonable_maxsize_cache); + else if (MAX_MAPSIZE != MAX_MAPSIZE32 && + (size_t)size_now >= MAX_MAPSIZE32 / 2 && + (size_t)size_now <= MAX_MAPSIZE32 / 4 * 3) + size_upper = MAX_MAPSIZE32; + else { + size_upper = size_now + size_now; + if ((size_t)size_upper < DEFAULT_MAPSIZE * 2) + size_upper = DEFAULT_MAPSIZE * 2; + } + if ((size_t)size_upper / pagesize > (MAX_PAGENO + 1)) + size_upper = pagesize * (MAX_PAGENO + 1); + } else if (size_upper >= INTPTR_MAX) { + size_upper = reasonable_db_maxsize(&reasonable_maxsize_cache); + if ((size_t)size_upper / pagesize > MAX_PAGENO + 1) + size_upper = pagesize * (MAX_PAGENO + 1); + } + + if 
(unlikely(size_lower < (intptr_t)MIN_MAPSIZE || size_lower > size_upper)) { + rc = MDBX_EINVAL; + goto bailout; + } + + if ((uint64_t)size_lower / pagesize < MIN_PAGENO) { + size_lower = pagesize * MIN_PAGENO; + if (unlikely(size_lower > size_upper)) { + rc = MDBX_EINVAL; + goto bailout; + } + if (size_now < size_lower) + size_now = size_lower; + } + + if (unlikely((size_t)size_upper > MAX_MAPSIZE || + (uint64_t)size_upper / pagesize > MAX_PAGENO + 1)) { + rc = MDBX_TOO_LARGE; + goto bailout; + } + + const size_t unit = (globals.sys_pagesize > (size_t)pagesize) + ? globals.sys_pagesize + : (size_t)pagesize; + size_lower = ceil_powerof2(size_lower, unit); + size_upper = ceil_powerof2(size_upper, unit); + size_now = ceil_powerof2(size_now, unit); + + /* LY: подбираем значение size_upper: + * - кратное размеру страницы + * - без нарушения MAX_MAPSIZE и MAX_PAGENO */ + while (unlikely((size_t)size_upper > MAX_MAPSIZE || + (uint64_t)size_upper / pagesize > MAX_PAGENO + 1)) { + if ((size_t)size_upper < unit + MIN_MAPSIZE || + (size_t)size_upper < (size_t)pagesize * (MIN_PAGENO + 1)) { + /* паранойа на случай переполнения при невероятных значениях */ + rc = MDBX_EINVAL; + goto bailout; + } + size_upper -= unit; + if ((size_t)size_upper < (size_t)size_lower) + size_lower = size_upper; + } + eASSERT(env, (size_upper - size_lower) % globals.sys_pagesize == 0); + + if (size_now < size_lower) + size_now = size_lower; + if (size_now > size_upper) + size_now = size_upper; + + if (growth_step < 0) { + growth_step = ((size_t)(size_upper - size_lower)) / 42; + if (growth_step > size_lower && size_lower < (intptr_t)MEGABYTE) + growth_step = size_lower; + if (growth_step < 65536) + growth_step = 65536; + if ((size_t)growth_step > MAX_MAPSIZE / 64) + growth_step = MAX_MAPSIZE / 64; + } + if (growth_step == 0 && shrink_threshold > 0) + growth_step = 1; + growth_step = ceil_powerof2(growth_step, unit); + + if (shrink_threshold < 0) + shrink_threshold = growth_step + growth_step; + 
shrink_threshold = ceil_powerof2(shrink_threshold, unit); + + //---------------------------------------------------------------------------- + + if (!env->dxb_mmap.base) { + /* save user's geo-params for future open/create */ + if (pagesize != (intptr_t)env->ps) + env_setup_pagesize(env, pagesize); + env->geo_in_bytes.lower = size_lower; + env->geo_in_bytes.now = size_now; + env->geo_in_bytes.upper = size_upper; + env->geo_in_bytes.grow = + pgno2bytes(env, pv2pages(pages2pv(bytes2pgno(env, growth_step)))); + env->geo_in_bytes.shrink = + pgno2bytes(env, pv2pages(pages2pv(bytes2pgno(env, shrink_threshold)))); + env_options_adjust_defaults(env); + + ENSURE(env, env->geo_in_bytes.lower >= MIN_MAPSIZE); + ENSURE(env, env->geo_in_bytes.lower / (unsigned)pagesize >= MIN_PAGENO); + ENSURE(env, env->geo_in_bytes.lower % (unsigned)pagesize == 0); + ENSURE(env, env->geo_in_bytes.lower % globals.sys_pagesize == 0); + + ENSURE(env, env->geo_in_bytes.upper <= MAX_MAPSIZE); + ENSURE(env, env->geo_in_bytes.upper / (unsigned)pagesize <= MAX_PAGENO + 1); + ENSURE(env, env->geo_in_bytes.upper % (unsigned)pagesize == 0); + ENSURE(env, env->geo_in_bytes.upper % globals.sys_pagesize == 0); + + ENSURE(env, env->geo_in_bytes.now >= env->geo_in_bytes.lower); + ENSURE(env, env->geo_in_bytes.now <= env->geo_in_bytes.upper); + ENSURE(env, env->geo_in_bytes.now % (unsigned)pagesize == 0); + ENSURE(env, env->geo_in_bytes.now % globals.sys_pagesize == 0); + + ENSURE(env, env->geo_in_bytes.grow % (unsigned)pagesize == 0); + ENSURE(env, env->geo_in_bytes.grow % globals.sys_pagesize == 0); + ENSURE(env, env->geo_in_bytes.shrink % (unsigned)pagesize == 0); + ENSURE(env, env->geo_in_bytes.shrink % globals.sys_pagesize == 0); + + rc = MDBX_SUCCESS; + } else { + /* apply new params to opened environment */ + ENSURE(env, pagesize == (intptr_t)env->ps); + meta_t meta; + memset(&meta, 0, sizeof(meta)); + if (!inside_txn) { + eASSERT(env, should_unlock); + const meta_ptr_t head = meta_recent(env, 
&env->basal_txn->tw.troika); + + uint64_t timestamp = 0; + while ("workaround for " + "https://libmdbx.dqdkfa.ru/dead-github/issues/269") { + rc = coherency_check_head(env->basal_txn, head, ×tamp); + if (likely(rc == MDBX_SUCCESS)) + break; + if (unlikely(rc != MDBX_RESULT_TRUE)) + goto bailout; + } + meta = *head.ptr_c; + const txnid_t txnid = safe64_txnid_next(head.txnid); + if (unlikely(txnid > MAX_TXNID)) { + rc = MDBX_TXN_FULL; + ERROR("txnid overflow, raise %d", rc); + goto bailout; + } + meta_set_txnid(env, &meta, txnid); + } + + const geo_t *const current_geo = + &(env->txn ? env->txn : env->basal_txn)->geo; + /* update env-geo to avoid influences */ + env->geo_in_bytes.now = pgno2bytes(env, current_geo->now); + env->geo_in_bytes.lower = pgno2bytes(env, current_geo->lower); + env->geo_in_bytes.upper = pgno2bytes(env, current_geo->upper); + env->geo_in_bytes.grow = pgno2bytes(env, pv2pages(current_geo->grow_pv)); + env->geo_in_bytes.shrink = + pgno2bytes(env, pv2pages(current_geo->shrink_pv)); + + geo_t new_geo; + new_geo.lower = bytes2pgno(env, size_lower); + new_geo.now = bytes2pgno(env, size_now); + new_geo.upper = bytes2pgno(env, size_upper); + new_geo.grow_pv = pages2pv(bytes2pgno(env, growth_step)); + new_geo.shrink_pv = pages2pv(bytes2pgno(env, shrink_threshold)); + new_geo.first_unallocated = current_geo->first_unallocated; + + ENSURE(env, pgno_align2os_bytes(env, new_geo.lower) == (size_t)size_lower); + ENSURE(env, pgno_align2os_bytes(env, new_geo.upper) == (size_t)size_upper); + ENSURE(env, pgno_align2os_bytes(env, new_geo.now) == (size_t)size_now); + ENSURE(env, new_geo.grow_pv == pages2pv(pv2pages(new_geo.grow_pv))); + ENSURE(env, new_geo.shrink_pv == pages2pv(pv2pages(new_geo.shrink_pv))); + + ENSURE(env, (size_t)size_lower >= MIN_MAPSIZE); + ENSURE(env, new_geo.lower >= MIN_PAGENO); + ENSURE(env, (size_t)size_upper <= MAX_MAPSIZE); + ENSURE(env, new_geo.upper <= MAX_PAGENO + 1); + ENSURE(env, new_geo.now >= new_geo.first_unallocated); + 
ENSURE(env, new_geo.upper >= new_geo.now); + ENSURE(env, new_geo.now >= new_geo.lower); + + if (memcmp(current_geo, &new_geo, sizeof(geo_t)) != 0) { +#if defined(_WIN32) || defined(_WIN64) + /* Was DB shrinking disabled before and now it will be enabled? */ + if (new_geo.lower < new_geo.upper && new_geo.shrink_pv && + !(current_geo->lower < current_geo->upper && + current_geo->shrink_pv)) { + if (!env->lck_mmap.lck) { + rc = MDBX_EPERM; + goto bailout; + } + int err = lck_rdt_lock(env); + if (unlikely(MDBX_IS_ERROR(err))) { + rc = err; + goto bailout; + } + + /* Check if there are any reading threads that do not use the SRWL */ + const size_t CurrentTid = GetCurrentThreadId(); + const reader_slot_t *const begin = env->lck_mmap.lck->rdt; + const reader_slot_t *const end = + begin + + atomic_load32(&env->lck_mmap.lck->rdt_length, mo_AcquireRelease); + for (const reader_slot_t *reader = begin; reader < end; ++reader) { + if (reader->pid.weak == env->pid && reader->tid.weak && + reader->tid.weak != CurrentTid) { + /* At least one thread may don't use SRWL */ + rc = MDBX_EPERM; + break; + } + } + + lck_rdt_unlock(env); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } +#endif /* Windows */ + + if (new_geo.now != current_geo->now || + new_geo.upper != current_geo->upper) { + rc = dxb_resize(env, current_geo->first_unallocated, new_geo.now, + new_geo.upper, explicit_resize); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + if (inside_txn) { + env->txn->geo = new_geo; + env->txn->flags |= MDBX_TXN_DIRTY; + } else { + meta.geometry = new_geo; + rc = + dxb_sync_locked(env, env->flags, &meta, &env->basal_txn->tw.troika); + if (likely(rc == MDBX_SUCCESS)) { + env->geo_in_bytes.now = + pgno2bytes(env, new_geo.now = meta.geometry.now); + env->geo_in_bytes.upper = + pgno2bytes(env, new_geo.upper = meta.geometry.upper); + } + } + } + if (likely(rc == MDBX_SUCCESS)) { + /* update env-geo to avoid influences */ + eASSERT(env, env->geo_in_bytes.now == pgno2bytes(env, 
new_geo.now)); + env->geo_in_bytes.lower = pgno2bytes(env, new_geo.lower); + eASSERT(env, env->geo_in_bytes.upper == pgno2bytes(env, new_geo.upper)); + env->geo_in_bytes.grow = pgno2bytes(env, pv2pages(new_geo.grow_pv)); + env->geo_in_bytes.shrink = pgno2bytes(env, pv2pages(new_geo.shrink_pv)); + } + } + +bailout: + if (should_unlock) + lck_txn_unlock(env); + return rc; +} + +__cold int mdbx_env_sync_ex(MDBX_env *env, bool force, bool nonblock) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + return env_sync(env, force, nonblock); +} diff --git a/src/api-extra.c b/src/api-extra.c new file mode 100644 index 00000000..1a9b8b08 --- /dev/null +++ b/src/api-extra.c @@ -0,0 +1,117 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +/*------------------------------------------------------------------------------ + * Readers API */ + +__cold int mdbx_reader_list(const MDBX_env *env, MDBX_reader_list_func *func, + void *ctx) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!func)) + return MDBX_EINVAL; + + rc = MDBX_RESULT_TRUE; + int serial = 0; + lck_t *const lck = env->lck_mmap.lck; + if (likely(lck)) { + const size_t snap_nreaders = + atomic_load32(&lck->rdt_length, mo_AcquireRelease); + for (size_t i = 0; i < snap_nreaders; i++) { + const reader_slot_t *r = lck->rdt + i; + retry_reader:; + const uint32_t pid = atomic_load32(&r->pid, mo_AcquireRelease); + if (!pid) + continue; + txnid_t txnid = safe64_read(&r->txnid); + const uint64_t tid = atomic_load64(&r->tid, mo_Relaxed); + const pgno_t pages_used = + atomic_load32(&r->snapshot_pages_used, mo_Relaxed); + const uint64_t reader_pages_retired = + atomic_load64(&r->snapshot_pages_retired, mo_Relaxed); + if (unlikely(txnid != safe64_read(&r->txnid) || + pid != atomic_load32(&r->pid, mo_AcquireRelease) || + tid != atomic_load64(&r->tid, 
mo_Relaxed) || + pages_used != + atomic_load32(&r->snapshot_pages_used, mo_Relaxed) || + reader_pages_retired != + atomic_load64(&r->snapshot_pages_retired, mo_Relaxed))) + goto retry_reader; + + eASSERT(env, txnid > 0); + if (txnid >= SAFE64_INVALID_THRESHOLD) + txnid = 0; + + size_t bytes_used = 0; + size_t bytes_retained = 0; + uint64_t lag = 0; + if (txnid) { + troika_t troika = meta_tap(env); + retry_header:; + const meta_ptr_t head = meta_recent(env, &troika); + const uint64_t head_pages_retired = + unaligned_peek_u64_volatile(4, head.ptr_v->pages_retired); + if (unlikely(meta_should_retry(env, &troika) || + head_pages_retired != unaligned_peek_u64_volatile( + 4, head.ptr_v->pages_retired))) + goto retry_header; + + lag = (head.txnid - txnid) / xMDBX_TXNID_STEP; + bytes_used = pgno2bytes(env, pages_used); + bytes_retained = (head_pages_retired > reader_pages_retired) + ? pgno2bytes(env, (pgno_t)(head_pages_retired - + reader_pages_retired)) + : 0; + } + rc = func(ctx, ++serial, (unsigned)i, pid, (mdbx_tid_t)((intptr_t)tid), + txnid, lag, bytes_used, bytes_retained); + if (unlikely(rc != MDBX_SUCCESS)) + break; + } + } + + return rc; +} + +__cold int mdbx_reader_check(MDBX_env *env, int *dead) { + if (dead) + *dead = 0; + return mvcc_cleanup_dead(env, false, dead); +} + +/*------------------------------------------------------------------------------ + * Locking API */ + +int mdbx_txn_lock(MDBX_env *env, bool dont_wait) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(env->flags & MDBX_RDONLY)) + return MDBX_EACCESS; + if (unlikely(env->basal_txn->owner || + (env->basal_txn->flags & MDBX_TXN_FINISHED) == 0)) + return MDBX_BUSY; + + return lck_txn_lock(env, dont_wait); +} + +int mdbx_txn_unlock(MDBX_env *env) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(env->flags & MDBX_RDONLY)) + return MDBX_EACCESS; + if (unlikely(env->basal_txn->owner != 
osal_thread_self())) + return MDBX_THREAD_MISMATCH; + if (unlikely((env->basal_txn->flags & MDBX_TXN_FINISHED) == 0)) + return MDBX_BUSY; + + lck_txn_unlock(env); + return MDBX_SUCCESS; +} diff --git a/src/api-key-transform.c b/src/api-key-transform.c new file mode 100644 index 00000000..e28f8de9 --- /dev/null +++ b/src/api-key-transform.c @@ -0,0 +1,225 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +static inline double key2double(const int64_t key) { + union { + uint64_t u; + double f; + } casting; + + casting.u = (key < 0) ? key + UINT64_C(0x8000000000000000) + : UINT64_C(0xffffFFFFffffFFFF) - key; + return casting.f; +} + +static inline uint64_t double2key(const double *const ptr) { + STATIC_ASSERT(sizeof(double) == sizeof(int64_t)); + const int64_t i = *(const int64_t *)ptr; + const uint64_t u = (i < 0) ? UINT64_C(0xffffFFFFffffFFFF) - i + : i + UINT64_C(0x8000000000000000); + if (ASSERT_ENABLED()) { + const double f = key2double(u); + assert(memcmp(&f, ptr, sizeof(double)) == 0); + } + return u; +} + +static inline float key2float(const int32_t key) { + union { + uint32_t u; + float f; + } casting; + + casting.u = + (key < 0) ? key + UINT32_C(0x80000000) : UINT32_C(0xffffFFFF) - key; + return casting.f; +} + +static inline uint32_t float2key(const float *const ptr) { + STATIC_ASSERT(sizeof(float) == sizeof(int32_t)); + const int32_t i = *(const int32_t *)ptr; + const uint32_t u = + (i < 0) ? 
UINT32_C(0xffffFFFF) - i : i + UINT32_C(0x80000000); + if (ASSERT_ENABLED()) { + const float f = key2float(u); + assert(memcmp(&f, ptr, sizeof(float)) == 0); + } + return u; +} + +uint64_t mdbx_key_from_double(const double ieee754_64bit) { + return double2key(&ieee754_64bit); +} + +uint64_t mdbx_key_from_ptrdouble(const double *const ieee754_64bit) { + return double2key(ieee754_64bit); +} + +uint32_t mdbx_key_from_float(const float ieee754_32bit) { + return float2key(&ieee754_32bit); +} + +uint32_t mdbx_key_from_ptrfloat(const float *const ieee754_32bit) { + return float2key(ieee754_32bit); +} + +#define IEEE754_DOUBLE_MANTISSA_SIZE 52 +#define IEEE754_DOUBLE_EXPONENTA_BIAS 0x3FF +#define IEEE754_DOUBLE_EXPONENTA_MAX 0x7FF +#define IEEE754_DOUBLE_IMPLICIT_LEAD UINT64_C(0x0010000000000000) +#define IEEE754_DOUBLE_MANTISSA_MASK UINT64_C(0x000FFFFFFFFFFFFF) +#define IEEE754_DOUBLE_MANTISSA_AMAX UINT64_C(0x001FFFFFFFFFFFFF) + +static inline int clz64(uint64_t value) { +#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_clzl) + if (sizeof(value) == sizeof(int)) + return __builtin_clz(value); + if (sizeof(value) == sizeof(long)) + return __builtin_clzl(value); +#if (defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ == 8) || \ + __has_builtin(__builtin_clzll) + return __builtin_clzll(value); +#endif /* have(long long) && long long == uint64_t */ +#endif /* GNU C */ + +#if defined(_MSC_VER) + unsigned long index; +#if defined(_M_AMD64) || defined(_M_ARM64) || defined(_M_X64) + _BitScanReverse64(&index, value); + return 63 - index; +#else + if (value > UINT32_MAX) { + _BitScanReverse(&index, (uint32_t)(value >> 32)); + return 31 - index; + } + _BitScanReverse(&index, (uint32_t)value); + return 63 - index; +#endif +#endif /* MSVC */ + + value |= value >> 1; + value |= value >> 2; + value |= value >> 4; + value |= value >> 8; + value |= value >> 16; + value |= value >> 32; + static const uint8_t debruijn_clz64[64] = { + 63, 16, 62, 7, 15, 36, 61, 3, 6, 14, 22, 26, 35, 47, 
60, 2, + 9, 5, 28, 11, 13, 21, 42, 19, 25, 31, 34, 40, 46, 52, 59, 1, + 17, 8, 37, 4, 23, 27, 48, 10, 29, 12, 43, 20, 32, 41, 53, 18, + 38, 24, 49, 30, 44, 33, 54, 39, 50, 45, 55, 51, 56, 57, 58, 0}; + return debruijn_clz64[value * UINT64_C(0x03F79D71B4CB0A89) >> 58]; +} + +static inline uint64_t round_mantissa(const uint64_t u64, int shift) { + assert(shift < 0 && u64 > 0); + shift = -shift; + const unsigned half = 1 << (shift - 1); + const unsigned lsb = 1 & (unsigned)(u64 >> shift); + const unsigned tie2even = 1 ^ lsb; + return (u64 + half - tie2even) >> shift; +} + +uint64_t mdbx_key_from_jsonInteger(const int64_t json_integer) { + const uint64_t bias = UINT64_C(0x8000000000000000); + if (json_integer > 0) { + const uint64_t u64 = json_integer; + int shift = clz64(u64) - (64 - IEEE754_DOUBLE_MANTISSA_SIZE - 1); + uint64_t mantissa = u64 << shift; + if (unlikely(shift < 0)) { + mantissa = round_mantissa(u64, shift); + if (mantissa > IEEE754_DOUBLE_MANTISSA_AMAX) + mantissa = round_mantissa(u64, --shift); + } + + assert(mantissa >= IEEE754_DOUBLE_IMPLICIT_LEAD && + mantissa <= IEEE754_DOUBLE_MANTISSA_AMAX); + const uint64_t exponent = (uint64_t)IEEE754_DOUBLE_EXPONENTA_BIAS + + IEEE754_DOUBLE_MANTISSA_SIZE - shift; + assert(exponent > 0 && exponent <= IEEE754_DOUBLE_EXPONENTA_MAX); + const uint64_t key = bias + (exponent << IEEE754_DOUBLE_MANTISSA_SIZE) + + (mantissa - IEEE754_DOUBLE_IMPLICIT_LEAD); +#if !defined(_MSC_VER) || \ + defined( \ + _DEBUG) /* Workaround for MSVC error LNK2019: unresolved external \ + symbol __except1 referenced in function __ftol3_except */ + assert(key == mdbx_key_from_double((double)json_integer)); +#endif /* Workaround for MSVC */ + return key; + } + + if (json_integer < 0) { + const uint64_t u64 = -json_integer; + int shift = clz64(u64) - (64 - IEEE754_DOUBLE_MANTISSA_SIZE - 1); + uint64_t mantissa = u64 << shift; + if (unlikely(shift < 0)) { + mantissa = round_mantissa(u64, shift); + if (mantissa > IEEE754_DOUBLE_MANTISSA_AMAX) + 
mantissa = round_mantissa(u64, --shift); + } + + assert(mantissa >= IEEE754_DOUBLE_IMPLICIT_LEAD && + mantissa <= IEEE754_DOUBLE_MANTISSA_AMAX); + const uint64_t exponent = (uint64_t)IEEE754_DOUBLE_EXPONENTA_BIAS + + IEEE754_DOUBLE_MANTISSA_SIZE - shift; + assert(exponent > 0 && exponent <= IEEE754_DOUBLE_EXPONENTA_MAX); + const uint64_t key = bias - 1 - (exponent << IEEE754_DOUBLE_MANTISSA_SIZE) - + (mantissa - IEEE754_DOUBLE_IMPLICIT_LEAD); +#if !defined(_MSC_VER) || \ + defined( \ + _DEBUG) /* Workaround for MSVC error LNK2019: unresolved external \ + symbol __except1 referenced in function __ftol3_except */ + assert(key == mdbx_key_from_double((double)json_integer)); +#endif /* Workaround for MSVC */ + return key; + } + + return bias; +} + +int64_t mdbx_jsonInteger_from_key(const MDBX_val v) { + assert(v.iov_len == 8); + const uint64_t key = unaligned_peek_u64(2, v.iov_base); + const uint64_t bias = UINT64_C(0x8000000000000000); + const uint64_t covalent = (key > bias) ? key - bias : bias - key - 1; + const int shift = IEEE754_DOUBLE_EXPONENTA_BIAS + 63 - + (IEEE754_DOUBLE_EXPONENTA_MAX & + (int)(covalent >> IEEE754_DOUBLE_MANTISSA_SIZE)); + if (unlikely(shift < 1)) + return (key < bias) ? INT64_MIN : INT64_MAX; + if (unlikely(shift > 63)) + return 0; + + const uint64_t unscaled = ((covalent & IEEE754_DOUBLE_MANTISSA_MASK) + << (63 - IEEE754_DOUBLE_MANTISSA_SIZE)) + + bias; + const int64_t absolute = unscaled >> shift; + const int64_t value = (key < bias) ? 
-absolute : absolute; + assert(key == mdbx_key_from_jsonInteger(value) || + (mdbx_key_from_jsonInteger(value - 1) < key && + key < mdbx_key_from_jsonInteger(value + 1))); + return value; +} + +double mdbx_double_from_key(const MDBX_val v) { + assert(v.iov_len == 8); + return key2double(unaligned_peek_u64(2, v.iov_base)); +} + +float mdbx_float_from_key(const MDBX_val v) { + assert(v.iov_len == 4); + return key2float(unaligned_peek_u32(2, v.iov_base)); +} + +int32_t mdbx_int32_from_key(const MDBX_val v) { + assert(v.iov_len == 4); + return (int32_t)(unaligned_peek_u32(2, v.iov_base) - UINT32_C(0x80000000)); +} + +int64_t mdbx_int64_from_key(const MDBX_val v) { + assert(v.iov_len == 8); + return (int64_t)(unaligned_peek_u64(2, v.iov_base) - + UINT64_C(0x8000000000000000)); +} diff --git a/src/api-txn.c b/src/api-txn.c new file mode 100644 index 00000000..8d048f9a --- /dev/null +++ b/src/api-txn.c @@ -0,0 +1,508 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +#ifdef __SANITIZE_THREAD__ +/* LY: avoid tsan-trap by txn, mm_last_pg and geo.first_unallocated */ +__attribute__((__no_sanitize_thread__, __noinline__)) +#endif +int mdbx_txn_straggler(const MDBX_txn *txn, int *percent) +{ + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return (rc > 0) ? 
-rc : rc; + + MDBX_env *env = txn->env; + if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0)) { + if (percent) + *percent = (int)((txn->geo.first_unallocated * UINT64_C(100) + + txn->geo.end_pgno / 2) / + txn->geo.end_pgno); + return 0; + } + + txnid_t lag; + troika_t troika = meta_tap(env); + do { + const meta_ptr_t head = meta_recent(env, &troika); + if (percent) { + const pgno_t maxpg = head.ptr_v->geometry.now; + *percent = (int)((head.ptr_v->geometry.first_unallocated * UINT64_C(100) + + maxpg / 2) / + maxpg); + } + lag = (head.txnid - txn->txnid) / xMDBX_TXNID_STEP; + } while (unlikely(meta_should_retry(env, &troika))); + + return (lag > INT_MAX) ? INT_MAX : (int)lag; +} + +__cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, + uint32_t *mask) { + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!mask)) + return MDBX_EINVAL; + + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + if ((cx.outer.tree->flags & MDBX_DUPSORT) == 0) + return MDBX_RESULT_TRUE; + + MDBX_val key, data; + rc = outer_first(&cx.outer, &key, &data); + *mask = 0; + while (rc == MDBX_SUCCESS) { + const node_t *node = + page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); + const tree_t *db = node_data(node); + const unsigned flags = node_flags(node); + switch (flags) { + case N_BIGDATA: + case 0: + /* single-value entry, deep = 0 */ + *mask |= 1 << 0; + break; + case N_DUPDATA: + /* single sub-page, deep = 1 */ + *mask |= 1 << 1; + break; + case N_DUPDATA | N_SUBDATA: + /* sub-tree */ + *mask |= 1 << UNALIGNED_PEEK_16(db, tree_t, height); + break; + default: + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid node-size", flags); + return MDBX_CORRUPTED; + } + rc = outer_next(&cx.outer, &key, &data, MDBX_NEXT_NODUP); + } + + return (rc == MDBX_NOTFOUND) ? 
MDBX_SUCCESS : rc; +} + +int mdbx_canary_get(const MDBX_txn *txn, MDBX_canary *canary) { + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(canary == nullptr)) + return MDBX_EINVAL; + + *canary = txn->canary; + return MDBX_SUCCESS; +} + +int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, + MDBX_val *data) { + DKBUF_DEBUG; + DEBUG("===> get db %u key [%s]", dbi, DKEY_DEBUG(key)); + + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!key || !data)) + return MDBX_EINVAL; + + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + return cursor_seek(&cx.outer, (MDBX_val *)key, data, MDBX_SET).err; +} + +int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, + MDBX_val *data) { + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!key || !data)) + return MDBX_EINVAL; + + if (unlikely(txn->flags & MDBX_TXN_BLOCKED)) + return MDBX_BAD_TXN; + + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + return cursor_ops(&cx.outer, key, data, MDBX_SET_LOWERBOUND); +} + +int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, + MDBX_val *data, size_t *values_count) { + DKBUF_DEBUG; + DEBUG("===> get db %u key [%s]", dbi, DKEY_DEBUG(key)); + + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!key || !data)) + return MDBX_EINVAL; + + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = cursor_seek(&cx.outer, key, data, MDBX_SET_KEY).err; + if (unlikely(rc != MDBX_SUCCESS)) { + if (values_count) + *values_count = 0; + return rc; + } + + if (values_count) { + *values_count = 1; + if (inner_pointed(&cx.outer)) + 
*values_count = + (sizeof(*values_count) >= sizeof(cx.inner.nested_tree.items) || + cx.inner.nested_tree.items <= PTRDIFF_MAX) + ? (size_t)cx.inner.nested_tree.items + : PTRDIFF_MAX; + } + return MDBX_SUCCESS; +} + +/*----------------------------------------------------------------------------*/ + +int mdbx_canary_put(MDBX_txn *txn, const MDBX_canary *canary) { + int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (likely(canary)) { + if (txn->canary.x == canary->x && txn->canary.y == canary->y && + txn->canary.z == canary->z) + return MDBX_SUCCESS; + txn->canary.x = canary->x; + txn->canary.y = canary->y; + txn->canary.z = canary->z; + } + txn->canary.v = txn->txnid; + txn->flags |= MDBX_TXN_DIRTY; + + return MDBX_SUCCESS; +} + +/* Функция сообщает находится ли указанный адрес в "грязной" странице у + * заданной пишущей транзакции. В конечном счете это позволяет избавиться от + * лишнего копирования данных из НЕ-грязных страниц. + * + * "Грязные" страницы - это те, которые уже были изменены в ходе пишущей + * транзакции. Соответственно, какие-либо дальнейшие изменения могут привести + * к перезаписи таких страниц. Поэтому все функции, выполняющие изменения, в + * качестве аргументов НЕ должны получать указатели на данные в таких + * страницах. В свою очередь "НЕ грязные" страницы перед модификацией будут + * скопированы. + * + * Другими словами, данные из "грязных" страниц должны быть либо скопированы + * перед передачей в качестве аргументов для дальнейших модификаций, либо + * отвергнуты на стадии проверки корректности аргументов. + * + * Таким образом, функция позволяет как избавится от лишнего копирования, + * так и выполнить более полную проверку аргументов. + * + * ВАЖНО: Передаваемый указатель должен указывать на начало данных. 
Только + * так гарантируется что актуальный заголовок страницы будет физически + * расположен в той-же странице памяти, в том числе для многостраничных + * P_LARGE страниц с длинными данными. */ +int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr) { + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + const MDBX_env *env = txn->env; + const ptrdiff_t offset = ptr_dist(ptr, env->dxb_mmap.base); + if (offset >= 0) { + const pgno_t pgno = bytes2pgno(env, offset); + if (likely(pgno < txn->geo.first_unallocated)) { + const page_t *page = pgno2page(env, pgno); + if (unlikely(page->pgno != pgno || (page->flags & P_ILL_BITS) != 0)) { + /* The ptr pointed into middle of a large page, + * not to the beginning of a data. */ + return MDBX_EINVAL; + } + return ((txn->flags & MDBX_TXN_RDONLY) || !is_modifable(txn, page)) + ? MDBX_RESULT_FALSE + : MDBX_RESULT_TRUE; + } + if ((size_t)offset < env->dxb_mmap.limit) { + /* Указатель адресует что-то в пределах mmap, но за границей + * распределенных страниц. Такое может случится если mdbx_is_dirty() + * вызывается после операции, в ходе которой грязная страница была + * возвращена в нераспределенное пространство. */ + return (txn->flags & MDBX_TXN_RDONLY) ? MDBX_EINVAL : MDBX_RESULT_TRUE; + } + } + + /* Страница вне используемого mmap-диапазона, т.е. либо в функцию был + * передан некорректный адрес, либо адрес в теневой странице, которая была + * выделена посредством malloc(). + * + * Для режима MDBX_WRITE_MAP режима страница однозначно "не грязная", + * а для режимов без MDBX_WRITE_MAP однозначно "не чистая". */ + return (txn->flags & (MDBX_WRITEMAP | MDBX_TXN_RDONLY)) ? 
MDBX_EINVAL + : MDBX_RESULT_TRUE; +} + +int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, + const MDBX_val *data) { + int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!key)) + return MDBX_EINVAL; + + if (unlikely(dbi <= FREE_DBI)) + return MDBX_BAD_DBI; + + if (unlikely(txn->flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) + return (txn->flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS : MDBX_BAD_TXN; + + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + MDBX_val proxy; + MDBX_cursor_op op = MDBX_SET; + unsigned flags = MDBX_ALLDUPS; + if (data) { + proxy = *data; + data = &proxy; + op = MDBX_GET_BOTH; + flags = 0; + } + rc = cursor_seek(&cx.outer, (MDBX_val *)key, (MDBX_val *)data, op).err; + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + cx.outer.next = txn->cursors[dbi]; + txn->cursors[dbi] = &cx.outer; + rc = cursor_del(&cx.outer, flags); + txn->cursors[dbi] = cx.outer.next; + return rc; +} + +int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, + MDBX_put_flags_t flags) { + int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!key || !data)) + return MDBX_EINVAL; + + if (unlikely(dbi <= FREE_DBI)) + return MDBX_BAD_DBI; + + if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | + MDBX_ALLDUPS | MDBX_RESERVE | MDBX_APPEND | + MDBX_APPENDDUP | MDBX_CURRENT | MDBX_MULTIPLE))) + return MDBX_EINVAL; + + if (unlikely(txn->flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) + return (txn->flags & MDBX_TXN_RDONLY) ? 
MDBX_EACCESS : MDBX_BAD_TXN; + + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + cx.outer.next = txn->cursors[dbi]; + txn->cursors[dbi] = &cx.outer; + + /* LY: support for update (explicit overwrite) */ + if (flags & MDBX_CURRENT) { + rc = cursor_seek(&cx.outer, (MDBX_val *)key, nullptr, MDBX_SET).err; + if (likely(rc == MDBX_SUCCESS) && (txn->dbs[dbi].flags & MDBX_DUPSORT) && + (flags & MDBX_ALLDUPS) == 0) { + /* LY: allows update (explicit overwrite) only for unique keys */ + node_t *node = + page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); + if (node_flags(node) & N_DUPDATA) { + tASSERT(txn, inner_pointed(&cx.outer) && + cx.outer.subcur->nested_tree.items > 1); + rc = MDBX_EMULTIVAL; + if ((flags & MDBX_NOOVERWRITE) == 0) { + flags -= MDBX_CURRENT; + rc = cursor_del(&cx.outer, MDBX_ALLDUPS); + } + } + } + } + + if (likely(rc == MDBX_SUCCESS)) + rc = cursor_put_checklen(&cx.outer, key, data, flags); + txn->cursors[dbi] = cx.outer.next; + + return rc; +} + +//------------------------------------------------------------------------------ + +/* Позволяет обновить или удалить существующую запись с получением + * в old_data предыдущего значения данных. При этом если new_data равен + * нулю, то выполняется удаление, иначе обновление/вставка. + * + * Текущее значение может находиться в уже измененной (грязной) странице. + * В этом случае страница будет перезаписана при обновлении, а само старое + * значение утрачено. Поэтому исходно в old_data должен быть передан + * дополнительный буфер для копирования старого значения. + * Если переданный буфер слишком мал, то функция вернет -1, установив + * old_data->iov_len в соответствующее значение. + * + * Для не-уникальных ключей также возможен второй сценарий использования, + * когда посредством old_data из записей с одинаковым ключом для + * удаления/обновления выбирается конкретная. 
Для выбора этого сценария + * во flags следует одновременно указать MDBX_CURRENT и MDBX_NOOVERWRITE. + * Именно эта комбинация выбрана, так как она лишена смысла, и этим позволяет + * идентифицировать запрос такого сценария. + * + * Функция может быть замещена соответствующими операциями с курсорами + * после двух доработок (TODO): + * - внешняя аллокация курсоров, в том числе на стеке (без malloc). + * - получения dirty-статуса страницы по адресу (знать о MUTABLE/WRITEABLE). + */ + +int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, + MDBX_val *new_data, MDBX_val *old_data, + MDBX_put_flags_t flags, MDBX_preserve_func preserver, + void *preserver_context) { + int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!key || !old_data || old_data == new_data)) + return MDBX_EINVAL; + + if (unlikely(old_data->iov_base == nullptr && old_data->iov_len)) + return MDBX_EINVAL; + + if (unlikely(new_data == nullptr && + (flags & (MDBX_CURRENT | MDBX_RESERVE)) != MDBX_CURRENT)) + return MDBX_EINVAL; + + if (unlikely(dbi <= FREE_DBI)) + return MDBX_BAD_DBI; + + if (unlikely(flags & + ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | + MDBX_RESERVE | MDBX_APPEND | MDBX_APPENDDUP | MDBX_CURRENT))) + return MDBX_EINVAL; + + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + cx.outer.next = txn->cursors[dbi]; + txn->cursors[dbi] = &cx.outer; + + MDBX_val present_key = *key; + if (F_ISSET(flags, MDBX_CURRENT | MDBX_NOOVERWRITE)) { + /* в old_data значение для выбора конкретного дубликата */ + if (unlikely(!(txn->dbs[dbi].flags & MDBX_DUPSORT))) { + rc = MDBX_EINVAL; + goto bailout; + } + + /* убираем лишний бит, он был признаком запрошенного режима */ + flags -= MDBX_NOOVERWRITE; + + rc = cursor_seek(&cx.outer, &present_key, old_data, MDBX_GET_BOTH).err; + if (rc != MDBX_SUCCESS) + goto bailout; + } else { + /* в old_data буфер для сохранения 
предыдущего значения */ /* (English: in this mode old_data is the buffer that receives the previous value) */ + if (unlikely(new_data && old_data->iov_base == new_data->iov_base)) + { rc = MDBX_EINVAL; goto bailout; } /* cx.outer is already linked into txn->cursors[dbi]; leave via bailout so the on-stack cursor is unlinked before returning */ + MDBX_val present_data; + rc = cursor_seek(&cx.outer, &present_key, &present_data, MDBX_SET_KEY).err; + if (unlikely(rc != MDBX_SUCCESS)) { /* seek failed: report "no previous value" to the caller */ + old_data->iov_base = nullptr; + old_data->iov_len = 0; + if (rc != MDBX_NOTFOUND || (flags & MDBX_CURRENT)) + goto bailout; + } else if (flags & MDBX_NOOVERWRITE) { /* key exists: hand back the present value together with MDBX_KEYEXIST */ + rc = MDBX_KEYEXIST; + *old_data = present_data; + goto bailout; + } else { + page_t *page = cx.outer.pg[cx.outer.top]; + if (txn->dbs[dbi].flags & MDBX_DUPSORT) { + if (flags & MDBX_CURRENT) { + /* disallow update/delete for multi-values */ + node_t *node = page_node(page, cx.outer.ki[cx.outer.top]); + if (node_flags(node) & N_DUPDATA) { + tASSERT(txn, inner_pointed(&cx.outer) && + cx.outer.subcur->nested_tree.items > 1); + if (cx.outer.subcur->nested_tree.items > 1) { + rc = MDBX_EMULTIVAL; + goto bailout; + } + } + /* In LMDB the MDBX_CURRENT flag here would cause + * the data to be replaced without regard to MDBX_DUPSORT ordering, + * but here that is acceptable in any case, since we have + * verified that the key has only a single value. */ + } + } + + if (is_modifable(txn, page)) { + if (new_data && cmp_lenfast(&present_data, new_data) == 0) { + /* the data is identical, so there is nothing to do */ + *old_data = *new_data; + goto bailout; + } + rc = preserver ? 
preserver(preserver_context, old_data, + present_data.iov_base, present_data.iov_len) + : MDBX_SUCCESS; + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } else { + *old_data = present_data; + } + flags |= MDBX_CURRENT; + } + } + + if (likely(new_data)) + rc = cursor_put_checklen(&cx.outer, key, new_data, flags); + else + rc = cursor_del(&cx.outer, flags & MDBX_ALLDUPS); + +bailout: + txn->cursors[dbi] = cx.outer.next; + return rc; +} + +static int default_value_preserver(void *context, MDBX_val *target, + const void *src, size_t bytes) { + (void)context; + if (unlikely(target->iov_len < bytes)) { + target->iov_base = nullptr; + target->iov_len = bytes; + return MDBX_RESULT_TRUE; + } + memcpy(target->iov_base, src, target->iov_len = bytes); + return MDBX_SUCCESS; +} + +int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, + MDBX_val *new_data, MDBX_val *old_data, + MDBX_put_flags_t flags) { + return mdbx_replace_ex(txn, dbi, key, new_data, old_data, flags, + default_value_preserver, nullptr); +} diff --git a/src/atomics-ops.h b/src/atomics-ops.h new file mode 100644 index 00000000..0b29cb84 --- /dev/null +++ b/src/atomics-ops.h @@ -0,0 +1,390 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +#ifndef __cplusplus + +#ifdef MDBX_HAVE_C11ATOMICS +#define osal_memory_fence(order, write) \ + atomic_thread_fence((write) ? mo_c11_store(order) : mo_c11_load(order)) +#else /* MDBX_HAVE_C11ATOMICS */ +#define osal_memory_fence(order, write) \ + do { \ + osal_compiler_barrier(); \ + if (write && order > (MDBX_CPU_WRITEBACK_INCOHERENT ? 
mo_Relaxed \ + : mo_AcquireRelease)) \ + osal_memory_barrier(); \ + } while (0) +#endif /* MDBX_HAVE_C11ATOMICS */ + +#if defined(MDBX_HAVE_C11ATOMICS) && defined(__LCC__) +#define atomic_store32(p, value, order) \ + ({ \ + const uint32_t value_to_store = (value); \ + atomic_store_explicit(MDBX_c11a_rw(uint32_t, p), value_to_store, \ + mo_c11_store(order)); \ + value_to_store; \ + }) +#define atomic_load32(p, order) \ + atomic_load_explicit(MDBX_c11a_ro(uint32_t, p), mo_c11_load(order)) +#define atomic_store64(p, value, order) \ + ({ \ + const uint64_t value_to_store = (value); \ + atomic_store_explicit(MDBX_c11a_rw(uint64_t, p), value_to_store, \ + mo_c11_store(order)); \ + value_to_store; \ + }) +#define atomic_load64(p, order) \ + atomic_load_explicit(MDBX_c11a_ro(uint64_t, p), mo_c11_load(order)) +#endif /* LCC && MDBX_HAVE_C11ATOMICS */ + +#ifndef atomic_store32 +MDBX_MAYBE_UNUSED static __always_inline uint32_t +atomic_store32(mdbx_atomic_uint32_t *p, const uint32_t value, + enum mdbx_memory_order order) { + STATIC_ASSERT(sizeof(mdbx_atomic_uint32_t) == 4); +#ifdef MDBX_HAVE_C11ATOMICS + assert(atomic_is_lock_free(MDBX_c11a_rw(uint32_t, p))); + atomic_store_explicit(MDBX_c11a_rw(uint32_t, p), value, mo_c11_store(order)); +#else /* MDBX_HAVE_C11ATOMICS */ + if (order != mo_Relaxed) + osal_compiler_barrier(); + p->weak = value; + osal_memory_fence(order, true); +#endif /* MDBX_HAVE_C11ATOMICS */ + return value; +} +#endif /* atomic_store32 */ + +#ifndef atomic_load32 +MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_load32( + const volatile mdbx_atomic_uint32_t *p, enum mdbx_memory_order order) { + STATIC_ASSERT(sizeof(mdbx_atomic_uint32_t) == 4); +#ifdef MDBX_HAVE_C11ATOMICS + assert(atomic_is_lock_free(MDBX_c11a_ro(uint32_t, p))); + return atomic_load_explicit(MDBX_c11a_ro(uint32_t, p), mo_c11_load(order)); +#else /* MDBX_HAVE_C11ATOMICS */ + osal_memory_fence(order, false); + const uint32_t value = p->weak; + if (order != mo_Relaxed) + 
osal_compiler_barrier(); + return value; +#endif /* MDBX_HAVE_C11ATOMICS */ +} +#endif /* atomic_load32 */ + +/*------------------------------------------------------------------------------ + * safe read/write volatile 64-bit fields on 32-bit architectures. */ + +/* LY: for testing non-atomic 64-bit txnid on 32-bit arches. + * #define xMDBX_TXNID_STEP (UINT32_MAX / 3) */ +#ifndef xMDBX_TXNID_STEP +#if MDBX_64BIT_CAS +#define xMDBX_TXNID_STEP 1u +#else +#define xMDBX_TXNID_STEP 2u +#endif +#endif /* xMDBX_TXNID_STEP */ + +#ifndef atomic_store64 +MDBX_MAYBE_UNUSED static __always_inline uint64_t +atomic_store64(mdbx_atomic_uint64_t *p, const uint64_t value, + enum mdbx_memory_order order) { + STATIC_ASSERT(sizeof(mdbx_atomic_uint64_t) == 8); +#if MDBX_64BIT_ATOMIC +#if __GNUC_PREREQ(11, 0) + STATIC_ASSERT(__alignof__(mdbx_atomic_uint64_t) >= sizeof(uint64_t)); +#endif /* GNU C >= 11 */ +#ifdef MDBX_HAVE_C11ATOMICS + assert(atomic_is_lock_free(MDBX_c11a_rw(uint64_t, p))); + atomic_store_explicit(MDBX_c11a_rw(uint64_t, p), value, mo_c11_store(order)); +#else /* MDBX_HAVE_C11ATOMICS */ + if (order != mo_Relaxed) + osal_compiler_barrier(); + p->weak = value; + osal_memory_fence(order, true); +#endif /* MDBX_HAVE_C11ATOMICS */ +#else /* !MDBX_64BIT_ATOMIC */ + osal_compiler_barrier(); + atomic_store32(&p->low, (uint32_t)value, mo_Relaxed); + jitter4testing(true); + atomic_store32(&p->high, (uint32_t)(value >> 32), order); + jitter4testing(true); +#endif /* !MDBX_64BIT_ATOMIC */ + return value; +} +#endif /* atomic_store64 */ + +#ifndef atomic_load64 +MDBX_MAYBE_UNUSED static +#if MDBX_64BIT_ATOMIC + __always_inline +#endif /* MDBX_64BIT_ATOMIC */ + uint64_t + atomic_load64(const volatile mdbx_atomic_uint64_t *p, + enum mdbx_memory_order order) { + STATIC_ASSERT(sizeof(mdbx_atomic_uint64_t) == 8); +#if MDBX_64BIT_ATOMIC +#ifdef MDBX_HAVE_C11ATOMICS + assert(atomic_is_lock_free(MDBX_c11a_ro(uint64_t, p))); + return atomic_load_explicit(MDBX_c11a_ro(uint64_t, p), 
mo_c11_load(order)); +#else /* MDBX_HAVE_C11ATOMICS */ + osal_memory_fence(order, false); + const uint64_t value = p->weak; + if (order != mo_Relaxed) + osal_compiler_barrier(); + return value; +#endif /* MDBX_HAVE_C11ATOMICS */ +#else /* !MDBX_64BIT_ATOMIC */ + osal_compiler_barrier(); + uint64_t value = (uint64_t)atomic_load32(&p->high, order) << 32; + jitter4testing(true); + value |= atomic_load32(&p->low, (order == mo_Relaxed) ? mo_Relaxed + : mo_AcquireRelease); + jitter4testing(true); + for (;;) { + osal_compiler_barrier(); + uint64_t again = (uint64_t)atomic_load32(&p->high, order) << 32; + jitter4testing(true); + again |= atomic_load32(&p->low, (order == mo_Relaxed) ? mo_Relaxed + : mo_AcquireRelease); + jitter4testing(true); + if (likely(value == again)) + return value; + value = again; + } +#endif /* !MDBX_64BIT_ATOMIC */ +} +#endif /* atomic_load64 */ + +MDBX_MAYBE_UNUSED static __always_inline void atomic_yield(void) { +#if defined(_WIN32) || defined(_WIN64) + YieldProcessor(); +#elif defined(__ia32__) || defined(__e2k__) + __builtin_ia32_pause(); +#elif defined(__ia64__) +#if defined(__HP_cc__) || defined(__HP_aCC__) + _Asm_hint(_HINT_PAUSE); +#else + __asm__ __volatile__("hint @pause"); +#endif +#elif defined(__aarch64__) || (defined(__ARM_ARCH) && __ARM_ARCH > 6) || \ + defined(__ARM_ARCH_6K__) +#ifdef __CC_ARM + __yield(); +#else + __asm__ __volatile__("yield"); +#endif +#elif (defined(__mips64) || defined(__mips64__)) && defined(__mips_isa_rev) && \ + __mips_isa_rev >= 2 + __asm__ __volatile__("pause"); +#elif defined(__mips) || defined(__mips__) || defined(__mips64) || \ + defined(__mips64__) || defined(_M_MRX000) || defined(_MIPS_) || \ + defined(__MWERKS__) || defined(__sgi) + __asm__ __volatile__(".word 0x00000140"); +#elif defined(__linux__) || defined(__gnu_linux__) || defined(_UNIX03_SOURCE) + sched_yield(); +#elif (defined(_GNU_SOURCE) && __GLIBC_PREREQ(2, 1)) || defined(_OPEN_THREADS) + pthread_yield(); +#endif +} + +#if MDBX_64BIT_CAS 
+MDBX_MAYBE_UNUSED static __always_inline bool +atomic_cas64(mdbx_atomic_uint64_t *p, uint64_t c, uint64_t v) { +#ifdef MDBX_HAVE_C11ATOMICS + STATIC_ASSERT(sizeof(long long) >= sizeof(uint64_t)); + assert(atomic_is_lock_free(MDBX_c11a_rw(uint64_t, p))); + return atomic_compare_exchange_strong(MDBX_c11a_rw(uint64_t, p), &c, v); +#elif defined(__GNUC__) || defined(__clang__) + return __sync_bool_compare_and_swap(&p->weak, c, v); +#elif defined(_MSC_VER) + return c == (uint64_t)_InterlockedCompareExchange64( + (volatile __int64 *)&p->weak, v, c); +#elif defined(__APPLE__) + return OSAtomicCompareAndSwap64Barrier(c, v, &p->weak); +#else +#error FIXME: Unsupported compiler +#endif +} +#endif /* MDBX_64BIT_CAS */ + +MDBX_MAYBE_UNUSED static __always_inline bool +atomic_cas32(mdbx_atomic_uint32_t *p, uint32_t c, uint32_t v) { +#ifdef MDBX_HAVE_C11ATOMICS + STATIC_ASSERT(sizeof(int) >= sizeof(uint32_t)); + assert(atomic_is_lock_free(MDBX_c11a_rw(uint32_t, p))); + return atomic_compare_exchange_strong(MDBX_c11a_rw(uint32_t, p), &c, v); +#elif defined(__GNUC__) || defined(__clang__) + return __sync_bool_compare_and_swap(&p->weak, c, v); +#elif defined(_MSC_VER) + STATIC_ASSERT(sizeof(volatile long) == sizeof(volatile uint32_t)); + return c == + (uint32_t)_InterlockedCompareExchange((volatile long *)&p->weak, v, c); +#elif defined(__APPLE__) + return OSAtomicCompareAndSwap32Barrier(c, v, &p->weak); +#else +#error FIXME: Unsupported compiler +#endif +} + +MDBX_MAYBE_UNUSED static __always_inline uint32_t +atomic_add32(mdbx_atomic_uint32_t *p, uint32_t v) { +#ifdef MDBX_HAVE_C11ATOMICS + STATIC_ASSERT(sizeof(int) >= sizeof(uint32_t)); + assert(atomic_is_lock_free(MDBX_c11a_rw(uint32_t, p))); + return atomic_fetch_add(MDBX_c11a_rw(uint32_t, p), v); +#elif defined(__GNUC__) || defined(__clang__) + return __sync_fetch_and_add(&p->weak, v); +#elif defined(_MSC_VER) + STATIC_ASSERT(sizeof(volatile long) == sizeof(volatile uint32_t)); + return 
(uint32_t)_InterlockedExchangeAdd((volatile long *)&p->weak, v); +#elif defined(__APPLE__) + return OSAtomicAdd32Barrier(v, &p->weak); +#else +#error FIXME: Unsupported compiler +#endif +} + +#define atomic_sub32(p, v) atomic_add32(p, 0 - (v)) + +MDBX_MAYBE_UNUSED static __always_inline uint64_t +safe64_txnid_next(uint64_t txnid) { + txnid += xMDBX_TXNID_STEP; +#if !MDBX_64BIT_CAS + /* avoid overflow of low-part in safe64_reset() */ + txnid += (UINT32_MAX == (uint32_t)txnid); +#endif + return txnid; +} + +/* Atomically make target value >= SAFE64_INVALID_THRESHOLD */ +MDBX_MAYBE_UNUSED static __always_inline void +safe64_reset(mdbx_atomic_uint64_t *p, bool single_writer) { + if (single_writer) { +#if MDBX_64BIT_ATOMIC && MDBX_WORDBITS >= 64 + atomic_store64(p, UINT64_MAX, mo_AcquireRelease); +#else + atomic_store32(&p->high, UINT32_MAX, mo_AcquireRelease); +#endif /* MDBX_64BIT_ATOMIC && MDBX_WORDBITS >= 64 */ + } else { +#if MDBX_64BIT_CAS && MDBX_64BIT_ATOMIC + /* atomically make value >= SAFE64_INVALID_THRESHOLD by 64-bit operation */ + atomic_store64(p, UINT64_MAX, mo_AcquireRelease); +#elif MDBX_64BIT_CAS + /* atomically make value >= SAFE64_INVALID_THRESHOLD by 32-bit operation */ + atomic_store32(&p->high, UINT32_MAX, mo_AcquireRelease); +#else + /* it is safe to increment low-part to avoid ABA, since xMDBX_TXNID_STEP > 1 + * and overflow was preserved in safe64_txnid_next() */ + STATIC_ASSERT(xMDBX_TXNID_STEP > 1); + atomic_add32(&p->low, 1) /* avoid ABA in safe64_reset_compare() */; + atomic_store32(&p->high, UINT32_MAX, mo_AcquireRelease); + atomic_add32(&p->low, 1) /* avoid ABA in safe64_reset_compare() */; +#endif /* MDBX_64BIT_CAS && MDBX_64BIT_ATOMIC */ + } + assert(p->weak >= SAFE64_INVALID_THRESHOLD); + jitter4testing(true); +} + +MDBX_MAYBE_UNUSED static __always_inline bool +safe64_reset_compare(mdbx_atomic_uint64_t *p, uint64_t compare) { + /* LY: This function is used to reset `txnid` from hsr-handler in case + * the asynchronously cancellation 
of read transaction. Therefore, + * there may be a collision between the cleanup performed here and + * asynchronous termination and restarting of the read transaction + * in another process/thread. In general we MUST NOT reset the `txnid` + * if a new transaction was started (i.e. if `txnid` was changed). */ +#if MDBX_64BIT_CAS + bool rc = atomic_cas64(p, compare, UINT64_MAX); +#else + /* LY: There is no gold ratio here since shared mutex is too costly, + * in such way we must acquire/release it for every update of txnid, + * i.e. twice for each read transaction). */ + bool rc = false; + if (likely(atomic_load32(&p->low, mo_AcquireRelease) == (uint32_t)compare && + atomic_cas32(&p->high, (uint32_t)(compare >> 32), UINT32_MAX))) { + if (unlikely(atomic_load32(&p->low, mo_AcquireRelease) != + (uint32_t)compare)) + atomic_cas32(&p->high, UINT32_MAX, (uint32_t)(compare >> 32)); + else + rc = true; + } +#endif /* MDBX_64BIT_CAS */ + jitter4testing(true); + return rc; +} + +MDBX_MAYBE_UNUSED static __always_inline void +safe64_write(mdbx_atomic_uint64_t *p, const uint64_t v) { + assert(p->weak >= SAFE64_INVALID_THRESHOLD); +#if MDBX_64BIT_ATOMIC && MDBX_64BIT_CAS + atomic_store64(p, v, mo_AcquireRelease); +#else /* MDBX_64BIT_ATOMIC */ + osal_compiler_barrier(); + /* update low-part but still value >= SAFE64_INVALID_THRESHOLD */ + atomic_store32(&p->low, (uint32_t)v, mo_Relaxed); + assert(p->weak >= SAFE64_INVALID_THRESHOLD); + jitter4testing(true); + /* update high-part from SAFE64_INVALID_THRESHOLD to actual value */ + atomic_store32(&p->high, (uint32_t)(v >> 32), mo_AcquireRelease); +#endif /* MDBX_64BIT_ATOMIC */ + assert(p->weak == v); + jitter4testing(true); +} + +MDBX_MAYBE_UNUSED static __always_inline uint64_t +safe64_read(const mdbx_atomic_uint64_t *p) { + jitter4testing(true); + uint64_t v; + do + v = atomic_load64(p, mo_AcquireRelease); + while (!MDBX_64BIT_ATOMIC && unlikely(v != p->weak)); + return v; +} + +#if 0 /* unused for now */ +MDBX_MAYBE_UNUSED 
static __always_inline bool safe64_is_valid(uint64_t v) { +#if MDBX_WORDBITS >= 64 + return v < SAFE64_INVALID_THRESHOLD; +#else + return (v >> 32) != UINT32_MAX; +#endif /* MDBX_WORDBITS */ +} + +MDBX_MAYBE_UNUSED static __always_inline bool + safe64_is_valid_ptr(const mdbx_atomic_uint64_t *p) { +#if MDBX_64BIT_ATOMIC + return atomic_load64(p, mo_AcquireRelease) < SAFE64_INVALID_THRESHOLD; +#else + return atomic_load32(&p->high, mo_AcquireRelease) != UINT32_MAX; +#endif /* MDBX_64BIT_ATOMIC */ +} +#endif /* unused for now */ + +/* non-atomic write with safety for reading a half-updated value */ +MDBX_MAYBE_UNUSED static __always_inline void +safe64_update(mdbx_atomic_uint64_t *p, const uint64_t v) { +#if MDBX_64BIT_ATOMIC + atomic_store64(p, v, mo_Relaxed); +#else + safe64_reset(p, true); /* first mark the value invalid, then publish the new one in two halves */ + safe64_write(p, v); +#endif /* MDBX_64BIT_ATOMIC */ +} + +/* non-atomic increment with safety for reading a half-updated value */ +MDBX_MAYBE_UNUSED static +#if MDBX_64BIT_ATOMIC + __always_inline +#endif /* MDBX_64BIT_ATOMIC */ + void + safe64_inc(mdbx_atomic_uint64_t *p, const uint64_t v) { + assert(v > 0); + safe64_update(p, safe64_read(p) + v); +} + +#endif /* !__cplusplus */ diff --git a/src/atomics-types.h b/src/atomics-types.h new file mode 100644 index 00000000..8e3e4b9b --- /dev/null +++ b/src/atomics-types.h @@ -0,0 +1,99 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +#ifndef MDBX_64BIT_ATOMIC +#error "The MDBX_64BIT_ATOMIC must be defined before" +#endif /* MDBX_64BIT_ATOMIC */ + +#ifndef MDBX_64BIT_CAS +#error "The MDBX_64BIT_CAS must be defined before" +#endif /* MDBX_64BIT_CAS */ + +#if defined(__cplusplus) && !defined(__STDC_NO_ATOMICS__) && __has_include(<cstdatomic>) +#include <cstdatomic> /* NOTE(review): header name restored after the <...> argument was stripped from this copy; confirm against the upstream file */ +#define MDBX_HAVE_C11ATOMICS +#elif !defined(__cplusplus) && \ + (__STDC_VERSION__ >= 201112L || __has_extension(c_atomic)) && \ + !defined(__STDC_NO_ATOMICS__) && \ + (__GNUC_PREREQ(4, 
9) || __CLANG_PREREQ(3, 8) || \ + !(defined(__GNUC__) || defined(__clang__))) +#include <stdatomic.h> /* C11 atomics: atomic_*_explicit(), atomic_thread_fence(), memory_order_* */ +#define MDBX_HAVE_C11ATOMICS +#elif defined(__GNUC__) || defined(__clang__) +#elif defined(_MSC_VER) +#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */ +#pragma warning(disable : 4133) /* 'function': incompatible types - from \ + 'size_t' to 'LONGLONG' */ +#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \ + 'std::size_t', possible loss of data */ +#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to \ + 'long', possible loss of data */ +#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange) +#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64) +#elif defined(__APPLE__) +#include <libkern/OSAtomic.h> /* OSAtomicCompareAndSwap{32,64}Barrier(), OSAtomicAdd32Barrier() */ +#else +#error FIXME atomic-ops +#endif + +/* Memory ordering requested by the atomic helpers. */ +typedef enum mdbx_memory_order { + mo_Relaxed, + mo_AcquireRelease + /* , mo_SequentialConsistency */ +} mdbx_memory_order_t; + +/* 32-bit cell: `weak` is the plain volatile view, `c11a` the C11 atomic view of the same storage. */ +typedef union { + volatile uint32_t weak; +#ifdef MDBX_HAVE_C11ATOMICS + volatile _Atomic uint32_t c11a; +#endif /* MDBX_HAVE_C11ATOMICS */ +} mdbx_atomic_uint32_t; + +/* 64-bit cell; the low/high 32-bit halves are exposed when a full 64-bit atomic/CAS path is not available. */ +typedef union { + volatile uint64_t weak; +#if defined(MDBX_HAVE_C11ATOMICS) && (MDBX_64BIT_CAS || MDBX_64BIT_ATOMIC) + volatile _Atomic uint64_t c11a; +#endif +#if !defined(MDBX_HAVE_C11ATOMICS) || !MDBX_64BIT_CAS || !MDBX_64BIT_ATOMIC + __anonymous_struct_extension__ struct { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + mdbx_atomic_uint32_t low, high; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + mdbx_atomic_uint32_t high, low; +#else +#error "FIXME: Unsupported byte order" +#endif /* __BYTE_ORDER__ */ + }; +#endif +} mdbx_atomic_uint64_t; + +#ifdef MDBX_HAVE_C11ATOMICS + +/* Crutches for C11 atomic compiler's bugs */ +#if defined(__e2k__) && defined(__LCC__) && __LCC__ < /* FIXME */ 127 +#define MDBX_c11a_ro(type, ptr) (&(ptr)->weak) +#define MDBX_c11a_rw(type, ptr) (&(ptr)->weak) +#elif defined(__clang__) && __clang__ < 8 /* NOTE(review): __clang__ is presumably always 1, which would make this branch apply to every clang version; __clang_major__ may have been intended - confirm before changing */ +#define 
MDBX_c11a_ro(type, ptr) ((volatile _Atomic(type) *)&(ptr)->c11a) +#define MDBX_c11a_rw(type, ptr) (&(ptr)->c11a) +#else +#define MDBX_c11a_ro(type, ptr) (&(ptr)->c11a) +#define MDBX_c11a_rw(type, ptr) (&(ptr)->c11a) +#endif /* Crutches for C11 atomic compiler's bugs */ + +#define mo_c11_store(fence) \ + (((fence) == mo_Relaxed) ? memory_order_relaxed \ + : ((fence) == mo_AcquireRelease) ? memory_order_release \ + : memory_order_seq_cst) +#define mo_c11_load(fence) \ + (((fence) == mo_Relaxed) ? memory_order_relaxed \ + : ((fence) == mo_AcquireRelease) ? memory_order_acquire \ + : memory_order_seq_cst) + +#endif /* MDBX_HAVE_C11ATOMICS */ + +#define SAFE64_INVALID_THRESHOLD UINT64_C(0xffffFFFF00000000) diff --git a/src/audit.c b/src/audit.c new file mode 100644 index 00000000..7e6bee78 --- /dev/null +++ b/src/audit.c @@ -0,0 +1,164 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +__cold static tree_t *audit_db_dig(const MDBX_txn *txn, const size_t dbi, + tree_t *fallback) { + const MDBX_txn *dig = txn; + do { + tASSERT(txn, txn->n_dbi == dig->n_dbi); + const uint8_t state = dbi_state(dig, dbi); + if (state & DBI_LINDO) + switch (state & (DBI_VALID | DBI_STALE | DBI_OLDEN)) { + case DBI_VALID: + case DBI_OLDEN: + return dig->dbs + dbi; + case 0: + return nullptr; + case DBI_VALID | DBI_STALE: + case DBI_OLDEN | DBI_STALE: + break; + default: + tASSERT(txn, !!"unexpected dig->dbi_state[dbi]"); + } + dig = dig->parent; + } while (dig); + return fallback; +} + +static size_t audit_db_used(const tree_t *db) { + return db ? 
(size_t)db->branch_pages + (size_t)db->leaf_pages + + (size_t)db->large_pages + : 0; +} + +__cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, + bool dont_filter_gc) { + const MDBX_env *const env = txn->env; + size_t pending = 0; + if ((txn->flags & MDBX_TXN_RDONLY) == 0) + pending = txn->tw.loose_count + MDBX_PNL_GETSIZE(txn->tw.relist) + + (MDBX_PNL_GETSIZE(txn->tw.retired_pages) - retired_stored); + + cursor_couple_t cx; + int rc = cursor_init(&cx.outer, txn, FREE_DBI); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + size_t gc = 0; + MDBX_val key, data; + rc = outer_first(&cx.outer, &key, &data); + while (rc == MDBX_SUCCESS) { + if (!dont_filter_gc) { + if (unlikely(key.iov_len != sizeof(txnid_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid GC-key size", (unsigned)key.iov_len); + return MDBX_CORRUPTED; + } + txnid_t id = unaligned_peek_u64(4, key.iov_base); + if (txn->tw.gc.reclaimed) { + for (size_t i = 1; i <= MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed); ++i) + if (id == txn->tw.gc.reclaimed[i]) + goto skip; + } else if (id <= txn->tw.gc.last_reclaimed) + goto skip; + } + gc += *(pgno_t *)data.iov_base; + skip: + rc = outer_next(&cx.outer, &key, &data, MDBX_NEXT); + } + tASSERT(txn, rc == MDBX_NOTFOUND); + + const size_t done_bitmap_size = (txn->n_dbi + CHAR_BIT - 1) / CHAR_BIT; + uint8_t *const done_bitmap = alloca(done_bitmap_size); + memset(done_bitmap, 0, done_bitmap_size); + if (txn->parent) { + tASSERT(txn, txn->n_dbi == txn->parent->n_dbi && + txn->n_dbi == txn->env->txn->n_dbi); +#if MDBX_ENABLE_DBI_SPARSE + tASSERT(txn, txn->dbi_sparse == txn->parent->dbi_sparse && + txn->dbi_sparse == txn->env->txn->dbi_sparse); +#endif /* MDBX_ENABLE_DBI_SPARSE */ + } + + size_t used = NUM_METAS + + audit_db_used(audit_db_dig(txn, FREE_DBI, nullptr)) + + audit_db_used(audit_db_dig(txn, MAIN_DBI, nullptr)); + rc = cursor_init(&cx.outer, txn, MAIN_DBI); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = 
tree_search(&cx.outer, nullptr, Z_FIRST); + while (rc == MDBX_SUCCESS) { /* walk the leaf pages of MAIN_DBI, left to right */ + page_t *mp = cx.outer.pg[cx.outer.top]; + for (size_t k = 0; k < page_numkeys(mp); k++) { + node_t *node = page_node(mp, k); + if (node_flags(node) != N_SUBDATA) /* only nodes carrying a tree_t record are accounted here */ + continue; + if (unlikely(node_ds(node) != sizeof(tree_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid dupsort sub-tree node size", (unsigned)node_ds(node)); + return MDBX_CORRUPTED; + } + + tree_t reside; /* copy of the stored record; used unless an open dbi handle with the same name supplies the tree */ + const tree_t *db = memcpy(&reside, node_data(node), sizeof(reside)); + const MDBX_val name = {node_key(node), node_ks(node)}; + for (size_t dbi = CORE_DBS; dbi < env->n_dbi; ++dbi) { + if (dbi >= txn->n_dbi || !(env->dbs_flags[dbi] & DB_VALID)) + continue; + if (env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[dbi].name)) + continue; + + done_bitmap[dbi / CHAR_BIT] |= 1 << dbi % CHAR_BIT; /* mark the handle as accounted */ + db = audit_db_dig(txn, dbi, &reside); + break; + } + used += audit_db_used(db); + } + rc = cursor_sibling_right(&cx.outer); + } + tASSERT(txn, rc == MDBX_NOTFOUND); + + for (size_t dbi = CORE_DBS; dbi < txn->n_dbi; ++dbi) { /* handles not matched by name during the walk above */ + if (done_bitmap[dbi / CHAR_BIT] & (1 << dbi % CHAR_BIT)) + continue; + const tree_t *db = audit_db_dig(txn, dbi, nullptr); + if (db) + used += audit_db_used(db); + else if (dbi_state(txn, dbi)) + WARNING("audit %s@%" PRIaTXN + ": unable account dbi %zu / \"%.*s\", state 0x%02x", /* dbi is size_t; the name length is passed as a precision so no more than iov_len bytes are read */ + txn->parent ? "nested-" : "", txn->txnid, dbi, + (int)env->kvs[dbi].name.iov_len, + (const char *)env->kvs[dbi].name.iov_base, dbi_state(txn, dbi)); + } + + if (pending + gc + used == txn->geo.first_unallocated) /* every page below first_unallocated is accounted for */ + return MDBX_SUCCESS; + + if ((txn->flags & MDBX_TXN_RDONLY) == 0) + ERROR("audit @%" PRIaTXN ": %zu(pending) = %zu(loose) + " + "%zu(reclaimed) + %zu(retired-pending) - %zu(retired-stored)", + txn->txnid, pending, txn->tw.loose_count, + MDBX_PNL_GETSIZE(txn->tw.relist), + txn->tw.retired_pages ? 
MDBX_PNL_GETSIZE(txn->tw.retired_pages) : 0, + retired_stored); + ERROR("audit @%" PRIaTXN ": %zu(pending) + %zu" + "(gc) + %zu(count) = %zu(total) <> %zu" + "(allocated)", + txn->txnid, pending, gc, used, pending + gc + used, + (size_t)txn->geo.first_unallocated); + return MDBX_PROBLEM; +} + +__cold int audit_ex(MDBX_txn *txn, size_t retired_stored, bool dont_filter_gc) { + MDBX_env *const env = txn->env; + int rc = osal_fastmutex_acquire(&env->dbi_lock); + if (likely(rc == MDBX_SUCCESS)) { + rc = audit_ex_locked(txn, retired_stored, dont_filter_gc); + ENSURE(txn->env, osal_fastmutex_release(&env->dbi_lock) == MDBX_SUCCESS); + } + return rc; +} diff --git a/src/bits.md b/src/bits.md index abcedf8b..29154b67 100644 --- a/src/bits.md +++ b/src/bits.md @@ -1,13 +1,13 @@ N | MASK | ENV | TXN | DB | PUT | DBI | NODE | PAGE | MRESIZE | --|---------|-----------|--------------|----------|-----------|------------|---------|----------|---------| -0 |0000 0001|ALLOC_RSRV |TXN_FINISHED | | |DBI_DIRTY |F_BIGDATA|P_BRANCH | | -1 |0000 0002|ALLOC_UNIMP|TXN_ERROR |REVERSEKEY|F_SUBDATA |DBI_STALE |F_SUBDATA|P_LEAF | | -2 |0000 0004|ALLOC_COLSC|TXN_DIRTY |DUPSORT | |DBI_FRESH |F_DUPDATA|P_OVERFLOW| | +0 |0000 0001|ALLOC_RSRV |TXN_FINISHED | | |DBI_DIRTY |N_BIGDATA|P_BRANCH | | +1 |0000 0002|ALLOC_UNIMP|TXN_ERROR |REVERSEKEY|F_SUBDATA |DBI_STALE |N_SUBDATA|P_LEAF | | +2 |0000 0004|ALLOC_COLSC|TXN_DIRTY |DUPSORT | |DBI_FRESH |N_DUPDATA|P_LARGE | | 3 |0000 0008|ALLOC_SSCAN|TXN_SPILLS |INTEGERKEY| |DBI_CREAT | |P_META | | 4 |0000 0010|ALLOC_FIFO |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_BAD | | -5 |0000 0020| |TXN_DRAINED_GC|INTEGERDUP|NODUPDATA | | |P_LEAF2 | | +5 |0000 0020| |TXN_DRAINED_GC|INTEGERDUP|NODUPDATA | | |P_DUPFIX | | 6 |0000 0040| | |REVERSEDUP|CURRENT |DBI_OLDEN | |P_SUBP | | -7 |0000 0080| | | |ALLDUPS |DBI_LINDO | | | | +7 |0000 0080| | |DB_VALID |ALLDUPS |DBI_LINDO | | | | 8 |0000 0100| _MAY_MOVE | | | | | | | <= | 9 |0000 0200| _MAY_UNMAP| | | | | | | <= | 
10|0000 0400| | | | | | | | | @@ -15,7 +15,7 @@ N | MASK | ENV | TXN | DB | PUT | DBI | NOD 12|0000 1000| | | | | | | | | 13|0000 2000|VALIDATION | | | | | |P_SPILLED | | 14|0000 4000|NOSUBDIR | | | | | |P_LOOSE | | -15|0000 8000| | |DB_VALID | | | |P_FROZEN | | +15|0000 8000| | | | | | |P_FROZEN | | 16|0001 0000|SAFE_NOSYNC|TXN_NOSYNC | |RESERVE | |RESERVE | | | 17|0002 0000|RDONLY |TXN_RDONLY | |APPEND | |APPEND | | <= | 18|0004 0000|NOMETASYNC |TXN_NOMETASYNC|CREATE |APPENDDUP | | | | | diff --git a/src/chk.c b/src/chk.c new file mode 100644 index 00000000..1e7cb6dd --- /dev/null +++ b/src/chk.c @@ -0,0 +1,2097 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +typedef struct MDBX_chk_internal { + MDBX_chk_context_t *usr; + const struct MDBX_chk_callbacks *cb; + uint64_t monotime_timeout; + + size_t *problem_counter; + uint8_t flags; + bool got_break; + bool write_locked; + uint8_t scope_depth; + + MDBX_chk_subdb_t subdb_gc, subdb_main; + int16_t *pagemap; + MDBX_chk_subdb_t *last_lookup; + const void *last_nested; + MDBX_chk_scope_t scope_stack[12]; + MDBX_chk_subdb_t *subdb[MDBX_MAX_DBI + CORE_DBS]; + + MDBX_envinfo envinfo; + troika_t troika; + MDBX_val v2a_buf; +} MDBX_chk_internal_t; + +__cold static int chk_check_break(MDBX_chk_scope_t *const scope) { + MDBX_chk_internal_t *const chk = scope->internal; + return (chk->got_break || (chk->cb->check_break && + (chk->got_break = chk->cb->check_break(chk->usr)))) + ? 
MDBX_RESULT_TRUE + : MDBX_RESULT_FALSE; +} + +__cold static void chk_line_end(MDBX_chk_line_t *line) { + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (likely(chk->cb->print_done)) + chk->cb->print_done(line); + } +} + +__cold __must_check_result static MDBX_chk_line_t * +chk_line_begin(MDBX_chk_scope_t *const scope, enum MDBX_chk_severity severity) { + MDBX_chk_internal_t *const chk = scope->internal; + if (severity < MDBX_chk_warning) + mdbx_env_chk_encount_problem(chk->usr); + MDBX_chk_line_t *line = nullptr; + if (likely(chk->cb->print_begin)) { + line = chk->cb->print_begin(chk->usr, severity); + if (likely(line)) { + assert(line->ctx == nullptr || (line->ctx == chk->usr && line->empty)); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + line->ctx = chk->usr; + } + } + return line; +} + +__cold static MDBX_chk_line_t *chk_line_feed(MDBX_chk_line_t *line) { + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + enum MDBX_chk_severity severity = line->severity; + chk_line_end(line); + line = chk_line_begin(chk->usr->scope, severity); + } + return line; +} + +__cold static MDBX_chk_line_t *chk_flush(MDBX_chk_line_t *line) { + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (likely(chk->cb->print_flush)) { + chk->cb->print_flush(line); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + line->out = line->begin; + } + } + return line; +} + +__cold static size_t chk_print_wanna(MDBX_chk_line_t *line, size_t need) { + if (likely(line && need)) { + size_t have = line->end - line->out; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (need > have) { + line = chk_flush(line); + have = 
line->end - line->out; + } + return (need < have) ? need : have; + } + return 0; +} + +/* Appends the C string `str` to `line`: via the print_chars callback when present, otherwise by copying into the line buffer in chunks sized by chk_print_wanna(). Returns `line`. */ __cold static MDBX_chk_line_t *chk_puts(MDBX_chk_line_t *line, + const char *str) { + if (likely(line && str && *str)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + size_t left = strlen(str); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (chk->cb->print_chars) { + chk->cb->print_chars(line, str, left); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + } else + do { + size_t chunk = chk_print_wanna(line, left); + assert(chunk <= left); + if (unlikely(!chunk)) /* no room even after a flush: drop the remainder */ + break; + memcpy(line->out, str, chunk); + line->out += chunk; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + str += chunk; + left -= chunk; + } while (left); + line->empty = false; + } + return line; +} + +/* Appends printf-style formatted text to `line`: via the print_format callback when present, otherwise with vsnprintf() straight into the line buffer (the text is truncated if the buffer is too small). Returns `line`. */ __cold static MDBX_chk_line_t *chk_print_va(MDBX_chk_line_t *line, + const char *fmt, va_list args) { + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (chk->cb->print_format) { + chk->cb->print_format(line, fmt, args); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + } else { + va_list ones; + va_copy(ones, args); + const int needed = vsnprintf(nullptr, 0, fmt, ones); /* dry run: length of the text without the NUL */ + va_end(ones); + if (likely(needed > 0)) { + const size_t have = chk_print_wanna(line, (size_t)needed + 1); /* +1: vsnprintf() also stores a terminating NUL */ + if (likely(have > 0)) { + int written = vsnprintf(line->out, have, fmt, args); + if (likely(written > 0)) + line->out += ((size_t)written < have) ? (size_t)written : have - 1; /* vsnprintf() returns the untruncated length; advance only over the characters actually stored */ + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + } + } + } + line->empty = false; + } + return line; +} + +__cold static MDBX_chk_line_t *MDBX_PRINTF_ARGS(2, 3) + chk_print(MDBX_chk_line_t *line, const char *fmt, ...) 
{
+  if (likely(line)) {
+    // MDBX_chk_internal_t *chk = line->ctx->internal;
+    va_list args;
+    va_start(args, fmt);
+    line = chk_print_va(line, fmt, args);
+    va_end(args);
+    line->empty = false;
+  }
+  return line;
+}
+/* Prints `value` as "<prefix><value> (<N.NN> <unit>iB)<suffix>".
+ * NULL `prefix`/`suffix` are treated as "". When the print_size callback is
+ * installed it does the whole job; otherwise the loop picks the first binary
+ * scale (2^10, 2^20, ...) at which the rounded value is <= 1000 and returns
+ * through chk_print(). A NULL `line` is passed through. */
+__cold static MDBX_chk_line_t *chk_print_size(MDBX_chk_line_t *line,
+                                              const char *prefix,
+                                              const uint64_t value,
+                                              const char *suffix) {
+  static const char sf[] =
+      "KMGTPEZY"; /* LY: Kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta! */
+  if (likely(line)) {
+    MDBX_chk_internal_t *chk = line->ctx->internal;
+    prefix = prefix ? prefix : "";
+    suffix = suffix ? suffix : "";
+    if (chk->cb->print_size)
+      chk->cb->print_size(line, prefix, value, suffix);
+    else
+      for (unsigned i = 0;; ++i) {
+        const unsigned scale = 10 + i * 10;
+        const uint64_t rounded = value + (UINT64_C(5) << (scale - 10));
+        const uint64_t integer = rounded >> scale;
+        const uint64_t fractional =
+            (rounded - (integer << scale)) * 100u >> scale;
+        if ((rounded >> scale) <= 1000)
+          return chk_print(line, "%s%" PRIu64 " (%u.%02u %ciB)%s", prefix,
+                           value, (unsigned)integer, (unsigned)fractional,
+                           sf[i], suffix);
+      }
+    line->empty = false; /* reached only on the print_size callback path */
+  }
+  return line;
+}
+/* Reports "<subj>() failed, error <text> (<code>)" at MDBX_chk_error
+ * severity, or through debug_log() when chk_line_begin() yields no line.
+ * Always returns `err` unchanged so callers can `return chk_error_rc(...)`. */
+__cold static int chk_error_rc(MDBX_chk_scope_t *const scope, int err,
+                               const char *subj) {
+  MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_error);
+  if (line)
+    chk_line_end(chk_flush(chk_print(line, "%s() failed, error %s (%d)", subj,
+                                     mdbx_strerror(err), err)));
+  else
+    debug_log(MDBX_LOG_ERROR, "mdbx_env_chk", 0, "%s() failed, error %s (%d)",
+              subj, mdbx_strerror(err), err);
+  return err;
+}
+/* Registers an issue about `object` (numbered by `entry_number` unless it
+ * is UINT64_MAX) and reports it.
+ *
+ * Issues are counted per `caption` in the issue list of the current user
+ * scope. Captions are matched by pointer identity, not by content, so the
+ * same kind of issue is presumably expected to be passed as the same string
+ * object. The report goes to the issue callback when installed, otherwise it
+ * is printed at MDBX_chk_error severity with the optional printf-style
+ * `extra_fmt` details in parentheses; only the first occurrence of a caption
+ * is flushed. */
+__cold static void MDBX_PRINTF_ARGS(5, 6)
+    chk_object_issue(MDBX_chk_scope_t *const scope, const char *object,
+                     uint64_t entry_number, const char *caption,
+                     const char *extra_fmt, ...) {
+  MDBX_chk_internal_t *const chk = scope->internal;
+  MDBX_chk_issue_t *issue = chk->usr->scope->issues;
+  while (issue) {
+    if (issue->caption == caption) {
+      issue->count += 1;
+      break;
+    } else
+      issue = issue->next;
+  }
+  const bool fresh = issue == nullptr;
+  if (fresh) {
+    issue = osal_malloc(sizeof(*issue));
+    if (likely(issue)) {
+      issue->caption = caption;
+      issue->count = 1;
+      issue->next = chk->usr->scope->issues; /* push to the list head */
+      chk->usr->scope->issues = issue;
+    } else
+      chk_error_rc(scope, ENOMEM, "adding issue");
+  }
+
+  va_list args;
+  va_start(args, extra_fmt);
+  if (chk->cb->issue) {
+    mdbx_env_chk_encount_problem(chk->usr);
+    chk->cb->issue(chk->usr, object, entry_number, caption, extra_fmt, args);
+  } else {
+    MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_error);
+    if (entry_number != UINT64_MAX)
+      chk_print(line, "%s #%" PRIu64 ": %s", object, entry_number, caption);
+    else
+      chk_print(line, "%s: %s", object, caption);
+    if (extra_fmt)
+      chk_puts(chk_print_va(chk_puts(line, " ("), extra_fmt, args), ")");
+    chk_line_end(fresh ? chk_flush(line) : line);
+  }
+  va_end(args);
+}
+/* Reports a scope-level issue described by the printf-style `fmt`, either
+ * through the issue callback (with no object and no caption) or by printing
+ * it at MDBX_chk_error severity. */
+__cold static void MDBX_PRINTF_ARGS(2, 3)
+    chk_scope_issue(MDBX_chk_scope_t *const scope, const char *fmt, ...) {
+  MDBX_chk_internal_t *const chk = scope->internal;
+  va_list args;
+  va_start(args, fmt);
+  if (likely(chk->cb->issue)) {
+    mdbx_env_chk_encount_problem(chk->usr);
+    chk->cb->issue(chk->usr, nullptr, 0, nullptr, fmt, args);
+  } else
+    chk_line_end(
+        chk_print_va(chk_line_begin(scope, MDBX_chk_error), fmt, args));
+  va_end(args);
+}
+/* Closes the innermost scope of the scope stack and returns the possibly
+ * adjusted error code.
+ *
+ * When the scope also ends a stage (there is no outer scope or its stage
+ * differs): MDBX_SUCCESS becomes MDBX_PROBLEM if problems were counted, an
+ * error with a zero counter sets the counter to 1, a custom problem counter
+ * is folded into result.total_problems, and the stage_end callback may
+ * replace `err`. Then the scope_conclude/scope_pop callbacks run, the
+ * subtotal of issues is added to the outer scope, the issue list is freed
+ * and the slot is filled with 0xFF bytes. */
+__cold static int chk_scope_end(MDBX_chk_internal_t *chk, int err) {
+  assert(chk->scope_depth > 0);
+  MDBX_chk_scope_t *const inner = chk->scope_stack + chk->scope_depth;
+  MDBX_chk_scope_t *const outer = chk->scope_depth ? inner - 1 : nullptr;
+  if (!outer || outer->stage != inner->stage) {
+    if (err == MDBX_SUCCESS && *chk->problem_counter)
+      err = MDBX_PROBLEM;
+    else if (*chk->problem_counter == 0 && MDBX_IS_ERROR(err))
+      *chk->problem_counter = 1;
+    if (chk->problem_counter != &chk->usr->result.total_problems) {
+      chk->usr->result.total_problems += *chk->problem_counter;
+      chk->problem_counter = &chk->usr->result.total_problems;
+    }
+    if (chk->cb->stage_end)
+      err = chk->cb->stage_end(chk->usr, inner->stage, err);
+  }
+  if (chk->cb->scope_conclude)
+    err = chk->cb->scope_conclude(chk->usr, outer, inner, err);
+  chk->usr->scope = outer;
+  chk->usr->scope_nesting = chk->scope_depth -= 1;
+  if (outer)
+    outer->subtotal_issues += inner->subtotal_issues;
+  if (chk->cb->scope_pop)
+    chk->cb->scope_pop(chk->usr, outer, inner);
+
+  while (inner->issues) {
+    MDBX_chk_issue_t *next = inner->issues->next;
+    osal_free(inner->issues);
+    inner->issues = next;
+  }
+  memset(inner, -1, sizeof(*inner)); /* poison the released slot */
+  return err;
+}
+/* Opens a new scope nested into the current top of the scope stack.
+ *
+ * `verbosity_adjustment` shifts the inherited verbosity (an adjustment of 1
+ * keeps it), clamped so that it is never below MDBX_chk_warning. A zero
+ * `stage` inherits the outer stage. `problems`, when given, becomes the
+ * active problem counter; otherwise the total counter is selected if none is
+ * active or the stage changes. `fmt`/`args` are only forwarded to the
+ * scope_push callback.
+ *
+ * Returns MDBX_SUCCESS, MDBX_BACKLOG_DEPLETED when the stack is full, the
+ * error of the scope_push callback (the scope is not entered), or a non-zero
+ * code when the stage_begin callback fails (the scope is closed again). */
+__cold static int chk_scope_begin_args(MDBX_chk_internal_t *chk,
+                                       int verbosity_adjustment,
+                                       enum MDBX_chk_stage stage,
+                                       const void *object, size_t *problems,
+                                       const char *fmt, va_list args) {
+  if (unlikely(chk->scope_depth + 1u >= ARRAY_LENGTH(chk->scope_stack)))
+    return MDBX_BACKLOG_DEPLETED;
+
+  MDBX_chk_scope_t *const outer = chk->scope_stack + chk->scope_depth;
+  const int verbosity =
+      outer->verbosity +
+      (verbosity_adjustment - 1) * (1 << MDBX_chk_severity_prio_shift);
+  MDBX_chk_scope_t *const inner = outer + 1;
+  memset(inner, 0, sizeof(*inner));
+  inner->internal = outer->internal;
+  inner->stage = stage ? stage : (stage = outer->stage);
+  inner->object = object;
+  inner->verbosity = (verbosity < MDBX_chk_warning)
+                         ? MDBX_chk_warning
+                         : (enum MDBX_chk_severity)verbosity;
+  if (problems)
+    chk->problem_counter = problems;
+  else if (!chk->problem_counter || outer->stage != stage)
+    chk->problem_counter = &chk->usr->result.total_problems;
+
+  if (chk->cb->scope_push) {
+    const int err = chk->cb->scope_push(chk->usr, outer, inner, fmt, args);
+    if (unlikely(err != MDBX_SUCCESS))
+      return err;
+  }
+  chk->usr->scope = inner;
+  chk->usr->scope_nesting = chk->scope_depth += 1;
+
+  if (stage != outer->stage && chk->cb->stage_begin) {
+    int err = chk->cb->stage_begin(chk->usr, stage);
+    if (unlikely(err != MDBX_SUCCESS)) {
+      err = chk_scope_end(chk, err);
+      assert(err != MDBX_SUCCESS);
+      return err ? err : MDBX_RESULT_TRUE; /* never report success here */
+    }
+  }
+  return MDBX_SUCCESS;
+}
+/* Variadic front-end of chk_scope_begin_args(). */
+__cold static int MDBX_PRINTF_ARGS(6, 7)
+    chk_scope_begin(MDBX_chk_internal_t *chk, int verbosity_adjustment,
+                    enum MDBX_chk_stage stage, const void *object,
+                    size_t *problems, const char *fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  int rc = chk_scope_begin_args(chk, verbosity_adjustment, stage, object,
+                                problems, fmt, args);
+  va_end(args);
+  return rc;
+}
+/* Closes scopes until `target` is the current user scope again, threading
+ * `err` through every chk_scope_end() call; returns the resulting code. */
+__cold static int chk_scope_restore(MDBX_chk_scope_t *const target, int err) {
+  MDBX_chk_internal_t *const chk = target->internal;
+  assert(target <= chk->usr->scope);
+  while (chk->usr->scope > target)
+    err = chk_scope_end(chk, err);
+  return err;
+}
+/* Closes `inner` together with everything nested into it; does nothing for
+ * NULL or for the bottom slot of the scope stack. */
+__cold void chk_scope_pop(MDBX_chk_scope_t *const inner) {
+  if (inner && inner > inner->internal->scope_stack)
+    chk_scope_restore(inner - 1, MDBX_SUCCESS);
+}
+/* Unwinds to `scope`, then opens a child scope of the same stage.
+ * Returns the new scope, or nullptr when chk_scope_begin_args() fails. */
+__cold static MDBX_chk_scope_t *MDBX_PRINTF_ARGS(3, 4)
+    chk_scope_push(MDBX_chk_scope_t *const scope, int verbosity_adjustment,
+                   const char *fmt, ...) {
+  chk_scope_restore(scope, MDBX_SUCCESS);
+  va_list args;
+  va_start(args, fmt);
+  int err = chk_scope_begin_args(scope->internal, verbosity_adjustment,
+                                 scope->stage, nullptr, nullptr, fmt, args);
+  va_end(args);
+  return err ? nullptr : scope + 1;
+}
+/* Renders `val` as a C string for messages.
+ *
+ * The MDBX_CHK_MAIN/GC/META markers (as the value pointer or as its data
+ * pointer) map to fixed names. Other values are copied as-is when they
+ * consist only of alphanumerics and '_', single-quoted with escapes when
+ * printable apart from a few control bytes, and dumped as "\x<hex>"
+ * otherwise. The result is either a string literal or chk->v2a_buf, which
+ * is reused and possibly reallocated by later calls.
+ *
+ * NOTE(review): the empty literals returned for zero length, NULL data and
+ * allocation failure, and the empty snprintf() format that ignores `len`,
+ * look like text lost from this copy of the patch - confirm upstream. */
+__cold static const char *chk_v2a(MDBX_chk_internal_t *chk,
+                                  const MDBX_val *val) {
+  if (val == MDBX_CHK_MAIN)
+    return "@MAIN";
+  if (val == MDBX_CHK_GC)
+    return "@GC";
+  if (val == MDBX_CHK_META)
+    return "@META";
+
+  const unsigned char *const data = val->iov_base;
+  const size_t len = val->iov_len;
+  if (data == MDBX_CHK_MAIN)
+    return "@MAIN";
+  if (data == MDBX_CHK_GC)
+    return "@GC";
+  if (data == MDBX_CHK_META)
+    return "@META";
+
+  if (!len)
+    return "";
+  if (!data)
+    return "";
+  if (len > 65536) {
+    const size_t enough = 42;
+    if (chk->v2a_buf.iov_len < enough) {
+      void *ptr = osal_realloc(chk->v2a_buf.iov_base, enough);
+      if (unlikely(!ptr))
+        return "";
+      chk->v2a_buf.iov_base = ptr;
+      chk->v2a_buf.iov_len = enough;
+    }
+    snprintf(chk->v2a_buf.iov_base, chk->v2a_buf.iov_len,
+             "", len);
+    return chk->v2a_buf.iov_base;
+  }
+
+  bool printable = true;
+  bool quoting = false;
+  size_t xchars = 0; /* control bytes seen so far */
+  for (size_t i = 0; i < len && printable; ++i) {
+    quoting = quoting || !(data[i] == '_' || isalnum(data[i]));
+    printable =
+        isprint(data[i]) || (data[i] < ' ' && ++xchars < 4 && len > xchars * 4);
+  }
+
+  size_t need = len + 1;
+  if (quoting || !printable)
+    need += len + /* quotes */ 2 + 2 * /* max xchars */ 4;
+  if (need > chk->v2a_buf.iov_len) {
+    void *ptr = osal_realloc(chk->v2a_buf.iov_base, need);
+    if (unlikely(!ptr))
+      return "";
+    chk->v2a_buf.iov_base = ptr;
+    chk->v2a_buf.iov_len = need;
+  }
+
+  static const char hex[] = "0123456789abcdef";
+  char *w = chk->v2a_buf.iov_base;
+  if (!quoting) {
+    memcpy(w, data, len);
+    w += len;
+  } else if (printable) {
+    *w++ = '\'';
+    for (size_t i = 0; i < len; ++i) {
+      if (data[i] < ' ') {
+        assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 4);
+        w[0] = '\\';
+        w[1] = 'x';
+        w[2] = hex[data[i] >> 4];
+        w[3] = hex[data[i] & 15];
+        w += 4;
+      } else if (strchr("\"'`\\", data[i])) {
+        assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 2);
+        w[0] = '\\';
+ w[1] = data[i]; + w += 2; + } else { + assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 1); + *w++ = data[i]; + } + } + *w++ = '\''; + } else { + *w++ = '\\'; + *w++ = 'x'; + for (size_t i = 0; i < len; ++i) { + assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 2); + w[0] = hex[data[i] >> 4]; + w[1] = hex[data[i] & 15]; + w += 2; + } + } + assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w); + *w = 0; + return chk->v2a_buf.iov_base; +} + +__cold static void chk_dispose(MDBX_chk_internal_t *chk) { + assert(chk->subdb[FREE_DBI] == &chk->subdb_gc); + assert(chk->subdb[MAIN_DBI] == &chk->subdb_main); + for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb); ++i) { + MDBX_chk_subdb_t *const sdb = chk->subdb[i]; + if (sdb) { + chk->subdb[i] = nullptr; + if (chk->cb->subdb_dispose && sdb->cookie) { + chk->cb->subdb_dispose(chk->usr, sdb); + sdb->cookie = nullptr; + } + if (sdb != &chk->subdb_gc && sdb != &chk->subdb_main) { + osal_free(sdb); + } + } + } + osal_free(chk->v2a_buf.iov_base); + osal_free(chk->pagemap); + chk->usr->internal = nullptr; + chk->usr->scope = nullptr; + chk->pagemap = nullptr; + memset(chk, 0xDD, sizeof(*chk)); + osal_free(chk); +} + +static size_t div_8s(size_t numerator, size_t divider) { + assert(numerator <= (SIZE_MAX >> 8)); + return (numerator << 8) / divider; +} + +static size_t mul_8s(size_t quotient, size_t multiplier) { + size_t hi = multiplier * (quotient >> 8); + size_t lo = multiplier * (quotient & 255) + 128; + return hi + (lo >> 8); +} + +static void histogram_reduce(struct MDBX_chk_histogram *p) { + const size_t size = ARRAY_LENGTH(p->ranges), last = size - 1; + // ищем пару для слияния с минимальной ошибкой + size_t min_err = SIZE_MAX, min_i = last - 1; + for (size_t i = 0; i < last; ++i) { + const size_t b1 = p->ranges[i].begin, e1 = p->ranges[i].end, + s1 = p->ranges[i].amount; + const size_t b2 = p->ranges[i + 1].begin, e2 = p->ranges[i + 1].end, + s2 = p->ranges[i + 1].amount; + const 
size_t l1 = e1 - b1, l2 = e2 - b2, lx = e2 - b1, sx = s1 + s2; + assert(s1 > 0 && b1 > 0 && b1 < e1); + assert(s2 > 0 && b2 > 0 && b2 < e2); + assert(e1 <= b2); + // за ошибку принимаем площадь изменений на гистограмме при слиянии + const size_t h1 = div_8s(s1, l1), h2 = div_8s(s2, l2), hx = div_8s(sx, lx); + const size_t d1 = mul_8s((h1 > hx) ? h1 - hx : hx - h1, l1); + const size_t d2 = mul_8s((h2 > hx) ? h2 - hx : hx - h2, l2); + const size_t dx = mul_8s(hx, b2 - e1); + const size_t err = d1 + d2 + dx; + if (min_err >= err) { + min_i = i; + min_err = err; + } + } + // объединяем + p->ranges[min_i].end = p->ranges[min_i + 1].end; + p->ranges[min_i].amount += p->ranges[min_i + 1].amount; + p->ranges[min_i].count += p->ranges[min_i + 1].count; + if (min_i < last) + // перемещаем хвост + memmove(p->ranges + min_i, p->ranges + min_i + 1, + (last - min_i) * sizeof(p->ranges[0])); + // обнуляем последний элемент и продолжаем + p->ranges[last].count = 0; +} + +static void histogram_acc(const size_t n, struct MDBX_chk_histogram *p) { + STATIC_ASSERT(ARRAY_LENGTH(p->ranges) > 2); + p->amount += n; + p->count += 1; + if (likely(n < 2)) { + p->ones += n; + p->pad += 1; + } else + for (;;) { + const size_t size = ARRAY_LENGTH(p->ranges), last = size - 1; + size_t i = 0; + while (i < size && p->ranges[i].count && n >= p->ranges[i].begin) { + if (n < p->ranges[i].end) { + // значение попадает в существующий интервал + p->ranges[i].amount += n; + p->ranges[i].count += 1; + return; + } + ++i; + } + if (p->ranges[last].count == 0) { + // использованы еще не все слоты, добавляем интервал + assert(i < size); + if (p->ranges[i].count) { + assert(i < last); + // раздвигаем +#ifdef __COVERITY__ + if (i < last) /* avoid Coverity false-positive issue */ +#endif /* __COVERITY__ */ + memmove(p->ranges + i + 1, p->ranges + i, + (last - i) * sizeof(p->ranges[0])); + } + p->ranges[i].begin = n; + p->ranges[i].end = n + 1; + p->ranges[i].amount = n; + p->ranges[i].count = 1; + return; + } + 
histogram_reduce(p);
+    }
+}
+/* Prints "<prefix>:" followed by the comma-separated distribution held in
+ * `histogram`: first the dedicated counter for values below 2 (labelled
+ * `first`, skipped when zero), then every used range as "begin[-last]=N".
+ * N is the accumulated amount when `amount` is true and the number of
+ * samples otherwise. Returns the line for further printing. */
+__cold static MDBX_chk_line_t *
+histogram_dist(MDBX_chk_line_t *line,
+               const struct MDBX_chk_histogram *histogram, const char *prefix,
+               const char *first, bool amount) {
+  line = chk_print(line, "%s:", prefix);
+  const char *comma = "";
+  const size_t first_val = amount ? histogram->ones : histogram->pad;
+  if (first_val) {
+    chk_print(line, " %s=%" PRIuSIZE, first, first_val);
+    comma = ",";
+  }
+  for (size_t n = 0; n < ARRAY_LENGTH(histogram->ranges); ++n)
+    if (histogram->ranges[n].count) {
+      chk_print(line, "%s %" PRIuSIZE, comma, histogram->ranges[n].begin);
+      if (histogram->ranges[n].begin != histogram->ranges[n].end - 1)
+        chk_print(line, "-%" PRIuSIZE, histogram->ranges[n].end - 1);
+      line = chk_print(line, "=%" PRIuSIZE,
+                       amount ? histogram->ranges[n].amount
+                              : histogram->ranges[n].count);
+      comma = ",";
+    }
+  return line;
+}
+/* Prints "<prefix> <total>" for a non-empty histogram (total amount or
+ * total number of samples, depending on `amount`) and, when the scope
+ * verbosity is above MDBX_chk_info, appends the distribution in
+ * parentheses. Prints nothing for an empty histogram. */
+__cold static MDBX_chk_line_t *
+histogram_print(MDBX_chk_scope_t *scope, MDBX_chk_line_t *line,
+                const struct MDBX_chk_histogram *histogram, const char *prefix,
+                const char *first, bool amount) {
+  if (histogram->count) {
+    line = chk_print(line, "%s %" PRIuSIZE, prefix,
+                     amount ? histogram->amount : histogram->count);
+    if (scope->verbosity > MDBX_chk_info)
+      line = chk_puts(
+          histogram_dist(line, histogram, " (distribution", first, amount),
+          ")");
+  }
+  return line;
+}
+
+//-----------------------------------------------------------------------------
+/* Finds, or registers on first sight, the checker descriptor of the
+ * sub-database described by `in` and stores it to `*out`.
+ *
+ * Descriptors are matched by the address of the name data (iov_base), not by
+ * the name content, and the last hit is cached in chk->last_lookup. A new
+ * descriptor takes the first free slot of chk->subdb; when it gets its id
+ * the subdb_filter callback, if installed, supplies the cookie, otherwise
+ * the cookie is a non-NULL dummy.
+ *
+ * Returns MDBX_SUCCESS, the result of chk_error_rc() for MDBX_ENOMEM, or
+ * MDBX_PROBLEM when all slots are taken; `*out` is nullptr on failure. */
+__cold static int chk_get_sdb(MDBX_chk_scope_t *const scope,
+                              const walk_sdb_t *in, MDBX_chk_subdb_t **out) {
+  MDBX_chk_internal_t *const chk = scope->internal;
+  if (chk->last_lookup &&
+      chk->last_lookup->name.iov_base == in->name.iov_base) {
+    *out = chk->last_lookup;
+    return MDBX_SUCCESS;
+  }
+
+  for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb); ++i) {
+    MDBX_chk_subdb_t *sdb = chk->subdb[i];
+    if (!sdb) {
+      sdb = osal_calloc(1, sizeof(MDBX_chk_subdb_t));
+      if (unlikely(!sdb)) {
+        *out = nullptr;
+        return chk_error_rc(scope, MDBX_ENOMEM, "alloc_subDB");
+      }
+      chk->subdb[i] = sdb;
+      sdb->flags = in->internal->flags;
+      sdb->id = -1; /* not yet identified, see below */
+      sdb->name = in->name;
+    }
+    if (sdb->name.iov_base == in->name.iov_base) {
+      if (sdb->id < 0) {
+        sdb->id = (int)i;
+        sdb->cookie =
+            chk->cb->subdb_filter
+                ?
chk->cb->subdb_filter(chk->usr, &sdb->name, sdb->flags)
+                : (void *)(intptr_t)-1;
+      }
+      *out = (chk->last_lookup = sdb);
+      return MDBX_SUCCESS;
+    }
+  }
+  chk_scope_issue(scope, "too many subDBs > %u",
+                  (unsigned)ARRAY_LENGTH(chk->subdb) - CORE_DBS - /* meta */ 1);
+  *out = nullptr;
+  return MDBX_PROBLEM;
+}
+
+//------------------------------------------------------------------------------
+/* Prints a one-line description of meta-page `num` at MDBX_chk_verbose
+ * severity: its role in the troika (head/tail/stay), the kind of its data
+ * signature, its txn-id, its boot-id compared with the current one, and
+ * whether it is forced for checking or implies rolled-back commits.
+ * Does nothing when no line is available at this severity. */
+__cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope,
+                                    const unsigned num) {
+  MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_verbose);
+  MDBX_chk_internal_t *const chk = scope->internal;
+  if (line) {
+    MDBX_env *const env = chk->usr->env;
+    const bool have_bootid = (chk->envinfo.mi_bootid.current.x |
+                              chk->envinfo.mi_bootid.current.y) != 0;
+    const bool bootid_match =
+        have_bootid && memcmp(&chk->envinfo.mi_bootid.meta[num],
+                              &chk->envinfo.mi_bootid.current,
+                              sizeof(chk->envinfo.mi_bootid.current)) == 0;
+
+    const char *status = "stay";
+    if (num == chk->troika.recent)
+      status = "head";
+    else if (num == TROIKA_TAIL(&chk->troika))
+      status = "tail";
+    line = chk_print(line, "meta-%u: %s, ", num, status);
+
+    switch (chk->envinfo.mi_meta_sign[num]) {
+    case DATASIGN_NONE:
+      line = chk_puts(line, "no-sync/legacy");
+      break;
+    case DATASIGN_WEAK:
+      line = chk_print(line, "weak-%s",
+                       have_bootid
+                           ? (bootid_match ? "intact (same boot-id)" : "dead")
+                           : "unknown (no boot-id)");
+      break;
+    default:
+      line = chk_puts(line, "steady");
+      break;
+    }
+    const txnid_t meta_txnid = chk->envinfo.mi_meta_txnid[num];
+    line = chk_print(line, " txn#%" PRIaTXN ", ", meta_txnid);
+    if (chk->envinfo.mi_bootid.meta[num].x | chk->envinfo.mi_bootid.meta[num].y)
+      line = chk_print(line, "boot-id %" PRIx64 "-%" PRIx64 " (%s)",
+                       chk->envinfo.mi_bootid.meta[num].x,
+                       chk->envinfo.mi_bootid.meta[num].y,
+                       bootid_match ? "live" : "not match");
+    else
+      line = chk_puts(line, "no boot-id");
+
+    if (env->stuck_meta >= 0) {
+      if (num == (unsigned)env->stuck_meta)
+        line = chk_print(line, ", %s", "forced for checking");
+    } else if (meta_txnid > chk->envinfo.mi_recent_txnid &&
+               (env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == MDBX_EXCLUSIVE)
+      line = chk_print(line,
+                       ", rolled-back %" PRIu64 " commit(s) (%" PRIu64
+                       " >>> %" PRIu64 ")",
+                       meta_txnid - chk->envinfo.mi_recent_txnid, meta_txnid,
+                       chk->envinfo.mi_recent_txnid);
+    chk_line_end(line);
+  }
+}
+/* Page-walk callback: accounts one page, or a span of `npages` pages
+ * starting at `pgno`, that belongs to the sub-database `sdb_info`.
+ *
+ * `ctx` is the checker scope. The visitor classifies the page by
+ * `pagetype`, updates the per-subDB page counters and histograms, marks the
+ * pages in chk->pagemap (reporting out-of-range and doubly used pages) and
+ * validates the header/payload/unused byte accounting against `page_size`.
+ *
+ * Returns the error of chk_get_sdb(), MDBX_CORRUPTED when `deep` exceeds
+ * 42, MDBX_RESULT_TRUE for an already used branch page (MDBX_SUCCESS for
+ * other already used pages), otherwise the result of chk_check_break(). */
+__cold static int
+chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx,
+              const int deep, const walk_sdb_t *sdb_info,
+              const size_t page_size, const page_type_t pagetype,
+              const MDBX_error_t page_err, const size_t nentries,
+              const size_t payload_bytes, const size_t header_bytes,
+              const size_t unused_bytes) {
+  MDBX_chk_scope_t *const scope = ctx;
+  MDBX_chk_internal_t *const chk = scope->internal;
+  MDBX_chk_context_t *const usr = chk->usr;
+  MDBX_env *const env = usr->env;
+
+  MDBX_chk_subdb_t *sdb;
+  int err = chk_get_sdb(scope, sdb_info, &sdb);
+  if (unlikely(err))
+    return err;
+
+  if (deep > 42) {
+    chk_scope_issue(scope, "too deeply %u", deep);
+    return MDBX_CORRUPTED /* avoid infinite loop/recursion */;
+  }
+  histogram_acc(deep, &sdb->histogram.deep);
+  usr->result.processed_pages += npages;
+  const size_t page_bytes = payload_bytes + header_bytes + unused_bytes;
+
+  int height = deep + 1; /* made relative to the owning tree below */
+  if (sdb->id >= CORE_DBS)
+    height -= usr->txn->dbs[MAIN_DBI].height;
+  const tree_t *nested = sdb_info->nested;
+  if (nested) {
+    if (sdb->flags & MDBX_DUPSORT)
+      height -= sdb_info->internal->height;
+    else {
+      chk_object_issue(scope, "nested tree", pgno, "unexpected",
+                       "subDb %s flags 0x%x, deep %i", chk_v2a(chk, &sdb->name),
+                       sdb->flags, deep);
+      nested = nullptr;
+    }
+  } else
+    chk->last_nested = nullptr;
+
+  const char *pagetype_caption;
+  bool branch = false;
+  switch (pagetype) {
+  default:
+    chk_object_issue(scope, "page", pgno, "unknown page-type",
+                     "type %u, deep %i", (unsigned)pagetype, deep);
+    pagetype_caption = "unknown";
+    sdb->pages.other += npages;
+    break;
+  case page_broken:
+    assert(page_err != MDBX_SUCCESS);
+    pagetype_caption = "broken";
+    sdb->pages.other += npages;
+    break;
+  case page_sub_broken:
+    assert(page_err != MDBX_SUCCESS);
+    pagetype_caption = "broken-subpage";
+    sdb->pages.other += npages;
+    break;
+  case page_large:
+    pagetype_caption = "large";
+    histogram_acc(npages, &sdb->histogram.large_pages);
+    if (sdb->flags & MDBX_DUPSORT)
+      chk_object_issue(scope, "page", pgno, "unexpected",
+                       "type %u, subDb %s flags 0x%x, deep %i",
+                       (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags,
+                       deep);
+    break;
+  case page_branch:
+    branch = true;
+    if (!nested) {
+      pagetype_caption = "branch";
+      sdb->pages.branch += 1;
+    } else {
+      pagetype_caption = "nested-branch";
+      sdb->pages.nested_branch += 1;
+    }
+    break;
+  case page_dupfix_leaf:
+    if (!nested)
+      chk_object_issue(scope, "page", pgno, "unexpected",
+                       "type %u, subDb %s flags 0x%x, deep %i",
+                       (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags,
+                       deep);
+    /* fall through */
+    __fallthrough;
+  case page_leaf:
+    if (!nested) {
+      pagetype_caption = "leaf";
+      sdb->pages.leaf += 1;
+      if (height != sdb_info->internal->height)
+        chk_object_issue(scope, "page", pgno, "wrong tree height",
+                         "actual %i != %i subDb %s", height,
+                         sdb_info->internal->height, chk_v2a(chk, &sdb->name));
+    } else {
+      pagetype_caption =
+          (pagetype == page_leaf) ? "nested-leaf" : "nested-leaf-dupfix";
+      sdb->pages.nested_leaf += 1;
+      if (chk->last_nested != nested) { /* first leaf seen of this nested tree */
+        histogram_acc(height, &sdb->histogram.nested_tree);
+        chk->last_nested = nested;
+      }
+      if (height != nested->height)
+        chk_object_issue(scope, "page", pgno, "wrong nested-tree height",
+                         "actual %i != %i dupsort-node %s", height,
+                         nested->height, chk_v2a(chk, &sdb->name));
+    }
+    break;
+  case page_sub_dupfix_leaf:
+  case page_sub_leaf:
+    pagetype_caption =
+        (pagetype == page_sub_leaf) ? "subleaf-dupsort" : "subleaf-dupfix";
+    sdb->pages.nested_subleaf += 1;
+    if ((sdb->flags & MDBX_DUPSORT) == 0 || nested)
+      chk_object_issue(scope, "page", pgno, "unexpected",
+                       "type %u, subDb %s flags 0x%x, deep %i",
+                       (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags,
+                       deep);
+    break;
+  }
+
+  if (npages) {
+    if (sdb->cookie) {
+      MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra);
+      if (npages == 1)
+        chk_print(line, "%s-page %" PRIuSIZE, pagetype_caption, pgno);
+      else
+        chk_print(line, "%s-span %" PRIuSIZE "[%u]", pagetype_caption, pgno,
+                  npages);
+      chk_line_end(chk_print(line,
+                             " of %s: header %" PRIiPTR ", %s %" PRIiPTR
+                             ", payload %" PRIiPTR ", unused %" PRIiPTR
+                             ", deep %i",
+                             chk_v2a(chk, &sdb->name), header_bytes,
+                             (pagetype == page_branch) ? "keys" : "entries",
+                             nentries, payload_bytes, unused_bytes, deep));
+    }
+
+    bool already_used = false;
+    for (unsigned n = 0; n < npages; ++n) {
+      const size_t spanpgno = pgno + n;
+      if (spanpgno >= usr->result.alloc_pages) {
+        chk_object_issue(scope, "page", spanpgno, "wrong page-no",
+                         "%s-page: %" PRIuSIZE " > %" PRIuSIZE ", deep %i",
+                         pagetype_caption, spanpgno, usr->result.alloc_pages,
+                         deep);
+        sdb->pages.all += 1;
+      } else if (chk->pagemap[spanpgno]) { /* non-zero entry is owner id + 1 */
+        const MDBX_chk_subdb_t *const rival =
+            chk->subdb[chk->pagemap[spanpgno] - 1];
+        chk_object_issue(scope, "page", spanpgno,
+                         (branch && rival == sdb) ? "loop" : "already used",
+                         "%s-page: by %s, deep %i", pagetype_caption,
+                         chk_v2a(chk, &rival->name), deep);
+        already_used = true;
+      } else {
+        chk->pagemap[spanpgno] = (int16_t)sdb->id + 1;
+        sdb->pages.all += 1;
+      }
+    }
+
+    if (already_used)
+      return branch ? MDBX_RESULT_TRUE /* avoid infinite loop/recursion */
+                    : MDBX_SUCCESS;
+  }
+
+  if (MDBX_IS_ERROR(page_err)) {
+    chk_object_issue(scope, "page", pgno, "invalid/corrupted", "%s-page",
+                     pagetype_caption);
+  } else {
+    if (unused_bytes > page_size)
+      chk_object_issue(scope, "page", pgno, "illegal unused-bytes",
+                       "%s-page: %u < %" PRIuSIZE " < %u", pagetype_caption, 0,
+                       unused_bytes, env->ps);
+
+    if (header_bytes < (int)sizeof(long) ||
+        (size_t)header_bytes >= env->ps - sizeof(long)) {
+      chk_object_issue(scope, "page", pgno, "illegal header-length",
+                       "%s-page: %" PRIuSIZE " < %" PRIuSIZE " < %" PRIuSIZE,
+                       pagetype_caption, sizeof(long), header_bytes,
+                       env->ps - sizeof(long));
+    }
+    if (nentries < 1 || (pagetype == page_branch && nentries < 2)) {
+      chk_object_issue(scope, "page", pgno, nentries ? "half-empty" : "empty",
+                       "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE
+                       " entries, deep %i",
+                       pagetype_caption, payload_bytes, nentries, deep);
+      sdb->pages.empty += 1;
+    }
+
+    if (npages) {
+      if (page_bytes != page_size) {
+        chk_object_issue(scope, "page", pgno, "misused",
+                         "%s-page: %" PRIuPTR " != %" PRIuPTR " (%" PRIuPTR
+                         "h + %" PRIuPTR "p + %" PRIuPTR "u), deep %i",
+                         pagetype_caption, page_size, page_bytes, header_bytes,
+                         payload_bytes, unused_bytes, deep);
+        if (page_size > page_bytes)
+          sdb->lost_bytes += page_size - page_bytes;
+      } else {
+        sdb->payload_bytes += payload_bytes + header_bytes;
+        usr->result.total_payload_bytes += payload_bytes + header_bytes;
+      }
+    }
+  }
+  return chk_check_break(scope);
+}
+/* Walks all pages reachable in the checked transaction, then prints
+ * per-tree and overall page usage statistics. Returns the walk result as
+ * adjusted while closing the nested scopes, or the result of chk_error_rc()
+ * when the page map cannot be allocated. */
+__cold static int chk_tree(MDBX_chk_scope_t *const scope) {
+  MDBX_chk_internal_t *const chk = scope->internal;
+  MDBX_chk_context_t *const usr = chk->usr;
+  MDBX_env *const env = usr->env;
+  MDBX_txn *const txn = usr->txn;
+
+#if defined(_WIN32) || defined(_WIN64)
+  SetLastError(ERROR_SUCCESS);
+#else
+  errno = 0;
+#endif /* Windows */
+  chk->pagemap = osal_calloc(usr->result.alloc_pages, sizeof(*chk->pagemap));
+  if (!chk->pagemap) {
+    int err = osal_get_errno();
+    return chk_error_rc(scope, err ?
err : MDBX_ENOMEM, "calloc");
+  }
+
+  if (scope->verbosity > MDBX_chk_info)
+    chk_scope_push(scope, 0, "Walking pages...");
+  /* always skip key ordering checking
+   * to avoid MDBX_CORRUPTED in case custom comparators were used */
+  usr->result.processed_pages = NUM_METAS;
+  int err = walk_pages(txn, chk_pgvisitor, scope, dont_check_keys_ordering);
+  if (MDBX_IS_ERROR(err) && err != MDBX_EINTR)
+    chk_error_rc(scope, err, "walk_pages");
+
+  for (size_t n = NUM_METAS; n < usr->result.alloc_pages; ++n)
+    if (!chk->pagemap[n]) /* never claimed by chk_pgvisitor() */
+      usr->result.unused_pages += 1;
+
+  MDBX_chk_subdb_t total; /* sums over all registered sub-databases */
+  memset(&total, 0, sizeof(total));
+  total.pages.all = NUM_METAS;
+  for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb) && chk->subdb[i]; ++i) {
+    MDBX_chk_subdb_t *const sdb = chk->subdb[i];
+    total.payload_bytes += sdb->payload_bytes;
+    total.lost_bytes += sdb->lost_bytes;
+    total.pages.all += sdb->pages.all;
+    total.pages.empty += sdb->pages.empty;
+    total.pages.other += sdb->pages.other;
+    total.pages.branch += sdb->pages.branch;
+    total.pages.leaf += sdb->pages.leaf;
+    total.pages.nested_branch += sdb->pages.nested_branch;
+    total.pages.nested_leaf += sdb->pages.nested_leaf;
+    total.pages.nested_subleaf += sdb->pages.nested_subleaf;
+  }
+  assert(total.pages.all == usr->result.processed_pages);
+
+  const size_t total_page_bytes = pgno2bytes(env, total.pages.all);
+  if (usr->scope->subtotal_issues || usr->scope->verbosity >= MDBX_chk_verbose)
+    chk_line_end(chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution),
+                           "walked %zu pages, left/unused %zu"
+                           ", %" PRIuSIZE " problem(s)",
+                           usr->result.processed_pages,
+                           usr->result.unused_pages,
+                           usr->scope->subtotal_issues));
+
+  err = chk_scope_restore(scope, err); /* closes the "Walking pages..." scope */
+  if (scope->verbosity > MDBX_chk_info) {
+    for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb) && chk->subdb[i]; ++i) {
+      MDBX_chk_subdb_t *const sdb = chk->subdb[i];
+      MDBX_chk_scope_t *inner =
+          chk_scope_push(scope, 0, "tree %s:", chk_v2a(chk, &sdb->name));
+      if (sdb->pages.all == 0)
+        chk_line_end(
+            chk_print(chk_line_begin(inner, MDBX_chk_resolution), "empty"));
+      else {
+        MDBX_chk_line_t *line = chk_line_begin(inner, MDBX_chk_info);
+        if (line) {
+          line = chk_print(line, "page usage: subtotal %" PRIuSIZE,
+                           sdb->pages.all);
+          const size_t branch_pages =
+              sdb->pages.branch + sdb->pages.nested_branch;
+          const size_t leaf_pages = sdb->pages.leaf + sdb->pages.nested_leaf +
+                                    sdb->pages.nested_subleaf;
+          if (sdb->pages.other)
+            line = chk_print(line, ", other %" PRIuSIZE, sdb->pages.other);
+          if (sdb->pages.other == 0 ||
+              (branch_pages | leaf_pages | sdb->histogram.large_pages.count) !=
+                  0) {
+            line = chk_print(line, ", branch %" PRIuSIZE ", leaf %" PRIuSIZE,
+                             branch_pages, leaf_pages);
+            if (sdb->histogram.large_pages.count ||
+                (sdb->flags & MDBX_DUPSORT) == 0) {
+              line = chk_print(line, ", large %" PRIuSIZE,
+                               sdb->histogram.large_pages.count);
+              if (sdb->histogram.large_pages.amount |
+                  sdb->histogram.large_pages.count)
+                line = histogram_print(inner, line, &sdb->histogram.large_pages,
+                                       " amount", "single", true);
+            }
+          }
+          line = histogram_dist(chk_line_feed(line), &sdb->histogram.deep,
+                                "tree deep density", "1", false);
+          if (sdb != &chk->subdb_gc && sdb->histogram.nested_tree.count) {
+            line = chk_print(chk_line_feed(line), "nested tree(s) %" PRIuSIZE,
+                             sdb->histogram.nested_tree.count);
+            line = histogram_dist(line, &sdb->histogram.nested_tree, " density",
+                                  "1", false);
+            line = chk_print(chk_line_feed(line),
+                             "nested tree(s) pages %" PRIuSIZE
+                             ": branch %" PRIuSIZE ", leaf %" PRIuSIZE
+                             ", subleaf %" PRIuSIZE,
+                             sdb->pages.nested_branch + sdb->pages.nested_leaf,
+                             sdb->pages.nested_branch, sdb->pages.nested_leaf,
+                             sdb->pages.nested_subleaf);
+          }
+
+          const size_t bytes = pgno2bytes(env, sdb->pages.all);
+          line = chk_print(
+              chk_line_feed(line),
+              "page filling: subtotal %" PRIuSIZE
+              " bytes (%.1f%%), payload %" PRIuSIZE
+              " (%.1f%%), unused %" PRIuSIZE " (%.1f%%)",
+              bytes, bytes * 100.0 / total_page_bytes, sdb->payload_bytes,
+              sdb->payload_bytes * 100.0 / bytes, bytes - sdb->payload_bytes,
+              (bytes - sdb->payload_bytes) * 100.0 / bytes);
+          if (sdb->pages.empty)
+            line = chk_print(line, ", %" PRIuSIZE " empty pages",
+                             sdb->pages.empty);
+          if (sdb->lost_bytes)
+            line =
+                chk_print(line, ", %" PRIuSIZE " bytes lost", sdb->lost_bytes);
+          chk_line_end(line);
+        }
+      }
+      chk_scope_restore(scope, 0);
+    }
+  }
+
+  MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution);
+  line = chk_print(line,
+                   "summary: total %" PRIuSIZE " bytes, payload %" PRIuSIZE
+                   " (%.1f%%), unused %" PRIuSIZE " (%.1f%%),"
+                   " average fill %.1f%%",
+                   total_page_bytes, usr->result.total_payload_bytes,
+                   usr->result.total_payload_bytes * 100.0 / total_page_bytes,
+                   total_page_bytes - usr->result.total_payload_bytes,
+                   (total_page_bytes - usr->result.total_payload_bytes) *
+                       100.0 / total_page_bytes,
+                   usr->result.total_payload_bytes * 100.0 / total_page_bytes);
+  if (total.pages.empty)
+    line = chk_print(line, ", %" PRIuSIZE " empty pages", total.pages.empty);
+  if (total.lost_bytes)
+    line = chk_print(line, ", %" PRIuSIZE " bytes lost", total.lost_bytes);
+  chk_line_end(line);
+  return err;
+}
+/* Signature of a per-record handler that can be passed to chk_db(). */
+typedef int(chk_kv_visitor)(MDBX_chk_scope_t *const scope,
+                            MDBX_chk_subdb_t *sdb, const size_t record_number,
+                            const MDBX_val *key, const MDBX_val *data);
+/* Record handler that forwards the key-value pair to the subdb_handle_kv
+ * callback when installed. Returns the callback error if any, otherwise the
+ * result of chk_check_break(). */
+__cold static int chk_handle_kv(MDBX_chk_scope_t *const scope,
+                                MDBX_chk_subdb_t *sdb,
+                                const size_t record_number, const MDBX_val *key,
+                                const MDBX_val *data) {
+  MDBX_chk_internal_t *const chk = scope->internal;
+  int err = MDBX_SUCCESS;
+  assert(sdb->cookie);
+  if (chk->cb->subdb_handle_kv)
+    err = chk->cb->subdb_handle_kv(chk->usr, sdb, record_number, key, data);
+  return err ? err : chk_check_break(scope);
+}
+
+__cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi,
+                         MDBX_chk_subdb_t *sdb, chk_kv_visitor *handler) {
+  MDBX_chk_internal_t *const chk = scope->internal;
+  MDBX_chk_context_t *const usr = chk->usr;
+  MDBX_env *const env = usr->env;
+  MDBX_txn *const txn = usr->txn;
+  MDBX_cursor *cursor = nullptr;
+  size_t record_count = 0, dups = 0, sub_databases = 0;
+  int err;
+
+  if ((MDBX_TXN_FINISHED | MDBX_TXN_ERROR) & txn->flags) {
+    chk_line_end(
+        chk_flush(chk_print(chk_line_begin(scope, MDBX_chk_error),
+                            "abort processing %s due to a previous error",
+                            chk_v2a(chk, &sdb->name))));
+    err = MDBX_BAD_TXN;
+    goto bailout;
+  }
+
+  if (0 > (int)dbi) {
+    err = dbi_open(
+        txn, &sdb->name, MDBX_DB_ACCEDE, &dbi,
+        (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr,
+        (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr);
+    if (unlikely(err)) {
+      tASSERT(txn, dbi >= txn->env->n_dbi ||
+                       (txn->env->dbs_flags[dbi] & DB_VALID) == 0);
+      chk_error_rc(scope, err, "mdbx_dbi_open");
+      goto bailout;
+    }
+    tASSERT(txn, dbi < txn->env->n_dbi &&
+                     (txn->env->dbs_flags[dbi] & DB_VALID) != 0);
+  }
+
+  const tree_t *const db = txn->dbs + dbi;
+  if (handler) {
+    const char *key_mode = nullptr;
+    switch (sdb->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) {
+    case 0:
+      key_mode = "usual";
+      break;
+    case MDBX_REVERSEKEY:
+      key_mode = "reserve";
+      break;
+    case MDBX_INTEGERKEY:
+      key_mode = "ordinal";
+      break;
+    case MDBX_REVERSEKEY | MDBX_INTEGERKEY:
+      key_mode = "msgpack";
+      break;
+    default:
+      key_mode = "inconsistent";
+      chk_scope_issue(scope, "wrong key-mode (0x%x)",
+                      sdb->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY));
+    }
+
+    const char *value_mode = nullptr;
+    switch (sdb->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED |
+                          MDBX_INTEGERDUP)) {
+    case 0:
+      value_mode = "single";
+      break;
+    case MDBX_DUPSORT:
+      value_mode = "multi";
+      break;
+    case MDBX_DUPSORT | MDBX_REVERSEDUP:
+      value_mode =
"multi-reverse"; + break; + case MDBX_DUPSORT | MDBX_DUPFIXED: + value_mode = "multi-samelength"; + break; + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP: + value_mode = "multi-reverse-samelength"; + break; + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP: + value_mode = "multi-ordinal"; + break; + case MDBX_DUPSORT | MDBX_INTEGERDUP | MDBX_REVERSEDUP: + value_mode = "multi-msgpack"; + break; + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: + value_mode = "reserved"; + break; + default: + value_mode = "inconsistent"; + chk_scope_issue(scope, "wrong value-mode (0x%x)", + sdb->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | + MDBX_DUPFIXED | MDBX_INTEGERDUP)); + } + + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info); + line = chk_print(line, "key-value kind: %s-key => %s-value", key_mode, + value_mode); + line = chk_print(line, ", flags:"); + if (!sdb->flags) + line = chk_print(line, " none"); + else { + const uint8_t f[] = {MDBX_DUPSORT, + MDBX_INTEGERKEY, + MDBX_REVERSEKEY, + MDBX_DUPFIXED, + MDBX_REVERSEDUP, + MDBX_INTEGERDUP, + 0}; + const char *const t[] = {"dupsort", "integerkey", "reversekey", + "dupfix", "reversedup", "integerdup"}; + for (size_t i = 0; f[i]; i++) + if (sdb->flags & f[i]) + line = chk_print(line, " %s", t[i]); + } + chk_line_end(chk_print(line, " (0x%02X)", sdb->flags)); + + line = chk_print(chk_line_begin(scope, MDBX_chk_verbose), + "entries %" PRIu64 ", sequence %" PRIu64, db->items, + db->sequence); + if (db->mod_txnid) + line = + chk_print(line, ", last modification txn#%" PRIaTXN, db->mod_txnid); + if (db->root != P_INVALID) + line = chk_print(line, ", root #%" PRIaPGNO, db->root); + chk_line_end(line); + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_verbose), + "b-tree depth %u, pages: branch %" PRIaPGNO + ", leaf %" PRIaPGNO ", large %" PRIaPGNO, + db->height, db->branch_pages, db->leaf_pages, + db->large_pages)); + + if ((chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) { + const 
size_t branch_pages = sdb->pages.branch + sdb->pages.nested_branch; + const size_t leaf_pages = sdb->pages.leaf + sdb->pages.nested_leaf; + const size_t subtotal_pages = + db->branch_pages + db->leaf_pages + db->large_pages; + /* compare the page counters stored in the tree record with the ones accumulated in sdb->pages / sdb->histogram */ + if (subtotal_pages != sdb->pages.all) + chk_scope_issue( + scope, "%s pages mismatch (%" PRIuSIZE " != walked %" PRIuSIZE ")", + "subtotal", subtotal_pages, sdb->pages.all); + if (db->branch_pages != branch_pages) + chk_scope_issue( + scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", + "branch", db->branch_pages, branch_pages); + if (db->leaf_pages != leaf_pages) + chk_scope_issue( + scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", + "all-leaf", db->leaf_pages, leaf_pages); + if (db->large_pages != sdb->histogram.large_pages.amount) + chk_scope_issue( + scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", + "large/overflow", db->large_pages, + sdb->histogram.large_pages.amount); + } + } + + err = mdbx_cursor_open(txn, dbi, &cursor); + if (unlikely(err)) { + chk_error_rc(scope, err, "mdbx_cursor_open"); + goto bailout; + } + if (chk->flags & MDBX_CHK_IGNORE_ORDER) { + cursor->checking |= z_ignord | z_pagecheck; + if (cursor->subcur) + cursor->subcur->cursor.checking |= z_ignord | z_pagecheck; + } + + const size_t maxkeysize = mdbx_env_get_maxkeysize_ex(env, sdb->flags); + MDBX_val prev_key = {nullptr, 0}, prev_data = {nullptr, 0}; + MDBX_val key, data; + err = mdbx_cursor_get(cursor, &key, &data, MDBX_FIRST); + while (err == MDBX_SUCCESS) { + err = chk_check_break(scope); + if (unlikely(err)) + goto bailout; + + bool bad_key = false; + if (key.iov_len > maxkeysize) { + chk_object_issue(scope, "entry", record_count, + "key length exceeds max-key-size", + "%" PRIuPTR " > %" PRIuPTR, key.iov_len, maxkeysize); + bad_key = true; + } else if ((sdb->flags & MDBX_INTEGERKEY) && key.iov_len != 8 && + key.iov_len != 4) { + chk_object_issue(scope, "entry", record_count, "wrong key length", + "%" PRIuPTR
" != 4or8", key.iov_len); + bad_key = true; + } + + bool bad_data = false; + if ((sdb->flags & MDBX_INTEGERDUP) && data.iov_len != 8 && + data.iov_len != 4) { + chk_object_issue(scope, "entry", record_count, "wrong data length", + "%" PRIuPTR " != 4or8", data.iov_len); + bad_data = true; + } + + if (prev_key.iov_base) { + if (prev_data.iov_base && !bad_data && (sdb->flags & MDBX_DUPFIXED) && + prev_data.iov_len != data.iov_len) { + chk_object_issue(scope, "entry", record_count, "different data length", + "%" PRIuPTR " != %" PRIuPTR, prev_data.iov_len, + data.iov_len); + bad_data = true; + } + + if (!bad_key) { + int cmp = mdbx_cmp(txn, dbi, &key, &prev_key); + if (cmp == 0) { + ++dups; + if ((sdb->flags & MDBX_DUPSORT) == 0) { + chk_object_issue(scope, "entry", record_count, "duplicated entries", + nullptr); + if (prev_data.iov_base && data.iov_len == prev_data.iov_len && + memcmp(data.iov_base, prev_data.iov_base, data.iov_len) == 0) + chk_object_issue(scope, "entry", record_count, + "complete duplicate", nullptr); + } else if (!bad_data && prev_data.iov_base) { + cmp = mdbx_dcmp(txn, dbi, &data, &prev_data); + if (cmp == 0) + chk_object_issue(scope, "entry", record_count, + "complete duplicate", nullptr); + else if (cmp < 0 && !(chk->flags & MDBX_CHK_IGNORE_ORDER)) + chk_object_issue(scope, "entry", record_count, + "wrong order of multi-values", nullptr); + } + } else if (cmp < 0 && !(chk->flags & MDBX_CHK_IGNORE_ORDER)) + chk_object_issue(scope, "entry", record_count, + "wrong order of entries", nullptr); + } + } + + if (!bad_key) { + if (!prev_key.iov_base && (sdb->flags & MDBX_INTEGERKEY)) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), + "fixed key-size %" PRIuSIZE, key.iov_len)); + prev_key = key; + } + if (!bad_data) { + if (!prev_data.iov_base && + (sdb->flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED))) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), + "fixed data-size %" PRIuSIZE, data.iov_len)); + prev_data = data; + } + + 
record_count++; + histogram_acc(key.iov_len, &sdb->histogram.key_len); + histogram_acc(data.iov_len, &sdb->histogram.val_len); + + const node_t *const node = + page_node(cursor->pg[cursor->top], cursor->ki[cursor->top]); + if (node_flags(node) == N_SUBDATA) { + if (dbi != MAIN_DBI || (sdb->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | + MDBX_REVERSEDUP | MDBX_INTEGERDUP))) + chk_object_issue(scope, "entry", record_count, + "unexpected sub-database", "node-flags 0x%x", + node_flags(node)); + else if (data.iov_len != sizeof(tree_t)) + chk_object_issue(scope, "entry", record_count, + "wrong sub-database node size", + "node-size %" PRIuSIZE " != %" PRIuSIZE, data.iov_len, + sizeof(tree_t)); + else if (scope->stage == MDBX_chk_maindb) + /* count the subDBs during the first pass */ + sub_databases += 1; + else { + /* process the subDB during the second pass */ + tree_t aligned_db; + memcpy(&aligned_db, data.iov_base, sizeof(aligned_db)); + walk_sdb_t sdb_info = {.name = key}; + sdb_info.internal = &aligned_db; + MDBX_chk_subdb_t *subdb; + err = chk_get_sdb(scope, &sdb_info, &subdb); + if (unlikely(err)) + goto bailout; + if (subdb->cookie) { + err = chk_scope_begin( + chk, 0, MDBX_chk_subdbs, subdb, &usr->result.problems_kv, + "Processing subDB %s...", chk_v2a(chk, &subdb->name)); + if (likely(!err)) { + err = chk_db(usr->scope, (MDBX_dbi)-1, subdb, chk_handle_kv); + if (err != MDBX_EINTR && err != MDBX_RESULT_TRUE) + usr->result.subdb_processed += 1; + } + err = chk_scope_restore(scope, err); + if (unlikely(err)) + goto bailout; + } else + chk_line_end(chk_flush( + chk_print(chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s...", chk_v2a(chk, &subdb->name)))); + } + } else if (handler) { + err = handler(scope, sdb, record_count, &key, &data); + if (unlikely(err)) + goto bailout; + } + + err = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT); + } + + err = (err != MDBX_NOTFOUND) ?
chk_error_rc(scope, err, "mdbx_cursor_get") + : MDBX_SUCCESS; + if (err == MDBX_SUCCESS && record_count != db->items) + chk_scope_issue(scope, + "different number of entries %" PRIuSIZE " != %" PRIu64, + record_count, db->items); +bailout: + if (cursor) { + if (handler) { + if (sdb->histogram.key_len.count) { + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info); + line = histogram_dist(line, &sdb->histogram.key_len, + "key length density", "0/1", false); + chk_line_feed(line); + line = histogram_dist(line, &sdb->histogram.val_len, + "value length density", "0/1", false); + chk_line_end(line); + } + if (scope->stage == MDBX_chk_maindb) + usr->result.subdb_total = sub_databases; + if (chk->cb->subdb_conclude) + err = chk->cb->subdb_conclude(usr, sdb, cursor, err); + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution); + line = chk_print(line, "summary: %" PRIuSIZE " records,", record_count); + if (dups || (sdb->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | + MDBX_REVERSEDUP | MDBX_INTEGERDUP))) + line = chk_print(line, " %" PRIuSIZE " dups,", dups); + if (sub_databases || dbi == MAIN_DBI) + line = chk_print(line, " %" PRIuSIZE " sub-databases,", sub_databases); + line = chk_print(line, + " %" PRIuSIZE " key's bytes," + " %" PRIuSIZE " data's bytes," + " %" PRIuSIZE " problem(s)", + sdb->histogram.key_len.amount, + sdb->histogram.val_len.amount, scope->subtotal_issues); + chk_line_end(chk_flush(line)); + } + + mdbx_cursor_close(cursor); + if (!txn->cursors[dbi] && (txn->dbi_state[dbi] & DBI_FRESH)) + mdbx_dbi_close(env, dbi); + } + return err; +} + +__cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, + MDBX_chk_subdb_t *sdb, + const size_t record_number, const MDBX_val *key, + const MDBX_val *data) { + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_context_t *const usr = chk->usr; + assert(sdb == &chk->subdb_gc); + (void)sdb; + const char *bad = ""; + pgno_t *iptr = data->iov_base; + + if (key->iov_len != sizeof(txnid_t)) + 
chk_object_issue(scope, "entry", record_number, "wrong txn-id size", + "key-size %" PRIuSIZE, key->iov_len); + else { + txnid_t txnid; + memcpy(&txnid, key->iov_base, sizeof(txnid)); + if (txnid < 1 || txnid > usr->txn->txnid) + chk_object_issue(scope, "entry", record_number, "wrong txn-id", + "%" PRIaTXN, txnid); + else { + if (data->iov_len < sizeof(pgno_t) || data->iov_len % sizeof(pgno_t)) + chk_object_issue(scope, "entry", txnid, "wrong idl size", "%" PRIuPTR, + data->iov_len); + /* count of pgno entries that actually fit into the data after the leading length word; zero for a record too short to hold even the length */ + const size_t fit = (data->iov_len >= sizeof(pgno_t)) ? data->iov_len / sizeof(pgno_t) - 1 : 0; + size_t number = (data->iov_len >= sizeof(pgno_t)) ? *iptr++ : 0; + if (number > PAGELIST_LIMIT) { + chk_object_issue(scope, "entry", txnid, "wrong idl length", "%" PRIuPTR, + number); + /* clamp, otherwise the loops below would read past the end of the data */ + if (number > fit) + number = fit; + } else if ((number + 1) * sizeof(pgno_t) > data->iov_len) { + chk_object_issue(scope, "entry", txnid, "trimmed idl", + "%" PRIuSIZE " > %" PRIuSIZE " (corruption)", + (number + 1) * sizeof(pgno_t), data->iov_len); + /* `fit` cannot wrap around for records shorter than one pgno_t */ + number = fit; + } else if (data->iov_len - (number + 1) * sizeof(pgno_t) >= + /* LY: allow gap up to one page. it is ok + * and better than shink-and-retry inside gc_update() */ + usr->env->ps) + chk_object_issue(scope, "entry", txnid, "extra idl space", + "%" PRIuSIZE " < %" PRIuSIZE " (minor, not a trouble)", + (number + 1) * sizeof(pgno_t), data->iov_len); + + usr->result.gc_pages += number; + if (chk->envinfo.mi_latter_reader_txnid > txnid) + usr->result.reclaimable_pages += number; + + size_t prev = + MDBX_PNL_ASCENDING ?
NUM_METAS - 1 : usr->txn->geo.first_unallocated; + size_t span = 1; + for (size_t i = 0; i < number; ++i) { + const size_t pgno = iptr[i]; + if (pgno < NUM_METAS) + chk_object_issue(scope, "entry", txnid, "wrong idl entry", + "pgno %" PRIuSIZE " < meta-pages %u", pgno, + NUM_METAS); + else if (pgno >= usr->result.backed_pages) + chk_object_issue(scope, "entry", txnid, "wrong idl entry", + "pgno %" PRIuSIZE " > backed-pages %" PRIuSIZE, pgno, + usr->result.backed_pages); + else if (pgno >= usr->result.alloc_pages) + chk_object_issue(scope, "entry", txnid, "wrong idl entry", + "pgno %" PRIuSIZE " > alloc-pages %" PRIuSIZE, pgno, + usr->result.alloc_pages - 1); + else { + if (MDBX_PNL_DISORDERED(prev, pgno)) { + bad = " [bad sequence]"; + chk_object_issue( + scope, "entry", txnid, "bad sequence", + "%" PRIuSIZE " %c [%" PRIuSIZE "].%" PRIuSIZE, prev, + (prev == pgno) ? '=' : (MDBX_PNL_ASCENDING ? '>' : '<'), i, + pgno); + } + if (chk->pagemap) { + const intptr_t id = chk->pagemap[pgno]; + if (id == 0) + chk->pagemap[pgno] = -1 /* mark the pgno listed in GC */; + else if (id > 0) { + /* id - 1 is used as an index into chk->subdb[] just below */ + assert(id - 1 < (intptr_t)ARRAY_LENGTH(chk->subdb)); + chk_object_issue(scope, "page", pgno, "already used", "by %s", + chk_v2a(chk, &chk->subdb[id - 1]->name)); + } else + chk_object_issue(scope, "page", pgno, "already listed in GC", + nullptr); + } + } + prev = pgno; + while (i + span < number && + iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) + : pgno_sub(pgno, span))) + ++span; + } + if (sdb->cookie) { + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_details), + "transaction %" PRIaTXN ", %" PRIuSIZE + " pages, maxspan %" PRIuSIZE "%s", + txnid, number, span, bad)); + for (size_t i = 0; i < number; i += span) { + const size_t pgno = iptr[i]; + for (span = 1; + i + span < number && + iptr[i + span] == (MDBX_PNL_ASCENDING ?
pgno_add(pgno, span) + : pgno_sub(pgno, span)); + ++span) + ; + histogram_acc(span, &sdb->histogram.nested_tree); + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra); + if (line) { + if (span > 1) + line = + chk_print(line, "%9" PRIuSIZE "[%" PRIuSIZE "]", pgno, span); + else + line = chk_print(line, "%9" PRIuSIZE, pgno); + chk_line_end(line); + int err = chk_check_break(scope); + if (err) + return err; + } + } + } + } + } + return chk_check_break(scope); +} + +__cold static int env_chk(MDBX_chk_scope_t *const scope) { + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_context_t *const usr = chk->usr; + MDBX_env *const env = usr->env; + MDBX_txn *const txn = usr->txn; + int err = + env_info(env, txn, &chk->envinfo, sizeof(chk->envinfo), &chk->troika); + if (unlikely(err)) + return chk_error_rc(scope, err, "env_info"); + + MDBX_chk_line_t *line = + chk_puts(chk_line_begin(scope, MDBX_chk_info), "current boot-id "); + if (chk->envinfo.mi_bootid.current.x | chk->envinfo.mi_bootid.current.y) + line = chk_print(line, "%016" PRIx64 "-%016" PRIx64, + chk->envinfo.mi_bootid.current.x, + chk->envinfo.mi_bootid.current.y); + else + line = chk_puts(line, "unavailable"); + chk_line_end(line); + + err = osal_filesize(env->lazy_fd, &env->dxb_mmap.filesize); + if (unlikely(err)) + return chk_error_rc(scope, err, "osal_filesize"); + + //-------------------------------------------------------------------------- + + err = chk_scope_begin(chk, 1, MDBX_chk_meta, nullptr, + &usr->result.problems_meta, "Peek the meta-pages..."); + if (likely(!err)) { + MDBX_chk_scope_t *const inner = usr->scope; + const uint64_t dxbfile_pages = env->dxb_mmap.filesize >> env->ps2ln; + usr->result.alloc_pages = txn->geo.first_unallocated; + usr->result.backed_pages = bytes2pgno(env, env->dxb_mmap.current); + if (unlikely(usr->result.backed_pages > dxbfile_pages)) + chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64, + usr->result.backed_pages, dxbfile_pages); + if 
(unlikely(dxbfile_pages < NUM_METAS)) + chk_scope_issue(inner, "file-pages %" PRIu64 " < %u", dxbfile_pages, + NUM_METAS); + if (unlikely(usr->result.backed_pages < NUM_METAS)) + chk_scope_issue(inner, "backed-pages %zu < %u", usr->result.backed_pages, + NUM_METAS); + /* NOTE(review): both conditions below were already reported just above, so each problem is counted twice; the early returns also leave without a chk_scope_restore() for the scope opened above - confirm whether that is intended */ + if (unlikely(usr->result.backed_pages < NUM_METAS)) { + chk_scope_issue(inner, "backed-pages %zu < num-metas %u", + usr->result.backed_pages, NUM_METAS); + return MDBX_CORRUPTED; + } + if (unlikely(dxbfile_pages < NUM_METAS)) { + /* report the value that was actually tested (file pages, not backed pages) */ + chk_scope_issue(inner, "file-pages %" PRIu64 " < num-metas %u", + dxbfile_pages, NUM_METAS); + return MDBX_CORRUPTED; + } + if (unlikely(usr->result.backed_pages > (size_t)MAX_PAGENO + 1)) { + chk_scope_issue(inner, "backed-pages %zu > max-pages %zu", + usr->result.backed_pages, (size_t)MAX_PAGENO + 1); + usr->result.backed_pages = MAX_PAGENO + 1; + } + + if ((env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) { + if (unlikely(usr->result.backed_pages > dxbfile_pages)) { + chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64, + usr->result.backed_pages, dxbfile_pages); + usr->result.backed_pages = (size_t)dxbfile_pages; + } + if (unlikely(usr->result.alloc_pages > usr->result.backed_pages)) { + /* reported on the inner scope like every other check in this section */ + chk_scope_issue(inner, "alloc-pages %zu > backed-pages %zu", + usr->result.alloc_pages, usr->result.backed_pages); + usr->result.alloc_pages = usr->result.backed_pages; + } + } else { + /* DB may be shrunk by writer down to the allocated (but unused) pages.
*/ + if (unlikely(usr->result.alloc_pages > usr->result.backed_pages)) { + chk_scope_issue(inner, "alloc-pages %zu > backed-pages %zu", + usr->result.alloc_pages, usr->result.backed_pages); + usr->result.alloc_pages = usr->result.backed_pages; + } + if (unlikely(usr->result.alloc_pages > dxbfile_pages)) { + chk_scope_issue(inner, "alloc-pages %zu > file-pages %" PRIu64, + usr->result.alloc_pages, dxbfile_pages); + usr->result.alloc_pages = (size_t)dxbfile_pages; + } + if (unlikely(usr->result.backed_pages > dxbfile_pages)) + usr->result.backed_pages = (size_t)dxbfile_pages; + } + + line = chk_line_feed(chk_print( + chk_line_begin(inner, MDBX_chk_info), + "pagesize %u (%u system), max keysize %u..%u" + ", max readers %u", + env->ps, globals.sys_pagesize, + mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT), + mdbx_env_get_maxkeysize_ex(env, MDBX_DB_DEFAULTS), env->max_readers)); + line = chk_line_feed( + chk_print_size(line, "mapsize ", env->dxb_mmap.current, nullptr)); + if (txn->geo.lower == txn->geo.upper) + line = chk_print_size( + line, "fixed datafile: ", chk->envinfo.mi_geo.current, nullptr); + else { + line = chk_print_size( + line, "dynamic datafile: ", chk->envinfo.mi_geo.lower, nullptr); + line = chk_print_size(line, " .. 
", chk->envinfo.mi_geo.upper, ", "); + line = chk_print_size(line, "+", chk->envinfo.mi_geo.grow, ", "); + + line = chk_line_feed( + chk_print_size(line, "-", chk->envinfo.mi_geo.shrink, nullptr)); + line = chk_print_size( + line, "current datafile: ", chk->envinfo.mi_geo.current, nullptr); + } + tASSERT(txn, txn->geo.now == chk->envinfo.mi_geo.current / + chk->envinfo.mi_dxb_pagesize); + chk_line_end(chk_print(line, ", %u pages", txn->geo.now)); +#if defined(_WIN32) || defined(_WIN64) || MDBX_DEBUG + if (txn->geo.shrink_pv && txn->geo.now != txn->geo.upper && + scope->verbosity >= MDBX_chk_verbose) { + line = chk_line_begin(inner, MDBX_chk_notice); + chk_line_feed(chk_print( + line, " > WARNING: Due Windows system limitations a file couldn't")); + chk_line_feed(chk_print( + line, " > be truncated while the database is opened. So, the size")); + chk_line_feed(chk_print( + line, " > database file of may by large than the database itself,")); + chk_line_end(chk_print( + line, " > until it will be closed or reopened in read-write mode.")); + } +#endif /* Windows || Debug */ + chk_verbose_meta(inner, 0); + chk_verbose_meta(inner, 1); + chk_verbose_meta(inner, 2); + + if (env->stuck_meta >= 0) { + chk_line_end(chk_print(chk_line_begin(inner, MDBX_chk_processing), + "skip checking meta-pages since the %u" + " is selected for verification", + env->stuck_meta)); + line = chk_line_feed( + chk_print(chk_line_begin(inner, MDBX_chk_resolution), + "transactions: recent %" PRIu64 ", " + "selected for verification %" PRIu64 ", lag %" PRIi64, + chk->envinfo.mi_recent_txnid, + chk->envinfo.mi_meta_txnid[env->stuck_meta], + chk->envinfo.mi_recent_txnid - + chk->envinfo.mi_meta_txnid[env->stuck_meta])); + chk_line_end(line); + } else { + chk_line_end(chk_puts(chk_line_begin(inner, MDBX_chk_verbose), + "performs check for meta-pages clashes")); + const unsigned meta_clash_mask = meta_eq_mask(&chk->troika); + if (meta_clash_mask & 1) + chk_scope_issue(inner, "meta-%d and meta-%d are 
clashed", 0, 1); + if (meta_clash_mask & 2) + chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 1, 2); + if (meta_clash_mask & 4) + chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 2, 0); + + const unsigned prefer_steady_metanum = chk->troika.prefer_steady; + const uint64_t prefer_steady_txnid = + chk->troika.txnid[prefer_steady_metanum]; + const unsigned recent_metanum = chk->troika.recent; + const uint64_t recent_txnid = chk->troika.txnid[recent_metanum]; + if (env->flags & MDBX_EXCLUSIVE) { + chk_line_end( + chk_puts(chk_line_begin(inner, MDBX_chk_verbose), + "performs full check recent-txn-id with meta-pages")); + eASSERT(env, recent_txnid == chk->envinfo.mi_recent_txnid); + if (prefer_steady_txnid != recent_txnid) { + if ((chk->flags & MDBX_CHK_READWRITE) != 0 && + (env->flags & MDBX_RDONLY) == 0 && + recent_txnid > prefer_steady_txnid && + (chk->envinfo.mi_bootid.current.x | + chk->envinfo.mi_bootid.current.y) != 0 && + chk->envinfo.mi_bootid.current.x == + chk->envinfo.mi_bootid.meta[recent_metanum].x && + chk->envinfo.mi_bootid.current.y == + chk->envinfo.mi_bootid.meta[recent_metanum].y) { + chk_line_end( + chk_print(chk_line_begin(inner, MDBX_chk_verbose), + "recent meta-%u is weak, but boot-id match current" + " (will synced upon successful check)", + recent_metanum)); + } else + chk_scope_issue( + inner, + "steady meta-%d txn-id mismatch recent-txn-id (%" PRIi64 + " != %" PRIi64 ")", + prefer_steady_metanum, prefer_steady_txnid, recent_txnid); + } + } else if (chk->write_locked) { + chk_line_end( + chk_puts(chk_line_begin(inner, MDBX_chk_verbose), + "performs lite check recent-txn-id with meta-pages (not a " + "monopolistic mode)")); + if (recent_txnid != chk->envinfo.mi_recent_txnid) { + chk_scope_issue(inner, + "weak meta-%d txn-id mismatch recent-txn-id (%" PRIi64 + " != %" PRIi64 ")", + recent_metanum, recent_txnid, + chk->envinfo.mi_recent_txnid); + } + } else { + chk_line_end(chk_puts( + chk_line_begin(inner, MDBX_chk_verbose), + 
"skip check recent-txn-id with meta-pages (monopolistic or " + "read-write mode only)")); + } + + chk_line_end(chk_print( + chk_line_begin(inner, MDBX_chk_resolution), + "transactions: recent %" PRIu64 ", latter reader %" PRIu64 + ", lag %" PRIi64, + chk->envinfo.mi_recent_txnid, chk->envinfo.mi_latter_reader_txnid, + chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid)); + } + } + err = chk_scope_restore(scope, err); + + //-------------------------------------------------------------------------- + + const char *const subj_tree = "B-Trees"; + if (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), + "Skipping %s traversal...", subj_tree)); + else { + err = chk_scope_begin( + chk, -1, MDBX_chk_tree, nullptr, &usr->result.tree_problems, + "Traversal %s by txn#%" PRIaTXN "...", subj_tree, txn->txnid); + if (likely(!err)) + err = chk_tree(usr->scope); + if (usr->result.tree_problems && usr->result.gc_tree_problems == 0) + usr->result.gc_tree_problems = usr->result.tree_problems; + if (usr->result.tree_problems && usr->result.kv_tree_problems == 0) + usr->result.kv_tree_problems = usr->result.tree_problems; + chk_scope_restore(scope, err); + } + + const char *const subj_gc = chk_v2a(chk, MDBX_CHK_GC); + if (usr->result.gc_tree_problems > 0) + chk_line_end(chk_print( + chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))", + subj_gc, subj_tree, + usr->result.problems_gc = usr->result.gc_tree_problems)); + else { + err = chk_scope_begin( + chk, -1, MDBX_chk_gc, &chk->subdb_gc, &usr->result.problems_gc, + "Processing %s by txn#%" PRIaTXN "...", subj_gc, txn->txnid); + if (likely(!err)) + err = chk_db(usr->scope, FREE_DBI, &chk->subdb_gc, chk_handle_gc); + line = chk_line_begin(scope, MDBX_chk_info); + if (line) { + histogram_print(scope, line, &chk->subdb_gc.histogram.nested_tree, + "span(s)", "single", false); + chk_line_end(line); + } 
+ if (usr->result.problems_gc == 0 && + (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) { + const size_t used_pages = usr->result.alloc_pages - usr->result.gc_pages; + if (usr->result.processed_pages != used_pages) + chk_scope_issue(usr->scope, + "used pages mismatch (%" PRIuSIZE + "(walked) != %" PRIuSIZE "(allocated - GC))", + usr->result.processed_pages, used_pages); + if (usr->result.unused_pages != usr->result.gc_pages) + chk_scope_issue(usr->scope, + "GC pages mismatch (%" PRIuSIZE + "(expected) != %" PRIuSIZE "(GC))", + usr->result.unused_pages, usr->result.gc_pages); + } + } + chk_scope_restore(scope, err); + + //-------------------------------------------------------------------------- + + err = chk_scope_begin(chk, 1, MDBX_chk_space, nullptr, nullptr, + "Page allocation:"); + const double percent_boundary_reciprocal = 100.0 / txn->geo.upper; + const double percent_backed_reciprocal = 100.0 / usr->result.backed_pages; + const size_t detained = usr->result.gc_pages - usr->result.reclaimable_pages; + const size_t available2boundary = + txn->geo.upper - usr->result.alloc_pages + usr->result.reclaimable_pages; + const size_t available2backed = usr->result.backed_pages - + usr->result.alloc_pages + + usr->result.reclaimable_pages; + const size_t remained2boundary = txn->geo.upper - usr->result.alloc_pages; + const size_t remained2backed = + usr->result.backed_pages - usr->result.alloc_pages; + + const size_t used = (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) + ? 
usr->result.alloc_pages - usr->result.gc_pages + : usr->result.processed_pages; + + line = chk_line_begin(usr->scope, MDBX_chk_info); + line = chk_print(line, + "backed by file: %" PRIuSIZE " pages (%.1f%%)" + ", %" PRIuSIZE " left to boundary (%.1f%%)", + usr->result.backed_pages, + usr->result.backed_pages * percent_boundary_reciprocal, + txn->geo.upper - usr->result.backed_pages, + (txn->geo.upper - usr->result.backed_pages) * + percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print( + line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", + "used", used, used * percent_backed_reciprocal, + used * percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print( + line, + "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE + " to boundary (%.1f%% of boundary)", + "remained", remained2backed, remained2backed * percent_backed_reciprocal, + remained2boundary, remained2boundary * percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print( + line, + "reclaimable: %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)" + ", GC %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)", + usr->result.reclaimable_pages, + usr->result.reclaimable_pages * percent_backed_reciprocal, + usr->result.reclaimable_pages * percent_boundary_reciprocal, + usr->result.gc_pages, usr->result.gc_pages * percent_backed_reciprocal, + usr->result.gc_pages * percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print( + line, + "detained by reader(s): %" PRIuSIZE + " (%.1f%% of backed, %.1f%% of boundary)" + ", %u reader(s), lag %" PRIi64, + detained, detained * percent_backed_reciprocal, + detained * percent_boundary_reciprocal, chk->envinfo.mi_numreaders, + chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid); + line = chk_line_feed(line); + + line = chk_print( + line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", + "allocated", 
usr->result.alloc_pages, + usr->result.alloc_pages * percent_backed_reciprocal, + usr->result.alloc_pages * percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print(line, + "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE + " to boundary (%.1f%% of boundary)", + "available", available2backed, + available2backed * percent_backed_reciprocal, + available2boundary, + available2boundary * percent_boundary_reciprocal); + chk_line_end(line); + + line = chk_line_begin(usr->scope, MDBX_chk_resolution); + line = chk_print(line, "%s %" PRIaPGNO " pages", + (txn->geo.upper == txn->geo.now) ? "total" : "upto", + txn->geo.upper); + line = chk_print(line, ", backed %" PRIuSIZE " (%.1f%%)", + usr->result.backed_pages, + usr->result.backed_pages * percent_boundary_reciprocal); + line = chk_print(line, ", allocated %" PRIuSIZE " (%.1f%%)", + usr->result.alloc_pages, + usr->result.alloc_pages * percent_boundary_reciprocal); + line = + chk_print(line, ", available %" PRIuSIZE " (%.1f%%)", available2boundary, + available2boundary * percent_boundary_reciprocal); + chk_line_end(line); + chk_scope_restore(scope, err); + + //-------------------------------------------------------------------------- + + const char *const subj_main = chk_v2a(chk, MDBX_CHK_MAIN); + if (chk->flags & MDBX_CHK_SKIP_KV_TRAVERSAL) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s...", subj_main)); + else if ((usr->result.problems_kv = usr->result.kv_tree_problems) > 0) + chk_line_end(chk_print( + chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))", + subj_main, subj_tree, + usr->result.problems_kv = usr->result.kv_tree_problems)); + else { + err = chk_scope_begin(chk, 0, MDBX_chk_maindb, &chk->subdb_main, + &usr->result.problems_kv, "Processing %s...", + subj_main); + if (likely(!err)) + err = chk_db(usr->scope, MAIN_DBI, &chk->subdb_main, chk_handle_kv); + 
chk_scope_restore(scope, err); + + const char *const subj_subdbs = "sub-database(s)"; + if (usr->result.problems_kv && usr->result.subdb_total) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s", subj_subdbs)); + else if (usr->result.problems_kv == 0 && usr->result.subdb_total == 0) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "No %s", + subj_subdbs)); + else if (usr->result.problems_kv == 0 && usr->result.subdb_total) { + err = chk_scope_begin( + chk, 1, MDBX_chk_subdbs, nullptr, &usr->result.problems_kv, + "Processing %s by txn#%" PRIaTXN "...", subj_subdbs, txn->txnid); + if (!err) + err = chk_db(usr->scope, MAIN_DBI, &chk->subdb_main, nullptr); + if (usr->scope->subtotal_issues) + chk_line_end(chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution), + "processed %" PRIuSIZE " of %" PRIuSIZE + " %s, %" PRIuSIZE " problems(s)", + usr->result.subdb_processed, + usr->result.subdb_total, subj_subdbs, + usr->scope->subtotal_issues)); + } + chk_scope_restore(scope, err); + } + + return chk_scope_end(chk, chk_scope_begin(chk, 0, MDBX_chk_conclude, nullptr, + nullptr, nullptr)); +} + +__cold int mdbx_env_chk_encount_problem(MDBX_chk_context_t *ctx) { + if (likely(ctx && ctx->internal && ctx->internal->usr == ctx && + ctx->internal->problem_counter && ctx->scope)) { + *ctx->internal->problem_counter += 1; + ctx->scope->subtotal_issues += 1; + return MDBX_SUCCESS; + } + return MDBX_EINVAL; +} + +__cold int mdbx_env_chk(MDBX_env *env, const struct MDBX_chk_callbacks *cb, + MDBX_chk_context_t *ctx, const MDBX_chk_flags_t flags, + MDBX_chk_severity_t verbosity, + unsigned timeout_seconds_16dot16) { + int err, rc = check_env(env, false); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + if (unlikely(!cb || !ctx || ctx->internal)) + return MDBX_EINVAL; + + MDBX_chk_internal_t *const chk = osal_calloc(1, sizeof(MDBX_chk_internal_t)); + if (unlikely(!chk)) + return MDBX_ENOMEM; + + chk->cb = cb; + chk->usr = ctx; + 
chk->usr->internal = chk; + chk->usr->env = env; + chk->flags = flags; + + chk->subdb_gc.id = -1; + chk->subdb_gc.name.iov_base = MDBX_CHK_GC; + chk->subdb[FREE_DBI] = &chk->subdb_gc; + + chk->subdb_main.id = -1; + chk->subdb_main.name.iov_base = MDBX_CHK_MAIN; + chk->subdb[MAIN_DBI] = &chk->subdb_main; + + chk->monotime_timeout = + timeout_seconds_16dot16 + ? osal_16dot16_to_monotime(timeout_seconds_16dot16) + osal_monotime() + : 0; + chk->usr->scope_nesting = 0; + chk->usr->result.subdbs = (const void *)&chk->subdb; + + MDBX_chk_scope_t *const top = chk->scope_stack; + top->verbosity = verbosity; + top->internal = chk; + + // init + rc = chk_scope_end( + chk, chk_scope_begin(chk, 0, MDBX_chk_init, nullptr, nullptr, nullptr)); + + // lock + if (likely(!rc)) + rc = chk_scope_begin( + chk, 0, MDBX_chk_lock, nullptr, nullptr, "Taking %slock...", + (env->flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) ? "" : "read "); + if (likely(!rc) && (env->flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0 && + (flags & MDBX_CHK_READWRITE)) { + rc = mdbx_txn_lock(env, false); + if (unlikely(rc)) + chk_error_rc(ctx->scope, rc, "mdbx_txn_lock"); + else + chk->write_locked = true; + } + if (likely(!rc)) { + rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &ctx->txn); + if (unlikely(rc)) + chk_error_rc(ctx->scope, rc, "mdbx_txn_begin"); + } + chk_scope_end(chk, rc); + + // doit + if (likely(!rc)) { + chk->subdb_gc.flags = ctx->txn->dbs[FREE_DBI].flags; + chk->subdb_main.flags = ctx->txn->dbs[MAIN_DBI].flags; + rc = env_chk(top); + } + + // unlock + if (ctx->txn || chk->write_locked) { + chk_scope_begin(chk, 0, MDBX_chk_unlock, nullptr, nullptr, nullptr); + if (ctx->txn) { + err = mdbx_txn_abort(ctx->txn); + if (err && !rc) + rc = err; + ctx->txn = nullptr; + } + if (chk->write_locked) + mdbx_txn_unlock(env); + rc = chk_scope_end(chk, rc); + } + + // finalize + err = chk_scope_begin(chk, 0, MDBX_chk_finalize, nullptr, nullptr, nullptr); + rc = chk_scope_end(chk, err ? 
err : rc); + chk_dispose(chk); + return rc; +} diff --git a/src/cogs.c b/src/cogs.c new file mode 100644 index 00000000..2c505fbe --- /dev/null +++ b/src/cogs.c @@ -0,0 +1,353 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +/*------------------------------------------------------------------------------ + * Pack/Unpack 16-bit values for Grow step & Shrink threshold */ + +MDBX_NOTHROW_CONST_FUNCTION static inline pgno_t me2v(size_t m, size_t e) { + assert(m < 2048 && e < 8); + return (pgno_t)(32768 + ((m + 1) << (e + 8))); +} + +MDBX_NOTHROW_CONST_FUNCTION static inline uint16_t v2me(size_t v, size_t e) { + assert(v > (e ? me2v(2047, e - 1) : 32768)); + assert(v <= me2v(2047, e)); + size_t m = (v - 32768 + ((size_t)1 << (e + 8)) - 1) >> (e + 8); + m -= m > 0; + assert(m < 2048 && e < 8); + // f e d c b a 9 8 7 6 5 4 3 2 1 0 + // 1 e e e m m m m m m m m m m m 1 + const uint16_t pv = (uint16_t)(0x8001 + (e << 12) + (m << 1)); + assert(pv != 65535); + return pv; +} + +/* Convert 16-bit packed (exponential quantized) value to number of pages */ +pgno_t pv2pages(uint16_t pv) { + if ((pv & 0x8001) != 0x8001) + return pv; + if (pv == 65535) + return 65536; + // f e d c b a 9 8 7 6 5 4 3 2 1 0 + // 1 e e e m m m m m m m m m m m 1 + return me2v((pv >> 1) & 2047, (pv >> 12) & 7); +} + +/* Convert number of pages to 16-bit packed (exponential quantized) value */ +uint16_t pages2pv(size_t pages) { + if (pages < 32769 || (pages < 65536 && (pages & 1) == 0)) + return (uint16_t)pages; + if (pages <= me2v(2047, 0)) + return v2me(pages, 0); + if (pages <= me2v(2047, 1)) + return v2me(pages, 1); + if (pages <= me2v(2047, 2)) + return v2me(pages, 2); + if (pages <= me2v(2047, 3)) + return v2me(pages, 3); + if (pages <= me2v(2047, 4)) + return v2me(pages, 4); + if (pages <= me2v(2047, 5)) + return v2me(pages, 5); + if (pages <= me2v(2047, 6)) + return v2me(pages, 6); + return (pages < 
me2v(2046, 7)) ? v2me(pages, 7) : 65533; +} + +__cold bool pv2pages_verify(void) { + bool ok = true, dump_translation = false; + for (size_t i = 0; i < 65536; ++i) { + size_t pages = pv2pages(i); + size_t x = pages2pv(pages); + size_t xp = pv2pages(x); + if (pages != xp) { + ERROR("%zu => %zu => %zu => %zu\n", i, pages, x, xp); + ok = false; + } else if (dump_translation && !(x == i || (x % 2 == 0 && x < 65536))) { + DEBUG("%zu => %zu => %zu => %zu\n", i, pages, x, xp); + } + } + return ok; +} + +/*----------------------------------------------------------------------------*/ + +MDBX_NOTHROW_PURE_FUNCTION size_t bytes_align2os_bytes(const MDBX_env *env, + size_t bytes) { + return ceil_powerof2( + bytes, (env->ps > globals.sys_pagesize) ? env->ps : globals.sys_pagesize); +} + +MDBX_NOTHROW_PURE_FUNCTION size_t pgno_align2os_bytes(const MDBX_env *env, + size_t pgno) { + return ceil_powerof2(pgno2bytes(env, pgno), globals.sys_pagesize); +} + +MDBX_NOTHROW_PURE_FUNCTION pgno_t pgno_align2os_pgno(const MDBX_env *env, + size_t pgno) { + return bytes2pgno(env, pgno_align2os_bytes(env, pgno)); +} + +/*----------------------------------------------------------------------------*/ + +MDBX_NOTHROW_PURE_FUNCTION static __always_inline int +cmp_int_inline(const size_t expected_alignment, const MDBX_val *a, + const MDBX_val *b) { + if (likely(a->iov_len == b->iov_len)) { + if (sizeof(size_t) > 7 && likely(a->iov_len == 8)) + return CMP2INT(unaligned_peek_u64(expected_alignment, a->iov_base), + unaligned_peek_u64(expected_alignment, b->iov_base)); + if (likely(a->iov_len == 4)) + return CMP2INT(unaligned_peek_u32(expected_alignment, a->iov_base), + unaligned_peek_u32(expected_alignment, b->iov_base)); + if (sizeof(size_t) < 8 && likely(a->iov_len == 8)) + return CMP2INT(unaligned_peek_u64(expected_alignment, a->iov_base), + unaligned_peek_u64(expected_alignment, b->iov_base)); + } + ERROR("mismatch and/or invalid size %p.%zu/%p.%zu for INTEGERKEY/INTEGERDUP", + a->iov_base, 
a->iov_len, b->iov_base, b->iov_len); + return 0; +} + +MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_int_unaligned(const MDBX_val *a, + const MDBX_val *b) { + return cmp_int_inline(1, a, b); +} + +#ifndef cmp_int_align2 +/* Compare two items pointing at 2-byte aligned unsigned int's. */ +MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_int_align2(const MDBX_val *a, + const MDBX_val *b) { + return cmp_int_inline(2, a, b); +} +#endif /* cmp_int_align2 */ + +#ifndef cmp_int_align4 +/* Compare two items pointing at 4-byte aligned unsigned int's. */ +MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_int_align4(const MDBX_val *a, + const MDBX_val *b) { + return cmp_int_inline(4, a, b); +} +#endif /* cmp_int_align4 */ + +/* Compare two items lexically */ +MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_lexical(const MDBX_val *a, + const MDBX_val *b) { + if (a->iov_len == b->iov_len) + return a->iov_len ? memcmp(a->iov_base, b->iov_base, a->iov_len) : 0; + + const int diff_len = (a->iov_len < b->iov_len) ? -1 : 1; + const size_t shortest = (a->iov_len < b->iov_len) ? a->iov_len : b->iov_len; + int diff_data = shortest ? memcmp(a->iov_base, b->iov_base, shortest) : 0; + return likely(diff_data) ? diff_data : diff_len; +} + +MDBX_NOTHROW_PURE_FUNCTION static __always_inline unsigned +tail3le(const uint8_t *p, size_t l) { + STATIC_ASSERT(sizeof(unsigned) > 2); + // 1: 0 0 0 + // 2: 0 1 1 + // 3: 0 1 2 + return p[0] | p[l >> 1] << 8 | p[l - 1] << 16; +} + +/* Compare two items in reverse byte order */ +MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_reverse(const MDBX_val *a, + const MDBX_val *b) { + size_t left = (a->iov_len < b->iov_len) ? 
a->iov_len : b->iov_len; + if (likely(left)) { + const uint8_t *pa = ptr_disp(a->iov_base, a->iov_len); + const uint8_t *pb = ptr_disp(b->iov_base, b->iov_len); + while (left >= sizeof(size_t)) { + pa -= sizeof(size_t); + pb -= sizeof(size_t); + left -= sizeof(size_t); + STATIC_ASSERT(sizeof(size_t) == 4 || sizeof(size_t) == 8); + if (sizeof(size_t) == 4) { + uint32_t xa = unaligned_peek_u32(1, pa); + uint32_t xb = unaligned_peek_u32(1, pb); +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ + xa = osal_bswap32(xa); + xb = osal_bswap32(xb); +#endif /* __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ */ + if (xa != xb) + return (xa < xb) ? -1 : 1; + } else { + uint64_t xa = unaligned_peek_u64(1, pa); + uint64_t xb = unaligned_peek_u64(1, pb); +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ + xa = osal_bswap64(xa); + xb = osal_bswap64(xb); +#endif /* __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ */ + if (xa != xb) + return (xa < xb) ? -1 : 1; + } + } + if (sizeof(size_t) == 8 && left >= 4) { + pa -= 4; + pb -= 4; + left -= 4; + uint32_t xa = unaligned_peek_u32(1, pa); + uint32_t xb = unaligned_peek_u32(1, pb); +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ + xa = osal_bswap32(xa); + xb = osal_bswap32(xb); +#endif /* __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ */ + if (xa != xb) + return (xa < xb) ? -1 : 1; + } + if (left) { + unsigned xa = tail3le(pa - left, left); + unsigned xb = tail3le(pb - left, left); + if (xa != xb) + return (xa < xb) ? -1 : 1; + } + } + return CMP2INT(a->iov_len, b->iov_len); +} + +/* Fast non-lexical comparator: by length first, then by memcmp() */ +MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_lenfast(const MDBX_val *a, + const MDBX_val *b) { + int diff = CMP2INT(a->iov_len, b->iov_len); + return (likely(diff) || a->iov_len == 0) + ? 
diff + : memcmp(a->iov_base, b->iov_base, a->iov_len); +} + +MDBX_NOTHROW_PURE_FUNCTION __hot bool +eq_fast_slowpath(const uint8_t *a, const uint8_t *b, size_t l) { + if (likely(l > 3)) { + if (MDBX_UNALIGNED_OK >= 4 && likely(l < 9)) + return ((unaligned_peek_u32(1, a) - unaligned_peek_u32(1, b)) | + (unaligned_peek_u32(1, a + l - 4) - + unaligned_peek_u32(1, b + l - 4))) == 0; + if (MDBX_UNALIGNED_OK >= 8 && sizeof(size_t) > 7 && likely(l < 17)) + return ((unaligned_peek_u64(1, a) - unaligned_peek_u64(1, b)) | + (unaligned_peek_u64(1, a + l - 8) - + unaligned_peek_u64(1, b + l - 8))) == 0; + return memcmp(a, b, l) == 0; + } + if (likely(l)) + return tail3le(a, l) == tail3le(b, l); + return true; +} + +int cmp_equal_or_greater(const MDBX_val *a, const MDBX_val *b) { + return eq_fast(a, b) ? 0 : 1; +} + +int cmp_equal_or_wrong(const MDBX_val *a, const MDBX_val *b) { + return eq_fast(a, b) ? 0 : -1; +} + +/*----------------------------------------------------------------------------*/ + +__cold void update_mlcnt(const MDBX_env *env, + const pgno_t new_aligned_mlocked_pgno, + const bool lock_not_release) { + for (;;) { + const pgno_t mlock_pgno_before = + atomic_load32(&env->mlocked_pgno, mo_AcquireRelease); + eASSERT(env, + pgno_align2os_pgno(env, mlock_pgno_before) == mlock_pgno_before); + eASSERT(env, pgno_align2os_pgno(env, new_aligned_mlocked_pgno) == + new_aligned_mlocked_pgno); + if (lock_not_release ? 
(mlock_pgno_before >= new_aligned_mlocked_pgno) + : (mlock_pgno_before <= new_aligned_mlocked_pgno)) + break; + if (likely(atomic_cas32(&((MDBX_env *)env)->mlocked_pgno, mlock_pgno_before, + new_aligned_mlocked_pgno))) + for (;;) { + mdbx_atomic_uint32_t *const mlcnt = env->lck->mlcnt; + const int32_t snap_locked = atomic_load32(mlcnt + 0, mo_Relaxed); + const int32_t snap_unlocked = atomic_load32(mlcnt + 1, mo_Relaxed); + if (mlock_pgno_before == 0 && (snap_locked - snap_unlocked) < INT_MAX) { + eASSERT(env, lock_not_release); + if (unlikely(!atomic_cas32(mlcnt + 0, snap_locked, snap_locked + 1))) + continue; + } + if (new_aligned_mlocked_pgno == 0 && + (snap_locked - snap_unlocked) > 0) { + eASSERT(env, !lock_not_release); + if (unlikely( + !atomic_cas32(mlcnt + 1, snap_unlocked, snap_unlocked + 1))) + continue; + } + NOTICE("%s-pages %u..%u, mlocked-process(es) %u -> %u", + lock_not_release ? "lock" : "unlock", + lock_not_release ? mlock_pgno_before : new_aligned_mlocked_pgno, + lock_not_release ? new_aligned_mlocked_pgno : mlock_pgno_before, + snap_locked - snap_unlocked, + atomic_load32(mlcnt + 0, mo_Relaxed) - + atomic_load32(mlcnt + 1, mo_Relaxed)); + return; + } + } +} + +__cold void munlock_after(const MDBX_env *env, const pgno_t aligned_pgno, + const size_t end_bytes) { + if (atomic_load32(&env->mlocked_pgno, mo_AcquireRelease) > aligned_pgno) { + int err = MDBX_ENOSYS; + const size_t munlock_begin = pgno2bytes(env, aligned_pgno); + const size_t munlock_size = end_bytes - munlock_begin; + eASSERT(env, end_bytes % globals.sys_pagesize == 0 && + munlock_begin % globals.sys_pagesize == 0 && + munlock_size % globals.sys_pagesize == 0); +#if defined(_WIN32) || defined(_WIN64) + err = + VirtualUnlock(ptr_disp(env->dxb_mmap.base, munlock_begin), munlock_size) + ? 
MDBX_SUCCESS + : (int)GetLastError(); + if (err == ERROR_NOT_LOCKED) + err = MDBX_SUCCESS; +#elif defined(_POSIX_MEMLOCK_RANGE) + err = munlock(ptr_disp(env->dxb_mmap.base, munlock_begin), munlock_size) + ? errno + : MDBX_SUCCESS; +#endif + if (likely(err == MDBX_SUCCESS)) + update_mlcnt(env, aligned_pgno, false); + else { +#if defined(_WIN32) || defined(_WIN64) + WARNING("VirtualUnlock(%zu, %zu) error %d", munlock_begin, munlock_size, + err); +#else + WARNING("munlock(%zu, %zu) error %d", munlock_begin, munlock_size, err); +#endif + } + } +} + +__cold void munlock_all(const MDBX_env *env) { + munlock_after(env, 0, bytes_align2os_bytes(env, env->dxb_mmap.current)); +} + +/*----------------------------------------------------------------------------*/ + +uint32_t combine_durability_flags(const uint32_t a, const uint32_t b) { + uint32_t r = a | b; + + /* avoid false MDBX_UTTERLY_NOSYNC */ + if (F_ISSET(r, MDBX_UTTERLY_NOSYNC) && !F_ISSET(a, MDBX_UTTERLY_NOSYNC) && + !F_ISSET(b, MDBX_UTTERLY_NOSYNC)) + r = (r - MDBX_UTTERLY_NOSYNC) | MDBX_SAFE_NOSYNC; + + /* convert DEPRECATED_MAPASYNC to MDBX_SAFE_NOSYNC */ + if ((r & (MDBX_WRITEMAP | DEPRECATED_MAPASYNC)) == + (MDBX_WRITEMAP | DEPRECATED_MAPASYNC) && + !F_ISSET(r, MDBX_UTTERLY_NOSYNC)) + r = (r - DEPRECATED_MAPASYNC) | MDBX_SAFE_NOSYNC; + + /* force MDBX_NOMETASYNC if NOSYNC enabled */ + if (r & (MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC)) + r |= MDBX_NOMETASYNC; + + assert(!(F_ISSET(r, MDBX_UTTERLY_NOSYNC) && + !F_ISSET(a, MDBX_UTTERLY_NOSYNC) && + !F_ISSET(b, MDBX_UTTERLY_NOSYNC))); + return r; +} diff --git a/src/cogs.h b/src/cogs.h new file mode 100644 index 00000000..caaed0bd --- /dev/null +++ b/src/cogs.h @@ -0,0 +1,558 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL pgno_t pv2pages(uint16_t pv); + +MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL uint16_t 
pages2pv(size_t pages); + +MDBX_MAYBE_UNUSED MDBX_INTERNAL bool pv2pages_verify(void); + +/*------------------------------------------------------------------------------ + * Nodes, Keys & Values length limitation factors: + * + * BRANCH_NODE_MAX + * Branch-page must contain at least two nodes, each holding a key and a child + * page number. But a page can't be split if it contains fewer than 4 keys, + * i.e. a page should not overflow before adding the fourth key. Therefore, + * at least 3 branch-nodes should fit in a single branch-page. Further, the + * first node of a branch-page doesn't contain a key, i.e. the first node + * always requires space just for itself. Thus: + * PAGESPACE = pagesize - page_hdr_len; + * BRANCH_NODE_MAX = even_floor( + * (PAGESPACE - sizeof(indx_t) - NODESIZE) / (3 - 1) - sizeof(indx_t)); + * KEYLEN_MAX = BRANCH_NODE_MAX - node_hdr_len; + * + * LEAF_NODE_MAX + * A leaf-node must fit into a single leaf-page, where a value could be placed + * on a large/overflow page. However, it may be required to insert a nearly + * page-sized node between two large nodes that already fill up a page. In + * this case the page must be split in two if some pair of nodes fits on one + * page, or otherwise the page should be split into THREE with a single node + * in each of them. Such 1-into-3 page splitting is costly and complex since + * it requires TWO insertions into the parent page, which could lead to + * splitting it and so on up to the root. Therefore double-splitting is + * avoided here and the maximum node size is half of a leaf page space: + * LEAF_NODE_MAX = even_floor(PAGESPACE / 2 - sizeof(indx_t)); + * DATALEN_NO_OVERFLOW = LEAF_NODE_MAX - NODESIZE - KEYLEN_MAX; + * + * - SubDatabase-node must fit into one leaf-page: + * SUBDB_NAME_MAX = LEAF_NODE_MAX - node_hdr_len - sizeof(tree_t); + * + * - Dupsort values themselves are keys in a dupsort-subdb and couldn't be + * longer than the KEYLEN_MAX. 
But a dupsort node must not be greater than LEAF_NODE_MAX, + * since a dupsort value couldn't be placed on a large/overflow page: + * DUPSORT_DATALEN_MAX = min(KEYLEN_MAX, + * max(DATALEN_NO_OVERFLOW, sizeof(tree_t))); + */ + +#define PAGESPACE(pagesize) ((pagesize) - PAGEHDRSZ) + +#define BRANCH_NODE_MAX(pagesize) \ + (EVEN_FLOOR((PAGESPACE(pagesize) - sizeof(indx_t) - NODESIZE) / (3 - 1) - \ + sizeof(indx_t))) + +#define LEAF_NODE_MAX(pagesize) \ + (EVEN_FLOOR(PAGESPACE(pagesize) / 2) - sizeof(indx_t)) + +#define MAX_GC1OVPAGE(pagesize) (PAGESPACE(pagesize) / sizeof(pgno_t) - 1) + +MDBX_NOTHROW_CONST_FUNCTION static inline size_t +keysize_max(size_t pagesize, MDBX_db_flags_t flags) { + assert(pagesize >= MDBX_MIN_PAGESIZE && pagesize <= MDBX_MAX_PAGESIZE && + is_powerof2(pagesize)); + STATIC_ASSERT(BRANCH_NODE_MAX(MDBX_MIN_PAGESIZE) - NODESIZE >= 8); + if (flags & MDBX_INTEGERKEY) + return 8 /* sizeof(uint64_t) */; + + const intptr_t max_branch_key = BRANCH_NODE_MAX(pagesize) - NODESIZE; + STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MIN_PAGESIZE) - NODESIZE - + /* sizeof(uint64) as a key */ 8 > + sizeof(tree_t)); + if (flags & + (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP)) { + const intptr_t max_dupsort_leaf_key = + LEAF_NODE_MAX(pagesize) - NODESIZE - sizeof(tree_t); + return (max_branch_key < max_dupsort_leaf_key) ? 
max_branch_key + : max_dupsort_leaf_key; + } + return max_branch_key; +} + +MDBX_NOTHROW_CONST_FUNCTION static inline size_t +env_keysize_max(const MDBX_env *env, MDBX_db_flags_t flags) { + size_t size_max; + if (flags & MDBX_INTEGERKEY) + size_max = 8 /* sizeof(uint64_t) */; + else { + const intptr_t max_branch_key = env->branch_nodemax - NODESIZE; + STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MIN_PAGESIZE) - NODESIZE - + /* sizeof(uint64) as a key */ 8 > + sizeof(tree_t)); + if (flags & + (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP)) { + const intptr_t max_dupsort_leaf_key = + env->leaf_nodemax - NODESIZE - sizeof(tree_t); + size_max = (max_branch_key < max_dupsort_leaf_key) ? max_branch_key + : max_dupsort_leaf_key; + } else + size_max = max_branch_key; + } + eASSERT(env, size_max == keysize_max(env->ps, flags)); + return size_max; +} + +MDBX_NOTHROW_CONST_FUNCTION static inline size_t +keysize_min(MDBX_db_flags_t flags) { + return (flags & MDBX_INTEGERKEY) ? 4 /* sizeof(uint32_t) */ : 0; +} + +MDBX_NOTHROW_CONST_FUNCTION static inline size_t +valsize_min(MDBX_db_flags_t flags) { + if (flags & MDBX_INTEGERDUP) + return 4 /* sizeof(uint32_t) */; + else if (flags & MDBX_DUPFIXED) + return sizeof(indx_t); + else + return 0; +} + +MDBX_NOTHROW_CONST_FUNCTION static inline size_t +valsize_max(size_t pagesize, MDBX_db_flags_t flags) { + assert(pagesize >= MDBX_MIN_PAGESIZE && pagesize <= MDBX_MAX_PAGESIZE && + is_powerof2(pagesize)); + + if (flags & MDBX_INTEGERDUP) + return 8 /* sizeof(uint64_t) */; + + if (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP)) + return keysize_max(pagesize, 0); + + const unsigned page_ln2 = log2n_powerof2(pagesize); + const size_t hard = 0x7FF00000ul; + const size_t hard_pages = hard >> page_ln2; + STATIC_ASSERT(PAGELIST_LIMIT <= MAX_PAGENO); + const size_t pages_limit = PAGELIST_LIMIT / 4; + const size_t limit = + (hard_pages < pages_limit) ? hard : (pages_limit << page_ln2); + return (limit < MAX_MAPSIZE / 2) ? 
limit : MAX_MAPSIZE / 2; +} + +MDBX_NOTHROW_CONST_FUNCTION static inline size_t +env_valsize_max(const MDBX_env *env, MDBX_db_flags_t flags) { + size_t size_max; + if (flags & MDBX_INTEGERDUP) + size_max = 8 /* sizeof(uint64_t) */; + else if (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP)) + size_max = env_keysize_max(env, 0); + else { + const size_t hard = 0x7FF00000ul; + const size_t hard_pages = hard >> env->ps2ln; + STATIC_ASSERT(PAGELIST_LIMIT <= MAX_PAGENO); + const size_t pages_limit = PAGELIST_LIMIT / 4; + const size_t limit = + (hard_pages < pages_limit) ? hard : (pages_limit << env->ps2ln); + size_max = (limit < MAX_MAPSIZE / 2) ? limit : MAX_MAPSIZE / 2; + } + eASSERT(env, size_max == valsize_max(env->ps, flags)); + return size_max; +} + +/*----------------------------------------------------------------------------*/ + +MDBX_NOTHROW_PURE_FUNCTION static inline size_t +leaf_size(const MDBX_env *env, const MDBX_val *key, const MDBX_val *data) { + size_t node_bytes = node_size(key, data); + if (node_bytes > env->leaf_nodemax) + /* put on large/overflow page */ + node_bytes = node_size_len(key->iov_len, 0) + sizeof(pgno_t); + + return node_bytes + sizeof(indx_t); +} + +MDBX_NOTHROW_PURE_FUNCTION static inline size_t +branch_size(const MDBX_env *env, const MDBX_val *key) { + /* Size of a node in a branch page with a given key. + * This is just the node header plus the key, there is no data. 
*/ + size_t node_bytes = node_size(key, nullptr); + if (unlikely(node_bytes > env->branch_nodemax)) { + /* put on large/overflow page, not implemented */ + mdbx_panic("node_size(key) %zu > %u branch_nodemax", node_bytes, + env->branch_nodemax); + node_bytes = node_size(key, nullptr) + sizeof(pgno_t); + } + + return node_bytes + sizeof(indx_t); +} + +MDBX_NOTHROW_CONST_FUNCTION static inline uint16_t +flags_db2sub(uint16_t db_flags) { + uint16_t sub_flags = db_flags & MDBX_DUPFIXED; + + /* MDBX_INTEGERDUP => MDBX_INTEGERKEY */ +#define SHIFT_INTEGERDUP_TO_INTEGERKEY 2 + STATIC_ASSERT((MDBX_INTEGERDUP >> SHIFT_INTEGERDUP_TO_INTEGERKEY) == + MDBX_INTEGERKEY); + sub_flags |= (db_flags & MDBX_INTEGERDUP) >> SHIFT_INTEGERDUP_TO_INTEGERKEY; + + /* MDBX_REVERSEDUP => MDBX_REVERSEKEY */ +#define SHIFT_REVERSEDUP_TO_REVERSEKEY 5 + STATIC_ASSERT((MDBX_REVERSEDUP >> SHIFT_REVERSEDUP_TO_REVERSEKEY) == + MDBX_REVERSEKEY); + sub_flags |= (db_flags & MDBX_REVERSEDUP) >> SHIFT_REVERSEDUP_TO_REVERSEKEY; + + return sub_flags; +} + +static inline bool check_sdb_flags(unsigned flags) { + switch (flags & ~(MDBX_REVERSEKEY | MDBX_INTEGERKEY)) { + default: + NOTICE("invalid db-flags 0x%x", flags); + return false; + case MDBX_DUPSORT: + case MDBX_DUPSORT | MDBX_REVERSEDUP: + case MDBX_DUPSORT | MDBX_DUPFIXED: + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP: + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP: + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: + case MDBX_DB_DEFAULTS: + return (flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) != + (MDBX_REVERSEKEY | MDBX_INTEGERKEY); + } +} + +/*----------------------------------------------------------------------------*/ + +MDBX_NOTHROW_PURE_FUNCTION static inline size_t pgno2bytes(const MDBX_env *env, + size_t pgno) { + eASSERT(env, (1u << env->ps2ln) == env->ps); + return ((size_t)pgno) << env->ps2ln; +} + +MDBX_NOTHROW_PURE_FUNCTION static inline page_t *pgno2page(const MDBX_env *env, + size_t pgno) { + return 
ptr_disp(env->dxb_mmap.base, pgno2bytes(env, pgno)); +} + +MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t bytes2pgno(const MDBX_env *env, + size_t bytes) { + eASSERT(env, (env->ps >> env->ps2ln) == 1); + return (pgno_t)(bytes >> env->ps2ln); +} + +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL size_t +bytes_align2os_bytes(const MDBX_env *env, size_t bytes); + +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL size_t +pgno_align2os_bytes(const MDBX_env *env, size_t pgno); + +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL pgno_t +pgno_align2os_pgno(const MDBX_env *env, size_t pgno); + +MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t +largechunk_npages(const MDBX_env *env, size_t bytes) { + return bytes2pgno(env, PAGEHDRSZ - 1 + bytes) + 1; +} + +MDBX_NOTHROW_PURE_FUNCTION static inline MDBX_val get_key(const node_t *node) { + MDBX_val key; + key.iov_len = node_ks(node); + key.iov_base = node_key(node); + return key; +} + +static inline void get_key_optional(const node_t *node, + MDBX_val *keyptr /* __may_null */) { + if (keyptr) + *keyptr = get_key(node); +} + +MDBX_NOTHROW_PURE_FUNCTION static inline void *page_data(const page_t *mp) { + return ptr_disp(mp, PAGEHDRSZ); +} + +MDBX_NOTHROW_PURE_FUNCTION static inline const page_t * +data_page(const void *data) { + return container_of(data, page_t, entries); +} + +MDBX_NOTHROW_PURE_FUNCTION static inline meta_t *page_meta(page_t *mp) { + return (meta_t *)page_data(mp); +} + +MDBX_NOTHROW_PURE_FUNCTION static inline size_t page_numkeys(const page_t *mp) { + return mp->lower >> 1; +} + +MDBX_NOTHROW_PURE_FUNCTION static inline size_t page_room(const page_t *mp) { + return mp->upper - mp->lower; +} + +MDBX_NOTHROW_PURE_FUNCTION static inline size_t +page_space(const MDBX_env *env) { + STATIC_ASSERT(PAGEHDRSZ % 2 == 0); + return env->ps - PAGEHDRSZ; +} + +MDBX_NOTHROW_PURE_FUNCTION static inline size_t page_used(const MDBX_env *env, + const page_t *mp) { + return page_space(env) - page_room(mp); +} + +/* The percentage of space used in the 
page, in a percents. */ +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline unsigned +page_fill_percentum_x10(const MDBX_env *env, const page_t *mp) { + const size_t space = page_space(env); + return (unsigned)((page_used(env, mp) * 1000 + space / 2) / space); +} + +MDBX_NOTHROW_PURE_FUNCTION static inline node_t *page_node(const page_t *mp, + size_t i) { + assert(page_type_compat(mp) == P_LEAF || page_type(mp) == P_BRANCH); + assert(page_numkeys(mp) > i); + assert(mp->entries[i] % 2 == 0); + return ptr_disp(mp, mp->entries[i] + PAGEHDRSZ); +} + +MDBX_NOTHROW_PURE_FUNCTION static inline void * +page_dupfix_ptr(const page_t *mp, size_t i, size_t keysize) { + assert(page_type_compat(mp) == (P_LEAF | P_DUPFIX) && i == (indx_t)i && + mp->dupfix_ksize == keysize); + (void)keysize; + return ptr_disp(mp, PAGEHDRSZ + mp->dupfix_ksize * (indx_t)i); +} + +MDBX_NOTHROW_PURE_FUNCTION static inline MDBX_val +page_dupfix_key(const page_t *mp, size_t i, size_t keysize) { + MDBX_val r; + r.iov_base = page_dupfix_ptr(mp, i, keysize); + r.iov_len = mp->dupfix_ksize; + return r; +} + +/*----------------------------------------------------------------------------*/ + +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int +cmp_int_unaligned(const MDBX_val *a, const MDBX_val *b); + +#if MDBX_UNALIGNED_OK < 2 || \ + (MDBX_DEBUG || MDBX_FORCE_ASSERTIONS || !defined(NDEBUG)) +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int +/* Compare two items pointing at 2-byte aligned unsigned int's. */ +cmp_int_align2(const MDBX_val *a, const MDBX_val *b); +#else +#define cmp_int_align2 cmp_int_unaligned +#endif /* !MDBX_UNALIGNED_OK || debug */ + +#if MDBX_UNALIGNED_OK < 4 || \ + (MDBX_DEBUG || MDBX_FORCE_ASSERTIONS || !defined(NDEBUG)) +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int +/* Compare two items pointing at 4-byte aligned unsigned int's. 
*/ +cmp_int_align4(const MDBX_val *a, const MDBX_val *b); +#else +#define cmp_int_align4 cmp_int_unaligned +#endif /* !MDBX_UNALIGNED_OK || debug */ + +/* Compare two items lexically */ +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_lexical(const MDBX_val *a, + const MDBX_val *b); + +/* Compare two items in reverse byte order */ +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_reverse(const MDBX_val *a, + const MDBX_val *b); + +/* Fast non-lexically comparator */ +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_lenfast(const MDBX_val *a, + const MDBX_val *b); + +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL bool +eq_fast_slowpath(const uint8_t *a, const uint8_t *b, size_t l); + +MDBX_NOTHROW_PURE_FUNCTION static inline bool eq_fast(const MDBX_val *a, + const MDBX_val *b) { + return unlikely(a->iov_len == b->iov_len) && + eq_fast_slowpath(a->iov_base, b->iov_base, a->iov_len); +} + +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int +cmp_equal_or_greater(const MDBX_val *a, const MDBX_val *b); + +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int +cmp_equal_or_wrong(const MDBX_val *a, const MDBX_val *b); + +static inline MDBX_cmp_func *builtin_keycmp(MDBX_db_flags_t flags) { + return (flags & MDBX_REVERSEKEY) ? cmp_reverse + : (flags & MDBX_INTEGERKEY) ? cmp_int_align2 + : cmp_lexical; +} + +static inline MDBX_cmp_func *builtin_datacmp(MDBX_db_flags_t flags) { + return !(flags & MDBX_DUPSORT) + ? cmp_lenfast + : ((flags & MDBX_INTEGERDUP) + ? cmp_int_unaligned + : ((flags & MDBX_REVERSEDUP) ? 
cmp_reverse : cmp_lexical)); +} + +/*----------------------------------------------------------------------------*/ + +MDBX_INTERNAL uint32_t combine_durability_flags(const uint32_t a, + const uint32_t b); + +MDBX_CONST_FUNCTION static inline lck_t *lckless_stub(const MDBX_env *env) { + uintptr_t stub = (uintptr_t)&env->lckless_placeholder; + /* align to avoid false-positive alarm from UndefinedBehaviorSanitizer */ + stub = (stub + MDBX_CACHELINE_SIZE - 1) & ~(MDBX_CACHELINE_SIZE - 1); + return (lck_t *)stub; +} + +#if !(defined(_WIN32) || defined(_WIN64)) +MDBX_MAYBE_UNUSED static inline int ignore_enosys(int err) { +#ifdef ENOSYS + if (err == ENOSYS) + return MDBX_RESULT_TRUE; +#endif /* ENOSYS */ +#ifdef ENOIMPL + if (err == ENOIMPL) + return MDBX_RESULT_TRUE; +#endif /* ENOIMPL */ +#ifdef ENOTSUP + if (err == ENOTSUP) + return MDBX_RESULT_TRUE; +#endif /* ENOTSUP */ +#ifdef ENOSUPP + if (err == ENOSUPP) + return MDBX_RESULT_TRUE; +#endif /* ENOSUPP */ +#ifdef EOPNOTSUPP + if (err == EOPNOTSUPP) + return MDBX_RESULT_TRUE; +#endif /* EOPNOTSUPP */ + if (err == EAGAIN) + return MDBX_RESULT_TRUE; + return err; +} +#endif /* !(defined(_WIN32) || defined(_WIN64)) */ + +static inline int check_env(const MDBX_env *env, const bool wanna_active) { + if (unlikely(!env)) + return MDBX_EINVAL; + + if (unlikely(env->signature.weak != env_signature)) + return MDBX_EBADSIGN; + + if (unlikely(env->flags & ENV_FATAL_ERROR)) + return MDBX_PANIC; + + if (wanna_active) { +#if MDBX_ENV_CHECKPID + if (unlikely(env->pid != osal_getpid()) && env->pid) { + ((MDBX_env *)env)->flags |= ENV_FATAL_ERROR; + return MDBX_PANIC; + } +#endif /* MDBX_ENV_CHECKPID */ + if (unlikely((env->flags & ENV_ACTIVE) == 0)) + return MDBX_EPERM; + eASSERT(env, env->dxb_mmap.base != nullptr); + } + + return MDBX_SUCCESS; +} + +static inline int check_txn(const MDBX_txn *txn, int bad_bits) { + if (unlikely(!txn)) + return MDBX_EINVAL; + + if (unlikely(txn->signature != txn_signature)) + return MDBX_EBADSIGN; + + 
if (unlikely(txn->flags & bad_bits)) + return MDBX_BAD_TXN; + + tASSERT(txn, (txn->flags & MDBX_TXN_FINISHED) || + (txn->flags & MDBX_NOSTICKYTHREADS) == + (txn->env->flags & MDBX_NOSTICKYTHREADS)); +#if MDBX_TXN_CHECKOWNER + STATIC_ASSERT((long)MDBX_NOSTICKYTHREADS > (long)MDBX_TXN_FINISHED); + if ((txn->flags & (MDBX_NOSTICKYTHREADS | MDBX_TXN_FINISHED)) < + MDBX_TXN_FINISHED && + unlikely(txn->owner != osal_thread_self())) + return txn->owner ? MDBX_THREAD_MISMATCH : MDBX_BAD_TXN; +#endif /* MDBX_TXN_CHECKOWNER */ + + if (bad_bits && unlikely(!txn->env->dxb_mmap.base)) + return MDBX_EPERM; + + return MDBX_SUCCESS; +} + +static inline int check_txn_rw(const MDBX_txn *txn, int bad_bits) { + int err = check_txn(txn, bad_bits); + if (unlikely(err)) + return err; + + if (unlikely(txn->flags & MDBX_TXN_RDONLY)) + return MDBX_EACCESS; + + return MDBX_SUCCESS; +} + +/*----------------------------------------------------------------------------*/ + +MDBX_INTERNAL void mincore_clean_cache(const MDBX_env *const env); + +MDBX_INTERNAL void update_mlcnt(const MDBX_env *env, + const pgno_t new_aligned_mlocked_pgno, + const bool lock_not_release); + +MDBX_INTERNAL void munlock_after(const MDBX_env *env, const pgno_t aligned_pgno, + const size_t end_bytes); + +MDBX_INTERNAL void munlock_all(const MDBX_env *env); + +/*----------------------------------------------------------------------------*/ +/* Cache coherence and mmap invalidation */ +#ifndef MDBX_CPU_WRITEBACK_INCOHERENT +#error "The MDBX_CPU_WRITEBACK_INCOHERENT must be defined before" +#elif MDBX_CPU_WRITEBACK_INCOHERENT +#define osal_flush_incoherent_cpu_writeback() osal_memory_barrier() +#else +#define osal_flush_incoherent_cpu_writeback() osal_compiler_barrier() +#endif /* MDBX_CPU_WRITEBACK_INCOHERENT */ + +MDBX_MAYBE_UNUSED static inline void +osal_flush_incoherent_mmap(const void *addr, size_t nbytes, + const intptr_t pagesize) { +#ifndef MDBX_MMAP_INCOHERENT_FILE_WRITE +#error "The MDBX_MMAP_INCOHERENT_FILE_WRITE 
must be defined before" +#elif MDBX_MMAP_INCOHERENT_FILE_WRITE + char *const begin = (char *)(-pagesize & (intptr_t)addr); + char *const end = + (char *)(-pagesize & (intptr_t)((char *)addr + nbytes + pagesize - 1)); + int err = msync(begin, end - begin, MS_SYNC | MS_INVALIDATE) ? errno : 0; + eASSERT(nullptr, err == 0); + (void)err; +#else + (void)pagesize; +#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ + +#ifndef MDBX_MMAP_INCOHERENT_CPU_CACHE +#error "The MDBX_MMAP_INCOHERENT_CPU_CACHE must be defined before" +#elif MDBX_MMAP_INCOHERENT_CPU_CACHE +#ifdef DCACHE + /* MIPS has cache coherency issues. + * Note: for any nbytes >= on-chip cache size, entire is flushed. */ + cacheflush((void *)addr, nbytes, DCACHE); +#else +#error "Oops, cacheflush() not available" +#endif /* DCACHE */ +#endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */ + +#if !MDBX_MMAP_INCOHERENT_FILE_WRITE && !MDBX_MMAP_INCOHERENT_CPU_CACHE + (void)addr; + (void)nbytes; +#endif +} diff --git a/src/coherency.c b/src/coherency.c new file mode 100644 index 00000000..4bab049f --- /dev/null +++ b/src/coherency.c @@ -0,0 +1,198 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +/* check against https://libmdbx.dqdkfa.ru/dead-github/issues/269 */ +static bool coherency_check(const MDBX_env *env, const txnid_t txnid, + const volatile tree_t *trees, + const volatile meta_t *meta, bool report) { + const txnid_t freedb_mod_txnid = trees[FREE_DBI].mod_txnid; + const txnid_t maindb_mod_txnid = trees[MAIN_DBI].mod_txnid; + const pgno_t last_pgno = meta->geometry.now; + + const pgno_t freedb_root_pgno = trees[FREE_DBI].root; + const page_t *freedb_root = + (env->dxb_mmap.base && freedb_root_pgno < last_pgno) + ? pgno2page(env, freedb_root_pgno) + : nullptr; + + const pgno_t maindb_root_pgno = trees[MAIN_DBI].root; + const page_t *maindb_root = + (env->dxb_mmap.base && maindb_root_pgno < last_pgno) + ? 
pgno2page(env, maindb_root_pgno) + : nullptr; + const uint64_t magic_and_version = + unaligned_peek_u64_volatile(4, &meta->magic_and_version); + + bool ok = true; + if (freedb_root_pgno != P_INVALID && + unlikely(freedb_root_pgno >= last_pgno)) { + if (report) + WARNING( + "catch invalid %sdb root %" PRIaPGNO " for meta_txnid %" PRIaTXN + " %s", + "free", freedb_root_pgno, txnid, + (env->stuck_meta < 0) + ? "(workaround for incoherent flaw of unified page/buffer cache)" + : "(wagering meta)"); + ok = false; + } + if (maindb_root_pgno != P_INVALID && + unlikely(maindb_root_pgno >= last_pgno)) { + if (report) + WARNING( + "catch invalid %sdb root %" PRIaPGNO " for meta_txnid %" PRIaTXN + " %s", + "main", maindb_root_pgno, txnid, + (env->stuck_meta < 0) + ? "(workaround for incoherent flaw of unified page/buffer cache)" + : "(wagering meta)"); + ok = false; + } + if (unlikely(txnid < freedb_mod_txnid || + (!freedb_mod_txnid && freedb_root && + likely(magic_and_version == MDBX_DATA_MAGIC)))) { + if (report) + WARNING( + "catch invalid %sdb.mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN + " %s", + "free", freedb_mod_txnid, txnid, + (env->stuck_meta < 0) + ? "(workaround for incoherent flaw of unified page/buffer cache)" + : "(wagering meta)"); + ok = false; + } + if (unlikely(txnid < maindb_mod_txnid || + (!maindb_mod_txnid && maindb_root && + likely(magic_and_version == MDBX_DATA_MAGIC)))) { + if (report) + WARNING( + "catch invalid %sdb.mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN + " %s", + "main", maindb_mod_txnid, txnid, + (env->stuck_meta < 0) + ? 
"(workaround for incoherent flaw of unified page/buffer cache)" + : "(wagering meta)"); + ok = false; + } + if (likely(freedb_root && freedb_mod_txnid)) { + VALGRIND_MAKE_MEM_DEFINED(freedb_root, sizeof(freedb_root->txnid)); + MDBX_ASAN_UNPOISON_MEMORY_REGION(freedb_root, sizeof(freedb_root->txnid)); + const txnid_t root_txnid = freedb_root->txnid; + if (unlikely(root_txnid != freedb_mod_txnid)) { + if (report) + WARNING("catch invalid root_page %" PRIaPGNO " mod_txnid %" PRIaTXN + " for %sdb.mod_txnid %" PRIaTXN " %s", + freedb_root_pgno, root_txnid, "free", freedb_mod_txnid, + (env->stuck_meta < 0) ? "(workaround for incoherent flaw of " + "unified page/buffer cache)" + : "(wagering meta)"); + ok = false; + } + } + if (likely(maindb_root && maindb_mod_txnid)) { + VALGRIND_MAKE_MEM_DEFINED(maindb_root, sizeof(maindb_root->txnid)); + MDBX_ASAN_UNPOISON_MEMORY_REGION(maindb_root, sizeof(maindb_root->txnid)); + const txnid_t root_txnid = maindb_root->txnid; + if (unlikely(root_txnid != maindb_mod_txnid)) { + if (report) + WARNING("catch invalid root_page %" PRIaPGNO " mod_txnid %" PRIaTXN + " for %sdb.mod_txnid %" PRIaTXN " %s", + maindb_root_pgno, root_txnid, "main", maindb_mod_txnid, + (env->stuck_meta < 0) ? "(workaround for incoherent flaw of " + "unified page/buffer cache)" + : "(wagering meta)"); + ok = false; + } + } + if (unlikely(!ok) && report) + env->lck->pgops.incoherence.weak = + (env->lck->pgops.incoherence.weak >= INT32_MAX) + ? 
INT32_MAX + : env->lck->pgops.incoherence.weak + 1; + return ok; +} + +__cold int coherency_timeout(uint64_t *timestamp, intptr_t pgno, + const MDBX_env *env) { + if (likely(timestamp && *timestamp == 0)) + *timestamp = osal_monotime(); + else if (unlikely(!timestamp || osal_monotime() - *timestamp > + osal_16dot16_to_monotime(65536 / 10))) { + if (pgno >= 0 && pgno != env->stuck_meta) + ERROR("bailout waiting for %" PRIuSIZE " page arrival %s", pgno, + "(workaround for incoherent flaw of unified page/buffer cache)"); + else if (env->stuck_meta < 0) + ERROR("bailout waiting for valid snapshot (%s)", + "workaround for incoherent flaw of unified page/buffer cache"); + return MDBX_PROBLEM; + } + + osal_memory_fence(mo_AcquireRelease, true); +#if defined(_WIN32) || defined(_WIN64) + SwitchToThread(); +#elif defined(__linux__) || defined(__gnu_linux__) || defined(_UNIX03_SOURCE) + sched_yield(); +#elif (defined(_GNU_SOURCE) && __GLIBC_PREREQ(2, 1)) || defined(_OPEN_THREADS) + pthread_yield(); +#else + usleep(42); +#endif + return MDBX_RESULT_TRUE; +} + +/* check with timeout as the workaround + * for https://libmdbx.dqdkfa.ru/dead-github/issues/269 */ +__hot int coherency_check_head(MDBX_txn *txn, const meta_ptr_t head, + uint64_t *timestamp) { + /* Copy the DB info and flags */ + txn->geo = head.ptr_v->geometry; + memcpy(txn->dbs, &head.ptr_c->trees, sizeof(head.ptr_c->trees)); + STATIC_ASSERT(sizeof(head.ptr_c->trees) == CORE_DBS * sizeof(tree_t)); + VALGRIND_MAKE_MEM_UNDEFINED(txn->dbs + CORE_DBS, + txn->env->max_dbi - CORE_DBS); + txn->canary = head.ptr_v->canary; + + if (unlikely(!coherency_check(txn->env, head.txnid, txn->dbs, head.ptr_v, + *timestamp == 0))) + return coherency_timeout(timestamp, -1, txn->env); + + tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY); + tASSERT(txn, check_sdb_flags(txn->dbs[MAIN_DBI].flags)); + return MDBX_SUCCESS; +} + +int coherency_check_written(const MDBX_env *env, const txnid_t txnid, + const volatile meta_t *meta, const 
intptr_t pgno, + uint64_t *timestamp) { + const bool report = !(timestamp && *timestamp); + const txnid_t head_txnid = meta_txnid(meta); + if (unlikely(head_txnid < MIN_TXNID || head_txnid < txnid)) { + if (report) { + env->lck->pgops.incoherence.weak = + (env->lck->pgops.incoherence.weak >= INT32_MAX) + ? INT32_MAX + : env->lck->pgops.incoherence.weak + 1; + WARNING("catch %s txnid %" PRIaTXN " for meta_%" PRIaPGNO " %s", + (head_txnid < MIN_TXNID) ? "invalid" : "unexpected", head_txnid, + bytes2pgno(env, ptr_dist(meta, env->dxb_mmap.base)), + "(workaround for incoherent flaw of unified page/buffer cache)"); + } + return coherency_timeout(timestamp, pgno, env); + } + if (unlikely( + !coherency_check(env, head_txnid, &meta->trees.gc, meta, report))) + return coherency_timeout(timestamp, pgno, env); + + eASSERT(env, meta->trees.gc.flags == MDBX_INTEGERKEY); + eASSERT(env, check_sdb_flags(meta->trees.main.flags)); + return MDBX_SUCCESS; +} + +bool coherency_check_meta(const MDBX_env *env, const volatile meta_t *meta, + bool report) { + uint64_t timestamp = 0; + return coherency_check_written(env, 0, meta, -1, + report ? &timestamp : nullptr) == MDBX_SUCCESS; +} diff --git a/src/cold.c b/src/cold.c new file mode 100644 index 00000000..d4425ae4 --- /dev/null +++ b/src/cold.c @@ -0,0 +1,768 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +__cold size_t mdbx_default_pagesize(void) { + size_t pagesize = globals.sys_pagesize; + ENSURE(nullptr, is_powerof2(pagesize)); + pagesize = (pagesize >= MDBX_MIN_PAGESIZE) ? pagesize : MDBX_MIN_PAGESIZE; + pagesize = (pagesize <= MDBX_MAX_PAGESIZE) ?
pagesize : MDBX_MAX_PAGESIZE; + return pagesize; +} + +__cold intptr_t mdbx_limits_dbsize_min(intptr_t pagesize) { + if (pagesize < 1) + pagesize = (intptr_t)mdbx_default_pagesize(); + else if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || + pagesize > (intptr_t)MDBX_MAX_PAGESIZE || + !is_powerof2((size_t)pagesize))) + return -1; + + return MIN_PAGENO * pagesize; +} + +__cold intptr_t mdbx_limits_dbsize_max(intptr_t pagesize) { + if (pagesize < 1) + pagesize = (intptr_t)mdbx_default_pagesize(); + else if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || + pagesize > (intptr_t)MDBX_MAX_PAGESIZE || + !is_powerof2((size_t)pagesize))) + return -1; + + STATIC_ASSERT(MAX_MAPSIZE < INTPTR_MAX); + const uint64_t limit = (1 + (uint64_t)MAX_PAGENO) * pagesize; + return (limit < MAX_MAPSIZE) ? (intptr_t)limit : (intptr_t)MAX_MAPSIZE; +} + +__cold intptr_t mdbx_limits_txnsize_max(intptr_t pagesize) { + if (pagesize < 1) + pagesize = (intptr_t)mdbx_default_pagesize(); + else if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || + pagesize > (intptr_t)MDBX_MAX_PAGESIZE || + !is_powerof2((size_t)pagesize))) + return -1; + + STATIC_ASSERT(MAX_MAPSIZE < INTPTR_MAX); + const uint64_t pgl_limit = + pagesize * (uint64_t)(PAGELIST_LIMIT / MDBX_GOLD_RATIO_DBL); + const uint64_t map_limit = (uint64_t)(MAX_MAPSIZE / MDBX_GOLD_RATIO_DBL); + return (pgl_limit < map_limit) ? 
(intptr_t)pgl_limit : (intptr_t)map_limit; +} + +__cold intptr_t mdbx_limits_keysize_max(intptr_t pagesize, + MDBX_db_flags_t flags) { + if (pagesize < 1) + pagesize = (intptr_t)mdbx_default_pagesize(); + if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || + pagesize > (intptr_t)MDBX_MAX_PAGESIZE || + !is_powerof2((size_t)pagesize))) + return -1; + + return keysize_max(pagesize, flags); +} + +__cold int mdbx_env_get_maxkeysize_ex(const MDBX_env *env, + MDBX_db_flags_t flags) { + if (unlikely(!env || env->signature.weak != env_signature)) + return -1; + + return (int)mdbx_limits_keysize_max((intptr_t)env->ps, flags); +} + +__cold int mdbx_env_get_maxkeysize(const MDBX_env *env) { + return mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT); +} + +__cold intptr_t mdbx_limits_keysize_min(MDBX_db_flags_t flags) { + return keysize_min(flags); +} + +__cold intptr_t mdbx_limits_valsize_max(intptr_t pagesize, + MDBX_db_flags_t flags) { + if (pagesize < 1) + pagesize = (intptr_t)mdbx_default_pagesize(); + if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || + pagesize > (intptr_t)MDBX_MAX_PAGESIZE || + !is_powerof2((size_t)pagesize))) + return -1; + + return valsize_max(pagesize, flags); +} + +__cold int mdbx_env_get_maxvalsize_ex(const MDBX_env *env, + MDBX_db_flags_t flags) { + if (unlikely(!env || env->signature.weak != env_signature)) + return -1; + + return (int)mdbx_limits_valsize_max((intptr_t)env->ps, flags); +} + +__cold intptr_t mdbx_limits_valsize_min(MDBX_db_flags_t flags) { + return valsize_min(flags); +} + +__cold intptr_t mdbx_limits_pairsize4page_max(intptr_t pagesize, + MDBX_db_flags_t flags) { + if (pagesize < 1) + pagesize = (intptr_t)mdbx_default_pagesize(); + if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || + pagesize > (intptr_t)MDBX_MAX_PAGESIZE || + !is_powerof2((size_t)pagesize))) + return -1; + + if (flags & + (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP)) + return BRANCH_NODE_MAX(pagesize) - NODESIZE; + + return 
LEAF_NODE_MAX(pagesize) - NODESIZE; +} + +__cold int mdbx_env_get_pairsize4page_max(const MDBX_env *env, + MDBX_db_flags_t flags) { + if (unlikely(!env || env->signature.weak != env_signature)) + return -1; + + return (int)mdbx_limits_pairsize4page_max((intptr_t)env->ps, flags); +} + +__cold intptr_t mdbx_limits_valsize4page_max(intptr_t pagesize, + MDBX_db_flags_t flags) { + if (pagesize < 1) + pagesize = (intptr_t)mdbx_default_pagesize(); + if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || + pagesize > (intptr_t)MDBX_MAX_PAGESIZE || + !is_powerof2((size_t)pagesize))) + return -1; + + if (flags & + (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP)) + return valsize_max(pagesize, flags); + + return PAGESPACE(pagesize); +} + +__cold int mdbx_env_get_valsize4page_max(const MDBX_env *env, + MDBX_db_flags_t flags) { + if (unlikely(!env || env->signature.weak != env_signature)) + return -1; + + return (int)mdbx_limits_valsize4page_max((intptr_t)env->ps, flags); +} + +/*----------------------------------------------------------------------------*/ + +__cold static void stat_add(const tree_t *db, MDBX_stat *const st, + const size_t bytes) { + st->ms_depth += db->height; + st->ms_branch_pages += db->branch_pages; + st->ms_leaf_pages += db->leaf_pages; + st->ms_overflow_pages += db->large_pages; + st->ms_entries += db->items; + if (likely(bytes >= + offsetof(MDBX_stat, ms_mod_txnid) + sizeof(st->ms_mod_txnid))) + st->ms_mod_txnid = + (st->ms_mod_txnid > db->mod_txnid) ? 
st->ms_mod_txnid : db->mod_txnid; +} + +__cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { + int err = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(err != MDBX_SUCCESS)) + return err; + + cursor_couple_t cx; + err = cursor_init(&cx.outer, (MDBX_txn *)txn, MAIN_DBI); + if (unlikely(err != MDBX_SUCCESS)) + return err; + + const MDBX_env *const env = txn->env; + st->ms_psize = env->ps; + TXN_FOREACH_DBI_FROM( + txn, dbi, + /* assuming GC is internal and not subject for accounting */ MAIN_DBI) { + if ((txn->dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID) + stat_add(txn->dbs + dbi, st, bytes); + } + + if (!(txn->dbs[MAIN_DBI].flags & MDBX_DUPSORT) && + txn->dbs[MAIN_DBI].items /* TODO: use `md_subs` field */) { + + /* scan and account not opened named subDBs */ + err = tree_search(&cx.outer, nullptr, Z_FIRST); + while (err == MDBX_SUCCESS) { + const page_t *mp = cx.outer.pg[cx.outer.top]; + for (size_t i = 0; i < page_numkeys(mp); i++) { + const node_t *node = page_node(mp, i); + if (node_flags(node) != N_SUBDATA) + continue; + if (unlikely(node_ds(node) != sizeof(tree_t))) { + ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid subDb node size", node_ds(node)); + return MDBX_CORRUPTED; + } + + /* skip opened and already accounted */ + const MDBX_val name = {node_key(node), node_ks(node)}; + TXN_FOREACH_DBI_USER(txn, dbi) { + if ((txn->dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID && + env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[dbi].name) == 0) { + node = nullptr; + break; + } + } + + if (node) { + tree_t db; + memcpy(&db, node_data(node), sizeof(db)); + stat_add(&db, st, bytes); + } + } + err = cursor_sibling_right(&cx.outer); + } + if (unlikely(err != MDBX_NOTFOUND)) + return err; + } + + return MDBX_SUCCESS; +} + +__cold int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, + MDBX_stat *dest, size_t bytes) { + if (unlikely(!dest)) + return MDBX_EINVAL; + const size_t size_before_modtxnid = 
offsetof(MDBX_stat, ms_mod_txnid); + if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) + return MDBX_EINVAL; + + if (likely(txn)) { + if (env && unlikely(txn->env != env)) + return MDBX_EINVAL; + return stat_acc(txn, dest, bytes); + } + + int err = check_env(env, true); + if (unlikely(err != MDBX_SUCCESS)) + return err; + + if (env->txn && env_txn0_owned(env)) + /* inside write-txn */ + return stat_acc(env->txn, dest, bytes); + + MDBX_txn *tmp_txn; + err = mdbx_txn_begin((MDBX_env *)env, nullptr, MDBX_TXN_RDONLY, &tmp_txn); + if (unlikely(err != MDBX_SUCCESS)) + return err; + + const int rc = stat_acc(tmp_txn, dest, bytes); + err = mdbx_txn_abort(tmp_txn); + if (unlikely(err != MDBX_SUCCESS)) + return err; + return rc; +} + +/*----------------------------------------------------------------------------*/ + +static size_t estimate_rss(size_t database_bytes) { + return database_bytes + database_bytes / 64 + + (512 + MDBX_WORDBITS * 16) * MEGABYTE; +} + +__cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, + MDBX_warmup_flags_t flags, + unsigned timeout_seconds_16dot16) { + if (unlikely(env == nullptr && txn == nullptr)) + return MDBX_EINVAL; + if (unlikely(flags > + (MDBX_warmup_force | MDBX_warmup_oomsafe | MDBX_warmup_lock | + MDBX_warmup_touchlimit | MDBX_warmup_release))) + return MDBX_EINVAL; + + if (txn) { + int err = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + if (env) { + int err = check_env(env, false); + if (unlikely(err != MDBX_SUCCESS)) + return err; + if (txn && unlikely(txn->env != env)) + return MDBX_EINVAL; + } else { + env = txn->env; + } + + const uint64_t timeout_monotime = + (timeout_seconds_16dot16 && (flags & MDBX_warmup_force)) + ? 
osal_monotime() + osal_16dot16_to_monotime(timeout_seconds_16dot16) + : 0; + + if (flags & MDBX_warmup_release) + munlock_all(env); + + pgno_t used_pgno; + if (txn) { + used_pgno = txn->geo.first_unallocated; + } else { + const troika_t troika = meta_tap(env); + used_pgno = meta_recent(env, &troika).ptr_v->geometry.first_unallocated; + } + const size_t used_range = pgno_align2os_bytes(env, used_pgno); + const pgno_t mlock_pgno = bytes2pgno(env, used_range); + + int rc = MDBX_SUCCESS; + if (flags & MDBX_warmup_touchlimit) { + const size_t estimated_rss = estimate_rss(used_range); +#if defined(_WIN32) || defined(_WIN64) + SIZE_T current_ws_lower, current_ws_upper; + if (GetProcessWorkingSetSize(GetCurrentProcess(), &current_ws_lower, + &current_ws_upper) && + current_ws_lower < estimated_rss) { + const SIZE_T ws_lower = estimated_rss; + const SIZE_T ws_upper = + (MDBX_WORDBITS == 32 && ws_lower > MEGABYTE * 2048) + ? ws_lower + : ws_lower + MDBX_WORDBITS * MEGABYTE * 32; + if (!SetProcessWorkingSetSize(GetCurrentProcess(), ws_lower, ws_upper)) { + rc = (int)GetLastError(); + WARNING("SetProcessWorkingSetSize(%zu, %zu) error %d", ws_lower, + ws_upper, rc); + } + } +#endif /* Windows */ +#ifdef RLIMIT_RSS + struct rlimit rss; + if (getrlimit(RLIMIT_RSS, &rss) == 0 && rss.rlim_cur < estimated_rss) { + rss.rlim_cur = estimated_rss; + if (rss.rlim_max < estimated_rss) + rss.rlim_max = estimated_rss; + if (setrlimit(RLIMIT_RSS, &rss)) { + rc = errno; + WARNING("setrlimit(%s, {%zu, %zu}) error %d", "RLIMIT_RSS", + (size_t)rss.rlim_cur, (size_t)rss.rlim_max, rc); + } + } +#endif /* RLIMIT_RSS */ +#ifdef RLIMIT_MEMLOCK + if (flags & MDBX_warmup_lock) { + struct rlimit memlock; + if (getrlimit(RLIMIT_MEMLOCK, &memlock) == 0 && + memlock.rlim_cur < estimated_rss) { + memlock.rlim_cur = estimated_rss; + if (memlock.rlim_max < estimated_rss) + memlock.rlim_max = estimated_rss; + if (setrlimit(RLIMIT_MEMLOCK, &memlock)) { + rc = errno; + WARNING("setrlimit(%s, {%zu, %zu}) error %d",
"RLIMIT_MEMLOCK", + (size_t)memlock.rlim_cur, (size_t)memlock.rlim_max, rc); + } + } + } +#endif /* RLIMIT_MEMLOCK */ + (void)estimated_rss; + } + +#if defined(MLOCK_ONFAULT) && \ + ((defined(_GNU_SOURCE) && __GLIBC_PREREQ(2, 27)) || \ + (defined(__ANDROID_API__) && __ANDROID_API__ >= 30)) && \ + (defined(__linux__) || defined(__gnu_linux__)) + if ((flags & MDBX_warmup_lock) != 0 && + globals.linux_kernel_version >= 0x04040000 && + atomic_load32(&env->mlocked_pgno, mo_AcquireRelease) < mlock_pgno) { + if (mlock2(env->dxb_mmap.base, used_range, MLOCK_ONFAULT)) { + rc = errno; + WARNING("mlock2(%zu, %s) error %d", used_range, "MLOCK_ONFAULT", rc); + } else { + update_mlcnt(env, mlock_pgno, true); + rc = MDBX_SUCCESS; + } + if (rc != EINVAL) + flags -= MDBX_warmup_lock; + } +#endif /* MLOCK_ONFAULT */ + + int err = MDBX_ENOSYS; +#if MDBX_ENABLE_MADVISE + err = dxb_set_readahead(env, used_pgno, true, true); +#else +#if defined(_WIN32) || defined(_WIN64) + if (imports.PrefetchVirtualMemory) { + WIN32_MEMORY_RANGE_ENTRY hint; + hint.VirtualAddress = env->dxb_mmap.base; + hint.NumberOfBytes = used_range; + if (imports.PrefetchVirtualMemory(GetCurrentProcess(), 1, &hint, 0)) + err = MDBX_SUCCESS; + else { + err = (int)GetLastError(); + ERROR("%s(%zu) error %d", "PrefetchVirtualMemory", used_range, err); + } + } +#endif /* Windows */ + +#if defined(POSIX_MADV_WILLNEED) + err = posix_madvise(env->dxb_mmap.base, used_range, POSIX_MADV_WILLNEED) + ? ignore_enosys(errno) + : MDBX_SUCCESS; +#elif defined(MADV_WILLNEED) + err = madvise(env->dxb_mmap.base, used_range, MADV_WILLNEED) + ? ignore_enosys(errno) + : MDBX_SUCCESS; +#endif + +#if defined(F_RDADVISE) + if (err) { + fcntl(env->lazy_fd, F_RDAHEAD, true); + struct radvisory hint; + hint.ra_offset = 0; + hint.ra_count = unlikely(used_range > INT_MAX && + sizeof(used_range) > sizeof(hint.ra_count)) + ? INT_MAX + : (int)used_range; + err = fcntl(env->lazy_fd, F_RDADVISE, &hint) ? 
ignore_enosys(errno) + : MDBX_SUCCESS; + if (err == ENOTTY) + err = MDBX_SUCCESS /* Ignore ENOTTY for DB on the ram-disk */; + } +#endif /* F_RDADVISE */ +#endif /* MDBX_ENABLE_MADVISE */ + if (err != MDBX_SUCCESS && rc == MDBX_SUCCESS) + rc = err; + + if ((flags & MDBX_warmup_force) != 0 && + (rc == MDBX_SUCCESS || rc == MDBX_ENOSYS)) { + const volatile uint8_t *ptr = env->dxb_mmap.base; + size_t offset = 0, unused = 42; +#if !(defined(_WIN32) || defined(_WIN64)) + if (flags & MDBX_warmup_oomsafe) { + const int null_fd = open("/dev/null", O_WRONLY); + if (unlikely(null_fd < 0)) + rc = errno; + else { + struct iovec iov[MDBX_AUXILARY_IOV_MAX]; + for (;;) { + unsigned i; + for (i = 0; i < MDBX_AUXILARY_IOV_MAX && offset < used_range; ++i) { + iov[i].iov_base = (void *)(ptr + offset); + iov[i].iov_len = 1; + offset += globals.sys_pagesize; + } + if (unlikely(writev(null_fd, iov, i) < 0)) { + rc = errno; + if (rc == EFAULT) + rc = ENOMEM; + break; + } + if (offset >= used_range) { + rc = MDBX_SUCCESS; + break; + } + if (timeout_seconds_16dot16 && osal_monotime() > timeout_monotime) { + rc = MDBX_RESULT_TRUE; + break; + } + } + close(null_fd); + } + } else +#endif /* Windows */ + for (;;) { + unused += ptr[offset]; + offset += globals.sys_pagesize; + if (offset >= used_range) { + rc = MDBX_SUCCESS; + break; + } + if (timeout_seconds_16dot16 && osal_monotime() > timeout_monotime) { + rc = MDBX_RESULT_TRUE; + break; + } + } + (void)unused; + } + + if ((flags & MDBX_warmup_lock) != 0 && + (rc == MDBX_SUCCESS || rc == MDBX_ENOSYS) && + atomic_load32(&env->mlocked_pgno, mo_AcquireRelease) < mlock_pgno) { +#if defined(_WIN32) || defined(_WIN64) + if (VirtualLock(env->dxb_mmap.base, used_range)) { + update_mlcnt(env, mlock_pgno, true); + rc = MDBX_SUCCESS; + } else { + rc = (int)GetLastError(); + WARNING("%s(%zu) error %d", "VirtualLock", used_range, rc); + } +#elif defined(_POSIX_MEMLOCK_RANGE) + if (mlock(env->dxb_mmap.base, used_range) == 0) { + update_mlcnt(env, 
mlock_pgno, true); + rc = MDBX_SUCCESS; + } else { + rc = errno; + WARNING("%s(%zu) error %d", "mlock", used_range, rc); + } +#else + rc = MDBX_ENOSYS; +#endif + } + + return rc; +} + +/*----------------------------------------------------------------------------*/ + +__cold int mdbx_env_get_fd(const MDBX_env *env, mdbx_filehandle_t *arg) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!arg)) + return MDBX_EINVAL; + + *arg = env->lazy_fd; + return MDBX_SUCCESS; +} + +__cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, + bool onoff) { + int rc = check_env(env, false); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(flags & ((env->flags & ENV_ACTIVE) ? ~ENV_CHANGEABLE_FLAGS + : ~ENV_USABLE_FLAGS))) + return MDBX_EPERM; + + if (unlikely(env->flags & MDBX_RDONLY)) + return MDBX_EACCESS; + + const bool lock_needed = (env->flags & ENV_ACTIVE) && !env_txn0_owned(env); + bool should_unlock = false; + if (lock_needed) { + rc = lck_txn_lock(env, false); + if (unlikely(rc)) + return rc; + should_unlock = true; + } + + if (onoff) + env->flags = combine_durability_flags(env->flags, flags); + else + env->flags &= ~flags; + + if (should_unlock) + lck_txn_unlock(env); + return MDBX_SUCCESS; +} + +__cold int mdbx_env_get_flags(const MDBX_env *env, unsigned *arg) { + int rc = check_env(env, false); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!arg)) + return MDBX_EINVAL; + + *arg = env->flags & ENV_USABLE_FLAGS; + return MDBX_SUCCESS; +} + +__cold int mdbx_env_set_userctx(MDBX_env *env, void *ctx) { + int rc = check_env(env, false); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + env->userctx = ctx; + return MDBX_SUCCESS; +} + +__cold void *mdbx_env_get_userctx(const MDBX_env *env) { + return env ? 
env->userctx : nullptr; +} + +__cold int mdbx_env_set_assert(MDBX_env *env, MDBX_assert_func *func) { + int rc = check_env(env, false); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + +#if MDBX_DEBUG + env->assert_func = func; + return MDBX_SUCCESS; +#else + (void)func; + return MDBX_ENOSYS; +#endif +} + +__cold int mdbx_env_set_hsr(MDBX_env *env, MDBX_hsr_func *hsr) { + int rc = check_env(env, false); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + env->hsr_callback = hsr; + return MDBX_SUCCESS; +} + +__cold MDBX_hsr_func *mdbx_env_get_hsr(const MDBX_env *env) { + return likely(env && env->signature.weak == env_signature) ? env->hsr_callback + : nullptr; +} + +#if defined(_WIN32) || defined(_WIN64) +__cold int mdbx_env_get_pathW(const MDBX_env *env, const wchar_t **arg) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!arg)) + return MDBX_EINVAL; + + *arg = env->pathname.specified; + return MDBX_SUCCESS; +} +#endif /* Windows */ + +__cold int mdbx_env_get_path(const MDBX_env *env, const char **arg) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!arg)) + return MDBX_EINVAL; + +#if defined(_WIN32) || defined(_WIN64) + if (!env->pathname_char) { + *arg = nullptr; + DWORD flags = /* WC_ERR_INVALID_CHARS */ 0x80; + size_t mb_len = + WideCharToMultiByte(CP_THREAD_ACP, flags, env->pathname.specified, -1, + nullptr, 0, nullptr, nullptr); + rc = mb_len ? MDBX_SUCCESS : (int)GetLastError(); + if (rc == ERROR_INVALID_FLAGS) { + mb_len = + WideCharToMultiByte(CP_THREAD_ACP, flags = 0, env->pathname.specified, + -1, nullptr, 0, nullptr, nullptr); + rc = mb_len ? 
MDBX_SUCCESS : (int)GetLastError(); + } + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + char *const mb_pathname = osal_malloc(mb_len); + if (!mb_pathname) + return MDBX_ENOMEM; + if (mb_len != (size_t)WideCharToMultiByte( + CP_THREAD_ACP, flags, env->pathname.specified, -1, + mb_pathname, (int)mb_len, nullptr, nullptr)) { + rc = (int)GetLastError(); + osal_free(mb_pathname); + return rc; + } + if (env->pathname_char || + InterlockedCompareExchangePointer((PVOID volatile *)&env->pathname_char, + mb_pathname, nullptr)) + osal_free(mb_pathname); + } + *arg = env->pathname_char; +#else + *arg = env->pathname.specified; +#endif /* Windows */ + return MDBX_SUCCESS; +} + +/*------------------------------------------------------------------------------ + * Legacy API */ + +#ifndef LIBMDBX_NO_EXPORTS_LEGACY_API + +LIBMDBX_API int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent, + MDBX_txn_flags_t flags, MDBX_txn **ret) { + return __inline_mdbx_txn_begin(env, parent, flags, ret); +} + +LIBMDBX_API int mdbx_txn_commit(MDBX_txn *txn) { + return __inline_mdbx_txn_commit(txn); +} + +LIBMDBX_API __cold int mdbx_env_stat(const MDBX_env *env, MDBX_stat *stat, + size_t bytes) { + return __inline_mdbx_env_stat(env, stat, bytes); +} + +LIBMDBX_API __cold int mdbx_env_info(const MDBX_env *env, MDBX_envinfo *info, + size_t bytes) { + return __inline_mdbx_env_info(env, info, bytes); +} + +LIBMDBX_API int mdbx_dbi_flags(const MDBX_txn *txn, MDBX_dbi dbi, + unsigned *flags) { + return __inline_mdbx_dbi_flags(txn, dbi, flags); +} + +LIBMDBX_API __cold int mdbx_env_sync(MDBX_env *env) { + return __inline_mdbx_env_sync(env); +} + +LIBMDBX_API __cold int mdbx_env_sync_poll(MDBX_env *env) { + return __inline_mdbx_env_sync_poll(env); +} + +LIBMDBX_API __cold int mdbx_env_close(MDBX_env *env) { + return __inline_mdbx_env_close(env); +} + +LIBMDBX_API __cold int mdbx_env_set_mapsize(MDBX_env *env, size_t size) { + return __inline_mdbx_env_set_mapsize(env, size); +} + +LIBMDBX_API __cold int 
mdbx_env_set_maxdbs(MDBX_env *env, MDBX_dbi dbs) { + return __inline_mdbx_env_set_maxdbs(env, dbs); +} + +LIBMDBX_API __cold int mdbx_env_get_maxdbs(const MDBX_env *env, MDBX_dbi *dbs) { + return __inline_mdbx_env_get_maxdbs(env, dbs); +} + +LIBMDBX_API __cold int mdbx_env_set_maxreaders(MDBX_env *env, + unsigned readers) { + return __inline_mdbx_env_set_maxreaders(env, readers); +} + +LIBMDBX_API __cold int mdbx_env_get_maxreaders(const MDBX_env *env, + unsigned *readers) { + return __inline_mdbx_env_get_maxreaders(env, readers); +} + +LIBMDBX_API __cold int mdbx_env_set_syncbytes(MDBX_env *env, size_t threshold) { + return __inline_mdbx_env_set_syncbytes(env, threshold); +} + +LIBMDBX_API __cold int mdbx_env_get_syncbytes(const MDBX_env *env, + size_t *threshold) { + return __inline_mdbx_env_get_syncbytes(env, threshold); +} + +LIBMDBX_API __cold int mdbx_env_set_syncperiod(MDBX_env *env, + unsigned seconds_16dot16) { + return __inline_mdbx_env_set_syncperiod(env, seconds_16dot16); +} + +LIBMDBX_API __cold int mdbx_env_get_syncperiod(const MDBX_env *env, + unsigned *seconds_16dot16) { + return __inline_mdbx_env_get_syncperiod(env, seconds_16dot16); +} + +LIBMDBX_API __cold uint64_t mdbx_key_from_int64(const int64_t i64) { + return __inline_mdbx_key_from_int64(i64); +} + +LIBMDBX_API __cold uint32_t mdbx_key_from_int32(const int32_t i32) { + return __inline_mdbx_key_from_int32(i32); +} + +LIBMDBX_API __cold intptr_t mdbx_limits_pgsize_min(void) { + return __inline_mdbx_limits_pgsize_min(); +} + +LIBMDBX_API __cold intptr_t mdbx_limits_pgsize_max(void) { + return __inline_mdbx_limits_pgsize_max(); +} + +#endif /* LIBMDBX_NO_EXPORTS_LEGACY_API */ diff --git a/src/config.h.in b/src/config.h.in index 88a282c8..bd5b7c46 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -11,6 +11,9 @@ #cmakedefine ENABLE_ASAN #cmakedefine ENABLE_UBSAN #cmakedefine01 MDBX_FORCE_ASSERTIONS +#if !defined(MDBX_BUILD_TEST) && !defined(MDBX_BUILD_CXX) +#cmakedefine01 MDBX_BUILD_CXX 
+#endif /* Common */ #cmakedefine01 MDBX_TXN_CHECKOWNER @@ -37,7 +40,9 @@ #cmakedefine01 MDBX_ENABLE_DBI_LOCKFREE /* Windows */ +#if !defined(MDBX_BUILD_TEST) && !defined(MDBX_WITHOUT_MSVC_CRT) #cmakedefine01 MDBX_WITHOUT_MSVC_CRT +#endif /* MacOS & iOS */ #cmakedefine01 MDBX_OSX_SPEED_INSTEADOF_DURABILITY diff --git a/src/copy.c b/src/copy.c new file mode 100644 index 00000000..4455bdf2 --- /dev/null +++ b/src/copy.c @@ -0,0 +1,781 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \note Please refer to the COPYRIGHT file for explanations license change, +/// credits and acknowledgments. +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +typedef struct compacting_context { + MDBX_env *env; + MDBX_txn *txn; + pgno_t first_unallocated; + osal_condpair_t condpair; + volatile unsigned head; + volatile unsigned tail; + uint8_t *write_buf[2]; + size_t write_len[2]; + /* Error code. Never cleared if set. Both threads can set nonzero + * to fail the copy. Not mutex-protected, expects atomic int. */ + volatile int error; + mdbx_filehandle_t fd; +} ctx_t; + +__cold static int compacting_walk_tree(ctx_t *ctx, tree_t *tree); + +/* Dedicated writer thread for compacting copy. 
*/ +__cold static THREAD_RESULT THREAD_CALL compacting_write_thread(void *arg) { + ctx_t *const ctx = arg; + +#if defined(EPIPE) && !(defined(_WIN32) || defined(_WIN64)) + sigset_t sigset; + sigemptyset(&sigset); + sigaddset(&sigset, SIGPIPE); + ctx->error = pthread_sigmask(SIG_BLOCK, &sigset, nullptr); +#endif /* EPIPE */ + + osal_condpair_lock(&ctx->condpair); + while (!ctx->error) { + while (ctx->tail == ctx->head && !ctx->error) { + int err = osal_condpair_wait(&ctx->condpair, true); + if (err != MDBX_SUCCESS) { + ctx->error = err; + goto bailout; + } + } + const unsigned toggle = ctx->tail & 1; + size_t wsize = ctx->write_len[toggle]; + if (wsize == 0) { + ctx->tail += 1; + break /* EOF */; + } + ctx->write_len[toggle] = 0; + uint8_t *ptr = ctx->write_buf[toggle]; + if (!ctx->error) { + int err = osal_write(ctx->fd, ptr, wsize); + if (err != MDBX_SUCCESS) { +#if defined(EPIPE) && !(defined(_WIN32) || defined(_WIN64)) + if (err == EPIPE) { + /* Collect the pending SIGPIPE, + * otherwise at least OS X gives it to the process on thread-exit. */ + int unused; + sigwait(&sigset, &unused); + } +#endif /* EPIPE */ + ctx->error = err; + goto bailout; + } + } + ctx->tail += 1; + osal_condpair_signal(&ctx->condpair, false); + } +bailout: + osal_condpair_unlock(&ctx->condpair); + return (THREAD_RESULT)0; +} + +/* Give buffer and/or MDBX_EOF to writer thread, await unused buffer. 
*/ +__cold static int compacting_toggle_write_buffers(ctx_t *ctx) { + osal_condpair_lock(&ctx->condpair); + eASSERT(ctx->env, ctx->head - ctx->tail < 2 || ctx->error); + ctx->head += 1; + osal_condpair_signal(&ctx->condpair, true); + while (!ctx->error && ctx->head - ctx->tail == 2 /* both buffers in use */) { + int err = osal_condpair_wait(&ctx->condpair, false); + if (err != MDBX_SUCCESS) + ctx->error = err; + } + osal_condpair_unlock(&ctx->condpair); + return ctx->error; +} + +static int compacting_put_bytes(ctx_t *ctx, const void *src, size_t bytes, + pgno_t pgno, pgno_t npages) { + assert(pgno == 0 || bytes > PAGEHDRSZ); + while (bytes > 0) { + const size_t side = ctx->head & 1; + const size_t left = MDBX_ENVCOPY_WRITEBUF - ctx->write_len[side]; + if (left < (pgno ? PAGEHDRSZ : 1)) { + int err = compacting_toggle_write_buffers(ctx); + if (unlikely(err != MDBX_SUCCESS)) + return err; + continue; + } + const size_t chunk = (bytes < left) ? bytes : left; + void *const dst = ctx->write_buf[side] + ctx->write_len[side]; + if (src) { + memcpy(dst, src, chunk); + if (pgno) { + assert(chunk > PAGEHDRSZ); + page_t *mp = dst; + mp->pgno = pgno; + if (mp->txnid == 0) + mp->txnid = ctx->txn->txnid; + if (mp->flags == P_LARGE) { + assert(bytes <= pgno2bytes(ctx->env, npages)); + mp->pages = npages; + } + pgno = 0; + } + src = ptr_disp(src, chunk); + } else + memset(dst, 0, chunk); + bytes -= chunk; + ctx->write_len[side] += chunk; + } + return MDBX_SUCCESS; +} + +static int compacting_put_page(ctx_t *ctx, const page_t *mp, + const size_t head_bytes, const size_t tail_bytes, + const pgno_t npages) { + if (tail_bytes) { + assert(head_bytes + tail_bytes <= ctx->env->ps); + assert(npages == 1 && + (page_type(mp) == P_BRANCH || page_type(mp) == P_LEAF)); + } else { + assert(head_bytes <= pgno2bytes(ctx->env, npages)); + assert((npages == 1 && page_type(mp) == (P_LEAF | P_DUPFIX)) || + page_type(mp) == P_LARGE); + } + + const pgno_t pgno = ctx->first_unallocated; + 
ctx->first_unallocated += npages; + int err = compacting_put_bytes(ctx, mp, head_bytes, pgno, npages); + if (unlikely(err != MDBX_SUCCESS)) + return err; + err = compacting_put_bytes( + ctx, nullptr, pgno2bytes(ctx->env, npages) - (head_bytes + tail_bytes), 0, + 0); + if (unlikely(err != MDBX_SUCCESS)) + return err; + return compacting_put_bytes(ctx, ptr_disp(mp, ctx->env->ps - tail_bytes), + tail_bytes, 0, 0); +} + +__cold static int compacting_walk(ctx_t *ctx, MDBX_cursor *mc, + pgno_t *const parent_pgno, + txnid_t parent_txnid) { + mc->top = 0; + mc->ki[0] = 0; + int rc = page_get(mc, *parent_pgno, &mc->pg[0], parent_txnid); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = tree_search_finalize(mc, nullptr, Z_FIRST); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + /* Make cursor pages writable */ + const intptr_t deep_limit = mc->top + 1; + void *const buf = osal_malloc(pgno2bytes(ctx->env, deep_limit + 1)); + if (buf == nullptr) + return MDBX_ENOMEM; + + void *ptr = buf; + for (intptr_t i = 0; i <= mc->top; i++) { + page_copy(ptr, mc->pg[i], ctx->env->ps); + mc->pg[i] = ptr; + ptr = ptr_disp(ptr, ctx->env->ps); + } + /* This is writable space for a leaf page. Usually not needed. 
*/ + page_t *const leaf = ptr; + + while (mc->top >= 0) { + page_t *mp = mc->pg[mc->top]; + const size_t nkeys = page_numkeys(mp); + if (is_leaf(mp)) { + if (!(mc->flags & + z_inner) /* may have nested N_SUBDATA or N_BIGDATA nodes */) { + for (size_t i = 0; i < nkeys; i++) { + node_t *node = page_node(mp, i); + if (node_flags(node) == N_BIGDATA) { + /* Need writable leaf */ + if (mp != leaf) { + mc->pg[mc->top] = leaf; + page_copy(leaf, mp, ctx->env->ps); + mp = leaf; + node = page_node(mp, i); + } + + const pgr_t lp = + page_get_large(mc, node_largedata_pgno(node), mp->txnid); + if (unlikely((rc = lp.err) != MDBX_SUCCESS)) + goto bailout; + const size_t datasize = node_ds(node); + const pgno_t npages = largechunk_npages(ctx->env, datasize); + poke_pgno(node_data(node), ctx->first_unallocated); + rc = compacting_put_page(ctx, lp.page, PAGEHDRSZ + datasize, 0, + npages); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } else if (node_flags(node) & N_SUBDATA) { + if (!MDBX_DISABLE_VALIDATION && + unlikely(node_ds(node) != sizeof(tree_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid dupsort sub-tree node size", + (unsigned)node_ds(node)); + rc = MDBX_CORRUPTED; + goto bailout; + } + + /* Need writable leaf */ + if (mp != leaf) { + mc->pg[mc->top] = leaf; + page_copy(leaf, mp, ctx->env->ps); + mp = leaf; + node = page_node(mp, i); + } + + tree_t *nested = nullptr; + if (node_flags(node) & N_DUPDATA) { + rc = cursor_dupsort_setup(mc, node, mp); + if (likely(rc == MDBX_SUCCESS)) { + nested = &mc->subcur->nested_tree; + rc = compacting_walk(ctx, &mc->subcur->cursor, &nested->root, + mp->txnid); + } + } else { + cASSERT(mc, (mc->flags & z_inner) == 0 && mc->subcur == 0); + cursor_couple_t *couple = + container_of(mc, cursor_couple_t, outer); + nested = &couple->inner.nested_tree; + memcpy(nested, node_data(node), sizeof(tree_t)); + rc = compacting_walk_tree(ctx, nested); + } + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + 
memcpy(node_data(node), nested, sizeof(tree_t)); + } + } + } + } else { + mc->ki[mc->top]++; + if (mc->ki[mc->top] < nkeys) { + for (;;) { + const node_t *node = page_node(mp, mc->ki[mc->top]); + rc = page_get(mc, node_pgno(node), &mp, mp->txnid); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + mc->top += 1; + if (unlikely(mc->top >= deep_limit)) { + rc = MDBX_CURSOR_FULL; + goto bailout; + } + mc->ki[mc->top] = 0; + if (!is_branch(mp)) { + mc->pg[mc->top] = mp; + break; + } + /* Whenever we advance to a sibling branch page, + * we must proceed all the way down to its first leaf. */ + page_copy(mc->pg[mc->top], mp, ctx->env->ps); + } + continue; + } + } + + const pgno_t pgno = ctx->first_unallocated; + if (likely(!is_dupfix_leaf(mp))) { + rc = compacting_put_page(ctx, mp, PAGEHDRSZ + mp->lower, + ctx->env->ps - (PAGEHDRSZ + mp->upper), 1); + } else { + rc = compacting_put_page( + ctx, mp, PAGEHDRSZ + page_numkeys(mp) * mp->dupfix_ksize, 0, 1); + } + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + if (mc->top) { + /* Update parent if there is one */ + node_set_pgno(page_node(mc->pg[mc->top - 1], mc->ki[mc->top - 1]), pgno); + cursor_pop(mc); + } else { + /* Otherwise we're done */ + *parent_pgno = pgno; + break; + } + } + +bailout: + osal_free(buf); + return rc; +} +/* Compact one whole tree. An empty tree (root == P_INVALID) returns MDBX_SUCCESS at once; otherwise a cursor couple is initialized for walking and compacting_walk() is run from tree->root, which is updated in place with the new root pgno. A zero tree->mod_txnid is replaced by the id of the copying transaction. */ +__cold static int compacting_walk_tree(ctx_t *ctx, tree_t *tree) { + if (unlikely(tree->root == P_INVALID)) + return MDBX_SUCCESS; /* empty db */ + + cursor_couple_t couple; + memset(&couple, 0, sizeof(couple)); + couple.inner.cursor.signature = ~cur_signature_live; + kvx_t kvx = {.clc = {.k = {.lmin = INT_MAX}, .v = {.lmin = INT_MAX}}}; + int rc = cursor_init4walk(&couple, ctx->txn, tree, &kvx); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + couple.outer.checking |= z_ignord | z_pagecheck; + couple.inner.cursor.checking |= z_ignord | z_pagecheck; + if (!tree->mod_txnid) + tree->mod_txnid = ctx->txn->txnid; + return compacting_walk(ctx, &couple.outer, &tree->root, tree->mod_txnid); +}
+/* Finalize the geometry of the copy's meta and sign it as steady. When `now` differs from first_unallocated it is reset to first_unallocated and, if a grow step (or else a shrink step) is set, advanced to the next multiple of that step and aligned by pgno_align2os_pgno(). The result is then clamped into the lower..upper range. */ +__cold static void compacting_fixup_meta(MDBX_env *env, meta_t *meta) { + eASSERT(env, meta->trees.gc.mod_txnid || meta->trees.gc.root == P_INVALID); + eASSERT(env, + meta->trees.main.mod_txnid || meta->trees.main.root == P_INVALID); + + /* Calculate filesize taking in account shrink/growing thresholds */ + if (meta->geometry.first_unallocated != meta->geometry.now) { + meta->geometry.now = meta->geometry.first_unallocated; + const size_t aligner = + pv2pages(meta->geometry.grow_pv ? meta->geometry.grow_pv + : meta->geometry.shrink_pv); + if (aligner) { + const pgno_t aligned = pgno_align2os_pgno( + env, meta->geometry.first_unallocated + aligner - + meta->geometry.first_unallocated % aligner); + meta->geometry.now = aligned; + } + } + + if (meta->geometry.now < meta->geometry.lower) + meta->geometry.now = meta->geometry.lower; + if (meta->geometry.now > meta->geometry.upper) + meta->geometry.now = meta->geometry.upper; + + /* Update signature */ + assert(meta->geometry.now >= meta->geometry.first_unallocated); + meta_sign_as_steady(meta); +} + +/* Make the geometry resizable: lower the size floor to MIN_PAGENO and fill in missing steps. A zero grow step becomes 1 page plus 1/42 of the span between `upper` and the new `lower`; a zero shrink step becomes twice the grow step. */ +__cold static void meta_make_sizeable(meta_t *meta) { + meta->geometry.lower = MIN_PAGENO; + if (meta->geometry.grow_pv == 0) { + const pgno_t step = 1 + (meta->geometry.upper - meta->geometry.lower) / 42; + meta->geometry.grow_pv = pages2pv(step); + } + if (meta->geometry.shrink_pv == 0) { + const pgno_t step = pv2pages(meta->geometry.grow_pv) << 1; + meta->geometry.shrink_pv = pages2pv(step); + } +} +/* Produce a compacted copy of the snapshot held by read_txn. The meta pages are built at the start of `buffer`, and the data write buffers follow at the next system-page boundary. The meta pages are written by this function only when the destination is a pipe. Returns MDBX_SUCCESS or an error code. */ +__cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *read_txn, + mdbx_filehandle_t fd, uint8_t *buffer, + const bool dest_is_pipe, + const MDBX_copy_flags_t flags) { + const size_t meta_bytes = pgno2bytes(env, NUM_METAS); + uint8_t *const data_buffer = + buffer + ceil_powerof2(meta_bytes, globals.sys_pagesize); + meta_t *const meta = meta_init_triplet(env, buffer); + meta_set_txnid(env, meta, read_txn->txnid); + + if (flags & MDBX_CP_FORCE_DYNAMIC_SIZE) + meta_make_sizeable(meta); + + /*
copy canary sequences if present */ + if (read_txn->canary.v) { + meta->canary = read_txn->canary; + meta->canary.v = constmeta_txnid(meta); + } + + if (read_txn->dbs[MAIN_DBI].root == P_INVALID) { + /* When the DB is empty, handle it specially to + * fix any breakage like page leaks from ITS#8174. */ + meta->trees.main.flags = read_txn->dbs[MAIN_DBI].flags; + compacting_fixup_meta(env, meta); + if (dest_is_pipe) { + int rc = osal_write(fd, buffer, meta_bytes); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + } else { + /* Count free pages + GC pages. */ + cursor_couple_t couple; + int rc = cursor_init(&couple.outer, read_txn, FREE_DBI); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + pgno_t gc_npages = read_txn->dbs[FREE_DBI].branch_pages + + read_txn->dbs[FREE_DBI].leaf_pages + + read_txn->dbs[FREE_DBI].large_pages; + MDBX_val key, data; + rc = outer_first(&couple.outer, &key, &data); + while (rc == MDBX_SUCCESS) { + const pnl_t pnl = data.iov_base; + if (unlikely(data.iov_len % sizeof(pgno_t) || + data.iov_len < MDBX_PNL_SIZEOF(pnl))) { + ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid GC-record length", data.iov_len); + return MDBX_CORRUPTED; + } + if (unlikely(!pnl_check(pnl, read_txn->geo.first_unallocated))) { + ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid GC-record content"); + return MDBX_CORRUPTED; + } + gc_npages += MDBX_PNL_GETSIZE(pnl); + rc = outer_next(&couple.outer, &key, &data, MDBX_NEXT); + } + if (unlikely(rc != MDBX_NOTFOUND)) + return rc; + + meta->geometry.first_unallocated = + read_txn->geo.first_unallocated - gc_npages; + meta->trees.main = read_txn->dbs[MAIN_DBI]; + + ctx_t ctx; + memset(&ctx, 0, sizeof(ctx)); + rc = osal_condpair_init(&ctx.condpair); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + memset(data_buffer, 0, 2 * (size_t)MDBX_ENVCOPY_WRITEBUF); + ctx.write_buf[0] = data_buffer; + ctx.write_buf[1] = data_buffer + (size_t)MDBX_ENVCOPY_WRITEBUF; + ctx.first_unallocated = 
NUM_METAS; + ctx.env = env; + ctx.fd = fd; + ctx.txn = read_txn; + + osal_thread_t thread; + int thread_err = osal_thread_create(&thread, compacting_write_thread, &ctx); + if (likely(thread_err == MDBX_SUCCESS)) { + if (dest_is_pipe) { + if (!meta->trees.main.mod_txnid) + meta->trees.main.mod_txnid = read_txn->txnid; + compacting_fixup_meta(env, meta); + rc = osal_write(fd, buffer, meta_bytes); + } + if (likely(rc == MDBX_SUCCESS)) + rc = compacting_walk_tree(&ctx, &meta->trees.main); + if (ctx.write_len[ctx.head & 1]) + /* toggle to flush non-empty buffers */ + compacting_toggle_write_buffers(&ctx); + + if (likely(rc == MDBX_SUCCESS) && + unlikely(meta->geometry.first_unallocated != ctx.first_unallocated)) { + if (ctx.first_unallocated > meta->geometry.first_unallocated) { + ERROR("the source DB %s: post-compactification used pages %" PRIaPGNO + " %c expected %" PRIaPGNO, + "has double-used pages or other corruption", + ctx.first_unallocated, '>', meta->geometry.first_unallocated); + rc = MDBX_CORRUPTED; /* corrupted DB */ + } + if (ctx.first_unallocated < meta->geometry.first_unallocated) { + WARNING( + "the source DB %s: post-compactification used pages %" PRIaPGNO + " %c expected %" PRIaPGNO, + "has page leak(s)", ctx.first_unallocated, '<', + meta->geometry.first_unallocated); + if (dest_is_pipe) + /* the root within already written meta-pages is wrong */ + rc = MDBX_CORRUPTED; + } + /* fixup meta */ + meta->geometry.first_unallocated = ctx.first_unallocated; + } + + /* toggle with empty buffers to exit thread's loop */ + eASSERT(env, (ctx.write_len[ctx.head & 1]) == 0); + compacting_toggle_write_buffers(&ctx); + thread_err = osal_thread_join(thread); + eASSERT(env, (ctx.tail == ctx.head && ctx.write_len[ctx.head & 1] == 0) || + ctx.error); + osal_condpair_destroy(&ctx.condpair); + } + if (unlikely(thread_err != MDBX_SUCCESS)) + return thread_err; + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + if (unlikely(ctx.error != MDBX_SUCCESS)) + return ctx.error; + 
if (!dest_is_pipe) + compacting_fixup_meta(env, meta); + } + + /* Extend file if required */ + if (meta->geometry.now != meta->geometry.first_unallocated) { + const size_t whole_size = pgno2bytes(env, meta->geometry.now); + if (!dest_is_pipe) + return osal_ftruncate(fd, whole_size); + + const size_t used_size = pgno2bytes(env, meta->geometry.first_unallocated); + memset(data_buffer, 0, (size_t)MDBX_ENVCOPY_WRITEBUF); + for (size_t offset = used_size; offset < whole_size;) { + const size_t chunk = ((size_t)MDBX_ENVCOPY_WRITEBUF < whole_size - offset) + ? (size_t)MDBX_ENVCOPY_WRITEBUF + : whole_size - offset; + int rc = osal_write(fd, data_buffer, chunk); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + offset += chunk; + } + } + return MDBX_SUCCESS; +} +/* Copy the database without compaction. The read transaction is ended and renewed while the writer lock is held, and the meta pages are snapshotted into `buffer` under that lock. The most recent meta in the snapshot is optionally made resizable and is signed as steady. The used pages are then streamed to `fd` and the output is extended to the whole size. Returns MDBX_SUCCESS or an error code. */ +__cold static int copy_asis(MDBX_env *env, MDBX_txn *read_txn, + mdbx_filehandle_t fd, uint8_t *buffer, + const bool dest_is_pipe, + const MDBX_copy_flags_t flags) { + int rc = txn_end(read_txn, TXN_END_RESET_TMP); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + /* Temporarily block writers until we snapshot the meta pages */ + rc = lck_txn_lock(env, false); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = txn_renew(read_txn, MDBX_TXN_RDONLY); + if (unlikely(rc != MDBX_SUCCESS)) { + lck_txn_unlock(env); + return rc; + } + + jitter4testing(false); + const size_t meta_bytes = pgno2bytes(env, NUM_METAS); + const troika_t troika = meta_tap(env); + /* Make a snapshot of meta-pages, + * but writing ones after the data was flushed */ + memcpy(buffer, env->dxb_mmap.base, meta_bytes); + meta_t *const headcopy = /* LY: get pointer to the snapshot copy */ + ptr_disp(buffer, + ptr_dist(meta_recent(env, &troika).ptr_c, env->dxb_mmap.base)); + lck_txn_unlock(env); + + if (flags & MDBX_CP_FORCE_DYNAMIC_SIZE) + meta_make_sizeable(headcopy); + /* Update signature to steady */ + meta_sign_as_steady(headcopy); + + /* Copy the data */ + const size_t whole_size = pgno_align2os_bytes(env, read_txn->geo.end_pgno); + const
size_t used_size = pgno2bytes(env, read_txn->geo.first_unallocated); + jitter4testing(false); + + if (dest_is_pipe) + rc = osal_write(fd, buffer, meta_bytes); + + uint8_t *const data_buffer = + buffer + ceil_powerof2(meta_bytes, globals.sys_pagesize); +#if MDBX_USE_COPYFILERANGE + static bool copyfilerange_unavailable; + bool not_the_same_filesystem = false; + struct statfs statfs_info; + if (fstatfs(fd, &statfs_info) || + statfs_info.f_type == /* ECRYPTFS_SUPER_MAGIC */ 0xf15f) + /* avoid using copy_file_range() on ecryptfs due to its bugs */ + not_the_same_filesystem = true; +#endif /* MDBX_USE_COPYFILERANGE */ + for (size_t offset = meta_bytes; rc == MDBX_SUCCESS && offset < used_size;) { +#if MDBX_USE_SENDFILE + static bool sendfile_unavailable; + if (dest_is_pipe && likely(!sendfile_unavailable)) { + off_t in_offset = offset; + const ssize_t written = + sendfile(fd, env->lazy_fd, &in_offset, used_size - offset); + if (likely(written > 0)) { + offset = in_offset; + continue; + } + rc = MDBX_ENODATA; + if (written == 0 || ignore_enosys(rc = errno) != MDBX_RESULT_TRUE) + break; + sendfile_unavailable = true; + } +#endif /* MDBX_USE_SENDFILE */ + +#if MDBX_USE_COPYFILERANGE + if (!dest_is_pipe && !not_the_same_filesystem && + likely(!copyfilerange_unavailable)) { + off_t in_offset = offset, out_offset = offset; + ssize_t bytes_copied = copy_file_range( + env->lazy_fd, &in_offset, fd, &out_offset, used_size - offset, 0); + if (likely(bytes_copied > 0)) { + offset = in_offset; + continue; + } + rc = MDBX_ENODATA; + if (bytes_copied == 0) + break; + rc = errno; + if (rc == EXDEV || rc == /* workaround for ecryptfs bug(s), + maybe useful for others FS */ + EINVAL) + not_the_same_filesystem = true; + else if (ignore_enosys(rc) == MDBX_RESULT_TRUE) + copyfilerange_unavailable = true; + else + break; + } +#endif /* MDBX_USE_COPYFILERANGE */ + + /* fallback to portable */ + const size_t chunk = ((size_t)MDBX_ENVCOPY_WRITEBUF < used_size - offset) + ?
(size_t)MDBX_ENVCOPY_WRITEBUF + : used_size - offset; + /* copy to avoid EFAULT in case swapped-out */ + memcpy(data_buffer, ptr_disp(env->dxb_mmap.base, offset), chunk); + rc = osal_write(fd, data_buffer, chunk); + offset += chunk; + } + + /* Extend file if required */ + if (likely(rc == MDBX_SUCCESS) && whole_size != used_size) { + if (!dest_is_pipe) + rc = osal_ftruncate(fd, whole_size); + else { + memset(data_buffer, 0, (size_t)MDBX_ENVCOPY_WRITEBUF); + for (size_t offset = used_size; + rc == MDBX_SUCCESS && offset < whole_size;) { + const size_t chunk = + ((size_t)MDBX_ENVCOPY_WRITEBUF < whole_size - offset) + ? (size_t)MDBX_ENVCOPY_WRITEBUF + : whole_size - offset; + rc = osal_write(fd, data_buffer, chunk); + offset += chunk; + } + } + } + + return rc; +} +/* Copy the environment to an already opened file descriptor. Allocates a work buffer aligned to the system page size (meta pages plus one write buffer, or two with MDBX_CP_COMPACT), starts a read-only transaction and dispatches to copy_with_compacting() or copy_asis(). For a non-pipe destination the meta area is first filled with a stub, and the real meta pages are written last, after the data has been synced. Returns MDBX_SUCCESS or an error code. */ +__cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, + MDBX_copy_flags_t flags) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + const int dest_is_pipe = osal_is_pipe(fd); + if (MDBX_IS_ERROR(dest_is_pipe)) + return dest_is_pipe; + + if (!dest_is_pipe) { + rc = osal_fseek(fd, 0); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + + const size_t buffer_size = + pgno_align2os_bytes(env, NUM_METAS) + + ceil_powerof2(((flags & MDBX_CP_COMPACT) + ? 2 * (size_t)MDBX_ENVCOPY_WRITEBUF + : (size_t)MDBX_ENVCOPY_WRITEBUF), + globals.sys_pagesize); + + uint8_t *buffer = nullptr; + rc = osal_memalign_alloc(globals.sys_pagesize, buffer_size, (void **)&buffer); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + MDBX_txn *read_txn = nullptr; + /* Do the lock/unlock of the reader mutex before starting the + * write txn. Otherwise other read txns could block writers. */ + rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &read_txn); + if (unlikely(rc != MDBX_SUCCESS)) { + osal_memalign_free(buffer); + return rc; + } + + if (!dest_is_pipe) { + /* First write a stub over the meta-pages, + * so that an incomplete copy can never be used.
*/ + memset(buffer, -1, pgno2bytes(env, NUM_METAS)); + rc = osal_write(fd, buffer, pgno2bytes(env, NUM_METAS)); + } + + if (likely(rc == MDBX_SUCCESS)) { + memset(buffer, 0, pgno2bytes(env, NUM_METAS)); + rc = ((flags & MDBX_CP_COMPACT) ? copy_with_compacting : copy_asis)( + env, read_txn, fd, buffer, dest_is_pipe, flags); + } + mdbx_txn_abort(read_txn); + + if (!dest_is_pipe) { + if (likely(rc == MDBX_SUCCESS)) + rc = osal_fsync(fd, MDBX_SYNC_DATA | MDBX_SYNC_SIZE); + + /* Write actual meta */ + if (likely(rc == MDBX_SUCCESS)) + rc = osal_pwrite(fd, buffer, pgno2bytes(env, NUM_METAS), 0); + + if (likely(rc == MDBX_SUCCESS)) + rc = osal_fsync(fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); + } + + osal_memalign_free(buffer); + return rc; +} +/* Copy the environment into a new file at dest_path. On Windows this is a thin wrapper that converts the path with osal_mb2w() and forwards to mdbx_env_copyW(); on other platforms the code after the #endif is the body of this function. */ +__cold int mdbx_env_copy(MDBX_env *env, const char *dest_path, + MDBX_copy_flags_t flags) { +#if defined(_WIN32) || defined(_WIN64) + wchar_t *dest_pathW = nullptr; + int rc = osal_mb2w(dest_path, &dest_pathW); + if (likely(rc == MDBX_SUCCESS)) { + rc = mdbx_env_copyW(env, dest_pathW, flags); + osal_free(dest_pathW); + } + return rc; +} +/* Wide-character entry point, compiled on Windows only; it owns the shared body that follows the #endif. */ +__cold int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest_path, + MDBX_copy_flags_t flags) { +#endif /* Windows */ + + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!dest_path)) + return MDBX_EINVAL; + + /* The destination path must exist, but the destination file must not. + * We don't want the OS to cache the writes, since the source data is + * already in the OS cache.
*/ + mdbx_filehandle_t newfd; + rc = osal_openfile(MDBX_OPEN_COPY, env, dest_path, &newfd, +#if defined(_WIN32) || defined(_WIN64) + (mdbx_mode_t)-1 +#else + S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP +#endif + ); + +#if defined(_WIN32) || defined(_WIN64) + /* no locking required since the file opened with ShareMode == 0 */ +#else + if (rc == MDBX_SUCCESS) { + MDBX_STRUCT_FLOCK lock_op; + memset(&lock_op, 0, sizeof(lock_op)); + lock_op.l_type = F_WRLCK; + lock_op.l_whence = SEEK_SET; + lock_op.l_start = 0; + lock_op.l_len = OFF_T_MAX; + if (MDBX_FCNTL(newfd, MDBX_F_SETLK, &lock_op) +#if (defined(__linux__) || defined(__gnu_linux__)) && defined(LOCK_EX) && \ + (!defined(__ANDROID_API__) || __ANDROID_API__ >= 24) + || flock(newfd, LOCK_EX | LOCK_NB) +#endif /* Linux */ + ) + rc = errno; + } +#endif /* Windows / POSIX */ + + if (rc == MDBX_SUCCESS) + rc = mdbx_env_copy2fd(env, newfd, flags); + + if (newfd != INVALID_HANDLE_VALUE) { + int err = osal_closefile(newfd); + if (rc == MDBX_SUCCESS && err != rc) + rc = err; + if (rc != MDBX_SUCCESS) + (void)osal_removefile(dest_path); + } + + return rc; +} diff --git a/src/core.c b/src/core.c deleted file mode 100644 index 62fdf162..00000000 --- a/src/core.c +++ /dev/null @@ -1,29506 +0,0 @@ -/* - * Copyright 2015-2024 Leonid Yuriev . - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * This code is derived from "LMDB engine" written by - * Howard Chu (Symas Corporation), which itself derived from btree.c - * written by Martin Hedenfalk. - * - * --- - * - * Portions Copyright 2011-2015 Howard Chu, Symas Corp. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- * - * --- - * - * Portions Copyright (c) 2009, 2010 Martin Hedenfalk - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ - -#include "internals.h" - -/*------------------------------------------------------------------------------ - * Internal inline functions */ - -MDBX_NOTHROW_CONST_FUNCTION static size_t branchless_abs(intptr_t value) { - assert(value > INT_MIN); - const size_t expanded_sign = - (size_t)(value >> (sizeof(value) * CHAR_BIT - 1)); - return ((size_t)value + expanded_sign) ^ expanded_sign; -} - -/* Pack/Unpack 16-bit values for Grow step & Shrink threshold */ -MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t me2v(size_t m, size_t e) { - assert(m < 2048 && e < 8); - return (pgno_t)(32768 + ((m + 1) << (e + 8))); -} - -MDBX_NOTHROW_CONST_FUNCTION static __inline uint16_t v2me(size_t v, size_t e) { - assert(v > (e ? 
me2v(2047, e - 1) : 32768)); - assert(v <= me2v(2047, e)); - size_t m = (v - 32768 + ((size_t)1 << (e + 8)) - 1) >> (e + 8); - m -= m > 0; - assert(m < 2048 && e < 8); - // f e d c b a 9 8 7 6 5 4 3 2 1 0 - // 1 e e e m m m m m m m m m m m 1 - const uint16_t pv = (uint16_t)(0x8001 + (e << 12) + (m << 1)); - assert(pv != 65535); - return pv; -} - -/* Convert 16-bit packed (exponential quantized) value to number of pages */ -MDBX_NOTHROW_CONST_FUNCTION static pgno_t pv2pages(uint16_t pv) { - if ((pv & 0x8001) != 0x8001) - return pv; - if (pv == 65535) - return 65536; - // f e d c b a 9 8 7 6 5 4 3 2 1 0 - // 1 e e e m m m m m m m m m m m 1 - return me2v((pv >> 1) & 2047, (pv >> 12) & 7); -} - -/* Convert number of pages to 16-bit packed (exponential quantized) value */ -MDBX_NOTHROW_CONST_FUNCTION static uint16_t pages2pv(size_t pages) { - if (pages < 32769 || (pages < 65536 && (pages & 1) == 0)) - return (uint16_t)pages; - if (pages <= me2v(2047, 0)) - return v2me(pages, 0); - if (pages <= me2v(2047, 1)) - return v2me(pages, 1); - if (pages <= me2v(2047, 2)) - return v2me(pages, 2); - if (pages <= me2v(2047, 3)) - return v2me(pages, 3); - if (pages <= me2v(2047, 4)) - return v2me(pages, 4); - if (pages <= me2v(2047, 5)) - return v2me(pages, 5); - if (pages <= me2v(2047, 6)) - return v2me(pages, 6); - return (pages < me2v(2046, 7)) ? 
v2me(pages, 7) : 65533; -} - -/*------------------------------------------------------------------------------ - * Unaligned access */ - -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __always_inline size_t -field_alignment(size_t alignment_baseline, size_t field_offset) { - size_t merge = alignment_baseline | (size_t)field_offset; - return merge & -(int)merge; -} - -/* read-thunk for UB-sanitizer */ -MDBX_NOTHROW_PURE_FUNCTION static __always_inline uint8_t -peek_u8(const uint8_t *const __restrict ptr) { - return *ptr; -} - -/* write-thunk for UB-sanitizer */ -static __always_inline void poke_u8(uint8_t *const __restrict ptr, - const uint8_t v) { - *ptr = v; -} - -MDBX_NOTHROW_PURE_FUNCTION static __always_inline uint16_t -unaligned_peek_u16(const size_t expected_alignment, const void *const ptr) { - assert((uintptr_t)ptr % expected_alignment == 0); - if (MDBX_UNALIGNED_OK >= 2 || (expected_alignment % sizeof(uint16_t)) == 0) - return *(const uint16_t *)ptr; - else { -#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ - defined(_M_X64) || defined(_M_IA64) - return *(const __unaligned uint16_t *)ptr; -#else - uint16_t v; - memcpy(&v, ptr, sizeof(v)); - return v; -#endif /* _MSC_VER || __unaligned */ - } -} - -static __always_inline void unaligned_poke_u16(const size_t expected_alignment, - void *const __restrict ptr, - const uint16_t v) { - assert((uintptr_t)ptr % expected_alignment == 0); - if (MDBX_UNALIGNED_OK >= 2 || (expected_alignment % sizeof(v)) == 0) - *(uint16_t *)ptr = v; - else { -#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ - defined(_M_X64) || defined(_M_IA64) - *((uint16_t __unaligned *)ptr) = v; -#else - memcpy(ptr, &v, sizeof(v)); -#endif /* _MSC_VER || __unaligned */ - } -} - -MDBX_NOTHROW_PURE_FUNCTION static __always_inline uint32_t unaligned_peek_u32( - const size_t expected_alignment, const void *const __restrict ptr) { - assert((uintptr_t)ptr % expected_alignment == 0); - if 
(MDBX_UNALIGNED_OK >= 4 || (expected_alignment % sizeof(uint32_t)) == 0) - return *(const uint32_t *)ptr; - else if ((expected_alignment % sizeof(uint16_t)) == 0) { - const uint16_t lo = - ((const uint16_t *)ptr)[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__]; - const uint16_t hi = - ((const uint16_t *)ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__]; - return lo | (uint32_t)hi << 16; - } else { -#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ - defined(_M_X64) || defined(_M_IA64) - return *(const __unaligned uint32_t *)ptr; -#else - uint32_t v; - memcpy(&v, ptr, sizeof(v)); - return v; -#endif /* _MSC_VER || __unaligned */ - } -} - -static __always_inline void unaligned_poke_u32(const size_t expected_alignment, - void *const __restrict ptr, - const uint32_t v) { - assert((uintptr_t)ptr % expected_alignment == 0); - if (MDBX_UNALIGNED_OK >= 4 || (expected_alignment % sizeof(v)) == 0) - *(uint32_t *)ptr = v; - else if ((expected_alignment % sizeof(uint16_t)) == 0) { - ((uint16_t *)ptr)[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__] = (uint16_t)v; - ((uint16_t *)ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__] = - (uint16_t)(v >> 16); - } else { -#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ - defined(_M_X64) || defined(_M_IA64) - *((uint32_t __unaligned *)ptr) = v; -#else - memcpy(ptr, &v, sizeof(v)); -#endif /* _MSC_VER || __unaligned */ - } -} - -MDBX_NOTHROW_PURE_FUNCTION static __always_inline uint64_t unaligned_peek_u64( - const size_t expected_alignment, const void *const __restrict ptr) { - assert((uintptr_t)ptr % expected_alignment == 0); - if (MDBX_UNALIGNED_OK >= 8 || (expected_alignment % sizeof(uint64_t)) == 0) - return *(const uint64_t *)ptr; - else if ((expected_alignment % sizeof(uint32_t)) == 0) { - const uint32_t lo = - ((const uint32_t *)ptr)[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__]; - const uint32_t hi = - ((const uint32_t *)ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__]; - return lo | (uint64_t)hi << 32; - } 
else { -#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ - defined(_M_X64) || defined(_M_IA64) - return *(const __unaligned uint64_t *)ptr; -#else - uint64_t v; - memcpy(&v, ptr, sizeof(v)); - return v; -#endif /* _MSC_VER || __unaligned */ - } -} - -static __always_inline uint64_t -unaligned_peek_u64_volatile(const size_t expected_alignment, - const volatile void *const __restrict ptr) { - assert((uintptr_t)ptr % expected_alignment == 0); - assert(expected_alignment % sizeof(uint32_t) == 0); - if (MDBX_UNALIGNED_OK >= 8 || (expected_alignment % sizeof(uint64_t)) == 0) - return *(const volatile uint64_t *)ptr; - else { -#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ - defined(_M_X64) || defined(_M_IA64) - return *(const volatile __unaligned uint64_t *)ptr; -#else - const uint32_t lo = ((const volatile uint32_t *) - ptr)[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__]; - const uint32_t hi = ((const volatile uint32_t *) - ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__]; - return lo | (uint64_t)hi << 32; -#endif /* _MSC_VER || __unaligned */ - } -} - -static __always_inline void unaligned_poke_u64(const size_t expected_alignment, - void *const __restrict ptr, - const uint64_t v) { - assert((uintptr_t)ptr % expected_alignment == 0); - if (MDBX_UNALIGNED_OK >= 8 || (expected_alignment % sizeof(v)) == 0) - *(uint64_t *)ptr = v; - else if ((expected_alignment % sizeof(uint32_t)) == 0) { - ((uint32_t *)ptr)[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__] = (uint32_t)v; - ((uint32_t *)ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__] = - (uint32_t)(v >> 32); - } else { -#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ - defined(_M_X64) || defined(_M_IA64) - *((uint64_t __unaligned *)ptr) = v; -#else - memcpy(ptr, &v, sizeof(v)); -#endif /* _MSC_VER || __unaligned */ - } -} - -#define UNALIGNED_PEEK_8(ptr, struct, field) \ - peek_u8(ptr_disp(ptr, offsetof(struct, field))) -#define UNALIGNED_POKE_8(ptr, struct, field, 
value) \ - poke_u8(ptr_disp(ptr, offsetof(struct, field)), value) - -#define UNALIGNED_PEEK_16(ptr, struct, field) \ - unaligned_peek_u16(1, ptr_disp(ptr, offsetof(struct, field))) -#define UNALIGNED_POKE_16(ptr, struct, field, value) \ - unaligned_poke_u16(1, ptr_disp(ptr, offsetof(struct, field)), value) - -#define UNALIGNED_PEEK_32(ptr, struct, field) \ - unaligned_peek_u32(1, ptr_disp(ptr, offsetof(struct, field))) -#define UNALIGNED_POKE_32(ptr, struct, field, value) \ - unaligned_poke_u32(1, ptr_disp(ptr, offsetof(struct, field)), value) - -#define UNALIGNED_PEEK_64(ptr, struct, field) \ - unaligned_peek_u64(1, ptr_disp(ptr, offsetof(struct, field))) -#define UNALIGNED_POKE_64(ptr, struct, field, value) \ - unaligned_poke_u64(1, ptr_disp(ptr, offsetof(struct, field)), value) - -/* Get the page number pointed to by a branch node */ -MDBX_NOTHROW_PURE_FUNCTION static __always_inline pgno_t -node_pgno(const MDBX_node *const __restrict node) { - pgno_t pgno = UNALIGNED_PEEK_32(node, MDBX_node, mn_pgno32); - if (sizeof(pgno) > 4) - pgno |= ((uint64_t)UNALIGNED_PEEK_8(node, MDBX_node, mn_extra)) << 32; - return pgno; -} - -/* Set the page number in a branch node */ -static __always_inline void node_set_pgno(MDBX_node *const __restrict node, - pgno_t pgno) { - assert(pgno >= MIN_PAGENO && pgno <= MAX_PAGENO); - - UNALIGNED_POKE_32(node, MDBX_node, mn_pgno32, (uint32_t)pgno); - if (sizeof(pgno) > 4) - UNALIGNED_POKE_8(node, MDBX_node, mn_extra, - (uint8_t)((uint64_t)pgno >> 32)); -} - -/* Get the size of the data in a leaf node */ -MDBX_NOTHROW_PURE_FUNCTION static __always_inline size_t -node_ds(const MDBX_node *const __restrict node) { - return UNALIGNED_PEEK_32(node, MDBX_node, mn_dsize); -} - -/* Set the size of the data for a leaf node */ -static __always_inline void node_set_ds(MDBX_node *const __restrict node, - size_t size) { - assert(size < INT_MAX); - UNALIGNED_POKE_32(node, MDBX_node, mn_dsize, (uint32_t)size); -} - -/* The size of a key in a node */ 
-MDBX_NOTHROW_PURE_FUNCTION static __always_inline size_t -node_ks(const MDBX_node *const __restrict node) { - return UNALIGNED_PEEK_16(node, MDBX_node, mn_ksize); -} - -/* Set the size of the key for a leaf node */ -static __always_inline void node_set_ks(MDBX_node *const __restrict node, - size_t size) { - assert(size < INT16_MAX); - UNALIGNED_POKE_16(node, MDBX_node, mn_ksize, (uint16_t)size); -} - -MDBX_NOTHROW_PURE_FUNCTION static __always_inline uint8_t -node_flags(const MDBX_node *const __restrict node) { - return UNALIGNED_PEEK_8(node, MDBX_node, mn_flags); -} - -static __always_inline void node_set_flags(MDBX_node *const __restrict node, - uint8_t flags) { - UNALIGNED_POKE_8(node, MDBX_node, mn_flags, flags); -} - -/* Size of the node header, excluding dynamic data at the end */ -#define NODESIZE offsetof(MDBX_node, mn_data) - -/* Address of the key for the node */ -MDBX_NOTHROW_PURE_FUNCTION static __always_inline void * -node_key(const MDBX_node *const __restrict node) { - return ptr_disp(node, NODESIZE); -} - -/* Address of the data for a node */ -MDBX_NOTHROW_PURE_FUNCTION static __always_inline void * -node_data(const MDBX_node *const __restrict node) { - return ptr_disp(node_key(node), node_ks(node)); -} - -/* Size of a node in a leaf page with a given key and data. - * This is node header plus key plus data size. */ -MDBX_NOTHROW_CONST_FUNCTION static __always_inline size_t -node_size_len(const size_t key_len, const size_t value_len) { - return NODESIZE + EVEN(key_len + value_len); -} -MDBX_NOTHROW_PURE_FUNCTION static __always_inline size_t -node_size(const MDBX_val *key, const MDBX_val *value) { - return node_size_len(key ? key->iov_len : 0, value ? 
value->iov_len : 0); -} - -MDBX_NOTHROW_PURE_FUNCTION static __always_inline pgno_t -peek_pgno(const void *const __restrict ptr) { - if (sizeof(pgno_t) == sizeof(uint32_t)) - return (pgno_t)unaligned_peek_u32(1, ptr); - else if (sizeof(pgno_t) == sizeof(uint64_t)) - return (pgno_t)unaligned_peek_u64(1, ptr); - else { - pgno_t pgno; - memcpy(&pgno, ptr, sizeof(pgno)); - return pgno; - } -} - -static __always_inline void poke_pgno(void *const __restrict ptr, - const pgno_t pgno) { - if (sizeof(pgno) == sizeof(uint32_t)) - unaligned_poke_u32(1, ptr, pgno); - else if (sizeof(pgno) == sizeof(uint64_t)) - unaligned_poke_u64(1, ptr, pgno); - else - memcpy(ptr, &pgno, sizeof(pgno)); -} - -MDBX_NOTHROW_PURE_FUNCTION static __always_inline pgno_t -node_largedata_pgno(const MDBX_node *const __restrict node) { - assert(node_flags(node) & F_BIGDATA); - return peek_pgno(node_data(node)); -} - -/*------------------------------------------------------------------------------ - * Nodes, Keys & Values length limitation factors: - * - * BRANCH_NODE_MAX - * Branch-page must contain at least two nodes, within each a key and a child - * page number. But page can't be split if it contains less that 4 keys, - * i.e. a page should not overflow before adding the fourth key. Therefore, - * at least 3 branch-node should fit in the single branch-page. Further, the - * first node of a branch-page doesn't contain a key, i.e. the first node - * is always require space just for itself. Thus: - * PAGEROOM = pagesize - page_hdr_len; - * BRANCH_NODE_MAX = even_floor( - * (PAGEROOM - sizeof(indx_t) - NODESIZE) / (3 - 1) - sizeof(indx_t)); - * KEYLEN_MAX = BRANCH_NODE_MAX - node_hdr_len; - * - * LEAF_NODE_MAX - * Leaf-node must fit into single leaf-page, where a value could be placed on - * a large/overflow page. However, may require to insert a nearly page-sized - * node between two large nodes are already fill-up a page. 
In this case the - * page must be split to two if some pair of nodes fits on one page, or - * otherwise the page should be split to the THREE with a single node - * per each of ones. Such 1-into-3 page splitting is costly and complex since - * requires TWO insertion into the parent page, that could lead to split it - * and so on up to the root. Therefore double-splitting is avoided here and - * the maximum node size is half of a leaf page space: - * LEAF_NODE_MAX = even_floor(PAGEROOM / 2 - sizeof(indx_t)); - * DATALEN_NO_OVERFLOW = LEAF_NODE_MAX - NODESIZE - KEYLEN_MAX; - * - * - SubDatabase-node must fit into one leaf-page: - * SUBDB_NAME_MAX = LEAF_NODE_MAX - node_hdr_len - sizeof(MDBX_db); - * - * - Dupsort values itself are a keys in a dupsort-subdb and couldn't be longer - * than the KEYLEN_MAX. But dupsort node must not great than LEAF_NODE_MAX, - * since dupsort value couldn't be placed on a large/overflow page: - * DUPSORT_DATALEN_MAX = min(KEYLEN_MAX, - * max(DATALEN_NO_OVERFLOW, sizeof(MDBX_db)); - */ - -#define PAGEROOM(pagesize) ((pagesize)-PAGEHDRSZ) -#define EVEN_FLOOR(n) ((n) & ~(size_t)1) -#define BRANCH_NODE_MAX(pagesize) \ - (EVEN_FLOOR((PAGEROOM(pagesize) - sizeof(indx_t) - NODESIZE) / (3 - 1) - \ - sizeof(indx_t))) -#define LEAF_NODE_MAX(pagesize) \ - (EVEN_FLOOR(PAGEROOM(pagesize) / 2) - sizeof(indx_t)) -#define MAX_GC1OVPAGE(pagesize) (PAGEROOM(pagesize) / sizeof(pgno_t) - 1) - -static __inline size_t keysize_max(size_t pagesize, MDBX_db_flags_t flags) { - assert(pagesize >= MIN_PAGESIZE && pagesize <= MAX_PAGESIZE && - is_powerof2(pagesize)); - STATIC_ASSERT(BRANCH_NODE_MAX(MIN_PAGESIZE) - NODESIZE >= 8); - if (flags & MDBX_INTEGERKEY) - return 8 /* sizeof(uint64_t) */; - - const intptr_t max_branch_key = BRANCH_NODE_MAX(pagesize) - NODESIZE; - STATIC_ASSERT(LEAF_NODE_MAX(MIN_PAGESIZE) - NODESIZE - - /* sizeof(uint64) as a key */ 8 > - sizeof(MDBX_db)); - if (flags & - (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP)) { - 
const intptr_t max_dupsort_leaf_key = - LEAF_NODE_MAX(pagesize) - NODESIZE - sizeof(MDBX_db); - return (max_branch_key < max_dupsort_leaf_key) ? max_branch_key - : max_dupsort_leaf_key; - } - return max_branch_key; -} - -static __inline size_t keysize_min(MDBX_db_flags_t flags) { - return (flags & MDBX_INTEGERKEY) ? 4 /* sizeof(uint32_t) */ : 0; -} - -static __inline size_t valsize_min(MDBX_db_flags_t flags) { - if (flags & MDBX_INTEGERDUP) - return 4 /* sizeof(uint32_t) */; - else if (flags & MDBX_DUPFIXED) - return sizeof(indx_t); - else - return 0; -} - -static __inline size_t valsize_max(size_t pagesize, MDBX_db_flags_t flags) { - assert(pagesize >= MIN_PAGESIZE && pagesize <= MAX_PAGESIZE && - is_powerof2(pagesize)); - - if (flags & MDBX_INTEGERDUP) - return 8 /* sizeof(uint64_t) */; - - if (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP)) - return keysize_max(pagesize, 0); - - const unsigned page_ln2 = log2n_powerof2(pagesize); - const size_t hard = 0x7FF00000ul; - const size_t hard_pages = hard >> page_ln2; - STATIC_ASSERT(MDBX_PGL_LIMIT <= MAX_PAGENO); - const size_t pages_limit = MDBX_PGL_LIMIT / 4; - const size_t limit = - (hard_pages < pages_limit) ? hard : (pages_limit << page_ln2); - return (limit < MAX_MAPSIZE / 2) ? limit : MAX_MAPSIZE / 2; -} - -__cold int mdbx_env_get_maxkeysize(const MDBX_env *env) { - return mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT); -} - -__cold int mdbx_env_get_maxkeysize_ex(const MDBX_env *env, - MDBX_db_flags_t flags) { - if (unlikely(!env || env->me_signature.weak != MDBX_ME_SIGNATURE)) - return -1; - - return (int)mdbx_limits_keysize_max((intptr_t)env->me_psize, flags); -} - -size_t mdbx_default_pagesize(void) { - size_t pagesize = osal_syspagesize(); - ENSURE(nullptr, is_powerof2(pagesize)); - pagesize = (pagesize >= MIN_PAGESIZE) ? pagesize : MIN_PAGESIZE; - pagesize = (pagesize <= MAX_PAGESIZE) ? 
pagesize : MAX_PAGESIZE; - return pagesize; -} - -__cold intptr_t mdbx_limits_keysize_max(intptr_t pagesize, - MDBX_db_flags_t flags) { - if (pagesize < 1) - pagesize = (intptr_t)mdbx_default_pagesize(); - if (unlikely(pagesize < (intptr_t)MIN_PAGESIZE || - pagesize > (intptr_t)MAX_PAGESIZE || - !is_powerof2((size_t)pagesize))) - return -1; - - return keysize_max(pagesize, flags); -} - -__cold intptr_t mdbx_limits_keysize_min(MDBX_db_flags_t flags) { - return keysize_min(flags); -} - -__cold int mdbx_env_get_maxvalsize_ex(const MDBX_env *env, - MDBX_db_flags_t flags) { - if (unlikely(!env || env->me_signature.weak != MDBX_ME_SIGNATURE)) - return -1; - - return (int)mdbx_limits_valsize_max((intptr_t)env->me_psize, flags); -} - -__cold intptr_t mdbx_limits_valsize_max(intptr_t pagesize, - MDBX_db_flags_t flags) { - if (pagesize < 1) - pagesize = (intptr_t)mdbx_default_pagesize(); - if (unlikely(pagesize < (intptr_t)MIN_PAGESIZE || - pagesize > (intptr_t)MAX_PAGESIZE || - !is_powerof2((size_t)pagesize))) - return -1; - - return valsize_max(pagesize, flags); -} - -__cold intptr_t mdbx_limits_valsize_min(MDBX_db_flags_t flags) { - return valsize_min(flags); -} - -__cold intptr_t mdbx_limits_pairsize4page_max(intptr_t pagesize, - MDBX_db_flags_t flags) { - if (pagesize < 1) - pagesize = (intptr_t)mdbx_default_pagesize(); - if (unlikely(pagesize < (intptr_t)MIN_PAGESIZE || - pagesize > (intptr_t)MAX_PAGESIZE || - !is_powerof2((size_t)pagesize))) - return -1; - - if (flags & - (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP)) - return BRANCH_NODE_MAX(pagesize) - NODESIZE; - - return LEAF_NODE_MAX(pagesize) - NODESIZE; -} - -__cold int mdbx_env_get_pairsize4page_max(const MDBX_env *env, - MDBX_db_flags_t flags) { - if (unlikely(!env || env->me_signature.weak != MDBX_ME_SIGNATURE)) - return -1; - - return (int)mdbx_limits_pairsize4page_max((intptr_t)env->me_psize, flags); -} - -__cold intptr_t mdbx_limits_valsize4page_max(intptr_t pagesize, - 
MDBX_db_flags_t flags) { - if (pagesize < 1) - pagesize = (intptr_t)mdbx_default_pagesize(); - if (unlikely(pagesize < (intptr_t)MIN_PAGESIZE || - pagesize > (intptr_t)MAX_PAGESIZE || - !is_powerof2((size_t)pagesize))) - return -1; - - if (flags & - (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP)) - return valsize_max(pagesize, flags); - - return PAGEROOM(pagesize); -} - -__cold int mdbx_env_get_valsize4page_max(const MDBX_env *env, - MDBX_db_flags_t flags) { - if (unlikely(!env || env->me_signature.weak != MDBX_ME_SIGNATURE)) - return -1; - - return (int)mdbx_limits_valsize4page_max((intptr_t)env->me_psize, flags); -} - -/* Calculate the size of a leaf node. - * - * The size depends on the environment's page size; if a data item - * is too large it will be put onto an large/overflow page and the node - * size will only include the key and not the data. Sizes are always - * rounded up to an even number of bytes, to guarantee 2-byte alignment - * of the MDBX_node headers. */ -MDBX_NOTHROW_PURE_FUNCTION static __always_inline size_t -leaf_size(const MDBX_env *env, const MDBX_val *key, const MDBX_val *data) { - size_t node_bytes = node_size(key, data); - if (node_bytes > env->me_leaf_nodemax) { - /* put on large/overflow page */ - node_bytes = node_size_len(key->iov_len, 0) + sizeof(pgno_t); - } - - return node_bytes + sizeof(indx_t); -} - -/* Calculate the size of a branch node. - * - * The size should depend on the environment's page size but since - * we currently don't support spilling large keys onto large/overflow - * pages, it's simply the size of the MDBX_node header plus the - * size of the key. Sizes are always rounded up to an even number - * of bytes, to guarantee 2-byte alignment of the MDBX_node headers. - * - * [in] env The environment handle. - * [in] key The key for the node. - * - * Returns The number of bytes needed to store the node. 
*/ -MDBX_NOTHROW_PURE_FUNCTION static __always_inline size_t -branch_size(const MDBX_env *env, const MDBX_val *key) { - /* Size of a node in a branch page with a given key. - * This is just the node header plus the key, there is no data. */ - size_t node_bytes = node_size(key, nullptr); - if (unlikely(node_bytes > env->me_branch_nodemax)) { - /* put on large/overflow page */ - /* not implemented */ - mdbx_panic("node_size(key) %zu > %u branch_nodemax", node_bytes, - env->me_branch_nodemax); - node_bytes = node_size(key, nullptr) + sizeof(pgno_t); - } - - return node_bytes + sizeof(indx_t); -} - -MDBX_NOTHROW_CONST_FUNCTION static __always_inline uint16_t -flags_db2sub(uint16_t db_flags) { - uint16_t sub_flags = db_flags & MDBX_DUPFIXED; - - /* MDBX_INTEGERDUP => MDBX_INTEGERKEY */ -#define SHIFT_INTEGERDUP_TO_INTEGERKEY 2 - STATIC_ASSERT((MDBX_INTEGERDUP >> SHIFT_INTEGERDUP_TO_INTEGERKEY) == - MDBX_INTEGERKEY); - sub_flags |= (db_flags & MDBX_INTEGERDUP) >> SHIFT_INTEGERDUP_TO_INTEGERKEY; - - /* MDBX_REVERSEDUP => MDBX_REVERSEKEY */ -#define SHIFT_REVERSEDUP_TO_REVERSEKEY 5 - STATIC_ASSERT((MDBX_REVERSEDUP >> SHIFT_REVERSEDUP_TO_REVERSEKEY) == - MDBX_REVERSEKEY); - sub_flags |= (db_flags & MDBX_REVERSEDUP) >> SHIFT_REVERSEDUP_TO_REVERSEKEY; - - return sub_flags; -} - -/*----------------------------------------------------------------------------*/ - -MDBX_NOTHROW_PURE_FUNCTION static __always_inline size_t -pgno2bytes(const MDBX_env *env, size_t pgno) { - eASSERT(env, (1u << env->me_psize2log) == env->me_psize); - return ((size_t)pgno) << env->me_psize2log; -} - -MDBX_NOTHROW_PURE_FUNCTION static __always_inline MDBX_page * -pgno2page(const MDBX_env *env, size_t pgno) { - return ptr_disp(env->me_map, pgno2bytes(env, pgno)); -} - -MDBX_NOTHROW_PURE_FUNCTION static __always_inline pgno_t -bytes2pgno(const MDBX_env *env, size_t bytes) { - eASSERT(env, (env->me_psize >> env->me_psize2log) == 1); - return (pgno_t)(bytes >> env->me_psize2log); -} - 
-MDBX_NOTHROW_PURE_FUNCTION static size_t -pgno_align2os_bytes(const MDBX_env *env, size_t pgno) { - return ceil_powerof2(pgno2bytes(env, pgno), env->me_os_psize); -} - -MDBX_NOTHROW_PURE_FUNCTION static pgno_t pgno_align2os_pgno(const MDBX_env *env, - size_t pgno) { - return bytes2pgno(env, pgno_align2os_bytes(env, pgno)); -} - -MDBX_NOTHROW_PURE_FUNCTION static size_t -bytes_align2os_bytes(const MDBX_env *env, size_t bytes) { - return ceil_powerof2(ceil_powerof2(bytes, env->me_psize), env->me_os_psize); -} - -/* Address of first usable data byte in a page, after the header */ -MDBX_NOTHROW_PURE_FUNCTION static __always_inline void * -page_data(const MDBX_page *mp) { - return ptr_disp(mp, PAGEHDRSZ); -} - -MDBX_NOTHROW_PURE_FUNCTION static __always_inline const MDBX_page * -data_page(const void *data) { - return container_of(data, MDBX_page, mp_ptrs); -} - -MDBX_NOTHROW_PURE_FUNCTION static __always_inline MDBX_meta * -page_meta(MDBX_page *mp) { - return (MDBX_meta *)page_data(mp); -} - -/* Number of nodes on a page */ -MDBX_NOTHROW_PURE_FUNCTION static __always_inline size_t -page_numkeys(const MDBX_page *mp) { - return mp->mp_lower >> 1; -} - -/* The amount of space remaining in the page */ -MDBX_NOTHROW_PURE_FUNCTION static __always_inline size_t -page_room(const MDBX_page *mp) { - return mp->mp_upper - mp->mp_lower; -} - -/* Maximum free space in an empty page */ -MDBX_NOTHROW_PURE_FUNCTION static __always_inline size_t -page_space(const MDBX_env *env) { - STATIC_ASSERT(PAGEHDRSZ % 2 == 0); - return env->me_psize - PAGEHDRSZ; -} - -MDBX_NOTHROW_PURE_FUNCTION static __always_inline size_t -page_used(const MDBX_env *env, const MDBX_page *mp) { - return page_space(env) - page_room(mp); -} - -/* The percentage of space used in the page, in a percents. 
*/ -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static __inline double -page_fill(const MDBX_env *env, const MDBX_page *mp) { - return page_used(env, mp) * 100.0 / page_space(env); -} - -/* The number of large/overflow pages needed to store the given size. */ -MDBX_NOTHROW_PURE_FUNCTION static __always_inline pgno_t -number_of_ovpages(const MDBX_env *env, size_t bytes) { - return bytes2pgno(env, PAGEHDRSZ - 1 + bytes) + 1; -} - -__cold static const char *pagetype_caption(const uint8_t type, - char buf4unknown[16]) { - switch (type) { - case P_BRANCH: - return "branch"; - case P_LEAF: - return "leaf"; - case P_LEAF | P_SUBP: - return "subleaf"; - case P_LEAF | P_LEAF2: - return "dupfixed-leaf"; - case P_LEAF | P_LEAF2 | P_SUBP: - return "dupfixed-subleaf"; - case P_LEAF | P_LEAF2 | P_SUBP | P_LEGACY_DIRTY: - return "dupfixed-subleaf.legacy-dirty"; - case P_OVERFLOW: - return "large"; - default: - snprintf(buf4unknown, 16, "unknown_0x%x", type); - return buf4unknown; - } -} - -__cold static int MDBX_PRINTF_ARGS(2, 3) - bad_page(const MDBX_page *mp, const char *fmt, ...) { - if (LOG_ENABLED(MDBX_LOG_ERROR)) { - static const MDBX_page *prev; - if (prev != mp) { - char buf4unknown[16]; - prev = mp; - debug_log(MDBX_LOG_ERROR, "badpage", 0, - "corrupted %s-page #%u, mod-txnid %" PRIaTXN "\n", - pagetype_caption(PAGETYPE_WHOLE(mp), buf4unknown), mp->mp_pgno, - mp->mp_txnid); - } - - va_list args; - va_start(args, fmt); - debug_log_va(MDBX_LOG_ERROR, "badpage", 0, fmt, args); - va_end(args); - } - return MDBX_CORRUPTED; -} - -__cold static void MDBX_PRINTF_ARGS(2, 3) - poor_page(const MDBX_page *mp, const char *fmt, ...) 
{ - if (LOG_ENABLED(MDBX_LOG_NOTICE)) { - static const MDBX_page *prev; - if (prev != mp) { - char buf4unknown[16]; - prev = mp; - debug_log(MDBX_LOG_NOTICE, "poorpage", 0, - "suboptimal %s-page #%u, mod-txnid %" PRIaTXN "\n", - pagetype_caption(PAGETYPE_WHOLE(mp), buf4unknown), mp->mp_pgno, - mp->mp_txnid); - } - - va_list args; - va_start(args, fmt); - debug_log_va(MDBX_LOG_NOTICE, "poorpage", 0, fmt, args); - va_end(args); - } -} - -/* Address of node i in page p */ -MDBX_NOTHROW_PURE_FUNCTION static __always_inline MDBX_node * -page_node(const MDBX_page *mp, size_t i) { - assert(PAGETYPE_COMPAT(mp) == P_LEAF || PAGETYPE_WHOLE(mp) == P_BRANCH); - assert(page_numkeys(mp) > i); - assert(mp->mp_ptrs[i] % 2 == 0); - return ptr_disp(mp, mp->mp_ptrs[i] + PAGEHDRSZ); -} - -/* The address of a key in a LEAF2 page. - * LEAF2 pages are used for MDBX_DUPFIXED sorted-duplicate sub-DBs. - * There are no node headers, keys are stored contiguously. */ -MDBX_NOTHROW_PURE_FUNCTION static __always_inline void * -page_leaf2key(const MDBX_page *mp, size_t i, size_t keysize) { - assert(PAGETYPE_COMPAT(mp) == (P_LEAF | P_LEAF2)); - assert(mp->mp_leaf2_ksize == keysize); - (void)keysize; - return ptr_disp(mp, PAGEHDRSZ + i * mp->mp_leaf2_ksize); -} - -/* Set the node's key into keyptr. */ -static __always_inline void get_key(const MDBX_node *node, MDBX_val *keyptr) { - keyptr->iov_len = node_ks(node); - keyptr->iov_base = node_key(node); -} - -/* Set the node's key into keyptr, if requested. */ -static __always_inline void -get_key_optional(const MDBX_node *node, MDBX_val *keyptr /* __may_null */) { - if (keyptr) - get_key(node, keyptr); -} - -/*------------------------------------------------------------------------------ - * safe read/write volatile 64-bit fields on 32-bit architectures. 
*/ - -#ifndef atomic_store64 -MDBX_MAYBE_UNUSED static __always_inline uint64_t -atomic_store64(MDBX_atomic_uint64_t *p, const uint64_t value, - enum MDBX_memory_order order) { - STATIC_ASSERT(sizeof(MDBX_atomic_uint64_t) == 8); -#if MDBX_64BIT_ATOMIC -#if __GNUC_PREREQ(11, 0) - STATIC_ASSERT(__alignof__(MDBX_atomic_uint64_t) >= sizeof(uint64_t)); -#endif /* GNU C >= 11 */ -#ifdef MDBX_HAVE_C11ATOMICS - assert(atomic_is_lock_free(MDBX_c11a_rw(uint64_t, p))); - atomic_store_explicit(MDBX_c11a_rw(uint64_t, p), value, mo_c11_store(order)); -#else /* MDBX_HAVE_C11ATOMICS */ - if (order != mo_Relaxed) - osal_compiler_barrier(); - p->weak = value; - osal_memory_fence(order, true); -#endif /* MDBX_HAVE_C11ATOMICS */ -#else /* !MDBX_64BIT_ATOMIC */ - osal_compiler_barrier(); - atomic_store32(&p->low, (uint32_t)value, mo_Relaxed); - jitter4testing(true); - atomic_store32(&p->high, (uint32_t)(value >> 32), order); - jitter4testing(true); -#endif /* !MDBX_64BIT_ATOMIC */ - return value; -} -#endif /* atomic_store64 */ - -#ifndef atomic_load64 -MDBX_MAYBE_UNUSED static -#if MDBX_64BIT_ATOMIC - __always_inline -#endif /* MDBX_64BIT_ATOMIC */ - uint64_t - atomic_load64(const volatile MDBX_atomic_uint64_t *p, - enum MDBX_memory_order order) { - STATIC_ASSERT(sizeof(MDBX_atomic_uint64_t) == 8); -#if MDBX_64BIT_ATOMIC -#ifdef MDBX_HAVE_C11ATOMICS - assert(atomic_is_lock_free(MDBX_c11a_ro(uint64_t, p))); - return atomic_load_explicit(MDBX_c11a_ro(uint64_t, p), mo_c11_load(order)); -#else /* MDBX_HAVE_C11ATOMICS */ - osal_memory_fence(order, false); - const uint64_t value = p->weak; - if (order != mo_Relaxed) - osal_compiler_barrier(); - return value; -#endif /* MDBX_HAVE_C11ATOMICS */ -#else /* !MDBX_64BIT_ATOMIC */ - osal_compiler_barrier(); - uint64_t value = (uint64_t)atomic_load32(&p->high, order) << 32; - jitter4testing(true); - value |= atomic_load32(&p->low, (order == mo_Relaxed) ? 
mo_Relaxed - : mo_AcquireRelease); - jitter4testing(true); - for (;;) { - osal_compiler_barrier(); - uint64_t again = (uint64_t)atomic_load32(&p->high, order) << 32; - jitter4testing(true); - again |= atomic_load32(&p->low, (order == mo_Relaxed) ? mo_Relaxed - : mo_AcquireRelease); - jitter4testing(true); - if (likely(value == again)) - return value; - value = again; - } -#endif /* !MDBX_64BIT_ATOMIC */ -} -#endif /* atomic_load64 */ - -static __always_inline void atomic_yield(void) { -#if defined(_WIN32) || defined(_WIN64) - YieldProcessor(); -#elif defined(__ia32__) || defined(__e2k__) - __builtin_ia32_pause(); -#elif defined(__ia64__) -#if defined(__HP_cc__) || defined(__HP_aCC__) - _Asm_hint(_HINT_PAUSE); -#else - __asm__ __volatile__("hint @pause"); -#endif -#elif defined(__aarch64__) || (defined(__ARM_ARCH) && __ARM_ARCH > 6) || \ - defined(__ARM_ARCH_6K__) -#ifdef __CC_ARM - __yield(); -#else - __asm__ __volatile__("yield"); -#endif -#elif (defined(__mips64) || defined(__mips64__)) && defined(__mips_isa_rev) && \ - __mips_isa_rev >= 2 - __asm__ __volatile__("pause"); -#elif defined(__mips) || defined(__mips__) || defined(__mips64) || \ - defined(__mips64__) || defined(_M_MRX000) || defined(_MIPS_) || \ - defined(__MWERKS__) || defined(__sgi) - __asm__ __volatile__(".word 0x00000140"); -#elif defined(__linux__) || defined(__gnu_linux__) || defined(_UNIX03_SOURCE) - sched_yield(); -#elif (defined(_GNU_SOURCE) && __GLIBC_PREREQ(2, 1)) || defined(_OPEN_THREADS) - pthread_yield(); -#endif -} - -#if MDBX_64BIT_CAS -static __always_inline bool atomic_cas64(MDBX_atomic_uint64_t *p, uint64_t c, - uint64_t v) { -#ifdef MDBX_HAVE_C11ATOMICS - STATIC_ASSERT(sizeof(long long) >= sizeof(uint64_t)); - assert(atomic_is_lock_free(MDBX_c11a_rw(uint64_t, p))); - return atomic_compare_exchange_strong(MDBX_c11a_rw(uint64_t, p), &c, v); -#elif defined(__GNUC__) || defined(__clang__) - return __sync_bool_compare_and_swap(&p->weak, c, v); -#elif defined(_MSC_VER) - return c == 
(uint64_t)_InterlockedCompareExchange64( - (volatile __int64 *)&p->weak, v, c); -#elif defined(__APPLE__) - return OSAtomicCompareAndSwap64Barrier(c, v, &p->weak); -#else -#error FIXME: Unsupported compiler -#endif -} -#endif /* MDBX_64BIT_CAS */ - -static __always_inline bool atomic_cas32(MDBX_atomic_uint32_t *p, uint32_t c, - uint32_t v) { -#ifdef MDBX_HAVE_C11ATOMICS - STATIC_ASSERT(sizeof(int) >= sizeof(uint32_t)); - assert(atomic_is_lock_free(MDBX_c11a_rw(uint32_t, p))); - return atomic_compare_exchange_strong(MDBX_c11a_rw(uint32_t, p), &c, v); -#elif defined(__GNUC__) || defined(__clang__) - return __sync_bool_compare_and_swap(&p->weak, c, v); -#elif defined(_MSC_VER) - STATIC_ASSERT(sizeof(volatile long) == sizeof(volatile uint32_t)); - return c == - (uint32_t)_InterlockedCompareExchange((volatile long *)&p->weak, v, c); -#elif defined(__APPLE__) - return OSAtomicCompareAndSwap32Barrier(c, v, &p->weak); -#else -#error FIXME: Unsupported compiler -#endif -} - -static __always_inline uint32_t atomic_add32(MDBX_atomic_uint32_t *p, - uint32_t v) { -#ifdef MDBX_HAVE_C11ATOMICS - STATIC_ASSERT(sizeof(int) >= sizeof(uint32_t)); - assert(atomic_is_lock_free(MDBX_c11a_rw(uint32_t, p))); - return atomic_fetch_add(MDBX_c11a_rw(uint32_t, p), v); -#elif defined(__GNUC__) || defined(__clang__) - return __sync_fetch_and_add(&p->weak, v); -#elif defined(_MSC_VER) - STATIC_ASSERT(sizeof(volatile long) == sizeof(volatile uint32_t)); - return (uint32_t)_InterlockedExchangeAdd((volatile long *)&p->weak, v); -#elif defined(__APPLE__) - return OSAtomicAdd32Barrier(v, &p->weak); -#else -#error FIXME: Unsupported compiler -#endif -} - -#define atomic_sub32(p, v) atomic_add32(p, 0 - (v)) - -static __always_inline uint64_t safe64_txnid_next(uint64_t txnid) { - txnid += xMDBX_TXNID_STEP; -#if !MDBX_64BIT_CAS - /* avoid overflow of low-part in safe64_reset() */ - txnid += (UINT32_MAX == (uint32_t)txnid); -#endif - return txnid; -} - -/* Atomically make target value >= 
SAFE64_INVALID_THRESHOLD */ -static __always_inline void safe64_reset(MDBX_atomic_uint64_t *p, - bool single_writer) { - if (single_writer) { -#if MDBX_64BIT_ATOMIC && MDBX_WORDBITS >= 64 - atomic_store64(p, UINT64_MAX, mo_AcquireRelease); -#else - atomic_store32(&p->high, UINT32_MAX, mo_AcquireRelease); -#endif /* MDBX_64BIT_ATOMIC && MDBX_WORDBITS >= 64 */ - } else { -#if MDBX_64BIT_CAS && MDBX_64BIT_ATOMIC - /* atomically make value >= SAFE64_INVALID_THRESHOLD by 64-bit operation */ - atomic_store64(p, UINT64_MAX, mo_AcquireRelease); -#elif MDBX_64BIT_CAS - /* atomically make value >= SAFE64_INVALID_THRESHOLD by 32-bit operation */ - atomic_store32(&p->high, UINT32_MAX, mo_AcquireRelease); -#else - /* it is safe to increment low-part to avoid ABA, since xMDBX_TXNID_STEP > 1 - * and overflow was preserved in safe64_txnid_next() */ - STATIC_ASSERT(xMDBX_TXNID_STEP > 1); - atomic_add32(&p->low, 1) /* avoid ABA in safe64_reset_compare() */; - atomic_store32(&p->high, UINT32_MAX, mo_AcquireRelease); - atomic_add32(&p->low, 1) /* avoid ABA in safe64_reset_compare() */; -#endif /* MDBX_64BIT_CAS && MDBX_64BIT_ATOMIC */ - } - assert(p->weak >= SAFE64_INVALID_THRESHOLD); - jitter4testing(true); -} - -static __always_inline bool safe64_reset_compare(MDBX_atomic_uint64_t *p, - txnid_t compare) { - /* LY: This function is used to reset `mr_txnid` from hsr-handler in case - * the asynchronously cancellation of read transaction. Therefore, - * there may be a collision between the cleanup performed here and - * asynchronous termination and restarting of the read transaction - * in another process/thread. In general we MUST NOT reset the `mr_txnid` - * if a new transaction was started (i.e. if `mr_txnid` was changed). */ -#if MDBX_64BIT_CAS - bool rc = atomic_cas64(p, compare, UINT64_MAX); -#else - /* LY: There is no gold ratio here since shared mutex is too costly, - * in such way we must acquire/release it for every update of mr_txnid, - * i.e. 
twice for each read transaction). */ - bool rc = false; - if (likely(atomic_load32(&p->low, mo_AcquireRelease) == (uint32_t)compare && - atomic_cas32(&p->high, (uint32_t)(compare >> 32), UINT32_MAX))) { - if (unlikely(atomic_load32(&p->low, mo_AcquireRelease) != - (uint32_t)compare)) - atomic_cas32(&p->high, UINT32_MAX, (uint32_t)(compare >> 32)); - else - rc = true; - } -#endif /* MDBX_64BIT_CAS */ - jitter4testing(true); - return rc; -} - -static __always_inline void safe64_write(MDBX_atomic_uint64_t *p, - const uint64_t v) { - assert(p->weak >= SAFE64_INVALID_THRESHOLD); -#if MDBX_64BIT_ATOMIC && MDBX_64BIT_CAS - atomic_store64(p, v, mo_AcquireRelease); -#else /* MDBX_64BIT_ATOMIC */ - osal_compiler_barrier(); - /* update low-part but still value >= SAFE64_INVALID_THRESHOLD */ - atomic_store32(&p->low, (uint32_t)v, mo_Relaxed); - assert(p->weak >= SAFE64_INVALID_THRESHOLD); - jitter4testing(true); - /* update high-part from SAFE64_INVALID_THRESHOLD to actual value */ - atomic_store32(&p->high, (uint32_t)(v >> 32), mo_AcquireRelease); -#endif /* MDBX_64BIT_ATOMIC */ - assert(p->weak == v); - jitter4testing(true); -} - -static __always_inline uint64_t safe64_read(const MDBX_atomic_uint64_t *p) { - jitter4testing(true); - uint64_t v; - do - v = atomic_load64(p, mo_AcquireRelease); - while (!MDBX_64BIT_ATOMIC && unlikely(v != p->weak)); - return v; -} - -#if 0 /* unused for now */ -MDBX_MAYBE_UNUSED static __always_inline bool safe64_is_valid(uint64_t v) { -#if MDBX_WORDBITS >= 64 - return v < SAFE64_INVALID_THRESHOLD; -#else - return (v >> 32) != UINT32_MAX; -#endif /* MDBX_WORDBITS */ -} - -MDBX_MAYBE_UNUSED static __always_inline bool - safe64_is_valid_ptr(const MDBX_atomic_uint64_t *p) { -#if MDBX_64BIT_ATOMIC - return atomic_load64(p, mo_AcquireRelease) < SAFE64_INVALID_THRESHOLD; -#else - return atomic_load32(&p->high, mo_AcquireRelease) != UINT32_MAX; -#endif /* MDBX_64BIT_ATOMIC */ -} -#endif /* unused for now */ - -/* non-atomic write with safety for 
reading a half-updated value */ -static __always_inline void safe64_update(MDBX_atomic_uint64_t *p, - const uint64_t v) { -#if MDBX_64BIT_ATOMIC - atomic_store64(p, v, mo_Relaxed); -#else - safe64_reset(p, true); - safe64_write(p, v); -#endif /* MDBX_64BIT_ATOMIC */ -} - -/* non-atomic increment with safety for reading a half-updated value */ -MDBX_MAYBE_UNUSED static -#if MDBX_64BIT_ATOMIC - __always_inline -#endif /* MDBX_64BIT_ATOMIC */ - void - safe64_inc(MDBX_atomic_uint64_t *p, const uint64_t v) { - assert(v > 0); - safe64_update(p, safe64_read(p) + v); -} - -/*----------------------------------------------------------------------------*/ -/* rthc (tls keys and destructors) */ - -static int rthc_register(MDBX_env *const env); -static int rthc_remove(MDBX_env *const env); -static int rthc_uniq_check(const osal_mmap_t *pending, MDBX_env **found); - -typedef struct rthc_entry_t { - MDBX_env *env; -} rthc_entry_t; - -#if MDBX_DEBUG -#define RTHC_INITIAL_LIMIT 1 -#else -#define RTHC_INITIAL_LIMIT 16 -#endif - -static bin128_t bootid; - -#if defined(_WIN32) || defined(_WIN64) -static CRITICAL_SECTION rthc_critical_section; -#else - -static pthread_mutex_t rthc_mutex = PTHREAD_MUTEX_INITIALIZER; -static pthread_cond_t rthc_cond = PTHREAD_COND_INITIALIZER; -static osal_thread_key_t rthc_key; -static MDBX_atomic_uint32_t rthc_pending; - -static __inline uint64_t rthc_signature(const void *addr, uint8_t kind) { - uint64_t salt = osal_thread_self() * UINT64_C(0xA2F0EEC059629A17) ^ - UINT64_C(0x01E07C6FDB596497) * (uintptr_t)(addr); -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return salt << 8 | kind; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return (uint64_t)kind << 56 | salt >> 8; -#else -#error "FIXME: Unsupported byte order" -#endif /* __BYTE_ORDER__ */ -} - -#define MDBX_THREAD_RTHC_REGISTERED(addr) rthc_signature(addr, 0x0D) -#define MDBX_THREAD_RTHC_COUNTED(addr) rthc_signature(addr, 0xC0) -static __thread uint64_t rthc_thread_state -#if 
__has_attribute(tls_model) && \ - (defined(__PIC__) || defined(__pic__) || MDBX_BUILD_SHARED_LIBRARY) - __attribute__((tls_model("local-dynamic"))) -#endif - ; - -#if defined(__APPLE__) && defined(__SANITIZE_ADDRESS__) && \ - !defined(MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS) -/* Avoid ASAN-trap due the target TLS-variable feed by Darwin's tlv_free() */ -#define MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS \ - __attribute__((__no_sanitize_address__, __noinline__)) -#else -#define MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS __inline -#endif - -MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS static uint64_t rthc_read(const void *rthc) { - return *(volatile uint64_t *)rthc; -} - -MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS static uint64_t -rthc_compare_and_clean(const void *rthc, const uint64_t signature) { -#if MDBX_64BIT_CAS - return atomic_cas64((MDBX_atomic_uint64_t *)rthc, signature, 0); -#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return atomic_cas32((MDBX_atomic_uint32_t *)rthc, (uint32_t)signature, 0); -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return atomic_cas32((MDBX_atomic_uint32_t *)rthc, (uint32_t)(signature >> 32), - 0); -#else -#error "FIXME: Unsupported byte order" -#endif -} - -static __inline int rthc_atexit(void (*dtor)(void *), void *obj, - void *dso_symbol) { -#ifndef MDBX_HAVE_CXA_THREAD_ATEXIT_IMPL -#if defined(LIBCXXABI_HAS_CXA_THREAD_ATEXIT_IMPL) || \ - defined(HAVE___CXA_THREAD_ATEXIT_IMPL) || __GLIBC_PREREQ(2, 18) || \ - defined(BIONIC) -#define MDBX_HAVE_CXA_THREAD_ATEXIT_IMPL 1 -#else -#define MDBX_HAVE_CXA_THREAD_ATEXIT_IMPL 0 -#endif -#endif /* MDBX_HAVE_CXA_THREAD_ATEXIT_IMPL */ - -#ifndef MDBX_HAVE_CXA_THREAD_ATEXIT -#if defined(LIBCXXABI_HAS_CXA_THREAD_ATEXIT) || \ - defined(HAVE___CXA_THREAD_ATEXIT) -#define MDBX_HAVE_CXA_THREAD_ATEXIT 1 -#elif !MDBX_HAVE_CXA_THREAD_ATEXIT_IMPL && \ - (defined(__linux__) || defined(__gnu_linux__)) -#define MDBX_HAVE_CXA_THREAD_ATEXIT 1 -#else -#define MDBX_HAVE_CXA_THREAD_ATEXIT 0 -#endif -#endif /* MDBX_HAVE_CXA_THREAD_ATEXIT */ - - 
int rc = MDBX_ENOSYS; -#if MDBX_HAVE_CXA_THREAD_ATEXIT_IMPL && !MDBX_HAVE_CXA_THREAD_ATEXIT -#define __cxa_thread_atexit __cxa_thread_atexit_impl -#endif -#if MDBX_HAVE_CXA_THREAD_ATEXIT || defined(__cxa_thread_atexit) - extern int __cxa_thread_atexit(void (*dtor)(void *), void *obj, - void *dso_symbol) MDBX_WEAK_IMPORT_ATTRIBUTE; - if (&__cxa_thread_atexit) - rc = __cxa_thread_atexit(dtor, obj, dso_symbol); -#elif defined(__APPLE__) || defined(_DARWIN_C_SOURCE) - extern void _tlv_atexit(void (*termfunc)(void *objAddr), void *objAddr) - MDBX_WEAK_IMPORT_ATTRIBUTE; - if (&_tlv_atexit) { - (void)dso_symbol; - _tlv_atexit(dtor, obj); - rc = 0; - } -#else - (void)dtor; - (void)obj; - (void)dso_symbol; -#endif - return rc; -} - -__cold static void workaround_glibc_bug21031(void) { - /* Workaround for https://sourceware.org/bugzilla/show_bug.cgi?id=21031 - * - * Due race between pthread_key_delete() and __nptl_deallocate_tsd() - * The destructor(s) of thread-local-storage object(s) may be running - * in another thread(s) and be blocked or not finished yet. - * In such case we get a SEGFAULT after unload this library DSO. - * - * So just by yielding a few timeslices we give a chance - * to such destructor(s) for completion and avoids segfault. 
*/ - sched_yield(); - sched_yield(); - sched_yield(); -} -#endif - -static unsigned rthc_count, rthc_limit; -static rthc_entry_t *rthc_table; -static rthc_entry_t rthc_table_static[RTHC_INITIAL_LIMIT]; - -static __inline void rthc_lock(void) { -#if defined(_WIN32) || defined(_WIN64) - EnterCriticalSection(&rthc_critical_section); -#else - ENSURE(nullptr, osal_pthread_mutex_lock(&rthc_mutex) == 0); -#endif -} - -static __inline void rthc_unlock(void) { -#if defined(_WIN32) || defined(_WIN64) - LeaveCriticalSection(&rthc_critical_section); -#else - ENSURE(nullptr, pthread_mutex_unlock(&rthc_mutex) == 0); -#endif -} - -static __inline int thread_key_create(osal_thread_key_t *key) { - int rc; -#if defined(_WIN32) || defined(_WIN64) - *key = TlsAlloc(); - rc = (*key != TLS_OUT_OF_INDEXES) ? MDBX_SUCCESS : GetLastError(); -#else - rc = pthread_key_create(key, nullptr); -#endif - TRACE("&key = %p, value %" PRIuPTR ", rc %d", __Wpedantic_format_voidptr(key), - (uintptr_t)*key, rc); - return rc; -} - -static __inline void thread_key_delete(osal_thread_key_t key) { - TRACE("key = %" PRIuPTR, (uintptr_t)key); -#if defined(_WIN32) || defined(_WIN64) - ENSURE(nullptr, TlsFree(key)); -#else - ENSURE(nullptr, pthread_key_delete(key) == 0); - workaround_glibc_bug21031(); -#endif -} - -static __inline void *thread_rthc_get(osal_thread_key_t key) { -#if defined(_WIN32) || defined(_WIN64) - return TlsGetValue(key); -#else - return pthread_getspecific(key); -#endif -} - -static void thread_rthc_set(osal_thread_key_t key, const void *value) { -#if defined(_WIN32) || defined(_WIN64) - ENSURE(nullptr, TlsSetValue(key, (void *)value)); -#else - const uint64_t sign_registered = - MDBX_THREAD_RTHC_REGISTERED(&rthc_thread_state); - const uint64_t sign_counted = MDBX_THREAD_RTHC_COUNTED(&rthc_thread_state); - if (value && unlikely(rthc_thread_state != sign_registered && - rthc_thread_state != sign_counted)) { - rthc_thread_state = sign_registered; - TRACE("thread registered 0x%" PRIxPTR, 
osal_thread_self()); - if (rthc_atexit(thread_dtor, &rthc_thread_state, - (void *)&mdbx_version /* dso_anchor */)) { - ENSURE(nullptr, pthread_setspecific(rthc_key, &rthc_thread_state) == 0); - rthc_thread_state = sign_counted; - const unsigned count_before = atomic_add32(&rthc_pending, 1); - ENSURE(nullptr, count_before < INT_MAX); - NOTICE("fallback to pthreads' tsd, key %" PRIuPTR ", count %u", - (uintptr_t)rthc_key, count_before); - (void)count_before; - } - } - ENSURE(nullptr, pthread_setspecific(key, value) == 0); -#endif -} - -/* dtor called for thread, i.e. for all mdbx's environment objects */ -__cold void thread_dtor(void *rthc) { - rthc_lock(); - const uint32_t current_pid = osal_getpid(); - TRACE(">> pid %d, thread 0x%" PRIxPTR ", rthc %p", current_pid, - osal_thread_self(), rthc); - - for (size_t i = 0; i < rthc_count; ++i) { - MDBX_env *const env = rthc_table[i].env; - if (env->me_pid != current_pid) - continue; - if (!(env->me_flags & MDBX_ENV_TXKEY)) - continue; - MDBX_reader *const reader = thread_rthc_get(env->me_txkey); - MDBX_reader *const begin = &env->me_lck_mmap.lck->mti_readers[0]; - MDBX_reader *const end = - &env->me_lck_mmap.lck->mti_readers[env->me_maxreaders]; - if (reader < begin || reader >= end) - continue; -#if !defined(_WIN32) && !defined(_WIN64) - if (pthread_setspecific(env->me_txkey, nullptr) != 0) { - TRACE("== thread 0x%" PRIxPTR - ", rthc %p: ignore race with tsd-key deletion", - osal_thread_self(), __Wpedantic_format_voidptr(reader)); - continue /* ignore race with tsd-key deletion by mdbx_env_close() */; - } -#endif - - TRACE("== thread 0x%" PRIxPTR - ", rthc %p, [%zi], %p ... 
%p (%+i), rtch-pid %i, " - "current-pid %i", - osal_thread_self(), __Wpedantic_format_voidptr(reader), i, - __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end), - (int)(reader - begin), reader->mr_pid.weak, current_pid); - if (atomic_load32(&reader->mr_pid, mo_Relaxed) == current_pid) { - TRACE("==== thread 0x%" PRIxPTR ", rthc %p, cleanup", osal_thread_self(), - __Wpedantic_format_voidptr(reader)); - (void)atomic_cas32(&reader->mr_pid, current_pid, 0); - atomic_store32(&env->me_lck->mti_readers_refresh_flag, true, mo_Relaxed); - } - } - -#if defined(_WIN32) || defined(_WIN64) - TRACE("<< thread 0x%" PRIxPTR ", rthc %p", osal_thread_self(), rthc); - rthc_unlock(); -#else - const uint64_t sign_registered = MDBX_THREAD_RTHC_REGISTERED(rthc); - const uint64_t sign_counted = MDBX_THREAD_RTHC_COUNTED(rthc); - const uint64_t state = rthc_read(rthc); - if (state == sign_registered && - rthc_compare_and_clean(rthc, sign_registered)) { - TRACE("== thread 0x%" PRIxPTR - ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), rthc, osal_getpid(), "registered", state); - } else if (state == sign_counted && - rthc_compare_and_clean(rthc, sign_counted)) { - TRACE("== thread 0x%" PRIxPTR - ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), rthc, osal_getpid(), "counted", state); - ENSURE(nullptr, atomic_sub32(&rthc_pending, 1) > 0); - } else { - WARNING("thread 0x%" PRIxPTR - ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), rthc, osal_getpid(), "wrong", state); - } - - if (atomic_load32(&rthc_pending, mo_AcquireRelease) == 0) { - TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, wake", osal_thread_self(), - rthc, osal_getpid()); - ENSURE(nullptr, pthread_cond_broadcast(&rthc_cond) == 0); - } - - TRACE("<< thread 0x%" PRIxPTR ", rthc %p", osal_thread_self(), rthc); - /* Allow tail call optimization, i.e. 
gcc should generate the jmp instruction - * instead of a call for pthread_mutex_unlock() and therefore CPU could not - * return to current DSO's code section, which may be unloaded immediately - * after the mutex got released. */ - pthread_mutex_unlock(&rthc_mutex); -#endif -} - -MDBX_INTERNAL_VAR_INSTA struct mdbx_static mdbx_static = { - MDBX_RUNTIME_FLAGS_INIT, MDBX_LOG_FATAL, {nullptr}, 0, nullptr}; -static osal_fastmutex_t debug_lock; - -MDBX_EXCLUDE_FOR_GPROF -__cold void global_dtor(void) { - const uint32_t current_pid = osal_getpid(); - TRACE(">> pid %d", current_pid); - - rthc_lock(); -#if !defined(_WIN32) && !defined(_WIN64) - uint64_t *rthc = pthread_getspecific(rthc_key); - TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status 0x%08" PRIx64 - ", left %d", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, - rthc ? rthc_read(rthc) : ~UINT64_C(0), - atomic_load32(&rthc_pending, mo_Relaxed)); - if (rthc) { - const uint64_t sign_registered = MDBX_THREAD_RTHC_REGISTERED(rthc); - const uint64_t sign_counted = MDBX_THREAD_RTHC_COUNTED(rthc); - const uint64_t state = rthc_read(rthc); - if (state == sign_registered && - rthc_compare_and_clean(rthc, sign_registered)) { - TRACE("== thread 0x%" PRIxPTR - ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, - "registered", state); - } else if (state == sign_counted && - rthc_compare_and_clean(rthc, sign_counted)) { - TRACE("== thread 0x%" PRIxPTR - ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, - "counted", state); - ENSURE(nullptr, atomic_sub32(&rthc_pending, 1) > 0); - } else { - WARNING("thread 0x%" PRIxPTR - ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, - "wrong", state); - } - } - - struct timespec abstime; - ENSURE(nullptr, clock_gettime(CLOCK_REALTIME, &abstime) 
== 0); - abstime.tv_nsec += 1000000000l / 10; - if (abstime.tv_nsec >= 1000000000l) { - abstime.tv_nsec -= 1000000000l; - abstime.tv_sec += 1; - } -#if MDBX_DEBUG > 0 - abstime.tv_sec += 600; -#endif - - for (unsigned left; - (left = atomic_load32(&rthc_pending, mo_AcquireRelease)) > 0;) { - NOTICE("tls-cleanup: pid %d, pending %u, wait for...", current_pid, left); - const int rc = pthread_cond_timedwait(&rthc_cond, &rthc_mutex, &abstime); - if (rc && rc != EINTR) - break; - } - thread_key_delete(rthc_key); -#endif - - for (size_t i = 0; i < rthc_count; ++i) { - MDBX_env *const env = rthc_table[i].env; - if (env->me_pid != current_pid) - continue; - if (!(env->me_flags & MDBX_ENV_TXKEY)) - continue; - MDBX_reader *const begin = &env->me_lck_mmap.lck->mti_readers[0]; - MDBX_reader *const end = - &env->me_lck_mmap.lck->mti_readers[env->me_maxreaders]; - thread_key_delete(env->me_txkey); - bool cleaned = false; - for (MDBX_reader *reader = begin; reader < end; ++reader) { - TRACE("== [%zi] = key %" PRIuPTR ", %p ... %p, rthc %p (%+i), " - "rthc-pid %i, current-pid %i", - i, (uintptr_t)env->me_txkey, __Wpedantic_format_voidptr(begin), - __Wpedantic_format_voidptr(end), __Wpedantic_format_voidptr(reader), - (int)(reader - begin), reader->mr_pid.weak, current_pid); - if (atomic_load32(&reader->mr_pid, mo_Relaxed) == current_pid) { - (void)atomic_cas32(&reader->mr_pid, current_pid, 0); - TRACE("== cleanup %p", __Wpedantic_format_voidptr(reader)); - cleaned = true; - } - } - if (cleaned) - atomic_store32(&env->me_lck->mti_readers_refresh_flag, true, mo_Relaxed); - } - - rthc_limit = rthc_count = 0; - if (rthc_table != rthc_table_static) - osal_free(rthc_table); - rthc_table = nullptr; - rthc_unlock(); - -#if defined(_WIN32) || defined(_WIN64) - DeleteCriticalSection(&rthc_critical_section); -#else - /* LY: yielding a few timeslices to give a more chance - * to racing destructor(s) for completion. 
*/ - workaround_glibc_bug21031(); -#endif - - osal_dtor(); - TRACE("<< pid %d\n", current_pid); - ENSURE(nullptr, osal_fastmutex_destroy(&debug_lock) == 0); -} - -__cold int rthc_register(MDBX_env *const env) { - TRACE(">> env %p, rthc_count %u, rthc_limit %u", - __Wpedantic_format_voidptr(env), rthc_count, rthc_limit); - - int rc = MDBX_SUCCESS; - for (size_t i = 0; i < rthc_count; ++i) - if (unlikely(rthc_table[i].env == env)) { - rc = MDBX_PANIC; - goto bailout; - } - - env->me_txkey = 0; - if (unlikely(rthc_count == rthc_limit)) { - rthc_entry_t *new_table = - osal_realloc((rthc_table == rthc_table_static) ? nullptr : rthc_table, - sizeof(rthc_entry_t) * rthc_limit * 2); - if (unlikely(new_table == nullptr)) { - rc = MDBX_ENOMEM; - goto bailout; - } - if (rthc_table == rthc_table_static) - memcpy(new_table, rthc_table, sizeof(rthc_entry_t) * rthc_limit); - rthc_table = new_table; - rthc_limit *= 2; - } - - if ((env->me_flags & MDBX_NOSTICKYTHREADS) == 0) { - rc = thread_key_create(&env->me_txkey); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - env->me_flags |= MDBX_ENV_TXKEY; - } - - rthc_table[rthc_count].env = env; - TRACE("== [%i] = env %p, key %" PRIuPTR, rthc_count, - __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey); - ++rthc_count; - -bailout: - TRACE("<< env %p, key %" PRIuPTR ", rthc_count %u, rthc_limit %u, rc %d", - __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey, rthc_count, - rthc_limit, rc); - return rc; -} -__cold static int rthc_drown(MDBX_env *const env) { - const uint32_t current_pid = osal_getpid(); - int rc = MDBX_SUCCESS; - MDBX_env *inprocess_neighbor = nullptr; - if (likely(env->me_lck_mmap.lck && current_pid == env->me_pid)) { - MDBX_reader *const begin = &env->me_lck_mmap.lck->mti_readers[0]; - MDBX_reader *const end = - &env->me_lck_mmap.lck->mti_readers[env->me_maxreaders]; - TRACE("== %s env %p pid %d, readers %p ...%p, current-pid %d", - (current_pid == env->me_pid) ? 
"cleanup" : "skip", - __Wpedantic_format_voidptr(env), env->me_pid, - __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end), - current_pid); - bool cleaned = false; - for (MDBX_reader *r = begin; r < end; ++r) { - if (atomic_load32(&r->mr_pid, mo_Relaxed) == current_pid) { - atomic_store32(&r->mr_pid, 0, mo_AcquireRelease); - TRACE("== cleanup %p", __Wpedantic_format_voidptr(r)); - cleaned = true; - } - } - if (cleaned) - atomic_store32(&env->me_lck_mmap.lck->mti_readers_refresh_flag, true, - mo_Relaxed); - rc = rthc_uniq_check(&env->me_lck_mmap, &inprocess_neighbor); - if (!inprocess_neighbor && env->me_live_reader && - env->me_lfd != INVALID_HANDLE_VALUE) { - int err = osal_rpid_clear(env); - rc = rc ? rc : err; - } - } - int err = osal_lck_destroy(env, inprocess_neighbor, current_pid); - env->me_pid = 0; - return rc ? rc : err; -} - -__cold static int rthc_remove(MDBX_env *const env) { - TRACE(">>> env %p, key %zu, rthc_count %u, rthc_limit %u", - __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey, rthc_count, - rthc_limit); - - int rc = MDBX_SUCCESS; - if (likely(env->me_pid)) - rc = rthc_drown(env); - - for (size_t i = 0; i < rthc_count; ++i) { - if (rthc_table[i].env == env) { - if (--rthc_count > 0) - rthc_table[i] = rthc_table[rthc_count]; - else if (rthc_table != rthc_table_static) { - void *tmp = rthc_table; - rthc_table = rthc_table_static; - rthc_limit = RTHC_INITIAL_LIMIT; - osal_memory_barrier(); - osal_free(tmp); - } - break; - } - } - - TRACE("<<< %p, key %zu, rthc_count %u, rthc_limit %u", - __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey, rthc_count, - rthc_limit); - return rc; -} - -//------------------------------------------------------------------------------ - -MDBX_NOTHROW_CONST_FUNCTION static uint64_t rrxmrrxmsx_0(uint64_t v) { - /* Pelle Evensen's mixer, https://bit.ly/2HOfynt */ - v ^= (v << 39 | v >> 25) ^ (v << 14 | v >> 50); - v *= UINT64_C(0xA24BAED4963EE407); - v ^= (v << 40 | v >> 24) ^ (v << 15 | v >> 
49); - v *= UINT64_C(0x9FB21C651E98DF25); - return v ^ v >> 28; -} - -static int uniq_peek(const osal_mmap_t *pending, osal_mmap_t *scan) { - int rc; - uint64_t bait; - MDBX_lockinfo *const pending_lck = pending->lck; - MDBX_lockinfo *const scan_lck = scan->lck; - if (pending_lck) { - bait = atomic_load64(&pending_lck->mti_bait_uniqueness, mo_AcquireRelease); - rc = MDBX_SUCCESS; - } else { - bait = 0 /* hush MSVC warning */; - rc = osal_msync(scan, 0, sizeof(MDBX_lockinfo), MDBX_SYNC_DATA); - if (rc == MDBX_SUCCESS) - rc = osal_pread(pending->fd, &bait, sizeof(scan_lck->mti_bait_uniqueness), - offsetof(MDBX_lockinfo, mti_bait_uniqueness)); - } - if (likely(rc == MDBX_SUCCESS) && - bait == atomic_load64(&scan_lck->mti_bait_uniqueness, mo_AcquireRelease)) - rc = MDBX_RESULT_TRUE; - - TRACE("uniq-peek: %s, bait 0x%016" PRIx64 ",%s rc %d", - pending_lck ? "mem" : "file", bait, - (rc == MDBX_RESULT_TRUE) ? " found," : (rc ? " FAILED," : ""), rc); - return rc; -} - -static int uniq_poke(const osal_mmap_t *pending, osal_mmap_t *scan, - uint64_t *abra) { - if (*abra == 0) { - const uintptr_t tid = osal_thread_self(); - uintptr_t uit = 0; - memcpy(&uit, &tid, (sizeof(tid) < sizeof(uit)) ? 
sizeof(tid) : sizeof(uit)); - *abra = rrxmrrxmsx_0(osal_monotime() + UINT64_C(5873865991930747) * uit); - } - const uint64_t cadabra = - rrxmrrxmsx_0(*abra + UINT64_C(7680760450171793) * (unsigned)osal_getpid()) - << 24 | - *abra >> 40; - MDBX_lockinfo *const scan_lck = scan->lck; - atomic_store64(&scan_lck->mti_bait_uniqueness, cadabra, mo_AcquireRelease); - *abra = *abra * UINT64_C(6364136223846793005) + 1; - return uniq_peek(pending, scan); -} - -__cold static int rthc_uniq_check(const osal_mmap_t *pending, - MDBX_env **found) { - *found = nullptr; - uint64_t salt = 0; - for (size_t i = 0; i < rthc_count; ++i) { - MDBX_env *const scan = rthc_table[i].env; - if (!scan->me_lck_mmap.lck || &scan->me_lck_mmap == pending) - continue; - int err = atomic_load64(&scan->me_lck_mmap.lck->mti_bait_uniqueness, - mo_AcquireRelease) - ? uniq_peek(pending, &scan->me_lck_mmap) - : uniq_poke(pending, &scan->me_lck_mmap, &salt); - if (err == MDBX_ENODATA) { - uint64_t length = 0; - if (likely(osal_filesize(pending->fd, &length) == MDBX_SUCCESS && - length == 0)) { - /* LY: skip checking since LCK-file is empty, i.e. just created. */ - DEBUG("%s", "unique (new/empty lck)"); - return MDBX_SUCCESS; - } - } - if (err == MDBX_RESULT_TRUE) - err = uniq_poke(pending, &scan->me_lck_mmap, &salt); - if (err == MDBX_RESULT_TRUE) { - (void)osal_msync(&scan->me_lck_mmap, 0, sizeof(MDBX_lockinfo), - MDBX_SYNC_KICK); - err = uniq_poke(pending, &scan->me_lck_mmap, &salt); - } - if (err == MDBX_RESULT_TRUE) { - err = uniq_poke(pending, &scan->me_lck_mmap, &salt); - *found = scan; - DEBUG("found %p", __Wpedantic_format_voidptr(*found)); - return MDBX_SUCCESS; - } - if (unlikely(err != MDBX_SUCCESS)) { - DEBUG("failed rc %d", err); - return err; - } - } - - DEBUG("%s", "unique"); - return MDBX_SUCCESS; -} - -/*------------------------------------------------------------------------------ - * LY: State of the art quicksort-based sorting, with internal stack - * and network-sort for small chunks. 
- * Thanks to John M. Gamble for the http://pages.ripco.net/~jgamble/nw.html */ - -#if MDBX_HAVE_CMOV -#define SORT_CMP_SWAP(TYPE, CMP, a, b) \ - do { \ - const TYPE swap_tmp = (a); \ - const bool swap_cmp = expect_with_probability(CMP(swap_tmp, b), 0, .5); \ - (a) = swap_cmp ? swap_tmp : b; \ - (b) = swap_cmp ? b : swap_tmp; \ - } while (0) -#else -#define SORT_CMP_SWAP(TYPE, CMP, a, b) \ - do \ - if (expect_with_probability(!CMP(a, b), 0, .5)) { \ - const TYPE swap_tmp = (a); \ - (a) = (b); \ - (b) = swap_tmp; \ - } \ - while (0) -#endif - -// 3 comparators, 3 parallel operations -// o-----^--^--o -// | | -// o--^--|--v--o -// | | -// o--v--v-----o -// -// [[1,2]] -// [[0,2]] -// [[0,1]] -#define SORT_NETWORK_3(TYPE, CMP, begin) \ - do { \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ - } while (0) - -// 5 comparators, 3 parallel operations -// o--^--^--------o -// | | -// o--v--|--^--^--o -// | | | -// o--^--v--|--v--o -// | | -// o--v-----v-----o -// -// [[0,1],[2,3]] -// [[0,2],[1,3]] -// [[1,2]] -#define SORT_NETWORK_4(TYPE, CMP, begin) \ - do { \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ - } while (0) - -// 9 comparators, 5 parallel operations -// o--^--^-----^-----------o -// | | | -// o--|--|--^--v-----^--^--o -// | | | | | -// o--|--v--|--^--^--|--v--o -// | | | | | -// o--|-----v--|--v--|--^--o -// | | | | -// o--v--------v-----v--v--o -// -// [[0,4],[1,3]] -// [[0,2]] -// [[2,4],[0,1]] -// [[2,3],[1,4]] -// [[1,2],[3,4]] -#define SORT_NETWORK_5(TYPE, CMP, begin) \ - do { \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, 
begin[2], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[4]); \ - } while (0) - -// 12 comparators, 6 parallel operations -// o-----^--^--^-----------------o -// | | | -// o--^--|--v--|--^--------^-----o -// | | | | | -// o--v--v-----|--|--^--^--|--^--o -// | | | | | | -// o-----^--^--v--|--|--|--v--v--o -// | | | | | -// o--^--|--v-----v--|--v--------o -// | | | -// o--v--v-----------v-----------o -// -// [[1,2],[4,5]] -// [[0,2],[3,5]] -// [[0,1],[3,4],[2,5]] -// [[0,3],[1,4]] -// [[2,4],[1,3]] -// [[2,3]] -#define SORT_NETWORK_6(TYPE, CMP, begin) \ - do { \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[5]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[5]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[5]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ - } while (0) - -// 16 comparators, 6 parallel operations -// o--^--------^-----^-----------------o -// | | | -// o--|--^-----|--^--v--------^--^-----o -// | | | | | | -// o--|--|--^--v--|--^-----^--|--v-----o -// | | | | | | | -// o--|--|--|-----v--|--^--v--|--^--^--o -// | | | | | | | | -// o--v--|--|--^-----v--|--^--v--|--v--o -// | | | | | | -// o-----v--|--|--------v--v-----|--^--o -// | | | | -// o--------v--v-----------------v--v--o -// -// [[0,4],[1,5],[2,6]] -// [[0,2],[1,3],[4,6]] -// [[2,4],[3,5],[0,1]] -// [[2,3],[4,5]] -// [[1,4],[3,6]] -// [[1,2],[3,4],[5,6]] -#define SORT_NETWORK_7(TYPE, CMP, begin) \ 
- do { \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[5]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[6]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[6]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[5]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[5]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[6]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[5], begin[6]); \ - } while (0) - -// 19 comparators, 6 parallel operations -// o--^--------^-----^-----------------o -// | | | -// o--|--^-----|--^--v--------^--^-----o -// | | | | | | -// o--|--|--^--v--|--^-----^--|--v-----o -// | | | | | | | -// o--|--|--|--^--v--|--^--v--|--^--^--o -// | | | | | | | | | -// o--v--|--|--|--^--v--|--^--v--|--v--o -// | | | | | | | -// o-----v--|--|--|--^--v--v-----|--^--o -// | | | | | | -// o--------v--|--v--|--^--------v--v--o -// | | | -// o-----------v-----v--v--------------o -// -// [[0,4],[1,5],[2,6],[3,7]] -// [[0,2],[1,3],[4,6],[5,7]] -// [[2,4],[3,5],[0,1],[6,7]] -// [[2,3],[4,5]] -// [[1,4],[3,6]] -// [[1,2],[3,4],[5,6]] -#define SORT_NETWORK_8(TYPE, CMP, begin) \ - do { \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[5]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[6]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[7]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[6]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[5], begin[7]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, 
begin[3], begin[5]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[6], begin[7]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[5]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[6]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[5], begin[6]); \ - } while (0) - -#define SORT_INNER(TYPE, CMP, begin, end, len) \ - switch (len) { \ - default: \ - assert(false); \ - __unreachable(); \ - case 0: \ - case 1: \ - break; \ - case 2: \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ - break; \ - case 3: \ - SORT_NETWORK_3(TYPE, CMP, begin); \ - break; \ - case 4: \ - SORT_NETWORK_4(TYPE, CMP, begin); \ - break; \ - case 5: \ - SORT_NETWORK_5(TYPE, CMP, begin); \ - break; \ - case 6: \ - SORT_NETWORK_6(TYPE, CMP, begin); \ - break; \ - case 7: \ - SORT_NETWORK_7(TYPE, CMP, begin); \ - break; \ - case 8: \ - SORT_NETWORK_8(TYPE, CMP, begin); \ - break; \ - } - -#define SORT_SWAP(TYPE, a, b) \ - do { \ - const TYPE swap_tmp = (a); \ - (a) = (b); \ - (b) = swap_tmp; \ - } while (0) - -#define SORT_PUSH(low, high) \ - do { \ - top->lo = (low); \ - top->hi = (high); \ - ++top; \ - } while (0) - -#define SORT_POP(low, high) \ - do { \ - --top; \ - low = top->lo; \ - high = top->hi; \ - } while (0) - -#define SORT_IMPL(NAME, EXPECT_LOW_CARDINALITY_OR_PRESORTED, TYPE, CMP) \ - \ - static __inline bool NAME##_is_sorted(const TYPE *first, const TYPE *last) { \ - while (++first <= last) \ - if (expect_with_probability(CMP(first[0], first[-1]), 1, .1)) \ - return false; \ - return true; \ - } \ - \ - typedef struct { \ - TYPE *lo, *hi; \ - } NAME##_stack; \ - \ - __hot static void NAME(TYPE *const __restrict begin, \ - TYPE *const __restrict end) { \ - NAME##_stack stack[sizeof(size_t) * CHAR_BIT], *__restrict top = stack; \ - \ - TYPE *__restrict hi = end - 1; 
\ - TYPE *__restrict lo = begin; \ - while (true) { \ - const ptrdiff_t len = hi - lo; \ - if (len < 8) { \ - SORT_INNER(TYPE, CMP, lo, hi + 1, len + 1); \ - if (unlikely(top == stack)) \ - break; \ - SORT_POP(lo, hi); \ - continue; \ - } \ - \ - TYPE *__restrict mid = lo + (len >> 1); \ - SORT_CMP_SWAP(TYPE, CMP, *lo, *mid); \ - SORT_CMP_SWAP(TYPE, CMP, *mid, *hi); \ - SORT_CMP_SWAP(TYPE, CMP, *lo, *mid); \ - \ - TYPE *right = hi - 1; \ - TYPE *left = lo + 1; \ - while (1) { \ - while (expect_with_probability(CMP(*left, *mid), 0, .5)) \ - ++left; \ - while (expect_with_probability(CMP(*mid, *right), 0, .5)) \ - --right; \ - if (unlikely(left > right)) { \ - if (EXPECT_LOW_CARDINALITY_OR_PRESORTED) { \ - if (NAME##_is_sorted(lo, right)) \ - lo = right + 1; \ - if (NAME##_is_sorted(left, hi)) \ - hi = left; \ - } \ - break; \ - } \ - SORT_SWAP(TYPE, *left, *right); \ - mid = (mid == left) ? right : (mid == right) ? left : mid; \ - ++left; \ - --right; \ - } \ - \ - if (right - lo > hi - left) { \ - SORT_PUSH(lo, right); \ - lo = left; \ - } else { \ - SORT_PUSH(left, hi); \ - hi = right; \ - } \ - } \ - \ - if (AUDIT_ENABLED()) { \ - for (TYPE *scan = begin + 1; scan < end; ++scan) \ - assert(CMP(scan[-1], scan[0])); \ - } \ - } - -/*------------------------------------------------------------------------------ - * LY: radix sort for large chunks */ - -#define RADIXSORT_IMPL(NAME, TYPE, EXTRACT_KEY, BUFFER_PREALLOCATED, END_GAP) \ - \ - __hot static bool NAME##_radixsort(TYPE *const begin, const size_t length) { \ - TYPE *tmp; \ - if (BUFFER_PREALLOCATED) { \ - tmp = begin + length + END_GAP; \ - /* memset(tmp, 0xDeadBeef, sizeof(TYPE) * length); */ \ - } else { \ - tmp = osal_malloc(sizeof(TYPE) * length); \ - if (unlikely(!tmp)) \ - return false; \ - } \ - \ - size_t key_shift = 0, key_diff_mask; \ - do { \ - struct { \ - pgno_t a[256], b[256]; \ - } counters; \ - memset(&counters, 0, sizeof(counters)); \ - \ - key_diff_mask = 0; \ - size_t prev_key = 
EXTRACT_KEY(begin) >> key_shift; \ - TYPE *r = begin, *end = begin + length; \ - do { \ - const size_t key = EXTRACT_KEY(r) >> key_shift; \ - counters.a[key & 255]++; \ - counters.b[(key >> 8) & 255]++; \ - key_diff_mask |= prev_key ^ key; \ - prev_key = key; \ - } while (++r != end); \ - \ - pgno_t ta = 0, tb = 0; \ - for (size_t i = 0; i < 256; ++i) { \ - const pgno_t ia = counters.a[i]; \ - counters.a[i] = ta; \ - ta += ia; \ - const pgno_t ib = counters.b[i]; \ - counters.b[i] = tb; \ - tb += ib; \ - } \ - \ - r = begin; \ - do { \ - const size_t key = EXTRACT_KEY(r) >> key_shift; \ - tmp[counters.a[key & 255]++] = *r; \ - } while (++r != end); \ - \ - if (unlikely(key_diff_mask < 256)) { \ - memcpy(begin, tmp, ptr_dist(end, begin)); \ - break; \ - } \ - end = (r = tmp) + length; \ - do { \ - const size_t key = EXTRACT_KEY(r) >> key_shift; \ - begin[counters.b[(key >> 8) & 255]++] = *r; \ - } while (++r != end); \ - \ - key_shift += 16; \ - } while (key_diff_mask >> 16); \ - \ - if (!(BUFFER_PREALLOCATED)) \ - osal_free(tmp); \ - return true; \ - } - -/*------------------------------------------------------------------------------ - * LY: Binary search */ - -#if defined(__clang__) && __clang_major__ > 4 && defined(__ia32__) -#define WORKAROUND_FOR_CLANG_OPTIMIZER_BUG(size, flag) \ - do \ - __asm __volatile("" \ - : "+r"(size) \ - : "r" /* the `b` constraint is more suitable here, but \ - cause CLANG to allocate and push/pop an one more \ - register, so using the `r` which avoids this. 
*/ \ - (flag)); \ - while (0) -#else -#define WORKAROUND_FOR_CLANG_OPTIMIZER_BUG(size, flag) \ - do { \ - /* nope for non-clang or non-x86 */; \ - } while (0) -#endif /* Workaround for CLANG */ - -#define BINARY_SEARCH_STEP(TYPE_LIST, CMP, it, size, key) \ - do { \ - } while (0) - -/* *INDENT-OFF* */ -/* clang-format off */ -#define SEARCH_IMPL(NAME, TYPE_LIST, TYPE_ARG, CMP) \ - static __always_inline const TYPE_LIST *NAME( \ - const TYPE_LIST *it, size_t length, const TYPE_ARG item) { \ - const TYPE_LIST *const begin = it, *const end = begin + length; \ - \ - if (MDBX_HAVE_CMOV) \ - do { \ - /* Адаптивно-упрощенный шаг двоичного поиска: \ - * - без переходов при наличии cmov или аналога; \ - * - допускает лишние итерации; \ - * - но ищет пока size > 2, что требует дозавершения поиска \ - * среди остающихся 0-1-2 элементов. */ \ - const TYPE_LIST *const middle = it + (length >> 1); \ - length = (length + 1) >> 1; \ - const bool flag = expect_with_probability(CMP(*middle, item), 0, .5); \ - WORKAROUND_FOR_CLANG_OPTIMIZER_BUG(length, flag); \ - it = flag ? middle : it; \ - } while (length > 2); \ - else \ - while (length > 2) { \ - /* Вариант с использованием условного перехода. Основное отличие в \ - * том, что при "не равно" (true от компаратора) переход делается на 1 \ - * ближе к концу массива. Алгоритмически это верно и обеспечивает \ - * чуть-чуть более быструю сходимость, но зато требует больше \ - * вычислений при true от компаратора. Также ВАЖНО(!) не допускается \ - * спекулятивное выполнение при size == 0. 
*/ \ - const TYPE_LIST *const middle = it + (length >> 1); \ - length = (length + 1) >> 1; \ - const bool flag = expect_with_probability(CMP(*middle, item), 0, .5); \ - if (flag) { \ - it = middle + 1; \ - length -= 1; \ - } \ - } \ - it += length > 1 && expect_with_probability(CMP(*it, item), 0, .5); \ - it += length > 0 && expect_with_probability(CMP(*it, item), 0, .5); \ - \ - if (AUDIT_ENABLED()) { \ - for (const TYPE_LIST *scan = begin; scan < it; ++scan) \ - assert(CMP(*scan, item)); \ - for (const TYPE_LIST *scan = it; scan < end; ++scan) \ - assert(!CMP(*scan, item)); \ - (void)begin, (void)end; \ - } \ - \ - return it; \ - } -/* *INDENT-ON* */ -/* clang-format on */ - -/*----------------------------------------------------------------------------*/ - -static __always_inline size_t pnl_size2bytes(size_t size) { - assert(size > 0 && size <= MDBX_PGL_LIMIT); -#if MDBX_PNL_PREALLOC_FOR_RADIXSORT - size += size; -#endif /* MDBX_PNL_PREALLOC_FOR_RADIXSORT */ - STATIC_ASSERT(MDBX_ASSUME_MALLOC_OVERHEAD + - (MDBX_PGL_LIMIT * (MDBX_PNL_PREALLOC_FOR_RADIXSORT + 1) + - MDBX_PNL_GRANULATE + 3) * - sizeof(pgno_t) < - SIZE_MAX / 4 * 3); - size_t bytes = - ceil_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(pgno_t) * (size + 3), - MDBX_PNL_GRANULATE * sizeof(pgno_t)) - - MDBX_ASSUME_MALLOC_OVERHEAD; - return bytes; -} - -static __always_inline pgno_t pnl_bytes2size(const size_t bytes) { - size_t size = bytes / sizeof(pgno_t); - assert(size > 3 && size <= MDBX_PGL_LIMIT + /* alignment gap */ 65536); - size -= 3; -#if MDBX_PNL_PREALLOC_FOR_RADIXSORT - size >>= 1; -#endif /* MDBX_PNL_PREALLOC_FOR_RADIXSORT */ - return (pgno_t)size; -} - -static MDBX_PNL pnl_alloc(size_t size) { - size_t bytes = pnl_size2bytes(size); - MDBX_PNL pl = osal_malloc(bytes); - if (likely(pl)) { -#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) - bytes = malloc_usable_size(pl); -#endif /* malloc_usable_size */ - pl[0] = pnl_bytes2size(bytes); - assert(pl[0] >= size); 
- pl += 1; - *pl = 0; - } - return pl; -} - -static void pnl_free(MDBX_PNL pl) { - if (likely(pl)) - osal_free(pl - 1); -} - -/* Shrink the PNL to the default size if it has grown larger */ -static void pnl_shrink(MDBX_PNL __restrict *__restrict ppl) { - assert(pnl_bytes2size(pnl_size2bytes(MDBX_PNL_INITIAL)) >= MDBX_PNL_INITIAL && - pnl_bytes2size(pnl_size2bytes(MDBX_PNL_INITIAL)) < - MDBX_PNL_INITIAL * 3 / 2); - assert(MDBX_PNL_GETSIZE(*ppl) <= MDBX_PGL_LIMIT && - MDBX_PNL_ALLOCLEN(*ppl) >= MDBX_PNL_GETSIZE(*ppl)); - MDBX_PNL_SETSIZE(*ppl, 0); - if (unlikely(MDBX_PNL_ALLOCLEN(*ppl) > - MDBX_PNL_INITIAL * (MDBX_PNL_PREALLOC_FOR_RADIXSORT ? 8 : 4) - - MDBX_CACHELINE_SIZE / sizeof(pgno_t))) { - size_t bytes = pnl_size2bytes(MDBX_PNL_INITIAL * 2); - MDBX_PNL pl = osal_realloc(*ppl - 1, bytes); - if (likely(pl)) { -#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) - bytes = malloc_usable_size(pl); -#endif /* malloc_usable_size */ - *pl = pnl_bytes2size(bytes); - *ppl = pl + 1; - } - } -} - -/* Grow the PNL to the size growed to at least given size */ -static int pnl_reserve(MDBX_PNL __restrict *__restrict ppl, - const size_t wanna) { - const size_t allocated = MDBX_PNL_ALLOCLEN(*ppl); - assert(MDBX_PNL_GETSIZE(*ppl) <= MDBX_PGL_LIMIT && - MDBX_PNL_ALLOCLEN(*ppl) >= MDBX_PNL_GETSIZE(*ppl)); - if (likely(allocated >= wanna)) - return MDBX_SUCCESS; - - if (unlikely(wanna > /* paranoia */ MDBX_PGL_LIMIT)) { - ERROR("PNL too long (%zu > %zu)", wanna, (size_t)MDBX_PGL_LIMIT); - return MDBX_TXN_FULL; - } - - const size_t size = (wanna + wanna - allocated < MDBX_PGL_LIMIT) - ? 
wanna + wanna - allocated - : MDBX_PGL_LIMIT; - size_t bytes = pnl_size2bytes(size); - MDBX_PNL pl = osal_realloc(*ppl - 1, bytes); - if (likely(pl)) { -#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) - bytes = malloc_usable_size(pl); -#endif /* malloc_usable_size */ - *pl = pnl_bytes2size(bytes); - assert(*pl >= wanna); - *ppl = pl + 1; - return MDBX_SUCCESS; - } - return MDBX_ENOMEM; -} - -/* Make room for num additional elements in an PNL */ -static __always_inline int __must_check_result -pnl_need(MDBX_PNL __restrict *__restrict ppl, size_t num) { - assert(MDBX_PNL_GETSIZE(*ppl) <= MDBX_PGL_LIMIT && - MDBX_PNL_ALLOCLEN(*ppl) >= MDBX_PNL_GETSIZE(*ppl)); - assert(num <= MDBX_PGL_LIMIT); - const size_t wanna = MDBX_PNL_GETSIZE(*ppl) + num; - return likely(MDBX_PNL_ALLOCLEN(*ppl) >= wanna) ? MDBX_SUCCESS - : pnl_reserve(ppl, wanna); -} - -static __always_inline void pnl_xappend(__restrict MDBX_PNL pl, pgno_t pgno) { - assert(MDBX_PNL_GETSIZE(pl) < MDBX_PNL_ALLOCLEN(pl)); - if (AUDIT_ENABLED()) { - for (size_t i = MDBX_PNL_GETSIZE(pl); i > 0; --i) - assert(pgno != pl[i]); - } - *pl += 1; - MDBX_PNL_LAST(pl) = pgno; -} - -/* Append an pgno range onto an unsorted PNL */ -__always_inline static int __must_check_result pnl_append_range( - bool spilled, __restrict MDBX_PNL *ppl, pgno_t pgno, size_t n) { - assert(n > 0); - int rc = pnl_need(ppl, n); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - const MDBX_PNL pnl = *ppl; -#if MDBX_PNL_ASCENDING - size_t w = MDBX_PNL_GETSIZE(pnl); - do { - pnl[++w] = pgno; - pgno += spilled ? 2 : 1; - } while (--n); - MDBX_PNL_SETSIZE(pnl, w); -#else - size_t w = MDBX_PNL_GETSIZE(pnl) + n; - MDBX_PNL_SETSIZE(pnl, w); - do { - pnl[w--] = pgno; - pgno += spilled ? 
2 : 1; - } while (--n); -#endif - - return MDBX_SUCCESS; -} - -/* Append an pgno range into the sorted PNL */ -__hot static int __must_check_result pnl_insert_range(__restrict MDBX_PNL *ppl, - pgno_t pgno, size_t n) { - assert(n > 0); - int rc = pnl_need(ppl, n); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - const MDBX_PNL pnl = *ppl; - size_t r = MDBX_PNL_GETSIZE(pnl), w = r + n; - MDBX_PNL_SETSIZE(pnl, w); - while (r && MDBX_PNL_DISORDERED(pnl[r], pgno)) - pnl[w--] = pnl[r--]; - - for (pgno_t fill = MDBX_PNL_ASCENDING ? pgno + n : pgno; w > r; --w) - pnl[w] = MDBX_PNL_ASCENDING ? --fill : fill++; - - return MDBX_SUCCESS; -} - -__hot static bool pnl_check(const pgno_t *pl, const size_t limit) { - assert(limit >= MIN_PAGENO - MDBX_ENABLE_REFUND); - if (likely(MDBX_PNL_GETSIZE(pl))) { - if (unlikely(MDBX_PNL_GETSIZE(pl) > MDBX_PGL_LIMIT)) - return false; - if (unlikely(MDBX_PNL_LEAST(pl) < MIN_PAGENO)) - return false; - if (unlikely(MDBX_PNL_MOST(pl) >= limit)) - return false; - - if ((!MDBX_DISABLE_VALIDATION || AUDIT_ENABLED()) && - likely(MDBX_PNL_GETSIZE(pl) > 1)) { - const pgno_t *scan = MDBX_PNL_BEGIN(pl); - const pgno_t *const end = MDBX_PNL_END(pl); - pgno_t prev = *scan++; - do { - if (unlikely(!MDBX_PNL_ORDERED(prev, *scan))) - return false; - prev = *scan; - } while (likely(++scan != end)); - } - } - return true; -} - -static __always_inline bool pnl_check_allocated(const pgno_t *pl, - const size_t limit) { - return pl == nullptr || (MDBX_PNL_ALLOCLEN(pl) >= MDBX_PNL_GETSIZE(pl) && - pnl_check(pl, limit)); -} - -static __always_inline void -pnl_merge_inner(pgno_t *__restrict dst, const pgno_t *__restrict src_a, - const pgno_t *__restrict src_b, - const pgno_t *__restrict const src_b_detent) { - do { -#if MDBX_HAVE_CMOV - const bool flag = MDBX_PNL_ORDERED(*src_b, *src_a); -#if defined(__LCC__) || __CLANG_PREREQ(13, 0) - // lcc 1.26: 13ШК (подготовка и первая итерация) + 7ШК (цикл), БЕЗ loop-mode - // gcc>=7: cmp+jmp с возвратом в тело цикла (WTF?) 
- // gcc<=6: cmov×3 - // clang<=12: cmov×3 - // clang>=13: cmov, set+add/sub - *dst = flag ? *src_a-- : *src_b--; -#else - // gcc: cmov, cmp+set+add/sub - // clang<=5: cmov×2, set+add/sub - // clang>=6: cmov, set+add/sub - *dst = flag ? *src_a : *src_b; - src_b += (ptrdiff_t)flag - 1; - src_a -= flag; -#endif - --dst; -#else /* MDBX_HAVE_CMOV */ - while (MDBX_PNL_ORDERED(*src_b, *src_a)) - *dst-- = *src_a--; - *dst-- = *src_b--; -#endif /* !MDBX_HAVE_CMOV */ - } while (likely(src_b > src_b_detent)); -} - -/* Merge a PNL onto a PNL. The destination PNL must be big enough */ -__hot static size_t pnl_merge(MDBX_PNL dst, const MDBX_PNL src) { - assert(pnl_check_allocated(dst, MAX_PAGENO + 1)); - assert(pnl_check(src, MAX_PAGENO + 1)); - const size_t src_len = MDBX_PNL_GETSIZE(src); - const size_t dst_len = MDBX_PNL_GETSIZE(dst); - size_t total = dst_len; - assert(MDBX_PNL_ALLOCLEN(dst) >= total); - if (likely(src_len > 0)) { - total += src_len; - if (!MDBX_DEBUG && total < (MDBX_HAVE_CMOV ? 21 : 12)) - goto avoid_call_libc_for_short_cases; - if (dst_len == 0 || - MDBX_PNL_ORDERED(MDBX_PNL_LAST(dst), MDBX_PNL_FIRST(src))) - memcpy(MDBX_PNL_END(dst), MDBX_PNL_BEGIN(src), src_len * sizeof(pgno_t)); - else if (MDBX_PNL_ORDERED(MDBX_PNL_LAST(src), MDBX_PNL_FIRST(dst))) { - memmove(MDBX_PNL_BEGIN(dst) + src_len, MDBX_PNL_BEGIN(dst), - dst_len * sizeof(pgno_t)); - memcpy(MDBX_PNL_BEGIN(dst), MDBX_PNL_BEGIN(src), - src_len * sizeof(pgno_t)); - } else { - avoid_call_libc_for_short_cases: - dst[0] = /* the detent */ (MDBX_PNL_ASCENDING ? 
0 : P_INVALID); - pnl_merge_inner(dst + total, dst + dst_len, src + src_len, src); - } - MDBX_PNL_SETSIZE(dst, total); - } - assert(pnl_check_allocated(dst, MAX_PAGENO + 1)); - return total; -} - -static void spill_remove(MDBX_txn *txn, size_t idx, size_t npages) { - tASSERT(txn, idx > 0 && idx <= MDBX_PNL_GETSIZE(txn->tw.spilled.list) && - txn->tw.spilled.least_removed > 0); - txn->tw.spilled.least_removed = (idx < txn->tw.spilled.least_removed) - ? idx - : txn->tw.spilled.least_removed; - txn->tw.spilled.list[idx] |= 1; - MDBX_PNL_SETSIZE(txn->tw.spilled.list, - MDBX_PNL_GETSIZE(txn->tw.spilled.list) - - (idx == MDBX_PNL_GETSIZE(txn->tw.spilled.list))); - - while (unlikely(npages > 1)) { - const pgno_t pgno = (txn->tw.spilled.list[idx] >> 1) + 1; - if (MDBX_PNL_ASCENDING) { - if (++idx > MDBX_PNL_GETSIZE(txn->tw.spilled.list) || - (txn->tw.spilled.list[idx] >> 1) != pgno) - return; - } else { - if (--idx < 1 || (txn->tw.spilled.list[idx] >> 1) != pgno) - return; - txn->tw.spilled.least_removed = (idx < txn->tw.spilled.least_removed) - ? 
idx - : txn->tw.spilled.least_removed; - } - txn->tw.spilled.list[idx] |= 1; - MDBX_PNL_SETSIZE(txn->tw.spilled.list, - MDBX_PNL_GETSIZE(txn->tw.spilled.list) - - (idx == MDBX_PNL_GETSIZE(txn->tw.spilled.list))); - --npages; - } -} - -static MDBX_PNL spill_purge(MDBX_txn *txn) { - tASSERT(txn, txn->tw.spilled.least_removed > 0); - const MDBX_PNL sl = txn->tw.spilled.list; - if (txn->tw.spilled.least_removed != INT_MAX) { - size_t len = MDBX_PNL_GETSIZE(sl), r, w; - for (w = r = txn->tw.spilled.least_removed; r <= len; ++r) { - sl[w] = sl[r]; - w += 1 - (sl[r] & 1); - } - for (size_t i = 1; i < w; ++i) - tASSERT(txn, (sl[i] & 1) == 0); - MDBX_PNL_SETSIZE(sl, w - 1); - txn->tw.spilled.least_removed = INT_MAX; - } else { - for (size_t i = 1; i <= MDBX_PNL_GETSIZE(sl); ++i) - tASSERT(txn, (sl[i] & 1) == 0); - } - return sl; -} - -#if MDBX_PNL_ASCENDING -#define MDBX_PNL_EXTRACT_KEY(ptr) (*(ptr)) -#else -#define MDBX_PNL_EXTRACT_KEY(ptr) (P_INVALID - *(ptr)) -#endif -RADIXSORT_IMPL(pgno, pgno_t, MDBX_PNL_EXTRACT_KEY, - MDBX_PNL_PREALLOC_FOR_RADIXSORT, 0) - -SORT_IMPL(pgno_sort, false, pgno_t, MDBX_PNL_ORDERED) - -__hot __noinline static void pnl_sort_nochk(MDBX_PNL pnl) { - if (likely(MDBX_PNL_GETSIZE(pnl) < MDBX_RADIXSORT_THRESHOLD) || - unlikely(!pgno_radixsort(&MDBX_PNL_FIRST(pnl), MDBX_PNL_GETSIZE(pnl)))) - pgno_sort(MDBX_PNL_BEGIN(pnl), MDBX_PNL_END(pnl)); -} - -static __inline void pnl_sort(MDBX_PNL pnl, size_t limit4check) { - pnl_sort_nochk(pnl); - assert(pnl_check(pnl, limit4check)); - (void)limit4check; -} - -/* Search for an pgno in an PNL. - * Returns The index of the first item greater than or equal to pgno. 
*/ -SEARCH_IMPL(pgno_bsearch, pgno_t, pgno_t, MDBX_PNL_ORDERED) - -__hot __noinline static size_t pnl_search_nochk(const MDBX_PNL pnl, - pgno_t pgno) { - const pgno_t *begin = MDBX_PNL_BEGIN(pnl); - const pgno_t *it = pgno_bsearch(begin, MDBX_PNL_GETSIZE(pnl), pgno); - const pgno_t *end = begin + MDBX_PNL_GETSIZE(pnl); - assert(it >= begin && it <= end); - if (it != begin) - assert(MDBX_PNL_ORDERED(it[-1], pgno)); - if (it != end) - assert(!MDBX_PNL_ORDERED(it[0], pgno)); - return it - begin + 1; -} - -static __inline size_t pnl_search(const MDBX_PNL pnl, pgno_t pgno, - size_t limit) { - assert(pnl_check_allocated(pnl, limit)); - if (MDBX_HAVE_CMOV) { - /* cmov-ускоренный бинарный поиск может читать (но не использовать) один - * элемент за концом данных, этот элемент в пределах выделенного участка - * памяти, но не инициализирован. */ - VALGRIND_MAKE_MEM_DEFINED(MDBX_PNL_END(pnl), sizeof(pgno_t)); - } - assert(pgno < limit); - (void)limit; - size_t n = pnl_search_nochk(pnl, pgno); - if (MDBX_HAVE_CMOV) { - VALGRIND_MAKE_MEM_UNDEFINED(MDBX_PNL_END(pnl), sizeof(pgno_t)); - } - return n; -} - -static __inline size_t search_spilled(const MDBX_txn *txn, pgno_t pgno) { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - const MDBX_PNL pnl = txn->tw.spilled.list; - if (likely(!pnl)) - return 0; - pgno <<= 1; - size_t n = pnl_search(pnl, pgno, (size_t)MAX_PAGENO + MAX_PAGENO + 1); - return (n <= MDBX_PNL_GETSIZE(pnl) && pnl[n] == pgno) ? n : 0; -} - -static __inline bool intersect_spilled(const MDBX_txn *txn, pgno_t pgno, - size_t npages) { - const MDBX_PNL pnl = txn->tw.spilled.list; - if (likely(!pnl)) - return false; - const size_t len = MDBX_PNL_GETSIZE(pnl); - if (LOG_ENABLED(MDBX_LOG_EXTRA)) { - DEBUG_EXTRA("PNL len %zu [", len); - for (size_t i = 1; i <= len; ++i) - DEBUG_EXTRA_PRINT(" %li", (pnl[i] & 1) ? 
-(long)(pnl[i] >> 1) - : (long)(pnl[i] >> 1)); - DEBUG_EXTRA_PRINT("%s\n", "]"); - } - const pgno_t spilled_range_begin = pgno << 1; - const pgno_t spilled_range_last = ((pgno + (pgno_t)npages) << 1) - 1; -#if MDBX_PNL_ASCENDING - const size_t n = - pnl_search(pnl, spilled_range_begin, (size_t)(MAX_PAGENO + 1) << 1); - assert(n && - (n == MDBX_PNL_GETSIZE(pnl) + 1 || spilled_range_begin <= pnl[n])); - const bool rc = n <= MDBX_PNL_GETSIZE(pnl) && pnl[n] <= spilled_range_last; -#else - const size_t n = - pnl_search(pnl, spilled_range_last, (size_t)MAX_PAGENO + MAX_PAGENO + 1); - assert(n && (n == MDBX_PNL_GETSIZE(pnl) + 1 || spilled_range_last >= pnl[n])); - const bool rc = n <= MDBX_PNL_GETSIZE(pnl) && pnl[n] >= spilled_range_begin; -#endif - if (ASSERT_ENABLED()) { - bool check = false; - for (size_t i = 0; i < npages; ++i) - check |= search_spilled(txn, (pgno_t)(pgno + i)) != 0; - assert(check == rc); - } - return rc; -} - -/*----------------------------------------------------------------------------*/ - -static __always_inline size_t txl_size2bytes(const size_t size) { - assert(size > 0 && size <= MDBX_TXL_MAX * 2); - size_t bytes = - ceil_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(txnid_t) * (size + 2), - MDBX_TXL_GRANULATE * sizeof(txnid_t)) - - MDBX_ASSUME_MALLOC_OVERHEAD; - return bytes; -} - -static __always_inline size_t txl_bytes2size(const size_t bytes) { - size_t size = bytes / sizeof(txnid_t); - assert(size > 2 && size <= MDBX_TXL_MAX * 2); - return size - 2; -} - -static MDBX_TXL txl_alloc(void) { - size_t bytes = txl_size2bytes(MDBX_TXL_INITIAL); - MDBX_TXL tl = osal_malloc(bytes); - if (likely(tl)) { -#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) - bytes = malloc_usable_size(tl); -#endif /* malloc_usable_size */ - tl[0] = txl_bytes2size(bytes); - assert(tl[0] >= MDBX_TXL_INITIAL); - tl += 1; - *tl = 0; - } - return tl; -} - -static void txl_free(MDBX_TXL tl) { - if (likely(tl)) - osal_free(tl - 1); -} - -static 
int txl_reserve(MDBX_TXL __restrict *__restrict ptl, - const size_t wanna) { - const size_t allocated = (size_t)MDBX_PNL_ALLOCLEN(*ptl); - assert(MDBX_PNL_GETSIZE(*ptl) <= MDBX_TXL_MAX && - MDBX_PNL_ALLOCLEN(*ptl) >= MDBX_PNL_GETSIZE(*ptl)); - if (likely(allocated >= wanna)) - return MDBX_SUCCESS; - - if (unlikely(wanna > /* paranoia */ MDBX_TXL_MAX)) { - ERROR("TXL too long (%zu > %zu)", wanna, (size_t)MDBX_TXL_MAX); - return MDBX_TXN_FULL; - } - - const size_t size = (wanna + wanna - allocated < MDBX_TXL_MAX) - ? wanna + wanna - allocated - : MDBX_TXL_MAX; - size_t bytes = txl_size2bytes(size); - MDBX_TXL tl = osal_realloc(*ptl - 1, bytes); - if (likely(tl)) { -#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) - bytes = malloc_usable_size(tl); -#endif /* malloc_usable_size */ - *tl = txl_bytes2size(bytes); - assert(*tl >= wanna); - *ptl = tl + 1; - return MDBX_SUCCESS; - } - return MDBX_ENOMEM; -} - -static __always_inline int __must_check_result -txl_need(MDBX_TXL __restrict *__restrict ptl, size_t num) { - assert(MDBX_PNL_GETSIZE(*ptl) <= MDBX_TXL_MAX && - MDBX_PNL_ALLOCLEN(*ptl) >= MDBX_PNL_GETSIZE(*ptl)); - assert(num <= MDBX_PGL_LIMIT); - const size_t wanna = (size_t)MDBX_PNL_GETSIZE(*ptl) + num; - return likely(MDBX_PNL_ALLOCLEN(*ptl) >= wanna) ? 
MDBX_SUCCESS - : txl_reserve(ptl, wanna); -} - -static __always_inline void txl_xappend(MDBX_TXL __restrict tl, txnid_t id) { - assert(MDBX_PNL_GETSIZE(tl) < MDBX_PNL_ALLOCLEN(tl)); - tl[0] += 1; - MDBX_PNL_LAST(tl) = id; -} - -#define TXNID_SORT_CMP(first, last) ((first) > (last)) -SORT_IMPL(txnid_sort, false, txnid_t, TXNID_SORT_CMP) -static void txl_sort(MDBX_TXL tl) { - txnid_sort(MDBX_PNL_BEGIN(tl), MDBX_PNL_END(tl)); -} - -static int __must_check_result txl_append(MDBX_TXL __restrict *ptl, - txnid_t id) { - if (unlikely(MDBX_PNL_GETSIZE(*ptl) == MDBX_PNL_ALLOCLEN(*ptl))) { - int rc = txl_need(ptl, MDBX_TXL_GRANULATE); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - txl_xappend(*ptl, id); - return MDBX_SUCCESS; -} - -/*----------------------------------------------------------------------------*/ - -#define MDBX_DPL_GAP_MERGESORT 16 -#define MDBX_DPL_GAP_EDGING 2 -#define MDBX_DPL_RESERVE_GAP (MDBX_DPL_GAP_MERGESORT + MDBX_DPL_GAP_EDGING) - -static __always_inline size_t dpl_size2bytes(ptrdiff_t size) { - assert(size > CURSOR_STACK && (size_t)size <= MDBX_PGL_LIMIT); -#if MDBX_DPL_PREALLOC_FOR_RADIXSORT - size += size; -#endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */ - STATIC_ASSERT(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(MDBX_dpl) + - (MDBX_PGL_LIMIT * (MDBX_DPL_PREALLOC_FOR_RADIXSORT + 1) + - MDBX_DPL_RESERVE_GAP) * - sizeof(MDBX_dp) + - MDBX_PNL_GRANULATE * sizeof(void *) * 2 < - SIZE_MAX / 4 * 3); - size_t bytes = - ceil_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(MDBX_dpl) + - ((size_t)size + MDBX_DPL_RESERVE_GAP) * sizeof(MDBX_dp), - MDBX_PNL_GRANULATE * sizeof(void *) * 2) - - MDBX_ASSUME_MALLOC_OVERHEAD; - return bytes; -} - -static __always_inline size_t dpl_bytes2size(const ptrdiff_t bytes) { - size_t size = (bytes - sizeof(MDBX_dpl)) / sizeof(MDBX_dp); - assert(size > CURSOR_STACK + MDBX_DPL_RESERVE_GAP && - size <= MDBX_PGL_LIMIT + MDBX_PNL_GRANULATE); - size -= MDBX_DPL_RESERVE_GAP; -#if MDBX_DPL_PREALLOC_FOR_RADIXSORT - size >>= 1; -#endif /* 
MDBX_DPL_PREALLOC_FOR_RADIXSORT */ - return size; -} - -static __always_inline size_t dpl_setlen(MDBX_dpl *dl, size_t len) { - static const MDBX_page dpl_stub_pageE = {INVALID_TXNID, - 0, - P_BAD, - {0}, - /* pgno */ ~(pgno_t)0}; - assert(dpl_stub_pageE.mp_flags == P_BAD && - dpl_stub_pageE.mp_pgno == P_INVALID); - dl->length = len; - dl->items[len + 1].ptr = (MDBX_page *)&dpl_stub_pageE; - dl->items[len + 1].pgno = P_INVALID; - dl->items[len + 1].npages = 1; - return len; -} - -static __always_inline void dpl_clear(MDBX_dpl *dl) { - static const MDBX_page dpl_stub_pageB = {INVALID_TXNID, - 0, - P_BAD, - {0}, - /* pgno */ 0}; - assert(dpl_stub_pageB.mp_flags == P_BAD && dpl_stub_pageB.mp_pgno == 0); - dl->sorted = dpl_setlen(dl, 0); - dl->pages_including_loose = 0; - dl->items[0].ptr = (MDBX_page *)&dpl_stub_pageB; - dl->items[0].pgno = 0; - dl->items[0].npages = 1; - assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); -} - -static void dpl_free(MDBX_txn *txn) { - if (likely(txn->tw.dirtylist)) { - osal_free(txn->tw.dirtylist); - txn->tw.dirtylist = NULL; - } -} - -static MDBX_dpl *dpl_reserve(MDBX_txn *txn, size_t size) { - tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - - size_t bytes = - dpl_size2bytes((size < MDBX_PGL_LIMIT) ? 
size : MDBX_PGL_LIMIT); - MDBX_dpl *const dl = osal_realloc(txn->tw.dirtylist, bytes); - if (likely(dl)) { -#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) - bytes = malloc_usable_size(dl); -#endif /* malloc_usable_size */ - dl->detent = dpl_bytes2size(bytes); - tASSERT(txn, txn->tw.dirtylist == NULL || dl->length <= dl->detent); - txn->tw.dirtylist = dl; - } - return dl; -} - -static int dpl_alloc(MDBX_txn *txn) { - tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - - const size_t wanna = (txn->mt_env->me_options.dp_initial < txn->mt_geo.upper) - ? txn->mt_env->me_options.dp_initial - : txn->mt_geo.upper; -#if MDBX_FORCE_ASSERTIONS || MDBX_DEBUG - if (txn->tw.dirtylist) - /* обнуляем чтобы не сработал ассерт внутри dpl_reserve() */ - txn->tw.dirtylist->sorted = txn->tw.dirtylist->length = 0; -#endif /* asertions enabled */ - if (unlikely(!txn->tw.dirtylist || txn->tw.dirtylist->detent < wanna || - txn->tw.dirtylist->detent > wanna + wanna) && - unlikely(!dpl_reserve(txn, wanna))) - return MDBX_ENOMEM; - - dpl_clear(txn->tw.dirtylist); - return MDBX_SUCCESS; -} - -#define MDBX_DPL_EXTRACT_KEY(ptr) ((ptr)->pgno) -RADIXSORT_IMPL(dpl, MDBX_dp, MDBX_DPL_EXTRACT_KEY, - MDBX_DPL_PREALLOC_FOR_RADIXSORT, 1) - -#define DP_SORT_CMP(first, last) ((first).pgno < (last).pgno) -SORT_IMPL(dp_sort, false, MDBX_dp, DP_SORT_CMP) - -__hot __noinline static MDBX_dpl *dpl_sort_slowpath(const MDBX_txn *txn) { - tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - - MDBX_dpl *dl = txn->tw.dirtylist; - assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); - const size_t unsorted = dl->length - dl->sorted; - if (likely(unsorted < MDBX_RADIXSORT_THRESHOLD) || - unlikely(!dpl_radixsort(dl->items + 1, dl->length))) { - if (dl->sorted > unsorted / 4 + 4 && - 
(MDBX_DPL_PREALLOC_FOR_RADIXSORT || - dl->length + unsorted < dl->detent + MDBX_DPL_GAP_MERGESORT)) { - MDBX_dp *const sorted_begin = dl->items + 1; - MDBX_dp *const sorted_end = sorted_begin + dl->sorted; - MDBX_dp *const end = - dl->items + (MDBX_DPL_PREALLOC_FOR_RADIXSORT - ? dl->length + dl->length + 1 - : dl->detent + MDBX_DPL_RESERVE_GAP); - MDBX_dp *const tmp = end - unsorted; - assert(dl->items + dl->length + 1 < tmp); - /* copy unsorted to the end of allocated space and sort it */ - memcpy(tmp, sorted_end, unsorted * sizeof(MDBX_dp)); - dp_sort(tmp, tmp + unsorted); - /* merge two parts from end to begin */ - MDBX_dp *__restrict w = dl->items + dl->length; - MDBX_dp *__restrict l = dl->items + dl->sorted; - MDBX_dp *__restrict r = end - 1; - do { - const bool cmp = expect_with_probability(l->pgno > r->pgno, 0, .5); -#if defined(__LCC__) || __CLANG_PREREQ(13, 0) || !MDBX_HAVE_CMOV - *w = cmp ? *l-- : *r--; -#else - *w = cmp ? *l : *r; - l -= cmp; - r += (ptrdiff_t)cmp - 1; -#endif - } while (likely(--w > l)); - assert(r == tmp - 1); - assert(dl->items[0].pgno == 0 && - dl->items[dl->length + 1].pgno == P_INVALID); - if (ASSERT_ENABLED()) - for (size_t i = 0; i <= dl->length; ++i) - assert(dl->items[i].pgno < dl->items[i + 1].pgno); - } else { - dp_sort(dl->items + 1, dl->items + dl->length + 1); - assert(dl->items[0].pgno == 0 && - dl->items[dl->length + 1].pgno == P_INVALID); - } - } else { - assert(dl->items[0].pgno == 0 && - dl->items[dl->length + 1].pgno == P_INVALID); - } - dl->sorted = dl->length; - return dl; -} - -static __always_inline MDBX_dpl *dpl_sort(const MDBX_txn *txn) { - tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - - MDBX_dpl *dl = txn->tw.dirtylist; - assert(dl->length <= MDBX_PGL_LIMIT); - assert(dl->sorted <= dl->length); - assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); - return likely(dl->sorted == dl->length) ? 
dl : dpl_sort_slowpath(txn); -} - -/* Returns the index of the first dirty-page whose pgno - * member is greater than or equal to id. */ -#define DP_SEARCH_CMP(dp, id) ((dp).pgno < (id)) -SEARCH_IMPL(dp_bsearch, MDBX_dp, pgno_t, DP_SEARCH_CMP) - -__hot __noinline static size_t dpl_search(const MDBX_txn *txn, pgno_t pgno) { - tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - - MDBX_dpl *dl = txn->tw.dirtylist; - assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); - if (AUDIT_ENABLED()) { - for (const MDBX_dp *ptr = dl->items + dl->sorted; --ptr > dl->items;) { - assert(ptr[0].pgno < ptr[1].pgno); - assert(ptr[0].pgno >= NUM_METAS); - } - } - - switch (dl->length - dl->sorted) { - default: - /* sort a whole */ - dpl_sort_slowpath(txn); - break; - case 0: - /* whole sorted cases */ - break; - -#define LINEAR_SEARCH_CASE(N) \ - case N: \ - if (dl->items[dl->length - N + 1].pgno == pgno) \ - return dl->length - N + 1; \ - __fallthrough - - /* use linear scan until the threshold */ - LINEAR_SEARCH_CASE(7); /* fall through */ - LINEAR_SEARCH_CASE(6); /* fall through */ - LINEAR_SEARCH_CASE(5); /* fall through */ - LINEAR_SEARCH_CASE(4); /* fall through */ - LINEAR_SEARCH_CASE(3); /* fall through */ - LINEAR_SEARCH_CASE(2); /* fall through */ - case 1: - if (dl->items[dl->length].pgno == pgno) - return dl->length; - /* continue bsearch on the sorted part */ - break; - } - return dp_bsearch(dl->items + 1, dl->sorted, pgno) - dl->items; -} - -MDBX_NOTHROW_PURE_FUNCTION static __inline unsigned -dpl_npages(const MDBX_dpl *dl, size_t i) { - assert(0 <= (intptr_t)i && i <= dl->length); - unsigned n = dl->items[i].npages; - assert(n == (IS_OVERFLOW(dl->items[i].ptr) ? 
dl->items[i].ptr->mp_pages : 1)); - return n; -} - -MDBX_NOTHROW_PURE_FUNCTION static __inline pgno_t -dpl_endpgno(const MDBX_dpl *dl, size_t i) { - return dpl_npages(dl, i) + dl->items[i].pgno; -} - -static __inline bool dpl_intersect(const MDBX_txn *txn, pgno_t pgno, - size_t npages) { - tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - - MDBX_dpl *dl = txn->tw.dirtylist; - assert(dl->sorted == dl->length); - assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); - size_t const n = dpl_search(txn, pgno); - assert(n >= 1 && n <= dl->length + 1); - assert(pgno <= dl->items[n].pgno); - assert(pgno > dl->items[n - 1].pgno); - const bool rc = - /* intersection with founded */ pgno + npages > dl->items[n].pgno || - /* intersection with prev */ dpl_endpgno(dl, n - 1) > pgno; - if (ASSERT_ENABLED()) { - bool check = false; - for (size_t i = 1; i <= dl->length; ++i) { - const MDBX_page *const dp = dl->items[i].ptr; - if (!(dp->mp_pgno /* begin */ >= /* end */ pgno + npages || - dpl_endpgno(dl, i) /* end */ <= /* begin */ pgno)) - check |= true; - } - assert(check == rc); - } - return rc; -} - -MDBX_NOTHROW_PURE_FUNCTION static __always_inline size_t -dpl_exist(const MDBX_txn *txn, pgno_t pgno) { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - MDBX_dpl *dl = txn->tw.dirtylist; - size_t i = dpl_search(txn, pgno); - assert((int)i > 0); - return (dl->items[i].pgno == pgno) ? 
i : 0; -} - -MDBX_MAYBE_UNUSED static const MDBX_page *debug_dpl_find(const MDBX_txn *txn, - const pgno_t pgno) { - tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); - const MDBX_dpl *dl = txn->tw.dirtylist; - if (dl) { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - assert(dl->items[0].pgno == 0 && - dl->items[dl->length + 1].pgno == P_INVALID); - for (size_t i = dl->length; i > dl->sorted; --i) - if (dl->items[i].pgno == pgno) - return dl->items[i].ptr; - - if (dl->sorted) { - const size_t i = dp_bsearch(dl->items + 1, dl->sorted, pgno) - dl->items; - if (dl->items[i].pgno == pgno) - return dl->items[i].ptr; - } - } else { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); - } - return nullptr; -} - -static void dpl_remove_ex(const MDBX_txn *txn, size_t i, size_t npages) { - tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - - MDBX_dpl *dl = txn->tw.dirtylist; - assert((intptr_t)i > 0 && i <= dl->length); - assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); - dl->pages_including_loose -= npages; - dl->sorted -= dl->sorted >= i; - dl->length -= 1; - memmove(dl->items + i, dl->items + i + 1, - (dl->length - i + 2) * sizeof(dl->items[0])); - assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); -} - -static void dpl_remove(const MDBX_txn *txn, size_t i) { - dpl_remove_ex(txn, i, dpl_npages(txn->tw.dirtylist, i)); -} - -static __noinline void txn_lru_reduce(MDBX_txn *txn) { - NOTICE("lru-reduce %u -> %u", txn->tw.dirtylru, txn->tw.dirtylru >> 1); - tASSERT(txn, (txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0); - do { - txn->tw.dirtylru >>= 1; - MDBX_dpl *dl = txn->tw.dirtylist; - for (size_t i = 1; i <= dl->length; ++i) { - size_t *const ptr = - ptr_disp(dl->items[i].ptr, -(ptrdiff_t)sizeof(size_t)); - *ptr >>= 1; - } - txn = txn->mt_parent; - } while (txn); -} - 
-MDBX_NOTHROW_PURE_FUNCTION static __inline uint32_t dpl_age(const MDBX_txn *txn, - size_t i) { - tASSERT(txn, (txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0); - const MDBX_dpl *dl = txn->tw.dirtylist; - assert((intptr_t)i > 0 && i <= dl->length); - size_t *const ptr = ptr_disp(dl->items[i].ptr, -(ptrdiff_t)sizeof(size_t)); - return txn->tw.dirtylru - (uint32_t)*ptr; -} - -static __inline uint32_t txn_lru_turn(MDBX_txn *txn) { - txn->tw.dirtylru += 1; - if (unlikely(txn->tw.dirtylru > UINT32_MAX / 3) && - (txn->mt_flags & MDBX_WRITEMAP) == 0) - txn_lru_reduce(txn); - return txn->tw.dirtylru; -} - -static __always_inline int __must_check_result dpl_append(MDBX_txn *txn, - pgno_t pgno, - MDBX_page *page, - size_t npages) { - tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - const MDBX_dp dp = {page, pgno, (pgno_t)npages}; - if ((txn->mt_flags & MDBX_WRITEMAP) == 0) { - size_t *const ptr = ptr_disp(page, -(ptrdiff_t)sizeof(size_t)); - *ptr = txn->tw.dirtylru; - } - - MDBX_dpl *dl = txn->tw.dirtylist; - tASSERT(txn, dl->length <= MDBX_PGL_LIMIT + MDBX_PNL_GRANULATE); - tASSERT(txn, dl->items[0].pgno == 0 && - dl->items[dl->length + 1].pgno == P_INVALID); - if (AUDIT_ENABLED()) { - for (size_t i = dl->length; i > 0; --i) { - assert(dl->items[i].pgno != dp.pgno); - if (unlikely(dl->items[i].pgno == dp.pgno)) { - ERROR("Page %u already exist in the DPL at %zu", dp.pgno, i); - return MDBX_PROBLEM; - } - } - } - - if (unlikely(dl->length == dl->detent)) { - if (unlikely(dl->detent >= MDBX_PGL_LIMIT)) { - ERROR("DPL is full (MDBX_PGL_LIMIT %zu)", MDBX_PGL_LIMIT); - return MDBX_TXN_FULL; - } - const size_t size = (dl->detent < MDBX_PNL_INITIAL * 42) - ? 
dl->detent + dl->detent - : dl->detent + dl->detent / 2; - dl = dpl_reserve(txn, size); - if (unlikely(!dl)) - return MDBX_ENOMEM; - tASSERT(txn, dl->length < dl->detent); - } - - /* Сортировка нужна для быстрого поиска, используем несколько тактик: - * 1) Сохраняем упорядоченность при естественной вставке в нужном порядке. - * 2) Добавляем в не-сортированный хвост, который сортируем и сливаем - * с отсортированной головой по необходимости, а пока хвост короткий - * ищем в нём сканированием, избегая большой пересортировки. - * 3) Если не-сортированный хвост короткий, а добавляемый элемент близок - * к концу отсортированной головы, то выгоднее сразу вставить элемент - * в нужное место. - * - * Алгоритмически: - * - добавлять в не-сортированный хвост следует только если вставка сильно - * дорогая, т.е. если целевая позиция элемента сильно далека от конца; - * - для быстрой проверки достаточно сравнить добавляемый элемент с отстоящим - * от конца на максимально-приемлемое расстояние; - * - если список короче, либо элемент в этой позиции меньше вставляемого, - * то следует перемещать элементы и вставлять в отсортированную голову; - * - если не-сортированный хвост длиннее, либо элемент в этой позиции больше, - * то следует добавлять в не-сортированный хвост. */ - - dl->pages_including_loose += npages; - MDBX_dp *i = dl->items + dl->length; - -#define MDBX_DPL_INSERTION_THRESHOLD 42 - const ptrdiff_t pivot = (ptrdiff_t)dl->length - MDBX_DPL_INSERTION_THRESHOLD; -#if MDBX_HAVE_CMOV - const pgno_t pivot_pgno = - dl->items[(dl->length < MDBX_DPL_INSERTION_THRESHOLD) - ? 
0 - : dl->length - MDBX_DPL_INSERTION_THRESHOLD] - .pgno; -#endif /* MDBX_HAVE_CMOV */ - - /* copy the stub beyond the end */ - i[2] = i[1]; - dl->length += 1; - - if (likely(pivot <= (ptrdiff_t)dl->sorted) && -#if MDBX_HAVE_CMOV - pivot_pgno < dp.pgno) { -#else - (pivot <= 0 || dl->items[pivot].pgno < dp.pgno)) { -#endif /* MDBX_HAVE_CMOV */ - dl->sorted += 1; - - /* сдвигаем несортированный хвост */ - while (i >= dl->items + dl->sorted) { -#if !defined(__GNUC__) /* пытаемся избежать вызова memmove() */ - i[1] = *i; -#elif MDBX_WORDBITS == 64 && \ - (defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)) - STATIC_ASSERT(sizeof(MDBX_dp) == sizeof(__uint128_t)); - ((__uint128_t *)i)[1] = *(volatile __uint128_t *)i; -#else - i[1].ptr = i->ptr; - i[1].pgno = i->pgno; - i[1].npages = i->npages; -#endif - --i; - } - /* ищем нужную позицию сдвигая отсортированные элементы */ - while (i->pgno > pgno) { - tASSERT(txn, i > dl->items); - i[1] = *i; - --i; - } - tASSERT(txn, i->pgno < dp.pgno); - } - - i[1] = dp; - assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); - assert(dl->sorted <= dl->length); - return MDBX_SUCCESS; -} - -/*----------------------------------------------------------------------------*/ - -static __must_check_result __inline int page_retire(MDBX_cursor *mc, - MDBX_page *mp); - -static int __must_check_result page_dirty(MDBX_txn *txn, MDBX_page *mp, - size_t npages); -typedef struct page_result { - MDBX_page *page; - int err; -} pgr_t; - -static txnid_t kick_longlived_readers(MDBX_env *env, const txnid_t laggard); - -static pgr_t page_new(MDBX_cursor *mc, const unsigned flags); -static pgr_t page_new_large(MDBX_cursor *mc, const size_t npages); -static int page_touch(MDBX_cursor *mc); -static int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, - const MDBX_val *data); - -#define TXN_END_NAMES \ - { \ - "committed", "empty-commit", "abort", "reset", "reset-tmp", "fail-begin", \ - 
"fail-beginchild" \ - } -enum { - /* txn_end operation number, for logging */ - TXN_END_COMMITTED, - TXN_END_PURE_COMMIT, - TXN_END_ABORT, - TXN_END_RESET, - TXN_END_RESET_TMP, - TXN_END_FAIL_BEGIN, - TXN_END_FAIL_BEGINCHILD -}; -#define TXN_END_OPMASK 0x0F /* mask for txn_end() operation number */ -#define TXN_END_UPDATE 0x10 /* update env state (DBIs) */ -#define TXN_END_FREE 0x20 /* free txn unless it is MDBX_env.me_txn0 */ -#define TXN_END_EOTDONE 0x40 /* txn's cursors already closed */ -#define TXN_END_SLOT 0x80 /* release any reader slot if NOSTICKYTHREADS */ -static int txn_end(MDBX_txn *txn, const unsigned mode); - -static __always_inline pgr_t page_get_inline(const uint16_t ILL, - const MDBX_cursor *const mc, - const pgno_t pgno, - const txnid_t front); - -static pgr_t page_get_any(const MDBX_cursor *const mc, const pgno_t pgno, - const txnid_t front) { - return page_get_inline(P_ILL_BITS, mc, pgno, front); -} - -__hot static pgr_t page_get_three(const MDBX_cursor *const mc, - const pgno_t pgno, const txnid_t front) { - return page_get_inline(P_ILL_BITS | P_OVERFLOW, mc, pgno, front); -} - -static pgr_t page_get_large(const MDBX_cursor *const mc, const pgno_t pgno, - const txnid_t front) { - return page_get_inline(P_ILL_BITS | P_BRANCH | P_LEAF | P_LEAF2, mc, pgno, - front); -} - -static __always_inline int __must_check_result page_get(const MDBX_cursor *mc, - const pgno_t pgno, - MDBX_page **mp, - const txnid_t front) { - pgr_t ret = page_get_three(mc, pgno, front); - *mp = ret.page; - return ret.err; -} - -static int __must_check_result page_search_root(MDBX_cursor *mc, - const MDBX_val *key, int flags); - -#define MDBX_PS_MODIFY 1 -#define MDBX_PS_ROOTONLY 2 -#define MDBX_PS_FIRST 4 -#define MDBX_PS_LAST 8 -static int __must_check_result page_search(MDBX_cursor *mc, const MDBX_val *key, - int flags); -static int __must_check_result page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst); - -#define MDBX_SPLIT_REPLACE MDBX_APPENDDUP /* newkey is not new */ 
-static int __must_check_result page_split(MDBX_cursor *mc, - const MDBX_val *const newkey, - MDBX_val *const newdata, - pgno_t newpgno, const unsigned naf); - -static int coherency_timeout(uint64_t *timestamp, intptr_t pgno, - const MDBX_env *env); -static int __must_check_result validate_meta_copy(MDBX_env *env, - const MDBX_meta *meta, - MDBX_meta *dest); -static int __must_check_result override_meta(MDBX_env *env, size_t target, - txnid_t txnid, - const MDBX_meta *shape); -static int __must_check_result read_header(MDBX_env *env, MDBX_meta *meta, - const int lck_exclusive, - const mdbx_mode_t mode_bits); -static int __must_check_result sync_locked(MDBX_env *env, unsigned flags, - MDBX_meta *const pending, - meta_troika_t *const troika); -static int env_close(MDBX_env *env, bool resurrect_after_fork); - -struct node_result { - MDBX_node *node; - bool exact; -}; - -static struct node_result node_search(MDBX_cursor *mc, const MDBX_val *key); - -static int __must_check_result node_add_branch(MDBX_cursor *mc, size_t indx, - const MDBX_val *key, - pgno_t pgno); -static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx, - const MDBX_val *key, - MDBX_val *data, unsigned flags); -static int __must_check_result node_add_leaf2(MDBX_cursor *mc, size_t indx, - const MDBX_val *key); - -static void node_del(MDBX_cursor *mc, size_t ksize); -static MDBX_node *node_shrink(MDBX_page *mp, size_t indx, MDBX_node *node); -static int __must_check_result node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, - bool fromleft); -static int __must_check_result node_read(MDBX_cursor *mc, const MDBX_node *leaf, - MDBX_val *data, const MDBX_page *mp); -static int __must_check_result rebalance(MDBX_cursor *mc); -static int __must_check_result update_key(MDBX_cursor *mc, const MDBX_val *key); - -static void cursor_pop(MDBX_cursor *mc); -static int __must_check_result cursor_push(MDBX_cursor *mc, MDBX_page *mp); - -static int __must_check_result audit_ex(MDBX_txn *txn, size_t 
retired_stored, - bool dont_filter_gc); - -static int __must_check_result page_check(const MDBX_cursor *const mc, - const MDBX_page *const mp); -static int __must_check_result cursor_check(const MDBX_cursor *mc); -static int __must_check_result cursor_get(MDBX_cursor *mc, MDBX_val *key, - MDBX_val *data, MDBX_cursor_op op); -static int __must_check_result cursor_put_checklen(MDBX_cursor *mc, - const MDBX_val *key, - MDBX_val *data, - unsigned flags); -static int __must_check_result cursor_put_nochecklen(MDBX_cursor *mc, - const MDBX_val *key, - MDBX_val *data, - unsigned flags); -static int __must_check_result cursor_check_updating(MDBX_cursor *mc); -static int __must_check_result cursor_del(MDBX_cursor *mc, - MDBX_put_flags_t flags); -static int __must_check_result delete(MDBX_txn *txn, MDBX_dbi dbi, - const MDBX_val *key, const MDBX_val *data, - unsigned flags); -#define SIBLING_LEFT 0 -#define SIBLING_RIGHT 2 -static int __must_check_result cursor_sibling(MDBX_cursor *mc, int dir); -static int __must_check_result cursor_next(MDBX_cursor *mc, MDBX_val *key, - MDBX_val *data, MDBX_cursor_op op); -static int __must_check_result cursor_prev(MDBX_cursor *mc, MDBX_val *key, - MDBX_val *data, MDBX_cursor_op op); -struct cursor_set_result { - int err; - bool exact; -}; - -static struct cursor_set_result cursor_set(MDBX_cursor *mc, MDBX_val *key, - MDBX_val *data, MDBX_cursor_op op); -static int __must_check_result cursor_first(MDBX_cursor *mc, MDBX_val *key, - MDBX_val *data); -static int __must_check_result cursor_last(MDBX_cursor *mc, MDBX_val *key, - MDBX_val *data); - -static int __must_check_result cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, - size_t dbi); -static int __must_check_result cursor_xinit0(MDBX_cursor *mc); -static int __must_check_result cursor_xinit1(MDBX_cursor *mc, - const MDBX_node *node, - const MDBX_page *mp); -static int __must_check_result cursor_xinit2(MDBX_cursor *mc, - MDBX_xcursor *src_mx, - bool new_dupdata); -static void 
cursor_copy(const MDBX_cursor *csrc, MDBX_cursor *cdst); - -static int __must_check_result drop_tree(MDBX_cursor *mc, - const bool may_have_subDBs); -static int __must_check_result fetch_sdb(MDBX_txn *txn, size_t dbi); -static int __must_check_result setup_sdb(MDBX_dbx *const dbx, - const MDBX_db *const db, - const unsigned pagesize); - -static __inline MDBX_cmp_func *get_default_keycmp(MDBX_db_flags_t flags); -static __inline MDBX_cmp_func *get_default_datacmp(MDBX_db_flags_t flags); - -__cold const char *mdbx_liberr2str(int errnum) { - /* Table of descriptions for MDBX errors */ - static const char *const tbl[] = { - "MDBX_KEYEXIST: Key/data pair already exists", - "MDBX_NOTFOUND: No matching key/data pair found", - "MDBX_PAGE_NOTFOUND: Requested page not found", - "MDBX_CORRUPTED: Database is corrupted", - "MDBX_PANIC: Environment had fatal error", - "MDBX_VERSION_MISMATCH: DB version mismatch libmdbx", - "MDBX_INVALID: File is not an MDBX file", - "MDBX_MAP_FULL: Environment mapsize limit reached", - "MDBX_DBS_FULL: Too many DBI-handles (maxdbs reached)", - "MDBX_READERS_FULL: Too many readers (maxreaders reached)", - NULL /* MDBX_TLS_FULL (-30789): unused in MDBX */, - "MDBX_TXN_FULL: Transaction has too many dirty pages," - " i.e transaction is too big", - "MDBX_CURSOR_FULL: Cursor stack limit reachedn - this usually indicates" - " corruption, i.e branch-pages loop", - "MDBX_PAGE_FULL: Internal error - Page has no more space", - "MDBX_UNABLE_EXTEND_MAPSIZE: Database engine was unable to extend" - " mapping, e.g. since address space is unavailable or busy," - " or Operation system not supported such operations", - "MDBX_INCOMPATIBLE: Environment or database is not compatible" - " with the requested operation or the specified flags", - "MDBX_BAD_RSLOT: Invalid reuse of reader locktable slot," - " e.g. read-transaction already run for current thread", - "MDBX_BAD_TXN: Transaction is not valid for requested operation," - " e.g. 
had errored and be must aborted, has a child, or is invalid", - "MDBX_BAD_VALSIZE: Invalid size or alignment of key or data" - " for target database, either invalid subDB name", - "MDBX_BAD_DBI: The specified DBI-handle is invalid" - " or changed by another thread/transaction", - "MDBX_PROBLEM: Unexpected internal error, transaction should be aborted", - "MDBX_BUSY: Another write transaction is running," - " or environment is already used while opening with MDBX_EXCLUSIVE flag", - }; - - if (errnum >= MDBX_KEYEXIST && errnum <= MDBX_BUSY) { - int i = errnum - MDBX_KEYEXIST; - return tbl[i]; - } - - switch (errnum) { - case MDBX_SUCCESS: - return "MDBX_SUCCESS: Successful"; - case MDBX_EMULTIVAL: - return "MDBX_EMULTIVAL: The specified key has" - " more than one associated value"; - case MDBX_EBADSIGN: - return "MDBX_EBADSIGN: Wrong signature of a runtime object(s)," - " e.g. memory corruption or double-free"; - case MDBX_WANNA_RECOVERY: - return "MDBX_WANNA_RECOVERY: Database should be recovered," - " but this could NOT be done automatically for now" - " since it opened in read-only mode"; - case MDBX_EKEYMISMATCH: - return "MDBX_EKEYMISMATCH: The given key value is mismatched to the" - " current cursor position"; - case MDBX_TOO_LARGE: - return "MDBX_TOO_LARGE: Database is too large for current system," - " e.g. could NOT be mapped into RAM"; - case MDBX_THREAD_MISMATCH: - return "MDBX_THREAD_MISMATCH: A thread has attempted to use a not" - " owned object, e.g. 
a transaction that started by another thread"; - case MDBX_TXN_OVERLAPPING: - return "MDBX_TXN_OVERLAPPING: Overlapping read and write transactions for" - " the current thread"; - case MDBX_DUPLICATED_CLK: - return "MDBX_DUPLICATED_CLK: Alternative/Duplicate LCK-file is exists," - " please keep one and remove unused other"; - case MDBX_DANGLING_DBI: - return "MDBX_DANGLING_DBI: Some cursors and/or other resources should be" - " closed before subDb or corresponding DBI-handle could be (re)used"; - default: - return NULL; - } -} - -__cold const char *mdbx_strerror_r(int errnum, char *buf, size_t buflen) { - const char *msg = mdbx_liberr2str(errnum); - if (!msg && buflen > 0 && buflen < INT_MAX) { -#if defined(_WIN32) || defined(_WIN64) - const DWORD size = FormatMessageA( - FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, - errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buf, (DWORD)buflen, - NULL); - return size ? buf : "FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM) failed"; -#elif defined(_GNU_SOURCE) && defined(__GLIBC__) - /* GNU-specific */ - if (errnum > 0) - msg = strerror_r(errnum, buf, buflen); -#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) - /* XSI-compliant */ - if (errnum > 0 && strerror_r(errnum, buf, buflen) == 0) - msg = buf; -#else - if (errnum > 0) { - msg = strerror(errnum); - if (msg) { - strncpy(buf, msg, buflen); - msg = buf; - } - } -#endif - if (!msg) { - (void)snprintf(buf, buflen, "error %d", errnum); - msg = buf; - } - buf[buflen - 1] = '\0'; - } - return msg; -} - -__cold const char *mdbx_strerror(int errnum) { -#if defined(_WIN32) || defined(_WIN64) - static char buf[1024]; - return mdbx_strerror_r(errnum, buf, sizeof(buf)); -#else - const char *msg = mdbx_liberr2str(errnum); - if (!msg) { - if (errnum > 0) - msg = strerror(errnum); - if (!msg) { - static char buf[32]; - (void)snprintf(buf, sizeof(buf) - 1, "error %d", errnum); - msg = buf; - } - } - return msg; -#endif -} - -#if defined(_WIN32) || 
defined(_WIN64) /* Bit of madness for Windows */ -const char *mdbx_strerror_r_ANSI2OEM(int errnum, char *buf, size_t buflen) { - const char *msg = mdbx_liberr2str(errnum); - if (!msg && buflen > 0 && buflen < INT_MAX) { - const DWORD size = FormatMessageA( - FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, - errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buf, (DWORD)buflen, - NULL); - if (!size) - msg = "FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM) failed"; - else if (!CharToOemBuffA(buf, buf, size)) - msg = "CharToOemBuffA() failed"; - else - msg = buf; - } - return msg; -} - -const char *mdbx_strerror_ANSI2OEM(int errnum) { - static char buf[1024]; - return mdbx_strerror_r_ANSI2OEM(errnum, buf, sizeof(buf)); -} -#endif /* Bit of madness for Windows */ - -__cold void debug_log_va(int level, const char *function, int line, - const char *fmt, va_list args) { - ENSURE(nullptr, osal_fastmutex_acquire(&debug_lock) == 0); - if (mdbx_static.logger.ptr) { - if (mdbx_static.logger_buffer == nullptr) - mdbx_static.logger.fmt(level, function, line, fmt, args); - else { - const int len = vsnprintf(mdbx_static.logger_buffer, - mdbx_static.logger_buffer_size, fmt, args); - if (len > 0) - mdbx_static.logger.nofmt(level, function, line, - mdbx_static.logger_buffer, len); - } - } else { -#if defined(_WIN32) || defined(_WIN64) - if (IsDebuggerPresent()) { - int prefix_len = 0; - char *prefix = nullptr; - if (function && line > 0) - prefix_len = osal_asprintf(&prefix, "%s:%d ", function, line); - else if (function) - prefix_len = osal_asprintf(&prefix, "%s: ", function); - else if (line > 0) - prefix_len = osal_asprintf(&prefix, "%d: ", line); - if (prefix_len > 0 && prefix) { - OutputDebugStringA(prefix); - osal_free(prefix); - } - char *msg = nullptr; - int msg_len = osal_vasprintf(&msg, fmt, args); - if (msg_len > 0 && msg) { - OutputDebugStringA(msg); - osal_free(msg); - } - } -#else - if (function && line > 0) - fprintf(stderr, "%s:%d ", function, line); - 
else if (function) - fprintf(stderr, "%s: ", function); - else if (line > 0) - fprintf(stderr, "%d: ", line); - vfprintf(stderr, fmt, args); - fflush(stderr); -#endif - } - ENSURE(nullptr, osal_fastmutex_release(&debug_lock) == 0); -} - -__cold void debug_log(int level, const char *function, int line, - const char *fmt, ...) { - va_list args; - va_start(args, fmt); - debug_log_va(level, function, line, fmt, args); - va_end(args); -} - -/* Dump a key in ascii or hexadecimal. */ -const char *mdbx_dump_val(const MDBX_val *key, char *const buf, - const size_t bufsize) { - if (!key) - return ""; - if (!key->iov_len) - return ""; - if (!buf || bufsize < 4) - return nullptr; - - bool is_ascii = true; - const uint8_t *const data = key->iov_base; - for (size_t i = 0; i < key->iov_len; i++) - if (data[i] < ' ' || data[i] > '~') { - is_ascii = false; - break; - } - - if (is_ascii) { - int len = - snprintf(buf, bufsize, "%.*s", - (key->iov_len > INT_MAX) ? INT_MAX : (int)key->iov_len, data); - assert(len > 0 && (size_t)len < bufsize); - (void)len; - } else { - char *const detent = buf + bufsize - 2; - char *ptr = buf; - *ptr++ = '<'; - for (size_t i = 0; i < key->iov_len && ptr < detent; i++) { - const char hex[16] = {'0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; - *ptr++ = hex[data[i] >> 4]; - *ptr++ = hex[data[i] & 15]; - } - if (ptr < detent) - *ptr++ = '>'; - *ptr = '\0'; - } - return buf; -} - -/*------------------------------------------------------------------------------ - LY: debug stuff */ - -static const char *leafnode_type(MDBX_node *n) { - static const char *const tp[2][2] = {{"", ": DB"}, - {": sub-page", ": sub-DB"}}; - return (node_flags(n) & F_BIGDATA) - ? ": large page" - : tp[!!(node_flags(n) & F_DUPDATA)][!!(node_flags(n) & F_SUBDATA)]; -} - -/* Display all the keys in the page. 
*/ -MDBX_MAYBE_UNUSED static void page_list(MDBX_page *mp) { - pgno_t pgno = mp->mp_pgno; - const char *type; - MDBX_node *node; - size_t i, nkeys, nsize, total = 0; - MDBX_val key; - DKBUF; - - switch (PAGETYPE_WHOLE(mp)) { - case P_BRANCH: - type = "Branch page"; - break; - case P_LEAF: - type = "Leaf page"; - break; - case P_LEAF | P_SUBP: - type = "Leaf sub-page"; - break; - case P_LEAF | P_LEAF2: - type = "Leaf2 page"; - break; - case P_LEAF | P_LEAF2 | P_SUBP: - type = "Leaf2 sub-page"; - break; - case P_OVERFLOW: - VERBOSE("Overflow page %" PRIaPGNO " pages %u\n", pgno, mp->mp_pages); - return; - case P_META: - VERBOSE("Meta-page %" PRIaPGNO " txnid %" PRIu64 "\n", pgno, - unaligned_peek_u64(4, page_meta(mp)->mm_txnid_a)); - return; - default: - VERBOSE("Bad page %" PRIaPGNO " flags 0x%X\n", pgno, mp->mp_flags); - return; - } - - nkeys = page_numkeys(mp); - VERBOSE("%s %" PRIaPGNO " numkeys %zu\n", type, pgno, nkeys); - - for (i = 0; i < nkeys; i++) { - if (IS_LEAF2(mp)) { /* LEAF2 pages have no mp_ptrs[] or node headers */ - key.iov_len = nsize = mp->mp_leaf2_ksize; - key.iov_base = page_leaf2key(mp, i, nsize); - total += nsize; - VERBOSE("key %zu: nsize %zu, %s\n", i, nsize, DKEY(&key)); - continue; - } - node = page_node(mp, i); - key.iov_len = node_ks(node); - key.iov_base = node->mn_data; - nsize = NODESIZE + key.iov_len; - if (IS_BRANCH(mp)) { - VERBOSE("key %zu: page %" PRIaPGNO ", %s\n", i, node_pgno(node), - DKEY(&key)); - total += nsize; - } else { - if (node_flags(node) & F_BIGDATA) - nsize += sizeof(pgno_t); - else - nsize += node_ds(node); - total += nsize; - nsize += sizeof(indx_t); - VERBOSE("key %zu: nsize %zu, %s%s\n", i, nsize, DKEY(&key), - leafnode_type(node)); - } - total = EVEN(total); - } - VERBOSE("Total: header %zu + contents %zu + unused %zu\n", - IS_LEAF2(mp) ? 
PAGEHDRSZ : PAGEHDRSZ + mp->mp_lower, total, - page_room(mp)); -} - -/*----------------------------------------------------------------------------*/ - -/* Check if there is an initialized xcursor, so XCURSOR_REFRESH() is proper */ -#define XCURSOR_INITED(mc) \ - ((mc)->mc_xcursor && ((mc)->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) - -/* Update sub-page pointer, if any, in mc->mc_xcursor. - * Needed when the node which contains the sub-page may have moved. - * Called with mp = mc->mc_pg[mc->mc_top], ki = mc->mc_ki[mc->mc_top]. */ -#define XCURSOR_REFRESH(mc, mp, ki) \ - do { \ - MDBX_page *xr_pg = (mp); \ - MDBX_node *xr_node = page_node(xr_pg, ki); \ - if ((node_flags(xr_node) & (F_DUPDATA | F_SUBDATA)) == F_DUPDATA) \ - (mc)->mc_xcursor->mx_cursor.mc_pg[0] = node_data(xr_node); \ - } while (0) - -MDBX_MAYBE_UNUSED static bool cursor_is_tracked(const MDBX_cursor *mc) { - for (MDBX_cursor *scan = mc->mc_txn->mt_cursors[mc->mc_dbi]; scan; - scan = scan->mc_next) - if (mc == ((mc->mc_flags & C_SUB) ? 
&scan->mc_xcursor->mx_cursor : scan)) - return true; - return false; -} - -/* Perform act while tracking temporary cursor mn */ -#define WITH_CURSOR_TRACKING(mn, act) \ - do { \ - cASSERT(&(mn), \ - mn.mc_txn->mt_cursors != NULL /* must be not rdonly txt */); \ - cASSERT(&(mn), !cursor_is_tracked(&(mn))); \ - MDBX_cursor mc_dummy; \ - MDBX_cursor **tracking_head = &(mn).mc_txn->mt_cursors[mn.mc_dbi]; \ - MDBX_cursor *tracked = &(mn); \ - if ((mn).mc_flags & C_SUB) { \ - mc_dummy.mc_flags = C_INITIALIZED; \ - mc_dummy.mc_top = 0; \ - mc_dummy.mc_snum = 0; \ - mc_dummy.mc_xcursor = (MDBX_xcursor *)&(mn); \ - tracked = &mc_dummy; \ - } \ - tracked->mc_next = *tracking_head; \ - *tracking_head = tracked; \ - { act; } \ - *tracking_head = tracked->mc_next; \ - } while (0) - -static int -env_defer_free_and_release(MDBX_env *const env, - struct mdbx_defer_free_item *const chain) { - size_t length = 0; - struct mdbx_defer_free_item *obsolete_chain = nullptr; -#if MDBX_ENABLE_DBI_LOCKFREE - const uint64_t now = osal_monotime(); - struct mdbx_defer_free_item **scan = &env->me_defer_free; - if (env->me_defer_free) { - const uint64_t threshold_1second = osal_16dot16_to_monotime(1 * 65536); - do { - struct mdbx_defer_free_item *item = *scan; - if (now - item->timestamp < threshold_1second) { - scan = &item->next; - length += 1; - } else { - *scan = item->next; - item->next = obsolete_chain; - obsolete_chain = item; - } - } while (*scan); - } - - eASSERT(env, *scan == nullptr); - if (chain) { - struct mdbx_defer_free_item *item = chain; - do { - item->timestamp = now; - item = item->next; - } while (item); - *scan = chain; - } -#else /* MDBX_ENABLE_DBI_LOCKFREE */ - obsolete_chain = chain; -#endif /* MDBX_ENABLE_DBI_LOCKFREE */ - - ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); - if (length > 42) { -#if defined(_WIN32) || defined(_WIN64) - SwitchToThread(); -#else - sched_yield(); -#endif /* Windows */ - } - while (obsolete_chain) { - struct 
mdbx_defer_free_item *item = obsolete_chain; - obsolete_chain = obsolete_chain->next; - osal_free(item); - } - return chain ? MDBX_SUCCESS : MDBX_BAD_DBI; -} - -#if MDBX_ENABLE_DBI_SPARSE - -static __inline size_t dbi_bitmap_ctz(const MDBX_txn *txn, intptr_t bmi) { - tASSERT(txn, bmi > 0); - STATIC_ASSERT(sizeof(bmi) >= sizeof(txn->mt_dbi_sparse[0])); -#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctzl) - if (sizeof(txn->mt_dbi_sparse[0]) <= sizeof(int)) - return __builtin_ctz((int)bmi); - if (sizeof(txn->mt_dbi_sparse[0]) == sizeof(long)) - return __builtin_ctzl((long)bmi); -#if (defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ == 8) || \ - __has_builtin(__builtin_ctzll) - return __builtin_ctzll(bmi); -#endif /* have(long long) && long long == uint64_t */ -#endif /* GNU C */ - -#if defined(_MSC_VER) - unsigned long index; - if (sizeof(txn->mt_dbi_sparse[0]) > 4) { -#if defined(_M_AMD64) || defined(_M_ARM64) || defined(_M_X64) - _BitScanForward64(&index, bmi); - return index; -#else - if (bmi > UINT32_MAX) { - _BitScanForward(&index, (uint32_t)((uint64_t)bmi >> 32)); - return index; - } -#endif - } - _BitScanForward(&index, (uint32_t)bmi); - return index; -#endif /* MSVC */ - - bmi &= -bmi; - if (sizeof(txn->mt_dbi_sparse[0]) > 4) { - static const uint8_t debruijn_ctz64[64] = { - 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, - 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, - 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, - 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12}; - return debruijn_ctz64[(UINT64_C(0x022FDD63CC95386D) * (uint64_t)bmi) >> 58]; - } else { - static const uint8_t debruijn_ctz32[32] = { - 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, - 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; - return debruijn_ctz32[(UINT32_C(0x077CB531) * (uint32_t)bmi) >> 27]; - } -} - -/* LY: Макрос целенаправленно сделан с одним циклом, чтобы сохранить возможность - * 
использования оператора break */ -#define TXN_FOREACH_DBI_FROM(TXN, I, FROM) \ - for (size_t bitmap_chunk = CHAR_BIT * sizeof(TXN->mt_dbi_sparse[0]), \ - bitmap_item = TXN->mt_dbi_sparse[0] >> FROM, I = FROM; \ - I < TXN->mt_numdbs; ++I) \ - if (bitmap_item == 0) { \ - I = (I - 1) | (bitmap_chunk - 1); \ - bitmap_item = TXN->mt_dbi_sparse[(1 + I) / bitmap_chunk]; \ - if (!bitmap_item) \ - I += bitmap_chunk; \ - continue; \ - } else if ((bitmap_item & 1) == 0) { \ - size_t bitmap_skip = dbi_bitmap_ctz(txn, bitmap_item); \ - bitmap_item >>= bitmap_skip; \ - I += bitmap_skip - 1; \ - continue; \ - } else if (bitmap_item >>= 1, TXN->mt_dbi_state[I]) -#else -#define TXN_FOREACH_DBI_FROM(TXN, I, SKIP) \ - for (size_t I = SKIP; I < TXN->mt_numdbs; ++I) \ - if (TXN->mt_dbi_state[I]) -#endif /* MDBX_ENABLE_DBI_SPARSE */ - -#define TXN_FOREACH_DBI_ALL(TXN, I) TXN_FOREACH_DBI_FROM(TXN, I, 0) -#define TXN_FOREACH_DBI_USER(TXN, I) TXN_FOREACH_DBI_FROM(TXN, I, CORE_DBS) - -/* Back up parent txn's cursor, then grab the original for tracking */ -static int cursor_shadow(MDBX_cursor *parent_cursor, MDBX_txn *nested_txn, - const size_t dbi) { - - tASSERT(nested_txn, dbi > FREE_DBI && dbi < nested_txn->mt_numdbs); - const size_t size = parent_cursor->mc_xcursor - ? sizeof(MDBX_cursor) + sizeof(MDBX_xcursor) - : sizeof(MDBX_cursor); - for (MDBX_cursor *bk; parent_cursor; parent_cursor = bk->mc_next) { - bk = parent_cursor; - if (parent_cursor->mc_signature != MDBX_MC_LIVE) - continue; - bk = osal_malloc(size); - if (unlikely(!bk)) - return MDBX_ENOMEM; -#if MDBX_DEBUG - memset(bk, 0xCD, size); - VALGRIND_MAKE_MEM_UNDEFINED(bk, size); -#endif /* MDBX_DEBUG */ - *bk = *parent_cursor; - parent_cursor->mc_backup = bk; - /* Kill pointers into src to reduce abuse: The - * user may not use mc until dst ends. But we need a valid - * txn pointer here for cursor fixups to keep working. 
*/ - parent_cursor->mc_txn = nested_txn; - parent_cursor->mc_db = &nested_txn->mt_dbs[dbi]; - parent_cursor->mc_dbi_state = &nested_txn->mt_dbi_state[dbi]; - MDBX_xcursor *mx = parent_cursor->mc_xcursor; - if (mx != NULL) { - *(MDBX_xcursor *)(bk + 1) = *mx; - mx->mx_cursor.mc_txn = nested_txn; - } - parent_cursor->mc_next = nested_txn->mt_cursors[dbi]; - nested_txn->mt_cursors[dbi] = parent_cursor; - } - return MDBX_SUCCESS; -} - -/* Close this txn's cursors, give parent txn's cursors back to parent. - * - * [in] txn the transaction handle. - * [in] merge true to keep changes to parent cursors, false to revert. - * - * Returns 0 on success, non-zero on failure. */ -static void cursors_eot(MDBX_txn *txn, const bool merge) { - tASSERT(txn, txn->mt_cursors[FREE_DBI] == nullptr); - TXN_FOREACH_DBI_FROM(txn, i, /* skip FREE_DBI */ 1) { - MDBX_cursor *mc = txn->mt_cursors[i]; - if (!mc) - continue; - txn->mt_cursors[i] = nullptr; - do { - const unsigned stage = mc->mc_signature; - MDBX_cursor *const next = mc->mc_next; - MDBX_cursor *const bk = mc->mc_backup; - ENSURE(txn->mt_env, - stage == MDBX_MC_LIVE || (stage == MDBX_MC_WAIT4EOT && bk)); - cASSERT(mc, mc->mc_dbi == (MDBX_dbi)i); - if (bk) { - MDBX_xcursor *mx = mc->mc_xcursor; - tASSERT(txn, txn->mt_parent != NULL); - /* Zap: Using uninitialized memory '*mc->mc_backup'. 
*/ - MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(6001); - ENSURE(txn->mt_env, bk->mc_signature == MDBX_MC_LIVE); - tASSERT(txn, mx == bk->mc_xcursor); - if (stage == MDBX_MC_WAIT4EOT /* Cursor was closed by user */) - mc->mc_signature = stage /* Promote closed state to parent txn */; - else if (merge) { - /* Restore pointers to parent txn */ - mc->mc_next = bk->mc_next; - mc->mc_backup = bk->mc_backup; - mc->mc_txn = bk->mc_txn; - mc->mc_db = bk->mc_db; - mc->mc_dbi_state = bk->mc_dbi_state; - if (mx) { - if (mx != bk->mc_xcursor) { - *bk->mc_xcursor = *mx; - mx = bk->mc_xcursor; - } - mx->mx_cursor.mc_txn = bk->mc_txn; - } - } else { - /* Restore from backup, i.e. rollback/abort nested txn */ - *mc = *bk; - if (mx) - *mx = *(MDBX_xcursor *)(bk + 1); - } - bk->mc_signature = 0; - osal_free(bk); - } else { - ENSURE(txn->mt_env, stage == MDBX_MC_LIVE); - mc->mc_signature = MDBX_MC_READY4CLOSE /* Cursor may be reused */; - mc->mc_flags = 0 /* reset C_UNTRACK */; - } - mc = next; - } while (mc); - } -} - -static __noinline int dbi_import(MDBX_txn *txn, const size_t dbi); - -static __inline bool db_check_flags(uint16_t db_flags) { - switch (db_flags & ~(DB_VALID | MDBX_REVERSEKEY | MDBX_INTEGERKEY)) { - default: - NOTICE("invalid db-flags 0x%x", db_flags); - return false; - case MDBX_DUPSORT: - case MDBX_DUPSORT | MDBX_REVERSEDUP: - case MDBX_DUPSORT | MDBX_DUPFIXED: - case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP: - case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP: - case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: - case MDBX_DB_DEFAULTS: - return (db_flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) != - (MDBX_REVERSEKEY | MDBX_INTEGERKEY); - } -} - -static __inline uint8_t dbi_state(const MDBX_txn *txn, const size_t dbi) { - STATIC_ASSERT(DBI_DIRTY == MDBX_DBI_DIRTY && DBI_STALE == MDBX_DBI_STALE && - DBI_FRESH == MDBX_DBI_FRESH && DBI_CREAT == MDBX_DBI_CREAT); - -#if MDBX_ENABLE_DBI_SPARSE - const size_t bitmap_chunk = CHAR_BIT * 
sizeof(txn->mt_dbi_sparse[0]); - const size_t bitmap_indx = dbi / bitmap_chunk; - const size_t bitmap_mask = (size_t)1 << dbi % bitmap_chunk; - return likely(dbi < txn->mt_numdbs && - (txn->mt_dbi_sparse[bitmap_indx] & bitmap_mask) != 0) - ? txn->mt_dbi_state[dbi] - : 0; -#else - return likely(dbi < txn->mt_numdbs) ? txn->mt_dbi_state[dbi] : 0; -#endif /* MDBX_ENABLE_DBI_SPARSE */ -} - -static __inline bool dbi_changed(const MDBX_txn *txn, const size_t dbi) { - const MDBX_env *const env = txn->mt_env; - eASSERT(env, dbi_state(txn, dbi) & DBI_LINDO); - const uint32_t snap_seq = - atomic_load32(&env->me_dbi_seqs[dbi], mo_AcquireRelease); - return snap_seq != txn->mt_dbi_seqs[dbi]; -} - -static __always_inline int dbi_check(const MDBX_txn *txn, const size_t dbi) { - const uint8_t state = dbi_state(txn, dbi); - if (likely((state & DBI_LINDO) != 0 && !dbi_changed(txn, dbi))) - return (state & DBI_VALID) ? MDBX_SUCCESS : MDBX_BAD_DBI; - - /* Медленный путь: ленивая до-инициализацяи и импорт */ - return dbi_import((MDBX_txn *)txn, dbi); -} - -static __inline uint32_t dbi_seq_next(const MDBX_env *const env, size_t dbi) { - uint32_t v = atomic_load32(&env->me_dbi_seqs[dbi], mo_AcquireRelease) + 1; - return v ? 
v : 1; -} - -struct dbi_snap_result { - uint32_t sequence; - unsigned flags; -}; - -static struct dbi_snap_result dbi_snap(const MDBX_env *env, const size_t dbi) { - eASSERT(env, dbi < env->me_numdbs); - struct dbi_snap_result r; - uint32_t snap = atomic_load32(&env->me_dbi_seqs[dbi], mo_AcquireRelease); - do { - r.sequence = snap; - r.flags = env->me_db_flags[dbi]; - snap = atomic_load32(&env->me_dbi_seqs[dbi], mo_AcquireRelease); - } while (unlikely(snap != r.sequence)); - return r; -} - -static __noinline int dbi_import(MDBX_txn *txn, const size_t dbi) { - const MDBX_env *const env = txn->mt_env; - if (dbi >= env->me_numdbs || !env->me_db_flags[dbi]) - return MDBX_BAD_DBI; - -#if MDBX_ENABLE_DBI_SPARSE - const size_t bitmap_chunk = CHAR_BIT * sizeof(txn->mt_dbi_sparse[0]); - const size_t bitmap_indx = dbi / bitmap_chunk; - const size_t bitmap_mask = (size_t)1 << dbi % bitmap_chunk; - if (dbi >= txn->mt_numdbs) { - for (size_t i = (txn->mt_numdbs + bitmap_chunk - 1) / bitmap_chunk; - bitmap_indx >= i; ++i) - txn->mt_dbi_sparse[i] = 0; - eASSERT(env, (txn->mt_dbi_sparse[bitmap_indx] & bitmap_mask) == 0); - MDBX_txn *scan = txn; - do { - eASSERT(env, scan->mt_dbi_sparse == txn->mt_dbi_sparse); - eASSERT(env, scan->mt_numdbs < dbi + 1); - scan->mt_numdbs = (unsigned)dbi + 1; - scan->mt_dbi_state[dbi] = 0; - scan = scan->mt_parent; - } while (scan /* && scan->mt_dbi_sparse == txn->mt_dbi_sparse */); - txn->mt_dbi_sparse[bitmap_indx] |= bitmap_mask; - goto lindo; - } - if ((txn->mt_dbi_sparse[bitmap_indx] & bitmap_mask) == 0) { - MDBX_txn *scan = txn; - do { - eASSERT(env, scan->mt_dbi_sparse == txn->mt_dbi_sparse); - eASSERT(env, scan->mt_numdbs == txn->mt_numdbs); - scan->mt_dbi_state[dbi] = 0; - scan = scan->mt_parent; - } while (scan /* && scan->mt_dbi_sparse == txn->mt_dbi_sparse */); - txn->mt_dbi_sparse[bitmap_indx] |= bitmap_mask; - goto lindo; - } -#else - if (dbi >= txn->mt_numdbs) { - size_t i = txn->mt_numdbs; - do - txn->mt_dbi_state[i] = 0; - while (dbi 
>= ++i); - txn->mt_numdbs = i; - goto lindo; - } -#endif /* MDBX_ENABLE_DBI_SPARSE */ - - if (!txn->mt_dbi_state[dbi]) { - lindo: - /* dbi-слот еще не инициализирован в транзакции, а хендл не использовался */ - txn->mt_cursors[dbi] = nullptr; - MDBX_txn *const parent = txn->mt_parent; - if (parent) { - /* вложенная пишущая транзакция */ - int rc = dbi_check(parent, dbi); - /* копируем состояние subDB очищая new-флаги. */ - eASSERT(env, txn->mt_dbi_seqs == parent->mt_dbi_seqs); - txn->mt_dbi_state[dbi] = - parent->mt_dbi_state[dbi] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); - if (likely(rc == MDBX_SUCCESS)) { - txn->mt_dbs[dbi] = parent->mt_dbs[dbi]; - if (parent->mt_cursors[dbi]) { - rc = cursor_shadow(parent->mt_cursors[dbi], txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) { - /* не получилось забекапить курсоры */ - txn->mt_dbi_state[dbi] = DBI_OLDEN | DBI_LINDO | DBI_STALE; - txn->mt_flags |= MDBX_TXN_ERROR; - } - } - } - return rc; - } - txn->mt_dbi_seqs[dbi] = 0; - txn->mt_dbi_state[dbi] = DBI_LINDO; - } else { - eASSERT(env, txn->mt_dbi_seqs[dbi] != env->me_dbi_seqs[dbi].weak); - if (unlikely((txn->mt_dbi_state[dbi] & (DBI_VALID | DBI_OLDEN)) || - txn->mt_cursors[dbi])) { - /* хендл уже использовался в транзакции, но был закрыт или переоткрыт, - * либо при явном пере-открытии хендла есть висячие курсоры */ - eASSERT(env, (txn->mt_dbi_state[dbi] & DBI_STALE) == 0); - txn->mt_dbi_seqs[dbi] = env->me_dbi_seqs[dbi].weak; - txn->mt_dbi_state[dbi] = DBI_OLDEN | DBI_LINDO; - return txn->mt_cursors[dbi] ? 
MDBX_DANGLING_DBI : MDBX_BAD_DBI; - } - } - - /* хендл не использовался в транзакции, либо явно пере-отрывается при - * отсутствии висячих курсоров */ - eASSERT(env, (txn->mt_dbi_state[dbi] & DBI_LINDO) && !txn->mt_cursors[dbi]); - - /* читаем актуальные флаги и sequence */ - struct dbi_snap_result snap = dbi_snap(env, dbi); - txn->mt_dbi_seqs[dbi] = snap.sequence; - if (snap.flags & DB_VALID) { - txn->mt_dbs[dbi].md_flags = snap.flags & DB_PERSISTENT_FLAGS; - txn->mt_dbi_state[dbi] = DBI_LINDO | DBI_VALID | DBI_STALE; - return MDBX_SUCCESS; - } - return MDBX_BAD_DBI; -} - -/* Export or close DBI handles opened in this txn. */ -static int dbi_update(MDBX_txn *txn, int keep) { - MDBX_env *const env = txn->mt_env; - tASSERT(txn, !txn->mt_parent && txn == env->me_txn0); - bool locked = false; - struct mdbx_defer_free_item *defer_chain = nullptr; - TXN_FOREACH_DBI_USER(txn, dbi) { - if (likely((txn->mt_dbi_state[dbi] & DBI_CREAT) == 0)) - continue; - if (!locked) { - int err = osal_fastmutex_acquire(&env->me_dbi_lock); - if (unlikely(err != MDBX_SUCCESS)) - return err; - locked = true; - if (dbi >= env->me_numdbs) - /* хендл был закрыт из другого потока пока захватывали блокировку */ - continue; - } - tASSERT(txn, dbi < env->me_numdbs); - if (keep) { - env->me_db_flags[dbi] = txn->mt_dbs[dbi].md_flags | DB_VALID; - } else { - uint32_t seq = dbi_seq_next(env, dbi); - struct mdbx_defer_free_item *item = env->me_dbxs[dbi].md_name.iov_base; - if (item) { - env->me_db_flags[dbi] = 0; - env->me_dbxs[dbi].md_name.iov_len = 0; - env->me_dbxs[dbi].md_name.iov_base = nullptr; - atomic_store32(&env->me_dbi_seqs[dbi], seq, mo_AcquireRelease); - osal_flush_incoherent_cpu_writeback(); - item->next = defer_chain; - defer_chain = item; - } else { - eASSERT(env, env->me_dbxs[dbi].md_name.iov_len == 0); - eASSERT(env, env->me_db_flags[dbi] == 0); - } - } - } - - if (locked) { - size_t i = env->me_numdbs; - while ((env->me_db_flags[i - 1] & DB_VALID) == 0) { - --i; - eASSERT(env, i >= 
CORE_DBS); - eASSERT(env, !env->me_db_flags[i] && !env->me_dbxs[i].md_name.iov_len && - !env->me_dbxs[i].md_name.iov_base); - } - env->me_numdbs = (unsigned)i; - env_defer_free_and_release(env, defer_chain); - } - return MDBX_SUCCESS; -} - -int mdbx_cmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, - const MDBX_val *b) { - eASSERT(NULL, txn->mt_signature == MDBX_MT_SIGNATURE); - tASSERT(txn, (dbi_state(txn, dbi) & DBI_VALID) && !dbi_changed(txn, dbi)); - tASSERT(txn, dbi < txn->mt_env->me_numdbs && - (txn->mt_env->me_db_flags[dbi] & DB_VALID) != 0); - return txn->mt_env->me_dbxs[dbi].md_cmp(a, b); -} - -int mdbx_dcmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, - const MDBX_val *b) { - eASSERT(NULL, txn->mt_signature == MDBX_MT_SIGNATURE); - tASSERT(txn, (dbi_state(txn, dbi) & DBI_VALID) && !dbi_changed(txn, dbi)); - tASSERT(txn, dbi < txn->mt_env->me_numdbs && - (txn->mt_env->me_db_flags[dbi] & DB_VALID)); - return txn->mt_env->me_dbxs[dbi].md_dcmp(a, b); -} - -/* Allocate memory for a page. - * Re-use old malloc'ed pages first for singletons, otherwise just malloc. - * Set MDBX_TXN_ERROR on failure. 
*/ -static MDBX_page *page_malloc(MDBX_txn *txn, size_t num) { - MDBX_env *env = txn->mt_env; - MDBX_page *np = env->me_dp_reserve; - size_t size = env->me_psize; - if (likely(num == 1 && np)) { - eASSERT(env, env->me_dp_reserve_len > 0); - MDBX_ASAN_UNPOISON_MEMORY_REGION(np, size); - VALGRIND_MEMPOOL_ALLOC(env, ptr_disp(np, -(ptrdiff_t)sizeof(size_t)), - size + sizeof(size_t)); - VALGRIND_MAKE_MEM_DEFINED(&mp_next(np), sizeof(MDBX_page *)); - env->me_dp_reserve = mp_next(np); - env->me_dp_reserve_len -= 1; - } else { - size = pgno2bytes(env, num); - void *const ptr = osal_malloc(size + sizeof(size_t)); - if (unlikely(!ptr)) { - txn->mt_flags |= MDBX_TXN_ERROR; - return nullptr; - } - VALGRIND_MEMPOOL_ALLOC(env, ptr, size + sizeof(size_t)); - np = ptr_disp(ptr, sizeof(size_t)); - } - - if ((env->me_flags & MDBX_NOMEMINIT) == 0) { - /* For a single page alloc, we init everything after the page header. - * For multi-page, we init the final page; if the caller needed that - * many pages they will be filling in at least up to the last page. 
*/ - size_t skip = PAGEHDRSZ; - if (num > 1) - skip += pgno2bytes(env, num - 1); - memset(ptr_disp(np, skip), 0, size - skip); - } -#if MDBX_DEBUG - np->mp_pgno = 0; -#endif - VALGRIND_MAKE_MEM_UNDEFINED(np, size); - np->mp_flags = 0; - np->mp_pages = (pgno_t)num; - return np; -} - -/* Free a shadow dirty page */ -static void dpage_free(MDBX_env *env, MDBX_page *dp, size_t npages) { - VALGRIND_MAKE_MEM_UNDEFINED(dp, pgno2bytes(env, npages)); - MDBX_ASAN_UNPOISON_MEMORY_REGION(dp, pgno2bytes(env, npages)); - if (unlikely(env->me_flags & MDBX_PAGEPERTURB)) - memset(dp, -1, pgno2bytes(env, npages)); - if (likely(npages == 1 && - env->me_dp_reserve_len < env->me_options.dp_reserve_limit)) { - MDBX_ASAN_POISON_MEMORY_REGION(dp, env->me_psize); - MDBX_ASAN_UNPOISON_MEMORY_REGION(&mp_next(dp), sizeof(MDBX_page *)); - mp_next(dp) = env->me_dp_reserve; - VALGRIND_MEMPOOL_FREE(env, ptr_disp(dp, -(ptrdiff_t)sizeof(size_t))); - env->me_dp_reserve = dp; - env->me_dp_reserve_len += 1; - } else { - /* large pages just get freed directly */ - void *const ptr = ptr_disp(dp, -(ptrdiff_t)sizeof(size_t)); - VALGRIND_MEMPOOL_FREE(env, ptr); - osal_free(ptr); - } -} - -/* Return all dirty pages to dpage list */ -static void dlist_free(MDBX_txn *txn) { - tASSERT(txn, (txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0); - MDBX_env *env = txn->mt_env; - MDBX_dpl *const dl = txn->tw.dirtylist; - - for (size_t i = 1; i <= dl->length; i++) - dpage_free(env, dl->items[i].ptr, dpl_npages(dl, i)); - - dpl_clear(dl); -} - -static __always_inline MDBX_db *outer_db(MDBX_cursor *mc) { - cASSERT(mc, (mc->mc_flags & C_SUB) != 0); - MDBX_xcursor *mx = container_of(mc->mc_db, MDBX_xcursor, mx_db); - MDBX_cursor_couple *couple = container_of(mx, MDBX_cursor_couple, inner); - cASSERT(mc, mc->mc_db == &couple->outer.mc_xcursor->mx_db); - cASSERT(mc, mc->mc_dbx == &couple->outer.mc_xcursor->mx_dbx); - return couple->outer.mc_db; -} - -MDBX_MAYBE_UNUSED __cold static bool dirtylist_check(MDBX_txn *txn) 
{ - tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); - const MDBX_dpl *const dl = txn->tw.dirtylist; - if (!dl) { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); - return true; - } - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - - assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); - tASSERT(txn, txn->tw.dirtyroom + dl->length == - (txn->mt_parent ? txn->mt_parent->tw.dirtyroom - : txn->mt_env->me_options.dp_limit)); - - if (!AUDIT_ENABLED()) - return true; - - size_t loose = 0, pages = 0; - for (size_t i = dl->length; i > 0; --i) { - const MDBX_page *const dp = dl->items[i].ptr; - if (!dp) - continue; - - tASSERT(txn, dp->mp_pgno == dl->items[i].pgno); - if (unlikely(dp->mp_pgno != dl->items[i].pgno)) - return false; - - if ((txn->mt_flags & MDBX_WRITEMAP) == 0) { - const uint32_t age = dpl_age(txn, i); - tASSERT(txn, age < UINT32_MAX / 3); - if (unlikely(age > UINT32_MAX / 3)) - return false; - } - - tASSERT(txn, dp->mp_flags == P_LOOSE || IS_MODIFIABLE(txn, dp)); - if (dp->mp_flags == P_LOOSE) { - loose += 1; - } else if (unlikely(!IS_MODIFIABLE(txn, dp))) - return false; - - const unsigned num = dpl_npages(dl, i); - pages += num; - tASSERT(txn, txn->mt_next_pgno >= dp->mp_pgno + num); - if (unlikely(txn->mt_next_pgno < dp->mp_pgno + num)) - return false; - - if (i < dl->sorted) { - tASSERT(txn, dl->items[i + 1].pgno >= dp->mp_pgno + num); - if (unlikely(dl->items[i + 1].pgno < dp->mp_pgno + num)) - return false; - } - - const size_t rpa = - pnl_search(txn->tw.relist, dp->mp_pgno, txn->mt_next_pgno); - tASSERT(txn, rpa > MDBX_PNL_GETSIZE(txn->tw.relist) || - txn->tw.relist[rpa] != dp->mp_pgno); - if (rpa <= MDBX_PNL_GETSIZE(txn->tw.relist) && - unlikely(txn->tw.relist[rpa] == dp->mp_pgno)) - return false; - if (num > 1) { - const size_t rpb = - pnl_search(txn->tw.relist, dp->mp_pgno + num - 1, txn->mt_next_pgno); - tASSERT(txn, rpa == rpb); - if (unlikely(rpa != rpb)) - return 
false; - } - } - - tASSERT(txn, loose == txn->tw.loose_count); - if (unlikely(loose != txn->tw.loose_count)) - return false; - - tASSERT(txn, pages == dl->pages_including_loose); - if (unlikely(pages != dl->pages_including_loose)) - return false; - - for (size_t i = 1; i <= MDBX_PNL_GETSIZE(txn->tw.retired_pages); ++i) { - const MDBX_page *const dp = debug_dpl_find(txn, txn->tw.retired_pages[i]); - tASSERT(txn, !dp); - if (unlikely(dp)) - return false; - } - - return true; -} - -#if MDBX_ENABLE_REFUND -static void refund_reclaimed(MDBX_txn *txn) { - /* Scanning in descend order */ - pgno_t next_pgno = txn->mt_next_pgno; - const MDBX_PNL pnl = txn->tw.relist; - tASSERT(txn, MDBX_PNL_GETSIZE(pnl) && MDBX_PNL_MOST(pnl) == next_pgno - 1); -#if MDBX_PNL_ASCENDING - size_t i = MDBX_PNL_GETSIZE(pnl); - tASSERT(txn, pnl[i] == next_pgno - 1); - while (--next_pgno, --i > 0 && pnl[i] == next_pgno - 1) - ; - MDBX_PNL_SETSIZE(pnl, i); -#else - size_t i = 1; - tASSERT(txn, pnl[i] == next_pgno - 1); - size_t len = MDBX_PNL_GETSIZE(pnl); - while (--next_pgno, ++i <= len && pnl[i] == next_pgno - 1) - ; - MDBX_PNL_SETSIZE(pnl, len -= i - 1); - for (size_t move = 0; move < len; ++move) - pnl[1 + move] = pnl[i + move]; -#endif - VERBOSE("refunded %" PRIaPGNO " pages: %" PRIaPGNO " -> %" PRIaPGNO, - txn->mt_next_pgno - next_pgno, txn->mt_next_pgno, next_pgno); - txn->mt_next_pgno = next_pgno; - tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->mt_next_pgno - 1)); -} - -static void refund_loose(MDBX_txn *txn) { - tASSERT(txn, txn->tw.loose_pages != nullptr); - tASSERT(txn, txn->tw.loose_count > 0); - - MDBX_dpl *const dl = txn->tw.dirtylist; - if (dl) { - tASSERT(txn, dl->length >= txn->tw.loose_count); - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - } else { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); - } - - pgno_t onstack[MDBX_CACHELINE_SIZE * 8 / sizeof(pgno_t)]; - MDBX_PNL suitable = onstack; - - if (!dl || dl->length - 
dl->sorted > txn->tw.loose_count) { - /* Dirty list is useless since unsorted. */ - if (pnl_bytes2size(sizeof(onstack)) < txn->tw.loose_count) { - suitable = pnl_alloc(txn->tw.loose_count); - if (unlikely(!suitable)) - return /* this is not a reason for transaction fail */; - } - - /* Collect loose-pages which may be refunded. */ - tASSERT(txn, txn->mt_next_pgno >= MIN_PAGENO + txn->tw.loose_count); - pgno_t most = MIN_PAGENO; - size_t w = 0; - for (const MDBX_page *lp = txn->tw.loose_pages; lp; lp = mp_next(lp)) { - tASSERT(txn, lp->mp_flags == P_LOOSE); - tASSERT(txn, txn->mt_next_pgno > lp->mp_pgno); - if (likely(txn->mt_next_pgno - txn->tw.loose_count <= lp->mp_pgno)) { - tASSERT(txn, - w < ((suitable == onstack) ? pnl_bytes2size(sizeof(onstack)) - : MDBX_PNL_ALLOCLEN(suitable))); - suitable[++w] = lp->mp_pgno; - most = (lp->mp_pgno > most) ? lp->mp_pgno : most; - } - MDBX_ASAN_UNPOISON_MEMORY_REGION(&mp_next(lp), sizeof(MDBX_page *)); - VALGRIND_MAKE_MEM_DEFINED(&mp_next(lp), sizeof(MDBX_page *)); - } - - if (most + 1 == txn->mt_next_pgno) { - /* Sort suitable list and refund pages at the tail. */ - MDBX_PNL_SETSIZE(suitable, w); - pnl_sort(suitable, MAX_PAGENO + 1); - - /* Scanning in descend order */ - const intptr_t step = MDBX_PNL_ASCENDING ? -1 : 1; - const intptr_t begin = - MDBX_PNL_ASCENDING ? MDBX_PNL_GETSIZE(suitable) : 1; - const intptr_t end = - MDBX_PNL_ASCENDING ? 
0 : MDBX_PNL_GETSIZE(suitable) + 1; - tASSERT(txn, suitable[begin] >= suitable[end - step]); - tASSERT(txn, most == suitable[begin]); - - for (intptr_t i = begin + step; i != end; i += step) { - if (suitable[i] != most - 1) - break; - most -= 1; - } - const size_t refunded = txn->mt_next_pgno - most; - DEBUG("refund-suitable %zu pages %" PRIaPGNO " -> %" PRIaPGNO, refunded, - most, txn->mt_next_pgno); - txn->mt_next_pgno = most; - txn->tw.loose_count -= refunded; - if (dl) { - txn->tw.dirtyroom += refunded; - dl->pages_including_loose -= refunded; - assert(txn->tw.dirtyroom <= txn->mt_env->me_options.dp_limit); - - /* Filter-out dirty list */ - size_t r = 0; - w = 0; - if (dl->sorted) { - do { - if (dl->items[++r].pgno < most) { - if (++w != r) - dl->items[w] = dl->items[r]; - } - } while (r < dl->sorted); - dl->sorted = w; - } - while (r < dl->length) { - if (dl->items[++r].pgno < most) { - if (++w != r) - dl->items[w] = dl->items[r]; - } - } - dpl_setlen(dl, w); - tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == - (txn->mt_parent ? txn->mt_parent->tw.dirtyroom - : txn->mt_env->me_options.dp_limit)); - } - goto unlink_loose; - } - } else { - /* Dirtylist is mostly sorted, just refund loose pages at the end. */ - dpl_sort(txn); - tASSERT(txn, - dl->length < 2 || dl->items[1].pgno < dl->items[dl->length].pgno); - tASSERT(txn, dl->sorted == dl->length); - - /* Scan dirtylist tail-forward and cutoff suitable pages. 
*/ - size_t n; - for (n = dl->length; dl->items[n].pgno == txn->mt_next_pgno - 1 && - dl->items[n].ptr->mp_flags == P_LOOSE; - --n) { - tASSERT(txn, n > 0); - MDBX_page *dp = dl->items[n].ptr; - DEBUG("refund-sorted page %" PRIaPGNO, dp->mp_pgno); - tASSERT(txn, dp->mp_pgno == dl->items[n].pgno); - txn->mt_next_pgno -= 1; - } - dpl_setlen(dl, n); - - if (dl->sorted != dl->length) { - const size_t refunded = dl->sorted - dl->length; - dl->sorted = dl->length; - txn->tw.loose_count -= refunded; - txn->tw.dirtyroom += refunded; - dl->pages_including_loose -= refunded; - tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == - (txn->mt_parent ? txn->mt_parent->tw.dirtyroom - : txn->mt_env->me_options.dp_limit)); - - /* Filter-out loose chain & dispose refunded pages. */ - unlink_loose: - for (MDBX_page *__restrict *__restrict link = &txn->tw.loose_pages; - *link;) { - MDBX_page *dp = *link; - tASSERT(txn, dp->mp_flags == P_LOOSE); - MDBX_ASAN_UNPOISON_MEMORY_REGION(&mp_next(dp), sizeof(MDBX_page *)); - VALGRIND_MAKE_MEM_DEFINED(&mp_next(dp), sizeof(MDBX_page *)); - if (txn->mt_next_pgno > dp->mp_pgno) { - link = &mp_next(dp); - } else { - *link = mp_next(dp); - if ((txn->mt_flags & MDBX_WRITEMAP) == 0) - dpage_free(txn->mt_env, dp, 1); - } - } - } - } - - tASSERT(txn, dirtylist_check(txn)); - if (suitable != onstack) - pnl_free(suitable); - txn->tw.loose_refund_wl = txn->mt_next_pgno; -} - -static bool txn_refund(MDBX_txn *txn) { - const pgno_t before = txn->mt_next_pgno; - - if (txn->tw.loose_pages && txn->tw.loose_refund_wl > txn->mt_next_pgno) - refund_loose(txn); - - while (true) { - if (MDBX_PNL_GETSIZE(txn->tw.relist) == 0 || - MDBX_PNL_MOST(txn->tw.relist) != txn->mt_next_pgno - 1) - break; - - refund_reclaimed(txn); - if (!txn->tw.loose_pages || txn->tw.loose_refund_wl <= txn->mt_next_pgno) - break; - - const pgno_t memo = txn->mt_next_pgno; - refund_loose(txn); - if (memo == txn->mt_next_pgno) - break; - } - - if (before == txn->mt_next_pgno) - return 
false; - - if (txn->tw.spilled.list) - /* Squash deleted pagenums if we refunded any */ - spill_purge(txn); - - return true; -} -#else /* MDBX_ENABLE_REFUND */ -static __inline bool txn_refund(MDBX_txn *txn) { - (void)txn; - /* No online auto-compactification. */ - return false; -} -#endif /* MDBX_ENABLE_REFUND */ - -__cold static void kill_page(MDBX_txn *txn, MDBX_page *mp, pgno_t pgno, - size_t npages) { - MDBX_env *const env = txn->mt_env; - DEBUG("kill %zu page(s) %" PRIaPGNO, npages, pgno); - eASSERT(env, pgno >= NUM_METAS && npages); - if (!IS_FROZEN(txn, mp)) { - const size_t bytes = pgno2bytes(env, npages); - memset(mp, -1, bytes); - mp->mp_pgno = pgno; - if ((txn->mt_flags & MDBX_WRITEMAP) == 0) - osal_pwrite(env->me_lazy_fd, mp, bytes, pgno2bytes(env, pgno)); - } else { - struct iovec iov[MDBX_AUXILARY_IOV_MAX]; - iov[0].iov_len = env->me_psize; - iov[0].iov_base = ptr_disp(env->me_pbuf, env->me_psize); - size_t iov_off = pgno2bytes(env, pgno), n = 1; - while (--npages) { - iov[n] = iov[0]; - if (++n == MDBX_AUXILARY_IOV_MAX) { - osal_pwritev(env->me_lazy_fd, iov, MDBX_AUXILARY_IOV_MAX, iov_off); - iov_off += pgno2bytes(env, MDBX_AUXILARY_IOV_MAX); - n = 0; - } - } - osal_pwritev(env->me_lazy_fd, iov, n, iov_off); - } -} - -/* Remove page from dirty list, etc */ -static __inline void page_wash(MDBX_txn *txn, size_t di, MDBX_page *const mp, - const size_t npages) { - tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); - mp->mp_txnid = INVALID_TXNID; - mp->mp_flags = P_BAD; - - if (txn->tw.dirtylist) { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - tASSERT(txn, - MDBX_AVOID_MSYNC || (di && txn->tw.dirtylist->items[di].ptr == mp)); - if (!MDBX_AVOID_MSYNC || di) { - dpl_remove_ex(txn, di, npages); - txn->tw.dirtyroom++; - tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == - (txn->mt_parent ? 
txn->mt_parent->tw.dirtyroom - : txn->mt_env->me_options.dp_limit)); - if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)) { - dpage_free(txn->mt_env, mp, npages); - return; - } - } - } else { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) && !MDBX_AVOID_MSYNC && !di); - txn->tw.writemap_dirty_npages -= (txn->tw.writemap_dirty_npages > npages) - ? npages - : txn->tw.writemap_dirty_npages; - } - VALGRIND_MAKE_MEM_UNDEFINED(mp, PAGEHDRSZ); - VALGRIND_MAKE_MEM_NOACCESS(page_data(mp), - pgno2bytes(txn->mt_env, npages) - PAGEHDRSZ); - MDBX_ASAN_POISON_MEMORY_REGION(page_data(mp), - pgno2bytes(txn->mt_env, npages) - PAGEHDRSZ); -} - -static __inline bool suitable4loose(const MDBX_txn *txn, pgno_t pgno) { - /* TODO: - * 1) при включенной "экономии последовательностей" проверить, что - * страница не примыкает к какой-либо из уже находящийся в reclaimed. - * 2) стоит подумать над тем, чтобы при большом loose-списке отбрасывать - половину в reclaimed. */ - return txn->tw.loose_count < txn->mt_env->me_options.dp_loose_limit && - (!MDBX_ENABLE_REFUND || - /* skip pages near to the end in favor of compactification */ - txn->mt_next_pgno > pgno + txn->mt_env->me_options.dp_loose_limit || - txn->mt_next_pgno <= txn->mt_env->me_options.dp_loose_limit); -} - -/* Retire, loosen or free a single page. - * - * For dirty pages, saves single pages to a list for future reuse in this same - * txn. It has been pulled from the GC and already resides on the dirty list, - * but has been deleted. Use these pages first before pulling again from the GC. - * - * If the page wasn't dirtied in this txn, just add it - * to this txn's free list. 
*/ -static int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno, - MDBX_page *mp /* maybe null */, - unsigned pageflags /* maybe unknown/zero */) { - int rc; - MDBX_txn *const txn = mc->mc_txn; - tASSERT(txn, !mp || (mp->mp_pgno == pgno && mp->mp_flags == pageflags)); - - /* During deleting entire subtrees, it is reasonable and possible to avoid - * reading leaf pages, i.e. significantly reduce hard page-faults & IOPs: - * - mp is null, i.e. the page has not yet been read; - * - pagetype is known and the P_LEAF bit is set; - * - we can determine the page status via scanning the lists - * of dirty and spilled pages. - * - * On the other hand, this could be suboptimal for WRITEMAP mode, since - * requires support the list of dirty pages and avoid explicit spilling. - * So for flexibility and avoid extra internal dependencies we just - * fallback to reading if dirty list was not allocated yet. */ - size_t di = 0, si = 0, npages = 1; - enum page_status { - unknown, - frozen, - spilled, - shadowed, - modifable - } status = unknown; - - if (unlikely(!mp)) { - if (ASSERT_ENABLED() && pageflags) { - pgr_t check; - check = page_get_any(mc, pgno, txn->mt_front); - if (unlikely(check.err != MDBX_SUCCESS)) - return check.err; - tASSERT(txn, - (check.page->mp_flags & ~P_SPILLED) == (pageflags & ~P_FROZEN)); - tASSERT(txn, !(pageflags & P_FROZEN) || IS_FROZEN(txn, check.page)); - } - if (pageflags & P_FROZEN) { - status = frozen; - if (ASSERT_ENABLED()) { - for (MDBX_txn *scan = txn; scan; scan = scan->mt_parent) { - tASSERT(txn, !txn->tw.spilled.list || !search_spilled(scan, pgno)); - tASSERT(txn, !scan->tw.dirtylist || !debug_dpl_find(scan, pgno)); - } - } - goto status_done; - } else if (pageflags && txn->tw.dirtylist) { - if ((di = dpl_exist(txn, pgno)) != 0) { - mp = txn->tw.dirtylist->items[di].ptr; - tASSERT(txn, IS_MODIFIABLE(txn, mp)); - status = modifable; - goto status_done; - } - if ((si = search_spilled(txn, pgno)) != 0) { - status = spilled; - goto status_done; - 
} - for (MDBX_txn *parent = txn->mt_parent; parent; - parent = parent->mt_parent) { - if (dpl_exist(parent, pgno)) { - status = shadowed; - goto status_done; - } - if (search_spilled(parent, pgno)) { - status = spilled; - goto status_done; - } - } - status = frozen; - goto status_done; - } - - pgr_t pg = page_get_any(mc, pgno, txn->mt_front); - if (unlikely(pg.err != MDBX_SUCCESS)) - return pg.err; - mp = pg.page; - tASSERT(txn, !pageflags || mp->mp_flags == pageflags); - pageflags = mp->mp_flags; - } - - if (IS_FROZEN(txn, mp)) { - status = frozen; - tASSERT(txn, !IS_MODIFIABLE(txn, mp)); - tASSERT(txn, !IS_SPILLED(txn, mp)); - tASSERT(txn, !IS_SHADOWED(txn, mp)); - tASSERT(txn, !debug_dpl_find(txn, pgno)); - tASSERT(txn, !txn->tw.spilled.list || !search_spilled(txn, pgno)); - } else if (IS_MODIFIABLE(txn, mp)) { - status = modifable; - if (txn->tw.dirtylist) - di = dpl_exist(txn, pgno); - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) || !IS_SPILLED(txn, mp)); - tASSERT(txn, !txn->tw.spilled.list || !search_spilled(txn, pgno)); - } else if (IS_SHADOWED(txn, mp)) { - status = shadowed; - tASSERT(txn, !txn->tw.spilled.list || !search_spilled(txn, pgno)); - tASSERT(txn, !debug_dpl_find(txn, pgno)); - } else { - tASSERT(txn, IS_SPILLED(txn, mp)); - status = spilled; - si = search_spilled(txn, pgno); - tASSERT(txn, !debug_dpl_find(txn, pgno)); - } - -status_done: - if (likely((pageflags & P_OVERFLOW) == 0)) { - STATIC_ASSERT(P_BRANCH == 1); - const bool is_branch = pageflags & P_BRANCH; - if (unlikely(mc->mc_flags & C_SUB)) { - MDBX_db *outer = outer_db(mc); - cASSERT(mc, !is_branch || outer->md_branch_pages > 0); - outer->md_branch_pages -= is_branch; - cASSERT(mc, is_branch || outer->md_leaf_pages > 0); - outer->md_leaf_pages -= 1 - is_branch; - } - cASSERT(mc, !is_branch || mc->mc_db->md_branch_pages > 0); - mc->mc_db->md_branch_pages -= is_branch; - cASSERT(mc, (pageflags & P_LEAF) == 0 || mc->mc_db->md_leaf_pages > 0); - mc->mc_db->md_leaf_pages -= (pageflags & 
P_LEAF) != 0; - } else { - npages = mp->mp_pages; - cASSERT(mc, mc->mc_db->md_overflow_pages >= npages); - mc->mc_db->md_overflow_pages -= (pgno_t)npages; - } - - if (status == frozen) { - retire: - DEBUG("retire %zu page %" PRIaPGNO, npages, pgno); - rc = pnl_append_range(false, &txn->tw.retired_pages, pgno, npages); - tASSERT(txn, dirtylist_check(txn)); - return rc; - } - - /* Возврат страниц в нераспределенный "хвост" БД. - * Содержимое страниц не уничтожается, а для вложенных транзакций граница - * нераспределенного "хвоста" БД сдвигается только при их коммите. */ - if (MDBX_ENABLE_REFUND && unlikely(pgno + npages == txn->mt_next_pgno)) { - const char *kind = nullptr; - if (status == modifable) { - /* Страница испачкана в этой транзакции, но до этого могла быть - * аллоцирована, испачкана и пролита в одной из родительских транзакций. - * Её МОЖНО вытолкнуть в нераспределенный хвост. */ - kind = "dirty"; - /* Remove from dirty list */ - page_wash(txn, di, mp, npages); - } else if (si) { - /* Страница пролита в этой транзакции, т.е. она аллоцирована - * и запачкана в этой или одной из родительских транзакций. - * Её МОЖНО вытолкнуть в нераспределенный хвост. */ - kind = "spilled"; - tASSERT(txn, status == spilled); - spill_remove(txn, si, npages); - } else { - /* Страница аллоцирована, запачкана и возможно пролита в одной - * из родительских транзакций. - * Её МОЖНО вытолкнуть в нераспределенный хвост. 
*/ - kind = "parent's"; - if (ASSERT_ENABLED() && mp) { - kind = nullptr; - for (MDBX_txn *parent = txn->mt_parent; parent; - parent = parent->mt_parent) { - if (search_spilled(parent, pgno)) { - kind = "parent-spilled"; - tASSERT(txn, status == spilled); - break; - } - if (mp == debug_dpl_find(parent, pgno)) { - kind = "parent-dirty"; - tASSERT(txn, status == shadowed); - break; - } - } - tASSERT(txn, kind != nullptr); - } - tASSERT(txn, status == spilled || status == shadowed); - } - DEBUG("refunded %zu %s page %" PRIaPGNO, npages, kind, pgno); - txn->mt_next_pgno = pgno; - txn_refund(txn); - return MDBX_SUCCESS; - } - - if (status == modifable) { - /* Dirty page from this transaction */ - /* If suitable we can reuse it through loose list */ - if (likely(npages == 1 && suitable4loose(txn, pgno)) && - (di || !txn->tw.dirtylist)) { - DEBUG("loosen dirty page %" PRIaPGNO, pgno); - if (MDBX_DEBUG != 0 || unlikely(txn->mt_env->me_flags & MDBX_PAGEPERTURB)) - memset(page_data(mp), -1, txn->mt_env->me_psize - PAGEHDRSZ); - mp->mp_txnid = INVALID_TXNID; - mp->mp_flags = P_LOOSE; - mp_next(mp) = txn->tw.loose_pages; - txn->tw.loose_pages = mp; - txn->tw.loose_count++; -#if MDBX_ENABLE_REFUND - txn->tw.loose_refund_wl = (pgno + 2 > txn->tw.loose_refund_wl) - ? pgno + 2 - : txn->tw.loose_refund_wl; -#endif /* MDBX_ENABLE_REFUND */ - VALGRIND_MAKE_MEM_NOACCESS(page_data(mp), - txn->mt_env->me_psize - PAGEHDRSZ); - MDBX_ASAN_POISON_MEMORY_REGION(page_data(mp), - txn->mt_env->me_psize - PAGEHDRSZ); - return MDBX_SUCCESS; - } - -#if !MDBX_DEBUG && !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) - if (unlikely(txn->mt_env->me_flags & MDBX_PAGEPERTURB)) -#endif - { - /* Страница могла быть изменена в одной из родительских транзакций, - * в том числе, позже выгружена и затем снова загружена и изменена. 
- * В обоих случаях её нельзя затирать на диске и помечать недоступной - * в asan и/или valgrind */ - for (MDBX_txn *parent = txn->mt_parent; - parent && (parent->mt_flags & MDBX_TXN_SPILLS); - parent = parent->mt_parent) { - if (intersect_spilled(parent, pgno, npages)) - goto skip_invalidate; - if (dpl_intersect(parent, pgno, npages)) - goto skip_invalidate; - } - -#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) - if (MDBX_DEBUG != 0 || unlikely(txn->mt_env->me_flags & MDBX_PAGEPERTURB)) -#endif - kill_page(txn, mp, pgno, npages); - if ((txn->mt_flags & MDBX_WRITEMAP) == 0) { - VALGRIND_MAKE_MEM_NOACCESS(page_data(pgno2page(txn->mt_env, pgno)), - pgno2bytes(txn->mt_env, npages) - PAGEHDRSZ); - MDBX_ASAN_POISON_MEMORY_REGION(page_data(pgno2page(txn->mt_env, pgno)), - pgno2bytes(txn->mt_env, npages) - - PAGEHDRSZ); - } - } - skip_invalidate: - - /* wash dirty page */ - page_wash(txn, di, mp, npages); - - reclaim: - DEBUG("reclaim %zu %s page %" PRIaPGNO, npages, "dirty", pgno); - rc = pnl_insert_range(&txn->tw.relist, pgno, npages); - tASSERT(txn, pnl_check_allocated(txn->tw.relist, - txn->mt_next_pgno - MDBX_ENABLE_REFUND)); - tASSERT(txn, dirtylist_check(txn)); - return rc; - } - - if (si) { - /* Page ws spilled in this txn */ - spill_remove(txn, si, npages); - /* Страница могла быть выделена и затем пролита в этой транзакции, - * тогда её необходимо поместить в reclaimed-список. - * Либо она могла быть выделена в одной из родительских транзакций и затем - * пролита в этой транзакции, тогда её необходимо поместить в - * retired-список для последующей фильтрации при коммите. */ - for (MDBX_txn *parent = txn->mt_parent; parent; - parent = parent->mt_parent) { - if (dpl_exist(parent, pgno)) - goto retire; - } - /* Страница точно была выделена в этой транзакции - * и теперь может быть использована повторно. */ - goto reclaim; - } - - if (status == shadowed) { - /* Dirty page MUST BE a clone from (one of) parent transaction(s). 
*/ - if (ASSERT_ENABLED()) { - const MDBX_page *parent_dp = nullptr; - /* Check parent(s)'s dirty lists. */ - for (MDBX_txn *parent = txn->mt_parent; parent && !parent_dp; - parent = parent->mt_parent) { - tASSERT(txn, !search_spilled(parent, pgno)); - parent_dp = debug_dpl_find(parent, pgno); - } - tASSERT(txn, parent_dp && (!mp || parent_dp == mp)); - } - /* Страница была выделена в родительской транзакции и теперь может быть - * использована повторно, но только внутри этой транзакции, либо дочерних. - */ - goto reclaim; - } - - /* Страница может входить в доступный читателям MVCC-снимок, либо же она - * могла быть выделена, а затем пролита в одной из родительских - * транзакций. Поэтому пока помещаем её в retired-список, который будет - * фильтроваться относительно dirty- и spilled-списков родительских - * транзакций при коммите дочерних транзакций, либо же будет записан - * в GC в неизменном виде. */ - goto retire; -} - -static __inline int page_retire(MDBX_cursor *mc, MDBX_page *mp) { - return page_retire_ex(mc, mp->mp_pgno, mp, mp->mp_flags); -} - -typedef struct iov_ctx { - MDBX_env *env; - osal_ioring_t *ior; - mdbx_filehandle_t fd; - int err; -#ifndef MDBX_NEED_WRITTEN_RANGE -#define MDBX_NEED_WRITTEN_RANGE 1 -#endif /* MDBX_NEED_WRITTEN_RANGE */ -#if MDBX_NEED_WRITTEN_RANGE - pgno_t flush_begin; - pgno_t flush_end; -#endif /* MDBX_NEED_WRITTEN_RANGE */ - uint64_t coherency_timestamp; -} iov_ctx_t; - -__must_check_result static int iov_init(MDBX_txn *const txn, iov_ctx_t *ctx, - size_t items, size_t npages, - mdbx_filehandle_t fd, - bool check_coherence) { - ctx->env = txn->mt_env; - ctx->ior = &txn->mt_env->me_ioring; - ctx->fd = fd; - ctx->coherency_timestamp = - (check_coherence || txn->mt_env->me_lck->mti_pgop_stat.incoherence.weak) - ? 
0 - : UINT64_MAX /* не выполнять сверку */; - ctx->err = osal_ioring_prepare(ctx->ior, items, - pgno_align2os_bytes(txn->mt_env, npages)); - if (likely(ctx->err == MDBX_SUCCESS)) { -#if MDBX_NEED_WRITTEN_RANGE - ctx->flush_begin = MAX_PAGENO; - ctx->flush_end = MIN_PAGENO; -#endif /* MDBX_NEED_WRITTEN_RANGE */ - osal_ioring_reset(ctx->ior); - } - return ctx->err; -} - -static inline bool iov_empty(const iov_ctx_t *ctx) { - return osal_ioring_used(ctx->ior) == 0; -} - -static void iov_callback4dirtypages(iov_ctx_t *ctx, size_t offset, void *data, - size_t bytes) { - MDBX_env *const env = ctx->env; - eASSERT(env, (env->me_flags & MDBX_WRITEMAP) == 0); - - MDBX_page *wp = (MDBX_page *)data; - eASSERT(env, wp->mp_pgno == bytes2pgno(env, offset)); - eASSERT(env, bytes2pgno(env, bytes) >= (IS_OVERFLOW(wp) ? wp->mp_pages : 1u)); - eASSERT(env, (wp->mp_flags & P_ILL_BITS) == 0); - - if (likely(ctx->err == MDBX_SUCCESS)) { - const MDBX_page *const rp = ptr_disp(env->me_map, offset); - VALGRIND_MAKE_MEM_DEFINED(rp, bytes); - MDBX_ASAN_UNPOISON_MEMORY_REGION(rp, bytes); - osal_flush_incoherent_mmap(rp, bytes, env->me_os_psize); - /* check with timeout as the workaround - * for https://libmdbx.dqdkfa.ru/dead-github/issues/269 - * - * Проблема проявляется только при неупорядоченности: если записанная - * последней мета-страница "обгоняет" ранее записанные, т.е. когда - * записанное в файл позже становится видимым в отображении раньше, - * чем записанное ранее. - * - * Исходно здесь всегда выполнялась полная сверка. Это давало полную - * гарантию защиты от проявления проблемы, но порождало накладные расходы. - * В некоторых сценариях наблюдалось снижение производительности до 10-15%, - * а в синтетических тестах до 30%. Конечно никто не вникал в причины, - * а просто останавливался на мнении "libmdbx не быстрее LMDB", - * например: https://clck.ru/3386er - * - * Поэтому после серии экспериментов и тестов реализовано следующее: - * 0. 
Посредством опции сборки MDBX_FORCE_CHECK_MMAP_COHERENCY=1 - * можно включить полную сверку после записи. - * Остальные пункты являются взвешенным компромиссом между полной - * гарантией обнаружения проблемы и бесполезными затратами на системах - * без этого недостатка. - * 1. При старте транзакций проверяется соответствие выбранной мета-страницы - * корневым страницам b-tree проверяется. Эта проверка показала себя - * достаточной без сверки после записи. При обнаружении "некогерентности" - * эти случаи подсчитываются, а при их ненулевом счетчике выполняется - * полная сверка. Таким образом, произойдет переключение в режим полной - * сверки, если показавшая себя достаточной проверка заметит проявление - * проблемы хоты-бы раз. - * 2. Сверка не выполняется при фиксации транзакции, так как: - * - при наличии проблемы "не-когерентности" (при отложенном копировании - * или обновлении PTE, после возврата из write-syscall), проверка - * в этом процессе не гарантирует актуальность данных в другом - * процессе, который может запустить транзакцию сразу после коммита; - * - сверка только последнего блока позволяет почти восстановить - * производительность в больших транзакциях, но одновременно размывает - * уверенность в отсутствии сбоев, чем обесценивает всю затею; - * - после записи данных будет записана мета-страница, соответствие - * которой корневым страницам b-tree проверяется при старте - * транзакций, и только эта проверка показала себя достаточной; - * 3. При спиллинге производится полная сверка записанных страниц. Тут был - * соблазн сверять не полностью, а например начало и конец каждого блока. - * Но при спиллинге возможна ситуация повторного вытеснения страниц, в - * том числе large/overflow. При этом возникает риск прочитать в текущей - * транзакции старую версию страницы, до повторной записи. В этом случае - * могут возникать крайне редкие невоспроизводимые ошибки. 
С учетом того - * что спиллинг выполняет крайне редко, решено отказаться от экономии - * в пользу надежности. */ -#ifndef MDBX_FORCE_CHECK_MMAP_COHERENCY -#define MDBX_FORCE_CHECK_MMAP_COHERENCY 0 -#endif /* MDBX_FORCE_CHECK_MMAP_COHERENCY */ - if ((MDBX_FORCE_CHECK_MMAP_COHERENCY || - ctx->coherency_timestamp != UINT64_MAX) && - unlikely(memcmp(wp, rp, bytes))) { - ctx->coherency_timestamp = 0; - env->me_lck->mti_pgop_stat.incoherence.weak = - (env->me_lck->mti_pgop_stat.incoherence.weak >= INT32_MAX) - ? INT32_MAX - : env->me_lck->mti_pgop_stat.incoherence.weak + 1; - WARNING("catch delayed/non-arrived page %" PRIaPGNO " %s", wp->mp_pgno, - "(workaround for incoherent flaw of unified page/buffer cache)"); - do - if (coherency_timeout(&ctx->coherency_timestamp, wp->mp_pgno, env) != - MDBX_RESULT_TRUE) { - ctx->err = MDBX_PROBLEM; - break; - } - while (unlikely(memcmp(wp, rp, bytes))); - } - } - - if (likely(bytes == env->me_psize)) - dpage_free(env, wp, 1); - else { - do { - eASSERT(env, wp->mp_pgno == bytes2pgno(env, offset)); - eASSERT(env, (wp->mp_flags & P_ILL_BITS) == 0); - size_t npages = IS_OVERFLOW(wp) ? 
wp->mp_pages : 1u; - size_t chunk = pgno2bytes(env, npages); - eASSERT(env, bytes >= chunk); - MDBX_page *next = ptr_disp(wp, chunk); - dpage_free(env, wp, npages); - wp = next; - offset += chunk; - bytes -= chunk; - } while (bytes); - } -} - -static void iov_complete(iov_ctx_t *ctx) { - if ((ctx->env->me_flags & MDBX_WRITEMAP) == 0) - osal_ioring_walk(ctx->ior, ctx, iov_callback4dirtypages); - osal_ioring_reset(ctx->ior); -} - -__must_check_result static int iov_write(iov_ctx_t *ctx) { - eASSERT(ctx->env, !iov_empty(ctx)); - osal_ioring_write_result_t r = osal_ioring_write(ctx->ior, ctx->fd); -#if MDBX_ENABLE_PGOP_STAT - ctx->env->me_lck->mti_pgop_stat.wops.weak += r.wops; -#endif /* MDBX_ENABLE_PGOP_STAT */ - ctx->err = r.err; - if (unlikely(ctx->err != MDBX_SUCCESS)) - ERROR("Write error: %s", mdbx_strerror(ctx->err)); - iov_complete(ctx); - return ctx->err; -} - -__must_check_result static int iov_page(MDBX_txn *txn, iov_ctx_t *ctx, - MDBX_page *dp, size_t npages) { - MDBX_env *const env = txn->mt_env; - tASSERT(txn, ctx->err == MDBX_SUCCESS); - tASSERT(txn, dp->mp_pgno >= MIN_PAGENO && dp->mp_pgno < txn->mt_next_pgno); - tASSERT(txn, IS_MODIFIABLE(txn, dp)); - tASSERT(txn, !(dp->mp_flags & ~(P_BRANCH | P_LEAF | P_LEAF2 | P_OVERFLOW))); - - if (IS_SHADOWED(txn, dp)) { - tASSERT(txn, !(txn->mt_flags & MDBX_WRITEMAP)); - dp->mp_txnid = txn->mt_txnid; - tASSERT(txn, IS_SPILLED(txn, dp)); -#if MDBX_AVOID_MSYNC - doit:; -#endif /* MDBX_AVOID_MSYNC */ - int err = osal_ioring_add(ctx->ior, pgno2bytes(env, dp->mp_pgno), dp, - pgno2bytes(env, npages)); - if (unlikely(err != MDBX_SUCCESS)) { - ctx->err = err; - if (unlikely(err != MDBX_RESULT_TRUE)) { - iov_complete(ctx); - return err; - } - err = iov_write(ctx); - tASSERT(txn, iov_empty(ctx)); - if (likely(err == MDBX_SUCCESS)) { - err = osal_ioring_add(ctx->ior, pgno2bytes(env, dp->mp_pgno), dp, - pgno2bytes(env, npages)); - if (unlikely(err != MDBX_SUCCESS)) { - iov_complete(ctx); - return ctx->err = err; - } - } - 
tASSERT(txn, ctx->err == MDBX_SUCCESS); - } - } else { - tASSERT(txn, txn->mt_flags & MDBX_WRITEMAP); -#if MDBX_AVOID_MSYNC - goto doit; -#endif /* MDBX_AVOID_MSYNC */ - } - -#if MDBX_NEED_WRITTEN_RANGE - ctx->flush_begin = - (ctx->flush_begin < dp->mp_pgno) ? ctx->flush_begin : dp->mp_pgno; - ctx->flush_end = (ctx->flush_end > dp->mp_pgno + (pgno_t)npages) - ? ctx->flush_end - : dp->mp_pgno + (pgno_t)npages; -#endif /* MDBX_NEED_WRITTEN_RANGE */ - return MDBX_SUCCESS; -} - -static int spill_page(MDBX_txn *txn, iov_ctx_t *ctx, MDBX_page *dp, - const size_t npages) { - tASSERT(txn, !(txn->mt_flags & MDBX_WRITEMAP)); -#if MDBX_ENABLE_PGOP_STAT - txn->mt_env->me_lck->mti_pgop_stat.spill.weak += npages; -#endif /* MDBX_ENABLE_PGOP_STAT */ - const pgno_t pgno = dp->mp_pgno; - int err = iov_page(txn, ctx, dp, npages); - if (likely(err == MDBX_SUCCESS)) - err = pnl_append_range(true, &txn->tw.spilled.list, pgno << 1, npages); - return err; -} - -/* Set unspillable LRU-label for dirty pages watched by txn. - * Returns the number of pages marked as unspillable. 
*/ -static size_t cursor_keep(const MDBX_txn *const txn, const MDBX_cursor *mc) { - tASSERT(txn, (txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0); - size_t keep = 0; - while ((mc->mc_flags & C_INITIALIZED) && mc->mc_snum) { - tASSERT(txn, mc->mc_top == mc->mc_snum - 1); - const MDBX_page *mp; - size_t i = 0; - do { - mp = mc->mc_pg[i]; - tASSERT(txn, !IS_SUBP(mp)); - if (IS_MODIFIABLE(txn, mp)) { - size_t const n = dpl_search(txn, mp->mp_pgno); - if (txn->tw.dirtylist->items[n].pgno == mp->mp_pgno && - /* не считаем дважды */ dpl_age(txn, n)) { - size_t *const ptr = ptr_disp(txn->tw.dirtylist->items[n].ptr, - -(ptrdiff_t)sizeof(size_t)); - *ptr = txn->tw.dirtylru; - tASSERT(txn, dpl_age(txn, n) == 0); - ++keep; - } - } - } while (++i < mc->mc_snum); - - tASSERT(txn, IS_LEAF(mp)); - if (!mc->mc_xcursor || mc->mc_ki[mc->mc_top] >= page_numkeys(mp)) - break; - if (!(node_flags(page_node(mp, mc->mc_ki[mc->mc_top])) & F_SUBDATA)) - break; - mc = &mc->mc_xcursor->mx_cursor; - } - return keep; -} - -static size_t txn_keep(MDBX_txn *txn, MDBX_cursor *m0) { - tASSERT(txn, (txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0); - txn_lru_turn(txn); - size_t keep = m0 ? cursor_keep(txn, m0) : 0; - - TXN_FOREACH_DBI_ALL(txn, dbi) { - if (F_ISSET(txn->mt_dbi_state[dbi], DBI_DIRTY | DBI_VALID) && - txn->mt_dbs[dbi].md_root != P_INVALID) - for (MDBX_cursor *mc = txn->mt_cursors[dbi]; mc; mc = mc->mc_next) - if (mc != m0) - keep += cursor_keep(txn, mc); - } - - return keep; -} - -/* Returns the spilling priority (0..255) for a dirty page: - * 0 = should be spilled; - * ... - * > 255 = must not be spilled. 
*/ -MDBX_NOTHROW_PURE_FUNCTION static unsigned -spill_prio(const MDBX_txn *txn, const size_t i, const uint32_t reciprocal) { - MDBX_dpl *const dl = txn->tw.dirtylist; - const uint32_t age = dpl_age(txn, i); - const size_t npages = dpl_npages(dl, i); - const pgno_t pgno = dl->items[i].pgno; - if (age == 0) { - DEBUG("skip %s %zu page %" PRIaPGNO, "keep", npages, pgno); - return 256; - } - - MDBX_page *const dp = dl->items[i].ptr; - if (dp->mp_flags & (P_LOOSE | P_SPILLED)) { - DEBUG("skip %s %zu page %" PRIaPGNO, - (dp->mp_flags & P_LOOSE) ? "loose" : "parent-spilled", npages, pgno); - return 256; - } - - /* Can't spill twice, - * make sure it's not already in a parent's spill list(s). */ - MDBX_txn *parent = txn->mt_parent; - if (parent && (parent->mt_flags & MDBX_TXN_SPILLS)) { - do - if (intersect_spilled(parent, pgno, npages)) { - DEBUG("skip-2 parent-spilled %zu page %" PRIaPGNO, npages, pgno); - dp->mp_flags |= P_SPILLED; - return 256; - } - while ((parent = parent->mt_parent) != nullptr); - } - - tASSERT(txn, age * (uint64_t)reciprocal < UINT32_MAX); - unsigned prio = age * reciprocal >> 24; - tASSERT(txn, prio < 256); - if (likely(npages == 1)) - return prio = 256 - prio; - - /* make a large/overflow pages be likely to spill */ - size_t factor = npages | npages >> 1; - factor |= factor >> 2; - factor |= factor >> 4; - factor |= factor >> 8; - factor |= factor >> 16; - factor = (size_t)prio * log2n_powerof2(factor + 1) + /* golden ratio */ 157; - factor = (factor < 256) ? 
255 - factor : 0; - tASSERT(txn, factor < 256 && factor < (256 - prio)); - return prio = (unsigned)factor; -} - -__cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, - const intptr_t wanna_spill_entries, - const intptr_t wanna_spill_npages, - const size_t need); - -static __inline int txn_spill(MDBX_txn *const txn, MDBX_cursor *const m0, - const size_t need) { - tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); - tASSERT(txn, !m0 || cursor_is_tracked(m0)); - - const intptr_t wanna_spill_entries = - txn->tw.dirtylist ? (need - txn->tw.dirtyroom - txn->tw.loose_count) : 0; - const intptr_t wanna_spill_npages = - need + - (txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose - : txn->tw.writemap_dirty_npages) - - txn->tw.loose_count - txn->mt_env->me_options.dp_limit; - - /* production mode */ - if (likely(wanna_spill_npages < 1 && wanna_spill_entries < 1) -#if xMDBX_DEBUG_SPILLING == 1 - /* debug mode: always try to spill if xMDBX_DEBUG_SPILLING == 1 */ - && txn->mt_txnid % 23 > 11 -#endif - ) - return MDBX_SUCCESS; - - return txn_spill_slowpath(txn, m0, wanna_spill_entries, wanna_spill_npages, - need); -} - -static size_t spill_gate(const MDBX_env *env, intptr_t part, - const size_t total) { - const intptr_t spill_min = - env->me_options.spill_min_denominator - ? (total + env->me_options.spill_min_denominator - 1) / - env->me_options.spill_min_denominator - : 1; - const intptr_t spill_max = - total - (env->me_options.spill_max_denominator - ? total / env->me_options.spill_max_denominator - : 0); - part = (part < spill_max) ? part : spill_max; - part = (part > spill_min) ? 
part : spill_min; - eASSERT(env, part >= 0 && (size_t)part <= total); - return (size_t)part; -} - -__cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, - const intptr_t wanna_spill_entries, - const intptr_t wanna_spill_npages, - const size_t need) { - tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); - - int rc = MDBX_SUCCESS; - if (unlikely(txn->tw.loose_count >= - (txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose - : txn->tw.writemap_dirty_npages))) - goto done; - - const size_t dirty_entries = - txn->tw.dirtylist ? (txn->tw.dirtylist->length - txn->tw.loose_count) : 1; - const size_t dirty_npages = - (txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose - : txn->tw.writemap_dirty_npages) - - txn->tw.loose_count; - const size_t need_spill_entries = - spill_gate(txn->mt_env, wanna_spill_entries, dirty_entries); - const size_t need_spill_npages = - spill_gate(txn->mt_env, wanna_spill_npages, dirty_npages); - - const size_t need_spill = (need_spill_entries > need_spill_npages) - ? 
need_spill_entries - : need_spill_npages; - if (!need_spill) - goto done; - - if (txn->mt_flags & MDBX_WRITEMAP) { - NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "msync", - dirty_entries, dirty_npages); - const MDBX_env *env = txn->mt_env; - tASSERT(txn, txn->tw.spilled.list == nullptr); - rc = - osal_msync(&txn->mt_env->me_dxb_mmap, 0, - pgno_align2os_bytes(env, txn->mt_next_pgno), MDBX_SYNC_KICK); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; -#if MDBX_AVOID_MSYNC - MDBX_ANALYSIS_ASSUME(txn->tw.dirtylist != nullptr); - tASSERT(txn, dirtylist_check(txn)); - env->me_lck->mti_unsynced_pages.weak += - txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count; - dpl_clear(txn->tw.dirtylist); - txn->tw.dirtyroom = env->me_options.dp_limit - txn->tw.loose_count; - for (MDBX_page *lp = txn->tw.loose_pages; lp != nullptr; lp = mp_next(lp)) { - tASSERT(txn, lp->mp_flags == P_LOOSE); - rc = dpl_append(txn, lp->mp_pgno, lp, 1); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - MDBX_ASAN_UNPOISON_MEMORY_REGION(&mp_next(lp), sizeof(MDBX_page *)); - VALGRIND_MAKE_MEM_DEFINED(&mp_next(lp), sizeof(MDBX_page *)); - } - tASSERT(txn, dirtylist_check(txn)); -#else - tASSERT(txn, txn->tw.dirtylist == nullptr); - env->me_lck->mti_unsynced_pages.weak += txn->tw.writemap_dirty_npages; - txn->tw.writemap_spilled_npages += txn->tw.writemap_dirty_npages; - txn->tw.writemap_dirty_npages = 0; -#endif /* MDBX_AVOID_MSYNC */ - goto done; - } - - NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "write", - need_spill_entries, need_spill_npages); - MDBX_ANALYSIS_ASSUME(txn->tw.dirtylist != nullptr); - tASSERT(txn, txn->tw.dirtylist->length - txn->tw.loose_count >= 1); - tASSERT(txn, txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count >= - need_spill_npages); - if (!txn->tw.spilled.list) { - txn->tw.spilled.least_removed = INT_MAX; - txn->tw.spilled.list = pnl_alloc(need_spill); - if (unlikely(!txn->tw.spilled.list)) { - rc = MDBX_ENOMEM; - bailout: - 
txn->mt_flags |= MDBX_TXN_ERROR; - return rc; - } - } else { - /* purge deleted slots */ - spill_purge(txn); - rc = pnl_reserve(&txn->tw.spilled.list, need_spill); - (void)rc /* ignore since the resulting list may be shorter - and pnl_append() will increase pnl on demand */ - ; - } - - /* Сортируем чтобы запись на диск была полее последовательна */ - MDBX_dpl *const dl = dpl_sort(txn); - - /* Preserve pages which may soon be dirtied again */ - const size_t unspillable = txn_keep(txn, m0); - if (unspillable + txn->tw.loose_count >= dl->length) { -#if xMDBX_DEBUG_SPILLING == 1 /* avoid false failure in debug mode */ - if (likely(txn->tw.dirtyroom + txn->tw.loose_count >= need)) - return MDBX_SUCCESS; -#endif /* xMDBX_DEBUG_SPILLING */ - ERROR("all %zu dirty pages are unspillable since referenced " - "by a cursor(s), use fewer cursors or increase " - "MDBX_opt_txn_dp_limit", - unspillable); - goto done; - } - - /* Подзадача: Вытолкнуть часть страниц на диск в соответствии с LRU, - * но при этом учесть важные поправки: - * - лучше выталкивать старые large/overflow страницы, так будет освобождено - * больше памяти, а также так как они (в текущем понимании) гораздо реже - * повторно изменяются; - * - при прочих равных лучше выталкивать смежные страницы, так будет - * меньше I/O операций; - * - желательно потратить на это меньше времени чем std::partial_sort_copy; - * - * Решение: - * - Квантуем весь диапазон lru-меток до 256 значений и задействуем один - * проход 8-битного radix-sort. 
В результате получаем 256 уровней - * "свежести", в том числе значение lru-метки, старее которой страницы - * должны быть выгружены; - * - Двигаемся последовательно в сторону увеличения номеров страниц - * и выталкиваем страницы с lru-меткой старее отсекающего значения, - * пока не вытолкнем достаточно; - * - Встречая страницы смежные с выталкиваемыми для уменьшения кол-ва - * I/O операций выталкиваем и их, если они попадают в первую половину - * между выталкиваемыми и самыми свежими lru-метками; - * - дополнительно при сортировке умышленно старим large/overflow страницы, - * тем самым повышая их шансы на выталкивание. */ - - /* get min/max of LRU-labels */ - uint32_t age_max = 0; - for (size_t i = 1; i <= dl->length; ++i) { - const uint32_t age = dpl_age(txn, i); - age_max = (age_max >= age) ? age_max : age; - } - - VERBOSE("lru-head %u, age-max %u", txn->tw.dirtylru, age_max); - - /* half of 8-bit radix-sort */ - pgno_t radix_entries[256], radix_npages[256]; - memset(&radix_entries, 0, sizeof(radix_entries)); - memset(&radix_npages, 0, sizeof(radix_npages)); - size_t spillable_entries = 0, spillable_npages = 0; - const uint32_t reciprocal = (UINT32_C(255) << 24) / (age_max + 1); - for (size_t i = 1; i <= dl->length; ++i) { - const unsigned prio = spill_prio(txn, i, reciprocal); - size_t *const ptr = ptr_disp(dl->items[i].ptr, -(ptrdiff_t)sizeof(size_t)); - TRACE("page %" PRIaPGNO - ", lru %zu, is_multi %c, npages %u, age %u of %u, prio %u", - dl->items[i].pgno, *ptr, (dl->items[i].npages > 1) ? 
'Y' : 'N', - dpl_npages(dl, i), dpl_age(txn, i), age_max, prio); - if (prio < 256) { - radix_entries[prio] += 1; - spillable_entries += 1; - const pgno_t npages = dpl_npages(dl, i); - radix_npages[prio] += npages; - spillable_npages += npages; - } - } - - tASSERT(txn, spillable_npages >= spillable_entries); - pgno_t spilled_entries = 0, spilled_npages = 0; - if (likely(spillable_entries > 0)) { - size_t prio2spill = 0, prio2adjacent = 128, - amount_entries = radix_entries[0], amount_npages = radix_npages[0]; - for (size_t i = 1; i < 256; i++) { - if (amount_entries < need_spill_entries || - amount_npages < need_spill_npages) { - prio2spill = i; - prio2adjacent = i + (257 - i) / 2; - amount_entries += radix_entries[i]; - amount_npages += radix_npages[i]; - } else if (amount_entries + amount_entries < - spillable_entries + need_spill_entries - /* РАВНОЗНАЧНО: amount - need_spill < spillable - amount */ - || amount_npages + amount_npages < - spillable_npages + need_spill_npages) { - prio2adjacent = i; - amount_entries += radix_entries[i]; - amount_npages += radix_npages[i]; - } else - break; - } - - VERBOSE("prio2spill %zu, prio2adjacent %zu, spillable %zu/%zu," - " wanna-spill %zu/%zu, amount %zu/%zu", - prio2spill, prio2adjacent, spillable_entries, spillable_npages, - need_spill_entries, need_spill_npages, amount_entries, - amount_npages); - tASSERT(txn, prio2spill < prio2adjacent && prio2adjacent <= 256); - - iov_ctx_t ctx; - rc = - iov_init(txn, &ctx, amount_entries, amount_npages, -#if defined(_WIN32) || defined(_WIN64) - txn->mt_env->me_overlapped_fd ? 
txn->mt_env->me_overlapped_fd : -#endif - txn->mt_env->me_lazy_fd, - true); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - - size_t r = 0, w = 0; - pgno_t last = 0; - while (r < dl->length && (spilled_entries < need_spill_entries || - spilled_npages < need_spill_npages)) { - dl->items[++w] = dl->items[++r]; - unsigned prio = spill_prio(txn, w, reciprocal); - if (prio > prio2spill && - (prio >= prio2adjacent || last != dl->items[w].pgno)) - continue; - - const size_t e = w; - last = dpl_endpgno(dl, w); - while (--w && dpl_endpgno(dl, w) == dl->items[w + 1].pgno && - spill_prio(txn, w, reciprocal) < prio2adjacent) - ; - - for (size_t i = w; ++i <= e;) { - const unsigned npages = dpl_npages(dl, i); - prio = spill_prio(txn, i, reciprocal); - DEBUG("%sspill[%zu] %u page %" PRIaPGNO " (age %d, prio %u)", - (prio > prio2spill) ? "co-" : "", i, npages, dl->items[i].pgno, - dpl_age(txn, i), prio); - tASSERT(txn, prio < 256); - ++spilled_entries; - spilled_npages += npages; - rc = spill_page(txn, &ctx, dl->items[i].ptr, npages); - if (unlikely(rc != MDBX_SUCCESS)) - goto failed; - } - } - - VERBOSE("spilled entries %u, spilled npages %u", spilled_entries, - spilled_npages); - tASSERT(txn, spillable_entries == 0 || spilled_entries > 0); - tASSERT(txn, spilled_npages >= spilled_entries); - - failed: - while (r < dl->length) - dl->items[++w] = dl->items[++r]; - tASSERT(txn, r - w == spilled_entries || rc != MDBX_SUCCESS); - - dl->sorted = dpl_setlen(dl, w); - txn->tw.dirtyroom += spilled_entries; - txn->tw.dirtylist->pages_including_loose -= spilled_npages; - tASSERT(txn, dirtylist_check(txn)); - - if (!iov_empty(&ctx)) { - tASSERT(txn, rc == MDBX_SUCCESS); - rc = iov_write(&ctx); - } - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - - txn->mt_env->me_lck->mti_unsynced_pages.weak += spilled_npages; - pnl_sort(txn->tw.spilled.list, (size_t)txn->mt_next_pgno << 1); - txn->mt_flags |= MDBX_TXN_SPILLS; - NOTICE("spilled %u dirty-entries, %u dirty-npages, now have %zu 
dirty-room", - spilled_entries, spilled_npages, txn->tw.dirtyroom); - } else { - tASSERT(txn, rc == MDBX_SUCCESS); - for (size_t i = 1; i <= dl->length; ++i) { - MDBX_page *dp = dl->items[i].ptr; - VERBOSE( - "unspillable[%zu]: pgno %u, npages %u, flags 0x%04X, age %u, prio %u", - i, dp->mp_pgno, dpl_npages(dl, i), dp->mp_flags, dpl_age(txn, i), - spill_prio(txn, i, reciprocal)); - } - } - -#if xMDBX_DEBUG_SPILLING == 2 - if (txn->tw.loose_count + txn->tw.dirtyroom <= need / 2 + 1) - ERROR("dirty-list length: before %zu, after %zu, parent %zi, loose %zu; " - "needed %zu, spillable %zu; " - "spilled %u dirty-entries, now have %zu dirty-room", - dl->length + spilled_entries, dl->length, - (txn->mt_parent && txn->mt_parent->tw.dirtylist) - ? (intptr_t)txn->mt_parent->tw.dirtylist->length - : -1, - txn->tw.loose_count, need, spillable_entries, spilled_entries, - txn->tw.dirtyroom); - ENSURE(txn->mt_env, txn->tw.loose_count + txn->tw.dirtyroom > need / 2); -#endif /* xMDBX_DEBUG_SPILLING */ - -done: - return likely(txn->tw.dirtyroom + txn->tw.loose_count > - ((need > CURSOR_STACK) ? CURSOR_STACK : need)) - ? MDBX_SUCCESS - : MDBX_TXN_FULL; -} - -/*----------------------------------------------------------------------------*/ - -static bool meta_bootid_match(const MDBX_meta *meta) { - return memcmp(&meta->mm_bootid, &bootid, 16) == 0 && - (bootid.x | bootid.y) != 0; -} - -static bool meta_weak_acceptable(const MDBX_env *env, const MDBX_meta *meta, - const int lck_exclusive) { - return lck_exclusive - ? 
/* exclusive lock */ meta_bootid_match(meta) - : /* db already opened */ env->me_lck_mmap.lck && - (env->me_lck_mmap.lck->mti_envmode.weak & MDBX_RDONLY) == 0; -} - -#define METAPAGE(env, n) page_meta(pgno2page(env, n)) -#define METAPAGE_END(env) METAPAGE(env, NUM_METAS) - -MDBX_NOTHROW_PURE_FUNCTION static txnid_t -constmeta_txnid(const MDBX_meta *meta) { - const txnid_t a = unaligned_peek_u64(4, &meta->mm_txnid_a); - const txnid_t b = unaligned_peek_u64(4, &meta->mm_txnid_b); - return likely(a == b) ? a : 0; -} - -typedef struct { - uint64_t txnid; - size_t is_steady; -} meta_snap_t; - -static __always_inline txnid_t -atomic_load_txnid(const volatile MDBX_atomic_uint32_t *ptr) { -#if (defined(__amd64__) || defined(__e2k__)) && !defined(ENABLE_UBSAN) && \ - MDBX_UNALIGNED_OK >= 8 - return atomic_load64((const volatile MDBX_atomic_uint64_t *)ptr, - mo_AcquireRelease); -#else - const uint32_t l = atomic_load32( - &ptr[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], mo_AcquireRelease); - const uint32_t h = atomic_load32( - &ptr[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], mo_AcquireRelease); - return (uint64_t)h << 32 | l; -#endif -} - -static __inline meta_snap_t meta_snap(const volatile MDBX_meta *meta) { - txnid_t txnid = atomic_load_txnid(meta->mm_txnid_a); - jitter4testing(true); - size_t is_steady = META_IS_STEADY(meta) && txnid >= MIN_TXNID; - jitter4testing(true); - if (unlikely(txnid != atomic_load_txnid(meta->mm_txnid_b))) - txnid = is_steady = 0; - meta_snap_t r = {txnid, is_steady}; - return r; -} - -static __inline txnid_t meta_txnid(const volatile MDBX_meta *meta) { - return meta_snap(meta).txnid; -} - -static __inline void meta_update_begin(const MDBX_env *env, MDBX_meta *meta, - txnid_t txnid) { - eASSERT(env, meta >= METAPAGE(env, 0) && meta < METAPAGE_END(env)); - eASSERT(env, unaligned_peek_u64(4, meta->mm_txnid_a) < txnid && - unaligned_peek_u64(4, meta->mm_txnid_b) < txnid); - (void)env; -#if (defined(__amd64__) || defined(__e2k__)) && 
!defined(ENABLE_UBSAN) && \ - MDBX_UNALIGNED_OK >= 8 - atomic_store64((MDBX_atomic_uint64_t *)&meta->mm_txnid_b, 0, - mo_AcquireRelease); - atomic_store64((MDBX_atomic_uint64_t *)&meta->mm_txnid_a, txnid, - mo_AcquireRelease); -#else - atomic_store32(&meta->mm_txnid_b[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], - 0, mo_AcquireRelease); - atomic_store32(&meta->mm_txnid_b[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], - 0, mo_AcquireRelease); - atomic_store32(&meta->mm_txnid_a[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], - (uint32_t)txnid, mo_AcquireRelease); - atomic_store32(&meta->mm_txnid_a[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], - (uint32_t)(txnid >> 32), mo_AcquireRelease); -#endif -} - -static __inline void meta_update_end(const MDBX_env *env, MDBX_meta *meta, - txnid_t txnid) { - eASSERT(env, meta >= METAPAGE(env, 0) && meta < METAPAGE_END(env)); - eASSERT(env, unaligned_peek_u64(4, meta->mm_txnid_a) == txnid); - eASSERT(env, unaligned_peek_u64(4, meta->mm_txnid_b) < txnid); - (void)env; - jitter4testing(true); - memcpy(&meta->mm_bootid, &bootid, 16); -#if (defined(__amd64__) || defined(__e2k__)) && !defined(ENABLE_UBSAN) && \ - MDBX_UNALIGNED_OK >= 8 - atomic_store64((MDBX_atomic_uint64_t *)&meta->mm_txnid_b, txnid, - mo_AcquireRelease); -#else - atomic_store32(&meta->mm_txnid_b[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], - (uint32_t)txnid, mo_AcquireRelease); - atomic_store32(&meta->mm_txnid_b[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], - (uint32_t)(txnid >> 32), mo_AcquireRelease); -#endif -} - -static __inline void meta_set_txnid(const MDBX_env *env, MDBX_meta *meta, - const txnid_t txnid) { - eASSERT(env, - !env->me_map || meta < METAPAGE(env, 0) || meta >= METAPAGE_END(env)); - (void)env; - /* update inconsistently since this function used ONLY for filling meta-image - * for writing, but not the actual meta-page */ - memcpy(&meta->mm_bootid, &bootid, 16); - unaligned_poke_u64(4, meta->mm_txnid_a, txnid); - unaligned_poke_u64(4, meta->mm_txnid_b, txnid); 
-} - -static __inline uint64_t meta_sign(const MDBX_meta *meta) { - uint64_t sign = MDBX_DATASIGN_NONE; -#if 0 /* TODO */ - sign = hippeus_hash64(...); -#else - (void)meta; -#endif - /* LY: newer returns MDBX_DATASIGN_NONE or MDBX_DATASIGN_WEAK */ - return (sign > MDBX_DATASIGN_WEAK) ? sign : ~sign; -} - -typedef struct { - txnid_t txnid; - union { - const volatile MDBX_meta *ptr_v; - const MDBX_meta *ptr_c; - }; - size_t is_steady; -} meta_ptr_t; - -static meta_ptr_t meta_ptr(const MDBX_env *env, unsigned n) { - eASSERT(env, n < NUM_METAS); - meta_ptr_t r; - meta_snap_t snap = meta_snap(r.ptr_v = METAPAGE(env, n)); - r.txnid = snap.txnid; - r.is_steady = snap.is_steady; - return r; -} - -static __always_inline uint8_t meta_cmp2int(txnid_t a, txnid_t b, uint8_t s) { - return unlikely(a == b) ? 1 * s : (a > b) ? 2 * s : 0 * s; -} - -static __always_inline uint8_t meta_cmp2recent(uint8_t ab_cmp2int, - bool a_steady, bool b_steady) { - assert(ab_cmp2int < 3 /* && a_steady< 2 && b_steady < 2 */); - return ab_cmp2int > 1 || (ab_cmp2int == 1 && a_steady > b_steady); -} - -static __always_inline uint8_t meta_cmp2steady(uint8_t ab_cmp2int, - bool a_steady, bool b_steady) { - assert(ab_cmp2int < 3 /* && a_steady< 2 && b_steady < 2 */); - return a_steady > b_steady || (a_steady == b_steady && ab_cmp2int > 1); -} - -static __inline bool meta_choice_recent(txnid_t a_txnid, bool a_steady, - txnid_t b_txnid, bool b_steady) { - return meta_cmp2recent(meta_cmp2int(a_txnid, b_txnid, 1), a_steady, b_steady); -} - -static __inline bool meta_choice_steady(txnid_t a_txnid, bool a_steady, - txnid_t b_txnid, bool b_steady) { - return meta_cmp2steady(meta_cmp2int(a_txnid, b_txnid, 1), a_steady, b_steady); -} - -MDBX_MAYBE_UNUSED static uint8_t meta_cmp2pack(uint8_t c01, uint8_t c02, - uint8_t c12, bool s0, bool s1, - bool s2) { - assert(c01 < 3 && c02 < 3 && c12 < 3); - /* assert(s0 < 2 && s1 < 2 && s2 < 2); */ - const uint8_t recent = meta_cmp2recent(c01, s0, s1) - ? 
(meta_cmp2recent(c02, s0, s2) ? 0 : 2) - : (meta_cmp2recent(c12, s1, s2) ? 1 : 2); - const uint8_t prefer_steady = meta_cmp2steady(c01, s0, s1) - ? (meta_cmp2steady(c02, s0, s2) ? 0 : 2) - : (meta_cmp2steady(c12, s1, s2) ? 1 : 2); - - uint8_t tail; - if (recent == 0) - tail = meta_cmp2steady(c12, s1, s2) ? 2 : 1; - else if (recent == 1) - tail = meta_cmp2steady(c02, s0, s2) ? 2 : 0; - else - tail = meta_cmp2steady(c01, s0, s1) ? 1 : 0; - - const bool valid = - c01 != 1 || s0 != s1 || c02 != 1 || s0 != s2 || c12 != 1 || s1 != s2; - const bool strict = (c01 != 1 || s0 != s1) && (c02 != 1 || s0 != s2) && - (c12 != 1 || s1 != s2); - return tail | recent << 2 | prefer_steady << 4 | strict << 6 | valid << 7; -} - -static __inline void meta_troika_unpack(meta_troika_t *troika, - const uint8_t packed) { - troika->recent = (packed >> 2) & 3; - troika->prefer_steady = (packed >> 4) & 3; - troika->tail_and_flags = packed & 0xC3; -#if MDBX_WORDBITS > 32 /* Workaround for false-positives from Valgrind */ - troika->unused_pad = 0; -#endif -} - -static const uint8_t troika_fsm_map[2 * 2 * 2 * 3 * 3 * 3] = { - 232, 201, 216, 216, 232, 233, 232, 232, 168, 201, 216, 152, 168, 233, 232, - 168, 233, 201, 216, 201, 233, 233, 232, 233, 168, 201, 152, 216, 232, 169, - 232, 168, 168, 193, 152, 152, 168, 169, 232, 168, 169, 193, 152, 194, 233, - 169, 232, 169, 232, 201, 216, 216, 232, 201, 232, 232, 168, 193, 216, 152, - 168, 193, 232, 168, 193, 193, 210, 194, 225, 193, 225, 193, 168, 137, 212, - 214, 232, 233, 168, 168, 168, 137, 212, 150, 168, 233, 168, 168, 169, 137, - 216, 201, 233, 233, 168, 169, 168, 137, 148, 214, 232, 169, 168, 168, 40, - 129, 148, 150, 168, 169, 168, 40, 169, 129, 152, 194, 233, 169, 168, 169, - 168, 137, 214, 214, 232, 201, 168, 168, 168, 129, 214, 150, 168, 193, 168, - 168, 129, 129, 210, 194, 225, 193, 161, 129, 212, 198, 212, 214, 228, 228, - 212, 212, 148, 201, 212, 150, 164, 233, 212, 148, 233, 201, 216, 201, 233, - 233, 216, 233, 148, 198, 148, 214, 228, 
164, 212, 148, 148, 194, 148, 150, - 164, 169, 212, 148, 169, 194, 152, 194, 233, 169, 216, 169, 214, 198, 214, - 214, 228, 198, 212, 214, 150, 194, 214, 150, 164, 193, 212, 150, 194, 194, - 210, 194, 225, 193, 210, 194}; - -__hot static meta_troika_t meta_tap(const MDBX_env *env) { - meta_snap_t snap; - meta_troika_t troika; - snap = meta_snap(METAPAGE(env, 0)); - troika.txnid[0] = snap.txnid; - troika.fsm = (uint8_t)snap.is_steady << 0; - snap = meta_snap(METAPAGE(env, 1)); - troika.txnid[1] = snap.txnid; - troika.fsm += (uint8_t)snap.is_steady << 1; - troika.fsm += meta_cmp2int(troika.txnid[0], troika.txnid[1], 8); - snap = meta_snap(METAPAGE(env, 2)); - troika.txnid[2] = snap.txnid; - troika.fsm += (uint8_t)snap.is_steady << 2; - troika.fsm += meta_cmp2int(troika.txnid[0], troika.txnid[2], 8 * 3); - troika.fsm += meta_cmp2int(troika.txnid[1], troika.txnid[2], 8 * 3 * 3); - - meta_troika_unpack(&troika, troika_fsm_map[troika.fsm]); - return troika; -} - -static txnid_t recent_committed_txnid(const MDBX_env *env) { - const txnid_t m0 = meta_txnid(METAPAGE(env, 0)); - const txnid_t m1 = meta_txnid(METAPAGE(env, 1)); - const txnid_t m2 = meta_txnid(METAPAGE(env, 2)); - return (m0 > m1) ? ((m0 > m2) ? m0 : m2) : ((m1 > m2) ? 
m1 : m2); -} - -static __inline bool meta_eq(const meta_troika_t *troika, size_t a, size_t b) { - assert(a < NUM_METAS && b < NUM_METAS); - return troika->txnid[a] == troika->txnid[b] && - (((troika->fsm >> a) ^ (troika->fsm >> b)) & 1) == 0 && - troika->txnid[a]; -} - -static unsigned meta_eq_mask(const meta_troika_t *troika) { - return meta_eq(troika, 0, 1) | meta_eq(troika, 1, 2) << 1 | - meta_eq(troika, 2, 0) << 2; -} - -__hot static bool meta_should_retry(const MDBX_env *env, - meta_troika_t *troika) { - const meta_troika_t prev = *troika; - *troika = meta_tap(env); - return prev.fsm != troika->fsm || prev.txnid[0] != troika->txnid[0] || - prev.txnid[1] != troika->txnid[1] || prev.txnid[2] != troika->txnid[2]; -} - -static __always_inline meta_ptr_t meta_recent(const MDBX_env *env, - const meta_troika_t *troika) { - meta_ptr_t r; - r.txnid = troika->txnid[troika->recent]; - r.ptr_v = METAPAGE(env, troika->recent); - r.is_steady = (troika->fsm >> troika->recent) & 1; - return r; -} - -static __always_inline meta_ptr_t -meta_prefer_steady(const MDBX_env *env, const meta_troika_t *troika) { - meta_ptr_t r; - r.txnid = troika->txnid[troika->prefer_steady]; - r.ptr_v = METAPAGE(env, troika->prefer_steady); - r.is_steady = (troika->fsm >> troika->prefer_steady) & 1; - return r; -} - -static __always_inline meta_ptr_t meta_tail(const MDBX_env *env, - const meta_troika_t *troika) { - const uint8_t tail = troika->tail_and_flags & 3; - MDBX_ANALYSIS_ASSUME(tail < NUM_METAS); - meta_ptr_t r; - r.txnid = troika->txnid[tail]; - r.ptr_v = METAPAGE(env, tail); - r.is_steady = (troika->fsm >> tail) & 1; - return r; -} - -static const char *durable_caption(const volatile MDBX_meta *const meta) { - if (META_IS_STEADY(meta)) - return (unaligned_peek_u64_volatile(4, meta->mm_sign) == - meta_sign((const MDBX_meta *)meta)) - ? 
"Steady" - : "Tainted"; - return "Weak"; -} - -__cold static void meta_troika_dump(const MDBX_env *env, - const meta_troika_t *troika) { - const meta_ptr_t recent = meta_recent(env, troika); - const meta_ptr_t prefer_steady = meta_prefer_steady(env, troika); - const meta_ptr_t tail = meta_tail(env, troika); - NOTICE("troika: %" PRIaTXN ".%c:%" PRIaTXN ".%c:%" PRIaTXN ".%c, fsm=0x%02x, " - "head=%d-%" PRIaTXN ".%c, " - "base=%d-%" PRIaTXN ".%c, " - "tail=%d-%" PRIaTXN ".%c, " - "valid %c, strict %c", - troika->txnid[0], (troika->fsm & 1) ? 's' : 'w', troika->txnid[1], - (troika->fsm & 2) ? 's' : 'w', troika->txnid[2], - (troika->fsm & 4) ? 's' : 'w', troika->fsm, troika->recent, - recent.txnid, recent.is_steady ? 's' : 'w', troika->prefer_steady, - prefer_steady.txnid, prefer_steady.is_steady ? 's' : 'w', - troika->tail_and_flags % NUM_METAS, tail.txnid, - tail.is_steady ? 's' : 'w', TROIKA_VALID(troika) ? 'Y' : 'N', - TROIKA_STRICT_VALID(troika) ? 'Y' : 'N'); -} - -/*----------------------------------------------------------------------------*/ - -static __inline MDBX_CONST_FUNCTION MDBX_lockinfo * -lckless_stub(const MDBX_env *env) { - uintptr_t stub = (uintptr_t)&env->x_lckless_stub; - /* align to avoid false-positive alarm from UndefinedBehaviorSanitizer */ - stub = (stub + MDBX_CACHELINE_SIZE - 1) & ~(MDBX_CACHELINE_SIZE - 1); - return (MDBX_lockinfo *)stub; -} - -/* Find oldest txnid still referenced. 
*/ -static txnid_t find_oldest_reader(MDBX_env *const env, const txnid_t steady) { - const uint32_t nothing_changed = MDBX_STRING_TETRAD("None"); - eASSERT(env, steady <= env->me_txn0->mt_txnid); - - MDBX_lockinfo *const lck = env->me_lck_mmap.lck; - if (unlikely(lck == NULL /* exclusive without-lck mode */)) { - eASSERT(env, env->me_lck == lckless_stub(env)); - env->me_lck->mti_readers_refresh_flag.weak = nothing_changed; - return env->me_lck->mti_oldest_reader.weak = steady; - } - - const txnid_t prev_oldest = - atomic_load64(&lck->mti_oldest_reader, mo_AcquireRelease); - eASSERT(env, steady >= prev_oldest); - - txnid_t new_oldest = prev_oldest; - while (nothing_changed != - atomic_load32(&lck->mti_readers_refresh_flag, mo_AcquireRelease)) { - lck->mti_readers_refresh_flag.weak = nothing_changed; - jitter4testing(false); - const size_t snap_nreaders = - atomic_load32(&lck->mti_numreaders, mo_AcquireRelease); - new_oldest = steady; - - for (size_t i = 0; i < snap_nreaders; ++i) { - const uint32_t pid = - atomic_load32(&lck->mti_readers[i].mr_pid, mo_AcquireRelease); - if (!pid) - continue; - jitter4testing(true); - - const txnid_t rtxn = safe64_read(&lck->mti_readers[i].mr_txnid); - if (unlikely(rtxn < prev_oldest)) { - if (unlikely(nothing_changed == - atomic_load32(&lck->mti_readers_refresh_flag, - mo_AcquireRelease)) && - safe64_reset_compare(&lck->mti_readers[i].mr_txnid, rtxn)) { - NOTICE("kick stuck reader[%zu of %zu].pid_%u %" PRIaTXN - " < prev-oldest %" PRIaTXN ", steady-txn %" PRIaTXN, - i, snap_nreaders, pid, rtxn, prev_oldest, steady); - } - continue; - } - - if (rtxn < new_oldest) { - new_oldest = rtxn; - if (!MDBX_DEBUG && !MDBX_FORCE_ASSERTIONS && new_oldest == prev_oldest) - break; - } - } - } - - if (new_oldest != prev_oldest) { - VERBOSE("update oldest %" PRIaTXN " -> %" PRIaTXN, prev_oldest, new_oldest); - eASSERT(env, new_oldest >= lck->mti_oldest_reader.weak); - atomic_store64(&lck->mti_oldest_reader, new_oldest, mo_Relaxed); - } - return 
new_oldest; -} - -static txnid_t txn_oldest_reader(const MDBX_txn *const txn) { - return find_oldest_reader(txn->mt_env, - txn->tw.troika.txnid[txn->tw.troika.prefer_steady]); -} - -/* Find largest mvcc-snapshot still referenced. */ -static pgno_t find_largest_snapshot(const MDBX_env *env, - pgno_t last_used_page) { - MDBX_lockinfo *const lck = env->me_lck_mmap.lck; - if (likely(lck != NULL /* check for exclusive without-lck mode */)) { - retry:; - const size_t snap_nreaders = - atomic_load32(&lck->mti_numreaders, mo_AcquireRelease); - for (size_t i = 0; i < snap_nreaders; ++i) { - if (atomic_load32(&lck->mti_readers[i].mr_pid, mo_AcquireRelease)) { - /* jitter4testing(true); */ - const pgno_t snap_pages = atomic_load32( - &lck->mti_readers[i].mr_snapshot_pages_used, mo_Relaxed); - const txnid_t snap_txnid = safe64_read(&lck->mti_readers[i].mr_txnid); - if (unlikely( - snap_pages != - atomic_load32(&lck->mti_readers[i].mr_snapshot_pages_used, - mo_AcquireRelease) || - snap_txnid != safe64_read(&lck->mti_readers[i].mr_txnid))) - goto retry; - if (last_used_page < snap_pages && snap_txnid <= env->me_txn0->mt_txnid) - last_used_page = snap_pages; - } - } - } - - return last_used_page; -} - -/* Add a page to the txn's dirty list */ -__hot static int __must_check_result page_dirty(MDBX_txn *txn, MDBX_page *mp, - size_t npages) { - tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); - mp->mp_txnid = txn->mt_front; - if (!txn->tw.dirtylist) { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); - txn->tw.writemap_dirty_npages += npages; - tASSERT(txn, txn->tw.spilled.list == nullptr); - return MDBX_SUCCESS; - } - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - -#if xMDBX_DEBUG_SPILLING == 2 - txn->mt_env->debug_dirtied_act += 1; - ENSURE(txn->mt_env, - txn->mt_env->debug_dirtied_act < txn->mt_env->debug_dirtied_est); - ENSURE(txn->mt_env, txn->tw.dirtyroom + txn->tw.loose_count > 0); -#endif /* xMDBX_DEBUG_SPILLING == 2 */ 
- - int rc; - if (unlikely(txn->tw.dirtyroom == 0)) { - if (txn->tw.loose_count) { - MDBX_page *lp = txn->tw.loose_pages; - DEBUG("purge-and-reclaim loose page %" PRIaPGNO, lp->mp_pgno); - rc = pnl_insert_range(&txn->tw.relist, lp->mp_pgno, 1); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - size_t di = dpl_search(txn, lp->mp_pgno); - tASSERT(txn, txn->tw.dirtylist->items[di].ptr == lp); - dpl_remove(txn, di); - MDBX_ASAN_UNPOISON_MEMORY_REGION(&mp_next(lp), sizeof(MDBX_page *)); - VALGRIND_MAKE_MEM_DEFINED(&mp_next(lp), sizeof(MDBX_page *)); - txn->tw.loose_pages = mp_next(lp); - txn->tw.loose_count--; - txn->tw.dirtyroom++; - if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)) - dpage_free(txn->mt_env, lp, 1); - } else { - ERROR("Dirtyroom is depleted, DPL length %zu", txn->tw.dirtylist->length); - if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)) - dpage_free(txn->mt_env, mp, npages); - return MDBX_TXN_FULL; - } - } - - rc = dpl_append(txn, mp->mp_pgno, mp, npages); - if (unlikely(rc != MDBX_SUCCESS)) { - bailout: - txn->mt_flags |= MDBX_TXN_ERROR; - return rc; - } - txn->tw.dirtyroom--; - tASSERT(txn, dirtylist_check(txn)); - return MDBX_SUCCESS; -} - -static void mincore_clean_cache(const MDBX_env *const env) { - memset(env->me_lck->mti_mincore_cache.begin, -1, - sizeof(env->me_lck->mti_mincore_cache.begin)); -} - -#if !(defined(_WIN32) || defined(_WIN64)) -MDBX_MAYBE_UNUSED static __always_inline int ignore_enosys(int err) { -#ifdef ENOSYS - if (err == ENOSYS) - return MDBX_RESULT_TRUE; -#endif /* ENOSYS */ -#ifdef ENOIMPL - if (err == ENOIMPL) - return MDBX_RESULT_TRUE; -#endif /* ENOIMPL */ -#ifdef ENOTSUP - if (err == ENOTSUP) - return MDBX_RESULT_TRUE; -#endif /* ENOTSUP */ -#ifdef ENOSUPP - if (err == ENOSUPP) - return MDBX_RESULT_TRUE; -#endif /* ENOSUPP */ -#ifdef EOPNOTSUPP - if (err == EOPNOTSUPP) - return MDBX_RESULT_TRUE; -#endif /* EOPNOTSUPP */ - if (err == EAGAIN) - return MDBX_RESULT_TRUE; - return err; -} -#endif /* 
defined(_WIN32) || defined(_WIN64) */ - -#if MDBX_ENABLE_MADVISE -/* Turn on/off readahead. It's harmful when the DB is larger than RAM. */ -__cold static int set_readahead(const MDBX_env *env, const pgno_t edge, - const bool enable, const bool force_whole) { - eASSERT(env, edge >= NUM_METAS && edge <= MAX_PAGENO + 1); - eASSERT(env, (enable & 1) == (enable != 0)); - const bool toggle = force_whole || - ((enable ^ env->me_lck->mti_readahead_anchor) & 1) || - !env->me_lck->mti_readahead_anchor; - const pgno_t prev_edge = env->me_lck->mti_readahead_anchor >> 1; - const size_t limit = env->me_dxb_mmap.limit; - size_t offset = - toggle ? 0 - : pgno_align2os_bytes(env, (prev_edge < edge) ? prev_edge : edge); - offset = (offset < limit) ? offset : limit; - - size_t length = - pgno_align2os_bytes(env, (prev_edge < edge) ? edge : prev_edge); - length = (length < limit) ? length : limit; - length -= offset; - - eASSERT(env, 0 <= (intptr_t)length); - if (length == 0) - return MDBX_SUCCESS; - - NOTICE("readahead %s %u..%u", enable ? "ON" : "OFF", bytes2pgno(env, offset), - bytes2pgno(env, offset + length)); - -#if defined(F_RDAHEAD) - if (toggle && unlikely(fcntl(env->me_lazy_fd, F_RDAHEAD, enable) == -1)) - return errno; -#endif /* F_RDAHEAD */ - - int err; - void *const ptr = ptr_disp(env->me_map, offset); - if (enable) { -#if defined(MADV_NORMAL) - err = - madvise(ptr, length, MADV_NORMAL) ? 
ignore_enosys(errno) : MDBX_SUCCESS; - if (unlikely(MDBX_IS_ERROR(err))) - return err; -#elif defined(POSIX_MADV_NORMAL) - err = ignore_enosys(posix_madvise(ptr, length, POSIX_MADV_NORMAL)); - if (unlikely(MDBX_IS_ERROR(err))) - return err; -#elif defined(POSIX_FADV_NORMAL) && defined(POSIX_FADV_WILLNEED) - err = ignore_enosys( - posix_fadvise(env->me_lazy_fd, offset, length, POSIX_FADV_NORMAL)); - if (unlikely(MDBX_IS_ERROR(err))) - return err; -#elif defined(_WIN32) || defined(_WIN64) - /* no madvise on Windows */ -#else -#warning "FIXME" -#endif - if (toggle) { - /* NOTE: Seems there is a bug in the Mach/Darwin/OSX kernel, - * because MADV_WILLNEED with offset != 0 may cause SIGBUS - * on following access to the hinted region. - * 19.6.0 Darwin Kernel Version 19.6.0: Tue Jan 12 22:13:05 PST 2021; - * root:xnu-6153.141.16~1/RELEASE_X86_64 x86_64 */ -#if defined(F_RDADVISE) - struct radvisory hint; - hint.ra_offset = offset; - hint.ra_count = - unlikely(length > INT_MAX && sizeof(length) > sizeof(hint.ra_count)) - ? INT_MAX - : (int)length; - (void)/* Ignore ENOTTY for DB on the ram-disk and so on */ fcntl( - env->me_lazy_fd, F_RDADVISE, &hint); -#elif defined(MADV_WILLNEED) - err = madvise(ptr, length, MADV_WILLNEED) ? 
ignore_enosys(errno) - : MDBX_SUCCESS; - if (unlikely(MDBX_IS_ERROR(err))) - return err; -#elif defined(POSIX_MADV_WILLNEED) - err = ignore_enosys(posix_madvise(ptr, length, POSIX_MADV_WILLNEED)); - if (unlikely(MDBX_IS_ERROR(err))) - return err; -#elif defined(_WIN32) || defined(_WIN64) - if (mdbx_PrefetchVirtualMemory) { - WIN32_MEMORY_RANGE_ENTRY hint; - hint.VirtualAddress = ptr; - hint.NumberOfBytes = length; - (void)mdbx_PrefetchVirtualMemory(GetCurrentProcess(), 1, &hint, 0); - } -#elif defined(POSIX_FADV_WILLNEED) - err = ignore_enosys( - posix_fadvise(env->me_lazy_fd, offset, length, POSIX_FADV_WILLNEED)); - if (unlikely(MDBX_IS_ERROR(err))) - return err; -#else -#warning "FIXME" -#endif - } - } else { - mincore_clean_cache(env); -#if defined(MADV_RANDOM) - err = - madvise(ptr, length, MADV_RANDOM) ? ignore_enosys(errno) : MDBX_SUCCESS; - if (unlikely(MDBX_IS_ERROR(err))) - return err; -#elif defined(POSIX_MADV_RANDOM) - err = ignore_enosys(posix_madvise(ptr, length, POSIX_MADV_RANDOM)); - if (unlikely(MDBX_IS_ERROR(err))) - return err; -#elif defined(POSIX_FADV_RANDOM) - err = ignore_enosys( - posix_fadvise(env->me_lazy_fd, offset, length, POSIX_FADV_RANDOM)); - if (unlikely(MDBX_IS_ERROR(err))) - return err; -#elif defined(_WIN32) || defined(_WIN64) - /* no madvise on Windows */ -#else -#warning "FIXME" -#endif /* MADV_RANDOM */ - } - - env->me_lck->mti_readahead_anchor = (enable & 1) + (edge << 1); - err = MDBX_SUCCESS; - return err; -} -#endif /* MDBX_ENABLE_MADVISE */ - -__cold static void update_mlcnt(const MDBX_env *env, - const pgno_t new_aligned_mlocked_pgno, - const bool lock_not_release) { - for (;;) { - const pgno_t mlock_pgno_before = - atomic_load32(&env->me_mlocked_pgno, mo_AcquireRelease); - eASSERT(env, - pgno_align2os_pgno(env, mlock_pgno_before) == mlock_pgno_before); - eASSERT(env, pgno_align2os_pgno(env, new_aligned_mlocked_pgno) == - new_aligned_mlocked_pgno); - if (lock_not_release ? 
(mlock_pgno_before >= new_aligned_mlocked_pgno) - : (mlock_pgno_before <= new_aligned_mlocked_pgno)) - break; - if (likely(atomic_cas32(&((MDBX_env *)env)->me_mlocked_pgno, - mlock_pgno_before, new_aligned_mlocked_pgno))) - for (;;) { - MDBX_atomic_uint32_t *const mlcnt = env->me_lck->mti_mlcnt; - const int32_t snap_locked = atomic_load32(mlcnt + 0, mo_Relaxed); - const int32_t snap_unlocked = atomic_load32(mlcnt + 1, mo_Relaxed); - if (mlock_pgno_before == 0 && (snap_locked - snap_unlocked) < INT_MAX) { - eASSERT(env, lock_not_release); - if (unlikely(!atomic_cas32(mlcnt + 0, snap_locked, snap_locked + 1))) - continue; - } - if (new_aligned_mlocked_pgno == 0 && - (snap_locked - snap_unlocked) > 0) { - eASSERT(env, !lock_not_release); - if (unlikely( - !atomic_cas32(mlcnt + 1, snap_unlocked, snap_unlocked + 1))) - continue; - } - NOTICE("%s-pages %u..%u, mlocked-process(es) %u -> %u", - lock_not_release ? "lock" : "unlock", - lock_not_release ? mlock_pgno_before : new_aligned_mlocked_pgno, - lock_not_release ? new_aligned_mlocked_pgno : mlock_pgno_before, - snap_locked - snap_unlocked, - atomic_load32(mlcnt + 0, mo_Relaxed) - - atomic_load32(mlcnt + 1, mo_Relaxed)); - return; - } - } -} - -__cold static void munlock_after(const MDBX_env *env, const pgno_t aligned_pgno, - const size_t end_bytes) { - if (atomic_load32(&env->me_mlocked_pgno, mo_AcquireRelease) > aligned_pgno) { - int err = MDBX_ENOSYS; - const size_t munlock_begin = pgno2bytes(env, aligned_pgno); - const size_t munlock_size = end_bytes - munlock_begin; - eASSERT(env, end_bytes % env->me_os_psize == 0 && - munlock_begin % env->me_os_psize == 0 && - munlock_size % env->me_os_psize == 0); -#if defined(_WIN32) || defined(_WIN64) - err = VirtualUnlock(ptr_disp(env->me_map, munlock_begin), munlock_size) - ? MDBX_SUCCESS - : (int)GetLastError(); - if (err == ERROR_NOT_LOCKED) - err = MDBX_SUCCESS; -#elif defined(_POSIX_MEMLOCK_RANGE) - err = munlock(ptr_disp(env->me_map, munlock_begin), munlock_size) - ? 
errno - : MDBX_SUCCESS; -#endif - if (likely(err == MDBX_SUCCESS)) - update_mlcnt(env, aligned_pgno, false); - else { -#if defined(_WIN32) || defined(_WIN64) - WARNING("VirtualUnlock(%zu, %zu) error %d", munlock_begin, munlock_size, - err); -#else - WARNING("munlock(%zu, %zu) error %d", munlock_begin, munlock_size, err); -#endif - } - } -} - -__cold static void munlock_all(const MDBX_env *env) { - munlock_after(env, 0, bytes_align2os_bytes(env, env->me_dxb_mmap.current)); -} - -__cold static unsigned default_rp_augment_limit(const MDBX_env *env) { - const size_t timeframe = /* 16 секунд */ 16 << 16; - const size_t remain_1sec = - (env->me_options.gc_time_limit < timeframe) - ? timeframe - (size_t)env->me_options.gc_time_limit - : 0; - const size_t minimum = (env->me_maxgc_ov1page * 2 > MDBX_PNL_INITIAL) - ? env->me_maxgc_ov1page * 2 - : MDBX_PNL_INITIAL; - const size_t one_third = env->me_dbgeo.now / 3 >> env->me_psize2log; - const size_t augment_limit = - (one_third > minimum) - ? minimum + (one_third - minimum) / timeframe * remain_1sec - : minimum; - eASSERT(env, augment_limit < MDBX_PGL_LIMIT); - return pnl_bytes2size(pnl_size2bytes(augment_limit)); -} - -static bool default_prefault_write(const MDBX_env *env) { - return !MDBX_MMAP_INCOHERENT_FILE_WRITE && !env->me_incore && - (env->me_flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == MDBX_WRITEMAP; -} - -static bool default_prefer_waf_insteadof_balance(const MDBX_env *env) { - (void)env; - return false; -} - -static void adjust_defaults(MDBX_env *env) { - if (!env->me_options.flags.non_auto.rp_augment_limit) - env->me_options.rp_augment_limit = default_rp_augment_limit(env); - if (!env->me_options.flags.non_auto.prefault_write) - env->me_options.prefault_write = default_prefault_write(env); - - const size_t basis = env->me_dbgeo.now; - /* TODO: use options? */ - const unsigned factor = 9; - size_t threshold = (basis < ((size_t)65536 << factor)) - ? 
65536 /* minimal threshold */ - : (basis > (MEGABYTE * 4 << factor)) - ? MEGABYTE * 4 /* maximal threshold */ - : basis >> factor; - threshold = (threshold < env->me_dbgeo.shrink || !env->me_dbgeo.shrink) - ? threshold - : env->me_dbgeo.shrink; - - env->me_madv_threshold = - bytes2pgno(env, bytes_align2os_bytes(env, threshold)); -} - -enum resize_mode { implicit_grow, impilict_shrink, explicit_resize }; - -__cold static int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, - const pgno_t size_pgno, pgno_t limit_pgno, - const enum resize_mode mode) { - /* Acquire guard to avoid collision between read and write txns - * around me_dbgeo and me_dxb_mmap */ -#if defined(_WIN32) || defined(_WIN64) - osal_srwlock_AcquireExclusive(&env->me_remap_guard); - int rc = MDBX_SUCCESS; - mdbx_handle_array_t *suspended = NULL; - mdbx_handle_array_t array_onstack; -#else - int rc = osal_fastmutex_acquire(&env->me_remap_guard); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; -#endif - - const size_t prev_size = env->me_dxb_mmap.current; - const size_t prev_limit = env->me_dxb_mmap.limit; - const pgno_t prev_limit_pgno = bytes2pgno(env, prev_limit); - eASSERT(env, limit_pgno >= size_pgno); - eASSERT(env, size_pgno >= used_pgno); - if (mode < explicit_resize && size_pgno <= prev_limit_pgno) { - /* The actual mapsize may be less since the geo.upper may be changed - * by other process. Avoids remapping until it necessary. 
*/ - limit_pgno = prev_limit_pgno; - } - const size_t limit_bytes = pgno_align2os_bytes(env, limit_pgno); - const size_t size_bytes = pgno_align2os_bytes(env, size_pgno); -#if MDBX_ENABLE_MADVISE || defined(ENABLE_MEMCHECK) - const void *const prev_map = env->me_dxb_mmap.base; -#endif /* MDBX_ENABLE_MADVISE || ENABLE_MEMCHECK */ - - VERBOSE("resize/%d datafile/mapping: " - "present %" PRIuPTR " -> %" PRIuPTR ", " - "limit %" PRIuPTR " -> %" PRIuPTR, - mode, prev_size, size_bytes, prev_limit, limit_bytes); - - eASSERT(env, limit_bytes >= size_bytes); - eASSERT(env, bytes2pgno(env, size_bytes) >= size_pgno); - eASSERT(env, bytes2pgno(env, limit_bytes) >= limit_pgno); - - unsigned mresize_flags = - env->me_flags & (MDBX_RDONLY | MDBX_WRITEMAP | MDBX_UTTERLY_NOSYNC); - if (mode >= impilict_shrink) - mresize_flags |= MDBX_SHRINK_ALLOWED; - - if (limit_bytes == env->me_dxb_mmap.limit && - size_bytes == env->me_dxb_mmap.current && - size_bytes == env->me_dxb_mmap.filesize) - goto bailout; - - /* При использовании MDBX_NOSTICKYTHREADS с транзакциями могут работать любые - * потоки и у нас нет информации о том, какие именно. Поэтому нет возможности - * выполнить remap-действия требующие приостановки работающих с БД потоков. */ - if ((env->me_flags & MDBX_NOSTICKYTHREADS) == 0) { -#if defined(_WIN32) || defined(_WIN64) - if ((size_bytes < env->me_dxb_mmap.current && mode > implicit_grow) || - limit_bytes != env->me_dxb_mmap.limit) { - /* 1) Windows allows only extending a read-write section, but not a - * corresponding mapped view. Therefore in other cases we must suspend - * the local threads for safe remap. - * 2) At least on Windows 10 1803 the entire mapped section is unavailable - * for short time during NtExtendSection() or VirtualAlloc() execution. - * 3) Under Wine runtime environment on Linux a section extending is not - * supported. - * - * THEREFORE LOCAL THREADS SUSPENDING IS ALWAYS REQUIRED! 
*/ - array_onstack.limit = ARRAY_LENGTH(array_onstack.handles); - array_onstack.count = 0; - suspended = &array_onstack; - rc = osal_suspend_threads_before_remap(env, &suspended); - if (rc != MDBX_SUCCESS) { - ERROR("failed suspend-for-remap: errcode %d", rc); - goto bailout; - } - mresize_flags |= (mode < explicit_resize) - ? MDBX_MRESIZE_MAY_UNMAP - : MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE; - } -#else /* Windows */ - MDBX_lockinfo *const lck = env->me_lck_mmap.lck; - if (mode == explicit_resize && limit_bytes != env->me_dxb_mmap.limit) { - mresize_flags |= MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE; - if (lck) { - int err = osal_rdt_lock(env) /* lock readers table until remap done */; - if (unlikely(MDBX_IS_ERROR(err))) { - rc = err; - goto bailout; - } - - /* looking for readers from this process */ - const size_t snap_nreaders = - atomic_load32(&lck->mti_numreaders, mo_AcquireRelease); - eASSERT(env, mode == explicit_resize); - for (size_t i = 0; i < snap_nreaders; ++i) { - if (lck->mti_readers[i].mr_pid.weak == env->me_pid && - lck->mti_readers[i].mr_tid.weak != osal_thread_self()) { - /* the base address of the mapping can't be changed since - * the other reader thread from this process exists. */ - osal_rdt_unlock(env); - mresize_flags &= ~(MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE); - break; - } - } - } - } -#endif /* ! Windows */ - } - - const pgno_t aligned_munlock_pgno = - (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) - ? 
0 - : bytes2pgno(env, size_bytes); - if (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) { - mincore_clean_cache(env); - if ((env->me_flags & MDBX_WRITEMAP) && - env->me_lck->mti_unsynced_pages.weak) { -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.msync.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - rc = osal_msync(&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, used_pgno), - MDBX_SYNC_NONE); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - } - munlock_after(env, aligned_munlock_pgno, size_bytes); - -#if MDBX_ENABLE_MADVISE - if (size_bytes < prev_size && mode > implicit_grow) { - NOTICE("resize-MADV_%s %u..%u", - (env->me_flags & MDBX_WRITEMAP) ? "REMOVE" : "DONTNEED", size_pgno, - bytes2pgno(env, prev_size)); - const uint32_t munlocks_before = - atomic_load32(&env->me_lck->mti_mlcnt[1], mo_Relaxed); - rc = MDBX_RESULT_TRUE; -#if defined(MADV_REMOVE) - if (env->me_flags & MDBX_WRITEMAP) - rc = madvise(ptr_disp(env->me_map, size_bytes), prev_size - size_bytes, - MADV_REMOVE) - ? ignore_enosys(errno) - : MDBX_SUCCESS; -#endif /* MADV_REMOVE */ -#if defined(MADV_DONTNEED) - if (rc == MDBX_RESULT_TRUE) - rc = madvise(ptr_disp(env->me_map, size_bytes), prev_size - size_bytes, - MADV_DONTNEED) - ? ignore_enosys(errno) - : MDBX_SUCCESS; -#elif defined(POSIX_MADV_DONTNEED) - if (rc == MDBX_RESULT_TRUE) - rc = ignore_enosys(posix_madvise(ptr_disp(env->me_map, size_bytes), - prev_size - size_bytes, - POSIX_MADV_DONTNEED)); -#elif defined(POSIX_FADV_DONTNEED) - if (rc == MDBX_RESULT_TRUE) - rc = ignore_enosys(posix_fadvise(env->me_lazy_fd, size_bytes, - prev_size - size_bytes, - POSIX_FADV_DONTNEED)); -#endif /* MADV_DONTNEED */ - if (unlikely(MDBX_IS_ERROR(rc))) { - const uint32_t mlocks_after = - atomic_load32(&env->me_lck->mti_mlcnt[0], mo_Relaxed); - if (rc == MDBX_EINVAL) { - const int severity = - (mlocks_after - munlocks_before) ? 
MDBX_LOG_NOTICE : MDBX_LOG_WARN; - if (LOG_ENABLED(severity)) - debug_log(severity, __func__, __LINE__, - "%s-madvise: ignore EINVAL (%d) since some pages maybe " - "locked (%u/%u mlcnt-processes)", - "resize", rc, mlocks_after, munlocks_before); - } else { - ERROR("%s-madvise(%s, %zu, +%zu), %u/%u mlcnt-processes, err %d", - "mresize", "DONTNEED", size_bytes, prev_size - size_bytes, - mlocks_after, munlocks_before, rc); - goto bailout; - } - } else - env->me_lck->mti_discarded_tail.weak = size_pgno; - } -#endif /* MDBX_ENABLE_MADVISE */ - - rc = osal_mresize(mresize_flags, &env->me_dxb_mmap, size_bytes, limit_bytes); - eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); - -#if MDBX_ENABLE_MADVISE - if (rc == MDBX_SUCCESS) { - eASSERT(env, limit_bytes == env->me_dxb_mmap.limit); - eASSERT(env, size_bytes <= env->me_dxb_mmap.filesize); - if (mode == explicit_resize) - eASSERT(env, size_bytes == env->me_dxb_mmap.current); - else - eASSERT(env, size_bytes <= env->me_dxb_mmap.current); - env->me_lck->mti_discarded_tail.weak = size_pgno; - const bool readahead = - !(env->me_flags & MDBX_NORDAHEAD) && - mdbx_is_readahead_reasonable(size_bytes, -(intptr_t)prev_size); - const bool force = limit_bytes != prev_limit || - env->me_dxb_mmap.base != prev_map -#if defined(_WIN32) || defined(_WIN64) - || prev_size > size_bytes -#endif /* Windows */ - ; - rc = set_readahead(env, size_pgno, readahead, force); - } -#endif /* MDBX_ENABLE_MADVISE */ - -bailout: - if (rc == MDBX_SUCCESS) { - eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); - eASSERT(env, limit_bytes == env->me_dxb_mmap.limit); - eASSERT(env, size_bytes <= env->me_dxb_mmap.filesize); - if (mode == explicit_resize) - eASSERT(env, size_bytes == env->me_dxb_mmap.current); - else - eASSERT(env, size_bytes <= env->me_dxb_mmap.current); - /* update env-geo to avoid influences */ - env->me_dbgeo.now = env->me_dxb_mmap.current; - env->me_dbgeo.upper = env->me_dxb_mmap.limit; - adjust_defaults(env); 
-#ifdef ENABLE_MEMCHECK - if (prev_limit != env->me_dxb_mmap.limit || prev_map != env->me_map) { - VALGRIND_DISCARD(env->me_valgrind_handle); - env->me_valgrind_handle = 0; - if (env->me_dxb_mmap.limit) - env->me_valgrind_handle = - VALGRIND_CREATE_BLOCK(env->me_map, env->me_dxb_mmap.limit, "mdbx"); - } -#endif /* ENABLE_MEMCHECK */ - } else { - if (rc != MDBX_UNABLE_EXTEND_MAPSIZE && rc != MDBX_EPERM) { - ERROR("failed resize datafile/mapping: " - "present %" PRIuPTR " -> %" PRIuPTR ", " - "limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d", - prev_size, size_bytes, prev_limit, limit_bytes, rc); - } else { - WARNING("unable resize datafile/mapping: " - "present %" PRIuPTR " -> %" PRIuPTR ", " - "limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d", - prev_size, size_bytes, prev_limit, limit_bytes, rc); - eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); - } - if (!env->me_dxb_mmap.base) { - env->me_flags |= MDBX_FATAL_ERROR; - if (env->me_txn) - env->me_txn->mt_flags |= MDBX_TXN_ERROR; - rc = MDBX_PANIC; - } - } - -#if defined(_WIN32) || defined(_WIN64) - int err = MDBX_SUCCESS; - osal_srwlock_ReleaseExclusive(&env->me_remap_guard); - if (suspended) { - err = osal_resume_threads_after_remap(suspended); - if (suspended != &array_onstack) - osal_free(suspended); - } -#else - if (env->me_lck_mmap.lck && - (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) != 0) - osal_rdt_unlock(env); - int err = osal_fastmutex_release(&env->me_remap_guard); -#endif /* Windows */ - if (err != MDBX_SUCCESS) { - FATAL("failed resume-after-remap: errcode %d", err); - return MDBX_PANIC; - } - return rc; -} - -static int meta_unsteady(int err, MDBX_env *env, const txnid_t early_than, - const pgno_t pgno) { - MDBX_meta *const meta = METAPAGE(env, pgno); - const txnid_t txnid = constmeta_txnid(meta); - if (unlikely(err != MDBX_SUCCESS) || !META_IS_STEADY(meta) || - !(txnid < early_than)) - return err; - - WARNING("wipe txn #%" PRIaTXN ", meta %" PRIaPGNO, txnid, 
pgno); - const uint64_t wipe = MDBX_DATASIGN_NONE; - const void *ptr = &wipe; - size_t bytes = sizeof(meta->mm_sign), - offset = ptr_dist(&meta->mm_sign, env->me_map); - if (env->me_flags & MDBX_WRITEMAP) { - unaligned_poke_u64(4, meta->mm_sign, wipe); - osal_flush_incoherent_cpu_writeback(); - if (!MDBX_AVOID_MSYNC) { - err = - osal_msync(&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS), - MDBX_SYNC_DATA | MDBX_SYNC_IODQ); -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.msync.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - return err; - } - ptr = data_page(meta); - offset = ptr_dist(ptr, env->me_map); - bytes = env->me_psize; - } - -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.wops.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - err = osal_pwrite(env->me_fd4meta, ptr, bytes, offset); - if (likely(err == MDBX_SUCCESS) && env->me_fd4meta == env->me_lazy_fd) { - err = osal_fsync(env->me_lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.fsync.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - } - return err; -} - -__cold static int wipe_steady(MDBX_txn *txn, txnid_t last_steady) { - MDBX_env *const env = txn->mt_env; - int err = MDBX_SUCCESS; - - /* early than last_steady */ - err = meta_unsteady(err, env, last_steady, 0); - err = meta_unsteady(err, env, last_steady, 1); - err = meta_unsteady(err, env, last_steady, 2); - - /* the last_steady */ - err = meta_unsteady(err, env, last_steady + 1, 0); - err = meta_unsteady(err, env, last_steady + 1, 1); - err = meta_unsteady(err, env, last_steady + 1, 2); - - osal_flush_incoherent_mmap(env->me_map, pgno2bytes(env, NUM_METAS), - env->me_os_psize); - - /* force oldest refresh */ - atomic_store32(&env->me_lck->mti_readers_refresh_flag, true, mo_Relaxed); - - tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); - txn->tw.troika = meta_tap(env); - for (MDBX_txn *scan = txn->mt_env->me_txn0; scan; scan = scan->mt_child) - if (scan != txn) - 
scan->tw.troika = txn->tw.troika; - return err; -} - -//------------------------------------------------------------------------------ - -MDBX_MAYBE_UNUSED __hot static pgno_t * -scan4seq_fallback(pgno_t *range, const size_t len, const size_t seq) { - assert(seq > 0 && len > seq); -#if MDBX_PNL_ASCENDING - assert(range[-1] == len); - const pgno_t *const detent = range + len - seq; - const ptrdiff_t offset = (ptrdiff_t)seq; - const pgno_t target = (pgno_t)offset; - if (likely(len > seq + 3)) { - do { - const pgno_t diff0 = range[offset + 0] - range[0]; - const pgno_t diff1 = range[offset + 1] - range[1]; - const pgno_t diff2 = range[offset + 2] - range[2]; - const pgno_t diff3 = range[offset + 3] - range[3]; - if (diff0 == target) - return range + 0; - if (diff1 == target) - return range + 1; - if (diff2 == target) - return range + 2; - if (diff3 == target) - return range + 3; - range += 4; - } while (range + 3 < detent); - if (range == detent) - return nullptr; - } - do - if (range[offset] - *range == target) - return range; - while (++range < detent); -#else - assert(range[-(ptrdiff_t)len] == len); - const pgno_t *const detent = range - len + seq; - const ptrdiff_t offset = -(ptrdiff_t)seq; - const pgno_t target = (pgno_t)offset; - if (likely(len > seq + 3)) { - do { - const pgno_t diff0 = range[-0] - range[offset - 0]; - const pgno_t diff1 = range[-1] - range[offset - 1]; - const pgno_t diff2 = range[-2] - range[offset - 2]; - const pgno_t diff3 = range[-3] - range[offset - 3]; - /* Смысл вычислений до ветвлений в том, чтобы позволить компилятору - * загружать и вычислять все значения параллельно. 
*/ - if (diff0 == target) - return range - 0; - if (diff1 == target) - return range - 1; - if (diff2 == target) - return range - 2; - if (diff3 == target) - return range - 3; - range -= 4; - } while (range > detent + 3); - if (range == detent) - return nullptr; - } - do - if (*range - range[offset] == target) - return range; - while (--range > detent); -#endif /* MDBX_PNL sort-order */ - return nullptr; -} - -MDBX_MAYBE_UNUSED static const pgno_t *scan4range_checker(const MDBX_PNL pnl, - const size_t seq) { - size_t begin = MDBX_PNL_ASCENDING ? 1 : MDBX_PNL_GETSIZE(pnl); -#if MDBX_PNL_ASCENDING - while (seq <= MDBX_PNL_GETSIZE(pnl) - begin) { - if (pnl[begin + seq] - pnl[begin] == seq) - return pnl + begin; - ++begin; - } -#else - while (begin > seq) { - if (pnl[begin - seq] - pnl[begin] == seq) - return pnl + begin; - --begin; - } -#endif /* MDBX_PNL sort-order */ - return nullptr; -} - -#if defined(_MSC_VER) && !defined(__builtin_clz) && \ - !__has_builtin(__builtin_clz) -MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clz(uint32_t value) { - unsigned long index; - _BitScanReverse(&index, value); - return 31 - index; -} -#endif /* _MSC_VER */ - -#if defined(_MSC_VER) && !defined(__builtin_clzl) && \ - !__has_builtin(__builtin_clzl) -MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clzl(size_t value) { - unsigned long index; -#ifdef _WIN64 - assert(sizeof(value) == 8); - _BitScanReverse64(&index, value); - return 63 - index; -#else - assert(sizeof(value) == 4); - _BitScanReverse(&index, value); - return 31 - index; -#endif -} -#endif /* _MSC_VER */ - -#if !MDBX_PNL_ASCENDING - -#if !defined(MDBX_ATTRIBUTE_TARGET) && \ - (__has_attribute(__target__) || __GNUC_PREREQ(5, 0)) -#define MDBX_ATTRIBUTE_TARGET(target) __attribute__((__target__(target))) -#endif /* MDBX_ATTRIBUTE_TARGET */ - -#ifndef MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND -/* Workaround for GCC's bug with `-m32 -march=i686 -Ofast` - * 
gcc/i686-buildroot-linux-gnu/12.2.0/include/xmmintrin.h:814:1: - * error: inlining failed in call to 'always_inline' '_mm_movemask_ps': - * target specific option mismatch */ -#if !defined(__FAST_MATH__) || !__FAST_MATH__ || !defined(__GNUC__) || \ - defined(__e2k__) || defined(__clang__) || defined(__amd64__) || \ - defined(__SSE2__) -#define MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND 0 -#else -#define MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND 1 -#endif -#endif /* MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND */ - -#if defined(__SSE2__) && defined(__SSE__) -#define MDBX_ATTRIBUTE_TARGET_SSE2 /* nope */ -#elif (defined(_M_IX86_FP) && _M_IX86_FP >= 2) || defined(__amd64__) -#define __SSE2__ -#define MDBX_ATTRIBUTE_TARGET_SSE2 /* nope */ -#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \ - !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND -#define MDBX_ATTRIBUTE_TARGET_SSE2 MDBX_ATTRIBUTE_TARGET("sse,sse2") -#endif /* __SSE2__ */ - -#if defined(__AVX2__) -#define MDBX_ATTRIBUTE_TARGET_AVX2 /* nope */ -#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \ - !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND -#define MDBX_ATTRIBUTE_TARGET_AVX2 MDBX_ATTRIBUTE_TARGET("sse,sse2,avx,avx2") -#endif /* __AVX2__ */ - -#if defined(MDBX_ATTRIBUTE_TARGET_AVX2) -#if defined(__AVX512BW__) -#define MDBX_ATTRIBUTE_TARGET_AVX512BW /* nope */ -#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \ - !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND && \ - (__GNUC_PREREQ(6, 0) || __CLANG_PREREQ(5, 0)) -#define MDBX_ATTRIBUTE_TARGET_AVX512BW \ - MDBX_ATTRIBUTE_TARGET("sse,sse2,avx,avx2,avx512bw") -#endif /* __AVX512BW__ */ -#endif /* MDBX_ATTRIBUTE_TARGET_AVX2 for MDBX_ATTRIBUTE_TARGET_AVX512BW */ - -#ifdef MDBX_ATTRIBUTE_TARGET_SSE2 -MDBX_ATTRIBUTE_TARGET_SSE2 static __always_inline unsigned -diffcmp2mask_sse2(const pgno_t *const ptr, const ptrdiff_t offset, - const __m128i pattern) { - const __m128i f = _mm_loadu_si128((const __m128i *)ptr); - const __m128i l = _mm_loadu_si128((const __m128i *)(ptr 
+ offset)); - const __m128i cmp = _mm_cmpeq_epi32(_mm_sub_epi32(f, l), pattern); - return _mm_movemask_ps(*(const __m128 *)&cmp); -} - -MDBX_MAYBE_UNUSED __hot MDBX_ATTRIBUTE_TARGET_SSE2 static pgno_t * -scan4seq_sse2(pgno_t *range, const size_t len, const size_t seq) { - assert(seq > 0 && len > seq); -#if MDBX_PNL_ASCENDING -#error "FIXME: Not implemented" -#endif /* MDBX_PNL_ASCENDING */ - assert(range[-(ptrdiff_t)len] == len); - pgno_t *const detent = range - len + seq; - const ptrdiff_t offset = -(ptrdiff_t)seq; - const pgno_t target = (pgno_t)offset; - const __m128i pattern = _mm_set1_epi32(target); - uint8_t mask; - if (likely(len > seq + 3)) { - do { - mask = (uint8_t)diffcmp2mask_sse2(range - 3, offset, pattern); - if (mask) { -#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) - found: -#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ - return range + 28 - __builtin_clz(mask); - } - range -= 4; - } while (range > detent + 3); - if (range == detent) - return nullptr; - } - - /* Далее происходит чтение от 4 до 12 лишних байт, которые могут быть не - * только за пределами региона выделенного под PNL, но и пересекать границу - * страницы памяти. Что может приводить как к ошибкам ASAN, так и к падению. - * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. 
*/ -#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) - const unsigned on_page_safe_mask = 0xff0 /* enough for '-15' bytes offset */; - if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && - !RUNNING_ON_VALGRIND) { - const unsigned extra = (unsigned)(detent + 4 - range); - assert(extra > 0 && extra < 4); - mask = 0xF << extra; - mask &= diffcmp2mask_sse2(range - 3, offset, pattern); - if (mask) - goto found; - return nullptr; - } -#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ - do - if (*range - range[offset] == target) - return range; - while (--range != detent); - return nullptr; -} -#endif /* MDBX_ATTRIBUTE_TARGET_SSE2 */ - -#ifdef MDBX_ATTRIBUTE_TARGET_AVX2 -MDBX_ATTRIBUTE_TARGET_AVX2 static __always_inline unsigned -diffcmp2mask_avx2(const pgno_t *const ptr, const ptrdiff_t offset, - const __m256i pattern) { - const __m256i f = _mm256_loadu_si256((const __m256i *)ptr); - const __m256i l = _mm256_loadu_si256((const __m256i *)(ptr + offset)); - const __m256i cmp = _mm256_cmpeq_epi32(_mm256_sub_epi32(f, l), pattern); - return _mm256_movemask_ps(*(const __m256 *)&cmp); -} - -MDBX_ATTRIBUTE_TARGET_AVX2 static __always_inline unsigned -diffcmp2mask_sse2avx(const pgno_t *const ptr, const ptrdiff_t offset, - const __m128i pattern) { - const __m128i f = _mm_loadu_si128((const __m128i *)ptr); - const __m128i l = _mm_loadu_si128((const __m128i *)(ptr + offset)); - const __m128i cmp = _mm_cmpeq_epi32(_mm_sub_epi32(f, l), pattern); - return _mm_movemask_ps(*(const __m128 *)&cmp); -} - -MDBX_MAYBE_UNUSED __hot MDBX_ATTRIBUTE_TARGET_AVX2 static pgno_t * -scan4seq_avx2(pgno_t *range, const size_t len, const size_t seq) { - assert(seq > 0 && len > seq); -#if MDBX_PNL_ASCENDING -#error "FIXME: Not implemented" -#endif /* MDBX_PNL_ASCENDING */ - assert(range[-(ptrdiff_t)len] == len); - pgno_t *const detent = range - len + seq; - const ptrdiff_t offset = -(ptrdiff_t)seq; - const pgno_t target = (pgno_t)offset; - const __m256i pattern = 
_mm256_set1_epi32(target); - uint8_t mask; - if (likely(len > seq + 7)) { - do { - mask = (uint8_t)diffcmp2mask_avx2(range - 7, offset, pattern); - if (mask) { -#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) - found: -#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ - return range + 24 - __builtin_clz(mask); - } - range -= 8; - } while (range > detent + 7); - if (range == detent) - return nullptr; - } - - /* Далее происходит чтение от 4 до 28 лишних байт, которые могут быть не - * только за пределами региона выделенного под PNL, но и пересекать границу - * страницы памяти. Что может приводить как к ошибкам ASAN, так и к падению. - * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. */ -#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) - const unsigned on_page_safe_mask = 0xfe0 /* enough for '-31' bytes offset */; - if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && - !RUNNING_ON_VALGRIND) { - const unsigned extra = (unsigned)(detent + 8 - range); - assert(extra > 0 && extra < 8); - mask = 0xFF << extra; - mask &= diffcmp2mask_avx2(range - 7, offset, pattern); - if (mask) - goto found; - return nullptr; - } -#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ - if (range - 3 > detent) { - mask = diffcmp2mask_sse2avx(range - 3, offset, *(const __m128i *)&pattern); - if (mask) - return range + 28 - __builtin_clz(mask); - range -= 4; - } - while (range > detent) { - if (*range - range[offset] == target) - return range; - --range; - } - return nullptr; -} -#endif /* MDBX_ATTRIBUTE_TARGET_AVX2 */ - -#ifdef MDBX_ATTRIBUTE_TARGET_AVX512BW -MDBX_ATTRIBUTE_TARGET_AVX512BW static __always_inline unsigned -diffcmp2mask_avx512bw(const pgno_t *const ptr, const ptrdiff_t offset, - const __m512i pattern) { - const __m512i f = _mm512_loadu_si512((const __m512i *)ptr); - const __m512i l = _mm512_loadu_si512((const __m512i *)(ptr + offset)); - return _mm512_cmpeq_epi32_mask(_mm512_sub_epi32(f, l), pattern); -} - 
-MDBX_MAYBE_UNUSED __hot MDBX_ATTRIBUTE_TARGET_AVX512BW static pgno_t * -scan4seq_avx512bw(pgno_t *range, const size_t len, const size_t seq) { - assert(seq > 0 && len > seq); -#if MDBX_PNL_ASCENDING -#error "FIXME: Not implemented" -#endif /* MDBX_PNL_ASCENDING */ - assert(range[-(ptrdiff_t)len] == len); - pgno_t *const detent = range - len + seq; - const ptrdiff_t offset = -(ptrdiff_t)seq; - const pgno_t target = (pgno_t)offset; - const __m512i pattern = _mm512_set1_epi32(target); - unsigned mask; - if (likely(len > seq + 15)) { - do { - mask = diffcmp2mask_avx512bw(range - 15, offset, pattern); - if (mask) { -#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) - found: -#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ - return range + 16 - __builtin_clz(mask); - } - range -= 16; - } while (range > detent + 15); - if (range == detent) - return nullptr; - } - - /* Далее происходит чтение от 4 до 60 лишних байт, которые могут быть не - * только за пределами региона выделенного под PNL, но и пересекать границу - * страницы памяти. Что может приводить как к ошибкам ASAN, так и к падению. - * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. 
*/ -#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) - const unsigned on_page_safe_mask = 0xfc0 /* enough for '-63' bytes offset */; - if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && - !RUNNING_ON_VALGRIND) { - const unsigned extra = (unsigned)(detent + 16 - range); - assert(extra > 0 && extra < 16); - mask = 0xFFFF << extra; - mask &= diffcmp2mask_avx512bw(range - 15, offset, pattern); - if (mask) - goto found; - return nullptr; - } -#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ - if (range - 7 > detent) { - mask = diffcmp2mask_avx2(range - 7, offset, *(const __m256i *)&pattern); - if (mask) - return range + 24 - __builtin_clz(mask); - range -= 8; - } - if (range - 3 > detent) { - mask = diffcmp2mask_sse2avx(range - 3, offset, *(const __m128i *)&pattern); - if (mask) - return range + 28 - __builtin_clz(mask); - range -= 4; - } - while (range > detent) { - if (*range - range[offset] == target) - return range; - --range; - } - return nullptr; -} -#endif /* MDBX_ATTRIBUTE_TARGET_AVX512BW */ - -#if (defined(__ARM_NEON) || defined(__ARM_NEON__)) && \ - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -static __always_inline size_t diffcmp2mask_neon(const pgno_t *const ptr, - const ptrdiff_t offset, - const uint32x4_t pattern) { - const uint32x4_t f = vld1q_u32(ptr); - const uint32x4_t l = vld1q_u32(ptr + offset); - const uint16x4_t cmp = vmovn_u32(vceqq_u32(vsubq_u32(f, l), pattern)); - if (sizeof(size_t) > 7) - return vget_lane_u64(vreinterpret_u64_u16(cmp), 0); - else - return vget_lane_u32(vreinterpret_u32_u8(vmovn_u16(vcombine_u16(cmp, cmp))), - 0); -} - -__hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len, - const size_t seq) { - assert(seq > 0 && len > seq); -#if MDBX_PNL_ASCENDING -#error "FIXME: Not implemented" -#endif /* MDBX_PNL_ASCENDING */ - assert(range[-(ptrdiff_t)len] == len); - pgno_t *const detent = range - len + seq; - const ptrdiff_t offset = -(ptrdiff_t)seq; - const pgno_t target = (pgno_t)offset; - 
const uint32x4_t pattern = vmovq_n_u32(target); - size_t mask; - if (likely(len > seq + 3)) { - do { - mask = diffcmp2mask_neon(range - 3, offset, pattern); - if (mask) { -#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) - found: -#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ - return ptr_disp(range, -(__builtin_clzl(mask) >> sizeof(size_t) / 4)); - } - range -= 4; - } while (range > detent + 3); - if (range == detent) - return nullptr; - } - - /* Далее происходит чтение от 4 до 12 лишних байт, которые могут быть не - * только за пределами региона выделенного под PNL, но и пересекать границу - * страницы памяти. Что может приводить как к ошибкам ASAN, так и к падению. - * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. */ -#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) - const unsigned on_page_safe_mask = 0xff0 /* enough for '-15' bytes offset */; - if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && - !RUNNING_ON_VALGRIND) { - const unsigned extra = (unsigned)(detent + 4 - range); - assert(extra > 0 && extra < 4); - mask = (~(size_t)0) << (extra * sizeof(size_t) * 2); - mask &= diffcmp2mask_neon(range - 3, offset, pattern); - if (mask) - goto found; - return nullptr; - } -#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ - do - if (*range - range[offset] == target) - return range; - while (--range != detent); - return nullptr; -} -#endif /* __ARM_NEON || __ARM_NEON__ */ - -#if defined(__AVX512BW__) && defined(MDBX_ATTRIBUTE_TARGET_AVX512BW) -#define scan4seq_default scan4seq_avx512bw -#define scan4seq_impl scan4seq_default -#elif defined(__AVX2__) && defined(MDBX_ATTRIBUTE_TARGET_AVX2) -#define scan4seq_default scan4seq_avx2 -#elif defined(__SSE2__) && defined(MDBX_ATTRIBUTE_TARGET_SSE2) -#define scan4seq_default scan4seq_sse2 -#elif (defined(__ARM_NEON) || defined(__ARM_NEON__)) && \ - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -#define scan4seq_default scan4seq_neon -/* Choosing of 
another variants should be added here. */ -#endif /* scan4seq_default */ - -#endif /* MDBX_PNL_ASCENDING */ - -#ifndef scan4seq_default -#define scan4seq_default scan4seq_fallback -#endif /* scan4seq_default */ - -#ifdef scan4seq_impl -/* The scan4seq_impl() is the best or no alternatives */ -#elif !MDBX_HAVE_BUILTIN_CPU_SUPPORTS -/* The scan4seq_default() will be used since no cpu-features detection support - * from compiler. Please don't ask to implement cpuid-based detection and don't - * make such PRs. */ -#define scan4seq_impl scan4seq_default -#else -/* Selecting the most appropriate implementation at runtime, - * depending on the available CPU features. */ -static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len, - const size_t seq); -static pgno_t *(*scan4seq_impl)(pgno_t *range, const size_t len, - const size_t seq) = scan4seq_resolver; - -static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len, - const size_t seq) { - pgno_t *(*choice)(pgno_t *range, const size_t len, const size_t seq) = - nullptr; -#if __has_builtin(__builtin_cpu_init) || defined(__BUILTIN_CPU_INIT__) || \ - __GNUC_PREREQ(4, 8) - __builtin_cpu_init(); -#endif /* __builtin_cpu_init() */ -#ifdef MDBX_ATTRIBUTE_TARGET_SSE2 - if (__builtin_cpu_supports("sse2")) - choice = scan4seq_sse2; -#endif /* MDBX_ATTRIBUTE_TARGET_SSE2 */ -#ifdef MDBX_ATTRIBUTE_TARGET_AVX2 - if (__builtin_cpu_supports("avx2")) - choice = scan4seq_avx2; -#endif /* MDBX_ATTRIBUTE_TARGET_AVX2 */ -#ifdef MDBX_ATTRIBUTE_TARGET_AVX512BW - if (__builtin_cpu_supports("avx512bw")) - choice = scan4seq_avx512bw; -#endif /* MDBX_ATTRIBUTE_TARGET_AVX512BW */ - /* Choosing of another variants should be added here. */ - scan4seq_impl = choice ? 
choice : scan4seq_default; - return scan4seq_impl(range, len, seq); -} -#endif /* scan4seq_impl */ - -//------------------------------------------------------------------------------ - -#define MDBX_ALLOC_DEFAULT 0 -#define MDBX_ALLOC_RESERVE 1 -#define MDBX_ALLOC_UNIMPORTANT 2 -#define MDBX_ALLOC_COALESCE 4 /* внутреннее состояние */ -#define MDBX_ALLOC_SHOULD_SCAN 8 /* внутреннее состояние */ -#define MDBX_ALLOC_LIFO 16 /* внутреннее состояние */ - -static __inline bool is_gc_usable(MDBX_txn *txn, const MDBX_cursor *mc, - const uint8_t flags) { - /* If txn is updating the GC, then the retired-list cannot play catch-up with - * itself by growing while trying to save it. */ - if (mc->mc_dbi == FREE_DBI && !(flags & MDBX_ALLOC_RESERVE) && - !(mc->mc_flags & C_GCU)) - return false; - - /* avoid search inside empty tree and while tree is updating, - https://libmdbx.dqdkfa.ru/dead-github/issues/31 */ - if (unlikely(txn->mt_dbs[FREE_DBI].md_entries == 0)) { - txn->mt_flags |= MDBX_TXN_DRAINED_GC; - return false; - } - - return true; -} - -__hot static bool is_already_reclaimed(const MDBX_txn *txn, txnid_t id) { - const size_t len = MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed); - for (size_t i = 1; i <= len; ++i) - if (txn->tw.lifo_reclaimed[i] == id) - return true; - return false; -} - -__hot static pgno_t relist_get_single(MDBX_txn *txn) { - const size_t len = MDBX_PNL_GETSIZE(txn->tw.relist); - assert(len > 0); - pgno_t *target = MDBX_PNL_EDGE(txn->tw.relist); - const ptrdiff_t dir = MDBX_PNL_ASCENDING ? 1 : -1; - - /* Есть ТРИ потенциально выигрышные, но противо-направленные тактики: - * - * 1. Стараться использовать страницы с наименьшими номерами. Так обмен с - * диском будет более кучным, а у страниц ближе к концу БД будет больше шансов - * попасть под авто-компактификацию. 
Частично эта тактика уже реализована, но - * для её эффективности требуется явно приоритезировать выделение страниц: - * - поддерживать для relist, для ближних и для дальних страниц; - * - использовать страницы из дальнего списка, если первый пуст, - * а второй слишком большой, либо при пустой GC. - * - * 2. Стараться выделять страницы последовательно. Так записываемые на диск - * регионы будут линейными, что принципиально ускоряет запись на HDD. - * Одновременно, в среднем это не повлияет на чтение, точнее говоря, если - * порядок чтения не совпадает с порядком изменения (иначе говоря, если - * чтение не коррклирует с обновлениями и/или вставками) то не повлияет, иначе - * может ускорить. Однако, последовательности в среднем достаточно редки. - * Поэтому для эффективности требуется аккумулировать и поддерживать в ОЗУ - * огромные списки страниц, а затем сохранять их обратно в БД. Текущий формат - * БД (без битовых карт) для этого крайне не удачен. Поэтому эта тактика не - * имеет шансов быть успешной без смены формата БД (Mithril). - * - * 3. Стараться экономить последовательности страниц. Это позволяет избегать - * лишнего чтения/поиска в GC при более-менее постоянном размещении и/или - * обновлении данных требующих более одной страницы. Проблема в том, что без - * информации от приложения библиотека не может знать насколько - * востребованными будут последовательности в ближайшей перспективе, а - * экономия последовательностей "на всякий случай" не только затратна - * сама-по-себе, но и работает во вред. - * - * Поэтому: - * - в TODO добавляется разделение relist на «ближние» и «дальние» страницы, - * с последующей реализацией первой тактики; - * - преимущественное использование последовательностей отправляется - * в MithrilDB как составляющая "HDD frendly" feature; - * - реализованная в 3757eb72f7c6b46862f8f17881ac88e8cecc1979 экономия - * последовательностей отключается через MDBX_ENABLE_SAVING_SEQUENCES=0. 
- * - * В качестве альтернативы для безусловной «экономии» последовательностей, - * в следующих версиях libmdbx, вероятно, будет предложено - * API для взаимодействия с GC: - * - получение размера GC, включая гистограммы размеров последовательностей - * и близости к концу БД; - * - включение формирования "линейного запаса" для последующего использования - * в рамках текущей транзакции; - * - намеренная загрузка GC в память для коагуляции и "выпрямления"; - * - намеренное копирование данных из страниц в конце БД для последующего - * из освобождения, т.е. контролируемая компактификация по запросу. */ - -#ifndef MDBX_ENABLE_SAVING_SEQUENCES -#define MDBX_ENABLE_SAVING_SEQUENCES 0 -#endif - if (MDBX_ENABLE_SAVING_SEQUENCES && unlikely(target[dir] == *target + 1) && - len > 2) { - /* Пытаемся пропускать последовательности при наличии одиночных элементов. - * TODO: необходимо кэшировать пропускаемые последовательности - * чтобы не сканировать список сначала при каждом выделении. */ - pgno_t *scan = target + dir + dir; - size_t left = len; - do { - if (likely(scan[-dir] != *scan - 1 && *scan + 1 != scan[dir])) { -#if MDBX_PNL_ASCENDING - target = scan; - break; -#else - /* вырезаем элемент с перемещением хвоста */ - const pgno_t pgno = *scan; - MDBX_PNL_SETSIZE(txn->tw.relist, len - 1); - while (++scan <= target) - scan[-1] = *scan; - return pgno; -#endif - } - scan += dir; - } while (--left > 2); - } - - const pgno_t pgno = *target; -#if MDBX_PNL_ASCENDING - /* вырезаем элемент с перемещением хвоста */ - MDBX_PNL_SETSIZE(txn->tw.relist, len - 1); - for (const pgno_t *const end = txn->tw.relist + len - 1; target <= end; - ++target) - *target = target[1]; -#else - /* перемещать хвост не нужно, просто усекам список */ - MDBX_PNL_SETSIZE(txn->tw.relist, len - 1); -#endif - return pgno; -} - -__hot static pgno_t relist_get_sequence(MDBX_txn *txn, const size_t num, - uint8_t flags) { - const size_t len = MDBX_PNL_GETSIZE(txn->tw.relist); - pgno_t *edge = 
MDBX_PNL_EDGE(txn->tw.relist); - assert(len >= num && num > 1); - const size_t seq = num - 1; -#if !MDBX_PNL_ASCENDING - if (edge[-(ptrdiff_t)seq] - *edge == seq) { - if (unlikely(flags & MDBX_ALLOC_RESERVE)) - return P_INVALID; - assert(edge == scan4range_checker(txn->tw.relist, seq)); - /* перемещать хвост не нужно, просто усекам список */ - MDBX_PNL_SETSIZE(txn->tw.relist, len - num); - return *edge; - } -#endif - pgno_t *target = scan4seq_impl(edge, len, seq); - assert(target == scan4range_checker(txn->tw.relist, seq)); - if (target) { - if (unlikely(flags & MDBX_ALLOC_RESERVE)) - return P_INVALID; - const pgno_t pgno = *target; - /* вырезаем найденную последовательность с перемещением хвоста */ - MDBX_PNL_SETSIZE(txn->tw.relist, len - num); -#if MDBX_PNL_ASCENDING - for (const pgno_t *const end = txn->tw.relist + len - num; target <= end; - ++target) - *target = target[num]; -#else - for (const pgno_t *const end = txn->tw.relist + len; ++target <= end;) - target[-(ptrdiff_t)num] = *target; -#endif - return pgno; - } - return 0; -} - -#if MDBX_ENABLE_MINCORE -static __inline bool bit_tas(uint64_t *field, char bit) { - const uint64_t m = UINT64_C(1) << bit; - const bool r = (*field & m) != 0; - *field |= m; - return r; -} - -static bool mincore_fetch(MDBX_env *const env, const size_t unit_begin) { - MDBX_lockinfo *const lck = env->me_lck; - for (size_t i = 1; i < ARRAY_LENGTH(lck->mti_mincore_cache.begin); ++i) { - const ptrdiff_t dist = unit_begin - lck->mti_mincore_cache.begin[i]; - if (likely(dist >= 0 && dist < 64)) { - const pgno_t tmp_begin = lck->mti_mincore_cache.begin[i]; - const uint64_t tmp_mask = lck->mti_mincore_cache.mask[i]; - do { - lck->mti_mincore_cache.begin[i] = lck->mti_mincore_cache.begin[i - 1]; - lck->mti_mincore_cache.mask[i] = lck->mti_mincore_cache.mask[i - 1]; - } while (--i); - lck->mti_mincore_cache.begin[0] = tmp_begin; - lck->mti_mincore_cache.mask[0] = tmp_mask; - return bit_tas(lck->mti_mincore_cache.mask, (char)dist); - } - } - 
- size_t pages = 64; - unsigned unit_log = sys_pagesize_ln2; - unsigned shift = 0; - if (env->me_psize > env->me_os_psize) { - unit_log = env->me_psize2log; - shift = env->me_psize2log - sys_pagesize_ln2; - pages <<= shift; - } - - const size_t offset = unit_begin << unit_log; - size_t length = pages << sys_pagesize_ln2; - if (offset + length > env->me_dxb_mmap.current) { - length = env->me_dxb_mmap.current - offset; - pages = length >> sys_pagesize_ln2; - } - -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.mincore.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - uint8_t *const vector = alloca(pages); - if (unlikely(mincore(ptr_disp(env->me_dxb_mmap.base, offset), length, - (void *)vector))) { - NOTICE("mincore(+%zu, %zu), err %d", offset, length, errno); - return false; - } - - for (size_t i = 1; i < ARRAY_LENGTH(lck->mti_mincore_cache.begin); ++i) { - lck->mti_mincore_cache.begin[i] = lck->mti_mincore_cache.begin[i - 1]; - lck->mti_mincore_cache.mask[i] = lck->mti_mincore_cache.mask[i - 1]; - } - lck->mti_mincore_cache.begin[0] = unit_begin; - - uint64_t mask = 0; -#ifdef MINCORE_INCORE - STATIC_ASSERT(MINCORE_INCORE == 1); -#endif - for (size_t i = 0; i < pages; ++i) { - uint64_t bit = (vector[i] & 1) == 0; - bit <<= i >> shift; - mask |= bit; - } - - lck->mti_mincore_cache.mask[0] = ~mask; - return bit_tas(lck->mti_mincore_cache.mask, 0); -} -#endif /* MDBX_ENABLE_MINCORE */ - -MDBX_MAYBE_UNUSED static __inline bool mincore_probe(MDBX_env *const env, - const pgno_t pgno) { -#if MDBX_ENABLE_MINCORE - const size_t offset_aligned = - floor_powerof2(pgno2bytes(env, pgno), env->me_os_psize); - const unsigned unit_log2 = (env->me_psize2log > sys_pagesize_ln2) - ? 
env->me_psize2log - : sys_pagesize_ln2; - const size_t unit_begin = offset_aligned >> unit_log2; - eASSERT(env, (unit_begin << unit_log2) == offset_aligned); - const ptrdiff_t dist = unit_begin - env->me_lck->mti_mincore_cache.begin[0]; - if (likely(dist >= 0 && dist < 64)) - return bit_tas(env->me_lck->mti_mincore_cache.mask, (char)dist); - return mincore_fetch(env, unit_begin); -#else - (void)env; - (void)pgno; - return false; -#endif /* MDBX_ENABLE_MINCORE */ -} - -static __inline pgr_t page_alloc_finalize(MDBX_env *const env, - MDBX_txn *const txn, - const MDBX_cursor *const mc, - const pgno_t pgno, const size_t num) { -#if MDBX_ENABLE_PROFGC - size_t majflt_before; - const uint64_t cputime_before = osal_cputime(&majflt_before); - profgc_stat_t *const prof = (mc->mc_dbi == FREE_DBI) - ? &env->me_lck->mti_pgop_stat.gc_prof.self - : &env->me_lck->mti_pgop_stat.gc_prof.work; -#else - (void)mc; -#endif /* MDBX_ENABLE_PROFGC */ - ENSURE(env, pgno >= NUM_METAS); - - pgr_t ret; - bool need_clean = (env->me_flags & MDBX_PAGEPERTURB) != 0; - if (env->me_flags & MDBX_WRITEMAP) { - ret.page = pgno2page(env, pgno); - MDBX_ASAN_UNPOISON_MEMORY_REGION(ret.page, pgno2bytes(env, num)); - VALGRIND_MAKE_MEM_UNDEFINED(ret.page, pgno2bytes(env, num)); - - /* Содержимое выделенной страницы не нужно, но если страница отсутствует - * в ОЗУ (что весьма вероятно), то любое обращение к ней приведет - * к page-fault: - * - прерыванию по отсутствию страницы; - * - переключение контекста в режим ядра с засыпанием процесса; - * - чтение страницы с диска; - * - обновление PTE и пробуждением процесса; - * - переключение контекста по доступности ЦПУ. - * - * Пытаемся минимизировать накладные расходы записывая страницу, что при - * наличии unified page cache приведет к появлению страницы в ОЗУ без чтения - * с диска. При этом запись на диск должна быть отложена адекватным ядром, - * так как страница отображена в память в режиме чтения-записи и следом в - * неё пишет ЦПУ. 
*/ - - /* В случае если страница в памяти процесса, то излишняя запись может быть - * достаточно дорогой. Кроме системного вызова и копирования данных, в особо - * одаренных ОС при этом могут включаться файловая система, выделяться - * временная страница, пополняться очереди асинхронного выполнения, - * обновляться PTE с последующей генерацией page-fault и чтением данных из - * грязной I/O очереди. Из-за этого штраф за лишнюю запись может быть - * сравним с избегаемым ненужным чтением. */ - if (env->me_prefault_write) { - void *const pattern = ptr_disp( - env->me_pbuf, need_clean ? env->me_psize : env->me_psize * 2); - size_t file_offset = pgno2bytes(env, pgno); - if (likely(num == 1)) { - if (!mincore_probe(env, pgno)) { - osal_pwrite(env->me_lazy_fd, pattern, env->me_psize, file_offset); -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.prefault.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - need_clean = false; - } - } else { - struct iovec iov[MDBX_AUXILARY_IOV_MAX]; - size_t n = 0, cleared = 0; - for (size_t i = 0; i < num; ++i) { - if (!mincore_probe(env, pgno + (pgno_t)i)) { - ++cleared; - iov[n].iov_len = env->me_psize; - iov[n].iov_base = pattern; - if (unlikely(++n == MDBX_AUXILARY_IOV_MAX)) { - osal_pwritev(env->me_lazy_fd, iov, MDBX_AUXILARY_IOV_MAX, - file_offset); -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.prefault.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - file_offset += pgno2bytes(env, MDBX_AUXILARY_IOV_MAX); - n = 0; - } - } - } - if (likely(n > 0)) { - osal_pwritev(env->me_lazy_fd, iov, n, file_offset); -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.prefault.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - } - if (cleared == num) - need_clean = false; - } - } - } else { - ret.page = page_malloc(txn, num); - if (unlikely(!ret.page)) { - ret.err = MDBX_ENOMEM; - goto bailout; - } - } - - if (unlikely(need_clean)) - memset(ret.page, -1, pgno2bytes(env, num)); - - VALGRIND_MAKE_MEM_UNDEFINED(ret.page, pgno2bytes(env, 
num)); - ret.page->mp_pgno = pgno; - ret.page->mp_leaf2_ksize = 0; - ret.page->mp_flags = 0; - if ((ASSERT_ENABLED() || AUDIT_ENABLED()) && num > 1) { - ret.page->mp_pages = (pgno_t)num; - ret.page->mp_flags = P_OVERFLOW; - } - - ret.err = page_dirty(txn, ret.page, (pgno_t)num); -bailout: - tASSERT(txn, pnl_check_allocated(txn->tw.relist, - txn->mt_next_pgno - MDBX_ENABLE_REFUND)); -#if MDBX_ENABLE_PROFGC - size_t majflt_after; - prof->xtime_cpu += osal_cputime(&majflt_after) - cputime_before; - prof->majflt += (uint32_t)(majflt_after - majflt_before); -#endif /* MDBX_ENABLE_PROFGC */ - return ret; -} - -struct monotime_cache { - uint64_t value; - int expire_countdown; -}; - -static __inline uint64_t monotime_since_cached(uint64_t begin_timestamp, - struct monotime_cache *cache) { - if (cache->expire_countdown) - cache->expire_countdown -= 1; - else { - cache->value = osal_monotime(); - cache->expire_countdown = 42 / 3; - } - return cache->value - begin_timestamp; -} - -static pgr_t page_alloc_slowpath(const MDBX_cursor *const mc, const size_t num, - uint8_t flags) { - pgr_t ret; - MDBX_txn *const txn = mc->mc_txn; - MDBX_env *const env = txn->mt_env; -#if MDBX_ENABLE_PROFGC - profgc_stat_t *const prof = (mc->mc_dbi == FREE_DBI) - ? &env->me_lck->mti_pgop_stat.gc_prof.self - : &env->me_lck->mti_pgop_stat.gc_prof.work; - prof->spe_counter += 1; -#endif /* MDBX_ENABLE_PROFGC */ - - eASSERT(env, num > 0 || (flags & MDBX_ALLOC_RESERVE)); - eASSERT(env, pnl_check_allocated(txn->tw.relist, - txn->mt_next_pgno - MDBX_ENABLE_REFUND)); - - size_t newnext; - const uint64_t monotime_begin = - (MDBX_ENABLE_PROFGC || (num > 1 && env->me_options.gc_time_limit)) - ? 
osal_monotime() - : 0; - struct monotime_cache now_cache; - now_cache.expire_countdown = - 1 /* старт с 1 позволяет избавиться как от лишних системных вызовов когда - лимит времени задан нулевой или уже исчерпан, так и от подсчета - времени при не-достижении rp_augment_limit */ - ; - now_cache.value = monotime_begin; - pgno_t pgno = 0; - if (num > 1) { -#if MDBX_ENABLE_PROFGC - prof->xpages += 1; -#endif /* MDBX_ENABLE_PROFGC */ - if (MDBX_PNL_GETSIZE(txn->tw.relist) >= num) { - eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno && - MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno); - pgno = relist_get_sequence(txn, num, flags); - if (likely(pgno)) - goto done; - } - } else { - eASSERT(env, num == 0 || MDBX_PNL_GETSIZE(txn->tw.relist) == 0); - eASSERT(env, !(flags & MDBX_ALLOC_RESERVE) || num == 0); - } - - //--------------------------------------------------------------------------- - - if (unlikely(!is_gc_usable(txn, mc, flags))) { - eASSERT(env, (txn->mt_flags & MDBX_TXN_DRAINED_GC) || num > 1); - goto no_gc; - } - - eASSERT(env, (flags & (MDBX_ALLOC_COALESCE | MDBX_ALLOC_LIFO | - MDBX_ALLOC_SHOULD_SCAN)) == 0); - flags += (env->me_flags & MDBX_LIFORECLAIM) ? MDBX_ALLOC_LIFO : 0; - - if (/* Не коагулируем записи при подготовке резерва для обновления GC. - * Иначе попытка увеличить резерв может приводить к необходимости ещё - * большего резерва из-за увеличения списка переработанных страниц. 
*/ - (flags & MDBX_ALLOC_RESERVE) == 0) { - if (txn->mt_dbs[FREE_DBI].md_branch_pages && - MDBX_PNL_GETSIZE(txn->tw.relist) < env->me_maxgc_ov1page / 2) - flags += MDBX_ALLOC_COALESCE; - } - - MDBX_cursor *const gc = ptr_disp(env->me_txn0, sizeof(MDBX_txn)); - eASSERT(env, mc != gc && gc->mc_next == nullptr); - gc->mc_txn = txn; - gc->mc_flags = 0; - - env->me_prefault_write = env->me_options.prefault_write; - if (env->me_prefault_write) { - /* Проверка посредством minicore() существенно снижает затраты, но в - * простейших случаях (тривиальный бенчмарк) интегральная производительность - * становится вдвое меньше. А на платформах без mincore() и с проблемной - * подсистемой виртуальной памяти ситуация может быть многократно хуже. - * Поэтому избегаем затрат в ситуациях когда prefault-write скорее всего не - * нужна. */ - const bool readahead_enabled = env->me_lck->mti_readahead_anchor & 1; - const pgno_t readahead_edge = env->me_lck->mti_readahead_anchor >> 1; - if (/* Не суетимся если GC почти пустая и БД маленькая */ - (txn->mt_dbs[FREE_DBI].md_branch_pages == 0 && - txn->mt_geo.now < 1234) || - /* Не суетимся если страница в зоне включенного упреждающего чтения */ - (readahead_enabled && pgno + num < readahead_edge)) - env->me_prefault_write = false; - } - -retry_gc_refresh_oldest:; - txnid_t oldest = txn_oldest_reader(txn); -retry_gc_have_oldest: - if (unlikely(oldest >= txn->mt_txnid)) { - ERROR("unexpected/invalid oldest-readed txnid %" PRIaTXN - " for current-txnid %" PRIaTXN, - oldest, txn->mt_txnid); - ret.err = MDBX_PROBLEM; - goto fail; - } - const txnid_t detent = oldest + 1; - - txnid_t id = 0; - MDBX_cursor_op op = MDBX_FIRST; - if (flags & MDBX_ALLOC_LIFO) { - if (!txn->tw.lifo_reclaimed) { - txn->tw.lifo_reclaimed = txl_alloc(); - if (unlikely(!txn->tw.lifo_reclaimed)) { - ret.err = MDBX_ENOMEM; - goto fail; - } - } - /* Begin lookup backward from oldest reader */ - id = detent - 1; - op = MDBX_SET_RANGE; - } else if (txn->tw.last_reclaimed) { - /* 
Continue lookup forward from last-reclaimed */ - id = txn->tw.last_reclaimed + 1; - if (id >= detent) - goto depleted_gc; - op = MDBX_SET_RANGE; - } - -next_gc:; - MDBX_val key; - key.iov_base = &id; - key.iov_len = sizeof(id); - -#if MDBX_ENABLE_PROFGC - prof->rsteps += 1; -#endif /* MDBX_ENABLE_PROFGC */ - - /* Seek first/next GC record */ - ret.err = cursor_get(gc, &key, NULL, op); - if (unlikely(ret.err != MDBX_SUCCESS)) { - if (unlikely(ret.err != MDBX_NOTFOUND)) - goto fail; - if ((flags & MDBX_ALLOC_LIFO) && op == MDBX_SET_RANGE) { - op = MDBX_PREV; - goto next_gc; - } - goto depleted_gc; - } - if (unlikely(key.iov_len != sizeof(txnid_t))) { - ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid GC key-length"); - ret.err = MDBX_CORRUPTED; - goto fail; - } - id = unaligned_peek_u64(4, key.iov_base); - if (flags & MDBX_ALLOC_LIFO) { - op = MDBX_PREV; - if (id >= detent || is_already_reclaimed(txn, id)) - goto next_gc; - } else { - op = MDBX_NEXT; - if (unlikely(id >= detent)) - goto depleted_gc; - } - txn->mt_flags &= ~MDBX_TXN_DRAINED_GC; - - /* Reading next GC record */ - MDBX_val data; - MDBX_page *const mp = gc->mc_pg[gc->mc_top]; - if (unlikely((ret.err = node_read(gc, page_node(mp, gc->mc_ki[gc->mc_top]), - &data, mp)) != MDBX_SUCCESS)) - goto fail; - - pgno_t *gc_pnl = (pgno_t *)data.iov_base; - if (unlikely(data.iov_len % sizeof(pgno_t) || - data.iov_len < MDBX_PNL_SIZEOF(gc_pnl) || - !pnl_check(gc_pnl, txn->mt_next_pgno))) { - ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid GC value-length"); - ret.err = MDBX_CORRUPTED; - goto fail; - } - - const size_t gc_len = MDBX_PNL_GETSIZE(gc_pnl); - TRACE("gc-read: id #%" PRIaTXN " len %zu, re-list will %zu ", id, gc_len, - gc_len + MDBX_PNL_GETSIZE(txn->tw.relist)); - - if (unlikely(gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) >= - env->me_maxgc_ov1page)) { - /* Don't try to coalesce too much. 
*/ - if (flags & MDBX_ALLOC_SHOULD_SCAN) { - eASSERT(env, flags & MDBX_ALLOC_COALESCE); - eASSERT(env, !(flags & MDBX_ALLOC_RESERVE)); - eASSERT(env, num > 0); -#if MDBX_ENABLE_PROFGC - env->me_lck->mti_pgop_stat.gc_prof.coalescences += 1; -#endif /* MDBX_ENABLE_PROFGC */ - TRACE("clear %s %s", "MDBX_ALLOC_COALESCE", "since got threshold"); - if (MDBX_PNL_GETSIZE(txn->tw.relist) >= num) { - eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno && - MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno); - if (likely(num == 1)) { - pgno = relist_get_single(txn); - goto done; - } - pgno = relist_get_sequence(txn, num, flags); - if (likely(pgno)) - goto done; - } - flags -= MDBX_ALLOC_COALESCE | MDBX_ALLOC_SHOULD_SCAN; - } - if (unlikely(/* list is too long already */ MDBX_PNL_GETSIZE( - txn->tw.relist) >= env->me_options.rp_augment_limit) && - ((/* not a slot-request from gc-update */ num && - /* have enough unallocated space */ txn->mt_geo.upper >= - txn->mt_next_pgno + num && - monotime_since_cached(monotime_begin, &now_cache) + - txn->tw.gc_time_acc >= - env->me_options.gc_time_limit) || - gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) >= MDBX_PGL_LIMIT)) { - /* Stop reclaiming to avoid large/overflow the page list. This is a rare - * case while search for a continuously multi-page region in a - * large database, see https://libmdbx.dqdkfa.ru/dead-github/issues/123 */ - NOTICE("stop reclaiming %s: %zu (current) + %zu " - "(chunk) -> %zu, rp_augment_limit %u", - likely(gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) < MDBX_PGL_LIMIT) - ? 
"since rp_augment_limit was reached" - : "to avoid PNL overflow", - MDBX_PNL_GETSIZE(txn->tw.relist), gc_len, - gc_len + MDBX_PNL_GETSIZE(txn->tw.relist), - env->me_options.rp_augment_limit); - goto depleted_gc; - } - } - - /* Remember ID of readed GC record */ - txn->tw.last_reclaimed = id; - if (flags & MDBX_ALLOC_LIFO) { - ret.err = txl_append(&txn->tw.lifo_reclaimed, id); - if (unlikely(ret.err != MDBX_SUCCESS)) - goto fail; - } - - /* Append PNL from GC record to tw.relist */ - ret.err = pnl_need(&txn->tw.relist, gc_len); - if (unlikely(ret.err != MDBX_SUCCESS)) - goto fail; - - if (LOG_ENABLED(MDBX_LOG_EXTRA)) { - DEBUG_EXTRA("readed GC-pnl txn %" PRIaTXN " root %" PRIaPGNO - " len %zu, PNL", - id, txn->mt_dbs[FREE_DBI].md_root, gc_len); - for (size_t i = gc_len; i; i--) - DEBUG_EXTRA_PRINT(" %" PRIaPGNO, gc_pnl[i]); - DEBUG_EXTRA_PRINT(", next_pgno %u\n", txn->mt_next_pgno); - } - - /* Merge in descending sorted order */ - pnl_merge(txn->tw.relist, gc_pnl); - flags |= MDBX_ALLOC_SHOULD_SCAN; - if (AUDIT_ENABLED()) { - if (unlikely(!pnl_check(txn->tw.relist, txn->mt_next_pgno))) { - ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid txn retired-list"); - ret.err = MDBX_CORRUPTED; - goto fail; - } - } else { - eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->mt_next_pgno)); - } - eASSERT(env, dirtylist_check(txn)); - - eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.relist) == 0 || - MDBX_PNL_MOST(txn->tw.relist) < txn->mt_next_pgno); - if (MDBX_ENABLE_REFUND && MDBX_PNL_GETSIZE(txn->tw.relist) && - unlikely(MDBX_PNL_MOST(txn->tw.relist) == txn->mt_next_pgno - 1)) { - /* Refund suitable pages into "unallocated" space */ - txn_refund(txn); - } - eASSERT(env, pnl_check_allocated(txn->tw.relist, - txn->mt_next_pgno - MDBX_ENABLE_REFUND)); - - /* Done for a kick-reclaim mode, actually no page needed */ - if (unlikely(num == 0)) { - eASSERT(env, ret.err == MDBX_SUCCESS); - TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "early-exit for slot", id, - 
MDBX_PNL_GETSIZE(txn->tw.relist)); - goto early_exit; - } - - /* TODO: delete reclaimed records */ - - eASSERT(env, op == MDBX_PREV || op == MDBX_NEXT); - if (flags & MDBX_ALLOC_COALESCE) { - TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "coalesce-continue", id, - MDBX_PNL_GETSIZE(txn->tw.relist)); - goto next_gc; - } - -scan: - eASSERT(env, flags & MDBX_ALLOC_SHOULD_SCAN); - eASSERT(env, num > 0); - if (MDBX_PNL_GETSIZE(txn->tw.relist) >= num) { - eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno && - MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno); - if (likely(num == 1)) { - eASSERT(env, !(flags & MDBX_ALLOC_RESERVE)); - pgno = relist_get_single(txn); - goto done; - } - pgno = relist_get_sequence(txn, num, flags); - if (likely(pgno)) - goto done; - } - flags -= MDBX_ALLOC_SHOULD_SCAN; - if (ret.err == MDBX_SUCCESS) { - TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "continue-search", id, - MDBX_PNL_GETSIZE(txn->tw.relist)); - goto next_gc; - } - -depleted_gc: - TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "gc-depleted", id, - MDBX_PNL_GETSIZE(txn->tw.relist)); - ret.err = MDBX_NOTFOUND; - if (flags & MDBX_ALLOC_SHOULD_SCAN) - goto scan; - txn->mt_flags |= MDBX_TXN_DRAINED_GC; - - //------------------------------------------------------------------------- - - /* There is no suitable pages in the GC and to be able to allocate - * we should CHOICE one of: - * - make a new steady checkpoint if reclaiming was stopped by - * the last steady-sync, or wipe it in the MDBX_UTTERLY_NOSYNC mode; - * - kick lagging reader(s) if reclaiming was stopped by ones of it. - * - extend the database file. */ - - /* Will use new pages from the map if nothing is suitable in the GC. */ - newnext = txn->mt_next_pgno + num; - - /* Does reclaiming stopped at the last steady point? 
*/ - const meta_ptr_t recent = meta_recent(env, &txn->tw.troika); - const meta_ptr_t prefer_steady = meta_prefer_steady(env, &txn->tw.troika); - if (recent.ptr_c != prefer_steady.ptr_c && prefer_steady.is_steady && - detent == prefer_steady.txnid + 1) { - DEBUG("gc-kick-steady: recent %" PRIaTXN "-%s, steady %" PRIaTXN - "-%s, detent %" PRIaTXN, - recent.txnid, durable_caption(recent.ptr_c), prefer_steady.txnid, - durable_caption(prefer_steady.ptr_c), detent); - const pgno_t autosync_threshold = - atomic_load32(&env->me_lck->mti_autosync_threshold, mo_Relaxed); - const uint64_t autosync_period = - atomic_load64(&env->me_lck->mti_autosync_period, mo_Relaxed); - uint64_t eoos_timestamp; - /* wipe the last steady-point if one of: - * - UTTERLY_NOSYNC mode AND auto-sync threshold is NOT specified - * - UTTERLY_NOSYNC mode AND free space at steady-point is exhausted - * otherwise, make a new steady-point if one of: - * - auto-sync threshold is specified and reached; - * - upper limit of database size is reached; - * - database is full (with the current file size) - * AND auto-sync threshold it NOT specified */ - if (F_ISSET(env->me_flags, MDBX_UTTERLY_NOSYNC) && - ((autosync_threshold | autosync_period) == 0 || - newnext >= prefer_steady.ptr_c->mm_geo.now)) { - /* wipe steady checkpoint in MDBX_UTTERLY_NOSYNC mode - * without any auto-sync threshold(s). 
*/ -#if MDBX_ENABLE_PROFGC - env->me_lck->mti_pgop_stat.gc_prof.wipes += 1; -#endif /* MDBX_ENABLE_PROFGC */ - ret.err = wipe_steady(txn, detent); - DEBUG("gc-wipe-steady, rc %d", ret.err); - if (unlikely(ret.err != MDBX_SUCCESS)) - goto fail; - eASSERT(env, prefer_steady.ptr_c != - meta_prefer_steady(env, &txn->tw.troika).ptr_c); - goto retry_gc_refresh_oldest; - } - if ((autosync_threshold && - atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed) >= - autosync_threshold) || - (autosync_period && - (eoos_timestamp = - atomic_load64(&env->me_lck->mti_eoos_timestamp, mo_Relaxed)) && - osal_monotime() - eoos_timestamp >= autosync_period) || - newnext >= txn->mt_geo.upper || - ((num == 0 || newnext >= txn->mt_end_pgno) && - (autosync_threshold | autosync_period) == 0)) { - /* make steady checkpoint. */ -#if MDBX_ENABLE_PROFGC - env->me_lck->mti_pgop_stat.gc_prof.flushes += 1; -#endif /* MDBX_ENABLE_PROFGC */ - MDBX_meta meta = *recent.ptr_c; - ret.err = sync_locked(env, env->me_flags & MDBX_WRITEMAP, &meta, - &txn->tw.troika); - DEBUG("gc-make-steady, rc %d", ret.err); - eASSERT(env, ret.err != MDBX_RESULT_TRUE); - if (unlikely(ret.err != MDBX_SUCCESS)) - goto fail; - eASSERT(env, prefer_steady.ptr_c != - meta_prefer_steady(env, &txn->tw.troika).ptr_c); - goto retry_gc_refresh_oldest; - } - } - - if (unlikely(true == atomic_load32(&env->me_lck->mti_readers_refresh_flag, - mo_AcquireRelease))) { - oldest = txn_oldest_reader(txn); - if (oldest >= detent) - goto retry_gc_have_oldest; - } - - /* Avoid kick lagging reader(s) if is enough unallocated space - * at the end of database file. 
*/ - if (!(flags & MDBX_ALLOC_RESERVE) && newnext <= txn->mt_end_pgno) { - eASSERT(env, pgno == 0); - goto done; - } - - if (oldest < txn->mt_txnid - xMDBX_TXNID_STEP) { - oldest = kick_longlived_readers(env, oldest); - if (oldest >= detent) - goto retry_gc_have_oldest; - } - - //--------------------------------------------------------------------------- - -no_gc: - eASSERT(env, pgno == 0); -#ifndef MDBX_ENABLE_BACKLOG_DEPLETED -#define MDBX_ENABLE_BACKLOG_DEPLETED 0 -#endif /* MDBX_ENABLE_BACKLOG_DEPLETED*/ - if (MDBX_ENABLE_BACKLOG_DEPLETED && - unlikely(!(txn->mt_flags & MDBX_TXN_DRAINED_GC))) { - ret.err = MDBX_BACKLOG_DEPLETED; - goto fail; - } - if (flags & MDBX_ALLOC_RESERVE) { - ret.err = MDBX_NOTFOUND; - goto fail; - } - - /* Will use new pages from the map if nothing is suitable in the GC. */ - newnext = txn->mt_next_pgno + num; - if (newnext <= txn->mt_end_pgno) - goto done; - - if (newnext > txn->mt_geo.upper || !txn->mt_geo.grow_pv) { - NOTICE("gc-alloc: next %zu > upper %" PRIaPGNO, newnext, txn->mt_geo.upper); - ret.err = MDBX_MAP_FULL; - goto fail; - } - - eASSERT(env, newnext > txn->mt_end_pgno); - const size_t grow_step = pv2pages(txn->mt_geo.grow_pv); - size_t aligned = pgno_align2os_pgno( - env, (pgno_t)(newnext + grow_step - newnext % grow_step)); - - if (aligned > txn->mt_geo.upper) - aligned = txn->mt_geo.upper; - eASSERT(env, aligned >= newnext); - - VERBOSE("try growth datafile to %zu pages (+%zu)", aligned, - aligned - txn->mt_end_pgno); - ret.err = dxb_resize(env, txn->mt_next_pgno, (pgno_t)aligned, - txn->mt_geo.upper, implicit_grow); - if (ret.err != MDBX_SUCCESS) { - ERROR("unable growth datafile to %zu pages (+%zu), errcode %d", aligned, - aligned - txn->mt_end_pgno, ret.err); - goto fail; - } - env->me_txn->mt_end_pgno = (pgno_t)aligned; - eASSERT(env, pgno == 0); - - //--------------------------------------------------------------------------- - -done: - ret.err = MDBX_SUCCESS; - if (likely((flags & MDBX_ALLOC_RESERVE) == 0)) { - if 
(pgno) { - eASSERT(env, pgno + num <= txn->mt_next_pgno && pgno >= NUM_METAS); - eASSERT(env, pnl_check_allocated(txn->tw.relist, - txn->mt_next_pgno - MDBX_ENABLE_REFUND)); - } else { - pgno = txn->mt_next_pgno; - txn->mt_next_pgno += (pgno_t)num; - eASSERT(env, txn->mt_next_pgno <= txn->mt_end_pgno); - eASSERT(env, pgno >= NUM_METAS && pgno + num <= txn->mt_next_pgno); - } - - ret = page_alloc_finalize(env, txn, mc, pgno, num); - if (unlikely(ret.err != MDBX_SUCCESS)) { - fail: - eASSERT(env, ret.err != MDBX_SUCCESS); - eASSERT(env, pnl_check_allocated(txn->tw.relist, - txn->mt_next_pgno - MDBX_ENABLE_REFUND)); - int level; - const char *what; - if (flags & MDBX_ALLOC_RESERVE) { - level = - (flags & MDBX_ALLOC_UNIMPORTANT) ? MDBX_LOG_DEBUG : MDBX_LOG_NOTICE; - what = num ? "reserve-pages" : "fetch-slot"; - } else { - txn->mt_flags |= MDBX_TXN_ERROR; - level = MDBX_LOG_ERROR; - what = "pages"; - } - if (LOG_ENABLED(level)) - debug_log(level, __func__, __LINE__, - "unable alloc %zu %s, alloc-flags 0x%x, err %d, txn-flags " - "0x%x, re-list-len %zu, loose-count %zu, gc: height %u, " - "branch %zu, leaf %zu, large %zu, entries %zu\n", - num, what, flags, ret.err, txn->mt_flags, - MDBX_PNL_GETSIZE(txn->tw.relist), txn->tw.loose_count, - txn->mt_dbs[FREE_DBI].md_depth, - (size_t)txn->mt_dbs[FREE_DBI].md_branch_pages, - (size_t)txn->mt_dbs[FREE_DBI].md_leaf_pages, - (size_t)txn->mt_dbs[FREE_DBI].md_overflow_pages, - (size_t)txn->mt_dbs[FREE_DBI].md_entries); - ret.page = NULL; - } - if (num > 1) - txn->tw.gc_time_acc += monotime_since_cached(monotime_begin, &now_cache); - } else { - early_exit: - DEBUG("return NULL for %zu pages for ALLOC_%s, rc %d", num, - num ? 
"RESERVE" : "SLOT", ret.err); - ret.page = NULL; - } - -#if MDBX_ENABLE_PROFGC - prof->rtime_monotonic += osal_monotime() - monotime_begin; -#endif /* MDBX_ENABLE_PROFGC */ - return ret; -} - -__hot static pgr_t page_alloc(const MDBX_cursor *const mc) { - MDBX_txn *const txn = mc->mc_txn; - tASSERT(txn, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY); - tASSERT(txn, F_ISSET(dbi_state(txn, mc->mc_dbi), - DBI_LINDO | DBI_VALID | DBI_DIRTY)); - - /* If there are any loose pages, just use them */ - while (likely(txn->tw.loose_pages)) { -#if MDBX_ENABLE_REFUND - if (unlikely(txn->tw.loose_refund_wl > txn->mt_next_pgno)) { - txn_refund(txn); - if (!txn->tw.loose_pages) - break; - } -#endif /* MDBX_ENABLE_REFUND */ - - MDBX_page *lp = txn->tw.loose_pages; - MDBX_ASAN_UNPOISON_MEMORY_REGION(lp, txn->mt_env->me_psize); - VALGRIND_MAKE_MEM_DEFINED(&mp_next(lp), sizeof(MDBX_page *)); - txn->tw.loose_pages = mp_next(lp); - txn->tw.loose_count--; - DEBUG_EXTRA("db %d use loose page %" PRIaPGNO, DDBI(mc), lp->mp_pgno); - tASSERT(txn, lp->mp_pgno < txn->mt_next_pgno); - tASSERT(txn, lp->mp_pgno >= NUM_METAS); - VALGRIND_MAKE_MEM_UNDEFINED(page_data(lp), page_space(txn->mt_env)); - lp->mp_txnid = txn->mt_front; - pgr_t ret = {lp, MDBX_SUCCESS}; - return ret; - } - - if (likely(MDBX_PNL_GETSIZE(txn->tw.relist) > 0)) - return page_alloc_finalize(txn->mt_env, txn, mc, relist_get_single(txn), 1); - - return page_alloc_slowpath(mc, 1, MDBX_ALLOC_DEFAULT); -} - -/* Copy the used portions of a page. 
*/ -__hot static void page_copy(MDBX_page *const dst, const MDBX_page *const src, - const size_t size) { - STATIC_ASSERT(UINT16_MAX > MAX_PAGESIZE - PAGEHDRSZ); - STATIC_ASSERT(MIN_PAGESIZE > PAGEHDRSZ + NODESIZE * 4); - void *copy_dst = dst; - const void *copy_src = src; - size_t copy_len = size; - if (src->mp_flags & P_LEAF2) { - copy_len = PAGEHDRSZ + src->mp_leaf2_ksize * page_numkeys(src); - if (unlikely(copy_len > size)) - goto bailout; - } - if ((src->mp_flags & (P_LEAF2 | P_OVERFLOW)) == 0) { - size_t upper = src->mp_upper, lower = src->mp_lower; - intptr_t unused = upper - lower; - /* If page isn't full, just copy the used portion. Adjust - * alignment so memcpy may copy words instead of bytes. */ - if (unused > MDBX_CACHELINE_SIZE * 3) { - lower = ceil_powerof2(lower + PAGEHDRSZ, sizeof(void *)); - upper = floor_powerof2(upper + PAGEHDRSZ, sizeof(void *)); - if (unlikely(upper > copy_len)) - goto bailout; - memcpy(copy_dst, copy_src, lower); - copy_dst = ptr_disp(copy_dst, upper); - copy_src = ptr_disp(copy_src, upper); - copy_len -= upper; - } - } - memcpy(copy_dst, copy_src, copy_len); - return; - -bailout: - if (src->mp_flags & P_LEAF2) - bad_page(src, "%s addr %p, n-keys %zu, ksize %u", - "invalid/corrupted source page", __Wpedantic_format_voidptr(src), - page_numkeys(src), src->mp_leaf2_ksize); - else - bad_page(src, "%s addr %p, upper %u", "invalid/corrupted source page", - __Wpedantic_format_voidptr(src), src->mp_upper); - memset(dst, -1, size); -} - -/* Pull a page off the txn's spill list, if present. - * - * If a page being referenced was spilled to disk in this txn, bring - * it back and make it dirty/writable again. 
*/ -static pgr_t __must_check_result page_unspill(MDBX_txn *const txn, - const MDBX_page *const mp) { - VERBOSE("unspill page %" PRIaPGNO, mp->mp_pgno); - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0); - tASSERT(txn, IS_SPILLED(txn, mp)); - const MDBX_txn *scan = txn; - pgr_t ret; - do { - tASSERT(txn, (scan->mt_flags & MDBX_TXN_SPILLS) != 0); - const size_t si = search_spilled(scan, mp->mp_pgno); - if (!si) - continue; - const unsigned npages = IS_OVERFLOW(mp) ? mp->mp_pages : 1; - ret.page = page_malloc(txn, npages); - if (unlikely(!ret.page)) { - ret.err = MDBX_ENOMEM; - return ret; - } - page_copy(ret.page, mp, pgno2bytes(txn->mt_env, npages)); - if (scan == txn) { - /* If in current txn, this page is no longer spilled. - * If it happens to be the last page, truncate the spill list. - * Otherwise mark it as deleted by setting the LSB. */ - spill_remove(txn, si, npages); - } /* otherwise, if belonging to a parent txn, the - * page remains spilled until child commits */ - - ret.err = page_dirty(txn, ret.page, npages); - if (unlikely(ret.err != MDBX_SUCCESS)) - return ret; -#if MDBX_ENABLE_PGOP_STAT - txn->mt_env->me_lck->mti_pgop_stat.unspill.weak += npages; -#endif /* MDBX_ENABLE_PGOP_STAT */ - ret.page->mp_flags |= (scan == txn) ? 0 : P_SPILLED; - ret.err = MDBX_SUCCESS; - return ret; - } while (likely((scan = scan->mt_parent) != nullptr && - (scan->mt_flags & MDBX_TXN_SPILLS) != 0)); - ERROR("Page %" PRIaPGNO " mod-txnid %" PRIaTXN - " not found in the spill-list(s), current txn %" PRIaTXN - " front %" PRIaTXN ", root txn %" PRIaTXN " front %" PRIaTXN, - mp->mp_pgno, mp->mp_txnid, txn->mt_txnid, txn->mt_front, - txn->mt_env->me_txn0->mt_txnid, txn->mt_env->me_txn0->mt_front); - ret.err = MDBX_PROBLEM; - ret.page = NULL; - return ret; -} - -/* Touch a page: make it dirty and re-insert into tree with updated pgno. - * Set MDBX_TXN_ERROR on failure. - * - * [in] mc cursor pointing to the page to be touched - * - * Returns 0 on success, non-zero on failure. 
*/ -__hot static int page_touch(MDBX_cursor *mc) { - const MDBX_page *const mp = mc->mc_pg[mc->mc_top]; - MDBX_page *np; - MDBX_txn *txn = mc->mc_txn; - int rc; - - tASSERT(txn, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY); - tASSERT(txn, F_ISSET(*mc->mc_dbi_state, DBI_LINDO | DBI_VALID | DBI_DIRTY)); - tASSERT(txn, !IS_OVERFLOW(mp)); - if (ASSERT_ENABLED()) { - if (mc->mc_flags & C_SUB) { - MDBX_xcursor *mx = container_of(mc->mc_db, MDBX_xcursor, mx_db); - MDBX_cursor_couple *couple = container_of(mx, MDBX_cursor_couple, inner); - tASSERT(txn, mc->mc_db == &couple->outer.mc_xcursor->mx_db); - tASSERT(txn, mc->mc_dbx == &couple->outer.mc_xcursor->mx_dbx); - tASSERT(txn, *couple->outer.mc_dbi_state & DBI_DIRTY); - } - tASSERT(txn, dirtylist_check(txn)); - } - - if (IS_MODIFIABLE(txn, mp)) { - if (!txn->tw.dirtylist) { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) && !MDBX_AVOID_MSYNC); - return MDBX_SUCCESS; - } - if (IS_SUBP(mp)) - return MDBX_SUCCESS; - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - const size_t n = dpl_search(txn, mp->mp_pgno); - if (MDBX_AVOID_MSYNC && - unlikely(txn->tw.dirtylist->items[n].pgno != mp->mp_pgno)) { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP)); - tASSERT(txn, n > 0 && n <= txn->tw.dirtylist->length + 1); - VERBOSE("unspill page %" PRIaPGNO, mp->mp_pgno); - np = (MDBX_page *)mp; -#if MDBX_ENABLE_PGOP_STAT - txn->mt_env->me_lck->mti_pgop_stat.unspill.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - return page_dirty(txn, np, 1); - } - tASSERT(txn, n > 0 && n <= txn->tw.dirtylist->length); - tASSERT(txn, txn->tw.dirtylist->items[n].pgno == mp->mp_pgno && - txn->tw.dirtylist->items[n].ptr == mp); - if (!MDBX_AVOID_MSYNC || (txn->mt_flags & MDBX_WRITEMAP) == 0) { - size_t *const ptr = - ptr_disp(txn->tw.dirtylist->items[n].ptr, -(ptrdiff_t)sizeof(size_t)); - *ptr = txn->tw.dirtylru; - } - return MDBX_SUCCESS; - } - if (IS_SUBP(mp)) { - np = (MDBX_page *)mp; - np->mp_txnid = txn->mt_front; - return MDBX_SUCCESS; 
- } - tASSERT(txn, !IS_OVERFLOW(mp) && !IS_SUBP(mp)); - - if (IS_FROZEN(txn, mp)) { - /* CoW the page */ - rc = pnl_need(&txn->tw.retired_pages, 1); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - const pgr_t par = page_alloc(mc); - rc = par.err; - np = par.page; - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - - const pgno_t pgno = np->mp_pgno; - DEBUG("touched db %d page %" PRIaPGNO " -> %" PRIaPGNO, DDBI(mc), - mp->mp_pgno, pgno); - tASSERT(txn, mp->mp_pgno != pgno); - pnl_xappend(txn->tw.retired_pages, mp->mp_pgno); - /* Update the parent page, if any, to point to the new page */ - if (mc->mc_top) { - MDBX_page *parent = mc->mc_pg[mc->mc_top - 1]; - MDBX_node *node = page_node(parent, mc->mc_ki[mc->mc_top - 1]); - node_set_pgno(node, pgno); - } else { - mc->mc_db->md_root = pgno; - } - -#if MDBX_ENABLE_PGOP_STAT - txn->mt_env->me_lck->mti_pgop_stat.cow.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - page_copy(np, mp, txn->mt_env->me_psize); - np->mp_pgno = pgno; - np->mp_txnid = txn->mt_front; - } else if (IS_SPILLED(txn, mp)) { - pgr_t pur = page_unspill(txn, mp); - np = pur.page; - rc = pur.err; - if (likely(rc == MDBX_SUCCESS)) { - tASSERT(txn, np != nullptr); - goto done; - } - goto fail; - } else { - if (unlikely(!txn->mt_parent)) { - ERROR("Unexpected not frozen/modifiable/spilled but shadowed %s " - "page %" PRIaPGNO " mod-txnid %" PRIaTXN "," - " without parent transaction, current txn %" PRIaTXN - " front %" PRIaTXN, - IS_BRANCH(mp) ? 
"branch" : "leaf", mp->mp_pgno, mp->mp_txnid, - mc->mc_txn->mt_txnid, mc->mc_txn->mt_front); - rc = MDBX_PROBLEM; - goto fail; - } - - DEBUG("clone db %d page %" PRIaPGNO, DDBI(mc), mp->mp_pgno); - tASSERT(txn, - txn->tw.dirtylist->length <= MDBX_PGL_LIMIT + MDBX_PNL_GRANULATE); - /* No - copy it */ - np = page_malloc(txn, 1); - if (unlikely(!np)) { - rc = MDBX_ENOMEM; - goto fail; - } - page_copy(np, mp, txn->mt_env->me_psize); - - /* insert a clone of parent's dirty page, so don't touch dirtyroom */ - rc = page_dirty(txn, np, 1); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - -#if MDBX_ENABLE_PGOP_STAT - txn->mt_env->me_lck->mti_pgop_stat.clone.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - } - -done: - /* Adjust cursors pointing to mp */ - mc->mc_pg[mc->mc_top] = np; - MDBX_cursor *m2 = txn->mt_cursors[mc->mc_dbi]; - if (mc->mc_flags & C_SUB) { - for (; m2; m2 = m2->mc_next) { - MDBX_cursor *m3 = &m2->mc_xcursor->mx_cursor; - if (m3->mc_snum < mc->mc_snum) - continue; - if (m3->mc_pg[mc->mc_top] == mp) - m3->mc_pg[mc->mc_top] = np; - } - } else { - for (; m2; m2 = m2->mc_next) { - if (m2->mc_snum < mc->mc_snum) - continue; - if (m2 == mc) - continue; - if (m2->mc_pg[mc->mc_top] == mp) { - m2->mc_pg[mc->mc_top] = np; - if (XCURSOR_INITED(m2) && IS_LEAF(np)) - XCURSOR_REFRESH(m2, np, m2->mc_ki[mc->mc_top]); - } - } - } - return MDBX_SUCCESS; - -fail: - txn->mt_flags |= MDBX_TXN_ERROR; - return rc; -} - -static int meta_sync(const MDBX_env *env, const meta_ptr_t head) { - eASSERT(env, atomic_load32(&env->me_lck->mti_meta_sync_txnid, mo_Relaxed) != - (uint32_t)head.txnid); - /* Функция может вызываться (в том числе) при (env->me_flags & - * MDBX_NOMETASYNC) == 0 и env->me_fd4meta == env->me_dsync_fd, например если - * предыдущая транзакция была выполненна с флагом MDBX_NOMETASYNC. 
*/ - - int rc = MDBX_RESULT_TRUE; - if (env->me_flags & MDBX_WRITEMAP) { - if (!MDBX_AVOID_MSYNC) { - rc = osal_msync(&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS), - MDBX_SYNC_DATA | MDBX_SYNC_IODQ); -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.msync.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - } else { -#if MDBX_ENABLE_PGOP_ST - env->me_lck->mti_pgop_stat.wops.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - const MDBX_page *page = data_page(head.ptr_c); - rc = osal_pwrite(env->me_fd4meta, page, env->me_psize, - ptr_dist(page, env->me_map)); - - if (likely(rc == MDBX_SUCCESS) && env->me_fd4meta == env->me_lazy_fd) { - rc = osal_fsync(env->me_lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.fsync.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - } - } - } else { - rc = osal_fsync(env->me_lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.fsync.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - } - - if (likely(rc == MDBX_SUCCESS)) - env->me_lck->mti_meta_sync_txnid.weak = (uint32_t)head.txnid; - return rc; -} - -static __inline bool env_txn0_owned(const MDBX_env *env) { - return (env->me_flags & MDBX_NOSTICKYTHREADS) - ? (env->me_txn0->mt_owner != 0) - : (env->me_txn0->mt_owner == osal_thread_self()); -} - -__cold static int env_sync(MDBX_env *env, bool force, bool nonblock) { - if (unlikely(env->me_flags & MDBX_RDONLY)) - return MDBX_EACCESS; - - const bool txn0_owned = env_txn0_owned(env); - bool should_unlock = false; - int rc = MDBX_RESULT_TRUE /* means "nothing to sync" */; - -retry:; - unsigned flags = env->me_flags & ~(MDBX_NOMETASYNC | MDBX_SHRINK_ALLOWED); - if (unlikely((flags & (MDBX_FATAL_ERROR | MDBX_ENV_ACTIVE)) != - MDBX_ENV_ACTIVE)) { - rc = (flags & MDBX_FATAL_ERROR) ? MDBX_PANIC : MDBX_EPERM; - goto bailout; - } - - const meta_troika_t troika = - (txn0_owned | should_unlock) ? 
env->me_txn0->tw.troika : meta_tap(env); - const meta_ptr_t head = meta_recent(env, &troika); - const uint64_t unsynced_pages = - atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed); - if (unsynced_pages == 0) { - const uint32_t synched_meta_txnid_u32 = - atomic_load32(&env->me_lck->mti_meta_sync_txnid, mo_Relaxed); - if (synched_meta_txnid_u32 == (uint32_t)head.txnid && head.is_steady) - goto bailout; - } - - if (should_unlock && (env->me_flags & MDBX_WRITEMAP) && - unlikely(head.ptr_c->mm_geo.next > - bytes2pgno(env, env->me_dxb_mmap.current))) { - - if (unlikely(env->me_stuck_meta >= 0) && - troika.recent != (uint8_t)env->me_stuck_meta) { - NOTICE("skip %s since wagering meta-page (%u) is mispatch the recent " - "meta-page (%u)", - "sync datafile", env->me_stuck_meta, troika.recent); - rc = MDBX_RESULT_TRUE; - } else { - rc = dxb_resize(env, head.ptr_c->mm_geo.next, head.ptr_c->mm_geo.now, - head.ptr_c->mm_geo.upper, implicit_grow); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - } - - const size_t autosync_threshold = - atomic_load32(&env->me_lck->mti_autosync_threshold, mo_Relaxed); - const uint64_t autosync_period = - atomic_load64(&env->me_lck->mti_autosync_period, mo_Relaxed); - uint64_t eoos_timestamp; - if (force || (autosync_threshold && unsynced_pages >= autosync_threshold) || - (autosync_period && - (eoos_timestamp = - atomic_load64(&env->me_lck->mti_eoos_timestamp, mo_Relaxed)) && - osal_monotime() - eoos_timestamp >= autosync_period)) - flags &= MDBX_WRITEMAP /* clear flags for full steady sync */; - - if (!txn0_owned) { - if (!should_unlock) { -#if MDBX_ENABLE_PGOP_STAT - unsigned wops = 0; -#endif /* MDBX_ENABLE_PGOP_STAT */ - - int err; - /* pre-sync to avoid latency for writer */ - if (unsynced_pages > /* FIXME: define threshold */ 42 && - (flags & MDBX_SAFE_NOSYNC) == 0) { - eASSERT(env, ((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0); - if (flags & MDBX_WRITEMAP) { - /* Acquire guard to avoid collision with remap */ -#if 
defined(_WIN32) || defined(_WIN64) - osal_srwlock_AcquireShared(&env->me_remap_guard); -#else - err = osal_fastmutex_acquire(&env->me_remap_guard); - if (unlikely(err != MDBX_SUCCESS)) - return err; -#endif - const size_t usedbytes = - pgno_align2os_bytes(env, head.ptr_c->mm_geo.next); - err = osal_msync(&env->me_dxb_mmap, 0, usedbytes, MDBX_SYNC_DATA); -#if defined(_WIN32) || defined(_WIN64) - osal_srwlock_ReleaseShared(&env->me_remap_guard); -#else - int unlock_err = osal_fastmutex_release(&env->me_remap_guard); - if (unlikely(unlock_err != MDBX_SUCCESS) && err == MDBX_SUCCESS) - err = unlock_err; -#endif - } else - err = osal_fsync(env->me_lazy_fd, MDBX_SYNC_DATA); - - if (unlikely(err != MDBX_SUCCESS)) - return err; - -#if MDBX_ENABLE_PGOP_STAT - wops = 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - /* pre-sync done */ - rc = MDBX_SUCCESS /* means "some data was synced" */; - } - - err = osal_txn_lock(env, nonblock); - if (unlikely(err != MDBX_SUCCESS)) - return err; - - should_unlock = true; -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.wops.weak += wops; -#endif /* MDBX_ENABLE_PGOP_STAT */ - env->me_txn0->tw.troika = meta_tap(env); - eASSERT(env, !env->me_txn && !env->me_txn0->mt_child); - goto retry; - } - eASSERT(env, head.txnid == recent_committed_txnid(env)); - env->me_txn0->mt_txnid = head.txnid; - txn_oldest_reader(env->me_txn0); - flags |= MDBX_SHRINK_ALLOWED; - } - - eASSERT(env, txn0_owned || should_unlock); - eASSERT(env, !txn0_owned || (flags & MDBX_SHRINK_ALLOWED) == 0); - - if (!head.is_steady && unlikely(env->me_stuck_meta >= 0) && - troika.recent != (uint8_t)env->me_stuck_meta) { - NOTICE("skip %s since wagering meta-page (%u) is mispatch the recent " - "meta-page (%u)", - "sync datafile", env->me_stuck_meta, troika.recent); - rc = MDBX_RESULT_TRUE; - goto bailout; - } - if (!head.is_steady || ((flags & MDBX_SAFE_NOSYNC) == 0 && unsynced_pages)) { - DEBUG("meta-head %" PRIaPGNO ", %s, sync_pending %" PRIu64, - 
data_page(head.ptr_c)->mp_pgno, durable_caption(head.ptr_c), - unsynced_pages); - MDBX_meta meta = *head.ptr_c; - rc = sync_locked(env, flags, &meta, &env->me_txn0->tw.troika); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - - /* LY: sync meta-pages if MDBX_NOMETASYNC enabled - * and someone was not synced above. */ - if (atomic_load32(&env->me_lck->mti_meta_sync_txnid, mo_Relaxed) != - (uint32_t)head.txnid) - rc = meta_sync(env, head); - -bailout: - if (should_unlock) - osal_txn_unlock(env); - return rc; -} - -static __inline int check_env(const MDBX_env *env, const bool wanna_active) { - if (unlikely(!env)) - return MDBX_EINVAL; - - if (unlikely(env->me_signature.weak != MDBX_ME_SIGNATURE)) - return MDBX_EBADSIGN; - - if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) - return MDBX_PANIC; - - if (wanna_active) { -#if MDBX_ENV_CHECKPID - if (unlikely(env->me_pid != osal_getpid()) && env->me_pid) { - ((MDBX_env *)env)->me_flags |= MDBX_FATAL_ERROR; - return MDBX_PANIC; - } -#endif /* MDBX_ENV_CHECKPID */ - if (unlikely((env->me_flags & MDBX_ENV_ACTIVE) == 0)) - return MDBX_EPERM; - eASSERT(env, env->me_map != nullptr); - } - - return MDBX_SUCCESS; -} - -__cold int mdbx_env_sync_ex(MDBX_env *env, bool force, bool nonblock) { - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - return env_sync(env, force, nonblock); -} - -#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) -/* Find largest mvcc-snapshot still referenced by this process. 
*/ -static pgno_t find_largest_this(MDBX_env *env, pgno_t largest) { - MDBX_lockinfo *const lck = env->me_lck_mmap.lck; - if (likely(lck != NULL /* exclusive mode */)) { - const size_t snap_nreaders = - atomic_load32(&lck->mti_numreaders, mo_AcquireRelease); - for (size_t i = 0; i < snap_nreaders; ++i) { - retry: - if (atomic_load32(&lck->mti_readers[i].mr_pid, mo_AcquireRelease) == - env->me_pid) { - /* jitter4testing(true); */ - const pgno_t snap_pages = atomic_load32( - &lck->mti_readers[i].mr_snapshot_pages_used, mo_Relaxed); - const txnid_t snap_txnid = safe64_read(&lck->mti_readers[i].mr_txnid); - if (unlikely( - snap_pages != - atomic_load32(&lck->mti_readers[i].mr_snapshot_pages_used, - mo_AcquireRelease) || - snap_txnid != safe64_read(&lck->mti_readers[i].mr_txnid))) - goto retry; - if (largest < snap_pages && - atomic_load64(&lck->mti_oldest_reader, mo_AcquireRelease) <= - /* ignore pending updates */ snap_txnid && - snap_txnid <= MAX_TXNID) - largest = snap_pages; - } - } - } - return largest; -} - -static void txn_valgrind(MDBX_env *env, MDBX_txn *txn) { -#if !defined(__SANITIZE_ADDRESS__) - if (!RUNNING_ON_VALGRIND) - return; -#endif - - if (txn) { /* transaction start */ - if (env->me_poison_edge < txn->mt_next_pgno) - env->me_poison_edge = txn->mt_next_pgno; - VALGRIND_MAKE_MEM_DEFINED(env->me_map, pgno2bytes(env, txn->mt_next_pgno)); - MDBX_ASAN_UNPOISON_MEMORY_REGION(env->me_map, - pgno2bytes(env, txn->mt_next_pgno)); - /* don't touch more, it should be already poisoned */ - } else { /* transaction end */ - bool should_unlock = false; - pgno_t last = MAX_PAGENO + 1; - if (env->me_pid != osal_getpid()) { - /* resurrect after fork */ - return; - } else if (env->me_txn && env_txn0_owned(env)) { - /* inside write-txn */ - last = meta_recent(env, &env->me_txn0->tw.troika).ptr_v->mm_geo.next; - } else if (env->me_flags & MDBX_RDONLY) { - /* read-only mode, no write-txn, no wlock mutex */ - last = NUM_METAS; - } else if (osal_txn_lock(env, true) == 
MDBX_SUCCESS) { - /* no write-txn */ - last = NUM_METAS; - should_unlock = true; - } else { - /* write txn is running, therefore shouldn't poison any memory range */ - return; - } - - last = find_largest_this(env, last); - const pgno_t edge = env->me_poison_edge; - if (edge > last) { - eASSERT(env, last >= NUM_METAS); - env->me_poison_edge = last; - VALGRIND_MAKE_MEM_NOACCESS(ptr_disp(env->me_map, pgno2bytes(env, last)), - pgno2bytes(env, edge - last)); - MDBX_ASAN_POISON_MEMORY_REGION( - ptr_disp(env->me_map, pgno2bytes(env, last)), - pgno2bytes(env, edge - last)); - } - if (should_unlock) - osal_txn_unlock(env); - } -} -#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ - -typedef struct { - int err; - MDBX_reader *rslot; -} bind_rslot_result; - -static bind_rslot_result bind_rslot(MDBX_env *env, const uintptr_t tid) { - eASSERT(env, env->me_lck_mmap.lck); - eASSERT(env, env->me_lck->mti_magic_and_version == MDBX_LOCK_MAGIC); - eASSERT(env, env->me_lck->mti_os_and_format == MDBX_LOCK_FORMAT); - - bind_rslot_result result = {osal_rdt_lock(env), nullptr}; - if (unlikely(MDBX_IS_ERROR(result.err))) - return result; - if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) { - osal_rdt_unlock(env); - result.err = MDBX_PANIC; - return result; - } - if (unlikely(!env->me_map)) { - osal_rdt_unlock(env); - result.err = MDBX_EPERM; - return result; - } - - if (unlikely(env->me_live_reader != env->me_pid)) { - result.err = osal_rpid_set(env); - if (unlikely(result.err != MDBX_SUCCESS)) { - osal_rdt_unlock(env); - return result; - } - env->me_live_reader = env->me_pid; - } - - result.err = MDBX_SUCCESS; - size_t slot, nreaders; - while (1) { - nreaders = env->me_lck->mti_numreaders.weak; - for (slot = 0; slot < nreaders; slot++) - if (!atomic_load32(&env->me_lck->mti_readers[slot].mr_pid, - mo_AcquireRelease)) - break; - - if (likely(slot < env->me_maxreaders)) - break; - - result.err = cleanup_dead_readers(env, true, NULL); - if (result.err != MDBX_RESULT_TRUE) { - 
osal_rdt_unlock(env); - result.err = - (result.err == MDBX_SUCCESS) ? MDBX_READERS_FULL : result.err; - return result; - } - } - - result.rslot = &env->me_lck->mti_readers[slot]; - /* Claim the reader slot, carefully since other code - * uses the reader table un-mutexed: First reset the - * slot, next publish it in lck->mti_numreaders. After - * that, it is safe for mdbx_env_close() to touch it. - * When it will be closed, we can finally claim it. */ - atomic_store32(&result.rslot->mr_pid, 0, mo_AcquireRelease); - safe64_reset(&result.rslot->mr_txnid, true); - if (slot == nreaders) - env->me_lck->mti_numreaders.weak = (uint32_t)++nreaders; - result.rslot->mr_tid.weak = (env->me_flags & MDBX_NOSTICKYTHREADS) ? 0 : tid; - atomic_store32(&result.rslot->mr_pid, env->me_pid, mo_AcquireRelease); - osal_rdt_unlock(env); - - if (likely(env->me_flags & MDBX_ENV_TXKEY)) { - eASSERT(env, env->me_live_reader == env->me_pid); - thread_rthc_set(env->me_txkey, result.rslot); - } - return result; -} - -__cold int mdbx_thread_register(const MDBX_env *env) { - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!env->me_lck_mmap.lck)) - return (env->me_flags & MDBX_EXCLUSIVE) ? 
MDBX_EINVAL : MDBX_EPERM; - - if (unlikely((env->me_flags & MDBX_ENV_TXKEY) == 0)) { - eASSERT(env, env->me_flags & MDBX_NOSTICKYTHREADS); - return MDBX_EINVAL /* MDBX_NOSTICKYTHREADS mode */; - } - - eASSERT(env, (env->me_flags & (MDBX_NOSTICKYTHREADS | MDBX_ENV_TXKEY)) == - MDBX_ENV_TXKEY); - MDBX_reader *r = thread_rthc_get(env->me_txkey); - if (unlikely(r != NULL)) { - eASSERT(env, r->mr_pid.weak == env->me_pid); - eASSERT(env, r->mr_tid.weak == osal_thread_self()); - if (unlikely(r->mr_pid.weak != env->me_pid)) - return MDBX_BAD_RSLOT; - return MDBX_RESULT_TRUE /* already registered */; - } - - const uintptr_t tid = osal_thread_self(); - if (env->me_txn && unlikely(env->me_txn0->mt_owner == tid)) - return MDBX_TXN_OVERLAPPING; - return bind_rslot((MDBX_env *)env, tid).err; -} - -__cold int mdbx_thread_unregister(const MDBX_env *env) { - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!env->me_lck_mmap.lck)) - return MDBX_RESULT_TRUE; - - if (unlikely((env->me_flags & MDBX_ENV_TXKEY) == 0)) { - eASSERT(env, env->me_flags & MDBX_NOSTICKYTHREADS); - return MDBX_RESULT_TRUE /* MDBX_NOSTICKYTHREADS mode */; - } - - eASSERT(env, (env->me_flags & (MDBX_NOSTICKYTHREADS | MDBX_ENV_TXKEY)) == - MDBX_ENV_TXKEY); - MDBX_reader *r = thread_rthc_get(env->me_txkey); - if (unlikely(r == NULL)) - return MDBX_RESULT_TRUE /* not registered */; - - eASSERT(env, r->mr_pid.weak == env->me_pid); - eASSERT(env, r->mr_tid.weak == osal_thread_self()); - if (unlikely(r->mr_pid.weak != env->me_pid || - r->mr_tid.weak != osal_thread_self())) - return MDBX_BAD_RSLOT; - - eASSERT(env, r->mr_txnid.weak >= SAFE64_INVALID_THRESHOLD); - if (unlikely(r->mr_txnid.weak < SAFE64_INVALID_THRESHOLD)) - return MDBX_BUSY /* transaction is still active */; - - atomic_store32(&r->mr_pid, 0, mo_Relaxed); - atomic_store32(&env->me_lck->mti_readers_refresh_flag, true, - mo_AcquireRelease); - thread_rthc_set(env->me_txkey, nullptr); - return MDBX_SUCCESS; -} - 
-/* check against https://libmdbx.dqdkfa.ru/dead-github/issues/269 */ -static bool coherency_check(const MDBX_env *env, const txnid_t txnid, - const volatile MDBX_db *dbs, - const volatile MDBX_meta *meta, bool report) { - const txnid_t freedb_mod_txnid = dbs[FREE_DBI].md_mod_txnid; - const txnid_t maindb_mod_txnid = dbs[MAIN_DBI].md_mod_txnid; - const pgno_t last_pgno = meta->mm_geo.now; - - const pgno_t freedb_root_pgno = dbs[FREE_DBI].md_root; - const MDBX_page *freedb_root = (env->me_map && freedb_root_pgno < last_pgno) - ? pgno2page(env, freedb_root_pgno) - : nullptr; - - const pgno_t maindb_root_pgno = dbs[MAIN_DBI].md_root; - const MDBX_page *maindb_root = (env->me_map && maindb_root_pgno < last_pgno) - ? pgno2page(env, maindb_root_pgno) - : nullptr; - const uint64_t magic_and_version = - unaligned_peek_u64_volatile(4, &meta->mm_magic_and_version); - - bool ok = true; - if (freedb_root_pgno != P_INVALID && - unlikely(freedb_root_pgno >= last_pgno)) { - if (report) - WARNING( - "catch invalid %sdb root %" PRIaPGNO " for meta_txnid %" PRIaTXN - " %s", - "free", freedb_root_pgno, txnid, - (env->me_stuck_meta < 0) - ? "(workaround for incoherent flaw of unified page/buffer cache)" - : "(wagering meta)"); - ok = false; - } - if (maindb_root_pgno != P_INVALID && - unlikely(maindb_root_pgno >= last_pgno)) { - if (report) - WARNING( - "catch invalid %sdb root %" PRIaPGNO " for meta_txnid %" PRIaTXN - " %s", - "main", maindb_root_pgno, txnid, - (env->me_stuck_meta < 0) - ? "(workaround for incoherent flaw of unified page/buffer cache)" - : "(wagering meta)"); - ok = false; - } - if (unlikely(txnid < freedb_mod_txnid || - (!freedb_mod_txnid && freedb_root && - likely(magic_and_version == MDBX_DATA_MAGIC)))) { - if (report) - WARNING( - "catch invalid %sdb.mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN - " %s", - "free", freedb_mod_txnid, txnid, - (env->me_stuck_meta < 0) - ? 
"(workaround for incoherent flaw of unified page/buffer cache)" - : "(wagering meta)"); - ok = false; - } - if (unlikely(txnid < maindb_mod_txnid || - (!maindb_mod_txnid && maindb_root && - likely(magic_and_version == MDBX_DATA_MAGIC)))) { - if (report) - WARNING( - "catch invalid %sdb.mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN - " %s", - "main", maindb_mod_txnid, txnid, - (env->me_stuck_meta < 0) - ? "(workaround for incoherent flaw of unified page/buffer cache)" - : "(wagering meta)"); - ok = false; - } - if (likely(freedb_root && freedb_mod_txnid)) { - VALGRIND_MAKE_MEM_DEFINED(freedb_root, sizeof(freedb_root->mp_txnid)); - MDBX_ASAN_UNPOISON_MEMORY_REGION(freedb_root, - sizeof(freedb_root->mp_txnid)); - const txnid_t root_txnid = freedb_root->mp_txnid; - if (unlikely(root_txnid != freedb_mod_txnid)) { - if (report) - WARNING("catch invalid root_page %" PRIaPGNO " mod_txnid %" PRIaTXN - " for %sdb.mod_txnid %" PRIaTXN " %s", - freedb_root_pgno, root_txnid, "free", freedb_mod_txnid, - (env->me_stuck_meta < 0) ? "(workaround for incoherent flaw of " - "unified page/buffer cache)" - : "(wagering meta)"); - ok = false; - } - } - if (likely(maindb_root && maindb_mod_txnid)) { - VALGRIND_MAKE_MEM_DEFINED(maindb_root, sizeof(maindb_root->mp_txnid)); - MDBX_ASAN_UNPOISON_MEMORY_REGION(maindb_root, - sizeof(maindb_root->mp_txnid)); - const txnid_t root_txnid = maindb_root->mp_txnid; - if (unlikely(root_txnid != maindb_mod_txnid)) { - if (report) - WARNING("catch invalid root_page %" PRIaPGNO " mod_txnid %" PRIaTXN - " for %sdb.mod_txnid %" PRIaTXN " %s", - maindb_root_pgno, root_txnid, "main", maindb_mod_txnid, - (env->me_stuck_meta < 0) ? "(workaround for incoherent flaw of " - "unified page/buffer cache)" - : "(wagering meta)"); - ok = false; - } - } - if (unlikely(!ok) && report) - env->me_lck->mti_pgop_stat.incoherence.weak = - (env->me_lck->mti_pgop_stat.incoherence.weak >= INT32_MAX) - ? 
INT32_MAX - : env->me_lck->mti_pgop_stat.incoherence.weak + 1; - return ok; -} - -__cold static int coherency_timeout(uint64_t *timestamp, intptr_t pgno, - const MDBX_env *env) { - if (likely(timestamp && *timestamp == 0)) - *timestamp = osal_monotime(); - else if (unlikely(!timestamp || osal_monotime() - *timestamp > - osal_16dot16_to_monotime(65536 / 10))) { - if (pgno >= 0 && pgno != env->me_stuck_meta) - ERROR("bailout waiting for %" PRIuSIZE " page arrival %s", pgno, - "(workaround for incoherent flaw of unified page/buffer cache)"); - else if (env->me_stuck_meta < 0) - ERROR("bailout waiting for valid snapshot (%s)", - "workaround for incoherent flaw of unified page/buffer cache"); - return MDBX_PROBLEM; - } - - osal_memory_fence(mo_AcquireRelease, true); -#if defined(_WIN32) || defined(_WIN64) - SwitchToThread(); -#elif defined(__linux__) || defined(__gnu_linux__) || defined(_UNIX03_SOURCE) - sched_yield(); -#elif (defined(_GNU_SOURCE) && __GLIBC_PREREQ(2, 1)) || defined(_OPEN_THREADS) - pthread_yield(); -#else - usleep(42); -#endif - return MDBX_RESULT_TRUE; -} - -/* check with timeout as the workaround - * for https://libmdbx.dqdkfa.ru/dead-github/issues/269 */ -__hot static int coherency_check_head(MDBX_txn *txn, const meta_ptr_t head, - uint64_t *timestamp) { - /* Copy the DB info and flags */ - txn->mt_geo = head.ptr_v->mm_geo; - memcpy(txn->mt_dbs, head.ptr_c->mm_dbs, CORE_DBS * sizeof(MDBX_db)); - VALGRIND_MAKE_MEM_UNDEFINED(txn->mt_dbs + CORE_DBS, - txn->mt_env->me_maxdbs - CORE_DBS); - txn->mt_canary = head.ptr_v->mm_canary; - - if (unlikely(!coherency_check(txn->mt_env, head.txnid, txn->mt_dbs, - head.ptr_v, *timestamp == 0))) - return coherency_timeout(timestamp, -1, txn->mt_env); - - tASSERT(txn, txn->mt_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); - tASSERT(txn, db_check_flags(txn->mt_dbs[MAIN_DBI].md_flags)); - return MDBX_SUCCESS; -} - -static int coherency_check_written(const MDBX_env *env, const txnid_t txnid, - const volatile MDBX_meta 
*meta, - const intptr_t pgno, uint64_t *timestamp) { - const bool report = !(timestamp && *timestamp); - const txnid_t head_txnid = meta_txnid(meta); - if (unlikely(head_txnid < MIN_TXNID || head_txnid < txnid)) { - if (report) { - env->me_lck->mti_pgop_stat.incoherence.weak = - (env->me_lck->mti_pgop_stat.incoherence.weak >= INT32_MAX) - ? INT32_MAX - : env->me_lck->mti_pgop_stat.incoherence.weak + 1; - WARNING("catch %s txnid %" PRIaTXN " for meta_%" PRIaPGNO " %s", - (head_txnid < MIN_TXNID) ? "invalid" : "unexpected", head_txnid, - bytes2pgno(env, ptr_dist(meta, env->me_map)), - "(workaround for incoherent flaw of unified page/buffer cache)"); - } - return coherency_timeout(timestamp, pgno, env); - } - if (unlikely(!coherency_check(env, head_txnid, meta->mm_dbs, meta, report))) - return coherency_timeout(timestamp, pgno, env); - - eASSERT(env, meta->mm_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); - eASSERT(env, db_check_flags(meta->mm_dbs[MAIN_DBI].md_flags)); - return MDBX_SUCCESS; -} - -static bool check_meta_coherency(const MDBX_env *env, - const volatile MDBX_meta *meta, bool report) { - uint64_t timestamp = 0; - return coherency_check_written(env, 0, meta, -1, - report ? ×tamp : nullptr) == MDBX_SUCCESS; -} - -/* Common code for mdbx_txn_begin() and mdbx_txn_renew(). 
*/ -static int txn_renew(MDBX_txn *txn, unsigned flags) { - MDBX_env *env = txn->mt_env; - int rc; - -#if MDBX_ENV_CHECKPID - if (unlikely(env->me_pid != osal_getpid())) { - env->me_flags |= MDBX_FATAL_ERROR; - return MDBX_PANIC; - } -#endif /* MDBX_ENV_CHECKPID */ - - STATIC_ASSERT(sizeof(MDBX_reader) == 32); -#if MDBX_LOCKING > 0 - STATIC_ASSERT(offsetof(MDBX_lockinfo, mti_wlock) % MDBX_CACHELINE_SIZE == 0); - STATIC_ASSERT(offsetof(MDBX_lockinfo, mti_rlock) % MDBX_CACHELINE_SIZE == 0); -#else - STATIC_ASSERT( - offsetof(MDBX_lockinfo, mti_oldest_reader) % MDBX_CACHELINE_SIZE == 0); - STATIC_ASSERT(offsetof(MDBX_lockinfo, mti_numreaders) % MDBX_CACHELINE_SIZE == - 0); -#endif /* MDBX_LOCKING */ - STATIC_ASSERT(offsetof(MDBX_lockinfo, mti_readers) % MDBX_CACHELINE_SIZE == - 0); - - const uintptr_t tid = osal_thread_self(); - flags |= env->me_flags & (MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP); - if (flags & MDBX_TXN_RDONLY) { - eASSERT(env, (flags & ~(MDBX_TXN_RO_BEGIN_FLAGS | MDBX_WRITEMAP | - MDBX_NOSTICKYTHREADS)) == 0); - txn->mt_flags = flags; - MDBX_reader *r = txn->to.reader; - STATIC_ASSERT(sizeof(uintptr_t) <= sizeof(r->mr_tid)); - if (likely(env->me_flags & MDBX_ENV_TXKEY)) { - eASSERT(env, !(env->me_flags & MDBX_NOSTICKYTHREADS)); - r = thread_rthc_get(env->me_txkey); - if (likely(r)) { - if (unlikely(!r->mr_pid.weak) && - (mdbx_static.flags & MDBX_DBG_LEGACY_MULTIOPEN)) { - thread_rthc_set(env->me_txkey, nullptr); - r = nullptr; - } else { - eASSERT(env, r->mr_pid.weak == env->me_pid); - eASSERT(env, r->mr_tid.weak == osal_thread_self()); - } - } - } else { - eASSERT(env, - !env->me_lck_mmap.lck || (env->me_flags & MDBX_NOSTICKYTHREADS)); - } - - if (likely(r)) { - if (unlikely(r->mr_pid.weak != env->me_pid || - r->mr_txnid.weak < SAFE64_INVALID_THRESHOLD)) - return MDBX_BAD_RSLOT; - } else if (env->me_lck_mmap.lck) { - bind_rslot_result brs = bind_rslot(env, tid); - if (unlikely(brs.err != MDBX_SUCCESS)) - return brs.err; - r = brs.rslot; - } - 
txn->to.reader = r; - STATIC_ASSERT(MDBX_TXN_RDONLY_PREPARE > MDBX_TXN_RDONLY); - if (flags & (MDBX_TXN_RDONLY_PREPARE - MDBX_TXN_RDONLY)) { - eASSERT(env, txn->mt_txnid == 0); - eASSERT(env, txn->mt_owner == 0); - eASSERT(env, txn->mt_numdbs == 0); - if (likely(r)) { - eASSERT(env, r->mr_snapshot_pages_used.weak == 0); - eASSERT(env, r->mr_txnid.weak >= SAFE64_INVALID_THRESHOLD); - atomic_store32(&r->mr_snapshot_pages_used, 0, mo_Relaxed); - } - txn->mt_flags = MDBX_TXN_RDONLY | MDBX_TXN_FINISHED; - return MDBX_SUCCESS; - } - txn->mt_owner = tid; - - /* Seek & fetch the last meta */ - uint64_t timestamp = 0; - size_t loop = 0; - meta_troika_t troika = meta_tap(env); - while (1) { - const meta_ptr_t head = - likely(env->me_stuck_meta < 0) - ? /* regular */ meta_recent(env, &troika) - : /* recovery mode */ meta_ptr(env, env->me_stuck_meta); - if (likely(r)) { - safe64_reset(&r->mr_txnid, false); - atomic_store32(&r->mr_snapshot_pages_used, head.ptr_v->mm_geo.next, - mo_Relaxed); - atomic_store64( - &r->mr_snapshot_pages_retired, - unaligned_peek_u64_volatile(4, head.ptr_v->mm_pages_retired), - mo_Relaxed); - safe64_write(&r->mr_txnid, head.txnid); - eASSERT(env, r->mr_pid.weak == osal_getpid()); - eASSERT(env, r->mr_tid.weak == ((env->me_flags & MDBX_NOSTICKYTHREADS) - ? 
0 - : osal_thread_self())); - eASSERT(env, r->mr_txnid.weak == head.txnid || - (r->mr_txnid.weak >= SAFE64_INVALID_THRESHOLD && - head.txnid < env->me_lck->mti_oldest_reader.weak)); - atomic_store32(&env->me_lck->mti_readers_refresh_flag, true, - mo_AcquireRelease); - } else { - /* exclusive mode without lck */ - eASSERT(env, !env->me_lck_mmap.lck && env->me_lck == lckless_stub(env)); - } - jitter4testing(true); - - /* Snap the state from current meta-head */ - txn->mt_txnid = head.txnid; - if (likely(env->me_stuck_meta < 0) && - unlikely(meta_should_retry(env, &troika) || - head.txnid < atomic_load64(&env->me_lck->mti_oldest_reader, - mo_AcquireRelease))) { - if (unlikely(++loop > 42)) { - ERROR("bailout waiting for valid snapshot (%s)", - "metapages are too volatile"); - rc = MDBX_PROBLEM; - txn->mt_txnid = INVALID_TXNID; - if (likely(r)) - safe64_reset(&r->mr_txnid, false); - goto bailout; - } - timestamp = 0; - continue; - } - - rc = coherency_check_head(txn, head, ×tamp); - jitter4testing(false); - if (likely(rc == MDBX_SUCCESS)) - break; - - if (unlikely(rc != MDBX_RESULT_TRUE)) { - txn->mt_txnid = INVALID_TXNID; - if (likely(r)) - safe64_reset(&r->mr_txnid, false); - goto bailout; - } - } - - if (unlikely(txn->mt_txnid < MIN_TXNID || txn->mt_txnid > MAX_TXNID)) { - ERROR("%s", "environment corrupted by died writer, must shutdown!"); - if (likely(r)) - safe64_reset(&r->mr_txnid, false); - txn->mt_txnid = INVALID_TXNID; - rc = MDBX_CORRUPTED; - goto bailout; - } - ENSURE(env, txn->mt_txnid >= - /* paranoia is appropriate here */ env->me_lck - ->mti_oldest_reader.weak); - tASSERT(txn, txn->mt_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); - tASSERT(txn, db_check_flags(txn->mt_dbs[MAIN_DBI].md_flags)); - } else { - eASSERT(env, (flags & ~(MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_SPILLS | - MDBX_WRITEMAP | MDBX_NOSTICKYTHREADS)) == 0); - if (unlikely(txn->mt_owner == tid || - /* not recovery mode */ env->me_stuck_meta >= 0)) - return MDBX_BUSY; - MDBX_lockinfo *const lck 
= env->me_lck_mmap.lck; - if (lck && (env->me_flags & MDBX_NOSTICKYTHREADS) == 0 && - (mdbx_static.flags & MDBX_DBG_LEGACY_OVERLAP) == 0) { - const size_t snap_nreaders = - atomic_load32(&lck->mti_numreaders, mo_AcquireRelease); - for (size_t i = 0; i < snap_nreaders; ++i) { - if (atomic_load32(&lck->mti_readers[i].mr_pid, mo_Relaxed) == - env->me_pid && - unlikely(atomic_load64(&lck->mti_readers[i].mr_tid, mo_Relaxed) == - tid)) { - const txnid_t txnid = safe64_read(&lck->mti_readers[i].mr_txnid); - if (txnid >= MIN_TXNID && txnid <= MAX_TXNID) - return MDBX_TXN_OVERLAPPING; - } - } - } - - /* Not yet touching txn == env->me_txn0, it may be active */ - jitter4testing(false); - rc = osal_txn_lock(env, !!(flags & MDBX_TXN_TRY)); - if (unlikely(rc)) - return rc; - if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) { - osal_txn_unlock(env); - return MDBX_PANIC; - } -#if defined(_WIN32) || defined(_WIN64) - if (unlikely(!env->me_map)) { - osal_txn_unlock(env); - return MDBX_EPERM; - } -#endif /* Windows */ - - txn->tw.troika = meta_tap(env); - const meta_ptr_t head = meta_recent(env, &txn->tw.troika); - uint64_t timestamp = 0; - while ("workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269") { - rc = coherency_check_head(txn, head, ×tamp); - if (likely(rc == MDBX_SUCCESS)) - break; - if (unlikely(rc != MDBX_RESULT_TRUE)) - goto bailout; - } - eASSERT(env, meta_txnid(head.ptr_v) == head.txnid); - txn->mt_txnid = safe64_txnid_next(head.txnid); - if (unlikely(txn->mt_txnid > MAX_TXNID)) { - rc = MDBX_TXN_FULL; - ERROR("txnid overflow, raise %d", rc); - goto bailout; - } - - tASSERT(txn, txn->mt_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); - tASSERT(txn, db_check_flags(txn->mt_dbs[MAIN_DBI].md_flags)); - txn->mt_flags = flags; - txn->mt_child = NULL; - txn->tw.loose_pages = NULL; - txn->tw.loose_count = 0; -#if MDBX_ENABLE_REFUND - txn->tw.loose_refund_wl = 0; -#endif /* MDBX_ENABLE_REFUND */ - MDBX_PNL_SETSIZE(txn->tw.retired_pages, 0); - txn->tw.spilled.list = 
NULL; - txn->tw.spilled.least_removed = 0; - txn->tw.gc_time_acc = 0; - txn->tw.last_reclaimed = 0; - if (txn->tw.lifo_reclaimed) - MDBX_PNL_SETSIZE(txn->tw.lifo_reclaimed, 0); - env->me_txn = txn; - - if ((txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) { - rc = dpl_alloc(txn); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - txn->tw.dirtyroom = txn->mt_env->me_options.dp_limit; - txn->tw.dirtylru = MDBX_DEBUG ? UINT32_MAX / 3 - 42 : 0; - } else { - tASSERT(txn, txn->tw.dirtylist == nullptr); - txn->tw.dirtylist = nullptr; - txn->tw.dirtyroom = MAX_PAGENO; - txn->tw.dirtylru = 0; - } - eASSERT(env, txn->tw.writemap_dirty_npages == 0); - eASSERT(env, txn->tw.writemap_spilled_npages == 0); - } - - txn->mt_front = - txn->mt_txnid + ((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == 0); - - /* Setup db info */ - tASSERT(txn, txn->mt_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); - tASSERT(txn, db_check_flags(txn->mt_dbs[MAIN_DBI].md_flags)); - VALGRIND_MAKE_MEM_UNDEFINED(txn->mt_dbi_state, env->me_maxdbs); -#if MDBX_ENABLE_DBI_SPARSE - txn->mt_numdbs = CORE_DBS; - VALGRIND_MAKE_MEM_UNDEFINED( - txn->mt_dbi_sparse, - ceil_powerof2(env->me_maxdbs, CHAR_BIT * sizeof(txn->mt_dbi_sparse[0])) / - CHAR_BIT); - txn->mt_dbi_sparse[0] = (1 << CORE_DBS) - 1; -#else - txn->mt_numdbs = (env->me_numdbs < 8) ? 
env->me_numdbs : 8; - if (txn->mt_numdbs > CORE_DBS) - memset(txn->mt_dbi_state + CORE_DBS, 0, txn->mt_numdbs - CORE_DBS); -#endif /* MDBX_ENABLE_DBI_SPARSE */ - txn->mt_dbi_state[FREE_DBI] = DBI_LINDO | DBI_VALID; - txn->mt_dbi_state[MAIN_DBI] = DBI_LINDO | DBI_VALID; - txn->mt_cursors[FREE_DBI] = nullptr; - txn->mt_cursors[MAIN_DBI] = nullptr; - txn->mt_dbi_seqs[FREE_DBI] = 0; - txn->mt_dbi_seqs[MAIN_DBI] = - atomic_load32(&env->me_dbi_seqs[MAIN_DBI], mo_AcquireRelease); - - if (unlikely(env->me_db_flags[MAIN_DBI] != - (DB_VALID | txn->mt_dbs[MAIN_DBI].md_flags))) { - const bool need_txn_lock = env->me_txn0 && env->me_txn0->mt_owner != tid; - bool should_unlock = false; - if (need_txn_lock) { - rc = osal_txn_lock(env, true); - if (rc == MDBX_SUCCESS) - should_unlock = true; - else if (rc != MDBX_BUSY && rc != MDBX_EDEADLK) - goto bailout; - } - rc = osal_fastmutex_acquire(&env->me_dbi_lock); - if (likely(rc == MDBX_SUCCESS)) { - uint32_t seq = dbi_seq_next(env, MAIN_DBI); - /* проверяем повторно после захвата блокировки */ - if (env->me_db_flags[MAIN_DBI] != - (DB_VALID | txn->mt_dbs[MAIN_DBI].md_flags)) { - if (!need_txn_lock || should_unlock || - /* если нет активной пишущей транзакции, - * то следующая будет ждать на me_dbi_lock */ - !env->me_txn) { - if (env->me_db_flags[MAIN_DBI] != 0 || MDBX_DEBUG) - NOTICE("renew MainDB for %s-txn %" PRIaTXN - " since db-flags changes 0x%x -> 0x%x", - (txn->mt_flags & MDBX_TXN_RDONLY) ? 
"ro" : "rw", - txn->mt_txnid, env->me_db_flags[MAIN_DBI] & ~DB_VALID, - txn->mt_dbs[MAIN_DBI].md_flags); - env->me_db_flags[MAIN_DBI] = DB_POISON; - atomic_store32(&env->me_dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease); - rc = setup_sdb(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], - env->me_psize); - if (likely(rc == MDBX_SUCCESS)) { - seq = dbi_seq_next(env, MAIN_DBI); - env->me_db_flags[MAIN_DBI] = - DB_VALID | txn->mt_dbs[MAIN_DBI].md_flags; - txn->mt_dbi_seqs[MAIN_DBI] = atomic_store32( - &env->me_dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease); - } - } else { - ERROR("MainDB db-flags changes 0x%x -> 0x%x ahead of read-txn " - "%" PRIaTXN, - txn->mt_dbs[MAIN_DBI].md_flags, - env->me_db_flags[MAIN_DBI] & ~DB_VALID, txn->mt_txnid); - rc = MDBX_INCOMPATIBLE; - } - } - ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); - } else { - DEBUG("me_dbi_lock failed, err %d", rc); - } - if (should_unlock) - osal_txn_unlock(env); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - - if (unlikely(txn->mt_dbs[FREE_DBI].md_flags != MDBX_INTEGERKEY)) { - ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB", - txn->mt_dbs[FREE_DBI].md_flags); - rc = MDBX_INCOMPATIBLE; - goto bailout; - } - - tASSERT(txn, txn->mt_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); - tASSERT(txn, db_check_flags(txn->mt_dbs[MAIN_DBI].md_flags)); - if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) { - WARNING("%s", "environment had fatal error, must shutdown!"); - rc = MDBX_PANIC; - } else { - const size_t size_bytes = pgno2bytes(env, txn->mt_end_pgno); - const size_t used_bytes = pgno2bytes(env, txn->mt_next_pgno); - const size_t required_bytes = - (txn->mt_flags & MDBX_TXN_RDONLY) ? 
used_bytes : size_bytes; - eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); - if (unlikely(required_bytes > env->me_dxb_mmap.current)) { - /* Размер БД (для пишущих транзакций) или используемых данных (для - * читающих транзакций) больше предыдущего/текущего размера внутри - * процесса, увеличиваем. Сюда также попадает случай увеличения верхней - * границы размера БД и отображения. В читающих транзакциях нельзя - * изменять размер файла, который может быть больше необходимого этой - * транзакции. */ - if (txn->mt_geo.upper > MAX_PAGENO + 1 || - bytes2pgno(env, pgno2bytes(env, txn->mt_geo.upper)) != - txn->mt_geo.upper) { - rc = MDBX_UNABLE_EXTEND_MAPSIZE; - goto bailout; - } - rc = dxb_resize(env, txn->mt_next_pgno, txn->mt_end_pgno, - txn->mt_geo.upper, implicit_grow); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); - } else if (unlikely(size_bytes < env->me_dxb_mmap.current)) { - /* Размер БД меньше предыдущего/текущего размера внутри процесса, можно - * уменьшить, но всё сложнее: - * - размер файла согласован со всеми читаемыми снимками на момент - * коммита последней транзакции; - * - в читающей транзакции размер файла может быть больше и него нельзя - * изменять, в том числе менять madvise (меньша размера файла нельзя, - * а за размером нет смысла). - * - в пишущей транзакции уменьшать размер файла можно только после - * проверки размера читаемых снимков, но в этом нет смысла, так как - * это будет сделано при фиксации транзакции. - * - * В сухом остатке, можно только установить dxb_mmap.current равным - * размеру файла, а это проще сделать без вызова dxb_resize() и усложения - * внутренней логики. - * - * В этой тактике есть недостаток: если пишущите транзакции не регулярны, - * и при завершении такой транзакции файл БД остаётся не-уменьшеным из-за - * читающих транзакций использующих предыдущие снимки. 
*/ -#if defined(_WIN32) || defined(_WIN64) - osal_srwlock_AcquireShared(&env->me_remap_guard); -#else - rc = osal_fastmutex_acquire(&env->me_remap_guard); -#endif - if (likely(rc == MDBX_SUCCESS)) { - eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); - rc = osal_filesize(env->me_dxb_mmap.fd, &env->me_dxb_mmap.filesize); - if (likely(rc == MDBX_SUCCESS)) { - eASSERT(env, env->me_dxb_mmap.filesize >= required_bytes); - if (env->me_dxb_mmap.current > env->me_dxb_mmap.filesize) - env->me_dxb_mmap.current = - (env->me_dxb_mmap.limit < env->me_dxb_mmap.filesize) - ? env->me_dxb_mmap.limit - : (size_t)env->me_dxb_mmap.filesize; - } -#if defined(_WIN32) || defined(_WIN64) - osal_srwlock_ReleaseShared(&env->me_remap_guard); -#else - int err = osal_fastmutex_release(&env->me_remap_guard); - if (unlikely(err) && likely(rc == MDBX_SUCCESS)) - rc = err; -#endif - } - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - eASSERT(env, - pgno2bytes(env, txn->mt_next_pgno) <= env->me_dxb_mmap.current); - eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); - if (txn->mt_flags & MDBX_TXN_RDONLY) { -#if defined(_WIN32) || defined(_WIN64) - if (((used_bytes > env->me_dbgeo.lower && env->me_dbgeo.shrink) || - (mdbx_RunningUnderWine() && - /* under Wine acquisition of remap_guard is always required, - * since Wine don't support section extending, - * i.e. in both cases unmap+map are required. 
*/ - used_bytes < env->me_dbgeo.upper && env->me_dbgeo.grow)) && - /* avoid recursive use SRW */ (txn->mt_flags & - MDBX_NOSTICKYTHREADS) == 0) { - txn->mt_flags |= MDBX_SHRINK_ALLOWED; - osal_srwlock_AcquireShared(&env->me_remap_guard); - } -#endif /* Windows */ - } else { - tASSERT(txn, txn == env->me_txn0); - MDBX_cursor *const gc = ptr_disp(txn, sizeof(MDBX_txn)); - rc = cursor_init(gc, txn, FREE_DBI); - if (rc != MDBX_SUCCESS) - goto bailout; - } -#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) - txn_valgrind(env, txn); -#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ - return MDBX_SUCCESS; - } -bailout: - tASSERT(txn, rc != MDBX_SUCCESS); - txn_end(txn, TXN_END_SLOT | TXN_END_FAIL_BEGIN); - return rc; -} - -static __always_inline int check_txn(const MDBX_txn *txn, int bad_bits) { - if (unlikely(!txn)) - return MDBX_EINVAL; - - if (unlikely(txn->mt_signature != MDBX_MT_SIGNATURE)) - return MDBX_EBADSIGN; - - if (unlikely(txn->mt_flags & bad_bits)) - return MDBX_BAD_TXN; - - tASSERT(txn, (txn->mt_flags & MDBX_TXN_FINISHED) || - (txn->mt_flags & MDBX_NOSTICKYTHREADS) == - (txn->mt_env->me_flags & MDBX_NOSTICKYTHREADS)); -#if MDBX_TXN_CHECKOWNER - STATIC_ASSERT((long)MDBX_NOSTICKYTHREADS > (long)MDBX_TXN_FINISHED); - if ((txn->mt_flags & (MDBX_NOSTICKYTHREADS | MDBX_TXN_FINISHED)) < - MDBX_TXN_FINISHED && - unlikely(txn->mt_owner != osal_thread_self())) - return txn->mt_owner ? 
MDBX_THREAD_MISMATCH : MDBX_BAD_TXN; -#endif /* MDBX_TXN_CHECKOWNER */ - - if (bad_bits && unlikely(!txn->mt_env->me_map)) - return MDBX_EPERM; - - return MDBX_SUCCESS; -} - -static __always_inline int check_txn_rw(const MDBX_txn *txn, int bad_bits) { - int err = check_txn(txn, bad_bits); - if (unlikely(err)) - return err; - - if (unlikely(txn->mt_flags & MDBX_TXN_RDONLY)) - return MDBX_EACCESS; - - return MDBX_SUCCESS; -} - -int mdbx_txn_renew(MDBX_txn *txn) { - if (unlikely(!txn)) - return MDBX_EINVAL; - - if (unlikely(txn->mt_signature != MDBX_MT_SIGNATURE)) - return MDBX_EBADSIGN; - - if (unlikely((txn->mt_flags & MDBX_TXN_RDONLY) == 0)) - return MDBX_EINVAL; - - int rc; - if (unlikely(txn->mt_owner != 0 || !(txn->mt_flags & MDBX_TXN_FINISHED))) { - rc = mdbx_txn_reset(txn); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - - rc = txn_renew(txn, MDBX_TXN_RDONLY); - if (rc == MDBX_SUCCESS) { - tASSERT(txn, txn->mt_owner == osal_thread_self()); - DEBUG("renew txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO - "/%" PRIaPGNO, - txn->mt_txnid, (txn->mt_flags & MDBX_TXN_RDONLY) ? 'r' : 'w', - (void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root, - txn->mt_dbs[FREE_DBI].md_root); - } - return rc; -} - -int mdbx_txn_set_userctx(MDBX_txn *txn, void *ctx) { - int rc = check_txn(txn, MDBX_TXN_FINISHED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - txn->mt_userctx = ctx; - return MDBX_SUCCESS; -} - -void *mdbx_txn_get_userctx(const MDBX_txn *txn) { - return check_txn(txn, MDBX_TXN_FINISHED) ? 
nullptr : txn->mt_userctx; -} - -int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, - MDBX_txn **ret, void *context) { - if (unlikely(!ret)) - return MDBX_EINVAL; - *ret = NULL; - - if (unlikely((flags & ~MDBX_TXN_RW_BEGIN_FLAGS) && - (flags & ~MDBX_TXN_RO_BEGIN_FLAGS))) - return MDBX_EINVAL; - - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(env->me_flags & MDBX_RDONLY & - ~flags)) /* write txn in RDONLY env */ - return MDBX_EACCESS; - - MDBX_txn *txn = nullptr; - if (parent) { - /* Nested transactions: Max 1 child, write txns only, no writemap */ - rc = check_txn_rw(parent, - MDBX_TXN_RDONLY | MDBX_WRITEMAP | MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (env->me_options.spill_parent4child_denominator) { - /* Spill dirty-pages of parent to provide dirtyroom for child txn */ - rc = txn_spill(parent, nullptr, - parent->tw.dirtylist->length / - env->me_options.spill_parent4child_denominator); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - tASSERT(parent, audit_ex(parent, 0, false) == 0); - - flags |= parent->mt_flags & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_SPILLS | - MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP); - } else if (flags & MDBX_TXN_RDONLY) { - if ((env->me_flags & MDBX_NOSTICKYTHREADS) == 0 && env->me_txn && - unlikely(env->me_txn0->mt_owner == osal_thread_self()) && - (mdbx_static.flags & MDBX_DBG_LEGACY_OVERLAP) == 0) - return MDBX_TXN_OVERLAPPING; - } else { - /* Reuse preallocated write txn. However, do not touch it until - * txn_renew() succeeds, since it currently may be active. */ - txn = env->me_txn0; - goto renew; - } - - const intptr_t bitmap_bytes = -#if MDBX_ENABLE_DBI_SPARSE - ceil_powerof2(env->me_maxdbs, CHAR_BIT * sizeof(txn->mt_dbi_sparse[0])) / - CHAR_BIT; -#else - 0; -#endif /* MDBX_ENABLE_DBI_SPARSE */ - STATIC_ASSERT(sizeof(txn->tw) > sizeof(txn->to)); - const size_t base = (flags & MDBX_TXN_RDONLY) - ? 
sizeof(MDBX_txn) - sizeof(txn->tw) + sizeof(txn->to) - : sizeof(MDBX_txn); - const size_t size = - base + - ((flags & MDBX_TXN_RDONLY) - ? (size_t)bitmap_bytes + env->me_maxdbs * sizeof(txn->mt_dbi_seqs[0]) - : 0) + - env->me_maxdbs * (sizeof(txn->mt_dbs[0]) + sizeof(txn->mt_cursors[0]) + - sizeof(txn->mt_dbi_state[0])); - txn = osal_malloc(size); - if (unlikely(txn == nullptr)) { - DEBUG("calloc: %s", "failed"); - return MDBX_ENOMEM; - } -#if MDBX_DEBUG - memset(txn, 0xCD, size); - VALGRIND_MAKE_MEM_UNDEFINED(txn, size); -#endif /* MDBX_DEBUG */ - MDBX_ANALYSIS_ASSUME(size > base); - memset(txn, 0, - (MDBX_GOOFY_MSVC_STATIC_ANALYZER && base > size) ? size : base); - txn->mt_dbs = ptr_disp(txn, base); - txn->mt_cursors = - ptr_disp(txn->mt_dbs, env->me_maxdbs * sizeof(txn->mt_dbs[0])); -#if MDBX_DEBUG - txn->mt_cursors[FREE_DBI] = nullptr; /* avoid SIGSEGV in an assertion later */ -#endif - txn->mt_dbi_state = - ptr_disp(txn, size - env->me_maxdbs * sizeof(txn->mt_dbi_state[0])); - txn->mt_flags = flags; - txn->mt_env = env; - - if (parent) { - tASSERT(parent, dirtylist_check(parent)); -#if MDBX_ENABLE_DBI_SPARSE - txn->mt_dbi_sparse = parent->mt_dbi_sparse; -#endif /* MDBX_ENABLE_DBI_SPARSE */ - txn->mt_dbi_seqs = parent->mt_dbi_seqs; - txn->mt_geo = parent->mt_geo; - rc = dpl_alloc(txn); - if (likely(rc == MDBX_SUCCESS)) { - const size_t len = - MDBX_PNL_GETSIZE(parent->tw.relist) + parent->tw.loose_count; - txn->tw.relist = - pnl_alloc((len > MDBX_PNL_INITIAL) ? 
len : MDBX_PNL_INITIAL); - if (unlikely(!txn->tw.relist)) - rc = MDBX_ENOMEM; - } - if (unlikely(rc != MDBX_SUCCESS)) { - nested_failed: - pnl_free(txn->tw.relist); - dpl_free(txn); - osal_free(txn); - return rc; - } - - /* Move loose pages to reclaimed list */ - if (parent->tw.loose_count) { - do { - MDBX_page *lp = parent->tw.loose_pages; - tASSERT(parent, lp->mp_flags == P_LOOSE); - rc = pnl_insert_range(&parent->tw.relist, lp->mp_pgno, 1); - if (unlikely(rc != MDBX_SUCCESS)) - goto nested_failed; - MDBX_ASAN_UNPOISON_MEMORY_REGION(&mp_next(lp), sizeof(MDBX_page *)); - VALGRIND_MAKE_MEM_DEFINED(&mp_next(lp), sizeof(MDBX_page *)); - parent->tw.loose_pages = mp_next(lp); - /* Remove from dirty list */ - page_wash(parent, dpl_exist(parent, lp->mp_pgno), lp, 1); - } while (parent->tw.loose_pages); - parent->tw.loose_count = 0; -#if MDBX_ENABLE_REFUND - parent->tw.loose_refund_wl = 0; -#endif /* MDBX_ENABLE_REFUND */ - tASSERT(parent, dirtylist_check(parent)); - } - txn->tw.dirtyroom = parent->tw.dirtyroom; - txn->tw.dirtylru = parent->tw.dirtylru; - - dpl_sort(parent); - if (parent->tw.spilled.list) - spill_purge(parent); - - tASSERT(txn, MDBX_PNL_ALLOCLEN(txn->tw.relist) >= - MDBX_PNL_GETSIZE(parent->tw.relist)); - memcpy(txn->tw.relist, parent->tw.relist, - MDBX_PNL_SIZEOF(parent->tw.relist)); - eASSERT(env, pnl_check_allocated( - txn->tw.relist, - (txn->mt_next_pgno /* LY: intentional assignment here, - only for assertion */ - = parent->mt_next_pgno) - - MDBX_ENABLE_REFUND)); - - txn->tw.gc_time_acc = parent->tw.gc_time_acc; - txn->tw.last_reclaimed = parent->tw.last_reclaimed; - if (parent->tw.lifo_reclaimed) { - txn->tw.lifo_reclaimed = parent->tw.lifo_reclaimed; - parent->tw.lifo_reclaimed = - (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->tw.lifo_reclaimed); - } - - txn->tw.retired_pages = parent->tw.retired_pages; - parent->tw.retired_pages = - (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->tw.retired_pages); - - txn->mt_txnid = parent->mt_txnid; - txn->mt_front = 
parent->mt_front + 1; -#if MDBX_ENABLE_REFUND - txn->tw.loose_refund_wl = 0; -#endif /* MDBX_ENABLE_REFUND */ - txn->mt_canary = parent->mt_canary; - parent->mt_flags |= MDBX_TXN_HAS_CHILD; - parent->mt_child = txn; - txn->mt_parent = parent; - txn->mt_owner = parent->mt_owner; - txn->tw.troika = parent->tw.troika; - - txn->mt_cursors[FREE_DBI] = nullptr; - txn->mt_cursors[MAIN_DBI] = nullptr; - txn->mt_dbi_state[FREE_DBI] = - parent->mt_dbi_state[FREE_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); - txn->mt_dbi_state[MAIN_DBI] = - parent->mt_dbi_state[MAIN_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); - memset(txn->mt_dbi_state + CORE_DBS, 0, - (txn->mt_numdbs = parent->mt_numdbs) - CORE_DBS); - memcpy(txn->mt_dbs, parent->mt_dbs, sizeof(txn->mt_dbs[0]) * CORE_DBS); - - tASSERT(parent, - parent->tw.dirtyroom + parent->tw.dirtylist->length == - (parent->mt_parent ? parent->mt_parent->tw.dirtyroom - : parent->mt_env->me_options.dp_limit)); - tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == - (txn->mt_parent ? txn->mt_parent->tw.dirtyroom - : txn->mt_env->me_options.dp_limit)); - env->me_txn = txn; - tASSERT(parent, parent->mt_cursors[FREE_DBI] == nullptr); - rc = parent->mt_cursors[MAIN_DBI] - ? 
cursor_shadow(parent->mt_cursors[MAIN_DBI], txn, MAIN_DBI) - : MDBX_SUCCESS; - if (AUDIT_ENABLED() && ASSERT_ENABLED()) { - txn->mt_signature = MDBX_MT_SIGNATURE; - tASSERT(txn, audit_ex(txn, 0, false) == 0); - } - if (unlikely(rc != MDBX_SUCCESS)) - txn_end(txn, TXN_END_FAIL_BEGINCHILD); - } else { /* MDBX_TXN_RDONLY */ - txn->mt_dbi_seqs = - ptr_disp(txn->mt_cursors, env->me_maxdbs * sizeof(txn->mt_cursors[0])); -#if MDBX_ENABLE_DBI_SPARSE - txn->mt_dbi_sparse = ptr_disp(txn->mt_dbi_state, -bitmap_bytes); -#endif /* MDBX_ENABLE_DBI_SPARSE */ - renew: - rc = txn_renew(txn, flags); - } - - if (unlikely(rc != MDBX_SUCCESS)) { - if (txn != env->me_txn0) - osal_free(txn); - } else { - if (flags & (MDBX_TXN_RDONLY_PREPARE - MDBX_TXN_RDONLY)) - eASSERT(env, txn->mt_flags == (MDBX_TXN_RDONLY | MDBX_TXN_FINISHED)); - else if (flags & MDBX_TXN_RDONLY) - eASSERT(env, (txn->mt_flags & - ~(MDBX_NOSTICKYTHREADS | MDBX_TXN_RDONLY | MDBX_WRITEMAP | - /* Win32: SRWL flag */ MDBX_SHRINK_ALLOWED)) == 0); - else { - eASSERT(env, - (txn->mt_flags & - ~(MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED | - MDBX_NOMETASYNC | MDBX_SAFE_NOSYNC | MDBX_TXN_SPILLS)) == 0); - assert(!txn->tw.spilled.list && !txn->tw.spilled.least_removed); - } - txn->mt_signature = MDBX_MT_SIGNATURE; - txn->mt_userctx = context; - *ret = txn; - DEBUG("begin txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO - "/%" PRIaPGNO, - txn->mt_txnid, (flags & MDBX_TXN_RDONLY) ? 
'r' : 'w', (void *)txn, - (void *)env, txn->mt_dbs[MAIN_DBI].md_root, - txn->mt_dbs[FREE_DBI].md_root); - } - - return rc; -} - -int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) { - int rc = check_txn(txn, MDBX_TXN_FINISHED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!info)) - return MDBX_EINVAL; - - MDBX_env *const env = txn->mt_env; -#if MDBX_ENV_CHECKPID - if (unlikely(env->me_pid != osal_getpid())) { - env->me_flags |= MDBX_FATAL_ERROR; - return MDBX_PANIC; - } -#endif /* MDBX_ENV_CHECKPID */ - - info->txn_id = txn->mt_txnid; - info->txn_space_used = pgno2bytes(env, txn->mt_geo.next); - - if (txn->mt_flags & MDBX_TXN_RDONLY) { - meta_ptr_t head; - uint64_t head_retired; - meta_troika_t troika = meta_tap(env); - do { - /* fetch info from volatile head */ - head = meta_recent(env, &troika); - head_retired = - unaligned_peek_u64_volatile(4, head.ptr_v->mm_pages_retired); - info->txn_space_limit_soft = pgno2bytes(env, head.ptr_v->mm_geo.now); - info->txn_space_limit_hard = pgno2bytes(env, head.ptr_v->mm_geo.upper); - info->txn_space_leftover = - pgno2bytes(env, head.ptr_v->mm_geo.now - head.ptr_v->mm_geo.next); - } while (unlikely(meta_should_retry(env, &troika))); - - info->txn_reader_lag = head.txnid - info->txn_id; - info->txn_space_dirty = info->txn_space_retired = 0; - uint64_t reader_snapshot_pages_retired; - if (txn->to.reader && - head_retired > - (reader_snapshot_pages_retired = atomic_load64( - &txn->to.reader->mr_snapshot_pages_retired, mo_Relaxed))) { - info->txn_space_dirty = info->txn_space_retired = pgno2bytes( - env, (pgno_t)(head_retired - reader_snapshot_pages_retired)); - - size_t retired_next_reader = 0; - MDBX_lockinfo *const lck = env->me_lck_mmap.lck; - if (scan_rlt && info->txn_reader_lag > 1 && lck) { - /* find next more recent reader */ - txnid_t next_reader = head.txnid; - const size_t snap_nreaders = - atomic_load32(&lck->mti_numreaders, mo_AcquireRelease); - for (size_t i = 0; i < 
snap_nreaders; ++i) { - retry: - if (atomic_load32(&lck->mti_readers[i].mr_pid, mo_AcquireRelease)) { - jitter4testing(true); - const txnid_t snap_txnid = - safe64_read(&lck->mti_readers[i].mr_txnid); - const uint64_t snap_retired = - atomic_load64(&lck->mti_readers[i].mr_snapshot_pages_retired, - mo_AcquireRelease); - if (unlikely(snap_retired != - atomic_load64( - &lck->mti_readers[i].mr_snapshot_pages_retired, - mo_Relaxed)) || - snap_txnid != safe64_read(&lck->mti_readers[i].mr_txnid)) - goto retry; - if (snap_txnid <= txn->mt_txnid) { - retired_next_reader = 0; - break; - } - if (snap_txnid < next_reader) { - next_reader = snap_txnid; - retired_next_reader = pgno2bytes( - env, (pgno_t)(snap_retired - - atomic_load64( - &txn->to.reader->mr_snapshot_pages_retired, - mo_Relaxed))); - } - } - } - } - info->txn_space_dirty = retired_next_reader; - } - } else { - info->txn_space_limit_soft = pgno2bytes(env, txn->mt_geo.now); - info->txn_space_limit_hard = pgno2bytes(env, txn->mt_geo.upper); - info->txn_space_retired = pgno2bytes( - env, txn->mt_child ? (size_t)txn->tw.retired_pages - : MDBX_PNL_GETSIZE(txn->tw.retired_pages)); - info->txn_space_leftover = pgno2bytes(env, txn->tw.dirtyroom); - info->txn_space_dirty = pgno2bytes( - env, txn->tw.dirtylist ? 
txn->tw.dirtylist->pages_including_loose - : (txn->tw.writemap_dirty_npages + - txn->tw.writemap_spilled_npages)); - info->txn_reader_lag = INT64_MAX; - MDBX_lockinfo *const lck = env->me_lck_mmap.lck; - if (scan_rlt && lck) { - txnid_t oldest_snapshot = txn->mt_txnid; - const size_t snap_nreaders = - atomic_load32(&lck->mti_numreaders, mo_AcquireRelease); - if (snap_nreaders) { - oldest_snapshot = txn_oldest_reader(txn); - if (oldest_snapshot == txn->mt_txnid - 1) { - /* check if there is at least one reader */ - bool exists = false; - for (size_t i = 0; i < snap_nreaders; ++i) { - if (atomic_load32(&lck->mti_readers[i].mr_pid, mo_Relaxed) && - txn->mt_txnid > safe64_read(&lck->mti_readers[i].mr_txnid)) { - exists = true; - break; - } - } - oldest_snapshot += !exists; - } - } - info->txn_reader_lag = txn->mt_txnid - oldest_snapshot; - } - } - - return MDBX_SUCCESS; -} - -MDBX_env *mdbx_txn_env(const MDBX_txn *txn) { - if (unlikely(!txn || txn->mt_signature != MDBX_MT_SIGNATURE || - txn->mt_env->me_signature.weak != MDBX_ME_SIGNATURE)) - return NULL; - return txn->mt_env; -} - -uint64_t mdbx_txn_id(const MDBX_txn *txn) { - if (unlikely(!txn || txn->mt_signature != MDBX_MT_SIGNATURE)) - return 0; - return txn->mt_txnid; -} - -int mdbx_txn_flags(const MDBX_txn *txn) { - STATIC_ASSERT( - (MDBX_TXN_INVALID & - (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | - MDBX_TXN_HAS_CHILD | MDBX_TXN_DRAINED_GC | MDBX_SHRINK_ALLOWED | - MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) == 0); - if (unlikely(!txn || txn->mt_signature != MDBX_MT_SIGNATURE)) - return MDBX_TXN_INVALID; - assert(0 == (int)(txn->mt_flags & MDBX_TXN_INVALID)); - return txn->mt_flags; -} - -/* Filter-out pgno list from transaction's dirty-page list */ -static void dpl_sift(MDBX_txn *const txn, MDBX_PNL pl, const bool spilled) { - tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - if 
(MDBX_PNL_GETSIZE(pl) && txn->tw.dirtylist->length) { - tASSERT(txn, pnl_check_allocated(pl, (size_t)txn->mt_next_pgno << spilled)); - MDBX_dpl *dl = dpl_sort(txn); - - /* Scanning in ascend order */ - const intptr_t step = MDBX_PNL_ASCENDING ? 1 : -1; - const intptr_t begin = MDBX_PNL_ASCENDING ? 1 : MDBX_PNL_GETSIZE(pl); - const intptr_t end = MDBX_PNL_ASCENDING ? MDBX_PNL_GETSIZE(pl) + 1 : 0; - tASSERT(txn, pl[begin] <= pl[end - step]); - - size_t w, r = dpl_search(txn, pl[begin] >> spilled); - tASSERT(txn, dl->sorted == dl->length); - for (intptr_t i = begin; r <= dl->length;) { /* scan loop */ - assert(i != end); - tASSERT(txn, !spilled || (pl[i] & 1) == 0); - pgno_t pl_pgno = pl[i] >> spilled; - pgno_t dp_pgno = dl->items[r].pgno; - if (likely(dp_pgno != pl_pgno)) { - const bool cmp = dp_pgno < pl_pgno; - r += cmp; - i += cmp ? 0 : step; - if (likely(i != end)) - continue; - return; - } - - /* update loop */ - unsigned npages; - w = r; - remove_dl: - npages = dpl_npages(dl, r); - dl->pages_including_loose -= npages; - if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)) - dpage_free(txn->mt_env, dl->items[r].ptr, npages); - ++r; - next_i: - i += step; - if (unlikely(i == end)) { - while (r <= dl->length) - dl->items[w++] = dl->items[r++]; - } else { - while (r <= dl->length) { - assert(i != end); - tASSERT(txn, !spilled || (pl[i] & 1) == 0); - pl_pgno = pl[i] >> spilled; - dp_pgno = dl->items[r].pgno; - if (dp_pgno < pl_pgno) - dl->items[w++] = dl->items[r++]; - else if (dp_pgno > pl_pgno) - goto next_i; - else - goto remove_dl; - } - } - dl->sorted = dpl_setlen(dl, w - 1); - txn->tw.dirtyroom += r - w; - tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == - (txn->mt_parent ? txn->mt_parent->tw.dirtyroom - : txn->mt_env->me_options.dp_limit)); - return; - } - } -} - -/* End a transaction, except successful commit of a nested transaction. - * May be called twice for readonly txns: First reset it, then abort. 
- * [in] txn the transaction handle to end - * [in] mode why and how to end the transaction */ -static int txn_end(MDBX_txn *txn, const unsigned mode) { - MDBX_env *env = txn->mt_env; - static const char *const names[] = TXN_END_NAMES; - - DEBUG("%s txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO - "/%" PRIaPGNO, - names[mode & TXN_END_OPMASK], txn->mt_txnid, - (txn->mt_flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, (void *)env, - txn->mt_dbs[MAIN_DBI].md_root, txn->mt_dbs[FREE_DBI].md_root); - - if (!(mode & TXN_END_EOTDONE)) /* !(already closed cursors) */ - cursors_eot(txn, false); - - int rc = MDBX_SUCCESS; - if (txn->mt_flags & MDBX_TXN_RDONLY) { - if (txn->to.reader) { - MDBX_reader *slot = txn->to.reader; - eASSERT(env, slot->mr_pid.weak == env->me_pid); - if (likely(!(txn->mt_flags & MDBX_TXN_FINISHED))) { - ENSURE(env, txn->mt_txnid >= - /* paranoia is appropriate here */ env->me_lck - ->mti_oldest_reader.weak); - eASSERT(env, - txn->mt_txnid == slot->mr_txnid.weak && - slot->mr_txnid.weak >= env->me_lck->mti_oldest_reader.weak); -#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) - txn_valgrind(env, nullptr); -#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ - atomic_store32(&slot->mr_snapshot_pages_used, 0, mo_Relaxed); - safe64_reset(&slot->mr_txnid, false); - atomic_store32(&env->me_lck->mti_readers_refresh_flag, true, - mo_Relaxed); - } else { - eASSERT(env, slot->mr_pid.weak == env->me_pid); - eASSERT(env, slot->mr_txnid.weak >= SAFE64_INVALID_THRESHOLD); - } - if (mode & TXN_END_SLOT) { - if ((env->me_flags & MDBX_ENV_TXKEY) == 0) - atomic_store32(&slot->mr_pid, 0, mo_Relaxed); - txn->to.reader = NULL; - } - } -#if defined(_WIN32) || defined(_WIN64) - if (txn->mt_flags & MDBX_SHRINK_ALLOWED) - osal_srwlock_ReleaseShared(&env->me_remap_guard); -#endif - txn->mt_numdbs = 0; /* prevent further DBI activity */ - txn->mt_flags = MDBX_TXN_RDONLY | MDBX_TXN_FINISHED; - txn->mt_owner = 0; - } else if (!(txn->mt_flags & 
MDBX_TXN_FINISHED)) { - ENSURE(env, txn->mt_txnid >= - /* paranoia is appropriate here */ env->me_lck - ->mti_oldest_reader.weak); -#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) - if (txn == env->me_txn0) - txn_valgrind(env, nullptr); -#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ - - txn->mt_flags = MDBX_TXN_FINISHED; - env->me_txn = txn->mt_parent; - pnl_free(txn->tw.spilled.list); - txn->tw.spilled.list = nullptr; - if (txn == env->me_txn0) { - eASSERT(env, txn->mt_parent == NULL); - /* Export or close DBI handles created in this txn */ - rc = dbi_update(txn, mode & TXN_END_UPDATE); - pnl_shrink(&txn->tw.retired_pages); - pnl_shrink(&txn->tw.relist); - if (!(env->me_flags & MDBX_WRITEMAP)) - dlist_free(txn); - /* The writer mutex was locked in mdbx_txn_begin. */ - osal_txn_unlock(env); - } else { - eASSERT(env, txn->mt_parent != NULL); - MDBX_txn *const parent = txn->mt_parent; - eASSERT(env, parent->mt_signature == MDBX_MT_SIGNATURE); - eASSERT(env, parent->mt_child == txn && - (parent->mt_flags & MDBX_TXN_HAS_CHILD) != 0); - eASSERT(env, pnl_check_allocated(txn->tw.relist, - txn->mt_next_pgno - MDBX_ENABLE_REFUND)); - eASSERT(env, memcmp(&txn->tw.troika, &parent->tw.troika, - sizeof(meta_troika_t)) == 0); - - txn->mt_owner = 0; - if (txn->tw.lifo_reclaimed) { - eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) >= - (uintptr_t)parent->tw.lifo_reclaimed); - MDBX_PNL_SETSIZE(txn->tw.lifo_reclaimed, - (uintptr_t)parent->tw.lifo_reclaimed); - parent->tw.lifo_reclaimed = txn->tw.lifo_reclaimed; - } - - if (txn->tw.retired_pages) { - eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.retired_pages) >= - (uintptr_t)parent->tw.retired_pages); - MDBX_PNL_SETSIZE(txn->tw.retired_pages, - (uintptr_t)parent->tw.retired_pages); - parent->tw.retired_pages = txn->tw.retired_pages; - } - - parent->mt_child = nullptr; - parent->mt_flags &= ~MDBX_TXN_HAS_CHILD; - parent->tw.dirtylru = txn->tw.dirtylru; - tASSERT(parent, dirtylist_check(parent)); - tASSERT(parent, 
audit_ex(parent, 0, false) == 0); - dlist_free(txn); - dpl_free(txn); - pnl_free(txn->tw.relist); - - if (parent->mt_geo.upper != txn->mt_geo.upper || - parent->mt_geo.now != txn->mt_geo.now) { - /* undo resize performed by child txn */ - rc = dxb_resize(env, parent->mt_next_pgno, parent->mt_geo.now, - parent->mt_geo.upper, impilict_shrink); - if (rc == MDBX_EPERM) { - /* unable undo resize (it is regular for Windows), - * therefore promote size changes from child to the parent txn */ - WARNING("unable undo resize performed by child txn, promote to " - "the parent (%u->%u, %u->%u)", - txn->mt_geo.now, parent->mt_geo.now, txn->mt_geo.upper, - parent->mt_geo.upper); - parent->mt_geo.now = txn->mt_geo.now; - parent->mt_geo.upper = txn->mt_geo.upper; - parent->mt_flags |= MDBX_TXN_DIRTY; - rc = MDBX_SUCCESS; - } else if (unlikely(rc != MDBX_SUCCESS)) { - ERROR("error %d while undo resize performed by child txn, fail " - "the parent", - rc); - parent->mt_flags |= MDBX_TXN_ERROR; - if (!env->me_dxb_mmap.base) - env->me_flags |= MDBX_FATAL_ERROR; - } - } - } - } - - eASSERT(env, txn == env->me_txn0 || txn->mt_owner == 0); - if ((mode & TXN_END_FREE) != 0 && txn != env->me_txn0) { - txn->mt_signature = 0; - osal_free(txn); - } - - return rc; -} - -int mdbx_txn_reset(MDBX_txn *txn) { - int rc = check_txn(txn, 0); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - /* This call is only valid for read-only txns */ - if (unlikely((txn->mt_flags & MDBX_TXN_RDONLY) == 0)) - return MDBX_EINVAL; - - /* LY: don't close DBI-handles */ - rc = txn_end(txn, TXN_END_RESET | TXN_END_UPDATE); - if (rc == MDBX_SUCCESS) { - tASSERT(txn, txn->mt_signature == MDBX_MT_SIGNATURE); - tASSERT(txn, txn->mt_owner == 0); - } - return rc; -} - -int mdbx_txn_break(MDBX_txn *txn) { - do { - int rc = check_txn(txn, 0); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - txn->mt_flags |= MDBX_TXN_ERROR; - if (txn->mt_flags & MDBX_TXN_RDONLY) - break; - txn = txn->mt_child; - } while (txn); - return 
MDBX_SUCCESS; -} - -static int txn_abort(MDBX_txn *txn) { - if (txn->mt_flags & MDBX_TXN_RDONLY) - /* LY: don't close DBI-handles */ - return txn_end(txn, TXN_END_ABORT | TXN_END_UPDATE | TXN_END_SLOT | - TXN_END_FREE); - - if (unlikely(txn->mt_flags & MDBX_TXN_FINISHED)) - return MDBX_BAD_TXN; - - if (txn->mt_child) - txn_abort(txn->mt_child); - - tASSERT(txn, (txn->mt_flags & MDBX_TXN_ERROR) || dirtylist_check(txn)); - return txn_end(txn, TXN_END_ABORT | TXN_END_SLOT | TXN_END_FREE); -} - -int mdbx_txn_abort(MDBX_txn *txn) { - int rc = check_txn(txn, 0); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - rc = check_env(txn->mt_env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if ((txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_NOSTICKYTHREADS)) == - MDBX_NOSTICKYTHREADS && - unlikely(txn->mt_owner != osal_thread_self())) { - mdbx_txn_break(txn); - return MDBX_THREAD_MISMATCH; - } - - return txn_abort(txn); -} - -__cold static MDBX_db *audit_db_dig(const MDBX_txn *txn, const size_t dbi, - MDBX_db *fallback) { - const MDBX_txn *dig = txn; - do { - tASSERT(txn, txn->mt_numdbs == dig->mt_numdbs); - const uint8_t state = dbi_state(dig, dbi); - if (state & DBI_LINDO) - switch (state & (DBI_VALID | DBI_STALE | DBI_OLDEN)) { - case DBI_VALID: - case DBI_OLDEN: - return dig->mt_dbs + dbi; - case 0: - return nullptr; - case DBI_VALID | DBI_STALE: - case DBI_OLDEN | DBI_STALE: - break; - default: - tASSERT(txn, !!"unexpected dig->mt_dbi_state[dbi]"); - } - dig = dig->mt_parent; - } while (dig); - return fallback; -} - -static size_t audit_db_used(const MDBX_db *db) { - return db ? (size_t)db->md_branch_pages + (size_t)db->md_leaf_pages + - (size_t)db->md_overflow_pages - : 0; -} - -/* Count all the pages in each DB and in the GC and make sure - * it matches the actual number of pages being used. 
*/ -__cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, - bool dont_filter_gc) { - const MDBX_env *const env = txn->mt_env; - size_t pending = 0; - if ((txn->mt_flags & MDBX_TXN_RDONLY) == 0) - pending = txn->tw.loose_count + MDBX_PNL_GETSIZE(txn->tw.relist) + - (MDBX_PNL_GETSIZE(txn->tw.retired_pages) - retired_stored); - - MDBX_cursor_couple cx; - int rc = cursor_init(&cx.outer, txn, FREE_DBI); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - size_t gc = 0; - MDBX_val key, data; - while ((rc = cursor_get(&cx.outer, &key, &data, MDBX_NEXT)) == 0) { - if (!dont_filter_gc) { - if (unlikely(key.iov_len != sizeof(txnid_t))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid GC-key size", (unsigned)key.iov_len); - return MDBX_CORRUPTED; - } - txnid_t id = unaligned_peek_u64(4, key.iov_base); - if (txn->tw.lifo_reclaimed) { - for (size_t i = 1; i <= MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed); ++i) - if (id == txn->tw.lifo_reclaimed[i]) - goto skip; - } else if (id <= txn->tw.last_reclaimed) - goto skip; - } - - gc += *(pgno_t *)data.iov_base; - skip:; - } - tASSERT(txn, rc == MDBX_NOTFOUND); - - const size_t done_bitmap_size = (txn->mt_numdbs + CHAR_BIT - 1) / CHAR_BIT; - uint8_t *const done_bitmap = alloca(done_bitmap_size); - memset(done_bitmap, 0, done_bitmap_size); - if (txn->mt_parent) { - tASSERT(txn, txn->mt_numdbs == txn->mt_parent->mt_numdbs && - txn->mt_numdbs == txn->mt_env->me_txn->mt_numdbs); -#if MDBX_ENABLE_DBI_SPARSE - tASSERT(txn, txn->mt_dbi_sparse == txn->mt_parent->mt_dbi_sparse && - txn->mt_dbi_sparse == txn->mt_env->me_txn->mt_dbi_sparse); -#endif /* MDBX_ENABLE_DBI_SPARSE */ - } - - size_t used = NUM_METAS + - audit_db_used(audit_db_dig(txn, FREE_DBI, nullptr)) + - audit_db_used(audit_db_dig(txn, MAIN_DBI, nullptr)); - rc = cursor_init(&cx.outer, txn, MAIN_DBI); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - for (rc = page_search(&cx.outer, NULL, MDBX_PS_FIRST); rc == MDBX_SUCCESS; - rc = 
cursor_sibling(&cx.outer, SIBLING_RIGHT)) { - MDBX_page *mp = cx.outer.mc_pg[cx.outer.mc_top]; - for (size_t k = 0; k < page_numkeys(mp); k++) { - MDBX_node *node = page_node(mp, k); - if (node_flags(node) != F_SUBDATA) - continue; - if (unlikely(node_ds(node) != sizeof(MDBX_db))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid dupsort sub-tree node size", (unsigned)node_ds(node)); - return MDBX_CORRUPTED; - } - - MDBX_db reside; - const MDBX_db *db = memcpy(&reside, node_data(node), sizeof(reside)); - const MDBX_val name = {node_key(node), node_ks(node)}; - for (size_t dbi = CORE_DBS; dbi < env->me_numdbs; ++dbi) { - if (dbi >= txn->mt_numdbs || !(env->me_db_flags[dbi] & DB_VALID)) - continue; - if (env->me_dbxs[MAIN_DBI].md_cmp(&name, &env->me_dbxs[dbi].md_name)) - continue; - - done_bitmap[dbi / CHAR_BIT] |= 1 << dbi % CHAR_BIT; - db = audit_db_dig(txn, dbi, &reside); - break; - } - used += audit_db_used(db); - } - } - tASSERT(txn, rc == MDBX_NOTFOUND); - - for (size_t dbi = CORE_DBS; dbi < txn->mt_numdbs; ++dbi) { - if (done_bitmap[dbi / CHAR_BIT] & (1 << dbi % CHAR_BIT)) - continue; - const MDBX_db *db = audit_db_dig(txn, dbi, nullptr); - if (db) - used += audit_db_used(db); - else if (dbi_state(txn, dbi)) - WARNING("audit %s@%" PRIaTXN - ": unable account dbi %zd / \"%*s\", state 0x%02x", - txn->mt_parent ? "nested-" : "", txn->mt_txnid, dbi, - (int)env->me_dbxs[dbi].md_name.iov_len, - (const char *)env->me_dbxs[dbi].md_name.iov_base, - dbi_state(txn, dbi)); - } - - if (pending + gc + used == txn->mt_next_pgno) - return MDBX_SUCCESS; - - if ((txn->mt_flags & MDBX_TXN_RDONLY) == 0) - ERROR("audit @%" PRIaTXN ": %zu(pending) = %zu(loose) + " - "%zu(reclaimed) + %zu(retired-pending) - %zu(retired-stored)", - txn->mt_txnid, pending, txn->tw.loose_count, - MDBX_PNL_GETSIZE(txn->tw.relist), - txn->tw.retired_pages ? 
MDBX_PNL_GETSIZE(txn->tw.retired_pages) : 0, - retired_stored); - ERROR("audit @%" PRIaTXN ": %zu(pending) + %zu" - "(gc) + %zu(count) = %zu(total) <> %zu" - "(allocated)", - txn->mt_txnid, pending, gc, used, pending + gc + used, - (size_t)txn->mt_next_pgno); - return MDBX_PROBLEM; -} - -__cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, - bool dont_filter_gc) { - MDBX_env *const env = txn->mt_env; - int rc = osal_fastmutex_acquire(&env->me_dbi_lock); - if (likely(rc == MDBX_SUCCESS)) { - rc = audit_ex_locked(txn, retired_stored, dont_filter_gc); - ENSURE(txn->mt_env, - osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); - } - return rc; -} - -typedef struct gc_update_context { - size_t loop, reserve_adj; - size_t retired_stored; - size_t amount, reserved, cleaned_slot, reused_slot, fill_idx; - txnid_t cleaned_id, rid; - bool lifo, dense; -#if MDBX_ENABLE_BIGFOOT - txnid_t bigfoot; -#endif /* MDBX_ENABLE_BIGFOOT */ - MDBX_cursor cursor; -} gcu_context_t; - -static __inline int gcu_context_init(MDBX_txn *txn, gcu_context_t *ctx) { - memset(ctx, 0, offsetof(gcu_context_t, cursor)); - ctx->lifo = (txn->mt_env->me_flags & MDBX_LIFORECLAIM) != 0; -#if MDBX_ENABLE_BIGFOOT - ctx->bigfoot = txn->mt_txnid; -#endif /* MDBX_ENABLE_BIGFOOT */ - return cursor_init(&ctx->cursor, txn, FREE_DBI); -} - -MDBX_MAYBE_UNUSED static __inline const char * -gcu_dbg_prefix(gcu_context_t *ctx) { - return ctx->lifo ? 
" lifo" : " fifo"; -} - -static __always_inline size_t gcu_backlog_size(MDBX_txn *txn) { - return MDBX_PNL_GETSIZE(txn->tw.relist) + txn->tw.loose_count; -} - -static int gcu_clean_stored_retired(MDBX_txn *txn, gcu_context_t *ctx) { - int err = MDBX_SUCCESS; - if (ctx->retired_stored) { - MDBX_cursor *const gc = ptr_disp(txn, sizeof(MDBX_txn)); - tASSERT(txn, txn == txn->mt_env->me_txn0 && gc->mc_next == nullptr); - gc->mc_txn = txn; - gc->mc_flags = 0; - gc->mc_next = txn->mt_cursors[FREE_DBI]; - txn->mt_cursors[FREE_DBI] = gc; - do { - MDBX_val key, val; -#if MDBX_ENABLE_BIGFOOT - key.iov_base = &ctx->bigfoot; -#else - key.iov_base = &txn->mt_txnid; -#endif /* MDBX_ENABLE_BIGFOOT */ - key.iov_len = sizeof(txnid_t); - const struct cursor_set_result csr = cursor_set(gc, &key, &val, MDBX_SET); - if (csr.err == MDBX_SUCCESS && csr.exact) { - ctx->retired_stored = 0; - err = cursor_del(gc, 0); - TRACE("== clear-4linear, backlog %zu, err %d", gcu_backlog_size(txn), - err); - } else - err = (err == MDBX_NOTFOUND) ? MDBX_SUCCESS : err; - } -#if MDBX_ENABLE_BIGFOOT - while (!err && --ctx->bigfoot >= txn->mt_txnid); -#else - while (0); -#endif /* MDBX_ENABLE_BIGFOOT */ - txn->mt_cursors[FREE_DBI] = gc->mc_next; - gc->mc_next = nullptr; - } - return err; -} - -static int gcu_touch(gcu_context_t *ctx) { - MDBX_val key, val; - key.iov_base = val.iov_base = nullptr; - key.iov_len = sizeof(txnid_t); - val.iov_len = MDBX_PNL_SIZEOF(ctx->cursor.mc_txn->tw.retired_pages); - ctx->cursor.mc_flags |= C_GCU; - int err = cursor_touch(&ctx->cursor, &key, &val); - ctx->cursor.mc_flags -= C_GCU; - return err; -} - -/* Prepare a backlog of pages to modify GC itself, while reclaiming is - * prohibited. It should be enough to prevent search in page_alloc_slowpath() - * during a deleting, when GC tree is unbalanced. 
*/ -static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx) { - const size_t for_cow = txn->mt_dbs[FREE_DBI].md_depth; - const size_t for_rebalance = for_cow + 1 + - (txn->mt_dbs[FREE_DBI].md_depth + 1ul >= - txn->mt_dbs[FREE_DBI].md_branch_pages); - size_t for_split = ctx->retired_stored == 0; - - const intptr_t retired_left = - MDBX_PNL_SIZEOF(txn->tw.retired_pages) - ctx->retired_stored; - size_t for_relist = 0; - if (MDBX_ENABLE_BIGFOOT && retired_left > 0) { - for_relist = (retired_left + txn->mt_env->me_maxgc_ov1page - 1) / - txn->mt_env->me_maxgc_ov1page; - const size_t per_branch_page = txn->mt_env->me_maxgc_per_branch; - for (size_t entries = for_relist; entries > 1; for_split += entries) - entries = (entries + per_branch_page - 1) / per_branch_page; - } else if (!MDBX_ENABLE_BIGFOOT && retired_left != 0) { - for_relist = - number_of_ovpages(txn->mt_env, MDBX_PNL_SIZEOF(txn->tw.retired_pages)); - } - - const size_t for_tree_before_touch = for_cow + for_rebalance + for_split; - const size_t for_tree_after_touch = for_rebalance + for_split; - const size_t for_all_before_touch = for_relist + for_tree_before_touch; - const size_t for_all_after_touch = for_relist + for_tree_after_touch; - - if (likely(for_relist < 2 && gcu_backlog_size(txn) > for_all_before_touch) && - (ctx->cursor.mc_snum == 0 || - IS_MODIFIABLE(txn, ctx->cursor.mc_pg[ctx->cursor.mc_top]))) - return MDBX_SUCCESS; - - TRACE(">> retired-stored %zu, left %zi, backlog %zu, need %zu (4list %zu, " - "4split %zu, " - "4cow %zu, 4tree %zu)", - ctx->retired_stored, retired_left, gcu_backlog_size(txn), - for_all_before_touch, for_relist, for_split, for_cow, - for_tree_before_touch); - - int err = gcu_touch(ctx); - TRACE("== after-touch, backlog %zu, err %d", gcu_backlog_size(txn), err); - - if (!MDBX_ENABLE_BIGFOOT && unlikely(for_relist > 1) && - MDBX_PNL_GETSIZE(txn->tw.retired_pages) != ctx->retired_stored && - err == MDBX_SUCCESS) { - if (unlikely(ctx->retired_stored)) { - err = 
gcu_clean_stored_retired(txn, ctx); - if (unlikely(err != MDBX_SUCCESS)) - return err; - if (!ctx->retired_stored) - return /* restart by tail-recursion */ gcu_prepare_backlog(txn, ctx); - } - err = page_alloc_slowpath(&ctx->cursor, for_relist, MDBX_ALLOC_RESERVE).err; - TRACE("== after-4linear, backlog %zu, err %d", gcu_backlog_size(txn), err); - cASSERT(&ctx->cursor, - gcu_backlog_size(txn) >= for_relist || err != MDBX_SUCCESS); - } - - while (gcu_backlog_size(txn) < for_all_after_touch && err == MDBX_SUCCESS) - err = page_alloc_slowpath(&ctx->cursor, 0, - MDBX_ALLOC_RESERVE | MDBX_ALLOC_UNIMPORTANT) - .err; - - TRACE("<< backlog %zu, err %d, gc: height %u, branch %zu, leaf %zu, large " - "%zu, entries %zu", - gcu_backlog_size(txn), err, txn->mt_dbs[FREE_DBI].md_depth, - (size_t)txn->mt_dbs[FREE_DBI].md_branch_pages, - (size_t)txn->mt_dbs[FREE_DBI].md_leaf_pages, - (size_t)txn->mt_dbs[FREE_DBI].md_overflow_pages, - (size_t)txn->mt_dbs[FREE_DBI].md_entries); - tASSERT(txn, - err != MDBX_NOTFOUND || (txn->mt_flags & MDBX_TXN_DRAINED_GC) != 0); - return (err != MDBX_NOTFOUND) ? err : MDBX_SUCCESS; -} - -static __inline void gcu_zeroize_reserved(MDBX_env *env, MDBX_val pnl) { -#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)) - /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() - * вызванное через макрос DVAL_DEBUG() на выходе - * из cursor_set(MDBX_SET_KEY), которая вызывается ниже внутри update_gc() в - * цикле очистки и цикле заполнения зарезервированных элементов. 
*/ - memset(pnl.iov_base, 0xBB, pnl.iov_len); -#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */ - - /* PNL is initially empty, zero out at least the length */ - memset(pnl.iov_base, 0, sizeof(pgno_t)); - if ((env->me_flags & (MDBX_WRITEMAP | MDBX_NOMEMINIT)) == 0) - /* zero out to avoid leaking values from uninitialized malloc'ed memory - * to the file in non-writemap mode if length of the saving page-list - * was changed during space reservation. */ - memset(pnl.iov_base, 0, pnl.iov_len); -} - -static int gcu_loose(MDBX_txn *txn, gcu_context_t *ctx) { - tASSERT(txn, txn->tw.loose_count > 0); - /* Return loose page numbers to tw.relist, - * though usually none are left at this point. - * The pages themselves remain in dirtylist. */ - if (unlikely(!txn->tw.lifo_reclaimed && txn->tw.last_reclaimed < 1)) { - TRACE("%s: try allocate gc-slot for %zu loose-pages", gcu_dbg_prefix(ctx), - txn->tw.loose_count); - int err = page_alloc_slowpath(&ctx->cursor, 0, MDBX_ALLOC_RESERVE).err; - if (err == MDBX_SUCCESS) { - TRACE("%s: retry since gc-slot for %zu loose-pages available", - gcu_dbg_prefix(ctx), txn->tw.loose_count); - return MDBX_SUCCESS; - } - - /* Put loose page numbers in tw.retired_pages, - * since unable to return ones to tw.relist. 
*/ - err = pnl_need(&txn->tw.retired_pages, txn->tw.loose_count); - if (unlikely(err != MDBX_SUCCESS)) - return err; - for (MDBX_page *lp = txn->tw.loose_pages; lp; lp = mp_next(lp)) { - pnl_xappend(txn->tw.retired_pages, lp->mp_pgno); - MDBX_ASAN_UNPOISON_MEMORY_REGION(&mp_next(lp), sizeof(MDBX_page *)); - VALGRIND_MAKE_MEM_DEFINED(&mp_next(lp), sizeof(MDBX_page *)); - } - TRACE("%s: append %zu loose-pages to retired-pages", gcu_dbg_prefix(ctx), - txn->tw.loose_count); - } else { - /* Room for loose pages + temp PNL with same */ - int err = pnl_need(&txn->tw.relist, 2 * txn->tw.loose_count + 2); - if (unlikely(err != MDBX_SUCCESS)) - return err; - MDBX_PNL loose = txn->tw.relist + MDBX_PNL_ALLOCLEN(txn->tw.relist) - - txn->tw.loose_count - 1; - size_t count = 0; - for (MDBX_page *lp = txn->tw.loose_pages; lp; lp = mp_next(lp)) { - tASSERT(txn, lp->mp_flags == P_LOOSE); - loose[++count] = lp->mp_pgno; - MDBX_ASAN_UNPOISON_MEMORY_REGION(&mp_next(lp), sizeof(MDBX_page *)); - VALGRIND_MAKE_MEM_DEFINED(&mp_next(lp), sizeof(MDBX_page *)); - } - tASSERT(txn, count == txn->tw.loose_count); - MDBX_PNL_SETSIZE(loose, count); - pnl_sort(loose, txn->mt_next_pgno); - pnl_merge(txn->tw.relist, loose); - TRACE("%s: append %zu loose-pages to reclaimed-pages", gcu_dbg_prefix(ctx), - txn->tw.loose_count); - } - - /* filter-out list of dirty-pages from loose-pages */ - MDBX_dpl *const dl = txn->tw.dirtylist; - if (dl) { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - tASSERT(txn, dl->sorted <= dl->length); - size_t w = 0, sorted_out = 0; - for (size_t r = w; ++r <= dl->length;) { - MDBX_page *dp = dl->items[r].ptr; - tASSERT(txn, dp->mp_flags == P_LOOSE || IS_MODIFIABLE(txn, dp)); - tASSERT(txn, dpl_endpgno(dl, r) <= txn->mt_next_pgno); - if ((dp->mp_flags & P_LOOSE) == 0) { - if (++w != r) - dl->items[w] = dl->items[r]; - } else { - tASSERT(txn, dp->mp_flags == P_LOOSE); - sorted_out += dl->sorted >= r; - if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & 
MDBX_WRITEMAP)) - dpage_free(txn->mt_env, dp, 1); - } - } - TRACE("%s: filtered-out loose-pages from %zu -> %zu dirty-pages", - gcu_dbg_prefix(ctx), dl->length, w); - tASSERT(txn, txn->tw.loose_count == dl->length - w); - dl->sorted -= sorted_out; - tASSERT(txn, dl->sorted <= w); - dpl_setlen(dl, w); - dl->pages_including_loose -= txn->tw.loose_count; - txn->tw.dirtyroom += txn->tw.loose_count; - tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == - (txn->mt_parent ? txn->mt_parent->tw.dirtyroom - : txn->mt_env->me_options.dp_limit)); - } else { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); - } - txn->tw.loose_pages = NULL; - txn->tw.loose_count = 0; -#if MDBX_ENABLE_REFUND - txn->tw.loose_refund_wl = 0; -#endif /* MDBX_ENABLE_REFUND */ - return MDBX_SUCCESS; -} - -static int gcu_retired(MDBX_txn *txn, gcu_context_t *ctx) { - int err; - if (unlikely(!ctx->retired_stored)) { - /* Make sure last page of GC is touched and on retired-list */ - err = cursor_last(&ctx->cursor, nullptr, nullptr); - if (likely(err == MDBX_SUCCESS)) - err = gcu_touch(ctx); - if (unlikely(err != MDBX_SUCCESS) && err != MDBX_NOTFOUND) - return err; - } - - MDBX_val key, data; -#if MDBX_ENABLE_BIGFOOT - size_t retired_pages_before; - do { - if (ctx->bigfoot > txn->mt_txnid) { - err = gcu_clean_stored_retired(txn, ctx); - if (unlikely(err != MDBX_SUCCESS)) - return err; - tASSERT(txn, ctx->bigfoot <= txn->mt_txnid); - } - - retired_pages_before = MDBX_PNL_GETSIZE(txn->tw.retired_pages); - err = gcu_prepare_backlog(txn, ctx); - if (unlikely(err != MDBX_SUCCESS)) - return err; - if (retired_pages_before != MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { - TRACE("%s: retired-list changed (%zu -> %zu), retry", gcu_dbg_prefix(ctx), - retired_pages_before, MDBX_PNL_GETSIZE(txn->tw.retired_pages)); - break; - } - - pnl_sort(txn->tw.retired_pages, txn->mt_next_pgno); - ctx->retired_stored = 0; - ctx->bigfoot = txn->mt_txnid; - do { - if (ctx->retired_stored) { - err = 
gcu_prepare_backlog(txn, ctx); - if (unlikely(err != MDBX_SUCCESS)) - return err; - if (ctx->retired_stored >= MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { - TRACE("%s: retired-list changed (%zu -> %zu), retry", - gcu_dbg_prefix(ctx), retired_pages_before, - MDBX_PNL_GETSIZE(txn->tw.retired_pages)); - break; - } - } - key.iov_len = sizeof(txnid_t); - key.iov_base = &ctx->bigfoot; - const size_t left = - MDBX_PNL_GETSIZE(txn->tw.retired_pages) - ctx->retired_stored; - const size_t chunk = - (left > txn->mt_env->me_maxgc_ov1page && ctx->bigfoot < MAX_TXNID) - ? txn->mt_env->me_maxgc_ov1page - : left; - data.iov_len = (chunk + 1) * sizeof(pgno_t); - err = cursor_put_nochecklen(&ctx->cursor, &key, &data, MDBX_RESERVE); - if (unlikely(err != MDBX_SUCCESS)) - return err; - -#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)) - /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() - * вызванное через макрос DVAL_DEBUG() на выходе - * из cursor_set(MDBX_SET_KEY), которая вызывается как выше в цикле - * очистки, так и ниже в цикле заполнения зарезервированных элементов. - */ - memset(data.iov_base, 0xBB, data.iov_len); -#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */ - - if (retired_pages_before == MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { - const size_t at = (ctx->lifo == MDBX_PNL_ASCENDING) - ? left - chunk - : ctx->retired_stored; - pgno_t *const begin = txn->tw.retired_pages + at; - /* MDBX_PNL_ASCENDING == false && LIFO == false: - * - the larger pgno is at the beginning of retired list - * and should be placed with the larger txnid. - * MDBX_PNL_ASCENDING == true && LIFO == true: - * - the larger pgno is at the ending of retired list - * and should be placed with the smaller txnid. 
*/ - const pgno_t save = *begin; - *begin = (pgno_t)chunk; - memcpy(data.iov_base, begin, data.iov_len); - *begin = save; - TRACE("%s: put-retired/bigfoot @ %" PRIaTXN - " (slice #%u) #%zu [%zu..%zu] of %zu", - gcu_dbg_prefix(ctx), ctx->bigfoot, - (unsigned)(ctx->bigfoot - txn->mt_txnid), chunk, at, at + chunk, - retired_pages_before); - } - ctx->retired_stored += chunk; - } while (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages) && - (++ctx->bigfoot, true)); - } while (retired_pages_before != MDBX_PNL_GETSIZE(txn->tw.retired_pages)); -#else - /* Write to last page of GC */ - key.iov_len = sizeof(txnid_t); - key.iov_base = &txn->mt_txnid; - do { - gcu_prepare_backlog(txn, ctx); - data.iov_len = MDBX_PNL_SIZEOF(txn->tw.retired_pages); - err = cursor_put_nochecklen(&ctx->cursor, &key, &data, MDBX_RESERVE); - if (unlikely(err != MDBX_SUCCESS)) - return err; - -#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)) - /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() - * вызванное через макрос DVAL_DEBUG() на выходе - * из cursor_set(MDBX_SET_KEY), которая вызывается как выше в цикле - * очистки, так и ниже в цикле заполнения зарезервированных элементов. 
*/ - memset(data.iov_base, 0xBB, data.iov_len); -#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */ - - /* Retry if tw.retired_pages[] grew during the Put() */ - } while (data.iov_len < MDBX_PNL_SIZEOF(txn->tw.retired_pages)); - - ctx->retired_stored = MDBX_PNL_GETSIZE(txn->tw.retired_pages); - pnl_sort(txn->tw.retired_pages, txn->mt_next_pgno); - eASSERT(env, data.iov_len == MDBX_PNL_SIZEOF(txn->tw.retired_pages)); - memcpy(data.iov_base, txn->tw.retired_pages, data.iov_len); - - TRACE("%s: put-retired #%zu @ %" PRIaTXN, gcu_dbg_prefix(ctx), - ctx->retired_stored, txn->mt_txnid); -#endif /* MDBX_ENABLE_BIGFOOT */ - if (LOG_ENABLED(MDBX_LOG_EXTRA)) { - size_t i = ctx->retired_stored; - DEBUG_EXTRA("txn %" PRIaTXN " root %" PRIaPGNO " num %zu, retired-PNL", - txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, i); - for (; i; i--) - DEBUG_EXTRA_PRINT(" %" PRIaPGNO, txn->tw.retired_pages[i]); - DEBUG_EXTRA_PRINT("%s\n", "."); - } - return MDBX_SUCCESS; -} - -typedef struct gcu_rid_result -{ - int err; - txnid_t rid; -} gcu_rid_result; - -static gcu_rid_result gcu_get_rid_for_reclaimed(MDBX_txn *txn, gcu_context_t *ctx, const size_t left) { - gcu_rid_result r; - if (ctx->lifo) { - if (txn->tw.lifo_reclaimed == nullptr) { - txn->tw.lifo_reclaimed = txl_alloc(); - if (unlikely(!txn->tw.lifo_reclaimed)) { - r.err = MDBX_ENOMEM; - goto return_error; - } - } - if (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) < MDBX_TXL_MAX && - left > (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot) * - txn->mt_env->me_maxgc_ov1page && - !ctx->dense) { - /* Hужен свободный для для сохранения списка страниц. 
*/ - bool need_cleanup = false; - txnid_t snap_oldest = 0; - retry_rid: - do { - r.err = page_alloc_slowpath(&ctx->cursor, 0, MDBX_ALLOC_RESERVE).err; - snap_oldest = txn->mt_env->me_lck->mti_oldest_reader.weak; - if (likely(r.err == MDBX_SUCCESS)) { - TRACE("%s: took @%" PRIaTXN " from GC", gcu_dbg_prefix(ctx), - MDBX_PNL_LAST(txn->tw.lifo_reclaimed)); - need_cleanup = true; - } - } while ( - r.err == MDBX_SUCCESS && - MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) < MDBX_TXL_MAX && - left > - (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot) * - txn->mt_env->me_maxgc_ov1page); - - if (likely(r.err == MDBX_SUCCESS)) { - TRACE("%s: got enough from GC.", gcu_dbg_prefix(ctx)); - goto return_continue; - } else if (unlikely(r.err != MDBX_NOTFOUND)) - /* LY: some troubles... */ - goto return_error; - - if (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed)) { - if (need_cleanup) { - txl_sort(txn->tw.lifo_reclaimed); - ctx->cleaned_slot = 0; - } - ctx->rid = MDBX_PNL_LAST(txn->tw.lifo_reclaimed); - } else { - tASSERT(txn, txn->tw.last_reclaimed == 0); - if (unlikely(txn_oldest_reader(txn) != snap_oldest)) - /* should retry page_alloc_slowpath() - * if the oldest reader changes since the last attempt */ - goto retry_rid; - /* no reclaimable GC entries, - * therefore no entries with ID < mdbx_find_oldest(txn) */ - txn->tw.last_reclaimed = ctx->rid = snap_oldest; - TRACE("%s: none recycled yet, set rid to @%" PRIaTXN, - gcu_dbg_prefix(ctx), ctx->rid); - } - - /* В GC нет годных к переработке записей, - * будем использовать свободные id в обратном порядке. 
*/ - while (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) < MDBX_TXL_MAX && - left > (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - - ctx->reused_slot) * - txn->mt_env->me_maxgc_ov1page) { - if (unlikely(ctx->rid <= MIN_TXNID)) { - if (unlikely(MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) <= - ctx->reused_slot)) { - NOTICE("** restart: reserve depleted (reused_gc_slot %zu >= " - "lifo_reclaimed %zu)", - ctx->reused_slot, - MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed)); - goto return_restart; - } - break; - } - - tASSERT(txn, ctx->rid >= MIN_TXNID && ctx->rid <= MAX_TXNID); - ctx->rid -= 1; - MDBX_val key = {&ctx->rid, sizeof(ctx->rid)}, data; - r.err = cursor_set(&ctx->cursor, &key, &data, MDBX_SET_KEY).err; - if (unlikely(r.err == MDBX_SUCCESS)) { - DEBUG("%s: GC's id %" PRIaTXN " is present, going to first", - gcu_dbg_prefix(ctx), ctx->rid); - r.err = cursor_first(&ctx->cursor, &key, nullptr); - if (unlikely(r.err != MDBX_SUCCESS || - key.iov_len != sizeof(txnid_t))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid GC-key size", (unsigned)key.iov_len); - r.err = MDBX_CORRUPTED; - goto return_error; - } - const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base); - if (unlikely(gc_first <= MIN_TXNID)) { - DEBUG("%s: no free GC's id(s) less than %" PRIaTXN - " (going dense-mode)", - gcu_dbg_prefix(ctx), ctx->rid); - ctx->dense = true; - goto return_restart; - } - ctx->rid = gc_first - 1; - } - - tASSERT(txn, !ctx->dense); - r.err = txl_append(&txn->tw.lifo_reclaimed, ctx->rid); - if (unlikely(r.err != MDBX_SUCCESS)) - goto return_error; - - if (ctx->reused_slot) - /* rare case, but it is better to clear and re-create GC entries - * with less fragmentation. */ - need_cleanup = true; - else - ctx->cleaned_slot += - 1 /* mark cleanup is not needed for added slot. 
*/; - - TRACE("%s: append @%" PRIaTXN - " to lifo-reclaimed, cleaned-gc-slot = %zu", - gcu_dbg_prefix(ctx), ctx->rid, ctx->cleaned_slot); - } - - if (need_cleanup) { - if (ctx->cleaned_slot) { - TRACE("%s: restart to clear and re-create GC entries", - gcu_dbg_prefix(ctx)); - goto return_restart; - } - goto return_continue; - } - } - - const size_t i = - MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot; - tASSERT(txn, i > 0 && i <= MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed)); - r.rid = txn->tw.lifo_reclaimed[i]; - TRACE("%s: take @%" PRIaTXN " from lifo-reclaimed[%zu]", - gcu_dbg_prefix(ctx), r.rid, i); - } else { - tASSERT(txn, txn->tw.lifo_reclaimed == NULL); - if (unlikely(ctx->rid == 0)) { - ctx->rid = txn_oldest_reader(txn); - MDBX_val key; - r.err = cursor_first(&ctx->cursor, &key, nullptr); - if (likely(r.err == MDBX_SUCCESS)) { - if (unlikely(key.iov_len != sizeof(txnid_t))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid GC-key size", (unsigned)key.iov_len); - r.err = MDBX_CORRUPTED; - goto return_error; - } - const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base); - if (ctx->rid >= gc_first) - ctx->rid = gc_first - 1; - if (unlikely(ctx->rid == 0)) { - ERROR("%s", "** no GC tail-space to store (going dense-mode)"); - ctx->dense = true; - goto return_restart; - } - } else if (r.err != MDBX_NOTFOUND) - return r; - txn->tw.last_reclaimed = ctx->rid; - ctx->cleaned_id = ctx->rid + 1; - } - r.rid = ctx->rid--; - TRACE("%s: take @%" PRIaTXN " from GC", gcu_dbg_prefix(ctx), - r.rid); - } - ++ctx->reused_slot; - r.err = MDBX_SUCCESS; - return r; - -return_continue: - r.err = MDBX_SUCCESS; - r.rid = 0; - return r; - -return_restart: - r.err = MDBX_RESULT_TRUE; - r.rid = 0; - return r; - -return_error: - tASSERT(txn, r.err != MDBX_SUCCESS); - r.rid = 0; - return r; -} - -/* Cleanups reclaimed GC (aka freeDB) records, saves the retired-list (aka - * freelist) of current transaction to GC, puts back into GC leftover of the - * 
reclaimed pages with chunking. This recursive changes the reclaimed-list, - * loose-list and retired-list. Keep trying until it stabilizes. - * - * NOTE: This code is a consequence of many iterations of adding crutches (aka - * "checks and balances") to partially bypass the fundamental design problems - * inherited from LMDB. So do not try to understand it completely in order to - * avoid your madness. */ -static int update_gc(MDBX_txn *txn, gcu_context_t *ctx) { - TRACE("\n>>> @%" PRIaTXN, txn->mt_txnid); - MDBX_env *const env = txn->mt_env; - ctx->cursor.mc_next = txn->mt_cursors[FREE_DBI]; - txn->mt_cursors[FREE_DBI] = &ctx->cursor; - - pgno_t prev_next_pgno = 0; - /* txn->tw.relist[] can grow and shrink during this call. - * txn->tw.last_reclaimed and txn->tw.retired_pages[] can only grow. - * But page numbers cannot disappear from txn->tw.retired_pages[]. */ -retry_clean_adj: - ctx->reserve_adj = 0; -retry: - ctx->loop += prev_next_pgno == txn->mt_next_pgno; - prev_next_pgno = txn->mt_next_pgno; - - if (ctx->loop) - TRACE("%s", " >> restart"); - int rc = MDBX_SUCCESS; - tASSERT(txn, pnl_check_allocated(txn->tw.relist, - txn->mt_next_pgno - MDBX_ENABLE_REFUND)); - tASSERT(txn, dirtylist_check(txn)); - if (unlikely(/* paranoia */ ctx->loop > ((MDBX_DEBUG > 0) ? 
12 : 42))) { - ERROR("too more loops %zu, bailout", ctx->loop); - rc = MDBX_PROBLEM; - goto bailout; - } - - if (unlikely(ctx->dense)) { - rc = gcu_clean_stored_retired(txn, ctx); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - - ctx->reserved = 0; - ctx->cleaned_slot = 0; - ctx->reused_slot = 0; - ctx->amount = ctx->fill_idx = ~0u; - ctx->cleaned_id = 0; - ctx->rid = txn->tw.last_reclaimed; - while (true) { - /* Come back here after each Put() in case retired-list changed */ - TRACE("%s", " >> continue"); - - if (ctx->retired_stored != MDBX_PNL_GETSIZE(txn->tw.retired_pages) && - (ctx->loop == 1 || ctx->retired_stored > env->me_maxgc_ov1page || - MDBX_PNL_GETSIZE(txn->tw.retired_pages) > env->me_maxgc_ov1page)) { - rc = gcu_prepare_backlog(txn, ctx); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - - tASSERT(txn, pnl_check_allocated(txn->tw.relist, - txn->mt_next_pgno - MDBX_ENABLE_REFUND)); - MDBX_val key, data; - if (ctx->lifo) { - if (ctx->cleaned_slot < (txn->tw.lifo_reclaimed - ? MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - : 0)) { - ctx->reserved = 0; - ctx->cleaned_slot = 0; - ctx->reused_slot = 0; - ctx->fill_idx = ~0u; - /* LY: cleanup reclaimed records. 
*/ - do { - ctx->cleaned_id = txn->tw.lifo_reclaimed[++ctx->cleaned_slot]; - tASSERT(txn, - ctx->cleaned_slot > 0 && - ctx->cleaned_id <= env->me_lck->mti_oldest_reader.weak); - key.iov_base = &ctx->cleaned_id; - key.iov_len = sizeof(ctx->cleaned_id); - rc = cursor_set(&ctx->cursor, &key, NULL, MDBX_SET).err; - if (rc == MDBX_NOTFOUND) - continue; - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - if (likely(!ctx->dense)) { - rc = gcu_prepare_backlog(txn, ctx); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - tASSERT(txn, ctx->cleaned_id <= env->me_lck->mti_oldest_reader.weak); - TRACE("%s: cleanup-reclaimed-id [%zu]%" PRIaTXN, gcu_dbg_prefix(ctx), - ctx->cleaned_slot, ctx->cleaned_id); - tASSERT(txn, *txn->mt_cursors == &ctx->cursor); - rc = cursor_del(&ctx->cursor, 0); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } while (ctx->cleaned_slot < MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed)); - txl_sort(txn->tw.lifo_reclaimed); - } - } else { - /* Удаляем оставшиеся вынутые из GC записи. 
*/ - while (ctx->cleaned_id <= txn->tw.last_reclaimed) { - rc = cursor_first(&ctx->cursor, &key, NULL); - if (rc == MDBX_NOTFOUND) - break; - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - if (!MDBX_DISABLE_VALIDATION && - unlikely(key.iov_len != sizeof(txnid_t))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid GC-key size", (unsigned)key.iov_len); - rc = MDBX_CORRUPTED; - goto bailout; - } - if (ctx->rid != ctx->cleaned_id) { - ctx->rid = ctx->cleaned_id; - ctx->reserved = 0; - ctx->reused_slot = 0; - } - ctx->cleaned_id = unaligned_peek_u64(4, key.iov_base); - if (ctx->cleaned_id > txn->tw.last_reclaimed) - break; - if (likely(!ctx->dense)) { - rc = gcu_prepare_backlog(txn, ctx); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - tASSERT(txn, ctx->cleaned_id <= txn->tw.last_reclaimed); - tASSERT(txn, ctx->cleaned_id <= env->me_lck->mti_oldest_reader.weak); - TRACE("%s: cleanup-reclaimed-id %" PRIaTXN, gcu_dbg_prefix(ctx), - ctx->cleaned_id); - tASSERT(txn, *txn->mt_cursors == &ctx->cursor); - rc = cursor_del(&ctx->cursor, 0); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - } - - tASSERT(txn, pnl_check_allocated(txn->tw.relist, - txn->mt_next_pgno - MDBX_ENABLE_REFUND)); - tASSERT(txn, dirtylist_check(txn)); - if (AUDIT_ENABLED()) { - rc = audit_ex(txn, ctx->retired_stored, false); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - - /* return suitable into unallocated space */ - if (txn_refund(txn)) { - tASSERT(txn, pnl_check_allocated(txn->tw.relist, - txn->mt_next_pgno - MDBX_ENABLE_REFUND)); - if (AUDIT_ENABLED()) { - rc = audit_ex(txn, ctx->retired_stored, false); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - } - - if (txn->tw.loose_pages) { - /* put loose pages into the reclaimed- or retired-list */ - rc = gcu_loose(txn, ctx); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - if (unlikely(txn->tw.loose_pages)) - continue; - } - - if (unlikely(ctx->reserved > MDBX_PNL_GETSIZE(txn->tw.relist)) 
&& - (ctx->loop < 5 || ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.relist) > - env->me_maxgc_ov1page / 2)) { - TRACE("%s: reclaimed-list changed %zu -> %zu, retry", gcu_dbg_prefix(ctx), - ctx->amount, MDBX_PNL_GETSIZE(txn->tw.relist)); - ctx->reserve_adj += ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.relist); - goto retry; - } - ctx->amount = MDBX_PNL_GETSIZE(txn->tw.relist); - - if (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { - /* store retired-list into GC */ - rc = gcu_retired(txn, ctx); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - continue; - } - - tASSERT(txn, pnl_check_allocated(txn->tw.relist, - txn->mt_next_pgno - MDBX_ENABLE_REFUND)); - tASSERT(txn, txn->tw.loose_count == 0); - - TRACE("%s", " >> reserving"); - if (AUDIT_ENABLED()) { - rc = audit_ex(txn, ctx->retired_stored, false); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - const size_t left = ctx->amount - ctx->reserved - ctx->reserve_adj; - TRACE("%s: amount %zu, reserved %zd, reserve_adj %zu, left %zd, " - "lifo-reclaimed-slots %zu, " - "reused-gc-slots %zu", - gcu_dbg_prefix(ctx), ctx->amount, ctx->reserved, ctx->reserve_adj, - left, - txn->tw.lifo_reclaimed ? MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) : 0, - ctx->reused_slot); - if (0 >= (intptr_t)left) - break; - - const gcu_rid_result rid_result = gcu_get_rid_for_reclaimed(txn, ctx, left); - if (unlikely(!rid_result.rid)) { - rc = rid_result.err; - if (likely(rc == MDBX_SUCCESS)) - continue; - if (likely(rc == MDBX_RESULT_TRUE)) - goto retry; - goto bailout; - } - tASSERT(txn, rid_result.err == MDBX_SUCCESS); - const txnid_t reservation_gc_id = rid_result.rid; - - // const size_t prefer_max_scatter = MDBX_ENABLE_BIGFOOT ? MDBX_TXL_MAX : 257; - size_t chunk = left; - if (unlikely(left > env->me_maxgc_ov1page)) { - const size_t avail_gc_slots = - txn->tw.lifo_reclaimed - ? MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot + 1 - : (ctx->rid < INT16_MAX) ? 
(size_t)ctx->rid - : INT16_MAX; - if (likely(avail_gc_slots > 1)) { -#if MDBX_ENABLE_BIGFOOT - chunk = env->me_maxgc_ov1page; - if (avail_gc_slots < INT16_MAX && - unlikely(left > env->me_maxgc_ov1page * avail_gc_slots)) - /* TODO: Можно смотреть последовательности какой длины есть в relist - * и пробовать нарезать куски соответствующего размера. - * Смысл в том, чтобы не дробить последовательности страниц, - * а использовать целиком. */ - chunk = env->me_maxgc_ov1page + - left / (env->me_maxgc_ov1page * avail_gc_slots) * - env->me_maxgc_ov1page; -#else - if (chunk < env->me_maxgc_ov1page * 2) - chunk /= 2; - else { - const size_t threshold = - env->me_maxgc_ov1page * ((avail_gc_slots < prefer_max_scatter) - ? avail_gc_slots - : prefer_max_scatter); - if (left < threshold) - chunk = env->me_maxgc_ov1page; - else { - const size_t tail = left - threshold + env->me_maxgc_ov1page + 1; - size_t span = 1; - size_t avail = ((pgno2bytes(env, span) - PAGEHDRSZ) / - sizeof(pgno_t)) /* - 1 + span */; - if (tail > avail) { - for (size_t i = ctx->amount - span; i > 0; --i) { - if (MDBX_PNL_ASCENDING ? (txn->tw.relist[i] + span) - : (txn->tw.relist[i] - span) == - txn->tw.relist[i + span]) { - span += 1; - avail = - ((pgno2bytes(env, span) - PAGEHDRSZ) / sizeof(pgno_t)) - - 1 + span; - if (avail >= tail) - break; - } - } - } - - chunk = (avail >= tail) ? tail - span - : (avail_gc_slots > 3 && - ctx->reused_slot < prefer_max_scatter - 3) - ? 
avail - span - : tail; - } - } -#endif /* MDBX_ENABLE_BIGFOOT */ - } - } - tASSERT(txn, chunk > 0); - - TRACE("%s: gc_rid %" PRIaTXN ", reused_gc_slot %zu, reservation-id " - "%" PRIaTXN, - gcu_dbg_prefix(ctx), ctx->rid, ctx->reused_slot, reservation_gc_id); - - TRACE("%s: chunk %zu, gc-per-ovpage %u", gcu_dbg_prefix(ctx), chunk, - env->me_maxgc_ov1page); - - tASSERT(txn, reservation_gc_id <= env->me_lck->mti_oldest_reader.weak); - if (unlikely( - reservation_gc_id < MIN_TXNID || - reservation_gc_id > - atomic_load64(&env->me_lck->mti_oldest_reader, mo_Relaxed))) { - ERROR("** internal error (reservation_gc_id %" PRIaTXN ")", - reservation_gc_id); - rc = MDBX_PROBLEM; - goto bailout; - } - - key.iov_len = sizeof(reservation_gc_id); - key.iov_base = (void*)&reservation_gc_id; - data.iov_len = (chunk + 1) * sizeof(pgno_t); - TRACE("%s: reserve %zu [%zu...%zu) @%" PRIaTXN, gcu_dbg_prefix(ctx), chunk, - ctx->reserved + 1, ctx->reserved + chunk + 1, reservation_gc_id); - gcu_prepare_backlog(txn, ctx); - rc = cursor_put_nochecklen(&ctx->cursor, &key, &data, - MDBX_RESERVE | MDBX_NOOVERWRITE); - tASSERT(txn, pnl_check_allocated(txn->tw.relist, - txn->mt_next_pgno - MDBX_ENABLE_REFUND)); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - - gcu_zeroize_reserved(env, data); - ctx->reserved += chunk; - TRACE("%s: reserved %zu (+%zu), continue", gcu_dbg_prefix(ctx), - ctx->reserved, chunk); - - continue; - } - - tASSERT(txn, - ctx->cleaned_slot == (txn->tw.lifo_reclaimed - ? MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - : 0)); - - TRACE("%s", " >> filling"); - /* Fill in the reserved records */ - size_t excess_slots = 0; - ctx->fill_idx = - txn->tw.lifo_reclaimed - ? 
MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot - : ctx->reused_slot; - rc = MDBX_SUCCESS; - tASSERT(txn, pnl_check_allocated(txn->tw.relist, - txn->mt_next_pgno - MDBX_ENABLE_REFUND)); - tASSERT(txn, dirtylist_check(txn)); - if (ctx->amount) { - MDBX_val key, data; - key.iov_len = data.iov_len = 0; /* avoid MSVC warning */ - key.iov_base = data.iov_base = NULL; - - size_t left = ctx->amount, excess = 0; - if (txn->tw.lifo_reclaimed == nullptr) { - tASSERT(txn, ctx->lifo == 0); - rc = cursor_first(&ctx->cursor, &key, &data); - if (unlikely(rc != MDBX_SUCCESS)) { - if (rc == MDBX_NOTFOUND && ctx->reserve_adj) - goto retry_clean_adj; - goto bailout; - } - } else { - tASSERT(txn, ctx->lifo != 0); - } - - while (true) { - txnid_t fill_gc_id; - TRACE("%s: left %zu of %zu", gcu_dbg_prefix(ctx), left, - MDBX_PNL_GETSIZE(txn->tw.relist)); - if (txn->tw.lifo_reclaimed == nullptr) { - tASSERT(txn, ctx->lifo == 0); - fill_gc_id = unaligned_peek_u64(4, key.iov_base); - if (ctx->fill_idx == 0 || fill_gc_id > txn->tw.last_reclaimed) { - if (!left) - break; - NOTICE("** restart: reserve depleted (fill_idx %zu, fill_id %" PRIaTXN - " > last_reclaimed %" PRIaTXN ", left %zu", - ctx->fill_idx, fill_gc_id, txn->tw.last_reclaimed, left); - ctx->reserve_adj = - (ctx->reserve_adj > left) ? ctx->reserve_adj - left : 0; - goto retry; - } - ctx->fill_idx -= 1; - } else { - tASSERT(txn, ctx->lifo != 0); - if (ctx->fill_idx >= MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed)) { - if (!left) - break; - NOTICE("** restart: reserve depleted (fill_idx %zu >= " - "lifo_reclaimed %zu, left %zu", - ctx->fill_idx, MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed), left); - ctx->reserve_adj = - (ctx->reserve_adj > left) ? 
ctx->reserve_adj - left : 0; - goto retry; - } - ctx->fill_idx += 1; - fill_gc_id = txn->tw.lifo_reclaimed[ctx->fill_idx]; - TRACE("%s: seek-reservation @%" PRIaTXN " at lifo_reclaimed[%zu]", - gcu_dbg_prefix(ctx), fill_gc_id, ctx->fill_idx); - key.iov_base = &fill_gc_id; - key.iov_len = sizeof(fill_gc_id); - rc = cursor_set(&ctx->cursor, &key, &data, MDBX_SET_KEY).err; - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - tASSERT(txn, ctx->cleaned_slot == - (txn->tw.lifo_reclaimed - ? MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - : 0)); - tASSERT(txn, fill_gc_id > 0 && - fill_gc_id <= env->me_lck->mti_oldest_reader.weak); - key.iov_base = &fill_gc_id; - key.iov_len = sizeof(fill_gc_id); - - tASSERT(txn, data.iov_len >= sizeof(pgno_t) * 2); - size_t chunk = data.iov_len / sizeof(pgno_t) - 1; - if (unlikely(chunk > left)) { - const size_t delta = chunk - left; - excess += delta; - if (!left) { - excess_slots += 1; - goto next; - } - TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, gcu_dbg_prefix(ctx), - chunk, left, fill_gc_id); - if ((ctx->loop < 5 && delta > (ctx->loop / 2)) || - delta > env->me_maxgc_ov1page) - data.iov_len = (left + 1) * sizeof(pgno_t); - chunk = left; - } - rc = cursor_put_nochecklen(&ctx->cursor, &key, &data, - MDBX_CURRENT | MDBX_RESERVE); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - gcu_zeroize_reserved(env, data); - - if (unlikely(txn->tw.loose_count || - ctx->amount != MDBX_PNL_GETSIZE(txn->tw.relist))) { - NOTICE("** restart: reclaimed-list changed (%zu -> %zu, loose +%zu)", - ctx->amount, MDBX_PNL_GETSIZE(txn->tw.relist), - txn->tw.loose_count); - if (ctx->loop < 5 || (ctx->loop > 10 && (ctx->loop & 1))) - goto retry_clean_adj; - goto retry; - } - - if (unlikely(txn->tw.lifo_reclaimed - ? 
ctx->cleaned_slot < - MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - : ctx->cleaned_id < txn->tw.last_reclaimed)) { - NOTICE("%s", "** restart: reclaimed-slots changed"); - goto retry; - } - if (unlikely(ctx->retired_stored != - MDBX_PNL_GETSIZE(txn->tw.retired_pages))) { - tASSERT(txn, - ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)); - NOTICE("** restart: retired-list growth (%zu -> %zu)", - ctx->retired_stored, MDBX_PNL_GETSIZE(txn->tw.retired_pages)); - goto retry; - } - - pgno_t *dst = data.iov_base; - *dst++ = (pgno_t)chunk; - pgno_t *src = MDBX_PNL_BEGIN(txn->tw.relist) + left - chunk; - memcpy(dst, src, chunk * sizeof(pgno_t)); - pgno_t *from = src, *to = src + chunk; - TRACE("%s: fill %zu [ %zu:%" PRIaPGNO "...%zu:%" PRIaPGNO "] @%" PRIaTXN, - gcu_dbg_prefix(ctx), chunk, from - txn->tw.relist, from[0], - to - txn->tw.relist, to[-1], fill_gc_id); - - left -= chunk; - if (AUDIT_ENABLED()) { - rc = audit_ex(txn, ctx->retired_stored + ctx->amount - left, true); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - - next: - if (txn->tw.lifo_reclaimed == nullptr) { - tASSERT(txn, ctx->lifo == 0); - rc = cursor_next(&ctx->cursor, &key, &data, MDBX_NEXT); - if (unlikely(rc != MDBX_SUCCESS)) { - if (rc != MDBX_NOTFOUND) - goto bailout; - rc = MDBX_SUCCESS; - break; - } - } else { - tASSERT(txn, ctx->lifo != 0); - } - } - - if (excess) { - size_t n = excess, adj = excess; - while (n >= env->me_maxgc_ov1page) - adj -= n /= env->me_maxgc_ov1page; - ctx->reserve_adj += adj; - TRACE("%s: extra %zu reserved space, adj +%zu (%zu)", gcu_dbg_prefix(ctx), - excess, adj, ctx->reserve_adj); - } - } - - tASSERT(txn, rc == MDBX_SUCCESS); - if (unlikely(txn->tw.loose_count != 0 || - ctx->amount != MDBX_PNL_GETSIZE(txn->tw.relist))) { - NOTICE("** restart: got %zu loose pages (reclaimed-list %zu -> %zu)", - txn->tw.loose_count, ctx->amount, MDBX_PNL_GETSIZE(txn->tw.relist)); - goto retry; - } - - if (unlikely(excess_slots)) { - const bool will_retry = ctx->loop < 
5 || excess_slots > 1; - NOTICE("** %s: reserve excess (excess-slots %zu, filled-slot %zu, adj %zu, " - "loop %zu)", - will_retry ? "restart" : "ignore", excess_slots, ctx->fill_idx, - ctx->reserve_adj, ctx->loop); - if (will_retry) - goto retry; - } - - tASSERT(txn, - txn->tw.lifo_reclaimed == NULL || - ctx->cleaned_slot == MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed)); - -bailout: - txn->mt_cursors[FREE_DBI] = ctx->cursor.mc_next; - - MDBX_PNL_SETSIZE(txn->tw.relist, 0); -#if MDBX_ENABLE_PROFGC - env->me_lck->mti_pgop_stat.gc_prof.wloops += (uint32_t)ctx->loop; -#endif /* MDBX_ENABLE_PROFGC */ - TRACE("<<< %zu loops, rc = %d", ctx->loop, rc); - return rc; -} - -static int txn_write(MDBX_txn *txn, iov_ctx_t *ctx) { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - MDBX_dpl *const dl = dpl_sort(txn); - int rc = MDBX_SUCCESS; - size_t r, w, total_npages = 0; - for (w = 0, r = 1; r <= dl->length; ++r) { - MDBX_page *dp = dl->items[r].ptr; - if (dp->mp_flags & P_LOOSE) { - dl->items[++w] = dl->items[r]; - continue; - } - unsigned npages = dpl_npages(dl, r); - total_npages += npages; - rc = iov_page(txn, ctx, dp, npages); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - - if (!iov_empty(ctx)) { - tASSERT(txn, rc == MDBX_SUCCESS); - rc = iov_write(ctx); - } - - if (likely(rc == MDBX_SUCCESS) && ctx->fd == txn->mt_env->me_lazy_fd) { - txn->mt_env->me_lck->mti_unsynced_pages.weak += total_npages; - if (!txn->mt_env->me_lck->mti_eoos_timestamp.weak) - txn->mt_env->me_lck->mti_eoos_timestamp.weak = osal_monotime(); - } - - txn->tw.dirtylist->pages_including_loose -= total_npages; - while (r <= dl->length) - dl->items[++w] = dl->items[r++]; - - dl->sorted = dpl_setlen(dl, w); - txn->tw.dirtyroom += r - 1 - w; - tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == - (txn->mt_parent ? 
txn->mt_parent->tw.dirtyroom - : txn->mt_env->me_options.dp_limit)); - tASSERT(txn, txn->tw.dirtylist->length == txn->tw.loose_count); - tASSERT(txn, txn->tw.dirtylist->pages_including_loose == txn->tw.loose_count); - return rc; -} - -/* Merge child txn into parent */ -static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, - const size_t parent_retired_len) { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0); - MDBX_dpl *const src = dpl_sort(txn); - - /* Remove refunded pages from parent's dirty list */ - MDBX_dpl *const dst = dpl_sort(parent); - if (MDBX_ENABLE_REFUND) { - size_t n = dst->length; - while (n && dst->items[n].pgno >= parent->mt_next_pgno) { - const unsigned npages = dpl_npages(dst, n); - dpage_free(txn->mt_env, dst->items[n].ptr, npages); - --n; - } - parent->tw.dirtyroom += dst->sorted - n; - dst->sorted = dpl_setlen(dst, n); - tASSERT(parent, - parent->tw.dirtyroom + parent->tw.dirtylist->length == - (parent->mt_parent ? parent->mt_parent->tw.dirtyroom - : parent->mt_env->me_options.dp_limit)); - } - - /* Remove reclaimed pages from parent's dirty list */ - const MDBX_PNL reclaimed_list = parent->tw.relist; - dpl_sift(parent, reclaimed_list, false); - - /* Move retired pages from parent's dirty & spilled list to reclaimed */ - size_t r, w, d, s, l; - for (r = w = parent_retired_len; - ++r <= MDBX_PNL_GETSIZE(parent->tw.retired_pages);) { - const pgno_t pgno = parent->tw.retired_pages[r]; - const size_t di = dpl_exist(parent, pgno); - const size_t si = !di ? search_spilled(parent, pgno) : 0; - unsigned npages; - const char *kind; - if (di) { - MDBX_page *dp = dst->items[di].ptr; - tASSERT(parent, (dp->mp_flags & ~(P_LEAF | P_LEAF2 | P_BRANCH | - P_OVERFLOW | P_SPILLED)) == 0); - npages = dpl_npages(dst, di); - page_wash(parent, di, dp, npages); - kind = "dirty"; - l = 1; - if (unlikely(npages > l)) { - /* OVERFLOW-страница могла быть переиспользована по частям. 
Тогда - * в retired-списке может быть только начало последовательности, - * а остаток растащен по dirty, spilled и reclaimed спискам. Поэтому - * переносим в reclaimed с проверкой на обрыв последовательности. - * В любом случае, все осколки будут учтены и отфильтрованы, т.е. если - * страница была разбита на части, то важно удалить dirty-элемент, - * а все осколки будут учтены отдельно. */ - - /* Список retired страниц не сортирован, но для ускорения сортировки - * дополняется в соответствии с MDBX_PNL_ASCENDING */ -#if MDBX_PNL_ASCENDING - const size_t len = MDBX_PNL_GETSIZE(parent->tw.retired_pages); - while (r < len && parent->tw.retired_pages[r + 1] == pgno + l) { - ++r; - if (++l == npages) - break; - } -#else - while (w > parent_retired_len && - parent->tw.retired_pages[w - 1] == pgno + l) { - --w; - if (++l == npages) - break; - } -#endif - } - } else if (unlikely(si)) { - l = npages = 1; - spill_remove(parent, si, 1); - kind = "spilled"; - } else { - parent->tw.retired_pages[++w] = pgno; - continue; - } - - DEBUG("reclaim retired parent's %u -> %zu %s page %" PRIaPGNO, npages, l, - kind, pgno); - int err = pnl_insert_range(&parent->tw.relist, pgno, l); - ENSURE(txn->mt_env, err == MDBX_SUCCESS); - } - MDBX_PNL_SETSIZE(parent->tw.retired_pages, w); - - /* Filter-out parent spill list */ - if (parent->tw.spilled.list && - MDBX_PNL_GETSIZE(parent->tw.spilled.list) > 0) { - const MDBX_PNL sl = spill_purge(parent); - size_t len = MDBX_PNL_GETSIZE(sl); - if (len) { - /* Remove refunded pages from parent's spill list */ - if (MDBX_ENABLE_REFUND && - MDBX_PNL_MOST(sl) >= (parent->mt_next_pgno << 1)) { -#if MDBX_PNL_ASCENDING - size_t i = MDBX_PNL_GETSIZE(sl); - assert(MDBX_PNL_MOST(sl) == MDBX_PNL_LAST(sl)); - do { - if ((sl[i] & 1) == 0) - DEBUG("refund parent's spilled page %" PRIaPGNO, sl[i] >> 1); - i -= 1; - } while (i && sl[i] >= (parent->mt_next_pgno << 1)); - MDBX_PNL_SETSIZE(sl, i); -#else - assert(MDBX_PNL_MOST(sl) == MDBX_PNL_FIRST(sl)); - size_t i = 0; 
- do { - ++i; - if ((sl[i] & 1) == 0) - DEBUG("refund parent's spilled page %" PRIaPGNO, sl[i] >> 1); - } while (i < len && sl[i + 1] >= (parent->mt_next_pgno << 1)); - MDBX_PNL_SETSIZE(sl, len -= i); - memmove(sl + 1, sl + 1 + i, len * sizeof(sl[0])); -#endif - } - tASSERT(txn, pnl_check_allocated(sl, (size_t)parent->mt_next_pgno << 1)); - - /* Remove reclaimed pages from parent's spill list */ - s = MDBX_PNL_GETSIZE(sl), r = MDBX_PNL_GETSIZE(reclaimed_list); - /* Scanning from end to begin */ - while (s && r) { - if (sl[s] & 1) { - --s; - continue; - } - const pgno_t spilled_pgno = sl[s] >> 1; - const pgno_t reclaimed_pgno = reclaimed_list[r]; - if (reclaimed_pgno != spilled_pgno) { - const bool cmp = MDBX_PNL_ORDERED(spilled_pgno, reclaimed_pgno); - s -= !cmp; - r -= cmp; - } else { - DEBUG("remove reclaimed parent's spilled page %" PRIaPGNO, - reclaimed_pgno); - spill_remove(parent, s, 1); - --s; - --r; - } - } - - /* Remove anything in our dirty list from parent's spill list */ - /* Scanning spill list in descend order */ - const intptr_t step = MDBX_PNL_ASCENDING ? -1 : 1; - s = MDBX_PNL_ASCENDING ? MDBX_PNL_GETSIZE(sl) : 1; - d = src->length; - while (d && (MDBX_PNL_ASCENDING ? 
s > 0 : s <= MDBX_PNL_GETSIZE(sl))) { - if (sl[s] & 1) { - s += step; - continue; - } - const pgno_t spilled_pgno = sl[s] >> 1; - const pgno_t dirty_pgno_form = src->items[d].pgno; - const unsigned npages = dpl_npages(src, d); - const pgno_t dirty_pgno_to = dirty_pgno_form + npages; - if (dirty_pgno_form > spilled_pgno) { - --d; - continue; - } - if (dirty_pgno_to <= spilled_pgno) { - s += step; - continue; - } - - DEBUG("remove dirtied parent's spilled %u page %" PRIaPGNO, npages, - dirty_pgno_form); - spill_remove(parent, s, 1); - s += step; - } - - /* Squash deleted pagenums if we deleted any */ - spill_purge(parent); - } - } - - /* Remove anything in our spill list from parent's dirty list */ - if (txn->tw.spilled.list) { - tASSERT(txn, pnl_check_allocated(txn->tw.spilled.list, - (size_t)parent->mt_next_pgno << 1)); - dpl_sift(parent, txn->tw.spilled.list, true); - tASSERT(parent, - parent->tw.dirtyroom + parent->tw.dirtylist->length == - (parent->mt_parent ? parent->mt_parent->tw.dirtyroom - : parent->mt_env->me_options.dp_limit)); - } - - /* Find length of merging our dirty list with parent's and release - * filter-out pages */ - for (l = 0, d = dst->length, s = src->length; d > 0 && s > 0;) { - MDBX_page *sp = src->items[s].ptr; - tASSERT(parent, (sp->mp_flags & ~(P_LEAF | P_LEAF2 | P_BRANCH | P_OVERFLOW | - P_LOOSE | P_SPILLED)) == 0); - const unsigned s_npages = dpl_npages(src, s); - const pgno_t s_pgno = src->items[s].pgno; - - MDBX_page *dp = dst->items[d].ptr; - tASSERT(parent, (dp->mp_flags & ~(P_LEAF | P_LEAF2 | P_BRANCH | P_OVERFLOW | - P_SPILLED)) == 0); - const unsigned d_npages = dpl_npages(dst, d); - const pgno_t d_pgno = dst->items[d].pgno; - - if (d_pgno >= s_pgno + s_npages) { - --d; - ++l; - } else if (d_pgno + d_npages <= s_pgno) { - if (sp->mp_flags != P_LOOSE) { - sp->mp_txnid = parent->mt_front; - sp->mp_flags &= ~P_SPILLED; - } - --s; - ++l; - } else { - dst->items[d--].ptr = nullptr; - dpage_free(txn->mt_env, dp, d_npages); - } - } - 
assert(dst->sorted == dst->length); - tASSERT(parent, dst->detent >= l + d + s); - dst->sorted = l + d + s; /* the merged length */ - - while (s > 0) { - MDBX_page *sp = src->items[s].ptr; - tASSERT(parent, (sp->mp_flags & ~(P_LEAF | P_LEAF2 | P_BRANCH | P_OVERFLOW | - P_LOOSE | P_SPILLED)) == 0); - if (sp->mp_flags != P_LOOSE) { - sp->mp_txnid = parent->mt_front; - sp->mp_flags &= ~P_SPILLED; - } - --s; - } - - /* Merge our dirty list into parent's, i.e. merge(dst, src) -> dst */ - if (dst->sorted >= dst->length) { - /* from end to begin with dst extending */ - for (l = dst->sorted, s = src->length, d = dst->length; s > 0 && d > 0;) { - if (unlikely(l <= d)) { - /* squash to get a gap of free space for merge */ - for (r = w = 1; r <= d; ++r) - if (dst->items[r].ptr) { - if (w != r) { - dst->items[w] = dst->items[r]; - dst->items[r].ptr = nullptr; - } - ++w; - } - VERBOSE("squash to begin for extending-merge %zu -> %zu", d, w - 1); - d = w - 1; - continue; - } - assert(l > d); - if (dst->items[d].ptr) { - dst->items[l--] = (dst->items[d].pgno > src->items[s].pgno) - ? 
dst->items[d--] - : src->items[s--]; - } else - --d; - } - if (s > 0) { - assert(l == s); - while (d > 0) { - assert(dst->items[d].ptr == nullptr); - --d; - } - do { - assert(l > 0); - dst->items[l--] = src->items[s--]; - } while (s > 0); - } else { - assert(l == d); - while (l > 0) { - assert(dst->items[l].ptr != nullptr); - --l; - } - } - } else { - /* from begin to end with shrinking (a lot of new large/overflow pages) */ - for (l = s = d = 1; s <= src->length && d <= dst->length;) { - if (unlikely(l >= d)) { - /* squash to get a gap of free space for merge */ - for (r = w = dst->length; r >= d; --r) - if (dst->items[r].ptr) { - if (w != r) { - dst->items[w] = dst->items[r]; - dst->items[r].ptr = nullptr; - } - --w; - } - VERBOSE("squash to end for shrinking-merge %zu -> %zu", d, w + 1); - d = w + 1; - continue; - } - assert(l < d); - if (dst->items[d].ptr) { - dst->items[l++] = (dst->items[d].pgno < src->items[s].pgno) - ? dst->items[d++] - : src->items[s++]; - } else - ++d; - } - if (s <= src->length) { - assert(dst->sorted - l == src->length - s); - while (d <= dst->length) { - assert(dst->items[d].ptr == nullptr); - --d; - } - do { - assert(l <= dst->sorted); - dst->items[l++] = src->items[s++]; - } while (s <= src->length); - } else { - assert(dst->sorted - l == dst->length - d); - while (l <= dst->sorted) { - assert(l <= d && d <= dst->length && dst->items[d].ptr); - dst->items[l++] = dst->items[d++]; - } - } - } - parent->tw.dirtyroom -= dst->sorted - dst->length; - assert(parent->tw.dirtyroom <= parent->mt_env->me_options.dp_limit); - dpl_setlen(dst, dst->sorted); - parent->tw.dirtylru = txn->tw.dirtylru; - - /* В текущем понимании выгоднее пересчитать кол-во страниц, - * чем подмешивать лишние ветвления и вычисления в циклы выше. 
*/ - dst->pages_including_loose = 0; - for (r = 1; r <= dst->length; ++r) - dst->pages_including_loose += dpl_npages(dst, r); - - tASSERT(parent, dirtylist_check(parent)); - dpl_free(txn); - - if (txn->tw.spilled.list) { - if (parent->tw.spilled.list) { - /* Must not fail since space was preserved above. */ - pnl_merge(parent->tw.spilled.list, txn->tw.spilled.list); - pnl_free(txn->tw.spilled.list); - } else { - parent->tw.spilled.list = txn->tw.spilled.list; - parent->tw.spilled.least_removed = txn->tw.spilled.least_removed; - } - tASSERT(parent, dirtylist_check(parent)); - } - - parent->mt_flags &= ~MDBX_TXN_HAS_CHILD; - if (parent->tw.spilled.list) { - assert(pnl_check_allocated(parent->tw.spilled.list, - (size_t)parent->mt_next_pgno << 1)); - if (MDBX_PNL_GETSIZE(parent->tw.spilled.list)) - parent->mt_flags |= MDBX_TXN_SPILLS; - } -} - -static void take_gcprof(MDBX_txn *txn, MDBX_commit_latency *latency) { - MDBX_env *const env = txn->mt_env; - if (MDBX_ENABLE_PROFGC) { - pgop_stat_t *const ptr = &env->me_lck->mti_pgop_stat; - latency->gc_prof.work_counter = ptr->gc_prof.work.spe_counter; - latency->gc_prof.work_rtime_monotonic = - osal_monotime_to_16dot16(ptr->gc_prof.work.rtime_monotonic); - latency->gc_prof.work_xtime_cpu = - osal_monotime_to_16dot16(ptr->gc_prof.work.xtime_cpu); - latency->gc_prof.work_rsteps = ptr->gc_prof.work.rsteps; - latency->gc_prof.work_xpages = ptr->gc_prof.work.xpages; - latency->gc_prof.work_majflt = ptr->gc_prof.work.majflt; - - latency->gc_prof.self_counter = ptr->gc_prof.self.spe_counter; - latency->gc_prof.self_rtime_monotonic = - osal_monotime_to_16dot16(ptr->gc_prof.self.rtime_monotonic); - latency->gc_prof.self_xtime_cpu = - osal_monotime_to_16dot16(ptr->gc_prof.self.xtime_cpu); - latency->gc_prof.self_rsteps = ptr->gc_prof.self.rsteps; - latency->gc_prof.self_xpages = ptr->gc_prof.self.xpages; - latency->gc_prof.self_majflt = ptr->gc_prof.self.majflt; - - latency->gc_prof.wloops = ptr->gc_prof.wloops; - 
latency->gc_prof.coalescences = ptr->gc_prof.coalescences; - latency->gc_prof.wipes = ptr->gc_prof.wipes; - latency->gc_prof.flushes = ptr->gc_prof.flushes; - latency->gc_prof.kicks = ptr->gc_prof.kicks; - if (txn == env->me_txn0) - memset(&ptr->gc_prof, 0, sizeof(ptr->gc_prof)); - } else - memset(&latency->gc_prof, 0, sizeof(latency->gc_prof)); -} - -int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { - STATIC_ASSERT(MDBX_TXN_FINISHED == - MDBX_TXN_BLOCKED - MDBX_TXN_HAS_CHILD - MDBX_TXN_ERROR); - const uint64_t ts_0 = latency ? osal_monotime() : 0; - uint64_t ts_1 = 0, ts_2 = 0, ts_3 = 0, ts_4 = 0, ts_5 = 0, gc_cputime = 0; - - int rc = check_txn(txn, MDBX_TXN_FINISHED); - if (unlikely(rc != MDBX_SUCCESS)) { - if (latency) - memset(latency, 0, sizeof(*latency)); - return rc; - } - - MDBX_env *const env = txn->mt_env; -#if MDBX_ENV_CHECKPID - if (unlikely(env->me_pid != osal_getpid())) { - env->me_flags |= MDBX_FATAL_ERROR; - if (latency) - memset(latency, 0, sizeof(*latency)); - return MDBX_PANIC; - } -#endif /* MDBX_ENV_CHECKPID */ - - if (unlikely(txn->mt_flags & MDBX_TXN_ERROR)) { - rc = MDBX_RESULT_TRUE; - goto fail; - } - - /* txn_end() mode for a commit which writes nothing */ - unsigned end_mode = - TXN_END_PURE_COMMIT | TXN_END_UPDATE | TXN_END_SLOT | TXN_END_FREE; - if (unlikely(txn->mt_flags & MDBX_TXN_RDONLY)) - goto done; - - if ((txn->mt_flags & MDBX_NOSTICKYTHREADS) && - unlikely(txn->mt_owner != osal_thread_self())) { - rc = MDBX_THREAD_MISMATCH; - goto fail; - } - - if (txn->mt_child) { - rc = mdbx_txn_commit_ex(txn->mt_child, NULL); - tASSERT(txn, txn->mt_child == NULL); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - } - - if (unlikely(txn != env->me_txn)) { - DEBUG("%s", "attempt to commit unknown transaction"); - rc = MDBX_EINVAL; - goto fail; - } - - if (txn->mt_parent) { - tASSERT(txn, audit_ex(txn, 0, false) == 0); - eASSERT(env, txn != env->me_txn0); - MDBX_txn *const parent = txn->mt_parent; - eASSERT(env, 
parent->mt_signature == MDBX_MT_SIGNATURE); - eASSERT(env, parent->mt_child == txn && - (parent->mt_flags & MDBX_TXN_HAS_CHILD) != 0); - eASSERT(env, dirtylist_check(txn)); - - if (txn->tw.dirtylist->length == 0 && !(txn->mt_flags & MDBX_TXN_DIRTY) && - parent->mt_numdbs == txn->mt_numdbs) { - TXN_FOREACH_DBI_ALL(txn, i) { - tASSERT(txn, (txn->mt_dbi_state[i] & DBI_DIRTY) == 0); - if ((txn->mt_dbi_state[i] & DBI_STALE) && - !(parent->mt_dbi_state[i] & DBI_STALE)) - tASSERT(txn, memcmp(&parent->mt_dbs[i], &txn->mt_dbs[i], - sizeof(MDBX_db)) == 0); - } - - tASSERT(txn, memcmp(&parent->mt_geo, &txn->mt_geo, - sizeof(parent->mt_geo)) == 0); - tASSERT(txn, memcmp(&parent->mt_canary, &txn->mt_canary, - sizeof(parent->mt_canary)) == 0); - tASSERT(txn, !txn->tw.spilled.list || - MDBX_PNL_GETSIZE(txn->tw.spilled.list) == 0); - tASSERT(txn, txn->tw.loose_count == 0); - - /* fast completion of pure nested transaction */ - VERBOSE("fast-complete pure nested txn %" PRIaTXN, txn->mt_txnid); - end_mode = TXN_END_PURE_COMMIT | TXN_END_SLOT | TXN_END_FREE; - goto done; - } - - /* Preserve space for spill list to avoid parent's state corruption - * if allocation fails. 
*/ - const size_t parent_retired_len = (uintptr_t)parent->tw.retired_pages; - tASSERT(txn, parent_retired_len <= MDBX_PNL_GETSIZE(txn->tw.retired_pages)); - const size_t retired_delta = - MDBX_PNL_GETSIZE(txn->tw.retired_pages) - parent_retired_len; - if (retired_delta) { - rc = pnl_need(&txn->tw.relist, retired_delta); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - } - - if (txn->tw.spilled.list) { - if (parent->tw.spilled.list) { - rc = pnl_need(&parent->tw.spilled.list, - MDBX_PNL_GETSIZE(txn->tw.spilled.list)); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - } - spill_purge(txn); - } - - if (unlikely(txn->tw.dirtylist->length + parent->tw.dirtylist->length > - parent->tw.dirtylist->detent && - !dpl_reserve(parent, txn->tw.dirtylist->length + - parent->tw.dirtylist->length))) { - rc = MDBX_ENOMEM; - goto fail; - } - - //------------------------------------------------------------------------- - - parent->tw.lifo_reclaimed = txn->tw.lifo_reclaimed; - txn->tw.lifo_reclaimed = NULL; - - parent->tw.retired_pages = txn->tw.retired_pages; - txn->tw.retired_pages = NULL; - - pnl_free(parent->tw.relist); - parent->tw.relist = txn->tw.relist; - txn->tw.relist = NULL; - parent->tw.gc_time_acc = txn->tw.gc_time_acc; - parent->tw.last_reclaimed = txn->tw.last_reclaimed; - - parent->mt_geo = txn->mt_geo; - parent->mt_canary = txn->mt_canary; - parent->mt_flags |= txn->mt_flags & MDBX_TXN_DIRTY; - - /* Move loose pages to parent */ -#if MDBX_ENABLE_REFUND - parent->tw.loose_refund_wl = txn->tw.loose_refund_wl; -#endif /* MDBX_ENABLE_REFUND */ - parent->tw.loose_count = txn->tw.loose_count; - parent->tw.loose_pages = txn->tw.loose_pages; - - /* Merge our cursors into parent's and close them */ - cursors_eot(txn, true); - end_mode |= TXN_END_EOTDONE; - - /* Update parent's DBs array */ - eASSERT(env, parent->mt_numdbs == txn->mt_numdbs); - TXN_FOREACH_DBI_ALL(txn, dbi) { - if (txn->mt_dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)) { - parent->mt_dbs[dbi] = 
txn->mt_dbs[dbi]; - /* preserve parent's status */ - const uint8_t state = - txn->mt_dbi_state[dbi] | - (parent->mt_dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)); - DEBUG("dbi %zu dbi-state %s 0x%02x -> 0x%02x", dbi, - (parent->mt_dbi_state[dbi] != state) ? "update" : "still", - parent->mt_dbi_state[dbi], state); - parent->mt_dbi_state[dbi] = state; - } else { - eASSERT(env, txn->mt_dbi_state[dbi] == - (parent->mt_dbi_state[dbi] & - ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY))); - } - } - - if (latency) { - ts_1 = osal_monotime(); - ts_2 = /* no gc-update */ ts_1; - ts_3 = /* no audit */ ts_2; - ts_4 = /* no write */ ts_3; - ts_5 = /* no sync */ ts_4; - } - txn_merge(parent, txn, parent_retired_len); - env->me_txn = parent; - parent->mt_child = NULL; - tASSERT(parent, dirtylist_check(parent)); - -#if MDBX_ENABLE_REFUND - txn_refund(parent); - if (ASSERT_ENABLED()) { - /* Check parent's loose pages not suitable for refund */ - for (MDBX_page *lp = parent->tw.loose_pages; lp; lp = mp_next(lp)) { - tASSERT(parent, lp->mp_pgno < parent->tw.loose_refund_wl && - lp->mp_pgno + 1 < parent->mt_next_pgno); - MDBX_ASAN_UNPOISON_MEMORY_REGION(&mp_next(lp), sizeof(MDBX_page *)); - VALGRIND_MAKE_MEM_DEFINED(&mp_next(lp), sizeof(MDBX_page *)); - } - /* Check parent's reclaimed pages not suitable for refund */ - if (MDBX_PNL_GETSIZE(parent->tw.relist)) - tASSERT(parent, - MDBX_PNL_MOST(parent->tw.relist) + 1 < parent->mt_next_pgno); - } -#endif /* MDBX_ENABLE_REFUND */ - - txn->mt_signature = 0; - osal_free(txn); - tASSERT(parent, audit_ex(parent, 0, false) == 0); - rc = MDBX_SUCCESS; - goto provide_latency; - } - - if (!txn->tw.dirtylist) { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); - } else { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == - (txn->mt_parent ? 
txn->mt_parent->tw.dirtyroom - : env->me_options.dp_limit)); - } - cursors_eot(txn, false); - end_mode |= TXN_END_EOTDONE; - - if ((!txn->tw.dirtylist || txn->tw.dirtylist->length == 0) && - (txn->mt_flags & (MDBX_TXN_DIRTY | MDBX_TXN_SPILLS)) == 0) { - TXN_FOREACH_DBI_ALL(txn, i) { - tASSERT(txn, !(txn->mt_dbi_state[i] & DBI_DIRTY)); - } -#if defined(MDBX_NOSUCCESS_EMPTY_COMMIT) && MDBX_NOSUCCESS_EMPTY_COMMIT - rc = txn_end(txn, end_mode); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - rc = MDBX_RESULT_TRUE; - goto provide_latency; -#else - goto done; -#endif /* MDBX_NOSUCCESS_EMPTY_COMMIT */ - } - - DEBUG("committing txn %" PRIaTXN " %p on env %p, root page %" PRIaPGNO - "/%" PRIaPGNO, - txn->mt_txnid, (void *)txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root, - txn->mt_dbs[FREE_DBI].md_root); - - if (txn->mt_numdbs > CORE_DBS) { - /* Update subDB root pointers */ - MDBX_cursor_couple cx; - rc = cursor_init(&cx.outer, txn, MAIN_DBI); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - cx.outer.mc_next = txn->mt_cursors[MAIN_DBI]; - txn->mt_cursors[MAIN_DBI] = &cx.outer; - TXN_FOREACH_DBI_USER(txn, i) { - if ((txn->mt_dbi_state[i] & DBI_DIRTY) == 0) - continue; - MDBX_db *const db = &txn->mt_dbs[i]; - DEBUG("update main's entry for sub-db %zu, mod_txnid %" PRIaTXN - " -> %" PRIaTXN, - i, db->md_mod_txnid, txn->mt_txnid); - /* Может быть mod_txnid > front после коммита вложенных тразакций */ - db->md_mod_txnid = txn->mt_txnid; - MDBX_val data = {db, sizeof(MDBX_db)}; - rc = cursor_put_nochecklen(&cx.outer, &env->me_dbxs[i].md_name, &data, - F_SUBDATA); - if (unlikely(rc != MDBX_SUCCESS)) { - txn->mt_cursors[MAIN_DBI] = cx.outer.mc_next; - goto fail; - } - } - txn->mt_cursors[MAIN_DBI] = cx.outer.mc_next; - } - - ts_1 = latency ? osal_monotime() : 0; - - gcu_context_t gcu_ctx; - gc_cputime = latency ? 
osal_cputime(nullptr) : 0; - rc = gcu_context_init(txn, &gcu_ctx); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - rc = update_gc(txn, &gcu_ctx); - gc_cputime = latency ? osal_cputime(nullptr) - gc_cputime : 0; - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - - tASSERT(txn, txn->tw.loose_count == 0); - txn->mt_dbs[FREE_DBI].md_mod_txnid = (txn->mt_dbi_state[FREE_DBI] & DBI_DIRTY) - ? txn->mt_txnid - : txn->mt_dbs[FREE_DBI].md_mod_txnid; - - txn->mt_dbs[MAIN_DBI].md_mod_txnid = (txn->mt_dbi_state[MAIN_DBI] & DBI_DIRTY) - ? txn->mt_txnid - : txn->mt_dbs[MAIN_DBI].md_mod_txnid; - - ts_2 = latency ? osal_monotime() : 0; - ts_3 = ts_2; - if (AUDIT_ENABLED()) { - rc = audit_ex(txn, MDBX_PNL_GETSIZE(txn->tw.retired_pages), true); - ts_3 = osal_monotime(); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - } - - bool need_flush_for_nometasync = false; - const meta_ptr_t head = meta_recent(env, &txn->tw.troika); - const uint32_t meta_sync_txnid = - atomic_load32(&env->me_lck->mti_meta_sync_txnid, mo_Relaxed); - /* sync prev meta */ - if (head.is_steady && meta_sync_txnid != (uint32_t)head.txnid) { - /* Исправление унаследованного от LMDB недочета: - * - * Всё хорошо, если все процессы работающие с БД не используют WRITEMAP. - * Тогда мета-страница (обновленная, но не сброшенная на диск) будет - * сохранена в результате fdatasync() при записи данных этой транзакции. - * - * Всё хорошо, если все процессы работающие с БД используют WRITEMAP - * без MDBX_AVOID_MSYNC. - * Тогда мета-страница (обновленная, но не сброшенная на диск) будет - * сохранена в результате msync() при записи данных этой транзакции. - * - * Если же в процессах работающих с БД используется оба метода, как sync() - * в режиме MDBX_WRITEMAP, так и записи через файловый дескриптор, то - * становится невозможным обеспечить фиксацию на диске мета-страницы - * предыдущей транзакции и данных текущей транзакции, за счет одной - * sync-операцией выполняемой после записи данных текущей транзакции. 
- * Соответственно, требуется явно обновлять мета-страницу, что полностью - * уничтожает выгоду от NOMETASYNC. */ - const uint32_t txnid_dist = - ((txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) - ? MDBX_NOMETASYNC_LAZY_FD - : MDBX_NOMETASYNC_LAZY_WRITEMAP; - /* Смысл "магии" в том, чтобы избежать отдельного вызова fdatasync() - * или msync() для гарантированной фиксации на диске мета-страницы, - * которая была "лениво" отправлена на запись в предыдущей транзакции, - * но не сброшена на диск из-за активного режима MDBX_NOMETASYNC. */ - if ( -#if defined(_WIN32) || defined(_WIN64) - !env->me_overlapped_fd && -#endif - meta_sync_txnid == (uint32_t)head.txnid - txnid_dist) - need_flush_for_nometasync = true; - else { - rc = meta_sync(env, head); - if (unlikely(rc != MDBX_SUCCESS)) { - ERROR("txn-%s: error %d", "presync-meta", rc); - goto fail; - } - } - } - - if (txn->tw.dirtylist) { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - tASSERT(txn, txn->tw.loose_count == 0); - - mdbx_filehandle_t fd = -#if defined(_WIN32) || defined(_WIN64) - env->me_overlapped_fd ? env->me_overlapped_fd : env->me_lazy_fd; - (void)need_flush_for_nometasync; -#else -#define MDBX_WRITETHROUGH_THRESHOLD_DEFAULT 2 - (need_flush_for_nometasync || - env->me_dsync_fd == INVALID_HANDLE_VALUE || - txn->tw.dirtylist->length > env->me_options.writethrough_threshold || - atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed)) - ? 
env->me_lazy_fd - : env->me_dsync_fd; -#endif /* Windows */ - - iov_ctx_t write_ctx; - rc = iov_init(txn, &write_ctx, txn->tw.dirtylist->length, - txn->tw.dirtylist->pages_including_loose, fd, false); - if (unlikely(rc != MDBX_SUCCESS)) { - ERROR("txn-%s: error %d", "iov-init", rc); - goto fail; - } - - rc = txn_write(txn, &write_ctx); - if (unlikely(rc != MDBX_SUCCESS)) { - ERROR("txn-%s: error %d", "write", rc); - goto fail; - } - } else { - tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); - env->me_lck->mti_unsynced_pages.weak += txn->tw.writemap_dirty_npages; - if (!env->me_lck->mti_eoos_timestamp.weak) - env->me_lck->mti_eoos_timestamp.weak = osal_monotime(); - } - - /* TODO: use ctx.flush_begin & ctx.flush_end for range-sync */ - ts_4 = latency ? osal_monotime() : 0; - - MDBX_meta meta; - memcpy(meta.mm_magic_and_version, head.ptr_c->mm_magic_and_version, 8); - meta.mm_extra_flags = head.ptr_c->mm_extra_flags; - meta.mm_validator_id = head.ptr_c->mm_validator_id; - meta.mm_extra_pagehdr = head.ptr_c->mm_extra_pagehdr; - unaligned_poke_u64(4, meta.mm_pages_retired, - unaligned_peek_u64(4, head.ptr_c->mm_pages_retired) + - MDBX_PNL_GETSIZE(txn->tw.retired_pages)); - meta.mm_geo = txn->mt_geo; - meta.mm_dbs[FREE_DBI] = txn->mt_dbs[FREE_DBI]; - meta.mm_dbs[MAIN_DBI] = txn->mt_dbs[MAIN_DBI]; - meta.mm_canary = txn->mt_canary; - - txnid_t commit_txnid = txn->mt_txnid; -#if MDBX_ENABLE_BIGFOOT - if (gcu_ctx.bigfoot > txn->mt_txnid) { - commit_txnid = gcu_ctx.bigfoot; - TRACE("use @%" PRIaTXN " (+%zu) for commit bigfoot-txn", commit_txnid, - (size_t)(commit_txnid - txn->mt_txnid)); - } -#endif - meta.unsafe_sign = MDBX_DATASIGN_NONE; - meta_set_txnid(env, &meta, commit_txnid); - - rc = sync_locked(env, env->me_flags | txn->mt_flags | MDBX_SHRINK_ALLOWED, - &meta, &txn->tw.troika); - - ts_5 = latency ? 
osal_monotime() : 0; - if (unlikely(rc != MDBX_SUCCESS)) { - env->me_flags |= MDBX_FATAL_ERROR; - ERROR("txn-%s: error %d", "sync", rc); - goto fail; - } - - end_mode = TXN_END_COMMITTED | TXN_END_UPDATE | TXN_END_EOTDONE; - -done: - if (latency) - take_gcprof(txn, latency); - rc = txn_end(txn, end_mode); - -provide_latency: - if (latency) { - latency->preparation = ts_1 ? osal_monotime_to_16dot16(ts_1 - ts_0) : 0; - latency->gc_wallclock = - (ts_2 > ts_1) ? osal_monotime_to_16dot16(ts_2 - ts_1) : 0; - latency->gc_cputime = gc_cputime ? osal_monotime_to_16dot16(gc_cputime) : 0; - latency->audit = (ts_3 > ts_2) ? osal_monotime_to_16dot16(ts_3 - ts_2) : 0; - latency->write = (ts_4 > ts_3) ? osal_monotime_to_16dot16(ts_4 - ts_3) : 0; - latency->sync = (ts_5 > ts_4) ? osal_monotime_to_16dot16(ts_5 - ts_4) : 0; - const uint64_t ts_6 = osal_monotime(); - latency->ending = ts_5 ? osal_monotime_to_16dot16(ts_6 - ts_5) : 0; - latency->whole = osal_monotime_to_16dot16_noUnderflow(ts_6 - ts_0); - } - return rc; - -fail: - txn->mt_flags |= MDBX_TXN_ERROR; - if (latency) - take_gcprof(txn, latency); - mdbx_txn_abort(txn); - goto provide_latency; -} - -static __always_inline int cmp_int_inline(const size_t expected_alignment, - const MDBX_val *a, - const MDBX_val *b) { - if (likely(a->iov_len == b->iov_len)) { - if (sizeof(size_t) > 7 && likely(a->iov_len == 8)) - return CMP2INT(unaligned_peek_u64(expected_alignment, a->iov_base), - unaligned_peek_u64(expected_alignment, b->iov_base)); - if (likely(a->iov_len == 4)) - return CMP2INT(unaligned_peek_u32(expected_alignment, a->iov_base), - unaligned_peek_u32(expected_alignment, b->iov_base)); - if (sizeof(size_t) < 8 && likely(a->iov_len == 8)) - return CMP2INT(unaligned_peek_u64(expected_alignment, a->iov_base), - unaligned_peek_u64(expected_alignment, b->iov_base)); - } - ERROR("mismatch and/or invalid size %p.%zu/%p.%zu for INTEGERKEY/INTEGERDUP", - a->iov_base, a->iov_len, b->iov_base, b->iov_len); - return 0; -} - -__hot 
static int cmp_int_unaligned(const MDBX_val *a, const MDBX_val *b) { - return cmp_int_inline(1, a, b); -} - -/* Compare two items pointing at 2-byte aligned unsigned int's. */ -#if MDBX_UNALIGNED_OK < 2 || \ - (MDBX_DEBUG || MDBX_FORCE_ASSERTIONS || !defined(NDEBUG)) -__hot static int cmp_int_align2(const MDBX_val *a, const MDBX_val *b) { - return cmp_int_inline(2, a, b); -} -#else -#define cmp_int_align2 cmp_int_unaligned -#endif /* !MDBX_UNALIGNED_OK || debug */ - -/* Compare two items pointing at aligned unsigned int's. */ -#if MDBX_UNALIGNED_OK < 4 || \ - (MDBX_DEBUG || MDBX_FORCE_ASSERTIONS || !defined(NDEBUG)) -__hot static int cmp_int_align4(const MDBX_val *a, const MDBX_val *b) { - return cmp_int_inline(4, a, b); -} -#else -#define cmp_int_align4 cmp_int_unaligned -#endif /* !MDBX_UNALIGNED_OK || debug */ - -/* Compare two items lexically */ -__hot static int cmp_lexical(const MDBX_val *a, const MDBX_val *b) { - if (a->iov_len == b->iov_len) - return a->iov_len ? memcmp(a->iov_base, b->iov_base, a->iov_len) : 0; - - const int diff_len = (a->iov_len < b->iov_len) ? -1 : 1; - const size_t shortest = (a->iov_len < b->iov_len) ? a->iov_len : b->iov_len; - int diff_data = shortest ? memcmp(a->iov_base, b->iov_base, shortest) : 0; - return likely(diff_data) ? diff_data : diff_len; -} - -MDBX_NOTHROW_PURE_FUNCTION static __always_inline unsigned -tail3le(const uint8_t *p, size_t l) { - STATIC_ASSERT(sizeof(unsigned) > 2); - // 1: 0 0 0 - // 2: 0 1 1 - // 3: 0 1 2 - return p[0] | p[l >> 1] << 8 | p[l - 1] << 16; -} - -/* Compare two items in reverse byte order */ -__hot static int cmp_reverse(const MDBX_val *a, const MDBX_val *b) { - size_t left = (a->iov_len < b->iov_len) ? 
a->iov_len : b->iov_len; - if (likely(left)) { - const uint8_t *pa = ptr_disp(a->iov_base, a->iov_len); - const uint8_t *pb = ptr_disp(b->iov_base, b->iov_len); - while (left >= sizeof(size_t)) { - pa -= sizeof(size_t); - pb -= sizeof(size_t); - left -= sizeof(size_t); - STATIC_ASSERT(sizeof(size_t) == 4 || sizeof(size_t) == 8); - if (sizeof(size_t) == 4) { - uint32_t xa = unaligned_peek_u32(1, pa); - uint32_t xb = unaligned_peek_u32(1, pb); -#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ - xa = osal_bswap32(xa); - xb = osal_bswap32(xb); -#endif /* __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ */ - if (xa != xb) - return (xa < xb) ? -1 : 1; - } else { - uint64_t xa = unaligned_peek_u64(1, pa); - uint64_t xb = unaligned_peek_u64(1, pb); -#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ - xa = osal_bswap64(xa); - xb = osal_bswap64(xb); -#endif /* __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ */ - if (xa != xb) - return (xa < xb) ? -1 : 1; - } - } - if (sizeof(size_t) == 8 && left >= 4) { - pa -= 4; - pb -= 4; - left -= 4; - uint32_t xa = unaligned_peek_u32(1, pa); - uint32_t xb = unaligned_peek_u32(1, pb); -#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ - xa = osal_bswap32(xa); - xb = osal_bswap32(xb); -#endif /* __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ */ - if (xa != xb) - return (xa < xb) ? -1 : 1; - } - if (left) { - unsigned xa = tail3le(pa - left, left); - unsigned xb = tail3le(pb - left, left); - if (xa != xb) - return (xa < xb) ? -1 : 1; - } - } - return CMP2INT(a->iov_len, b->iov_len); -} - -/* Fast non-lexically comparator */ -__hot static int cmp_lenfast(const MDBX_val *a, const MDBX_val *b) { - int diff = CMP2INT(a->iov_len, b->iov_len); - return (likely(diff) || a->iov_len == 0) - ? 
diff - : memcmp(a->iov_base, b->iov_base, a->iov_len); -} - -__hot static bool eq_fast_slowpath(const uint8_t *a, const uint8_t *b, - size_t l) { - if (likely(l > 3)) { - if (MDBX_UNALIGNED_OK >= 4 && likely(l < 9)) - return ((unaligned_peek_u32(1, a) - unaligned_peek_u32(1, b)) | - (unaligned_peek_u32(1, a + l - 4) - - unaligned_peek_u32(1, b + l - 4))) == 0; - if (MDBX_UNALIGNED_OK >= 8 && sizeof(size_t) > 7 && likely(l < 17)) - return ((unaligned_peek_u64(1, a) - unaligned_peek_u64(1, b)) | - (unaligned_peek_u64(1, a + l - 8) - - unaligned_peek_u64(1, b + l - 8))) == 0; - return memcmp(a, b, l) == 0; - } - if (likely(l)) - return tail3le(a, l) == tail3le(b, l); - return true; -} - -static __always_inline bool eq_fast(const MDBX_val *a, const MDBX_val *b) { - return unlikely(a->iov_len == b->iov_len) && - eq_fast_slowpath(a->iov_base, b->iov_base, a->iov_len); -} - -static int cmp_equal_or_greater(const MDBX_val *a, const MDBX_val *b) { - return eq_fast(a, b) ? 0 : 1; -} - -static int validate_meta(MDBX_env *env, MDBX_meta *const meta, - const MDBX_page *const page, - const unsigned meta_number, unsigned *guess_pagesize) { - const uint64_t magic_and_version = - unaligned_peek_u64(4, &meta->mm_magic_and_version); - if (unlikely(magic_and_version != MDBX_DATA_MAGIC && - magic_and_version != MDBX_DATA_MAGIC_LEGACY_COMPAT && - magic_and_version != MDBX_DATA_MAGIC_LEGACY_DEVEL)) { - ERROR("meta[%u] has invalid magic/version %" PRIx64, meta_number, - magic_and_version); - return ((magic_and_version >> 8) != MDBX_MAGIC) ? 
MDBX_INVALID - : MDBX_VERSION_MISMATCH; - } - - if (unlikely(page->mp_pgno != meta_number)) { - ERROR("meta[%u] has invalid pageno %" PRIaPGNO, meta_number, page->mp_pgno); - return MDBX_INVALID; - } - - if (unlikely(page->mp_flags != P_META)) { - ERROR("page #%u not a meta-page", meta_number); - return MDBX_INVALID; - } - - /* LY: check pagesize */ - if (unlikely(!is_powerof2(meta->mm_psize) || meta->mm_psize < MIN_PAGESIZE || - meta->mm_psize > MAX_PAGESIZE)) { - WARNING("meta[%u] has invalid pagesize (%u), skip it", meta_number, - meta->mm_psize); - return is_powerof2(meta->mm_psize) ? MDBX_VERSION_MISMATCH : MDBX_INVALID; - } - - if (guess_pagesize && *guess_pagesize != meta->mm_psize) { - *guess_pagesize = meta->mm_psize; - VERBOSE("meta[%u] took pagesize %u", meta_number, meta->mm_psize); - } - - const txnid_t txnid = unaligned_peek_u64(4, &meta->mm_txnid_a); - if (unlikely(txnid != unaligned_peek_u64(4, &meta->mm_txnid_b))) { - WARNING("meta[%u] not completely updated, skip it", meta_number); - return MDBX_RESULT_TRUE; - } - - /* LY: check signature as a checksum */ - if (META_IS_STEADY(meta) && - unlikely(unaligned_peek_u64(4, &meta->mm_sign) != meta_sign(meta))) { - WARNING("meta[%u] has invalid steady-checksum (0x%" PRIx64 " != 0x%" PRIx64 - "), skip it", - meta_number, unaligned_peek_u64(4, &meta->mm_sign), - meta_sign(meta)); - return MDBX_RESULT_TRUE; - } - - if (unlikely(meta->mm_dbs[FREE_DBI].md_flags != MDBX_INTEGERKEY)) { - WARNING("meta[%u] has invalid %s flags 0x%u, skip it", meta_number, - "GC/FreeDB", meta->mm_dbs[FREE_DBI].md_flags); - return MDBX_INCOMPATIBLE; - } - if (unlikely(!db_check_flags(meta->mm_dbs[MAIN_DBI].md_flags))) { - WARNING("meta[%u] has invalid %s flags 0x%u, skip it", meta_number, - "MainDB", meta->mm_dbs[MAIN_DBI].md_flags); - return MDBX_INCOMPATIBLE; - } - - DEBUG("checking meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO - ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO - " +%u -%u, txn_id %" PRIaTXN ", 
%s", - page->mp_pgno, meta->mm_dbs[MAIN_DBI].md_root, - meta->mm_dbs[FREE_DBI].md_root, meta->mm_geo.lower, meta->mm_geo.next, - meta->mm_geo.now, meta->mm_geo.upper, pv2pages(meta->mm_geo.grow_pv), - pv2pages(meta->mm_geo.shrink_pv), txnid, durable_caption(meta)); - - if (unlikely(txnid < MIN_TXNID || txnid > MAX_TXNID)) { - WARNING("meta[%u] has invalid txnid %" PRIaTXN ", skip it", meta_number, - txnid); - return MDBX_RESULT_TRUE; - } - - /* LY: check min-pages value */ - if (unlikely(meta->mm_geo.lower < MIN_PAGENO || - meta->mm_geo.lower > MAX_PAGENO + 1)) { - WARNING("meta[%u] has invalid min-pages (%" PRIaPGNO "), skip it", - meta_number, meta->mm_geo.lower); - return MDBX_INVALID; - } - - /* LY: check max-pages value */ - if (unlikely(meta->mm_geo.upper < MIN_PAGENO || - meta->mm_geo.upper > MAX_PAGENO + 1 || - meta->mm_geo.upper < meta->mm_geo.lower)) { - WARNING("meta[%u] has invalid max-pages (%" PRIaPGNO "), skip it", - meta_number, meta->mm_geo.upper); - return MDBX_INVALID; - } - - /* LY: check last_pgno */ - if (unlikely(meta->mm_geo.next < MIN_PAGENO || - meta->mm_geo.next - 1 > MAX_PAGENO)) { - WARNING("meta[%u] has invalid next-pageno (%" PRIaPGNO "), skip it", - meta_number, meta->mm_geo.next); - return MDBX_CORRUPTED; - } - - /* LY: check filesize & used_bytes */ - const uint64_t used_bytes = meta->mm_geo.next * (uint64_t)meta->mm_psize; - if (unlikely(used_bytes > env->me_dxb_mmap.filesize)) { - /* Here could be a race with DB-shrinking performed by other process */ - int err = osal_filesize(env->me_lazy_fd, &env->me_dxb_mmap.filesize); - if (unlikely(err != MDBX_SUCCESS)) - return err; - if (unlikely(used_bytes > env->me_dxb_mmap.filesize)) { - WARNING("meta[%u] used-bytes (%" PRIu64 ") beyond filesize (%" PRIu64 - "), skip it", - meta_number, used_bytes, env->me_dxb_mmap.filesize); - return MDBX_CORRUPTED; - } - } - if (unlikely(meta->mm_geo.next - 1 > MAX_PAGENO || - used_bytes > MAX_MAPSIZE)) { - WARNING("meta[%u] has too large used-space 
(%" PRIu64 "), skip it", - meta_number, used_bytes); - return MDBX_TOO_LARGE; - } - - /* LY: check mapsize limits */ - pgno_t geo_lower = meta->mm_geo.lower; - uint64_t mapsize_min = geo_lower * (uint64_t)meta->mm_psize; - STATIC_ASSERT(MAX_MAPSIZE < PTRDIFF_MAX - MAX_PAGESIZE); - STATIC_ASSERT(MIN_MAPSIZE < MAX_MAPSIZE); - STATIC_ASSERT((uint64_t)(MAX_PAGENO + 1) * MIN_PAGESIZE % (4ul << 20) == 0); - if (unlikely(mapsize_min < MIN_MAPSIZE || mapsize_min > MAX_MAPSIZE)) { - if (MAX_MAPSIZE != MAX_MAPSIZE64 && mapsize_min > MAX_MAPSIZE && - mapsize_min <= MAX_MAPSIZE64) { - eASSERT(env, - meta->mm_geo.next - 1 <= MAX_PAGENO && used_bytes <= MAX_MAPSIZE); - WARNING("meta[%u] has too large min-mapsize (%" PRIu64 "), " - "but size of used space still acceptable (%" PRIu64 ")", - meta_number, mapsize_min, used_bytes); - geo_lower = (pgno_t)((mapsize_min = MAX_MAPSIZE) / meta->mm_psize); - if (geo_lower > MAX_PAGENO + 1) { - geo_lower = MAX_PAGENO + 1; - mapsize_min = geo_lower * (uint64_t)meta->mm_psize; - } - WARNING("meta[%u] consider get-%s pageno is %" PRIaPGNO - " instead of wrong %" PRIaPGNO - ", will be corrected on next commit(s)", - meta_number, "lower", geo_lower, meta->mm_geo.lower); - meta->mm_geo.lower = geo_lower; - } else { - WARNING("meta[%u] has invalid min-mapsize (%" PRIu64 "), skip it", - meta_number, mapsize_min); - return MDBX_VERSION_MISMATCH; - } - } - - pgno_t geo_upper = meta->mm_geo.upper; - uint64_t mapsize_max = geo_upper * (uint64_t)meta->mm_psize; - STATIC_ASSERT(MIN_MAPSIZE < MAX_MAPSIZE); - if (unlikely(mapsize_max > MAX_MAPSIZE || - (MAX_PAGENO + 1) < - ceil_powerof2((size_t)mapsize_max, env->me_os_psize) / - (size_t)meta->mm_psize)) { - if (mapsize_max > MAX_MAPSIZE64) { - WARNING("meta[%u] has invalid max-mapsize (%" PRIu64 "), skip it", - meta_number, mapsize_max); - return MDBX_VERSION_MISMATCH; - } - /* allow to open large DB from a 32-bit environment */ - eASSERT(env, - meta->mm_geo.next - 1 <= MAX_PAGENO && used_bytes <= 
MAX_MAPSIZE); - WARNING("meta[%u] has too large max-mapsize (%" PRIu64 "), " - "but size of used space still acceptable (%" PRIu64 ")", - meta_number, mapsize_max, used_bytes); - geo_upper = (pgno_t)((mapsize_max = MAX_MAPSIZE) / meta->mm_psize); - if (geo_upper > MAX_PAGENO + 1) { - geo_upper = MAX_PAGENO + 1; - mapsize_max = geo_upper * (uint64_t)meta->mm_psize; - } - WARNING("meta[%u] consider get-%s pageno is %" PRIaPGNO - " instead of wrong %" PRIaPGNO - ", will be corrected on next commit(s)", - meta_number, "upper", geo_upper, meta->mm_geo.upper); - meta->mm_geo.upper = geo_upper; - } - - /* LY: check and silently put mm_geo.now into [geo.lower...geo.upper]. - * - * Copy-with-compaction by previous version of libmdbx could produce DB-file - * less than meta.geo.lower bound, in case actual filling is low or no data - * at all. This is not a problem as there is no damage or loss of data. - * Therefore it is better not to consider such situation as an error, but - * silently correct it. 
*/ - pgno_t geo_now = meta->mm_geo.now; - if (geo_now < geo_lower) - geo_now = geo_lower; - if (geo_now > geo_upper && meta->mm_geo.next <= geo_upper) - geo_now = geo_upper; - - if (unlikely(meta->mm_geo.next > geo_now)) { - WARNING("meta[%u] next-pageno (%" PRIaPGNO - ") is beyond end-pgno (%" PRIaPGNO "), skip it", - meta_number, meta->mm_geo.next, geo_now); - return MDBX_CORRUPTED; - } - if (meta->mm_geo.now != geo_now) { - WARNING("meta[%u] consider geo-%s pageno is %" PRIaPGNO - " instead of wrong %" PRIaPGNO - ", will be corrected on next commit(s)", - meta_number, "now", geo_now, meta->mm_geo.now); - meta->mm_geo.now = geo_now; - } - - /* GC */ - if (meta->mm_dbs[FREE_DBI].md_root == P_INVALID) { - if (unlikely(meta->mm_dbs[FREE_DBI].md_branch_pages || - meta->mm_dbs[FREE_DBI].md_depth || - meta->mm_dbs[FREE_DBI].md_entries || - meta->mm_dbs[FREE_DBI].md_leaf_pages || - meta->mm_dbs[FREE_DBI].md_overflow_pages)) { - WARNING("meta[%u] has false-empty %s, skip it", meta_number, "GC"); - return MDBX_CORRUPTED; - } - } else if (unlikely(meta->mm_dbs[FREE_DBI].md_root >= meta->mm_geo.next)) { - WARNING("meta[%u] has invalid %s-root %" PRIaPGNO ", skip it", meta_number, - "GC", meta->mm_dbs[FREE_DBI].md_root); - return MDBX_CORRUPTED; - } - - /* MainDB */ - if (meta->mm_dbs[MAIN_DBI].md_root == P_INVALID) { - if (unlikely(meta->mm_dbs[MAIN_DBI].md_branch_pages || - meta->mm_dbs[MAIN_DBI].md_depth || - meta->mm_dbs[MAIN_DBI].md_entries || - meta->mm_dbs[MAIN_DBI].md_leaf_pages || - meta->mm_dbs[MAIN_DBI].md_overflow_pages)) { - WARNING("meta[%u] has false-empty %s", meta_number, "MainDB"); - return MDBX_CORRUPTED; - } - } else if (unlikely(meta->mm_dbs[MAIN_DBI].md_root >= meta->mm_geo.next)) { - WARNING("meta[%u] has invalid %s-root %" PRIaPGNO ", skip it", meta_number, - "MainDB", meta->mm_dbs[MAIN_DBI].md_root); - return MDBX_CORRUPTED; - } - - if (unlikely(meta->mm_dbs[FREE_DBI].md_mod_txnid > txnid)) { - WARNING("meta[%u] has wrong md_mod_txnid %" PRIaTXN " 
for %s, skip it", - meta_number, meta->mm_dbs[FREE_DBI].md_mod_txnid, "GC"); - return MDBX_CORRUPTED; - } - - if (unlikely(meta->mm_dbs[MAIN_DBI].md_mod_txnid > txnid)) { - WARNING("meta[%u] has wrong md_mod_txnid %" PRIaTXN " for %s, skip it", - meta_number, meta->mm_dbs[MAIN_DBI].md_mod_txnid, "MainDB"); - return MDBX_CORRUPTED; - } - - return MDBX_SUCCESS; -} - -static int validate_meta_copy(MDBX_env *env, const MDBX_meta *meta, - MDBX_meta *dest) { - *dest = *meta; - return validate_meta(env, dest, data_page(meta), - bytes2pgno(env, ptr_dist(meta, env->me_map)), nullptr); -} - -/* Read the environment parameters of a DB environment - * before mapping it into memory. */ -__cold static int read_header(MDBX_env *env, MDBX_meta *dest, - const int lck_exclusive, - const mdbx_mode_t mode_bits) { - memset(dest, 0, sizeof(MDBX_meta)); - int rc = osal_filesize(env->me_lazy_fd, &env->me_dxb_mmap.filesize); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - unaligned_poke_u64(4, dest->mm_sign, MDBX_DATASIGN_WEAK); - rc = MDBX_CORRUPTED; - - /* Read twice all meta pages so we can find the latest one. */ - unsigned loop_limit = NUM_METAS * 2; - /* We don't know the page size on first time. So, just guess it. */ - unsigned guess_pagesize = 0; - for (unsigned loop_count = 0; loop_count < loop_limit; ++loop_count) { - const unsigned meta_number = loop_count % NUM_METAS; - const unsigned offset = (guess_pagesize ? guess_pagesize - : (loop_count > NUM_METAS) ? 
env->me_psize - : env->me_os_psize) * - meta_number; - - char buffer[MIN_PAGESIZE]; - unsigned retryleft = 42; - while (1) { - TRACE("reading meta[%d]: offset %u, bytes %u, retry-left %u", meta_number, - offset, MIN_PAGESIZE, retryleft); - int err = osal_pread(env->me_lazy_fd, buffer, MIN_PAGESIZE, offset); - if (err == MDBX_ENODATA && offset == 0 && loop_count == 0 && - env->me_dxb_mmap.filesize == 0 && - mode_bits /* non-zero for DB creation */ != 0) { - NOTICE("read meta: empty file (%d, %s)", err, mdbx_strerror(err)); - return err; - } -#if defined(_WIN32) || defined(_WIN64) - if (err == ERROR_LOCK_VIOLATION) { - SleepEx(0, true); - err = osal_pread(env->me_lazy_fd, buffer, MIN_PAGESIZE, offset); - if (err == ERROR_LOCK_VIOLATION && --retryleft) { - WARNING("read meta[%u,%u]: %i, %s", offset, MIN_PAGESIZE, err, - mdbx_strerror(err)); - continue; - } - } -#endif /* Windows */ - if (err != MDBX_SUCCESS) { - ERROR("read meta[%u,%u]: %i, %s", offset, MIN_PAGESIZE, err, - mdbx_strerror(err)); - return err; - } - - char again[MIN_PAGESIZE]; - err = osal_pread(env->me_lazy_fd, again, MIN_PAGESIZE, offset); -#if defined(_WIN32) || defined(_WIN64) - if (err == ERROR_LOCK_VIOLATION) { - SleepEx(0, true); - err = osal_pread(env->me_lazy_fd, again, MIN_PAGESIZE, offset); - if (err == ERROR_LOCK_VIOLATION && --retryleft) { - WARNING("read meta[%u,%u]: %i, %s", offset, MIN_PAGESIZE, err, - mdbx_strerror(err)); - continue; - } - } -#endif /* Windows */ - if (err != MDBX_SUCCESS) { - ERROR("read meta[%u,%u]: %i, %s", offset, MIN_PAGESIZE, err, - mdbx_strerror(err)); - return err; - } - - if (memcmp(buffer, again, MIN_PAGESIZE) == 0 || --retryleft == 0) - break; - - VERBOSE("meta[%u] was updated, re-read it", meta_number); - } - - if (!retryleft) { - ERROR("meta[%u] is too volatile, skip it", meta_number); - continue; - } - - MDBX_page *const page = (MDBX_page *)buffer; - MDBX_meta *const meta = page_meta(page); - rc = validate_meta(env, meta, page, meta_number, 
&guess_pagesize); - if (rc != MDBX_SUCCESS) - continue; - - bool latch; - if (env->me_stuck_meta >= 0) - latch = (meta_number == (unsigned)env->me_stuck_meta); - else if (meta_bootid_match(meta)) - latch = meta_choice_recent( - meta->unsafe_txnid, SIGN_IS_STEADY(meta->unsafe_sign), - dest->unsafe_txnid, SIGN_IS_STEADY(dest->unsafe_sign)); - else - latch = meta_choice_steady( - meta->unsafe_txnid, SIGN_IS_STEADY(meta->unsafe_sign), - dest->unsafe_txnid, SIGN_IS_STEADY(dest->unsafe_sign)); - if (latch) { - *dest = *meta; - if (!lck_exclusive && !META_IS_STEADY(dest)) - loop_limit += 1; /* LY: should re-read to hush race with update */ - VERBOSE("latch meta[%u]", meta_number); - } - } - - if (dest->mm_psize == 0 || - (env->me_stuck_meta < 0 && - !(META_IS_STEADY(dest) || - meta_weak_acceptable(env, dest, lck_exclusive)))) { - ERROR("%s", "no usable meta-pages, database is corrupted"); - if (rc == MDBX_SUCCESS) { - /* TODO: try to restore the database by fully checking b-tree structure - * for the each meta page, if the corresponding option was given */ - return MDBX_CORRUPTED; - } - return rc; - } - - return MDBX_SUCCESS; -} - -__cold static MDBX_page *meta_model(const MDBX_env *env, MDBX_page *model, - size_t num) { - ENSURE(env, is_powerof2(env->me_psize)); - ENSURE(env, env->me_psize >= MIN_PAGESIZE); - ENSURE(env, env->me_psize <= MAX_PAGESIZE); - ENSURE(env, env->me_dbgeo.lower >= MIN_MAPSIZE); - ENSURE(env, env->me_dbgeo.upper <= MAX_MAPSIZE); - ENSURE(env, env->me_dbgeo.now >= env->me_dbgeo.lower); - ENSURE(env, env->me_dbgeo.now <= env->me_dbgeo.upper); - - memset(model, 0, env->me_psize); - model->mp_pgno = (pgno_t)num; - model->mp_flags = P_META; - MDBX_meta *const model_meta = page_meta(model); - unaligned_poke_u64(4, model_meta->mm_magic_and_version, MDBX_DATA_MAGIC); - - model_meta->mm_geo.lower = bytes2pgno(env, env->me_dbgeo.lower); - model_meta->mm_geo.upper = bytes2pgno(env, env->me_dbgeo.upper); - model_meta->mm_geo.grow_pv = pages2pv(bytes2pgno(env, 
env->me_dbgeo.grow)); - model_meta->mm_geo.shrink_pv = - pages2pv(bytes2pgno(env, env->me_dbgeo.shrink)); - model_meta->mm_geo.now = bytes2pgno(env, env->me_dbgeo.now); - model_meta->mm_geo.next = NUM_METAS; - - ENSURE(env, model_meta->mm_geo.lower >= MIN_PAGENO); - ENSURE(env, model_meta->mm_geo.upper <= MAX_PAGENO + 1); - ENSURE(env, model_meta->mm_geo.now >= model_meta->mm_geo.lower); - ENSURE(env, model_meta->mm_geo.now <= model_meta->mm_geo.upper); - ENSURE(env, model_meta->mm_geo.next >= MIN_PAGENO); - ENSURE(env, model_meta->mm_geo.next <= model_meta->mm_geo.now); - ENSURE(env, model_meta->mm_geo.grow_pv == - pages2pv(pv2pages(model_meta->mm_geo.grow_pv))); - ENSURE(env, model_meta->mm_geo.shrink_pv == - pages2pv(pv2pages(model_meta->mm_geo.shrink_pv))); - - model_meta->mm_psize = env->me_psize; - model_meta->mm_dbs[FREE_DBI].md_flags = MDBX_INTEGERKEY; - model_meta->mm_dbs[FREE_DBI].md_root = P_INVALID; - model_meta->mm_dbs[MAIN_DBI].md_root = P_INVALID; - meta_set_txnid(env, model_meta, MIN_TXNID + num); - unaligned_poke_u64(4, model_meta->mm_sign, meta_sign(model_meta)); - eASSERT(env, check_meta_coherency(env, model_meta, true)); - return ptr_disp(model, env->me_psize); -} - -/* Fill in most of the zeroed meta-pages for an empty database environment. - * Return pointer to recently (head) meta-page. 
*/ -__cold static MDBX_meta *init_metas(const MDBX_env *env, void *buffer) { - MDBX_page *page0 = (MDBX_page *)buffer; - MDBX_page *page1 = meta_model(env, page0, 0); - MDBX_page *page2 = meta_model(env, page1, 1); - meta_model(env, page2, 2); - return page_meta(page2); -} - -static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending, - meta_troika_t *const troika) { - eASSERT(env, ((env->me_flags ^ flags) & MDBX_WRITEMAP) == 0); - eASSERT(env, pending->mm_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); - eASSERT(env, db_check_flags(pending->mm_dbs[MAIN_DBI].md_flags)); - const MDBX_meta *const meta0 = METAPAGE(env, 0); - const MDBX_meta *const meta1 = METAPAGE(env, 1); - const MDBX_meta *const meta2 = METAPAGE(env, 2); - const meta_ptr_t head = meta_recent(env, troika); - int rc; - - eASSERT(env, - pending < METAPAGE(env, 0) || pending > METAPAGE(env, NUM_METAS)); - eASSERT(env, (env->me_flags & (MDBX_RDONLY | MDBX_FATAL_ERROR)) == 0); - eASSERT(env, pending->mm_geo.next <= pending->mm_geo.now); - - if (flags & MDBX_SAFE_NOSYNC) { - /* Check auto-sync conditions */ - const pgno_t autosync_threshold = - atomic_load32(&env->me_lck->mti_autosync_threshold, mo_Relaxed); - const uint64_t autosync_period = - atomic_load64(&env->me_lck->mti_autosync_period, mo_Relaxed); - uint64_t eoos_timestamp; - if ((autosync_threshold && - atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed) >= - autosync_threshold) || - (autosync_period && - (eoos_timestamp = - atomic_load64(&env->me_lck->mti_eoos_timestamp, mo_Relaxed)) && - osal_monotime() - eoos_timestamp >= autosync_period)) - flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; /* force steady */ - } - - pgno_t shrink = 0; - if (flags & MDBX_SHRINK_ALLOWED) { - const size_t prev_discarded_pgno = - atomic_load32(&env->me_lck->mti_discarded_tail, mo_Relaxed); - if (prev_discarded_pgno < pending->mm_geo.next) - env->me_lck->mti_discarded_tail.weak = pending->mm_geo.next; - else if (prev_discarded_pgno >= - 
pending->mm_geo.next + env->me_madv_threshold) { - /* LY: check conditions to discard unused pages */ - const pgno_t largest_pgno = find_largest_snapshot( - env, (head.ptr_c->mm_geo.next > pending->mm_geo.next) - ? head.ptr_c->mm_geo.next - : pending->mm_geo.next); - eASSERT(env, largest_pgno >= NUM_METAS); - -#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) - const pgno_t edge = env->me_poison_edge; - if (edge > largest_pgno) { - env->me_poison_edge = largest_pgno; - VALGRIND_MAKE_MEM_NOACCESS( - ptr_disp(env->me_map, pgno2bytes(env, largest_pgno)), - pgno2bytes(env, edge - largest_pgno)); - MDBX_ASAN_POISON_MEMORY_REGION( - ptr_disp(env->me_map, pgno2bytes(env, largest_pgno)), - pgno2bytes(env, edge - largest_pgno)); - } -#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ - -#if MDBX_ENABLE_MADVISE && \ - (defined(MADV_DONTNEED) || defined(POSIX_MADV_DONTNEED)) - const size_t discard_edge_pgno = pgno_align2os_pgno(env, largest_pgno); - if (prev_discarded_pgno >= discard_edge_pgno + env->me_madv_threshold) { - const size_t prev_discarded_bytes = - pgno_align2os_bytes(env, prev_discarded_pgno); - const size_t discard_edge_bytes = pgno2bytes(env, discard_edge_pgno); - /* из-за выравнивания prev_discarded_bytes и discard_edge_bytes - * могут быть равны */ - if (prev_discarded_bytes > discard_edge_bytes) { - NOTICE("shrink-MADV_%s %zu..%zu", "DONTNEED", discard_edge_pgno, - prev_discarded_pgno); - munlock_after(env, discard_edge_pgno, - bytes_align2os_bytes(env, env->me_dxb_mmap.current)); - const uint32_t munlocks_before = - atomic_load32(&env->me_lck->mti_mlcnt[1], mo_Relaxed); -#if defined(MADV_DONTNEED) - int advise = MADV_DONTNEED; -#if defined(MADV_FREE) && \ - 0 /* MADV_FREE works for only anonymous vma at the moment */ - if ((env->me_flags & MDBX_WRITEMAP) && - linux_kernel_version > 0x04050000) - advise = MADV_FREE; -#endif /* MADV_FREE */ - int err = madvise(ptr_disp(env->me_map, discard_edge_bytes), - prev_discarded_bytes - 
discard_edge_bytes, advise) - ? ignore_enosys(errno) - : MDBX_SUCCESS; -#else - int err = ignore_enosys(posix_madvise( - ptr_disp(env->me_map, discard_edge_bytes), - prev_discarded_bytes - discard_edge_bytes, POSIX_MADV_DONTNEED)); -#endif - if (unlikely(MDBX_IS_ERROR(err))) { - const uint32_t mlocks_after = - atomic_load32(&env->me_lck->mti_mlcnt[0], mo_Relaxed); - if (err == MDBX_EINVAL) { - const int severity = (mlocks_after - munlocks_before) - ? MDBX_LOG_NOTICE - : MDBX_LOG_WARN; - if (LOG_ENABLED(severity)) - debug_log( - severity, __func__, __LINE__, - "%s-madvise: ignore EINVAL (%d) since some pages maybe " - "locked (%u/%u mlcnt-processes)", - "shrink", err, mlocks_after, munlocks_before); - } else { - ERROR("%s-madvise(%s, %zu, +%zu), %u/%u mlcnt-processes, err %d", - "shrink", "DONTNEED", discard_edge_bytes, - prev_discarded_bytes - discard_edge_bytes, mlocks_after, - munlocks_before, err); - return err; - } - } else - env->me_lck->mti_discarded_tail.weak = discard_edge_pgno; - } - } -#endif /* MDBX_ENABLE_MADVISE && (MADV_DONTNEED || POSIX_MADV_DONTNEED) */ - - /* LY: check conditions to shrink datafile */ - const pgno_t backlog_gap = 3 + pending->mm_dbs[FREE_DBI].md_depth * 3; - pgno_t shrink_step = 0; - if (pending->mm_geo.shrink_pv && - pending->mm_geo.now - pending->mm_geo.next > - (shrink_step = pv2pages(pending->mm_geo.shrink_pv)) + - backlog_gap) { - if (pending->mm_geo.now > largest_pgno && - pending->mm_geo.now - largest_pgno > shrink_step + backlog_gap) { - const pgno_t aligner = - pending->mm_geo.grow_pv - ? /* grow_step */ pv2pages(pending->mm_geo.grow_pv) - : shrink_step; - const pgno_t with_backlog_gap = largest_pgno + backlog_gap; - const pgno_t aligned = - pgno_align2os_pgno(env, (size_t)with_backlog_gap + aligner - - with_backlog_gap % aligner); - const pgno_t bottom = (aligned > pending->mm_geo.lower) - ? 
aligned - : pending->mm_geo.lower; - if (pending->mm_geo.now > bottom) { - if (TROIKA_HAVE_STEADY(troika)) - /* force steady, but only if steady-checkpoint is present */ - flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; - shrink = pending->mm_geo.now - bottom; - pending->mm_geo.now = bottom; - if (unlikely(head.txnid == pending->unsafe_txnid)) { - const txnid_t txnid = safe64_txnid_next(pending->unsafe_txnid); - NOTICE("force-forward pending-txn %" PRIaTXN " -> %" PRIaTXN, - pending->unsafe_txnid, txnid); - ENSURE(env, !env->me_txn0 || !env->me_txn); - if (unlikely(txnid > MAX_TXNID)) { - rc = MDBX_TXN_FULL; - ERROR("txnid overflow, raise %d", rc); - goto fail; - } - meta_set_txnid(env, pending, txnid); - eASSERT(env, check_meta_coherency(env, pending, true)); - } - } - } - } - } - } - - /* LY: step#1 - sync previously written/updated data-pages */ - rc = MDBX_RESULT_FALSE /* carry steady */; - if (atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed)) { - eASSERT(env, ((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0); - enum osal_syncmode_bits mode_bits = MDBX_SYNC_NONE; - unsigned sync_op = 0; - if ((flags & MDBX_SAFE_NOSYNC) == 0) { - sync_op = 1; - mode_bits = MDBX_SYNC_DATA; - if (pending->mm_geo.next > - meta_prefer_steady(env, troika).ptr_c->mm_geo.now) - mode_bits |= MDBX_SYNC_SIZE; - if (flags & MDBX_NOMETASYNC) - mode_bits |= MDBX_SYNC_IODQ; - } else if (unlikely(env->me_incore)) - goto skip_incore_sync; - if (flags & MDBX_WRITEMAP) { -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.msync.weak += sync_op; -#else - (void)sync_op; -#endif /* MDBX_ENABLE_PGOP_STAT */ - rc = - osal_msync(&env->me_dxb_mmap, 0, - pgno_align2os_bytes(env, pending->mm_geo.next), mode_bits); - } else { -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.fsync.weak += sync_op; -#else - (void)sync_op; -#endif /* MDBX_ENABLE_PGOP_STAT */ - rc = osal_fsync(env->me_lazy_fd, mode_bits); - } - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - rc = (flags & MDBX_SAFE_NOSYNC) ? 
MDBX_RESULT_TRUE /* carry non-steady */ - : MDBX_RESULT_FALSE /* carry steady */; - } - eASSERT(env, check_meta_coherency(env, pending, true)); - - /* Steady or Weak */ - if (rc == MDBX_RESULT_FALSE /* carry steady */) { - unaligned_poke_u64(4, pending->mm_sign, meta_sign(pending)); - atomic_store64(&env->me_lck->mti_eoos_timestamp, 0, mo_Relaxed); - atomic_store64(&env->me_lck->mti_unsynced_pages, 0, mo_Relaxed); - } else { - assert(rc == MDBX_RESULT_TRUE /* carry non-steady */); - skip_incore_sync: - eASSERT(env, env->me_lck->mti_unsynced_pages.weak > 0); - /* Может быть нулевым если unsynced_pages > 0 в результате спиллинга. - * eASSERT(env, env->me_lck->mti_eoos_timestamp.weak != 0); */ - unaligned_poke_u64(4, pending->mm_sign, MDBX_DATASIGN_WEAK); - } - - const bool legal4overwrite = - head.txnid == pending->unsafe_txnid && - memcmp(&head.ptr_c->mm_dbs, &pending->mm_dbs, sizeof(pending->mm_dbs)) == - 0 && - memcmp(&head.ptr_c->mm_canary, &pending->mm_canary, - sizeof(pending->mm_canary)) == 0 && - memcmp(&head.ptr_c->mm_geo, &pending->mm_geo, sizeof(pending->mm_geo)) == - 0; - MDBX_meta *target = nullptr; - if (head.txnid == pending->unsafe_txnid) { - ENSURE(env, legal4overwrite); - if (!head.is_steady && META_IS_STEADY(pending)) - target = (MDBX_meta *)head.ptr_c; - else { - WARNING("%s", "skip update meta"); - return MDBX_SUCCESS; - } - } else { - const unsigned troika_tail = troika->tail_and_flags & 3; - ENSURE(env, troika_tail < NUM_METAS && troika_tail != troika->recent && - troika_tail != troika->prefer_steady); - target = (MDBX_meta *)meta_tail(env, troika).ptr_c; - } - - /* LY: step#2 - update meta-page. 
*/ - DEBUG("writing meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO - ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO - " +%u -%u, txn_id %" PRIaTXN ", %s", - data_page(target)->mp_pgno, pending->mm_dbs[MAIN_DBI].md_root, - pending->mm_dbs[FREE_DBI].md_root, pending->mm_geo.lower, - pending->mm_geo.next, pending->mm_geo.now, pending->mm_geo.upper, - pv2pages(pending->mm_geo.grow_pv), pv2pages(pending->mm_geo.shrink_pv), - pending->unsafe_txnid, durable_caption(pending)); - - DEBUG("meta0: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO "/%" PRIaPGNO, - (meta0 == head.ptr_c) ? "head" - : (meta0 == target) ? "tail" - : "stay", - durable_caption(meta0), constmeta_txnid(meta0), - meta0->mm_dbs[MAIN_DBI].md_root, meta0->mm_dbs[FREE_DBI].md_root); - DEBUG("meta1: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO "/%" PRIaPGNO, - (meta1 == head.ptr_c) ? "head" - : (meta1 == target) ? "tail" - : "stay", - durable_caption(meta1), constmeta_txnid(meta1), - meta1->mm_dbs[MAIN_DBI].md_root, meta1->mm_dbs[FREE_DBI].md_root); - DEBUG("meta2: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO "/%" PRIaPGNO, - (meta2 == head.ptr_c) ? "head" - : (meta2 == target) ? "tail" - : "stay", - durable_caption(meta2), constmeta_txnid(meta2), - meta2->mm_dbs[MAIN_DBI].md_root, meta2->mm_dbs[FREE_DBI].md_root); - - eASSERT(env, pending->unsafe_txnid != constmeta_txnid(meta0) || - (META_IS_STEADY(pending) && !META_IS_STEADY(meta0))); - eASSERT(env, pending->unsafe_txnid != constmeta_txnid(meta1) || - (META_IS_STEADY(pending) && !META_IS_STEADY(meta1))); - eASSERT(env, pending->unsafe_txnid != constmeta_txnid(meta2) || - (META_IS_STEADY(pending) && !META_IS_STEADY(meta2))); - - eASSERT(env, ((env->me_flags ^ flags) & MDBX_WRITEMAP) == 0); - ENSURE(env, target == head.ptr_c || - constmeta_txnid(target) < pending->unsafe_txnid); - if (flags & MDBX_WRITEMAP) { - jitter4testing(true); - if (likely(target != head.ptr_c)) { - /* LY: 'invalidate' the meta. 
*/ - meta_update_begin(env, target, pending->unsafe_txnid); - unaligned_poke_u64(4, target->mm_sign, MDBX_DATASIGN_WEAK); -#ifndef NDEBUG - /* debug: provoke failure to catch a violators, but don't touch mm_psize - * to allow readers catch actual pagesize. */ - void *provoke_begin = &target->mm_dbs[FREE_DBI].md_root; - void *provoke_end = &target->mm_sign; - memset(provoke_begin, 0xCC, ptr_dist(provoke_end, provoke_begin)); - jitter4testing(false); -#endif - - /* LY: update info */ - target->mm_geo = pending->mm_geo; - target->mm_dbs[FREE_DBI] = pending->mm_dbs[FREE_DBI]; - target->mm_dbs[MAIN_DBI] = pending->mm_dbs[MAIN_DBI]; - eASSERT(env, target->mm_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); - eASSERT(env, db_check_flags(target->mm_dbs[MAIN_DBI].md_flags)); - target->mm_canary = pending->mm_canary; - memcpy(target->mm_pages_retired, pending->mm_pages_retired, 8); - jitter4testing(true); - - /* LY: 'commit' the meta */ - meta_update_end(env, target, unaligned_peek_u64(4, pending->mm_txnid_b)); - jitter4testing(true); - eASSERT(env, check_meta_coherency(env, target, true)); - } else { - /* dangerous case (target == head), only mm_sign could - * me updated, check assertions once again */ - eASSERT(env, - legal4overwrite && !head.is_steady && META_IS_STEADY(pending)); - } - memcpy(target->mm_sign, pending->mm_sign, 8); - osal_flush_incoherent_cpu_writeback(); - jitter4testing(true); - if (!env->me_incore) { - if (!MDBX_AVOID_MSYNC) { - /* sync meta-pages */ -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.msync.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - rc = osal_msync( - &env->me_dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS), - (flags & MDBX_NOMETASYNC) ? 
MDBX_SYNC_NONE - : MDBX_SYNC_DATA | MDBX_SYNC_IODQ); - } else { -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.wops.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - const MDBX_page *page = data_page(target); - rc = osal_pwrite(env->me_fd4meta, page, env->me_psize, - ptr_dist(page, env->me_map)); - if (likely(rc == MDBX_SUCCESS)) { - osal_flush_incoherent_mmap(target, sizeof(MDBX_meta), - env->me_os_psize); - if ((flags & MDBX_NOMETASYNC) == 0 && - env->me_fd4meta == env->me_lazy_fd) { -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.fsync.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - rc = osal_fsync(env->me_lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); - } - } - } - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - } - } else { -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.wops.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - const MDBX_meta undo_meta = *target; - eASSERT(env, pending->mm_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); - eASSERT(env, db_check_flags(pending->mm_dbs[MAIN_DBI].md_flags)); - rc = osal_pwrite(env->me_fd4meta, pending, sizeof(MDBX_meta), - ptr_dist(target, env->me_map)); - if (unlikely(rc != MDBX_SUCCESS)) { - undo: - DEBUG("%s", "write failed, disk error?"); - /* On a failure, the pagecache still contains the new data. - * Try write some old data back, to prevent it from being used. 
*/ - osal_pwrite(env->me_fd4meta, &undo_meta, sizeof(MDBX_meta), - ptr_dist(target, env->me_map)); - goto fail; - } - osal_flush_incoherent_mmap(target, sizeof(MDBX_meta), env->me_os_psize); - /* sync meta-pages */ - if ((flags & MDBX_NOMETASYNC) == 0 && env->me_fd4meta == env->me_lazy_fd && - !env->me_incore) { -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.fsync.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - rc = osal_fsync(env->me_lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); - if (rc != MDBX_SUCCESS) - goto undo; - } - } - - uint64_t timestamp = 0; - while ("workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269") { - rc = coherency_check_written(env, pending->unsafe_txnid, target, - bytes2pgno(env, ptr_dist(target, env->me_map)), - ×tamp); - if (likely(rc == MDBX_SUCCESS)) - break; - if (unlikely(rc != MDBX_RESULT_TRUE)) - goto fail; - } - - const uint32_t sync_txnid_dist = - ((flags & MDBX_NOMETASYNC) == 0) ? 0 - : ((flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) - ? 
MDBX_NOMETASYNC_LAZY_FD - : MDBX_NOMETASYNC_LAZY_WRITEMAP; - env->me_lck->mti_meta_sync_txnid.weak = - pending->mm_txnid_a[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__].weak - - sync_txnid_dist; - - *troika = meta_tap(env); - for (MDBX_txn *txn = env->me_txn0; txn; txn = txn->mt_child) - if (troika != &txn->tw.troika) - txn->tw.troika = *troika; - - /* LY: shrink datafile if needed */ - if (unlikely(shrink)) { - VERBOSE("shrink to %" PRIaPGNO " pages (-%" PRIaPGNO ")", - pending->mm_geo.now, shrink); - rc = dxb_resize(env, pending->mm_geo.next, pending->mm_geo.now, - pending->mm_geo.upper, impilict_shrink); - if (rc != MDBX_SUCCESS && rc != MDBX_EPERM) - goto fail; - eASSERT(env, check_meta_coherency(env, target, true)); - } - - MDBX_lockinfo *const lck = env->me_lck_mmap.lck; - if (likely(lck)) - /* toggle oldest refresh */ - atomic_store32(&lck->mti_readers_refresh_flag, false, mo_Relaxed); - - return MDBX_SUCCESS; - -fail: - env->me_flags |= MDBX_FATAL_ERROR; - return rc; -} - -static void recalculate_merge_threshold(MDBX_env *env) { - const size_t bytes = page_space(env); - env->me_merge_threshold = - (uint16_t)(bytes - - (bytes * env->me_options.merge_threshold_16dot16_percent >> - 16)); - env->me_merge_threshold_gc = - (uint16_t)(bytes - - ((env->me_options.merge_threshold_16dot16_percent > 19005) - ? 
bytes / 3 /* 33 % */ - : bytes / 4 /* 25 % */)); -} - -__cold static void setup_pagesize(MDBX_env *env, const size_t pagesize) { - STATIC_ASSERT(PTRDIFF_MAX > MAX_MAPSIZE); - STATIC_ASSERT(MIN_PAGESIZE > sizeof(MDBX_page) + sizeof(MDBX_meta)); - ENSURE(env, is_powerof2(pagesize)); - ENSURE(env, pagesize >= MIN_PAGESIZE); - ENSURE(env, pagesize <= MAX_PAGESIZE); - env->me_psize = (unsigned)pagesize; - if (env->me_pbuf) { - osal_memalign_free(env->me_pbuf); - env->me_pbuf = nullptr; - } - - STATIC_ASSERT(MAX_GC1OVPAGE(MIN_PAGESIZE) > 4); - STATIC_ASSERT(MAX_GC1OVPAGE(MAX_PAGESIZE) < MDBX_PGL_LIMIT); - const intptr_t maxgc_ov1page = (pagesize - PAGEHDRSZ) / sizeof(pgno_t) - 1; - ENSURE(env, - maxgc_ov1page > 42 && maxgc_ov1page < (intptr_t)MDBX_PGL_LIMIT / 4); - env->me_maxgc_ov1page = (unsigned)maxgc_ov1page; - env->me_maxgc_per_branch = - (unsigned)((pagesize - PAGEHDRSZ) / - (sizeof(indx_t) + sizeof(MDBX_node) + sizeof(txnid_t))); - - STATIC_ASSERT(LEAF_NODE_MAX(MIN_PAGESIZE) > sizeof(MDBX_db) + NODESIZE + 42); - STATIC_ASSERT(LEAF_NODE_MAX(MAX_PAGESIZE) < UINT16_MAX); - STATIC_ASSERT(LEAF_NODE_MAX(MIN_PAGESIZE) >= BRANCH_NODE_MAX(MIN_PAGESIZE)); - STATIC_ASSERT(BRANCH_NODE_MAX(MAX_PAGESIZE) > NODESIZE + 42); - STATIC_ASSERT(BRANCH_NODE_MAX(MAX_PAGESIZE) < UINT16_MAX); - const intptr_t branch_nodemax = BRANCH_NODE_MAX(pagesize); - const intptr_t leaf_nodemax = LEAF_NODE_MAX(pagesize); - ENSURE(env, branch_nodemax > (intptr_t)(NODESIZE + 42) && - branch_nodemax % 2 == 0 && - leaf_nodemax > (intptr_t)(sizeof(MDBX_db) + NODESIZE + 42) && - leaf_nodemax >= branch_nodemax && - leaf_nodemax < (int)UINT16_MAX && leaf_nodemax % 2 == 0); - env->me_leaf_nodemax = (uint16_t)leaf_nodemax; - env->me_branch_nodemax = (uint16_t)branch_nodemax; - env->me_psize2log = (uint8_t)log2n_powerof2(pagesize); - eASSERT(env, pgno2bytes(env, 1) == pagesize); - eASSERT(env, bytes2pgno(env, pagesize + pagesize) == 2); - recalculate_merge_threshold(env); - - /* TODO: recalculate me_subpage_xyz 
values from MDBX_opt_subpage_xyz. */ - env->me_subpage_limit = env->me_leaf_nodemax - NODESIZE; - env->me_subpage_room_threshold = 0; - env->me_subpage_reserve_prereq = env->me_leaf_nodemax; - env->me_subpage_reserve_limit = env->me_subpage_limit / 42; - eASSERT(env, - env->me_subpage_reserve_prereq > - env->me_subpage_room_threshold + env->me_subpage_reserve_limit); - eASSERT(env, env->me_leaf_nodemax >= env->me_subpage_limit + NODESIZE); - - const pgno_t max_pgno = bytes2pgno(env, MAX_MAPSIZE); - if (!env->me_options.flags.non_auto.dp_limit) { - /* auto-setup dp_limit by "The42" ;-) */ - intptr_t total_ram_pages, avail_ram_pages; - int err = mdbx_get_sysraminfo(nullptr, &total_ram_pages, &avail_ram_pages); - if (unlikely(err != MDBX_SUCCESS)) - ERROR("mdbx_get_sysraminfo(), rc %d", err); - else { - size_t reasonable_dpl_limit = - (size_t)(total_ram_pages + avail_ram_pages) / 42; - if (pagesize > env->me_os_psize) - reasonable_dpl_limit /= pagesize / env->me_os_psize; - else if (pagesize < env->me_os_psize) - reasonable_dpl_limit *= env->me_os_psize / pagesize; - reasonable_dpl_limit = (reasonable_dpl_limit < MDBX_PGL_LIMIT) - ? reasonable_dpl_limit - : MDBX_PGL_LIMIT; - reasonable_dpl_limit = (reasonable_dpl_limit > CURSOR_STACK * 4) - ? 
reasonable_dpl_limit - : CURSOR_STACK * 4; - env->me_options.dp_limit = (unsigned)reasonable_dpl_limit; - } - } - if (env->me_options.dp_limit > max_pgno - NUM_METAS) - env->me_options.dp_limit = max_pgno - NUM_METAS; - if (env->me_options.dp_initial > env->me_options.dp_limit) - env->me_options.dp_initial = env->me_options.dp_limit; -} - -__cold int mdbx_env_create(MDBX_env **penv) { - if (unlikely(!penv)) - return MDBX_EINVAL; - *penv = nullptr; - -#ifdef MDBX_HAVE_C11ATOMICS - if (unlikely(!atomic_is_lock_free((const volatile uint32_t *)penv))) { - ERROR("lock-free atomic ops for %u-bit types is required", 32); - return MDBX_INCOMPATIBLE; - } -#if MDBX_64BIT_ATOMIC - if (unlikely(!atomic_is_lock_free((const volatile uint64_t *)penv))) { - ERROR("lock-free atomic ops for %u-bit types is required", 64); - return MDBX_INCOMPATIBLE; - } -#endif /* MDBX_64BIT_ATOMIC */ -#endif /* MDBX_HAVE_C11ATOMICS */ - - const size_t os_psize = osal_syspagesize(); - if (unlikely(!is_powerof2(os_psize) || os_psize < MIN_PAGESIZE)) { - ERROR("unsuitable system pagesize %" PRIuPTR, os_psize); - return MDBX_INCOMPATIBLE; - } - -#if defined(__linux__) || defined(__gnu_linux__) - if (unlikely(linux_kernel_version < 0x04000000)) { - /* 2022-09-01: Прошло уже больше двух после окончания какой-либо поддержки - * самого "долгоиграющего" ядра 3.16.85 ветки 3.x */ - ERROR("too old linux kernel %u.%u.%u.%u, the >= 4.0.0 is required", - linux_kernel_version >> 24, (linux_kernel_version >> 16) & 255, - (linux_kernel_version >> 8) & 255, linux_kernel_version & 255); - return MDBX_INCOMPATIBLE; - } -#endif /* Linux */ - - MDBX_env *env = osal_calloc(1, sizeof(MDBX_env)); - if (unlikely(!env)) - return MDBX_ENOMEM; - - env->me_maxreaders = DEFAULT_READERS; - env->me_maxdbs = env->me_numdbs = CORE_DBS; - env->me_lazy_fd = env->me_dsync_fd = env->me_fd4meta = env->me_lfd = - INVALID_HANDLE_VALUE; - env->me_stuck_meta = -1; - - env->me_options.rp_augment_limit = MDBX_PNL_INITIAL; - 
env->me_options.dp_reserve_limit = MDBX_PNL_INITIAL; - env->me_options.dp_initial = MDBX_PNL_INITIAL; - env->me_options.spill_max_denominator = 8; - env->me_options.spill_min_denominator = 8; - env->me_options.spill_parent4child_denominator = 0; - env->me_options.dp_loose_limit = 64; - env->me_options.merge_threshold_16dot16_percent = 65536 / 4 /* 25% */; - if (default_prefer_waf_insteadof_balance(env)) - env->me_options.prefer_waf_insteadof_balance = true; - -#if !(defined(_WIN32) || defined(_WIN64)) - env->me_options.writethrough_threshold = -#if defined(__linux__) || defined(__gnu_linux__) - mdbx_RunningOnWSL1 ? MAX_PAGENO : -#endif /* Linux */ - MDBX_WRITETHROUGH_THRESHOLD_DEFAULT; -#endif /* Windows */ - - env->me_os_psize = (unsigned)os_psize; - setup_pagesize(env, (env->me_os_psize < MAX_PAGESIZE) ? env->me_os_psize - : MAX_PAGESIZE); - - int rc = osal_fastmutex_init(&env->me_dbi_lock); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - -#if defined(_WIN32) || defined(_WIN64) - osal_srwlock_Init(&env->me_remap_guard); - InitializeCriticalSection(&env->me_windowsbug_lock); -#else - rc = osal_fastmutex_init(&env->me_remap_guard); - if (unlikely(rc != MDBX_SUCCESS)) { - osal_fastmutex_destroy(&env->me_dbi_lock); - goto bailout; - } - -#if MDBX_LOCKING > MDBX_LOCKING_SYSV - MDBX_lockinfo *const stub = lckless_stub(env); - rc = osal_ipclock_stubinit(&stub->mti_wlock); -#endif /* MDBX_LOCKING */ - if (unlikely(rc != MDBX_SUCCESS)) { - osal_fastmutex_destroy(&env->me_remap_guard); - osal_fastmutex_destroy(&env->me_dbi_lock); - goto bailout; - } -#endif /* Windows */ - - VALGRIND_CREATE_MEMPOOL(env, 0, 0); - env->me_signature.weak = MDBX_ME_SIGNATURE; - *penv = env; - return MDBX_SUCCESS; - -bailout: - osal_free(env); - return rc; -} - -__cold static intptr_t get_reasonable_db_maxsize(intptr_t *cached_result) { - if (*cached_result == 0) { - intptr_t pagesize, total_ram_pages; - if (unlikely(mdbx_get_sysraminfo(&pagesize, &total_ram_pages, nullptr) != - 
MDBX_SUCCESS)) - return *cached_result = MAX_MAPSIZE32 /* the 32-bit limit is good enough - for fallback */ - ; - - if (unlikely((size_t)total_ram_pages * 2 > MAX_MAPSIZE / (size_t)pagesize)) - return *cached_result = MAX_MAPSIZE; - assert(MAX_MAPSIZE >= (size_t)(total_ram_pages * pagesize * 2)); - - /* Suggesting should not be more than golden ratio of the size of RAM. */ - *cached_result = (intptr_t)((size_t)total_ram_pages * 207 >> 7) * pagesize; - - /* Round to the nearest human-readable granulation. */ - for (size_t unit = MEGABYTE; unit; unit <<= 5) { - const size_t floor = floor_powerof2(*cached_result, unit); - const size_t ceil = ceil_powerof2(*cached_result, unit); - const size_t threshold = (size_t)*cached_result >> 4; - const bool down = - *cached_result - floor < ceil - *cached_result || ceil > MAX_MAPSIZE; - if (threshold < (down ? *cached_result - floor : ceil - *cached_result)) - break; - *cached_result = down ? floor : ceil; - } - } - return *cached_result; -} - -__cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, - intptr_t size_now, intptr_t size_upper, - intptr_t growth_step, - intptr_t shrink_threshold, intptr_t pagesize) { - int rc = check_env(env, false); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - const bool txn0_owned = env->me_txn0 && env_txn0_owned(env); - const bool inside_txn = txn0_owned && env->me_txn; - bool should_unlock = false; - -#if MDBX_DEBUG - if (growth_step < 0) { - growth_step = 1; - if (shrink_threshold < 0) - shrink_threshold = 1; - } -#endif /* MDBX_DEBUG */ - - intptr_t reasonable_maxsize = 0; - if (env->me_map) { - /* env already mapped */ - if (unlikely(env->me_flags & MDBX_RDONLY)) - return MDBX_EACCESS; - - if (!txn0_owned) { - int err = osal_txn_lock(env, false); - if (unlikely(err != MDBX_SUCCESS)) - return err; - should_unlock = true; - env->me_txn0->tw.troika = meta_tap(env); - eASSERT(env, !env->me_txn && !env->me_txn0->mt_child); - env->me_txn0->mt_txnid = - 
env->me_txn0->tw.troika.txnid[env->me_txn0->tw.troika.recent]; - txn_oldest_reader(env->me_txn0); - } - - /* get untouched params from current TXN or DB */ - if (pagesize <= 0 || pagesize >= INT_MAX) - pagesize = env->me_psize; - const MDBX_geo *const geo = - inside_txn ? &env->me_txn->mt_geo - : &meta_recent(env, &env->me_txn0->tw.troika).ptr_c->mm_geo; - if (size_lower < 0) - size_lower = pgno2bytes(env, geo->lower); - if (size_now < 0) - size_now = pgno2bytes(env, geo->now); - if (size_upper < 0) - size_upper = pgno2bytes(env, geo->upper); - if (growth_step < 0) - growth_step = pgno2bytes(env, pv2pages(geo->grow_pv)); - if (shrink_threshold < 0) - shrink_threshold = pgno2bytes(env, pv2pages(geo->shrink_pv)); - - if (pagesize != (intptr_t)env->me_psize) { - rc = MDBX_EINVAL; - goto bailout; - } - const size_t usedbytes = - pgno2bytes(env, find_largest_snapshot(env, geo->next)); - if ((size_t)size_upper < usedbytes) { - rc = MDBX_MAP_FULL; - goto bailout; - } - if ((size_t)size_now < usedbytes) - size_now = usedbytes; - } else { - /* env NOT yet mapped */ - if (unlikely(inside_txn)) - return MDBX_PANIC; - - /* is requested some auto-value for pagesize ? */ - if (pagesize >= INT_MAX /* maximal */) - pagesize = MAX_PAGESIZE; - else if (pagesize <= 0) { - if (pagesize < 0 /* default */) { - pagesize = env->me_os_psize; - if ((uintptr_t)pagesize > MAX_PAGESIZE) - pagesize = MAX_PAGESIZE; - eASSERT(env, (uintptr_t)pagesize >= MIN_PAGESIZE); - } else if (pagesize == 0 /* minimal */) - pagesize = MIN_PAGESIZE; - - /* choose pagesize */ - intptr_t max_size = (size_now > size_lower) ? size_now : size_lower; - max_size = (size_upper > max_size) ? 
size_upper : max_size; - if (max_size < 0 /* default */) - max_size = DEFAULT_MAPSIZE; - else if (max_size == 0 /* minimal */) - max_size = MIN_MAPSIZE; - else if (max_size >= (intptr_t)MAX_MAPSIZE /* maximal */) - max_size = get_reasonable_db_maxsize(&reasonable_maxsize); - - while (max_size > pagesize * (int64_t)(MAX_PAGENO + 1) && - pagesize < MAX_PAGESIZE) - pagesize <<= 1; - } - } - - if (pagesize < (intptr_t)MIN_PAGESIZE || pagesize > (intptr_t)MAX_PAGESIZE || - !is_powerof2(pagesize)) { - rc = MDBX_EINVAL; - goto bailout; - } - - if (size_lower <= 0) { - size_lower = MIN_MAPSIZE; - if (MIN_MAPSIZE / pagesize < MIN_PAGENO) - size_lower = MIN_PAGENO * pagesize; - } - if (size_lower >= INTPTR_MAX) { - size_lower = get_reasonable_db_maxsize(&reasonable_maxsize); - if ((size_t)size_lower / pagesize > MAX_PAGENO + 1) - size_lower = pagesize * (MAX_PAGENO + 1); - } - - if (size_now <= 0) { - size_now = size_lower; - if (size_upper >= size_lower && size_now > size_upper) - size_now = size_upper; - } - if (size_now >= INTPTR_MAX) { - size_now = get_reasonable_db_maxsize(&reasonable_maxsize); - if ((size_t)size_now / pagesize > MAX_PAGENO + 1) - size_now = pagesize * (MAX_PAGENO + 1); - } - - if (size_upper <= 0) { - if (size_now >= get_reasonable_db_maxsize(&reasonable_maxsize) / 2) - size_upper = get_reasonable_db_maxsize(&reasonable_maxsize); - else if (MAX_MAPSIZE != MAX_MAPSIZE32 && - (size_t)size_now >= MAX_MAPSIZE32 / 2 && - (size_t)size_now <= MAX_MAPSIZE32 / 4 * 3) - size_upper = MAX_MAPSIZE32; - else { - size_upper = size_now + size_now; - if ((size_t)size_upper < DEFAULT_MAPSIZE * 2) - size_upper = DEFAULT_MAPSIZE * 2; - } - if ((size_t)size_upper / pagesize > (MAX_PAGENO + 1)) - size_upper = pagesize * (MAX_PAGENO + 1); - } else if (size_upper >= INTPTR_MAX) { - size_upper = get_reasonable_db_maxsize(&reasonable_maxsize); - if ((size_t)size_upper / pagesize > MAX_PAGENO + 1) - size_upper = pagesize * (MAX_PAGENO + 1); - } - - if (unlikely(size_lower < 
(intptr_t)MIN_MAPSIZE || size_lower > size_upper)) { - rc = MDBX_EINVAL; - goto bailout; - } - - if ((uint64_t)size_lower / pagesize < MIN_PAGENO) { - size_lower = pagesize * MIN_PAGENO; - if (unlikely(size_lower > size_upper)) { - rc = MDBX_EINVAL; - goto bailout; - } - if (size_now < size_lower) - size_now = size_lower; - } - - if (unlikely((size_t)size_upper > MAX_MAPSIZE || - (uint64_t)size_upper / pagesize > MAX_PAGENO + 1)) { - rc = MDBX_TOO_LARGE; - goto bailout; - } - - const size_t unit = (env->me_os_psize > (size_t)pagesize) ? env->me_os_psize - : (size_t)pagesize; - size_lower = ceil_powerof2(size_lower, unit); - size_upper = ceil_powerof2(size_upper, unit); - size_now = ceil_powerof2(size_now, unit); - - /* LY: подбираем значение size_upper: - * - кратное размеру страницы - * - без нарушения MAX_MAPSIZE и MAX_PAGENO */ - while (unlikely((size_t)size_upper > MAX_MAPSIZE || - (uint64_t)size_upper / pagesize > MAX_PAGENO + 1)) { - if ((size_t)size_upper < unit + MIN_MAPSIZE || - (size_t)size_upper < (size_t)pagesize * (MIN_PAGENO + 1)) { - /* паранойа на случай переполнения при невероятных значениях */ - rc = MDBX_EINVAL; - goto bailout; - } - size_upper -= unit; - if ((size_t)size_upper < (size_t)size_lower) - size_lower = size_upper; - } - eASSERT(env, (size_upper - size_lower) % env->me_os_psize == 0); - - if (size_now < size_lower) - size_now = size_lower; - if (size_now > size_upper) - size_now = size_upper; - - if (growth_step < 0) { - growth_step = ((size_t)(size_upper - size_lower)) / 42; - if (growth_step > size_lower && size_lower < (intptr_t)MEGABYTE) - growth_step = size_lower; - if (growth_step < 65536) - growth_step = 65536; - if ((size_t)growth_step > MAX_MAPSIZE / 64) - growth_step = MAX_MAPSIZE / 64; - } - if (growth_step == 0 && shrink_threshold > 0) - growth_step = 1; - growth_step = ceil_powerof2(growth_step, unit); - - if (shrink_threshold < 0) - shrink_threshold = growth_step + growth_step; - shrink_threshold = 
ceil_powerof2(shrink_threshold, unit); - - //---------------------------------------------------------------------------- - - if (!env->me_map) { - /* save user's geo-params for future open/create */ - if (pagesize != (intptr_t)env->me_psize) - setup_pagesize(env, pagesize); - env->me_dbgeo.lower = size_lower; - env->me_dbgeo.now = size_now; - env->me_dbgeo.upper = size_upper; - env->me_dbgeo.grow = - pgno2bytes(env, pv2pages(pages2pv(bytes2pgno(env, growth_step)))); - env->me_dbgeo.shrink = - pgno2bytes(env, pv2pages(pages2pv(bytes2pgno(env, shrink_threshold)))); - adjust_defaults(env); - - ENSURE(env, env->me_dbgeo.lower >= MIN_MAPSIZE); - ENSURE(env, env->me_dbgeo.lower / (unsigned)pagesize >= MIN_PAGENO); - ENSURE(env, env->me_dbgeo.lower % (unsigned)pagesize == 0); - ENSURE(env, env->me_dbgeo.lower % env->me_os_psize == 0); - - ENSURE(env, env->me_dbgeo.upper <= MAX_MAPSIZE); - ENSURE(env, env->me_dbgeo.upper / (unsigned)pagesize <= MAX_PAGENO + 1); - ENSURE(env, env->me_dbgeo.upper % (unsigned)pagesize == 0); - ENSURE(env, env->me_dbgeo.upper % env->me_os_psize == 0); - - ENSURE(env, env->me_dbgeo.now >= env->me_dbgeo.lower); - ENSURE(env, env->me_dbgeo.now <= env->me_dbgeo.upper); - ENSURE(env, env->me_dbgeo.now % (unsigned)pagesize == 0); - ENSURE(env, env->me_dbgeo.now % env->me_os_psize == 0); - - ENSURE(env, env->me_dbgeo.grow % (unsigned)pagesize == 0); - ENSURE(env, env->me_dbgeo.grow % env->me_os_psize == 0); - ENSURE(env, env->me_dbgeo.shrink % (unsigned)pagesize == 0); - ENSURE(env, env->me_dbgeo.shrink % env->me_os_psize == 0); - - rc = MDBX_SUCCESS; - } else { - /* apply new params to opened environment */ - ENSURE(env, pagesize == (intptr_t)env->me_psize); - MDBX_meta meta; - memset(&meta, 0, sizeof(meta)); - if (!inside_txn) { - eASSERT(env, should_unlock); - const meta_ptr_t head = meta_recent(env, &env->me_txn0->tw.troika); - - uint64_t timestamp = 0; - while ("workaround for " - "https://libmdbx.dqdkfa.ru/dead-github/issues/269") { - rc = 
coherency_check_head(env->me_txn0, head, &timestamp); - if (likely(rc == MDBX_SUCCESS)) - break; - if (unlikely(rc != MDBX_RESULT_TRUE)) - goto bailout; - } - meta = *head.ptr_c; - const txnid_t txnid = safe64_txnid_next(head.txnid); - if (unlikely(txnid > MAX_TXNID)) { - rc = MDBX_TXN_FULL; - ERROR("txnid overflow, raise %d", rc); - goto bailout; - } - meta_set_txnid(env, &meta, txnid); - } - - const MDBX_geo *const current_geo = - &(env->me_txn ? env->me_txn : env->me_txn0)->mt_geo; - /* update env-geo to avoid influences */ - env->me_dbgeo.now = pgno2bytes(env, current_geo->now); - env->me_dbgeo.lower = pgno2bytes(env, current_geo->lower); - env->me_dbgeo.upper = pgno2bytes(env, current_geo->upper); - env->me_dbgeo.grow = pgno2bytes(env, pv2pages(current_geo->grow_pv)); - env->me_dbgeo.shrink = pgno2bytes(env, pv2pages(current_geo->shrink_pv)); - - MDBX_geo new_geo; - new_geo.lower = bytes2pgno(env, size_lower); - new_geo.now = bytes2pgno(env, size_now); - new_geo.upper = bytes2pgno(env, size_upper); - new_geo.grow_pv = pages2pv(bytes2pgno(env, growth_step)); - new_geo.shrink_pv = pages2pv(bytes2pgno(env, shrink_threshold)); - new_geo.next = current_geo->next; - - ENSURE(env, pgno_align2os_bytes(env, new_geo.lower) == (size_t)size_lower); - ENSURE(env, pgno_align2os_bytes(env, new_geo.upper) == (size_t)size_upper); - ENSURE(env, pgno_align2os_bytes(env, new_geo.now) == (size_t)size_now); - ENSURE(env, new_geo.grow_pv == pages2pv(pv2pages(new_geo.grow_pv))); - ENSURE(env, new_geo.shrink_pv == pages2pv(pv2pages(new_geo.shrink_pv))); - - ENSURE(env, (size_t)size_lower >= MIN_MAPSIZE); - ENSURE(env, new_geo.lower >= MIN_PAGENO); - ENSURE(env, (size_t)size_upper <= MAX_MAPSIZE); - ENSURE(env, new_geo.upper <= MAX_PAGENO + 1); - ENSURE(env, new_geo.now >= new_geo.next); - ENSURE(env, new_geo.upper >= new_geo.now); - ENSURE(env, new_geo.now >= new_geo.lower); - - if (memcmp(current_geo, &new_geo, sizeof(MDBX_geo)) != 0) { -#if defined(_WIN32) || defined(_WIN64) - /* Was DB
shrinking disabled before and now it will be enabled? */ - if (new_geo.lower < new_geo.upper && new_geo.shrink_pv && - !(current_geo->lower < current_geo->upper && - current_geo->shrink_pv)) { - if (!env->me_lck_mmap.lck) { - rc = MDBX_EPERM; - goto bailout; - } - int err = osal_rdt_lock(env); - if (unlikely(MDBX_IS_ERROR(err))) { - rc = err; - goto bailout; - } - - /* Check if there are any reading threads that do not use the SRWL */ - const size_t CurrentTid = GetCurrentThreadId(); - const MDBX_reader *const begin = env->me_lck_mmap.lck->mti_readers; - const MDBX_reader *const end = - begin + atomic_load32(&env->me_lck_mmap.lck->mti_numreaders, - mo_AcquireRelease); - for (const MDBX_reader *reader = begin; reader < end; ++reader) { - if (reader->mr_pid.weak == env->me_pid && reader->mr_tid.weak && - reader->mr_tid.weak != CurrentTid) { - /* At least one thread may don't use SRWL */ - rc = MDBX_EPERM; - break; - } - } - - osal_rdt_unlock(env); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } -#endif /* Windows */ - - if (new_geo.now != current_geo->now || - new_geo.upper != current_geo->upper) { - rc = dxb_resize(env, current_geo->next, new_geo.now, new_geo.upper, - explicit_resize); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - if (inside_txn) { - env->me_txn->mt_geo = new_geo; - env->me_txn->mt_flags |= MDBX_TXN_DIRTY; - } else { - meta.mm_geo = new_geo; - rc = sync_locked(env, env->me_flags, &meta, &env->me_txn0->tw.troika); - if (likely(rc == MDBX_SUCCESS)) { - env->me_dbgeo.now = pgno2bytes(env, new_geo.now = meta.mm_geo.now); - env->me_dbgeo.upper = - pgno2bytes(env, new_geo.upper = meta.mm_geo.upper); - } - } - } - if (likely(rc == MDBX_SUCCESS)) { - /* update env-geo to avoid influences */ - eASSERT(env, env->me_dbgeo.now == pgno2bytes(env, new_geo.now)); - env->me_dbgeo.lower = pgno2bytes(env, new_geo.lower); - eASSERT(env, env->me_dbgeo.upper == pgno2bytes(env, new_geo.upper)); - env->me_dbgeo.grow = pgno2bytes(env, 
pv2pages(new_geo.grow_pv)); - env->me_dbgeo.shrink = pgno2bytes(env, pv2pages(new_geo.shrink_pv)); - } - } - -bailout: - if (should_unlock) - osal_txn_unlock(env); - return rc; -} - -__cold static int alloc_page_buf(MDBX_env *env) { - return env->me_pbuf ? MDBX_SUCCESS - : osal_memalign_alloc(env->me_os_psize, - env->me_psize * (size_t)NUM_METAS, - &env->me_pbuf); -} - -/* Further setup required for opening an MDBX environment */ -__cold static int setup_dxb(MDBX_env *env, const int lck_rc, - const mdbx_mode_t mode_bits) { - MDBX_meta header; - eASSERT(env, !(env->me_flags & MDBX_ENV_ACTIVE)); - int rc = MDBX_RESULT_FALSE; - int err = read_header(env, &header, lck_rc, mode_bits); - if (unlikely(err != MDBX_SUCCESS)) { - if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE || err != MDBX_ENODATA || - (env->me_flags & MDBX_RDONLY) != 0 || - /* recovery mode */ env->me_stuck_meta >= 0) - return err; - - DEBUG("%s", "create new database"); - rc = /* new database */ MDBX_RESULT_TRUE; - - if (!env->me_dbgeo.now) { - /* set defaults if not configured */ - err = mdbx_env_set_geometry(env, 0, -1, DEFAULT_MAPSIZE, -1, -1, -1); - if (unlikely(err != MDBX_SUCCESS)) - return err; - } - - err = alloc_page_buf(env); - if (unlikely(err != MDBX_SUCCESS)) - return err; - - header = *init_metas(env, env->me_pbuf); - err = osal_pwrite(env->me_lazy_fd, env->me_pbuf, - env->me_psize * (size_t)NUM_METAS, 0); - if (unlikely(err != MDBX_SUCCESS)) - return err; - - err = osal_ftruncate(env->me_lazy_fd, env->me_dxb_mmap.filesize = - env->me_dxb_mmap.current = - env->me_dbgeo.now); - if (unlikely(err != MDBX_SUCCESS)) - return err; - -#ifndef NDEBUG /* just for checking */ - err = read_header(env, &header, lck_rc, mode_bits); - if (unlikely(err != MDBX_SUCCESS)) - return err; -#endif - } - - VERBOSE("header: root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO - "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO " +%u -%u, txn_id %" PRIaTXN - ", %s", - header.mm_dbs[MAIN_DBI].md_root, 
header.mm_dbs[FREE_DBI].md_root, - header.mm_geo.lower, header.mm_geo.next, header.mm_geo.now, - header.mm_geo.upper, pv2pages(header.mm_geo.grow_pv), - pv2pages(header.mm_geo.shrink_pv), - unaligned_peek_u64(4, header.mm_txnid_a), durable_caption(&header)); - - if (unlikely(header.mm_dbs[FREE_DBI].md_flags != MDBX_INTEGERKEY)) { - ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB", - header.mm_dbs[FREE_DBI].md_flags); - return MDBX_INCOMPATIBLE; - } - env->me_db_flags[FREE_DBI] = DB_VALID | MDBX_INTEGERKEY; - env->me_dbxs[FREE_DBI].md_cmp = cmp_int_align4; /* aligned MDBX_INTEGERKEY */ - env->me_dbxs[FREE_DBI].md_dcmp = cmp_lenfast; - env->me_dbxs[FREE_DBI].md_klen_max = env->me_dbxs[FREE_DBI].md_klen_min = 8; - env->me_dbxs[FREE_DBI].md_vlen_min = 4; - env->me_dbxs[FREE_DBI].md_vlen_max = - mdbx_env_get_maxvalsize_ex(env, MDBX_INTEGERKEY); - - if (env->me_psize != header.mm_psize) - setup_pagesize(env, header.mm_psize); - const size_t used_bytes = pgno2bytes(env, header.mm_geo.next); - const size_t used_aligned2os_bytes = - ceil_powerof2(used_bytes, env->me_os_psize); - if ((env->me_flags & MDBX_RDONLY) /* readonly */ - || lck_rc != MDBX_RESULT_TRUE /* not exclusive */ - || /* recovery mode */ env->me_stuck_meta >= 0) { - /* use present params from db */ - const size_t pagesize = header.mm_psize; - err = mdbx_env_set_geometry( - env, header.mm_geo.lower * pagesize, header.mm_geo.now * pagesize, - header.mm_geo.upper * pagesize, - pv2pages(header.mm_geo.grow_pv) * pagesize, - pv2pages(header.mm_geo.shrink_pv) * pagesize, header.mm_psize); - if (unlikely(err != MDBX_SUCCESS)) { - ERROR("%s: err %d", "could not apply geometry from db", err); - return (err == MDBX_EINVAL) ? 
MDBX_INCOMPATIBLE : err; - } - } else if (env->me_dbgeo.now) { - /* silently growth to last used page */ - if (env->me_dbgeo.now < used_aligned2os_bytes) - env->me_dbgeo.now = used_aligned2os_bytes; - if (env->me_dbgeo.upper < used_aligned2os_bytes) - env->me_dbgeo.upper = used_aligned2os_bytes; - - /* apply preconfigured params, but only if substantial changes: - * - upper or lower limit changes - * - shrink threshold or growth step - * But ignore change just a 'now/current' size. */ - if (bytes_align2os_bytes(env, env->me_dbgeo.upper) != - pgno2bytes(env, header.mm_geo.upper) || - bytes_align2os_bytes(env, env->me_dbgeo.lower) != - pgno2bytes(env, header.mm_geo.lower) || - bytes_align2os_bytes(env, env->me_dbgeo.shrink) != - pgno2bytes(env, pv2pages(header.mm_geo.shrink_pv)) || - bytes_align2os_bytes(env, env->me_dbgeo.grow) != - pgno2bytes(env, pv2pages(header.mm_geo.grow_pv))) { - - if (env->me_dbgeo.shrink && env->me_dbgeo.now > used_bytes) - /* pre-shrink if enabled */ - env->me_dbgeo.now = used_bytes + env->me_dbgeo.shrink - - used_bytes % env->me_dbgeo.shrink; - - err = mdbx_env_set_geometry(env, env->me_dbgeo.lower, env->me_dbgeo.now, - env->me_dbgeo.upper, env->me_dbgeo.grow, - env->me_dbgeo.shrink, header.mm_psize); - if (unlikely(err != MDBX_SUCCESS)) { - ERROR("%s: err %d", "could not apply preconfigured db-geometry", err); - return (err == MDBX_EINVAL) ? 
MDBX_INCOMPATIBLE : err; - } - - /* update meta fields */ - header.mm_geo.now = bytes2pgno(env, env->me_dbgeo.now); - header.mm_geo.lower = bytes2pgno(env, env->me_dbgeo.lower); - header.mm_geo.upper = bytes2pgno(env, env->me_dbgeo.upper); - header.mm_geo.grow_pv = pages2pv(bytes2pgno(env, env->me_dbgeo.grow)); - header.mm_geo.shrink_pv = pages2pv(bytes2pgno(env, env->me_dbgeo.shrink)); - - VERBOSE("amended: root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO - "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO - " +%u -%u, txn_id %" PRIaTXN ", %s", - header.mm_dbs[MAIN_DBI].md_root, header.mm_dbs[FREE_DBI].md_root, - header.mm_geo.lower, header.mm_geo.next, header.mm_geo.now, - header.mm_geo.upper, pv2pages(header.mm_geo.grow_pv), - pv2pages(header.mm_geo.shrink_pv), - unaligned_peek_u64(4, header.mm_txnid_a), - durable_caption(&header)); - } else { - /* fetch back 'now/current' size, since it was ignored during comparison - * and may differ. */ - env->me_dbgeo.now = pgno_align2os_bytes(env, header.mm_geo.now); - } - ENSURE(env, header.mm_geo.now >= header.mm_geo.next); - } else { - /* geo-params are not pre-configured by user, - * get current values from the meta. 
*/ - env->me_dbgeo.now = pgno2bytes(env, header.mm_geo.now); - env->me_dbgeo.lower = pgno2bytes(env, header.mm_geo.lower); - env->me_dbgeo.upper = pgno2bytes(env, header.mm_geo.upper); - env->me_dbgeo.grow = pgno2bytes(env, pv2pages(header.mm_geo.grow_pv)); - env->me_dbgeo.shrink = pgno2bytes(env, pv2pages(header.mm_geo.shrink_pv)); - } - - ENSURE(env, pgno_align2os_bytes(env, header.mm_geo.now) == env->me_dbgeo.now); - ENSURE(env, env->me_dbgeo.now >= used_bytes); - const uint64_t filesize_before = env->me_dxb_mmap.filesize; - if (unlikely(filesize_before != env->me_dbgeo.now)) { - if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE) { - VERBOSE("filesize mismatch (expect %" PRIuPTR "b/%" PRIaPGNO - "p, have %" PRIu64 "b/%" PRIaPGNO "p), " - "assume other process working", - env->me_dbgeo.now, bytes2pgno(env, env->me_dbgeo.now), - filesize_before, bytes2pgno(env, (size_t)filesize_before)); - } else { - WARNING("filesize mismatch (expect %" PRIuSIZE "b/%" PRIaPGNO - "p, have %" PRIu64 "b/%" PRIaPGNO "p)", - env->me_dbgeo.now, bytes2pgno(env, env->me_dbgeo.now), - filesize_before, bytes2pgno(env, (size_t)filesize_before)); - if (filesize_before < used_bytes) { - ERROR("last-page beyond end-of-file (last %" PRIaPGNO - ", have %" PRIaPGNO ")", - header.mm_geo.next, bytes2pgno(env, (size_t)filesize_before)); - return MDBX_CORRUPTED; - } - - if (env->me_flags & MDBX_RDONLY) { - if (filesize_before & (env->me_os_psize - 1)) { - ERROR("%s", "filesize should be rounded-up to system page"); - return MDBX_WANNA_RECOVERY; - } - WARNING("%s", "ignore filesize mismatch in readonly-mode"); - } else { - VERBOSE("will resize datafile to %" PRIuSIZE " bytes, %" PRIaPGNO - " pages", - env->me_dbgeo.now, bytes2pgno(env, env->me_dbgeo.now)); - } - } - } - - VERBOSE("current boot-id %" PRIx64 "-%" PRIx64 " (%savailable)", bootid.x, - bootid.y, (bootid.x | bootid.y) ? 
"" : "not-"); - -#if MDBX_ENABLE_MADVISE - /* calculate readahead hint before mmap with zero redundant pages */ - const bool readahead = - !(env->me_flags & MDBX_NORDAHEAD) && - mdbx_is_readahead_reasonable(used_bytes, 0) == MDBX_RESULT_TRUE; -#endif /* MDBX_ENABLE_MADVISE */ - - err = osal_mmap( - env->me_flags, &env->me_dxb_mmap, env->me_dbgeo.now, env->me_dbgeo.upper, - (lck_rc && env->me_stuck_meta < 0) ? MMAP_OPTION_TRUNCATE : 0); - if (unlikely(err != MDBX_SUCCESS)) - return err; - -#if MDBX_ENABLE_MADVISE -#if defined(MADV_DONTDUMP) - err = madvise(env->me_map, env->me_dxb_mmap.limit, MADV_DONTDUMP) - ? ignore_enosys(errno) - : MDBX_SUCCESS; - if (unlikely(MDBX_IS_ERROR(err))) - return err; -#endif /* MADV_DONTDUMP */ -#if defined(MADV_DODUMP) - if (mdbx_static.flags & MDBX_DBG_DUMP) { - const size_t meta_length_aligned2os = pgno_align2os_bytes(env, NUM_METAS); - err = madvise(env->me_map, meta_length_aligned2os, MADV_DODUMP) - ? ignore_enosys(errno) - : MDBX_SUCCESS; - if (unlikely(MDBX_IS_ERROR(err))) - return err; - } -#endif /* MADV_DODUMP */ -#endif /* MDBX_ENABLE_MADVISE */ - -#ifdef ENABLE_MEMCHECK - env->me_valgrind_handle = - VALGRIND_CREATE_BLOCK(env->me_map, env->me_dxb_mmap.limit, "mdbx"); -#endif /* ENABLE_MEMCHECK */ - - eASSERT(env, used_bytes >= pgno2bytes(env, NUM_METAS) && - used_bytes <= env->me_dxb_mmap.limit); -#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) - if (env->me_dxb_mmap.filesize > used_bytes && - env->me_dxb_mmap.filesize < env->me_dxb_mmap.limit) { - VALGRIND_MAKE_MEM_NOACCESS(ptr_disp(env->me_map, used_bytes), - env->me_dxb_mmap.filesize - used_bytes); - MDBX_ASAN_POISON_MEMORY_REGION(ptr_disp(env->me_map, used_bytes), - env->me_dxb_mmap.filesize - used_bytes); - } - env->me_poison_edge = - bytes2pgno(env, (env->me_dxb_mmap.filesize < env->me_dxb_mmap.limit) - ? 
env->me_dxb_mmap.filesize - : env->me_dxb_mmap.limit); -#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ - - meta_troika_t troika = meta_tap(env); -#if MDBX_DEBUG - meta_troika_dump(env, &troika); -#endif - //-------------------------------- validate/rollback head & steady meta-pages - if (unlikely(env->me_stuck_meta >= 0)) { - /* recovery mode */ - MDBX_meta clone; - MDBX_meta const *const target = METAPAGE(env, env->me_stuck_meta); - err = validate_meta_copy(env, target, &clone); - if (unlikely(err != MDBX_SUCCESS)) { - ERROR("target meta[%u] is corrupted", - bytes2pgno(env, ptr_dist(data_page(target), env->me_map))); - meta_troika_dump(env, &troika); - return MDBX_CORRUPTED; - } - } else /* not recovery mode */ - while (1) { - const unsigned meta_clash_mask = meta_eq_mask(&troika); - if (unlikely(meta_clash_mask)) { - ERROR("meta-pages are clashed: mask 0x%d", meta_clash_mask); - meta_troika_dump(env, &troika); - return MDBX_CORRUPTED; - } - - if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE) { - /* non-exclusive mode, - * meta-pages should be validated by a first process opened the DB */ - if (troika.recent == troika.prefer_steady) - break; - - if (!env->me_lck_mmap.lck) { - /* LY: without-lck (read-only) mode, so it is impossible that other - * process made weak checkpoint. 
*/ - ERROR("%s", "without-lck, unable recovery/rollback"); - meta_troika_dump(env, &troika); - return MDBX_WANNA_RECOVERY; - } - - /* LY: assume just have a collision with other running process, - * or someone make a weak checkpoint */ - VERBOSE("%s", "assume collision or online weak checkpoint"); - break; - } - eASSERT(env, lck_rc == MDBX_RESULT_TRUE); - /* exclusive mode */ - - const meta_ptr_t recent = meta_recent(env, &troika); - const meta_ptr_t prefer_steady = meta_prefer_steady(env, &troika); - MDBX_meta clone; - if (prefer_steady.is_steady) { - err = validate_meta_copy(env, prefer_steady.ptr_c, &clone); - if (unlikely(err != MDBX_SUCCESS)) { - ERROR("meta[%u] with %s txnid %" PRIaTXN " is corrupted, %s needed", - bytes2pgno(env, ptr_dist(prefer_steady.ptr_c, env->me_map)), - "steady", prefer_steady.txnid, "manual recovery"); - meta_troika_dump(env, &troika); - return MDBX_CORRUPTED; - } - if (prefer_steady.ptr_c == recent.ptr_c) - break; - } - - const pgno_t pgno = bytes2pgno(env, ptr_dist(recent.ptr_c, env->me_map)); - const bool last_valid = - validate_meta_copy(env, recent.ptr_c, &clone) == MDBX_SUCCESS; - eASSERT(env, - !prefer_steady.is_steady || recent.txnid != prefer_steady.txnid); - if (unlikely(!last_valid)) { - if (unlikely(!prefer_steady.is_steady)) { - ERROR("%s for open or automatic rollback, %s", - "there are no suitable meta-pages", - "manual recovery is required"); - meta_troika_dump(env, &troika); - return MDBX_CORRUPTED; - } - WARNING("meta[%u] with last txnid %" PRIaTXN - " is corrupted, rollback needed", - pgno, recent.txnid); - meta_troika_dump(env, &troika); - goto purge_meta_head; - } - - if (meta_bootid_match(recent.ptr_c)) { - if (env->me_flags & MDBX_RDONLY) { - ERROR("%s, but boot-id(%016" PRIx64 "-%016" PRIx64 ") is MATCH: " - "rollback NOT needed, steady-sync NEEDED%s", - "opening after an unclean shutdown", bootid.x, bootid.y, - ", but unable in read-only mode"); - meta_troika_dump(env, &troika); - return MDBX_WANNA_RECOVERY; - 
} - WARNING("%s, but boot-id(%016" PRIx64 "-%016" PRIx64 ") is MATCH: " - "rollback NOT needed, steady-sync NEEDED%s", - "opening after an unclean shutdown", bootid.x, bootid.y, ""); - header = clone; - env->me_lck->mti_unsynced_pages.weak = header.mm_geo.next; - if (!env->me_lck->mti_eoos_timestamp.weak) - env->me_lck->mti_eoos_timestamp.weak = osal_monotime(); - break; - } - if (unlikely(!prefer_steady.is_steady)) { - ERROR("%s, but %s for automatic rollback: %s", - "opening after an unclean shutdown", - "there are no suitable meta-pages", - "manual recovery is required"); - meta_troika_dump(env, &troika); - return MDBX_CORRUPTED; - } - if (env->me_flags & MDBX_RDONLY) { - ERROR("%s and rollback needed: (from head %" PRIaTXN - " to steady %" PRIaTXN ")%s", - "opening after an unclean shutdown", recent.txnid, - prefer_steady.txnid, ", but unable in read-only mode"); - meta_troika_dump(env, &troika); - return MDBX_WANNA_RECOVERY; - } - - purge_meta_head: - NOTICE("%s and doing automatic rollback: " - "purge%s meta[%u] with%s txnid %" PRIaTXN, - "opening after an unclean shutdown", last_valid ? "" : " invalid", - pgno, last_valid ? " weak" : "", recent.txnid); - meta_troika_dump(env, &troika); - ENSURE(env, prefer_steady.is_steady); - err = override_meta(env, pgno, 0, - last_valid ? 
recent.ptr_c : prefer_steady.ptr_c); - if (err) { - ERROR("rollback: overwrite meta[%u] with txnid %" PRIaTXN ", error %d", - pgno, recent.txnid, err); - return err; - } - troika = meta_tap(env); - ENSURE(env, 0 == meta_txnid(recent.ptr_v)); - ENSURE(env, 0 == meta_eq_mask(&troika)); - } - - if (lck_rc == /* lck exclusive */ MDBX_RESULT_TRUE) { - //-------------------------------------------------- shrink DB & update geo - /* re-check size after mmap */ - if ((env->me_dxb_mmap.current & (env->me_os_psize - 1)) != 0 || - env->me_dxb_mmap.current < used_bytes) { - ERROR("unacceptable/unexpected datafile size %" PRIuPTR, - env->me_dxb_mmap.current); - return MDBX_PROBLEM; - } - if (env->me_dxb_mmap.current != env->me_dbgeo.now) { - header.mm_geo.now = bytes2pgno(env, env->me_dxb_mmap.current); - NOTICE("need update meta-geo to filesize %" PRIuPTR " bytes, %" PRIaPGNO - " pages", - env->me_dxb_mmap.current, header.mm_geo.now); - } - - const meta_ptr_t recent = meta_recent(env, &troika); - if (/* не учитываем различия в geo.next */ - header.mm_geo.grow_pv != recent.ptr_c->mm_geo.grow_pv || - header.mm_geo.shrink_pv != recent.ptr_c->mm_geo.shrink_pv || - header.mm_geo.lower != recent.ptr_c->mm_geo.lower || - header.mm_geo.upper != recent.ptr_c->mm_geo.upper || - header.mm_geo.now != recent.ptr_c->mm_geo.now) { - if ((env->me_flags & MDBX_RDONLY) != 0 || - /* recovery mode */ env->me_stuck_meta >= 0) { - WARNING("skipped update meta.geo in %s mode: from l%" PRIaPGNO - "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u, to l%" PRIaPGNO - "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u", - (env->me_stuck_meta < 0) ? 
"read-only" : "recovery", - recent.ptr_c->mm_geo.lower, recent.ptr_c->mm_geo.now, - recent.ptr_c->mm_geo.upper, - pv2pages(recent.ptr_c->mm_geo.shrink_pv), - pv2pages(recent.ptr_c->mm_geo.grow_pv), header.mm_geo.lower, - header.mm_geo.now, header.mm_geo.upper, - pv2pages(header.mm_geo.shrink_pv), - pv2pages(header.mm_geo.grow_pv)); - } else { - const txnid_t next_txnid = safe64_txnid_next(recent.txnid); - if (unlikely(next_txnid > MAX_TXNID)) { - ERROR("txnid overflow, raise %d", MDBX_TXN_FULL); - return MDBX_TXN_FULL; - } - NOTICE("updating meta.geo: " - "from l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO - "/s%u-g%u (txn#%" PRIaTXN "), " - "to l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO - "/s%u-g%u (txn#%" PRIaTXN ")", - recent.ptr_c->mm_geo.lower, recent.ptr_c->mm_geo.now, - recent.ptr_c->mm_geo.upper, - pv2pages(recent.ptr_c->mm_geo.shrink_pv), - pv2pages(recent.ptr_c->mm_geo.grow_pv), recent.txnid, - header.mm_geo.lower, header.mm_geo.now, header.mm_geo.upper, - pv2pages(header.mm_geo.shrink_pv), - pv2pages(header.mm_geo.grow_pv), next_txnid); - - ENSURE(env, header.unsafe_txnid == recent.txnid); - meta_set_txnid(env, &header, next_txnid); - err = sync_locked(env, env->me_flags | MDBX_SHRINK_ALLOWED, &header, - &troika); - if (err) { - ERROR("error %d, while updating meta.geo: " - "from l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO - "/s%u-g%u (txn#%" PRIaTXN "), " - "to l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO - "/s%u-g%u (txn#%" PRIaTXN ")", - err, recent.ptr_c->mm_geo.lower, recent.ptr_c->mm_geo.now, - recent.ptr_c->mm_geo.upper, - pv2pages(recent.ptr_c->mm_geo.shrink_pv), - pv2pages(recent.ptr_c->mm_geo.grow_pv), recent.txnid, - header.mm_geo.lower, header.mm_geo.now, header.mm_geo.upper, - pv2pages(header.mm_geo.shrink_pv), - pv2pages(header.mm_geo.grow_pv), header.unsafe_txnid); - return err; - } - } - } - - atomic_store32(&env->me_lck->mti_discarded_tail, - bytes2pgno(env, used_aligned2os_bytes), mo_Relaxed); - - if ((env->me_flags & MDBX_RDONLY) == 0 && 
env->me_stuck_meta < 0 && - (mdbx_static.flags & MDBX_DBG_DONT_UPGRADE) == 0) { - for (int n = 0; n < NUM_METAS; ++n) { - MDBX_meta *const meta = METAPAGE(env, n); - if (unlikely(unaligned_peek_u64(4, &meta->mm_magic_and_version) != - MDBX_DATA_MAGIC)) { - const txnid_t txnid = constmeta_txnid(meta); - NOTICE("%s %s" - "meta[%u], txnid %" PRIaTXN, - "updating db-format signature for", - META_IS_STEADY(meta) ? "stead-" : "weak-", n, txnid); - err = override_meta(env, n, txnid, meta); - if (unlikely(err != MDBX_SUCCESS) && - /* Just ignore the MDBX_PROBLEM error, since here it is - * returned only in case of the attempt to upgrade an obsolete - * meta-page that is invalid for current state of a DB, - * e.g. after shrinking DB file */ - err != MDBX_PROBLEM) { - ERROR("%s meta[%u], txnid %" PRIaTXN ", error %d", - "updating db-format signature for", n, txnid, err); - return err; - } - troika = meta_tap(env); - } - } - } - } /* lck exclusive, lck_rc == MDBX_RESULT_TRUE */ - - //---------------------------------------------------- setup madvise/readahead -#if MDBX_ENABLE_MADVISE - if (used_aligned2os_bytes < env->me_dxb_mmap.current) { -#if defined(MADV_REMOVE) - if (lck_rc && (env->me_flags & MDBX_WRITEMAP) != 0 && - /* not recovery mode */ env->me_stuck_meta < 0) { - NOTICE("open-MADV_%s %u..%u", "REMOVE (deallocate file space)", - env->me_lck->mti_discarded_tail.weak, - bytes2pgno(env, env->me_dxb_mmap.current)); - err = - madvise(ptr_disp(env->me_map, used_aligned2os_bytes), - env->me_dxb_mmap.current - used_aligned2os_bytes, MADV_REMOVE) - ? ignore_enosys(errno) - : MDBX_SUCCESS; - if (unlikely(MDBX_IS_ERROR(err))) - return err; - } -#endif /* MADV_REMOVE */ -#if defined(MADV_DONTNEED) - NOTICE("open-MADV_%s %u..%u", "DONTNEED", - env->me_lck->mti_discarded_tail.weak, - bytes2pgno(env, env->me_dxb_mmap.current)); - err = - madvise(ptr_disp(env->me_map, used_aligned2os_bytes), - env->me_dxb_mmap.current - used_aligned2os_bytes, MADV_DONTNEED) - ? 
ignore_enosys(errno) - : MDBX_SUCCESS; - if (unlikely(MDBX_IS_ERROR(err))) - return err; -#elif defined(POSIX_MADV_DONTNEED) - err = ignore_enosys(posix_madvise( - ptr_disp(env->me_map, used_aligned2os_bytes), - env->me_dxb_mmap.current - used_aligned2os_bytes, POSIX_MADV_DONTNEED)); - if (unlikely(MDBX_IS_ERROR(err))) - return err; -#elif defined(POSIX_FADV_DONTNEED) - err = ignore_enosys(posix_fadvise( - env->me_lazy_fd, used_aligned2os_bytes, - env->me_dxb_mmap.current - used_aligned2os_bytes, POSIX_FADV_DONTNEED)); - if (unlikely(MDBX_IS_ERROR(err))) - return err; -#endif /* MADV_DONTNEED */ - } - - err = set_readahead(env, bytes2pgno(env, used_bytes), readahead, true); - if (unlikely(err != MDBX_SUCCESS)) - return err; -#endif /* MDBX_ENABLE_MADVISE */ - - return rc; -} - -/******************************************************************************/ - -__cold static int setup_lck_locked(MDBX_env *env) { - int err = rthc_register(env); - if (unlikely(err != MDBX_SUCCESS)) - return err; - - int lck_seize_rc = osal_lck_seize(env); - if (unlikely(MDBX_IS_ERROR(lck_seize_rc))) - return lck_seize_rc; - - if (env->me_lfd == INVALID_HANDLE_VALUE) { - env->me_lck = lckless_stub(env); - env->me_maxreaders = UINT_MAX; - DEBUG("lck-setup:%s%s%s", " lck-less", - (env->me_flags & MDBX_RDONLY) ? " readonly" : "", - (lck_seize_rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative"); - return lck_seize_rc; - } - - DEBUG("lck-setup:%s%s%s", " with-lck", - (env->me_flags & MDBX_RDONLY) ? " readonly" : "", - (lck_seize_rc == MDBX_RESULT_TRUE) ? 
" exclusive" : " cooperative"); - - MDBX_env *inprocess_neighbor = nullptr; - err = rthc_uniq_check(&env->me_lck_mmap, &inprocess_neighbor); - if (unlikely(MDBX_IS_ERROR(err))) - return err; - if (inprocess_neighbor) { - if ((mdbx_static.flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 || - (inprocess_neighbor->me_flags & MDBX_EXCLUSIVE) != 0) - return MDBX_BUSY; - if (lck_seize_rc == MDBX_RESULT_TRUE) { - err = osal_lck_downgrade(env); - if (unlikely(err != MDBX_SUCCESS)) - return err; - lck_seize_rc = MDBX_RESULT_FALSE; - } - } - - uint64_t size = 0; - err = osal_filesize(env->me_lfd, &size); - if (unlikely(err != MDBX_SUCCESS)) - return err; - - if (lck_seize_rc == MDBX_RESULT_TRUE) { - size = ceil_powerof2(env->me_maxreaders * sizeof(MDBX_reader) + - sizeof(MDBX_lockinfo), - env->me_os_psize); - jitter4testing(false); - } else { - if (env->me_flags & MDBX_EXCLUSIVE) - return MDBX_BUSY; - if (size > INT_MAX || (size & (env->me_os_psize - 1)) != 0 || - size < env->me_os_psize) { - ERROR("lck-file has invalid size %" PRIu64 " bytes", size); - return MDBX_PROBLEM; - } - } - - const size_t maxreaders = - ((size_t)size - sizeof(MDBX_lockinfo)) / sizeof(MDBX_reader); - if (maxreaders < 4) { - ERROR("lck-size too small (up to %" PRIuPTR " readers)", maxreaders); - return MDBX_PROBLEM; - } - env->me_maxreaders = (maxreaders <= MDBX_READERS_LIMIT) - ? (unsigned)maxreaders - : (unsigned)MDBX_READERS_LIMIT; - - err = osal_mmap((env->me_flags & MDBX_EXCLUSIVE) | MDBX_WRITEMAP, - &env->me_lck_mmap, (size_t)size, (size_t)size, - lck_seize_rc ? MMAP_OPTION_TRUNCATE | MMAP_OPTION_SEMAPHORE - : MMAP_OPTION_SEMAPHORE); - if (unlikely(err != MDBX_SUCCESS)) - return err; - -#if MDBX_ENABLE_MADVISE -#ifdef MADV_DODUMP - err = madvise(env->me_lck_mmap.lck, size, MADV_DODUMP) ? ignore_enosys(errno) - : MDBX_SUCCESS; - if (unlikely(MDBX_IS_ERROR(err))) - return err; -#endif /* MADV_DODUMP */ - -#ifdef MADV_WILLNEED - err = madvise(env->me_lck_mmap.lck, size, MADV_WILLNEED) - ? 
ignore_enosys(errno) - : MDBX_SUCCESS; - if (unlikely(MDBX_IS_ERROR(err))) - return err; -#elif defined(POSIX_MADV_WILLNEED) - err = ignore_enosys( - posix_madvise(env->me_lck_mmap.lck, size, POSIX_MADV_WILLNEED)); - if (unlikely(MDBX_IS_ERROR(err))) - return err; -#endif /* MADV_WILLNEED */ -#endif /* MDBX_ENABLE_MADVISE */ - - struct MDBX_lockinfo *lck = env->me_lck_mmap.lck; - if (lck_seize_rc == MDBX_RESULT_TRUE) { - /* If we succeed got exclusive lock, then nobody is using the lock region - * and we should initialize it. */ - memset(lck, 0, (size_t)size); - jitter4testing(false); - lck->mti_magic_and_version = MDBX_LOCK_MAGIC; - lck->mti_os_and_format = MDBX_LOCK_FORMAT; -#if MDBX_ENABLE_PGOP_STAT - lck->mti_pgop_stat.wops.weak = 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - err = osal_msync(&env->me_lck_mmap, 0, (size_t)size, - MDBX_SYNC_DATA | MDBX_SYNC_SIZE); - if (unlikely(err != MDBX_SUCCESS)) { - ERROR("initial-%s for lck-file failed, err %d", "msync/fsync", err); - eASSERT(env, MDBX_IS_ERROR(err)); - return err; - } - } else { - if (lck->mti_magic_and_version != MDBX_LOCK_MAGIC) { - const bool invalid = (lck->mti_magic_and_version >> 8) != MDBX_MAGIC; - ERROR("lock region has %s", - invalid - ? "invalid magic" - : "incompatible version (only applications with nearly or the " - "same versions of libmdbx can share the same database)"); - return invalid ? MDBX_INVALID : MDBX_VERSION_MISMATCH; - } - if (lck->mti_os_and_format != MDBX_LOCK_FORMAT) { - ERROR("lock region has os/format signature 0x%" PRIx32 - ", expected 0x%" PRIx32, - lck->mti_os_and_format, MDBX_LOCK_FORMAT); - return MDBX_VERSION_MISMATCH; - } - } - - err = osal_lck_init(env, inprocess_neighbor, lck_seize_rc); - if (unlikely(err != MDBX_SUCCESS)) { - eASSERT(env, MDBX_IS_ERROR(err)); - return err; - } - - env->me_lck = lck; - eASSERT(env, !MDBX_IS_ERROR(lck_seize_rc)); - return lck_seize_rc; -} - -/* Open and/or initialize the lock region for the environment. 
*/ -__cold static int setup_lck(MDBX_env *env, mdbx_mode_t mode) { - eASSERT(env, env->me_lazy_fd != INVALID_HANDLE_VALUE); - eASSERT(env, env->me_lfd == INVALID_HANDLE_VALUE); - - int err = osal_openfile(MDBX_OPEN_LCK, env, env->me_pathname.lck, - &env->me_lfd, mode); - if (err != MDBX_SUCCESS) { - switch (err) { - default: - return err; - case MDBX_ENOFILE: - case MDBX_EACCESS: - case MDBX_EPERM: - if (!F_ISSET(env->me_flags, MDBX_RDONLY | MDBX_EXCLUSIVE)) - return err; - break; - case MDBX_EROFS: - if ((env->me_flags & MDBX_RDONLY) == 0) - return err; - break; - } - - if (err != MDBX_ENOFILE) { - /* ENSURE the file system is read-only */ - err = osal_check_fs_rdonly(env->me_lazy_fd, env->me_pathname.lck, err); - if (err != MDBX_SUCCESS && - /* ignore ERROR_NOT_SUPPORTED for exclusive mode */ - !(err == MDBX_ENOSYS && (env->me_flags & MDBX_EXCLUSIVE))) - return err; - } - - /* LY: without-lck mode (e.g. exclusive or on read-only filesystem) */ - env->me_lfd = INVALID_HANDLE_VALUE; - } - - rthc_lock(); - err = setup_lck_locked(env); - rthc_unlock(); - return err; -} - -__cold int mdbx_is_readahead_reasonable(size_t volume, intptr_t redundancy) { - if (volume <= 1024 * 1024 * 4ul) - return MDBX_RESULT_TRUE; - - intptr_t pagesize, total_ram_pages; - int err = mdbx_get_sysraminfo(&pagesize, &total_ram_pages, nullptr); - if (unlikely(err != MDBX_SUCCESS)) - return err; - - const int log2page = log2n_powerof2(pagesize); - const intptr_t volume_pages = (volume + pagesize - 1) >> log2page; - const intptr_t redundancy_pages = - (redundancy < 0) ? 
-(intptr_t)((-redundancy + pagesize - 1) >> log2page) - : (intptr_t)(redundancy + pagesize - 1) >> log2page; - if (volume_pages >= total_ram_pages || - volume_pages + redundancy_pages >= total_ram_pages) - return MDBX_RESULT_FALSE; - - intptr_t avail_ram_pages; - err = mdbx_get_sysraminfo(nullptr, nullptr, &avail_ram_pages); - if (unlikely(err != MDBX_SUCCESS)) - return err; - - return (volume_pages + redundancy_pages >= avail_ram_pages) - ? MDBX_RESULT_FALSE - : MDBX_RESULT_TRUE; -} - -/* Merge sync flags */ -static uint32_t merge_sync_flags(const uint32_t a, const uint32_t b) { - uint32_t r = a | b; - - /* avoid false MDBX_UTTERLY_NOSYNC */ - if (F_ISSET(r, MDBX_UTTERLY_NOSYNC) && !F_ISSET(a, MDBX_UTTERLY_NOSYNC) && - !F_ISSET(b, MDBX_UTTERLY_NOSYNC)) - r = (r - MDBX_UTTERLY_NOSYNC) | MDBX_SAFE_NOSYNC; - - /* convert MDBX_DEPRECATED_MAPASYNC to MDBX_SAFE_NOSYNC */ - if ((r & (MDBX_WRITEMAP | MDBX_DEPRECATED_MAPASYNC)) == - (MDBX_WRITEMAP | MDBX_DEPRECATED_MAPASYNC) && - !F_ISSET(r, MDBX_UTTERLY_NOSYNC)) - r = (r - MDBX_DEPRECATED_MAPASYNC) | MDBX_SAFE_NOSYNC; - - /* force MDBX_NOMETASYNC if NOSYNC enabled */ - if (r & (MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC)) - r |= MDBX_NOMETASYNC; - - assert(!(F_ISSET(r, MDBX_UTTERLY_NOSYNC) && - !F_ISSET(a, MDBX_UTTERLY_NOSYNC) && - !F_ISSET(b, MDBX_UTTERLY_NOSYNC))); - return r; -} - -__cold static int __must_check_result override_meta(MDBX_env *env, - size_t target, - txnid_t txnid, - const MDBX_meta *shape) { - int rc = alloc_page_buf(env); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - MDBX_page *const page = env->me_pbuf; - meta_model(env, page, target); - MDBX_meta *const model = page_meta(page); - meta_set_txnid(env, model, txnid); - if (txnid) - eASSERT(env, check_meta_coherency(env, model, true)); - if (shape) { - if (txnid && unlikely(!check_meta_coherency(env, shape, false))) { - ERROR("bailout overriding meta-%zu since model failed " - "FreeDB/MainDB %s-check for txnid #%" PRIaTXN, - target, "pre", 
constmeta_txnid(shape)); - return MDBX_PROBLEM; - } - if (mdbx_static.flags & MDBX_DBG_DONT_UPGRADE) - memcpy(&model->mm_magic_and_version, &shape->mm_magic_and_version, - sizeof(model->mm_magic_and_version)); - model->mm_extra_flags = shape->mm_extra_flags; - model->mm_validator_id = shape->mm_validator_id; - model->mm_extra_pagehdr = shape->mm_extra_pagehdr; - memcpy(&model->mm_geo, &shape->mm_geo, sizeof(model->mm_geo)); - memcpy(&model->mm_dbs, &shape->mm_dbs, sizeof(model->mm_dbs)); - memcpy(&model->mm_canary, &shape->mm_canary, sizeof(model->mm_canary)); - memcpy(&model->mm_pages_retired, &shape->mm_pages_retired, - sizeof(model->mm_pages_retired)); - if (txnid) { - if ((!model->mm_dbs[FREE_DBI].md_mod_txnid && - model->mm_dbs[FREE_DBI].md_root != P_INVALID) || - (!model->mm_dbs[MAIN_DBI].md_mod_txnid && - model->mm_dbs[MAIN_DBI].md_root != P_INVALID)) - memcpy(&model->mm_magic_and_version, &shape->mm_magic_and_version, - sizeof(model->mm_magic_and_version)); - if (unlikely(!check_meta_coherency(env, model, false))) { - ERROR("bailout overriding meta-%zu since model failed " - "FreeDB/MainDB %s-check for txnid #%" PRIaTXN, - target, "post", txnid); - return MDBX_PROBLEM; - } - } - } - unaligned_poke_u64(4, model->mm_sign, meta_sign(model)); - rc = validate_meta(env, model, page, (pgno_t)target, nullptr); - if (unlikely(MDBX_IS_ERROR(rc))) - return MDBX_PROBLEM; - - if (shape && memcmp(model, shape, sizeof(MDBX_meta)) == 0) { - NOTICE("skip overriding meta-%zu since no changes " - "for txnid #%" PRIaTXN, - target, txnid); - return MDBX_SUCCESS; - } - - if (env->me_flags & MDBX_WRITEMAP) { -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.msync.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - rc = osal_msync(&env->me_dxb_mmap, 0, - pgno_align2os_bytes(env, model->mm_geo.next), - MDBX_SYNC_DATA | MDBX_SYNC_IODQ); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - /* override_meta() called only while current process have exclusive - * lock of a DB file. 
So meta-page could be updated directly without - * clearing consistency flag by mdbx_meta_update_begin() */ - memcpy(pgno2page(env, target), page, env->me_psize); - osal_flush_incoherent_cpu_writeback(); -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.msync.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - rc = osal_msync(&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, target + 1), - MDBX_SYNC_DATA | MDBX_SYNC_IODQ); - } else { -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.wops.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - rc = osal_pwrite(env->me_fd4meta, page, env->me_psize, - pgno2bytes(env, target)); - if (rc == MDBX_SUCCESS && env->me_fd4meta == env->me_lazy_fd) { -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.fsync.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - rc = osal_fsync(env->me_lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); - } - osal_flush_incoherent_mmap(env->me_map, pgno2bytes(env, NUM_METAS), - env->me_os_psize); - } - eASSERT(env, (!env->me_txn && !env->me_txn0) || - (env->me_stuck_meta == (int)target && - (env->me_flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == - MDBX_EXCLUSIVE)); - return rc; -} - -__cold int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target) { - if (unlikely(target >= NUM_METAS)) - return MDBX_EINVAL; - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely((env->me_flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != - MDBX_EXCLUSIVE)) - return MDBX_EPERM; - - const MDBX_meta *const target_meta = METAPAGE(env, target); - txnid_t new_txnid = constmeta_txnid(target_meta); - if (new_txnid < MIN_TXNID) - new_txnid = MIN_TXNID; - for (unsigned n = 0; n < NUM_METAS; ++n) { - if (n == target) - continue; - MDBX_page *const page = pgno2page(env, n); - MDBX_meta meta = *page_meta(page); - if (validate_meta(env, &meta, page, n, nullptr) != MDBX_SUCCESS) { - int err = override_meta(env, n, 0, nullptr); - if (unlikely(err != MDBX_SUCCESS)) - return err; - } else { - txnid_t 
txnid = constmeta_txnid(&meta); - if (new_txnid <= txnid) - new_txnid = safe64_txnid_next(txnid); - } - } - - if (unlikely(new_txnid > MAX_TXNID)) { - ERROR("txnid overflow, raise %d", MDBX_TXN_FULL); - return MDBX_TXN_FULL; - } - return override_meta(env, target, new_txnid, target_meta); -} - -__cold int mdbx_env_open_for_recovery(MDBX_env *env, const char *pathname, - unsigned target_meta, bool writeable) { -#if defined(_WIN32) || defined(_WIN64) - wchar_t *pathnameW = nullptr; - int rc = osal_mb2w(pathname, &pathnameW); - if (likely(rc == MDBX_SUCCESS)) { - rc = mdbx_env_open_for_recoveryW(env, pathnameW, target_meta, writeable); - osal_free(pathnameW); - } - return rc; -} - -__cold int mdbx_env_open_for_recoveryW(MDBX_env *env, const wchar_t *pathname, - unsigned target_meta, bool writeable) { -#endif /* Windows */ - - if (unlikely(target_meta >= NUM_METAS)) - return MDBX_EINVAL; - int rc = check_env(env, false); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - if (unlikely(env->me_map)) - return MDBX_EPERM; - - env->me_stuck_meta = (int8_t)target_meta; - return -#if defined(_WIN32) || defined(_WIN64) - mdbx_env_openW -#else - mdbx_env_open -#endif /* Windows */ - (env, pathname, writeable ? 
MDBX_EXCLUSIVE : MDBX_EXCLUSIVE | MDBX_RDONLY, - 0); -} - -__cold static int check_alternative_lck_absent(const pathchar_t *lck_pathname) { - int err = osal_fileexists(lck_pathname); - if (unlikely(err != MDBX_RESULT_FALSE)) { - if (err == MDBX_RESULT_TRUE) - err = MDBX_DUPLICATED_CLK; - ERROR("Alternative/Duplicate LCK-file '%" MDBX_PRIsPATH "' error %d", - lck_pathname, err); - } - return err; -} - -__cold static int env_handle_pathname(MDBX_env *env, const pathchar_t *pathname, - const mdbx_mode_t mode) { - memset(&env->me_pathname, 0, sizeof(env->me_pathname)); - if (unlikely(!pathname || !*pathname)) - return MDBX_EINVAL; - - int rc; -#if defined(_WIN32) || defined(_WIN64) - const DWORD dwAttrib = GetFileAttributesW(pathname); - if (dwAttrib == INVALID_FILE_ATTRIBUTES) { - rc = GetLastError(); - if (rc != MDBX_ENOFILE) - return rc; - if (mode == 0 || (env->me_flags & MDBX_RDONLY) != 0) - /* can't open existing */ - return rc; - - /* auto-create directory if requested */ - if ((env->me_flags & MDBX_NOSUBDIR) == 0 && - !CreateDirectoryW(pathname, nullptr)) { - rc = GetLastError(); - if (rc != ERROR_ALREADY_EXISTS) - return rc; - } - } else { - /* ignore passed MDBX_NOSUBDIR flag and set it automatically */ - env->me_flags |= MDBX_NOSUBDIR; - if (dwAttrib & FILE_ATTRIBUTE_DIRECTORY) - env->me_flags -= MDBX_NOSUBDIR; - } -#else - struct stat st; - if (stat(pathname, &st) != 0) { - rc = errno; - if (rc != MDBX_ENOFILE) - return rc; - if (mode == 0 || (env->me_flags & MDBX_RDONLY) != 0) - /* can't open non-existing */ - return rc /* MDBX_ENOFILE */; - - /* auto-create directory if requested */ - const mdbx_mode_t dir_mode = - (/* inherit read/write permissions for group and others */ mode & - (S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) | - /* always add read/write/search for owner */ S_IRWXU | - ((mode & S_IRGRP) ? /* +search if readable by group */ S_IXGRP : 0) | - ((mode & S_IROTH) ? 
/* +search if readable by others */ S_IXOTH : 0); - if ((env->me_flags & MDBX_NOSUBDIR) == 0 && mkdir(pathname, dir_mode)) { - rc = errno; - if (rc != EEXIST) - return rc; - } - } else { - /* ignore passed MDBX_NOSUBDIR flag and set it automatically */ - env->me_flags |= MDBX_NOSUBDIR; - if (S_ISDIR(st.st_mode)) - env->me_flags -= MDBX_NOSUBDIR; - } -#endif - - static const pathchar_t dxb_name[] = MDBX_DATANAME; - static const pathchar_t lck_name[] = MDBX_LOCKNAME; - static const pathchar_t lock_suffix[] = MDBX_LOCK_SUFFIX; - -#if defined(_WIN32) || defined(_WIN64) - assert(dxb_name[0] == '\\' && lck_name[0] == '\\'); - const size_t pathname_len = wcslen(pathname); -#else - assert(dxb_name[0] == '/' && lck_name[0] == '/'); - const size_t pathname_len = strlen(pathname); -#endif - assert(!osal_isdirsep(lock_suffix[0])); - size_t base_len = pathname_len; - static const size_t dxb_name_len = ARRAY_LENGTH(dxb_name) - 1; - if (env->me_flags & MDBX_NOSUBDIR) { - if (base_len > dxb_name_len && - osal_pathequal(pathname + base_len - dxb_name_len, dxb_name, - dxb_name_len)) { - env->me_flags -= MDBX_NOSUBDIR; - base_len -= dxb_name_len; - } else if (base_len == dxb_name_len - 1 && osal_isdirsep(dxb_name[0]) && - osal_isdirsep(lck_name[0]) && - osal_pathequal(pathname + base_len - dxb_name_len + 1, - dxb_name + 1, dxb_name_len - 1)) { - env->me_flags -= MDBX_NOSUBDIR; - base_len -= dxb_name_len - 1; - } - } - - const size_t suflen_with_NOSUBDIR = sizeof(lock_suffix) + sizeof(pathchar_t); - const size_t suflen_without_NOSUBDIR = sizeof(lck_name) + sizeof(dxb_name); - const size_t enough4any = (suflen_with_NOSUBDIR > suflen_without_NOSUBDIR) - ? 
suflen_with_NOSUBDIR - : suflen_without_NOSUBDIR; - const size_t bytes_needed = - sizeof(pathchar_t) * (base_len * 2 + pathname_len + 1) + enough4any; - env->me_pathname.buffer = osal_malloc(bytes_needed); - if (!env->me_pathname.buffer) - return MDBX_ENOMEM; - - env->me_pathname.specified = env->me_pathname.buffer; - env->me_pathname.dxb = env->me_pathname.specified + pathname_len + 1; - env->me_pathname.lck = env->me_pathname.dxb + base_len + dxb_name_len + 1; - rc = MDBX_SUCCESS; - pathchar_t *const buf = env->me_pathname.buffer; - if (base_len) { - memcpy(buf, pathname, sizeof(pathchar_t) * pathname_len); - if (env->me_flags & MDBX_NOSUBDIR) { - const pathchar_t *const lck_ext = - osal_fileext(lck_name, ARRAY_LENGTH(lck_name)); - if (lck_ext) { - pathchar_t *pathname_ext = osal_fileext(buf, pathname_len); - memcpy(pathname_ext ? pathname_ext : buf + pathname_len, lck_ext, - sizeof(pathchar_t) * (ARRAY_END(lck_name) - lck_ext)); - rc = check_alternative_lck_absent(buf); - } - } else { - memcpy(buf + base_len, dxb_name, sizeof(dxb_name)); - memcpy(buf + base_len + dxb_name_len, lock_suffix, sizeof(lock_suffix)); - rc = check_alternative_lck_absent(buf); - } - - memcpy(env->me_pathname.dxb, pathname, sizeof(pathchar_t) * (base_len + 1)); - memcpy(env->me_pathname.lck, pathname, sizeof(pathchar_t) * base_len); - if (env->me_flags & MDBX_NOSUBDIR) { - memcpy(env->me_pathname.lck + base_len, lock_suffix, sizeof(lock_suffix)); - } else { - memcpy(env->me_pathname.dxb + base_len, dxb_name, sizeof(dxb_name)); - memcpy(env->me_pathname.lck + base_len, lck_name, sizeof(lck_name)); - } - } else { - assert(!(env->me_flags & MDBX_NOSUBDIR)); - memcpy(buf, dxb_name + 1, sizeof(dxb_name) - sizeof(pathchar_t)); - memcpy(buf + dxb_name_len - 1, lock_suffix, sizeof(lock_suffix)); - rc = check_alternative_lck_absent(buf); - - memcpy(env->me_pathname.dxb, dxb_name + 1, - sizeof(dxb_name) - sizeof(pathchar_t)); - memcpy(env->me_pathname.lck, lck_name + 1, - sizeof(lck_name) - 
sizeof(pathchar_t)); - } - - memcpy(env->me_pathname.specified, pathname, - sizeof(pathchar_t) * (pathname_len + 1)); - return rc; -} - -__cold int mdbx_env_delete(const char *pathname, MDBX_env_delete_mode_t mode) { -#if defined(_WIN32) || defined(_WIN64) - wchar_t *pathnameW = nullptr; - int rc = osal_mb2w(pathname, &pathnameW); - if (likely(rc == MDBX_SUCCESS)) { - rc = mdbx_env_deleteW(pathnameW, mode); - osal_free(pathnameW); - } - return rc; -} - -__cold int mdbx_env_deleteW(const wchar_t *pathname, - MDBX_env_delete_mode_t mode) { -#endif /* Windows */ - - switch (mode) { - default: - return MDBX_EINVAL; - case MDBX_ENV_JUST_DELETE: - case MDBX_ENV_ENSURE_UNUSED: - case MDBX_ENV_WAIT_FOR_UNUSED: - break; - } - -#ifdef __e2k__ /* https://bugs.mcst.ru/bugzilla/show_bug.cgi?id=6011 */ - MDBX_env *const dummy_env = alloca(sizeof(MDBX_env)); -#else - MDBX_env dummy_env_silo, *const dummy_env = &dummy_env_silo; -#endif - memset(dummy_env, 0, sizeof(*dummy_env)); - dummy_env->me_flags = - (mode == MDBX_ENV_ENSURE_UNUSED) ? MDBX_EXCLUSIVE : MDBX_ENV_DEFAULTS; - dummy_env->me_os_psize = (unsigned)osal_syspagesize(); - dummy_env->me_psize = (unsigned)mdbx_default_pagesize(); - - STATIC_ASSERT(sizeof(dummy_env->me_flags) == sizeof(MDBX_env_flags_t)); - int rc = MDBX_RESULT_TRUE, err = env_handle_pathname(dummy_env, pathname, 0); - if (likely(err == MDBX_SUCCESS)) { - mdbx_filehandle_t clk_handle = INVALID_HANDLE_VALUE, - dxb_handle = INVALID_HANDLE_VALUE; - if (mode > MDBX_ENV_JUST_DELETE) { - err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, - dummy_env->me_pathname.dxb, &dxb_handle, 0); - err = (err == MDBX_ENOFILE) ? MDBX_SUCCESS : err; - if (err == MDBX_SUCCESS) { - err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, - dummy_env->me_pathname.lck, &clk_handle, 0); - err = (err == MDBX_ENOFILE) ? 
MDBX_SUCCESS : err; - } - if (err == MDBX_SUCCESS && clk_handle != INVALID_HANDLE_VALUE) - err = osal_lockfile(clk_handle, mode == MDBX_ENV_WAIT_FOR_UNUSED); - if (err == MDBX_SUCCESS && dxb_handle != INVALID_HANDLE_VALUE) - err = osal_lockfile(dxb_handle, mode == MDBX_ENV_WAIT_FOR_UNUSED); - } - - if (err == MDBX_SUCCESS) { - err = osal_removefile(dummy_env->me_pathname.dxb); - if (err == MDBX_SUCCESS) - rc = MDBX_SUCCESS; - else if (err == MDBX_ENOFILE) - err = MDBX_SUCCESS; - } - - if (err == MDBX_SUCCESS) { - err = osal_removefile(dummy_env->me_pathname.lck); - if (err == MDBX_SUCCESS) - rc = MDBX_SUCCESS; - else if (err == MDBX_ENOFILE) - err = MDBX_SUCCESS; - } - - if (err == MDBX_SUCCESS && !(dummy_env->me_flags & MDBX_NOSUBDIR) && - (/* pathname != "." */ pathname[0] != '.' || pathname[1] != 0) && - (/* pathname != ".." */ pathname[0] != '.' || pathname[1] != '.' || - pathname[2] != 0)) { - err = osal_removedirectory(pathname); - if (err == MDBX_SUCCESS) - rc = MDBX_SUCCESS; - else if (err == MDBX_ENOFILE) - err = MDBX_SUCCESS; - } - - if (dxb_handle != INVALID_HANDLE_VALUE) - osal_closefile(dxb_handle); - if (clk_handle != INVALID_HANDLE_VALUE) - osal_closefile(clk_handle); - } else if (err == MDBX_ENOFILE) - err = MDBX_SUCCESS; - - osal_free(dummy_env->me_pathname.buffer); - return (err == MDBX_SUCCESS) ? rc : err; -} - -__cold static int env_open(MDBX_env *env, mdbx_mode_t mode) { - /* Использование O_DSYNC или FILE_FLAG_WRITE_THROUGH: - * - * 0) Если размер страниц БД меньше системной страницы ОЗУ, то ядру ОС - * придется чаще обновлять страницы в unified page cache. - * - * Однако, O_DSYNC не предполагает отключение unified page cache, - * поэтому подобные затруднения будем считать проблемой ОС и/или - * ожидаемым пенальти из-за использования мелких страниц БД. - * - * 1) В режиме MDBX_SYNC_DURABLE - O_DSYNC для записи как данных, - * так и мета-страниц. 
Однако, на Linux отказ от O_DSYNC с последующим - * fdatasync() может быть выгоднее при использовании HDD, так как - * позволяет io-scheduler переупорядочить запись с учетом актуального - * расположения файла БД на носителе. - * - * 2) В режиме MDBX_NOMETASYNC - O_DSYNC можно использовать для данных, - * но в этом может не быть смысла, так как fdatasync() всё равно - * требуется для гарантии фиксации мета после предыдущей транзакции. - * - * В итоге на нормальных системах (не Windows) есть два варианта: - * - при возможности O_DIRECT и/или io_ring для данных, скорее всего, - * есть смысл вызвать fdatasync() перед записью данных, а затем - * использовать O_DSYNC; - * - не использовать O_DSYNC и вызывать fdatasync() после записи данных. - * - * На Windows же следует минимизировать использование FlushFileBuffers() - * из-за проблем с производительностью. Поэтому на Windows в режиме - * MDBX_NOMETASYNC: - * - мета обновляется через дескриптор без FILE_FLAG_WRITE_THROUGH; - * - перед началом записи данных вызывается FlushFileBuffers(), если - * mti_meta_sync_txnid не совпадает с последней записанной мета; - * - данные записываются через дескриптор с FILE_FLAG_WRITE_THROUGH. - * - * 3) В режиме MDBX_SAFE_NOSYNC - O_DSYNC нет смысла использовать, пока не - * будет реализована возможность полностью асинхронной "догоняющей" - * записи в выделенном процессе-сервере с io-ring очередями внутри. - * - * ----- - * - * Использование O_DIRECT или FILE_FLAG_NO_BUFFERING: - * - * Назначение этих флагов в отключении файлового дескриптора от - * unified page cache, т.е. от отображенных в память данных в случае - * libmdbx. - * - * Поэтому, использование direct i/o в libmdbx без MDBX_WRITEMAP лишено - * смысла и контр-продуктивно, ибо так мы провоцируем ядро ОС на - * не-когерентность отображения в память с содержимым файла на носителе, - * либо требуем дополнительных проверок и действий направленных на - * фактическое отключение O_DIRECT для отображенных в память данных. 
- * - * В режиме MDBX_WRITEMAP когерентность отображенных данных обеспечивается - * физически. Поэтому использование direct i/o может иметь смысл, если у - * ядра ОС есть какие-то проблемы с msync(), в том числе с - * производительностью: - * - использование io_ring или gather-write может быть дешевле, чем - * просмотр PTE ядром и запись измененных/грязных; - * - но проблема в том, что записываемые из user mode страницы либо не - * будут помечены чистыми (и соответственно будут записаны ядром - * еще раз), либо ядру необходимо искать и чистить PTE при получении - * запроса на запись. - * - * Поэтому O_DIRECT или FILE_FLAG_NO_BUFFERING используется: - * - только в режиме MDBX_SYNC_DURABLE с MDBX_WRITEMAP; - * - когда me_psize >= me_os_psize; - * - опция сборки MDBX_AVOID_MSYNC != 0, которая по-умолчанию включена - * только на Windows (см ниже). - * - * ----- - * - * Использование FILE_FLAG_OVERLAPPED на Windows: - * - * У Windows очень плохо с I/O (за исключением прямых постраничных - * scatter/gather, которые работают в обход проблемного unified page - * cache и поэтому почти бесполезны в libmdbx). - * - * При этом всё еще хуже при использовании FlushFileBuffers(), что также - * требуется после FlushViewOfFile() в режиме MDBX_WRITEMAP. Поэтому - * на Windows вместо FlushViewOfFile() и FlushFileBuffers() следует - * использовать запись через дескриптор с FILE_FLAG_WRITE_THROUGH. - * - * В свою очередь, запись с FILE_FLAG_WRITE_THROUGH дешевле/быстрее - * при использовании FILE_FLAG_OVERLAPPED. В результате, на Windows - * в durable-режимах запись данных всегда в overlapped-режиме, - * при этом для записи мета требуется отдельный не-overlapped дескриптор. - */ - - env->me_pid = osal_getpid(); - int rc = osal_openfile((env->me_flags & MDBX_RDONLY) ? 
MDBX_OPEN_DXB_READ - : MDBX_OPEN_DXB_LAZY, - env, env->me_pathname.dxb, &env->me_lazy_fd, mode); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - -#if MDBX_LOCKING == MDBX_LOCKING_SYSV - env->me_sysv_ipc.key = ftok(env->me_pathname.dxb, 42); - if (unlikely(env->me_sysv_ipc.key == -1)) - return errno; -#endif /* MDBX_LOCKING */ - - /* Set the position in files outside of the data to avoid corruption - * due to erroneous use of file descriptors in the application code. */ - const uint64_t safe_parking_lot_offset = UINT64_C(0x7fffFFFF80000000); - osal_fseek(env->me_lazy_fd, safe_parking_lot_offset); - - env->me_fd4meta = env->me_lazy_fd; -#if defined(_WIN32) || defined(_WIN64) - eASSERT(env, env->me_overlapped_fd == 0); - bool ior_direct = false; - if (!(env->me_flags & - (MDBX_RDONLY | MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_EXCLUSIVE))) { - if (MDBX_AVOID_MSYNC && (env->me_flags & MDBX_WRITEMAP)) { - /* Запрошен режим MDBX_SYNC_DURABLE | MDBX_WRITEMAP при активной опции - * MDBX_AVOID_MSYNC. - * - * 1) В этой комбинации наиболее выгодно использовать WriteFileGather(), - * но для этого необходимо открыть файл с флагом FILE_FLAG_NO_BUFFERING и - * после обеспечивать выравнивание адресов и размера данных на границу - * системной страницы, что в свою очередь возможно если размер страницы БД - * не меньше размера системной страницы ОЗУ. Поэтому для открытия файла в - * нужном режиме требуется знать размер страницы БД. - * - * 2) Кроме этого, в Windows запись в заблокированный регион файла - * возможно только через тот-же дескриптор. Поэтому изначальный захват - * блокировок посредством osal_lck_seize(), захват/освобождение блокировок - * во время пишущих транзакций и запись данных должны выполнятся через - * один дескриптор. 
- * - * Таким образом, требуется прочитать волатильный заголовок БД, чтобы - * узнать размер страницы, чтобы открыть дескриптор файла в режиме нужном - * для записи данных, чтобы использовать именно этот дескриптор для - * изначального захвата блокировок. */ - MDBX_meta header; - uint64_t dxb_filesize; - int err = read_header(env, &header, MDBX_SUCCESS, true); - if ((err == MDBX_SUCCESS && header.mm_psize >= env->me_os_psize) || - (err == MDBX_ENODATA && mode && env->me_psize >= env->me_os_psize && - osal_filesize(env->me_lazy_fd, &dxb_filesize) == MDBX_SUCCESS && - dxb_filesize == 0)) - /* Может быть коллизия, если два процесса пытаются одновременно создать - * БД с разным размером страницы, который у одного меньше системной - * страницы, а у другого НЕ меньше. Эта допустимая, но очень странная - * ситуация. Поэтому считаем её ошибочной и не пытаемся разрешить. */ - ior_direct = true; - } - - rc = osal_openfile(ior_direct ? MDBX_OPEN_DXB_OVERLAPPED_DIRECT - : MDBX_OPEN_DXB_OVERLAPPED, - env, env->me_pathname.dxb, &env->me_overlapped_fd, 0); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - env->me_data_lock_event = CreateEventW(nullptr, true, false, nullptr); - if (unlikely(!env->me_data_lock_event)) - return (int)GetLastError(); - osal_fseek(env->me_overlapped_fd, safe_parking_lot_offset); - } -#else - if (mode == 0) { - /* pickup mode for lck-file */ - struct stat st; - if (unlikely(fstat(env->me_lazy_fd, &st))) - return errno; - mode = st.st_mode; - } - mode = (/* inherit read permissions for group and others */ mode & - (S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) | - /* always add read/write for owner */ S_IRUSR | S_IWUSR | - ((mode & S_IRGRP) ? /* +write if readable by group */ S_IWGRP : 0) | - ((mode & S_IROTH) ? 
/* +write if readable by others */ S_IWOTH : 0); -#endif /* !Windows */ - const int lck_rc = setup_lck(env, mode); - if (unlikely(MDBX_IS_ERROR(lck_rc))) - return lck_rc; - if (env->me_lfd != INVALID_HANDLE_VALUE) - osal_fseek(env->me_lfd, safe_parking_lot_offset); - - eASSERT(env, env->me_dsync_fd == INVALID_HANDLE_VALUE); - if (!(env->me_flags & - (MDBX_RDONLY | MDBX_SAFE_NOSYNC | MDBX_DEPRECATED_MAPASYNC -#if defined(_WIN32) || defined(_WIN64) - | MDBX_EXCLUSIVE -#endif /* !Windows */ - ))) { - rc = osal_openfile(MDBX_OPEN_DXB_DSYNC, env, env->me_pathname.dxb, - &env->me_dsync_fd, 0); - if (unlikely(MDBX_IS_ERROR(rc))) - return rc; - if (env->me_dsync_fd != INVALID_HANDLE_VALUE) { - if ((env->me_flags & MDBX_NOMETASYNC) == 0) - env->me_fd4meta = env->me_dsync_fd; - osal_fseek(env->me_dsync_fd, safe_parking_lot_offset); - } - } - - const MDBX_env_flags_t lazy_flags = - MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC | MDBX_NOMETASYNC; - const MDBX_env_flags_t mode_flags = lazy_flags | MDBX_LIFORECLAIM | - MDBX_NORDAHEAD | MDBX_RDONLY | - MDBX_WRITEMAP; - - MDBX_lockinfo *const lck = env->me_lck_mmap.lck; - if (lck && lck_rc != MDBX_RESULT_TRUE && (env->me_flags & MDBX_RDONLY) == 0) { - MDBX_env_flags_t snap_flags; - while ((snap_flags = atomic_load32(&lck->mti_envmode, mo_AcquireRelease)) == - MDBX_RDONLY) { - if (atomic_cas32(&lck->mti_envmode, MDBX_RDONLY, - (snap_flags = (env->me_flags & mode_flags)))) { - /* The case: - * - let's assume that for some reason the DB file is smaller - * than it should be according to the geometry, - * but not smaller than the last page used; - * - the first process that opens the database (lck_rc == RESULT_TRUE) - * does this in readonly mode and therefore cannot bring - * the file size back to normal; - * - some next process (lck_rc != RESULT_TRUE) opens the DB in - * read-write mode and now is here. - * - * FIXME: Should we re-check and set the size of DB-file right here? 
*/ - break; - } - atomic_yield(); - } - - if (env->me_flags & MDBX_ACCEDE) { - /* Pickup current mode-flags (MDBX_LIFORECLAIM, MDBX_NORDAHEAD, etc). */ - const MDBX_env_flags_t diff = - (snap_flags ^ env->me_flags) & - ((snap_flags & lazy_flags) ? mode_flags - : mode_flags & ~MDBX_WRITEMAP); - env->me_flags ^= diff; - NOTICE("accede mode-flags: 0x%X, 0x%X -> 0x%X", diff, - env->me_flags ^ diff, env->me_flags); - } - - /* Ранее упущенный не очевидный момент: При работе БД в режимах - * не-синхронной/отложенной фиксации на диске, все процессы-писатели должны - * иметь одинаковый режим MDBX_WRITEMAP. - * - * В противном случае, сброс на диск следует выполнять дважды: сначала - * msync(), затем fdatasync(). При этом msync() не обязан отрабатывать - * в процессах без MDBX_WRITEMAP, так как файл в память отображен только - * для чтения. Поэтому, в общем случае, различия по MDBX_WRITEMAP не - * позволяют выполнить фиксацию данных на диск, после их изменения в другом - * процессе. - * - * В режиме MDBX_UTTERLY_NOSYNC позволять совместную работу с MDBX_WRITEMAP - * также не следует, поскольку никакой процесс (в том числе последний) не - * может гарантированно сбросить данные на диск, а следовательно не должен - * помечать какую-либо транзакцию как steady. - * - * В результате, требуется либо запретить совместную работу процессам с - * разным MDBX_WRITEMAP в режиме отложенной записи, либо отслеживать такое - * смешивание и блокировать steady-пометки - что контрпродуктивно. */ - const MDBX_env_flags_t rigorous_flags = - (snap_flags & lazy_flags) - ? 
MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC | MDBX_WRITEMAP - : MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC; - const MDBX_env_flags_t rigorous_diff = - (snap_flags ^ env->me_flags) & rigorous_flags; - if (rigorous_diff) { - ERROR("current mode/flags 0x%X incompatible with requested 0x%X, " - "rigorous diff 0x%X", - env->me_flags, snap_flags, rigorous_diff); - return MDBX_INCOMPATIBLE; - } - } - - mincore_clean_cache(env); - const int dxb_rc = setup_dxb(env, lck_rc, mode); - if (MDBX_IS_ERROR(dxb_rc)) - return dxb_rc; - - rc = osal_check_fs_incore(env->me_lazy_fd); - env->me_incore = false; - if (rc == MDBX_RESULT_TRUE) { - env->me_incore = true; - NOTICE("%s", "in-core database"); - rc = MDBX_SUCCESS; - } else if (unlikely(rc != MDBX_SUCCESS)) { - ERROR("check_fs_incore(), err %d", rc); - return rc; - } - - if (unlikely(/* recovery mode */ env->me_stuck_meta >= 0) && - (lck_rc != /* exclusive */ MDBX_RESULT_TRUE || - (env->me_flags & MDBX_EXCLUSIVE) == 0)) { - ERROR("%s", "recovery requires exclusive mode"); - return MDBX_BUSY; - } - - DEBUG("opened dbenv %p", (void *)env); - env->me_flags |= MDBX_ENV_ACTIVE; - if (!lck || lck_rc == MDBX_RESULT_TRUE) { - env->me_lck->mti_envmode.weak = env->me_flags & mode_flags; - env->me_lck->mti_meta_sync_txnid.weak = - (uint32_t)recent_committed_txnid(env); - env->me_lck->mti_reader_check_timestamp.weak = osal_monotime(); - } - if (lck) { - if (lck_rc == MDBX_RESULT_TRUE) { - rc = osal_lck_downgrade(env); - DEBUG("lck-downgrade-%s: rc %i", - (env->me_flags & MDBX_EXCLUSIVE) ? "partial" : "full", rc); - if (rc != MDBX_SUCCESS) - return rc; - } else { - rc = cleanup_dead_readers(env, false, NULL); - if (MDBX_IS_ERROR(rc)) - return rc; - } - } - - rc = (env->me_flags & MDBX_RDONLY) - ? 
MDBX_SUCCESS - : osal_ioring_create(&env->me_ioring -#if defined(_WIN32) || defined(_WIN64) - , - ior_direct, env->me_overlapped_fd -#endif /* Windows */ - ); - return rc; -} - -__cold int mdbx_env_open(MDBX_env *env, const char *pathname, - MDBX_env_flags_t flags, mdbx_mode_t mode) { -#if defined(_WIN32) || defined(_WIN64) - wchar_t *pathnameW = nullptr; - int rc = osal_mb2w(pathname, &pathnameW); - if (likely(rc == MDBX_SUCCESS)) { - rc = mdbx_env_openW(env, pathnameW, flags, mode); - osal_free(pathnameW); - if (rc == MDBX_SUCCESS) - /* force to make cache of the multi-byte pathname representation */ - mdbx_env_get_path(env, &pathname); - } - return rc; -} - -__cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, - MDBX_env_flags_t flags, mdbx_mode_t mode) { -#endif /* Windows */ - - int rc = check_env(env, false); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(flags & ~ENV_USABLE_FLAGS)) - return MDBX_EINVAL; - - if (unlikely(env->me_lazy_fd != INVALID_HANDLE_VALUE || - (env->me_flags & MDBX_ENV_ACTIVE) != 0 || env->me_map)) - return MDBX_EPERM; - - /* Pickup previously mdbx_env_set_flags(), - * but avoid MDBX_UTTERLY_NOSYNC by disjunction */ - const uint32_t saved_me_flags = env->me_flags; - flags = merge_sync_flags(flags | MDBX_DEPRECATED_COALESCE, env->me_flags); - - if (flags & MDBX_RDONLY) { - /* Silently ignore irrelevant flags when we're only getting read access */ - flags &= ~(MDBX_WRITEMAP | MDBX_DEPRECATED_MAPASYNC | MDBX_SAFE_NOSYNC | - MDBX_NOMETASYNC | MDBX_DEPRECATED_COALESCE | MDBX_LIFORECLAIM | - MDBX_NOMEMINIT | MDBX_ACCEDE); - mode = 0; - } else { -#if MDBX_MMAP_INCOHERENT_FILE_WRITE - /* Temporary `workaround` for OpenBSD kernel's flaw. - * See https://libmdbx.dqdkfa.ru/dead-github/issues/67 */ - if ((flags & MDBX_WRITEMAP) == 0) { - if (flags & MDBX_ACCEDE) - flags |= MDBX_WRITEMAP; - else { - debug_log(MDBX_LOG_ERROR, __func__, __LINE__, - "System (i.e. 
OpenBSD) requires MDBX_WRITEMAP because " - "of an internal flaw(s) in a file/buffer/page cache.\n"); - return 42 /* ENOPROTOOPT */; - } - } -#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ - } - - env->me_flags = (flags & ~MDBX_FATAL_ERROR); - rc = env_handle_pathname(env, pathname, mode); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - - env->me_dbxs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbxs[0])); - env->me_db_flags = osal_calloc(env->me_maxdbs, sizeof(env->me_db_flags[0])); - env->me_dbi_seqs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbi_seqs[0])); - if (unlikely(!(env->me_dbxs && env->me_db_flags && env->me_dbi_seqs))) { - rc = MDBX_ENOMEM; - goto bailout; - } - - if ((flags & MDBX_RDONLY) == 0) { - MDBX_txn *txn = nullptr; - const intptr_t bitmap_bytes = -#if MDBX_ENABLE_DBI_SPARSE - ceil_powerof2(env->me_maxdbs, - CHAR_BIT * sizeof(txn->mt_dbi_sparse[0])) / - CHAR_BIT; -#else - 0; -#endif /* MDBX_ENABLE_DBI_SPARSE */ - const size_t base = sizeof(MDBX_txn) + sizeof(MDBX_cursor); - const size_t size = - base + bitmap_bytes + - env->me_maxdbs * - (sizeof(txn->mt_dbs[0]) + sizeof(txn->mt_cursors[0]) + - sizeof(txn->mt_dbi_seqs[0]) + sizeof(txn->mt_dbi_state[0])); - rc = alloc_page_buf(env); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - - memset(env->me_pbuf, -1, env->me_psize * (size_t)2); - memset(ptr_disp(env->me_pbuf, env->me_psize * (size_t)2), 0, env->me_psize); - txn = osal_calloc(1, size); - if (unlikely(!txn)) { - rc = MDBX_ENOMEM; - goto bailout; - } - txn->mt_dbs = ptr_disp(txn, base); - txn->mt_cursors = - ptr_disp(txn->mt_dbs, env->me_maxdbs * sizeof(txn->mt_dbs[0])); - txn->mt_dbi_seqs = - ptr_disp(txn->mt_cursors, env->me_maxdbs * sizeof(txn->mt_cursors[0])); - txn->mt_dbi_state = - ptr_disp(txn, size - env->me_maxdbs * sizeof(txn->mt_dbi_state[0])); -#if MDBX_ENABLE_DBI_SPARSE - txn->mt_dbi_sparse = ptr_disp(txn->mt_dbi_state, -bitmap_bytes); -#endif /* MDBX_ENABLE_DBI_SPARSE */ - txn->mt_env = env; - txn->mt_flags = 
MDBX_TXN_FINISHED; - env->me_txn0 = txn; - txn->tw.retired_pages = pnl_alloc(MDBX_PNL_INITIAL); - txn->tw.relist = pnl_alloc(MDBX_PNL_INITIAL); - if (unlikely(!txn->tw.retired_pages || !txn->tw.relist)) { - rc = MDBX_ENOMEM; - goto bailout; - } - adjust_defaults(env); - } - - rc = env_open(env, mode); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - -#if MDBX_DEBUG - const meta_troika_t troika = meta_tap(env); - const meta_ptr_t head = meta_recent(env, &troika); - const MDBX_db *db = &head.ptr_c->mm_dbs[MAIN_DBI]; - - DEBUG("opened database version %u, pagesize %u", - (uint8_t)unaligned_peek_u64(4, head.ptr_c->mm_magic_and_version), - env->me_psize); - DEBUG("using meta page %" PRIaPGNO ", txn %" PRIaTXN, - data_page(head.ptr_c)->mp_pgno, head.txnid); - DEBUG("depth: %u", db->md_depth); - DEBUG("entries: %" PRIu64, db->md_entries); - DEBUG("branch pages: %" PRIaPGNO, db->md_branch_pages); - DEBUG("leaf pages: %" PRIaPGNO, db->md_leaf_pages); - DEBUG("large/overflow pages: %" PRIaPGNO, db->md_overflow_pages); - DEBUG("root: %" PRIaPGNO, db->md_root); - DEBUG("schema_altered: %" PRIaTXN, db->md_mod_txnid); -#endif /* MDBX_DEBUG */ - - if (likely(rc == MDBX_SUCCESS)) { -#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) - txn_valgrind(env, nullptr); -#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ - } else { - bailout: - if (likely(env_close(env, false) == MDBX_SUCCESS)) { - env->me_flags = saved_me_flags; - } else { - rc = MDBX_PANIC; - env->me_flags = saved_me_flags | MDBX_FATAL_ERROR; - } - } - return rc; -} - -/* Destroy resources from mdbx_env_open(), clear our readers & DBIs */ -__cold static int env_close(MDBX_env *env, bool resurrect_after_fork) { - const unsigned flags = env->me_flags; - env->me_flags &= ~ENV_INTERNAL_FLAGS; - if (flags & MDBX_ENV_TXKEY) { - thread_key_delete(env->me_txkey); - env->me_txkey = 0; - } - - if (env->me_lck) - munlock_all(env); - - rthc_lock(); - int rc = rthc_remove(env); - rthc_unlock(); - -#if 
MDBX_ENABLE_DBI_LOCKFREE - for (struct mdbx_defer_free_item *next, *ptr = env->me_defer_free; ptr; - ptr = next) { - next = ptr->next; - osal_free(ptr); - } - env->me_defer_free = nullptr; -#endif /* MDBX_ENABLE_DBI_LOCKFREE */ - - if (!(env->me_flags & MDBX_RDONLY)) - osal_ioring_destroy(&env->me_ioring); - - env->me_lck = nullptr; - if (env->me_lck_mmap.lck) - osal_munmap(&env->me_lck_mmap); - - if (env->me_map) { - osal_munmap(&env->me_dxb_mmap); -#ifdef ENABLE_MEMCHECK - VALGRIND_DISCARD(env->me_valgrind_handle); - env->me_valgrind_handle = -1; -#endif /* ENABLE_MEMCHECK */ - } - -#if defined(_WIN32) || defined(_WIN64) - eASSERT(env, !env->me_overlapped_fd || - env->me_overlapped_fd == INVALID_HANDLE_VALUE); - if (env->me_data_lock_event != INVALID_HANDLE_VALUE) { - CloseHandle(env->me_data_lock_event); - env->me_data_lock_event = INVALID_HANDLE_VALUE; - } - eASSERT(env, !resurrect_after_fork); - if (env->me_pathname_char) { - osal_free(env->me_pathname_char); - env->me_pathname_char = nullptr; - } -#endif /* Windows */ - - if (env->me_dsync_fd != INVALID_HANDLE_VALUE) { - (void)osal_closefile(env->me_dsync_fd); - env->me_dsync_fd = INVALID_HANDLE_VALUE; - } - - if (env->me_lazy_fd != INVALID_HANDLE_VALUE) { - (void)osal_closefile(env->me_lazy_fd); - env->me_lazy_fd = INVALID_HANDLE_VALUE; - } - - if (env->me_lfd != INVALID_HANDLE_VALUE) { - (void)osal_closefile(env->me_lfd); - env->me_lfd = INVALID_HANDLE_VALUE; - } - - if (!resurrect_after_fork) { - if (env->me_dbxs) { - for (size_t i = CORE_DBS; i < env->me_numdbs; ++i) - if (env->me_dbxs[i].md_name.iov_len) - osal_free(env->me_dbxs[i].md_name.iov_base); - osal_free(env->me_dbxs); - env->me_numdbs = CORE_DBS; - env->me_dbxs = nullptr; - } - if (env->me_pbuf) { - osal_memalign_free(env->me_pbuf); - env->me_pbuf = nullptr; - } - if (env->me_dbi_seqs) { - osal_free(env->me_dbi_seqs); - env->me_dbi_seqs = nullptr; - } - if (env->me_db_flags) { - osal_free(env->me_db_flags); - env->me_db_flags = nullptr; - } - if 
(env->me_pathname.buffer) { - osal_free(env->me_pathname.buffer); - env->me_pathname.buffer = nullptr; - } - if (env->me_txn0) { - dpl_free(env->me_txn0); - txl_free(env->me_txn0->tw.lifo_reclaimed); - pnl_free(env->me_txn0->tw.retired_pages); - pnl_free(env->me_txn0->tw.spilled.list); - pnl_free(env->me_txn0->tw.relist); - osal_free(env->me_txn0); - env->me_txn0 = nullptr; - } - } - env->me_stuck_meta = -1; - return rc; -} - -#if !(defined(_WIN32) || defined(_WIN64)) -__cold int mdbx_env_resurrect_after_fork(MDBX_env *env) { - if (unlikely(!env)) - return MDBX_EINVAL; - - if (unlikely(env->me_signature.weak != MDBX_ME_SIGNATURE)) - return MDBX_EBADSIGN; - - if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) - return MDBX_PANIC; - - if (unlikely((env->me_flags & MDBX_ENV_ACTIVE) == 0)) - return MDBX_SUCCESS; - - const uint32_t new_pid = osal_getpid(); - if (unlikely(env->me_pid == new_pid)) - return MDBX_SUCCESS; - - if (!atomic_cas32(&env->me_signature, MDBX_ME_SIGNATURE, ~MDBX_ME_SIGNATURE)) - return MDBX_EBADSIGN; - - if (env->me_txn) - txn_abort(env->me_txn0); - env->me_live_reader = 0; - int rc = env_close(env, true); - env->me_signature.weak = MDBX_ME_SIGNATURE; - if (likely(rc == MDBX_SUCCESS)) { - rc = (env->me_flags & MDBX_EXCLUSIVE) ? MDBX_BUSY : env_open(env, 0); - if (unlikely(rc != MDBX_SUCCESS && env_close(env, false) != MDBX_SUCCESS)) { - rc = MDBX_PANIC; - env->me_flags |= MDBX_FATAL_ERROR; - } - } - return rc; -} -#endif /* Windows */ - -__cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) { - MDBX_page *dp; - int rc = MDBX_SUCCESS; - - if (unlikely(!env)) - return MDBX_EINVAL; - - if (unlikely(env->me_signature.weak != MDBX_ME_SIGNATURE)) - return MDBX_EBADSIGN; - -#if MDBX_ENV_CHECKPID || !(defined(_WIN32) || defined(_WIN64)) - /* Check the PID even if MDBX_ENV_CHECKPID=0 on non-Windows - * platforms (i.e. where fork() is available). 
- * This is required to legitimize a call after fork() - * from a child process, that should be allowed to free resources. */ - if (unlikely(env->me_pid != osal_getpid())) - env->me_flags |= MDBX_FATAL_ERROR; -#endif /* MDBX_ENV_CHECKPID */ - - if (env->me_map && (env->me_flags & (MDBX_RDONLY | MDBX_FATAL_ERROR)) == 0 && - env->me_txn0) { - if (env->me_txn0->mt_owner && env->me_txn0->mt_owner != osal_thread_self()) - return MDBX_BUSY; - } else - dont_sync = true; - - if (!atomic_cas32(&env->me_signature, MDBX_ME_SIGNATURE, 0)) - return MDBX_EBADSIGN; - - if (!dont_sync) { -#if defined(_WIN32) || defined(_WIN64) - /* On windows, without blocking is impossible to determine whether another - * process is running a writing transaction or not. - * Because in the "owner died" condition kernel don't release - * file lock immediately. */ - rc = env_sync(env, true, false); - rc = (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc; -#else - struct stat st; - if (unlikely(fstat(env->me_lazy_fd, &st))) - rc = errno; - else if (st.st_nlink > 0 /* don't sync deleted files */) { - rc = env_sync(env, true, true); - rc = (rc == MDBX_BUSY || rc == EAGAIN || rc == EACCES || rc == EBUSY || - rc == EWOULDBLOCK || rc == MDBX_RESULT_TRUE) - ? MDBX_SUCCESS - : rc; - } -#endif /* Windows */ - } - - if (env->me_txn0 && env->me_txn0->mt_owner == osal_thread_self()) - osal_txn_unlock(env); - - eASSERT(env, env->me_signature.weak == 0); - rc = env_close(env, false) ? 
MDBX_PANIC : rc; - ENSURE(env, osal_fastmutex_destroy(&env->me_dbi_lock) == MDBX_SUCCESS); -#if defined(_WIN32) || defined(_WIN64) - /* me_remap_guard don't have destructor (Slim Reader/Writer Lock) */ - DeleteCriticalSection(&env->me_windowsbug_lock); -#else - ENSURE(env, osal_fastmutex_destroy(&env->me_remap_guard) == MDBX_SUCCESS); -#endif /* Windows */ - -#if MDBX_LOCKING > MDBX_LOCKING_SYSV - MDBX_lockinfo *const stub = lckless_stub(env); - /* может вернуть ошибку в дочернем процессе после fork() */ - osal_ipclock_destroy(&stub->mti_wlock); -#endif /* MDBX_LOCKING */ - - while ((dp = env->me_dp_reserve) != NULL) { - MDBX_ASAN_UNPOISON_MEMORY_REGION(dp, env->me_psize); - VALGRIND_MAKE_MEM_DEFINED(&mp_next(dp), sizeof(MDBX_page *)); - env->me_dp_reserve = mp_next(dp); - void *const ptr = ptr_disp(dp, -(ptrdiff_t)sizeof(size_t)); - osal_free(ptr); - } - VALGRIND_DESTROY_MEMPOOL(env); - osal_free(env); - - return rc; -} - -/* Search for key within a page, using binary search. - * Returns the smallest entry larger or equal to the key. - * Updates the cursor index with the index of the found entry. - * If no entry larger or equal to the key is found, returns NULL. */ -__hot static struct node_result node_search(MDBX_cursor *mc, - const MDBX_val *key) { - MDBX_page *mp = mc->mc_pg[mc->mc_top]; - const intptr_t nkeys = page_numkeys(mp); - DKBUF_DEBUG; - - DEBUG("searching %zu keys in %s %spage %" PRIaPGNO, nkeys, - IS_LEAF(mp) ? "leaf" : "branch", IS_SUBP(mp) ? 
"sub-" : "", - mp->mp_pgno); - - struct node_result ret; - ret.exact = false; - STATIC_ASSERT(P_BRANCH == 1); - intptr_t low = mp->mp_flags & P_BRANCH; - intptr_t high = nkeys - 1; - if (unlikely(high < low)) { - mc->mc_ki[mc->mc_top] = 0; - ret.node = NULL; - return ret; - } - - intptr_t i; - MDBX_cmp_func *cmp = mc->mc_dbx->md_cmp; - MDBX_val nodekey; - if (unlikely(IS_LEAF2(mp))) { - cASSERT(mc, mp->mp_leaf2_ksize == mc->mc_db->md_xsize); - nodekey.iov_len = mp->mp_leaf2_ksize; - do { - i = (low + high) >> 1; - nodekey.iov_base = page_leaf2key(mp, i, nodekey.iov_len); - cASSERT(mc, ptr_disp(mp, mc->mc_txn->mt_env->me_psize) >= - ptr_disp(nodekey.iov_base, nodekey.iov_len)); - int cr = cmp(key, &nodekey); - DEBUG("found leaf index %zu [%s], rc = %i", i, DKEY_DEBUG(&nodekey), cr); - if (cr > 0) - /* Found entry is less than the key. */ - /* Skip to get the smallest entry larger than key. */ - low = ++i; - else if (cr < 0) - high = i - 1; - else { - ret.exact = true; - break; - } - } while (likely(low <= high)); - - /* store the key index */ - mc->mc_ki[mc->mc_top] = (indx_t)i; - ret.node = (i < nkeys) - ? /* fake for LEAF2 */ (MDBX_node *)(intptr_t)-1 - : /* There is no entry larger or equal to the key. */ NULL; - return ret; - } - - if (IS_BRANCH(mp) && cmp == cmp_int_align2) - /* Branch pages have no data, so if using integer keys, - * alignment is guaranteed. Use faster cmp_int_align4(). 
*/ - cmp = cmp_int_align4; - - MDBX_node *node; - do { - i = (low + high) >> 1; - node = page_node(mp, i); - nodekey.iov_len = node_ks(node); - nodekey.iov_base = node_key(node); - cASSERT(mc, ptr_disp(mp, mc->mc_txn->mt_env->me_psize) >= - ptr_disp(nodekey.iov_base, nodekey.iov_len)); - int cr = cmp(key, &nodekey); - if (IS_LEAF(mp)) - DEBUG("found leaf index %zu [%s], rc = %i", i, DKEY_DEBUG(&nodekey), cr); - else - DEBUG("found branch index %zu [%s -> %" PRIaPGNO "], rc = %i", i, - DKEY_DEBUG(&nodekey), node_pgno(node), cr); - if (cr > 0) - /* Found entry is less than the key. */ - /* Skip to get the smallest entry larger than key. */ - low = ++i; - else if (cr < 0) - high = i - 1; - else { - ret.exact = true; - break; - } - } while (likely(low <= high)); - - /* store the key index */ - mc->mc_ki[mc->mc_top] = (indx_t)i; - ret.node = (i < nkeys) - ? page_node(mp, i) - : /* There is no entry larger or equal to the key. */ NULL; - return ret; -} - -/* Pop a page off the top of the cursor's stack. */ -static __inline void cursor_pop(MDBX_cursor *mc) { - if (likely(mc->mc_snum)) { - DEBUG("popped page %" PRIaPGNO " off db %d cursor %p", - mc->mc_pg[mc->mc_top]->mp_pgno, DDBI(mc), (void *)mc); - if (likely(--mc->mc_snum)) { - mc->mc_top--; - } else { - mc->mc_flags &= ~C_INITIALIZED; - } - } -} - -/* Push a page onto the top of the cursor's stack. - * Set MDBX_TXN_ERROR on failure. 
*/ -static __inline int cursor_push(MDBX_cursor *mc, MDBX_page *mp) { - DEBUG("pushing page %" PRIaPGNO " on db %d cursor %p", mp->mp_pgno, DDBI(mc), - (void *)mc); - - if (unlikely(mc->mc_snum >= CURSOR_STACK)) { - mc->mc_txn->mt_flags |= MDBX_TXN_ERROR; - return MDBX_CURSOR_FULL; - } - - mc->mc_top = mc->mc_snum++; - mc->mc_pg[mc->mc_top] = mp; - mc->mc_ki[mc->mc_top] = 0; - return MDBX_SUCCESS; -} - -__hot static __always_inline int page_get_checker_lite(const uint16_t ILL, - const MDBX_page *page, - MDBX_txn *const txn, - const txnid_t front) { - if (unlikely(page->mp_flags & ILL)) { - if (ILL == P_ILL_BITS || (page->mp_flags & P_ILL_BITS)) - return bad_page(page, "invalid page's flags (%u)\n", page->mp_flags); - else if (ILL & P_OVERFLOW) { - assert((ILL & (P_BRANCH | P_LEAF | P_LEAF2)) == 0); - assert(page->mp_flags & (P_BRANCH | P_LEAF | P_LEAF2)); - return bad_page(page, "unexpected %s instead of %s (%u)\n", - "large/overflow", "branch/leaf/leaf2", page->mp_flags); - } else if (ILL & (P_BRANCH | P_LEAF | P_LEAF2)) { - assert((ILL & P_BRANCH) && (ILL & P_LEAF) && (ILL & P_LEAF2)); - assert(page->mp_flags & (P_BRANCH | P_LEAF | P_LEAF2)); - return bad_page(page, "unexpected %s instead of %s (%u)\n", - "branch/leaf/leaf2", "large/overflow", page->mp_flags); - } else { - assert(false); - } - } - - if (unlikely(page->mp_txnid > front) && - unlikely(page->mp_txnid > txn->mt_front || front < txn->mt_txnid)) - return bad_page( - page, - "invalid page' txnid (%" PRIaTXN ") for %s' txnid (%" PRIaTXN ")\n", - page->mp_txnid, - (front == txn->mt_front && front != txn->mt_txnid) ? "front-txn" - : "parent-page", - front); - - if (((ILL & P_OVERFLOW) || !IS_OVERFLOW(page)) && - (ILL & (P_BRANCH | P_LEAF | P_LEAF2)) == 0) { - /* Контроль четности page->mp_upper тут либо приводит к ложным ошибкам, - * либо слишком дорог по количеству операций. Заковырка в том, что mp_upper - * может быть нечетным на LEAF2-страницах, при нечетном количестве элементов - * нечетной длины. 
Поэтому четность page->mp_upper здесь не проверяется, но - * соответствующие полные проверки есть в page_check(). */ - if (unlikely(page->mp_upper < page->mp_lower || (page->mp_lower & 1) || - PAGEHDRSZ + page->mp_upper > txn->mt_env->me_psize)) - return bad_page(page, - "invalid page' lower(%u)/upper(%u) with limit %zu\n", - page->mp_lower, page->mp_upper, page_space(txn->mt_env)); - - } else if ((ILL & P_OVERFLOW) == 0) { - const pgno_t npages = page->mp_pages; - if (unlikely(npages < 1) || unlikely(npages >= MAX_PAGENO / 2)) - return bad_page(page, "invalid n-pages (%u) for large-page\n", npages); - if (unlikely(page->mp_pgno + npages > txn->mt_next_pgno)) - return bad_page( - page, - "end of large-page beyond (%u) allocated space (%u next-pgno)\n", - page->mp_pgno + npages, txn->mt_next_pgno); - } else { - assert(false); - } - return MDBX_SUCCESS; -} - -__cold static __noinline pgr_t -page_get_checker_full(const uint16_t ILL, MDBX_page *page, - const MDBX_cursor *const mc, const txnid_t front) { - pgr_t r = {page, page_get_checker_lite(ILL, page, mc->mc_txn, front)}; - if (likely(r.err == MDBX_SUCCESS)) - r.err = page_check(mc, page); - if (unlikely(r.err != MDBX_SUCCESS)) - mc->mc_txn->mt_flags |= MDBX_TXN_ERROR; - return r; -} - -__hot static __always_inline pgr_t page_get_inline(const uint16_t ILL, - const MDBX_cursor *const mc, - const pgno_t pgno, - const txnid_t front) { - MDBX_txn *const txn = mc->mc_txn; - tASSERT(txn, front <= txn->mt_front); - - pgr_t r; - if (unlikely(pgno >= txn->mt_next_pgno)) { - ERROR("page #%" PRIaPGNO " beyond next-pgno", pgno); - r.page = nullptr; - r.err = MDBX_PAGE_NOTFOUND; - bailout: - txn->mt_flags |= MDBX_TXN_ERROR; - return r; - } - - eASSERT(txn->mt_env, - ((txn->mt_flags ^ txn->mt_env->me_flags) & MDBX_WRITEMAP) == 0); - r.page = pgno2page(txn->mt_env, pgno); - if ((txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0) { - const MDBX_txn *spiller = txn; - do { - /* Spilled pages were dirtied in this txn and flushed 
- * because the dirty list got full. Bring this page - * back in from the map (but don't unspill it here, - * leave that unless page_touch happens again). */ - if (unlikely(spiller->mt_flags & MDBX_TXN_SPILLS) && - search_spilled(spiller, pgno)) - break; - - const size_t i = dpl_search(spiller, pgno); - tASSERT(txn, (intptr_t)i > 0); - if (spiller->tw.dirtylist->items[i].pgno == pgno) { - r.page = spiller->tw.dirtylist->items[i].ptr; - break; - } - - spiller = spiller->mt_parent; - } while (spiller); - } - - if (unlikely(r.page->mp_pgno != pgno)) { - r.err = bad_page( - r.page, "pgno mismatch (%" PRIaPGNO ") != expected (%" PRIaPGNO ")\n", - r.page->mp_pgno, pgno); - goto bailout; - } - - if (unlikely(mc->mc_checking & CC_PAGECHECK)) - return page_get_checker_full(ILL, r.page, mc, front); - -#if MDBX_DISABLE_VALIDATION - r.err = MDBX_SUCCESS; -#else - r.err = page_get_checker_lite(ILL, r.page, txn, front); - if (unlikely(r.err != MDBX_SUCCESS)) - goto bailout; -#endif /* MDBX_DISABLE_VALIDATION */ - return r; -} - -/* Finish mdbx_page_search() / mdbx_page_search_lowest(). - * The cursor is at the root page, set up the rest of it. */ -__hot __noinline static int page_search_root(MDBX_cursor *mc, - const MDBX_val *key, int flags) { - MDBX_page *mp = mc->mc_pg[mc->mc_top]; - int rc; - DKBUF_DEBUG; - - while (IS_BRANCH(mp)) { - MDBX_node *node; - intptr_t i; - - DEBUG("branch page %" PRIaPGNO " has %zu keys", mp->mp_pgno, - page_numkeys(mp)); - /* Don't assert on branch pages in the GC. We can get here - * while in the process of rebalancing a GC branch page; we must - * let that proceed. 
ITS#8336 */ - cASSERT(mc, !mc->mc_dbi || page_numkeys(mp) > 1); - DEBUG("found index 0 to page %" PRIaPGNO, node_pgno(page_node(mp, 0))); - - if (flags & (MDBX_PS_FIRST | MDBX_PS_LAST)) { - i = 0; - if (flags & MDBX_PS_LAST) { - i = page_numkeys(mp) - 1; - /* if already init'd, see if we're already in right place */ - if (mc->mc_flags & C_INITIALIZED) { - if (mc->mc_ki[mc->mc_top] == i) { - mc->mc_top = mc->mc_snum++; - mp = mc->mc_pg[mc->mc_top]; - goto ready; - } - } - } - } else { - const struct node_result nsr = node_search(mc, key); - if (likely(nsr.node)) - i = mc->mc_ki[mc->mc_top] + (intptr_t)nsr.exact - 1; - else - i = page_numkeys(mp) - 1; - DEBUG("following index %zu for key [%s]", i, DKEY_DEBUG(key)); - } - - cASSERT(mc, i >= 0 && i < (int)page_numkeys(mp)); - node = page_node(mp, i); - - rc = page_get(mc, node_pgno(node), &mp, mp->mp_txnid); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - mc->mc_ki[mc->mc_top] = (indx_t)i; - if (unlikely(rc = cursor_push(mc, mp))) - return rc; - - ready: - if (flags & MDBX_PS_MODIFY) { - rc = page_touch(mc); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - mp = mc->mc_pg[mc->mc_top]; - } - } - - if (!MDBX_DISABLE_VALIDATION && unlikely(!CHECK_LEAF_TYPE(mc, mp))) { - ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", - mp->mp_pgno, mp->mp_flags); - return MDBX_CORRUPTED; - } - - DEBUG("found leaf page %" PRIaPGNO " for key [%s]", mp->mp_pgno, - DKEY_DEBUG(key)); - mc->mc_flags |= C_INITIALIZED; - mc->mc_flags &= ~C_EOF; - - return MDBX_SUCCESS; -} - -static int setup_sdb(MDBX_dbx *const dbx, const MDBX_db *const db, - const unsigned pagesize) { - if (unlikely(!db_check_flags(db->md_flags))) { - ERROR("incompatible or invalid db.md_flags (%u) ", db->md_flags); - return MDBX_INCOMPATIBLE; - } - if (unlikely(!dbx->md_cmp)) { - dbx->md_cmp = get_default_keycmp(db->md_flags); - dbx->md_dcmp = get_default_datacmp(db->md_flags); - } - - dbx->md_klen_min = keysize_min(db->md_flags); - dbx->md_klen_max 
= keysize_max(pagesize, db->md_flags); - assert(dbx->md_klen_max != (unsigned)-1); - - dbx->md_vlen_min = valsize_min(db->md_flags); - dbx->md_vlen_max = valsize_max(pagesize, db->md_flags); - assert(dbx->md_vlen_max != (size_t)-1); - - if ((db->md_flags & (MDBX_DUPFIXED | MDBX_INTEGERDUP)) != 0 && db->md_xsize) { - if (!MDBX_DISABLE_VALIDATION && unlikely(db->md_xsize < dbx->md_vlen_min || - db->md_xsize > dbx->md_vlen_max)) { - ERROR("db.md_xsize (%u) <> min/max value-length (%zu/%zu)", db->md_xsize, - dbx->md_vlen_min, dbx->md_vlen_max); - return MDBX_CORRUPTED; - } - dbx->md_vlen_min = dbx->md_vlen_max = db->md_xsize; - } - return MDBX_SUCCESS; -} - -static int fetch_sdb(MDBX_txn *txn, size_t dbi) { - MDBX_cursor_couple couple; - int rc = cursor_init(&couple.outer, txn, MAIN_DBI); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - MDBX_dbx *const dbx = &txn->mt_env->me_dbxs[dbi]; - rc = page_search(&couple.outer, &dbx->md_name, 0); - if (unlikely(rc != MDBX_SUCCESS)) { - bailout: - NOTICE("dbi %zu refs to inaccessible subDB `%*s` for txn %" PRIaTXN - " (err %d)", - dbi, (int)dbx->md_name.iov_len, (const char *)dbx->md_name.iov_base, - txn->mt_txnid, rc); - return (rc == MDBX_NOTFOUND) ? 
MDBX_BAD_DBI : rc; - } - - MDBX_val data; - struct node_result nsr = node_search(&couple.outer, &dbx->md_name); - if (unlikely(!nsr.exact)) { - rc = MDBX_NOTFOUND; - goto bailout; - } - if (unlikely((node_flags(nsr.node) & (F_DUPDATA | F_SUBDATA)) != F_SUBDATA)) { - NOTICE("dbi %zu refs to not a named subDB `%*s` for txn %" PRIaTXN " (%s)", - dbi, (int)dbx->md_name.iov_len, (const char *)dbx->md_name.iov_base, - txn->mt_txnid, "wrong flags"); - return MDBX_INCOMPATIBLE; /* not a named DB */ - } - - rc = node_read(&couple.outer, nsr.node, &data, - couple.outer.mc_pg[couple.outer.mc_top]); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(data.iov_len != sizeof(MDBX_db))) { - NOTICE("dbi %zu refs to not a named subDB `%*s` for txn %" PRIaTXN " (%s)", - dbi, (int)dbx->md_name.iov_len, (const char *)dbx->md_name.iov_base, - txn->mt_txnid, "wrong rec-size"); - return MDBX_INCOMPATIBLE; /* not a named DB */ - } - - uint16_t md_flags = UNALIGNED_PEEK_16(data.iov_base, MDBX_db, md_flags); - /* The txn may not know this DBI, or another process may - * have dropped and recreated the DB with other flags. 
*/ - MDBX_db *const db = &txn->mt_dbs[dbi]; - if (unlikely((db->md_flags & DB_PERSISTENT_FLAGS) != md_flags)) { - NOTICE("dbi %zu refs to the re-created subDB `%*s` for txn %" PRIaTXN - " with different flags (present 0x%X != wanna 0x%X)", - dbi, (int)dbx->md_name.iov_len, (const char *)dbx->md_name.iov_base, - txn->mt_txnid, db->md_flags & DB_PERSISTENT_FLAGS, md_flags); - return MDBX_INCOMPATIBLE; - } - - memcpy(db, data.iov_base, sizeof(MDBX_db)); -#if !MDBX_DISABLE_VALIDATION - const txnid_t pp_txnid = couple.outer.mc_pg[couple.outer.mc_top]->mp_txnid; - tASSERT(txn, txn->mt_front >= pp_txnid); - if (unlikely(db->md_mod_txnid > pp_txnid)) { - ERROR("db.md_mod_txnid (%" PRIaTXN ") > page-txnid (%" PRIaTXN ")", - db->md_mod_txnid, pp_txnid); - return MDBX_CORRUPTED; - } -#endif /* !MDBX_DISABLE_VALIDATION */ - rc = setup_sdb(dbx, db, txn->mt_env->me_psize); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - txn->mt_dbi_state[dbi] &= ~DBI_STALE; - return MDBX_SUCCESS; -} - -/* Search for the lowest key under the current branch page. - * This just bypasses a numkeys check in the current page - * before calling mdbx_page_search_root(), because the callers - * are all in situations where the current page is known to - * be underfilled. */ -__hot static int page_search_lowest(MDBX_cursor *mc) { - MDBX_page *mp = mc->mc_pg[mc->mc_top]; - cASSERT(mc, IS_BRANCH(mp)); - MDBX_node *node = page_node(mp, 0); - - int rc = page_get(mc, node_pgno(node), &mp, mp->mp_txnid); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - mc->mc_ki[mc->mc_top] = 0; - if (unlikely(rc = cursor_push(mc, mp))) - return rc; - return page_search_root(mc, NULL, MDBX_PS_FIRST); -} - -/* Search for the page a given key should be in. - * Push it and its parent pages on the cursor stack. - * - * [in,out] mc the cursor for this operation. - * [in] key the key to search for, or NULL for first/last page. 
- * [in] flags If MDBX_PS_MODIFY is set, visited pages in the DB - * are touched (updated with new page numbers). - * If MDBX_PS_FIRST or MDBX_PS_LAST is set, - * find first or last leaf. - * This is used by mdbx_cursor_first() and mdbx_cursor_last(). - * If MDBX_PS_ROOTONLY set, just fetch root node, no further - * lookups. - * - * Returns 0 on success, non-zero on failure. */ -__hot static int page_search(MDBX_cursor *mc, const MDBX_val *key, int flags) { - int rc; - pgno_t root; - - /* Make sure the txn is still viable, then find the root from - * the txn's db table and set it as the root of the cursor's stack. */ - if (unlikely(mc->mc_txn->mt_flags & MDBX_TXN_BLOCKED)) { - DEBUG("%s", "transaction has failed, must abort"); - return MDBX_BAD_TXN; - } - - /* Make sure we're using an up-to-date root */ - if (unlikely(*mc->mc_dbi_state & DBI_STALE)) { - rc = fetch_sdb(mc->mc_txn, mc->mc_dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - root = mc->mc_db->md_root; - - if (unlikely(root == P_INVALID)) { /* Tree is empty. */ - DEBUG("%s", "tree is empty"); - return MDBX_NOTFOUND; - } - - cASSERT(mc, root >= NUM_METAS); - if (!mc->mc_snum || !(mc->mc_flags & C_INITIALIZED) || - mc->mc_pg[0]->mp_pgno != root) { - txnid_t pp_txnid = mc->mc_db->md_mod_txnid; - pp_txnid = /* mc->mc_db->md_mod_txnid maybe zero in a legacy DB */ pp_txnid - ? 
pp_txnid - : mc->mc_txn->mt_txnid; - if ((mc->mc_txn->mt_flags & MDBX_TXN_RDONLY) == 0) { - MDBX_txn *scan = mc->mc_txn; - do - if ((scan->mt_flags & MDBX_TXN_DIRTY) && - (mc->mc_dbi == MAIN_DBI || - (scan->mt_dbi_state[mc->mc_dbi] & DBI_DIRTY))) { - /* После коммита вложенных тразакций может быть mod_txnid > front */ - pp_txnid = scan->mt_front; - break; - } - while (unlikely((scan = scan->mt_parent) != nullptr)); - } - if (unlikely((rc = page_get(mc, root, &mc->mc_pg[0], pp_txnid)) != 0)) - return rc; - } - - mc->mc_snum = 1; - mc->mc_top = 0; - - DEBUG("db %d root page %" PRIaPGNO " has flags 0x%X", DDBI(mc), root, - mc->mc_pg[0]->mp_flags); - - if (flags & MDBX_PS_MODIFY) { - if (unlikely(rc = page_touch(mc))) - return rc; - } - - if (flags & MDBX_PS_ROOTONLY) - return MDBX_SUCCESS; - - return page_search_root(mc, key, flags); -} - -/* Read large/overflow node data. */ -static __noinline int node_read_bigdata(MDBX_cursor *mc, const MDBX_node *node, - MDBX_val *data, const MDBX_page *mp) { - cASSERT(mc, node_flags(node) == F_BIGDATA && data->iov_len == node_ds(node)); - - pgr_t lp = page_get_large(mc, node_largedata_pgno(node), mp->mp_txnid); - if (unlikely((lp.err != MDBX_SUCCESS))) { - DEBUG("read large/overflow page %" PRIaPGNO " failed", - node_largedata_pgno(node)); - return lp.err; - } - - cASSERT(mc, PAGETYPE_WHOLE(lp.page) == P_OVERFLOW); - data->iov_base = page_data(lp.page); - if (!MDBX_DISABLE_VALIDATION) { - const MDBX_env *env = mc->mc_txn->mt_env; - const size_t dsize = data->iov_len; - const unsigned npages = number_of_ovpages(env, dsize); - if (unlikely(lp.page->mp_pages < npages)) - return bad_page(lp.page, - "too less n-pages %u for bigdata-node (%zu bytes)", - lp.page->mp_pages, dsize); - } - return MDBX_SUCCESS; -} - -/* Return the data associated with a given node. 
*/ -static __always_inline int node_read(MDBX_cursor *mc, const MDBX_node *node, - MDBX_val *data, const MDBX_page *mp) { - data->iov_len = node_ds(node); - data->iov_base = node_data(node); - if (likely(node_flags(node) != F_BIGDATA)) - return MDBX_SUCCESS; - return node_read_bigdata(mc, node, data, mp); -} - -int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, - MDBX_val *data) { - DKBUF_DEBUG; - DEBUG("===> get db %u key [%s]", dbi, DKEY_DEBUG(key)); - - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!key || !data)) - return MDBX_EINVAL; - - MDBX_cursor_couple cx; - rc = cursor_init(&cx.outer, txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - return cursor_set(&cx.outer, (MDBX_val *)key, data, MDBX_SET).err; -} - -int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, - MDBX_val *data) { - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!key || !data)) - return MDBX_EINVAL; - - if (unlikely(txn->mt_flags & MDBX_TXN_BLOCKED)) - return MDBX_BAD_TXN; - - MDBX_cursor_couple cx; - rc = cursor_init(&cx.outer, txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - return cursor_get(&cx.outer, key, data, MDBX_SET_LOWERBOUND); -} - -int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, - MDBX_val *data, size_t *values_count) { - DKBUF_DEBUG; - DEBUG("===> get db %u key [%s]", dbi, DKEY_DEBUG(key)); - - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!key || !data)) - return MDBX_EINVAL; - - MDBX_cursor_couple cx; - rc = cursor_init(&cx.outer, txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - rc = cursor_set(&cx.outer, key, data, MDBX_SET_KEY).err; - if (unlikely(rc != MDBX_SUCCESS)) { - if (rc == MDBX_NOTFOUND && values_count) - *values_count = 0; - return rc; - } - - if (values_count) { - *values_count 
= 1; - if (cx.outer.mc_xcursor != NULL) { - MDBX_node *node = page_node(cx.outer.mc_pg[cx.outer.mc_top], - cx.outer.mc_ki[cx.outer.mc_top]); - if (node_flags(node) & F_DUPDATA) { - // coverity[uninit_use : FALSE] - tASSERT(txn, cx.outer.mc_xcursor == &cx.inner && - (cx.inner.mx_cursor.mc_flags & C_INITIALIZED)); - // coverity[uninit_use : FALSE] - *values_count = - (sizeof(*values_count) >= sizeof(cx.inner.mx_db.md_entries) || - cx.inner.mx_db.md_entries <= PTRDIFF_MAX) - ? (size_t)cx.inner.mx_db.md_entries - : PTRDIFF_MAX; - } - } - } - return MDBX_SUCCESS; -} - -/* Find a sibling for a page. - * Replaces the page at the top of the cursor's stack with the specified - * sibling, if one exists. - * - * [in] mc The cursor for this operation. - * [in] dir SIBLING_LEFT or SIBLING_RIGHT. - * - * Returns 0 on success, non-zero on failure. */ -static int cursor_sibling(MDBX_cursor *mc, int dir) { - int rc; - MDBX_node *node; - MDBX_page *mp; - assert(dir == SIBLING_LEFT || dir == SIBLING_RIGHT); - - if (unlikely(mc->mc_snum < 2)) - return MDBX_NOTFOUND; /* root has no siblings */ - - cursor_pop(mc); - DEBUG("parent page is page %" PRIaPGNO ", index %u", - mc->mc_pg[mc->mc_top]->mp_pgno, mc->mc_ki[mc->mc_top]); - - if ((dir == SIBLING_RIGHT) ? (mc->mc_ki[mc->mc_top] + (size_t)1 >= - page_numkeys(mc->mc_pg[mc->mc_top])) - : (mc->mc_ki[mc->mc_top] == 0)) { - DEBUG("no more keys aside, moving to next %s sibling", - dir ? "right" : "left"); - if (unlikely((rc = cursor_sibling(mc, dir)) != MDBX_SUCCESS)) { - /* undo cursor_pop before returning */ - mc->mc_top++; - mc->mc_snum++; - return rc; - } - } else { - assert((dir - 1) == -1 || (dir - 1) == 1); - mc->mc_ki[mc->mc_top] += (indx_t)(dir - 1); - DEBUG("just moving to %s index key %u", - (dir == SIBLING_RIGHT) ? 
"right" : "left", mc->mc_ki[mc->mc_top]); - } - cASSERT(mc, IS_BRANCH(mc->mc_pg[mc->mc_top])); - - node = page_node(mp = mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); - rc = page_get(mc, node_pgno(node), &mp, mp->mp_txnid); - if (unlikely(rc != MDBX_SUCCESS)) { - /* mc will be inconsistent if caller does mc_snum++ as above */ - mc->mc_flags &= ~(C_INITIALIZED | C_EOF); - return rc; - } - - rc = cursor_push(mc, mp); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - mc->mc_ki[mc->mc_top] = - (dir == SIBLING_LEFT) ? (indx_t)page_numkeys(mp) - 1 : 0; - return MDBX_SUCCESS; -} - -/* Move the cursor to the next data item. */ -static int cursor_next(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, - MDBX_cursor_op op) { - assert(op == MDBX_NEXT || op == MDBX_NEXT_DUP || op == MDBX_NEXT_NODUP); - int rc; - - if (unlikely(mc->mc_flags & C_DEL) && op == MDBX_NEXT_DUP) - return MDBX_NOTFOUND; - - if (unlikely(!(mc->mc_flags & C_INITIALIZED))) { - if (unlikely(mc->mc_flags & C_SUB)) - return MDBX_NOTFOUND; - return cursor_first(mc, key, data); - } - - const MDBX_page *mp = mc->mc_pg[mc->mc_top]; - if (unlikely(mc->mc_flags & C_EOF)) { - if (mc->mc_ki[mc->mc_top] + (size_t)1 >= page_numkeys(mp)) - return MDBX_NOTFOUND; - mc->mc_flags ^= C_EOF; - } - - if (mc->mc_xcursor) { - if (op != MDBX_NEXT_NODUP) { - const MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); - if (node_flags(node) & F_DUPDATA) { - rc = cursor_next(&mc->mc_xcursor->mx_cursor, data, NULL, MDBX_NEXT); - if (likely(rc == MDBX_SUCCESS)) { - get_key_optional(node, key); - return MDBX_SUCCESS; - } - if (unlikely(rc != MDBX_NOTFOUND)) - return rc; - } - if (op != MDBX_NEXT) - return MDBX_NOTFOUND; - } - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); - } - - DEBUG("cursor_next: top page is %" PRIaPGNO " in cursor %p", mp->mp_pgno, - (void *)mc); - if (mc->mc_flags & C_DEL) { - mc->mc_flags ^= C_DEL; - goto skip; - } - - intptr_t ki = mc->mc_ki[mc->mc_top]; - mc->mc_ki[mc->mc_top] = 
(indx_t)++ki; - const intptr_t numkeys = page_numkeys(mp); - if (unlikely(ki >= numkeys)) { - DEBUG("%s", "=====> move to next sibling page"); - mc->mc_ki[mc->mc_top] = (indx_t)(numkeys - 1); - rc = cursor_sibling(mc, SIBLING_RIGHT); - if (unlikely(rc != MDBX_SUCCESS)) { - mc->mc_flags |= C_EOF; - return rc; - } - mp = mc->mc_pg[mc->mc_top]; - DEBUG("next page is %" PRIaPGNO ", key index %u", mp->mp_pgno, - mc->mc_ki[mc->mc_top]); - } - -skip: - DEBUG("==> cursor points to page %" PRIaPGNO " with %zu keys, key index %u", - mp->mp_pgno, page_numkeys(mp), mc->mc_ki[mc->mc_top]); - - if (!MDBX_DISABLE_VALIDATION && unlikely(!CHECK_LEAF_TYPE(mc, mp))) { - ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", - mp->mp_pgno, mp->mp_flags); - return MDBX_CORRUPTED; - } - - if (IS_LEAF2(mp)) { - if (likely(key)) { - key->iov_len = mc->mc_db->md_xsize; - key->iov_base = page_leaf2key(mp, mc->mc_ki[mc->mc_top], key->iov_len); - } - return MDBX_SUCCESS; - } - - const MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); - if (node_flags(node) & F_DUPDATA) { - rc = cursor_xinit1(mc, node, mp); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - rc = cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } else if (likely(data)) { - rc = node_read(mc, node, data, mp); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - - get_key_optional(node, key); - return MDBX_SUCCESS; -} - -/* Move the cursor to the previous data item. 
*/ -static int cursor_prev(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, - MDBX_cursor_op op) { - assert(op == MDBX_PREV || op == MDBX_PREV_DUP || op == MDBX_PREV_NODUP); - int rc; - - if (unlikely(mc->mc_flags & C_DEL) && op == MDBX_PREV_DUP) - return MDBX_NOTFOUND; - - if (unlikely(!(mc->mc_flags & C_INITIALIZED))) { - if (unlikely(mc->mc_flags & C_SUB)) - return MDBX_NOTFOUND; - rc = cursor_last(mc, key, data); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - mc->mc_ki[mc->mc_top]++; - } - - const MDBX_page *mp = mc->mc_pg[mc->mc_top]; - if (mc->mc_xcursor) { - if (op != MDBX_PREV_NODUP) { - if (likely(mc->mc_ki[mc->mc_top] < page_numkeys(mp))) { - const MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); - if (node_flags(node) & F_DUPDATA) { - rc = cursor_prev(&mc->mc_xcursor->mx_cursor, data, NULL, MDBX_PREV); - if (likely(rc == MDBX_SUCCESS)) { - get_key_optional(node, key); - mc->mc_flags &= ~C_EOF; - return MDBX_SUCCESS; - } - if (unlikely(rc != MDBX_NOTFOUND)) - return rc; - } - } - if (op != MDBX_PREV) - return MDBX_NOTFOUND; - } - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); - } - - DEBUG("cursor_prev: top page is %" PRIaPGNO " in cursor %p", mp->mp_pgno, - (void *)mc); - - mc->mc_flags &= ~(C_EOF | C_DEL); - - int ki = mc->mc_ki[mc->mc_top]; - mc->mc_ki[mc->mc_top] = (indx_t)--ki; - if (unlikely(ki < 0)) { - mc->mc_ki[mc->mc_top] = 0; - DEBUG("%s", "=====> move to prev sibling page"); - if ((rc = cursor_sibling(mc, SIBLING_LEFT)) != MDBX_SUCCESS) - return rc; - mp = mc->mc_pg[mc->mc_top]; - DEBUG("prev page is %" PRIaPGNO ", key index %u", mp->mp_pgno, - mc->mc_ki[mc->mc_top]); - } - DEBUG("==> cursor points to page %" PRIaPGNO " with %zu keys, key index %u", - mp->mp_pgno, page_numkeys(mp), mc->mc_ki[mc->mc_top]); - - if (!MDBX_DISABLE_VALIDATION && unlikely(!CHECK_LEAF_TYPE(mc, mp))) { - ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", - mp->mp_pgno, mp->mp_flags); - return MDBX_CORRUPTED; - } - - if 
(IS_LEAF2(mp)) { - if (likely(key)) { - key->iov_len = mc->mc_db->md_xsize; - key->iov_base = page_leaf2key(mp, mc->mc_ki[mc->mc_top], key->iov_len); - } - return MDBX_SUCCESS; - } - - const MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); - if (node_flags(node) & F_DUPDATA) { - rc = cursor_xinit1(mc, node, mp); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - rc = cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } else if (likely(data)) { - rc = node_read(mc, node, data, mp); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - - get_key_optional(node, key); - return MDBX_SUCCESS; -} - -/* Set the cursor on a specific data item. */ -__hot static struct cursor_set_result -cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { - MDBX_page *mp; - MDBX_node *node = NULL; - DKBUF_DEBUG; - - struct cursor_set_result ret; - ret.exact = false; - if (unlikely(key->iov_len < mc->mc_dbx->md_klen_min || - key->iov_len > mc->mc_dbx->md_klen_max)) { - cASSERT(mc, !"Invalid key-size"); - ret.err = MDBX_BAD_VALSIZE; - return ret; - } - - MDBX_val aligned_key = *key; - uint64_t aligned_key_buf; - if (mc->mc_db->md_flags & MDBX_INTEGERKEY) { - switch (aligned_key.iov_len) { - default: - cASSERT(mc, !"key-size is invalid for MDBX_INTEGERKEY"); - ret.err = MDBX_BAD_VALSIZE; - return ret; - case 4: - if (unlikely(3 & (uintptr_t)aligned_key.iov_base)) - /* copy instead of return error to avoid break compatibility */ - aligned_key.iov_base = - memcpy(&aligned_key_buf, aligned_key.iov_base, 4); - break; - case 8: - if (unlikely(7 & (uintptr_t)aligned_key.iov_base)) - /* copy instead of return error to avoid break compatibility */ - aligned_key.iov_base = - memcpy(&aligned_key_buf, aligned_key.iov_base, 8); - break; - } - } - - if (mc->mc_xcursor) - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); - - /* See if we're already on the right page */ - if (mc->mc_flags & C_INITIALIZED) { - 
MDBX_val nodekey; - - cASSERT(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); - mp = mc->mc_pg[mc->mc_top]; - if (unlikely(!page_numkeys(mp))) { - mc->mc_ki[mc->mc_top] = 0; - mc->mc_flags |= C_EOF; - ret.err = MDBX_NOTFOUND; - return ret; - } - if (IS_LEAF2(mp)) { - nodekey.iov_len = mc->mc_db->md_xsize; - nodekey.iov_base = page_leaf2key(mp, 0, nodekey.iov_len); - } else { - node = page_node(mp, 0); - get_key(node, &nodekey); - } - int cmp = mc->mc_dbx->md_cmp(&aligned_key, &nodekey); - if (unlikely(cmp == 0)) { - /* Probably happens rarely, but first node on the page - * was the one we wanted. */ - mc->mc_ki[mc->mc_top] = 0; - ret.exact = true; - cASSERT(mc, mc->mc_ki[mc->mc_top] < page_numkeys(mc->mc_pg[mc->mc_top]) || - (mc->mc_flags & C_EOF)); - goto got_node; - } - if (cmp > 0) { - const size_t nkeys = page_numkeys(mp); - if (likely(nkeys > 1)) { - if (IS_LEAF2(mp)) { - nodekey.iov_base = page_leaf2key(mp, nkeys - 1, nodekey.iov_len); - } else { - node = page_node(mp, nkeys - 1); - get_key(node, &nodekey); - } - cmp = mc->mc_dbx->md_cmp(&aligned_key, &nodekey); - if (cmp == 0) { - /* last node was the one we wanted */ - cASSERT(mc, nkeys >= 1 && nkeys <= UINT16_MAX + 1); - mc->mc_ki[mc->mc_top] = (indx_t)(nkeys - 1); - ret.exact = true; - cASSERT(mc, - mc->mc_ki[mc->mc_top] < page_numkeys(mc->mc_pg[mc->mc_top]) || - (mc->mc_flags & C_EOF)); - goto got_node; - } - if (cmp < 0) { - /* This is definitely the right page, skip search_page */ - if (mc->mc_ki[mc->mc_top] != 0 /* уже проверяли выше */ && - mc->mc_ki[mc->mc_top] < page_numkeys(mp)) { - if (IS_LEAF2(mp)) { - nodekey.iov_base = - page_leaf2key(mp, mc->mc_ki[mc->mc_top], nodekey.iov_len); - } else { - node = page_node(mp, mc->mc_ki[mc->mc_top]); - get_key(node, &nodekey); - } - cmp = mc->mc_dbx->md_cmp(&aligned_key, &nodekey); - if (cmp == 0) { - /* current node was the one we wanted */ - ret.exact = true; - cASSERT(mc, mc->mc_ki[mc->mc_top] < - page_numkeys(mc->mc_pg[mc->mc_top]) || - (mc->mc_flags & C_EOF)); - 
goto got_node; - } - } - mc->mc_flags &= ~C_EOF; - goto search_node; - } - } - /* If any parents have right-sibs, search. - * Otherwise, there's nothing further. */ - for (size_t i = 0; i < mc->mc_top; i++) - if (mc->mc_ki[i] < page_numkeys(mc->mc_pg[i]) - 1) - goto continue_other_pages; - - /* There are no other pages */ - cASSERT(mc, nkeys <= UINT16_MAX); - mc->mc_ki[mc->mc_top] = (uint16_t)nkeys; - mc->mc_flags |= C_EOF; - ret.err = MDBX_NOTFOUND; - return ret; - } - continue_other_pages: - if (!mc->mc_top) { - /* There are no other pages */ - mc->mc_ki[mc->mc_top] = 0; - if (op >= MDBX_SET_RANGE) - goto got_node; - - cASSERT(mc, mc->mc_ki[mc->mc_top] < page_numkeys(mc->mc_pg[mc->mc_top]) || - (mc->mc_flags & C_EOF)); - ret.err = MDBX_NOTFOUND; - return ret; - } - } else { - mc->mc_pg[0] = nullptr; - } - - ret.err = page_search(mc, &aligned_key, 0); - if (unlikely(ret.err != MDBX_SUCCESS)) - return ret; - - mp = mc->mc_pg[mc->mc_top]; - MDBX_ANALYSIS_ASSUME(mp != nullptr); - cASSERT(mc, IS_LEAF(mp)); - -search_node:; - struct node_result nsr = node_search(mc, &aligned_key); - node = nsr.node; - ret.exact = nsr.exact; - if (!ret.exact) { - if (op < MDBX_SET_RANGE) { - /* MDBX_SET specified and not an exact match. 
*/ - if (unlikely(mc->mc_ki[mc->mc_top] >= - page_numkeys(mc->mc_pg[mc->mc_top]))) - mc->mc_flags |= C_EOF; - ret.err = MDBX_NOTFOUND; - return ret; - } - - if (node == NULL) { - DEBUG("%s", "===> inexact leaf not found, goto sibling"); - ret.err = cursor_sibling(mc, SIBLING_RIGHT); - if (unlikely(ret.err != MDBX_SUCCESS)) { - mc->mc_flags |= C_EOF; - return ret; /* no entries matched */ - } - mp = mc->mc_pg[mc->mc_top]; - cASSERT(mc, IS_LEAF(mp)); - if (!IS_LEAF2(mp)) - node = page_node(mp, 0); - } - } - cASSERT(mc, mc->mc_ki[mc->mc_top] < page_numkeys(mc->mc_pg[mc->mc_top]) || - (mc->mc_flags & C_EOF)); - -got_node: - mc->mc_flags |= C_INITIALIZED; - mc->mc_flags &= ~C_EOF; - - if (!MDBX_DISABLE_VALIDATION && unlikely(!CHECK_LEAF_TYPE(mc, mp))) { - ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", - mp->mp_pgno, mp->mp_flags); - ret.err = MDBX_CORRUPTED; - return ret; - } - - if (IS_LEAF2(mp)) { - if (op >= MDBX_SET_KEY) { - key->iov_len = mc->mc_db->md_xsize; - key->iov_base = page_leaf2key(mp, mc->mc_ki[mc->mc_top], key->iov_len); - } - ret.err = MDBX_SUCCESS; - return ret; - } - - if (node_flags(node) & F_DUPDATA) { - ret.err = cursor_xinit1(mc, node, mp); - if (unlikely(ret.err != MDBX_SUCCESS)) - return ret; - if (op >= MDBX_SET) { - MDBX_ANALYSIS_ASSUME(mc->mc_xcursor != nullptr); - ret.err = cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); - if (unlikely(ret.err != MDBX_SUCCESS)) - return ret; - } else { - MDBX_ANALYSIS_ASSUME(mc->mc_xcursor != nullptr); - ret = cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, MDBX_SET_RANGE); - if (unlikely(ret.err != MDBX_SUCCESS)) - return ret; - if (op == MDBX_GET_BOTH && !ret.exact) { - ret.err = MDBX_NOTFOUND; - return ret; - } - } - } else if (likely(data)) { - if (op <= MDBX_GET_BOTH_RANGE) { - if (unlikely(data->iov_len < mc->mc_dbx->md_vlen_min || - data->iov_len > mc->mc_dbx->md_vlen_max)) { - cASSERT(mc, !"Invalid data-size"); - ret.err = MDBX_BAD_VALSIZE; - return ret; - } - MDBX_val 
aligned_data = *data; - uint64_t aligned_databytes; - if (mc->mc_db->md_flags & MDBX_INTEGERDUP) { - switch (aligned_data.iov_len) { - default: - cASSERT(mc, !"data-size is invalid for MDBX_INTEGERDUP"); - ret.err = MDBX_BAD_VALSIZE; - return ret; - case 4: - if (unlikely(3 & (uintptr_t)aligned_data.iov_base)) - /* copy instead of return error to avoid break compatibility */ - aligned_data.iov_base = - memcpy(&aligned_databytes, aligned_data.iov_base, 4); - break; - case 8: - if (unlikely(7 & (uintptr_t)aligned_data.iov_base)) - /* copy instead of return error to avoid break compatibility */ - aligned_data.iov_base = - memcpy(&aligned_databytes, aligned_data.iov_base, 8); - break; - } - } - MDBX_val actual_data; - ret.err = node_read(mc, node, &actual_data, mc->mc_pg[mc->mc_top]); - if (unlikely(ret.err != MDBX_SUCCESS)) - return ret; - const int cmp = mc->mc_dbx->md_dcmp(&aligned_data, &actual_data); - if (cmp) { - cASSERT(mc, - mc->mc_ki[mc->mc_top] < page_numkeys(mc->mc_pg[mc->mc_top]) || - (mc->mc_flags & C_EOF)); - if (op != MDBX_GET_BOTH_RANGE || cmp > 0) { - ret.err = MDBX_NOTFOUND; - return ret; - } - } - *data = actual_data; - } else { - ret.err = node_read(mc, node, data, mc->mc_pg[mc->mc_top]); - if (unlikely(ret.err != MDBX_SUCCESS)) - return ret; - } - } - - /* The key already matches in all other cases */ - if (op >= MDBX_SET_KEY) - get_key_optional(node, key); - - DEBUG("==> cursor placed on key [%s], data [%s]", DKEY_DEBUG(key), - DVAL_DEBUG(data)); - ret.err = MDBX_SUCCESS; - return ret; -} - -/* Move the cursor to the first item in the database. 
*/ -static int cursor_first(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data) { - int rc; - - if (mc->mc_xcursor) - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); - - if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { - rc = page_search(mc, NULL, MDBX_PS_FIRST); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - - const MDBX_page *mp = mc->mc_pg[mc->mc_top]; - if (!MDBX_DISABLE_VALIDATION && unlikely(!CHECK_LEAF_TYPE(mc, mp))) { - ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", - mp->mp_pgno, mp->mp_flags); - return MDBX_CORRUPTED; - } - - mc->mc_flags |= C_INITIALIZED; - mc->mc_flags &= ~C_EOF; - mc->mc_ki[mc->mc_top] = 0; - - if (IS_LEAF2(mp)) { - if (likely(key)) { - key->iov_len = mc->mc_db->md_xsize; - key->iov_base = page_leaf2key(mp, 0, key->iov_len); - } - return MDBX_SUCCESS; - } - - MDBX_node *node = page_node(mp, 0); - if (node_flags(node) & F_DUPDATA) { - rc = cursor_xinit1(mc, node, mp); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - MDBX_ANALYSIS_ASSUME(mc->mc_xcursor != nullptr); - rc = cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); - if (unlikely(rc)) - return rc; - } else if (likely(data)) { - rc = node_read(mc, node, data, mp); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - - get_key_optional(node, key); - return MDBX_SUCCESS; -} - -/* Move the cursor to the last item in the database. 
*/ -static int cursor_last(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data) { - int rc; - - if (mc->mc_xcursor) - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); - - if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { - rc = page_search(mc, NULL, MDBX_PS_LAST); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - - const MDBX_page *mp = mc->mc_pg[mc->mc_top]; - if (!MDBX_DISABLE_VALIDATION && unlikely(!CHECK_LEAF_TYPE(mc, mp))) { - ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", - mp->mp_pgno, mp->mp_flags); - return MDBX_CORRUPTED; - } - - mc->mc_ki[mc->mc_top] = (indx_t)page_numkeys(mp) - 1; - mc->mc_flags |= C_INITIALIZED | C_EOF; - - if (IS_LEAF2(mp)) { - if (likely(key)) { - key->iov_len = mc->mc_db->md_xsize; - key->iov_base = page_leaf2key(mp, mc->mc_ki[mc->mc_top], key->iov_len); - } - return MDBX_SUCCESS; - } - - MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); - if (node_flags(node) & F_DUPDATA) { - rc = cursor_xinit1(mc, node, mp); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - MDBX_ANALYSIS_ASSUME(mc->mc_xcursor != nullptr); - rc = cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); - if (unlikely(rc)) - return rc; - } else if (likely(data)) { - rc = node_read(mc, node, data, mp); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - - get_key_optional(node, key); - return MDBX_SUCCESS; -} - -static __hot int cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, - MDBX_cursor_op op) { - int (*mfunc)(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data); - int rc; - - switch (op) { - case MDBX_GET_CURRENT: { - if (unlikely(!(mc->mc_flags & C_INITIALIZED))) - return MDBX_ENODATA; - const MDBX_page *mp = mc->mc_pg[mc->mc_top]; - if (!MDBX_DISABLE_VALIDATION && unlikely(!CHECK_LEAF_TYPE(mc, mp))) { - ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", - mp->mp_pgno, mp->mp_flags); - return MDBX_CORRUPTED; - } - const size_t nkeys = page_numkeys(mp); - if (unlikely(mc->mc_ki[mc->mc_top] >= 
nkeys)) { - cASSERT(mc, nkeys <= UINT16_MAX); - if (mc->mc_flags & C_EOF) - return MDBX_ENODATA; - mc->mc_ki[mc->mc_top] = (uint16_t)nkeys; - mc->mc_flags |= C_EOF; - return MDBX_NOTFOUND; - } - cASSERT(mc, nkeys > 0); - - rc = MDBX_SUCCESS; - if (IS_LEAF2(mp)) { - key->iov_len = mc->mc_db->md_xsize; - key->iov_base = page_leaf2key(mp, mc->mc_ki[mc->mc_top], key->iov_len); - } else { - MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); - get_key_optional(node, key); - if (data) { - if (node_flags(node) & F_DUPDATA) { - if (unlikely(!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED))) { - rc = cursor_xinit1(mc, node, mp); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - rc = cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); - if (unlikely(rc)) - return rc; - } else { - rc = cursor_get(&mc->mc_xcursor->mx_cursor, data, NULL, - MDBX_GET_CURRENT); - if (unlikely(rc)) - return rc; - } - } else { - cASSERT(mc, !mc->mc_xcursor || !(mc->mc_xcursor->mx_cursor.mc_flags & - C_INITIALIZED)); - rc = node_read(mc, node, data, mp); - if (unlikely(rc)) - return rc; - } - } - } - break; - } - case MDBX_GET_BOTH: - case MDBX_GET_BOTH_RANGE: - if (unlikely(data == NULL)) - return MDBX_EINVAL; - if (unlikely(mc->mc_xcursor == NULL)) - return MDBX_INCOMPATIBLE; - /* fall through */ - __fallthrough; - case MDBX_SET: - case MDBX_SET_KEY: - case MDBX_SET_RANGE: - if (unlikely(key == NULL)) - return MDBX_EINVAL; - rc = cursor_set(mc, key, data, op).err; - if (mc->mc_flags & C_INITIALIZED) { - cASSERT(mc, mc->mc_snum > 0 && mc->mc_top < mc->mc_snum); - cASSERT(mc, mc->mc_ki[mc->mc_top] < page_numkeys(mc->mc_pg[mc->mc_top]) || - (mc->mc_flags & C_EOF)); - } - break; - case MDBX_GET_MULTIPLE: - if (unlikely(!data)) - return MDBX_EINVAL; - if (unlikely((mc->mc_db->md_flags & MDBX_DUPFIXED) == 0)) - return MDBX_INCOMPATIBLE; - if ((mc->mc_flags & C_INITIALIZED) == 0) { - if (unlikely(!key)) - return MDBX_EINVAL; - rc = cursor_set(mc, key, data, MDBX_SET).err; - if (unlikely(rc != 
MDBX_SUCCESS)) - break; - } - rc = MDBX_SUCCESS; - if (unlikely(C_INITIALIZED != (mc->mc_xcursor->mx_cursor.mc_flags & - (C_INITIALIZED | C_EOF)))) { - rc = MDBX_NOTFOUND; - break; - } - goto fetch_multiple; - case MDBX_NEXT_MULTIPLE: - if (unlikely(!data)) - return MDBX_EINVAL; - if (unlikely(!(mc->mc_db->md_flags & MDBX_DUPFIXED))) - return MDBX_INCOMPATIBLE; - rc = cursor_next(mc, key, data, MDBX_NEXT_DUP); - if (rc == MDBX_SUCCESS) { - if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { - fetch_multiple:; - MDBX_cursor *mx = &mc->mc_xcursor->mx_cursor; - data->iov_len = - page_numkeys(mx->mc_pg[mx->mc_top]) * mx->mc_db->md_xsize; - data->iov_base = page_data(mx->mc_pg[mx->mc_top]); - mx->mc_ki[mx->mc_top] = (indx_t)page_numkeys(mx->mc_pg[mx->mc_top]) - 1; - } else { - rc = MDBX_NOTFOUND; - } - } - break; - case MDBX_PREV_MULTIPLE: - if (unlikely(!data)) - return MDBX_EINVAL; - if (!(mc->mc_db->md_flags & MDBX_DUPFIXED)) - return MDBX_INCOMPATIBLE; - rc = MDBX_SUCCESS; - if ((mc->mc_flags & C_INITIALIZED) == 0) - rc = cursor_last(mc, key, data); - if (rc == MDBX_SUCCESS) { - MDBX_cursor *mx = &mc->mc_xcursor->mx_cursor; - rc = MDBX_NOTFOUND; - if (mx->mc_flags & C_INITIALIZED) { - rc = cursor_sibling(mx, SIBLING_LEFT); - if (rc == MDBX_SUCCESS) - goto fetch_multiple; - } - } - break; - case MDBX_NEXT: - case MDBX_NEXT_DUP: - case MDBX_NEXT_NODUP: - rc = cursor_next(mc, key, data, op); - break; - case MDBX_PREV: - case MDBX_PREV_DUP: - case MDBX_PREV_NODUP: - rc = cursor_prev(mc, key, data, op); - break; - case MDBX_FIRST: - rc = cursor_first(mc, key, data); - break; - case MDBX_FIRST_DUP: - mfunc = cursor_first; - move: - if (unlikely(data == NULL || !(mc->mc_flags & C_INITIALIZED))) - return MDBX_EINVAL; - if (unlikely(mc->mc_xcursor == NULL)) - return MDBX_INCOMPATIBLE; - if (mc->mc_ki[mc->mc_top] >= page_numkeys(mc->mc_pg[mc->mc_top])) { - mc->mc_ki[mc->mc_top] = (indx_t)page_numkeys(mc->mc_pg[mc->mc_top]); - mc->mc_flags |= C_EOF; - return 
MDBX_NOTFOUND; - } else { - MDBX_node *node = page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); - if (!(node_flags(node) & F_DUPDATA)) { - get_key_optional(node, key); - rc = node_read(mc, node, data, mc->mc_pg[mc->mc_top]); - break; - } - } - if (unlikely(!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED))) - return MDBX_EINVAL; - rc = mfunc(&mc->mc_xcursor->mx_cursor, data, NULL); - break; - case MDBX_LAST: - rc = cursor_last(mc, key, data); - break; - case MDBX_LAST_DUP: - mfunc = cursor_last; - goto move; - - case MDBX_SET_UPPERBOUND: /* mostly same as MDBX_SET_LOWERBOUND */ - case MDBX_SET_LOWERBOUND: { - if (unlikely(key == NULL || data == NULL)) - return MDBX_EINVAL; - MDBX_val save_data = *data; - struct cursor_set_result csr = cursor_set(mc, key, data, MDBX_SET_RANGE); - rc = csr.err; - if (rc == MDBX_SUCCESS && csr.exact && mc->mc_xcursor) { - mc->mc_flags &= ~C_DEL; - csr.exact = false; - if (!save_data.iov_base && (mc->mc_db->md_flags & MDBX_DUPFIXED)) { - /* Avoiding search nested dupfixed hive if no data provided. - * This is changes the semantic of MDBX_SET_LOWERBOUND but avoid - * returning MDBX_BAD_VALSIZE. 
*/ - } else if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { - *data = save_data; - csr = - cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, MDBX_SET_RANGE); - rc = csr.err; - if (rc == MDBX_NOTFOUND) { - cASSERT(mc, !csr.exact); - rc = cursor_next(mc, key, data, MDBX_NEXT_NODUP); - } - } else { - int cmp = mc->mc_dbx->md_dcmp(&save_data, data); - csr.exact = (cmp == 0); - if (cmp > 0) - rc = cursor_next(mc, key, data, MDBX_NEXT_NODUP); - } - } - if (rc == MDBX_SUCCESS && !csr.exact) - rc = MDBX_RESULT_TRUE; - if (unlikely(op == MDBX_SET_UPPERBOUND)) { - /* minor fixups for MDBX_SET_UPPERBOUND */ - if (rc == MDBX_RESULT_TRUE) - /* already at great-than by MDBX_SET_LOWERBOUND */ - rc = MDBX_SUCCESS; - else if (rc == MDBX_SUCCESS) - /* exactly match, going next */ - rc = cursor_next(mc, key, data, MDBX_NEXT); - } - break; - } - - /* Doubtless API to positioning of the cursor at a specified key. */ - case MDBX_TO_KEY_LESSER_THAN: - case MDBX_TO_KEY_LESSER_OR_EQUAL: - case MDBX_TO_KEY_EQUAL: - case MDBX_TO_KEY_GREATER_OR_EQUAL: - case MDBX_TO_KEY_GREATER_THAN: { - if (unlikely(key == NULL)) - return MDBX_EINVAL; - struct cursor_set_result csr = cursor_set(mc, key, data, MDBX_SET_RANGE); - rc = csr.err; - if (csr.exact) { - cASSERT(mc, csr.err == MDBX_SUCCESS); - if (op == MDBX_TO_KEY_LESSER_THAN) - rc = cursor_prev(mc, key, data, MDBX_PREV_NODUP); - else if (op == MDBX_TO_KEY_GREATER_THAN) - rc = cursor_next(mc, key, data, MDBX_NEXT_NODUP); - } else if (op < MDBX_TO_KEY_EQUAL && - (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) - rc = cursor_prev(mc, key, data, MDBX_PREV_NODUP); - else if (op == MDBX_TO_KEY_EQUAL && rc == MDBX_SUCCESS) - rc = MDBX_NOTFOUND; - break; - } - - /* Doubtless API to positioning of the cursor at a specified key-value pair - * for multi-value hives. 
*/ - case MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN: - case MDBX_TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL: - case MDBX_TO_EXACT_KEY_VALUE_EQUAL: - case MDBX_TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL: - case MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN: { - if (unlikely(key == NULL || data == NULL)) - return MDBX_EINVAL; - MDBX_val save_data = *data; - struct cursor_set_result csr = cursor_set(mc, key, data, MDBX_SET_KEY); - rc = csr.err; - if (rc == MDBX_SUCCESS) { - cASSERT(mc, csr.exact); - MDBX_cursor *const mx = - (mc->mc_xcursor && - (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) - ? &mc->mc_xcursor->mx_cursor - : nullptr; - if (mx) { - csr = cursor_set(mx, &save_data, NULL, MDBX_SET_RANGE); - rc = csr.err; - if (csr.exact) { - cASSERT(mc, csr.err == MDBX_SUCCESS); - if (op == MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN) - rc = cursor_prev(mx, data, NULL, MDBX_PREV); - else if (op == MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN) - rc = cursor_next(mx, data, NULL, MDBX_NEXT); - } else if (op < MDBX_TO_EXACT_KEY_VALUE_EQUAL && - (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) - rc = cursor_prev(mx, data, NULL, MDBX_PREV); - else if (op == MDBX_TO_EXACT_KEY_VALUE_EQUAL && rc == MDBX_SUCCESS) - rc = MDBX_NOTFOUND; - } else { - int cmp = mc->mc_dbx->md_dcmp(data, &save_data); - switch (op) { - default: - __unreachable(); - case MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN: - rc = (cmp < 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; - break; - case MDBX_TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL: - rc = (cmp <= 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; - break; - case MDBX_TO_EXACT_KEY_VALUE_EQUAL: - rc = (cmp == 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; - break; - case MDBX_TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL: - rc = (cmp >= 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; - break; - case MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN: - rc = (cmp > 0) ? 
MDBX_SUCCESS : MDBX_NOTFOUND; - break; - } - } - } - break; - } - case MDBX_TO_PAIR_LESSER_THAN: - case MDBX_TO_PAIR_LESSER_OR_EQUAL: - case MDBX_TO_PAIR_EQUAL: - case MDBX_TO_PAIR_GREATER_OR_EQUAL: - case MDBX_TO_PAIR_GREATER_THAN: { - if (unlikely(key == NULL || data == NULL)) - return MDBX_EINVAL; - MDBX_val save_data = *data; - struct cursor_set_result csr = cursor_set(mc, key, data, MDBX_SET_RANGE); - rc = csr.err; - if (csr.exact) { - cASSERT(mc, csr.err == MDBX_SUCCESS); - MDBX_cursor *const mx = - (mc->mc_xcursor && - (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) - ? &mc->mc_xcursor->mx_cursor - : nullptr; - if (mx) { - csr = cursor_set(mx, &save_data, NULL, MDBX_SET_RANGE); - rc = csr.err; - if (csr.exact) { - cASSERT(mc, csr.err == MDBX_SUCCESS); - if (op == MDBX_TO_PAIR_LESSER_THAN) - rc = cursor_prev(mc, key, data, MDBX_PREV); - else if (op == MDBX_TO_PAIR_GREATER_THAN) - rc = cursor_next(mc, key, data, MDBX_NEXT); - } else if (op < MDBX_TO_PAIR_EQUAL && - (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) - rc = cursor_prev(mc, key, data, MDBX_PREV); - else if (op == MDBX_TO_PAIR_EQUAL && rc == MDBX_SUCCESS) - rc = MDBX_NOTFOUND; - else if (op > MDBX_TO_PAIR_EQUAL && rc == MDBX_NOTFOUND) - rc = cursor_next(mc, key, data, MDBX_NEXT); - } else { - int cmp = mc->mc_dbx->md_dcmp(data, &save_data); - switch (op) { - default: - __unreachable(); - case MDBX_TO_PAIR_LESSER_THAN: - rc = (cmp < 0) ? MDBX_SUCCESS : cursor_prev(mc, key, data, MDBX_PREV); - break; - case MDBX_TO_PAIR_LESSER_OR_EQUAL: - rc = - (cmp <= 0) ? MDBX_SUCCESS : cursor_prev(mc, key, data, MDBX_PREV); - break; - case MDBX_TO_PAIR_EQUAL: - rc = (cmp == 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; - break; - case MDBX_TO_PAIR_GREATER_OR_EQUAL: - rc = - (cmp >= 0) ? MDBX_SUCCESS : cursor_next(mc, key, data, MDBX_NEXT); - break; - case MDBX_TO_PAIR_GREATER_THAN: - rc = (cmp > 0) ? 
MDBX_SUCCESS : cursor_next(mc, key, data, MDBX_NEXT); - break; - } - } - } else if (op < MDBX_TO_PAIR_EQUAL && - (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) - rc = cursor_prev(mc, key, data, MDBX_PREV_NODUP); - else if (op == MDBX_TO_PAIR_EQUAL && rc == MDBX_SUCCESS) - rc = MDBX_NOTFOUND; - break; - } - default: - DEBUG("unhandled/unimplemented cursor operation %u", op); - return MDBX_EINVAL; - } - - mc->mc_flags &= ~C_DEL; - return rc; -} - -int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, - MDBX_cursor_op op) { - if (unlikely(mc == NULL)) - return MDBX_EINVAL; - - if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL - : MDBX_EBADSIGN; - - int rc = check_txn(mc->mc_txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - return cursor_get(mc, key, data, op); -} - -int mdbx_cursor_scan(MDBX_cursor *mc, MDBX_predicate_func *predicate, - void *context, MDBX_cursor_op start_op, - MDBX_cursor_op turn_op, void *arg) { - if (unlikely(!predicate)) - return MDBX_EINVAL; - - const unsigned valid_start_mask = - 1 << MDBX_FIRST | 1 << MDBX_FIRST_DUP | 1 << MDBX_LAST | - 1 << MDBX_LAST_DUP | 1 << MDBX_GET_CURRENT | 1 << MDBX_GET_MULTIPLE; - if (unlikely(start_op > 30 || ((1 << start_op) & valid_start_mask) == 0)) - return MDBX_EINVAL; - - const unsigned valid_turn_mask = - 1 << MDBX_NEXT | 1 << MDBX_NEXT_DUP | 1 << MDBX_NEXT_NODUP | - 1 << MDBX_PREV | 1 << MDBX_PREV_DUP | 1 << MDBX_PREV_NODUP | - 1 << MDBX_NEXT_MULTIPLE | 1 << MDBX_PREV_MULTIPLE; - if (unlikely(turn_op > 30 || ((1 << turn_op) & valid_turn_mask) == 0)) - return MDBX_EINVAL; - - MDBX_val key = {nullptr, 0}, data = {nullptr, 0}; - int rc = mdbx_cursor_get(mc, &key, &data, start_op); - while (likely(rc == MDBX_SUCCESS)) { - rc = predicate(context, &key, &data, arg); - if (rc != MDBX_RESULT_FALSE) - return rc; - rc = cursor_get(mc, &key, &data, turn_op); - } - return (rc == MDBX_NOTFOUND) ? 
MDBX_RESULT_FALSE : rc; -} - -int mdbx_cursor_scan_from(MDBX_cursor *mc, MDBX_predicate_func *predicate, - void *context, MDBX_cursor_op from_op, MDBX_val *key, - MDBX_val *value, MDBX_cursor_op turn_op, void *arg) { - if (unlikely(!predicate)) - return MDBX_EINVAL; - - const unsigned valid_start_mask = - 1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | 1 << MDBX_SET_KEY | - 1 << MDBX_GET_MULTIPLE | 1 << MDBX_SET_LOWERBOUND | - 1 << MDBX_SET_UPPERBOUND; - ; - if (unlikely(from_op < MDBX_TO_KEY_LESSER_THAN && - ((1 << from_op) & valid_start_mask) == 0)) - return MDBX_EINVAL; - - const unsigned valid_turn_mask = - 1 << MDBX_NEXT | 1 << MDBX_NEXT_DUP | 1 << MDBX_NEXT_NODUP | - 1 << MDBX_PREV | 1 << MDBX_PREV_DUP | 1 << MDBX_PREV_NODUP | - 1 << MDBX_NEXT_MULTIPLE | 1 << MDBX_PREV_MULTIPLE; - if (unlikely(turn_op > 30 || ((1 << turn_op) & valid_turn_mask) == 0)) - return MDBX_EINVAL; - - int rc = mdbx_cursor_get(mc, key, value, from_op); - if (unlikely(MDBX_IS_ERROR(rc))) - return rc; - - cASSERT(mc, key != nullptr); - MDBX_val stub; - if (!value) { - value = &stub; - rc = cursor_get(mc, key, value, MDBX_GET_CURRENT); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - for (;;) { - rc = predicate(context, key, value, arg); - if (rc != MDBX_RESULT_FALSE) - return rc; - rc = cursor_get(mc, key, value, turn_op); - if (rc != MDBX_SUCCESS) - return (rc == MDBX_NOTFOUND) ? 
MDBX_RESULT_FALSE : rc; - } -} - -static int cursor_first_batch(MDBX_cursor *mc) { - if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { - int err = page_search(mc, NULL, MDBX_PS_FIRST); - if (unlikely(err != MDBX_SUCCESS)) - return err; - } - cASSERT(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); - - mc->mc_flags |= C_INITIALIZED; - mc->mc_flags &= ~C_EOF; - mc->mc_ki[mc->mc_top] = 0; - return MDBX_SUCCESS; -} - -static int cursor_next_batch(MDBX_cursor *mc) { - if (unlikely(!(mc->mc_flags & C_INITIALIZED))) - return cursor_first_batch(mc); - - MDBX_page *mp = mc->mc_pg[mc->mc_top]; - if (unlikely(mc->mc_flags & C_EOF)) { - if ((size_t)mc->mc_ki[mc->mc_top] + 1 >= page_numkeys(mp)) - return MDBX_NOTFOUND; - mc->mc_flags ^= C_EOF; - } - - intptr_t ki = mc->mc_ki[mc->mc_top]; - mc->mc_ki[mc->mc_top] = (indx_t)++ki; - const intptr_t numkeys = page_numkeys(mp); - if (likely(ki >= numkeys)) { - DEBUG("%s", "=====> move to next sibling page"); - mc->mc_ki[mc->mc_top] = (indx_t)(numkeys - 1); - int err = cursor_sibling(mc, SIBLING_RIGHT); - if (unlikely(err != MDBX_SUCCESS)) { - mc->mc_flags |= C_EOF; - return err; - } - mp = mc->mc_pg[mc->mc_top]; - DEBUG("next page is %" PRIaPGNO ", key index %u", mp->mp_pgno, - mc->mc_ki[mc->mc_top]); - if (!MDBX_DISABLE_VALIDATION && unlikely(!CHECK_LEAF_TYPE(mc, mp))) { - ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", - mp->mp_pgno, mp->mp_flags); - return MDBX_CORRUPTED; - } - } - return MDBX_SUCCESS; -} - -int mdbx_cursor_get_batch(MDBX_cursor *mc, size_t *count, MDBX_val *pairs, - size_t limit, MDBX_cursor_op op) { - if (unlikely(mc == NULL || count == NULL || limit < 4)) - return MDBX_EINVAL; - - if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? 
MDBX_EINVAL - : MDBX_EBADSIGN; - - int rc = check_txn(mc->mc_txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(mc->mc_db->md_flags & MDBX_DUPSORT)) - return MDBX_INCOMPATIBLE /* must be a non-dupsort subDB */; - - switch (op) { - case MDBX_FIRST: - rc = cursor_first_batch(mc); - break; - case MDBX_NEXT: - rc = cursor_next_batch(mc); - break; - case MDBX_GET_CURRENT: - rc = likely(mc->mc_flags & C_INITIALIZED) ? MDBX_SUCCESS : MDBX_ENODATA; - break; - default: - DEBUG("unhandled/unimplemented cursor operation %u", op); - rc = MDBX_EINVAL; - break; - } - - if (unlikely(rc != MDBX_SUCCESS)) { - *count = 0; - return rc; - } - - const MDBX_page *const mp = mc->mc_pg[mc->mc_top]; - if (!MDBX_DISABLE_VALIDATION && unlikely(!CHECK_LEAF_TYPE(mc, mp))) { - ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", - mp->mp_pgno, mp->mp_flags); - return MDBX_CORRUPTED; - } - const size_t nkeys = page_numkeys(mp); - size_t i = mc->mc_ki[mc->mc_top], n = 0; - if (unlikely(i >= nkeys)) { - cASSERT(mc, op == MDBX_GET_CURRENT); - cASSERT(mc, mdbx_cursor_on_last(mc) == MDBX_RESULT_TRUE); - *count = 0; - if (mc->mc_flags & C_EOF) { - cASSERT(mc, mdbx_cursor_on_last(mc) == MDBX_RESULT_TRUE); - return MDBX_ENODATA; - } - if (mdbx_cursor_on_last(mc) != MDBX_RESULT_TRUE) - return MDBX_EINVAL /* again MDBX_GET_CURRENT after MDBX_GET_CURRENT */; - mc->mc_flags |= C_EOF; - return MDBX_NOTFOUND; - } - - do { - if (unlikely(n + 2 > limit)) { - rc = MDBX_RESULT_TRUE; - break; - } - const MDBX_node *leaf = page_node(mp, i); - get_key(leaf, &pairs[n]); - rc = node_read(mc, leaf, &pairs[n + 1], mp); - if (unlikely(rc != MDBX_SUCCESS)) - break; - n += 2; - } while (++i < nkeys); - - mc->mc_ki[mc->mc_top] = (indx_t)i; - *count = n; - return rc; -} - -static int touch_dbi(MDBX_cursor *mc) { - cASSERT(mc, (*mc->mc_dbi_state & DBI_DIRTY) == 0); - *mc->mc_dbi_state |= DBI_DIRTY; - mc->mc_txn->mt_flags |= MDBX_TXN_DIRTY; - if (mc->mc_dbi >= CORE_DBS) { 
- /* Touch DB record of named DB */ - MDBX_cursor_couple cx; - int rc = dbi_check(mc->mc_txn, MAIN_DBI); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - rc = cursor_init(&cx.outer, mc->mc_txn, MAIN_DBI); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - mc->mc_txn->mt_dbi_state[MAIN_DBI] |= DBI_DIRTY; - rc = page_search(&cx.outer, &mc->mc_dbx->md_name, MDBX_PS_MODIFY); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - return MDBX_SUCCESS; -} - -static __hot int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, - const MDBX_val *data) { - cASSERT(mc, (mc->mc_txn->mt_flags & MDBX_TXN_RDONLY) == 0); - cASSERT(mc, (mc->mc_flags & C_INITIALIZED) || mc->mc_snum == 0); - cASSERT(mc, cursor_is_tracked(mc)); - - cASSERT(mc, F_ISSET(dbi_state(mc->mc_txn, FREE_DBI), DBI_LINDO | DBI_VALID)); - cASSERT(mc, F_ISSET(dbi_state(mc->mc_txn, MAIN_DBI), DBI_LINDO | DBI_VALID)); - if ((mc->mc_flags & C_SUB) == 0) { - MDBX_txn *const txn = mc->mc_txn; - txn_lru_turn(txn); - - if (unlikely((*mc->mc_dbi_state & DBI_DIRTY) == 0)) { - int err = touch_dbi(mc); - if (unlikely(err != MDBX_SUCCESS)) - return err; - } - - /* Estimate how much space this operation will take: */ - /* 1) Max b-tree height, reasonable enough with including dups' sub-tree */ - size_t need = CURSOR_STACK + 3; - /* 2) GC/FreeDB for any payload */ - if (mc->mc_dbi > FREE_DBI) { - need += txn->mt_dbs[FREE_DBI].md_depth + (size_t)3; - /* 3) Named DBs also dirty the main DB */ - if (mc->mc_dbi > MAIN_DBI) - need += txn->mt_dbs[MAIN_DBI].md_depth + (size_t)3; - } -#if xMDBX_DEBUG_SPILLING != 2 - /* production mode */ - /* 4) Double the page chain estimation - * for extensively splitting, rebalance and merging */ - need += need; - /* 5) Factor the key+data which to be put in */ - need += bytes2pgno(txn->mt_env, node_size(key, data)) + (size_t)1; -#else - /* debug mode */ - (void)key; - (void)data; - txn->mt_env->debug_dirtied_est = ++need; - txn->mt_env->debug_dirtied_act = 0; -#endif /* xMDBX_DEBUG_SPILLING 
== 2 */ - - int err = txn_spill(txn, mc, need); - if (unlikely(err != MDBX_SUCCESS)) - return err; - } - - int rc = MDBX_SUCCESS; - if (likely(mc->mc_snum) && - !IS_MODIFIABLE(mc->mc_txn, mc->mc_pg[mc->mc_snum - 1])) { - mc->mc_top = 0; - do { - rc = page_touch(mc); - if (unlikely(rc != MDBX_SUCCESS)) - break; - mc->mc_top += 1; - } while (mc->mc_top < mc->mc_snum); - mc->mc_top = mc->mc_snum - 1; - } - return rc; -} - -static size_t leaf2_reserve(const MDBX_env *const env, size_t host_page_room, - size_t subpage_len, size_t item_len) { - eASSERT(env, (subpage_len & 1) == 0); - eASSERT(env, - env->me_subpage_reserve_prereq > env->me_subpage_room_threshold + - env->me_subpage_reserve_limit && - env->me_leaf_nodemax >= env->me_subpage_limit + NODESIZE); - size_t reserve = 0; - for (size_t n = 0; - n < 5 && reserve + item_len <= env->me_subpage_reserve_limit && - EVEN(subpage_len + item_len) <= env->me_subpage_limit && - host_page_room >= - env->me_subpage_reserve_prereq + EVEN(subpage_len + item_len); - ++n) { - subpage_len += item_len; - reserve += item_len; - } - return reserve + (subpage_len & 1); -} - -static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, - MDBX_val *data, unsigned flags) { - int err; - DKBUF_DEBUG; - MDBX_env *const env = mc->mc_txn->mt_env; - DEBUG("==> put db %d key [%s], size %" PRIuPTR ", data [%s] size %" PRIuPTR, - DDBI(mc), DKEY_DEBUG(key), key->iov_len, - DVAL_DEBUG((flags & MDBX_RESERVE) ? nullptr : data), data->iov_len); - - if ((flags & MDBX_CURRENT) != 0 && (mc->mc_flags & C_SUB) == 0) { - if (unlikely(flags & (MDBX_APPEND | MDBX_NOOVERWRITE))) - return MDBX_EINVAL; - /* Опция MDBX_CURRENT означает, что запрошено обновление текущей записи, - * на которой сейчас стоит курсор. Проверяем что переданный ключ совпадает - * со значением в текущей позиции курсора. - * Здесь проще вызвать cursor_get(), так как для обслуживания таблиц - * с MDBX_DUPSORT также требуется текущий размер данных. 
*/ - MDBX_val current_key, current_data; - err = cursor_get(mc, &current_key, &current_data, MDBX_GET_CURRENT); - if (unlikely(err != MDBX_SUCCESS)) - return err; - if (mc->mc_dbx->md_cmp(key, &current_key) != 0) - return MDBX_EKEYMISMATCH; - - if (unlikely((flags & MDBX_MULTIPLE))) - goto drop_current; - - if (mc->mc_db->md_flags & MDBX_DUPSORT) { - MDBX_node *node = page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); - if (node_flags(node) & F_DUPDATA) { - cASSERT(mc, mc->mc_xcursor != NULL && - (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)); - /* Если за ключом более одного значения, либо если размер данных - * отличается, то вместо обновления требуется удаление и - * последующая вставка. */ - if (mc->mc_xcursor->mx_db.md_entries > 1 || - current_data.iov_len != data->iov_len) { - drop_current: - err = cursor_del(mc, flags & MDBX_ALLDUPS); - if (unlikely(err != MDBX_SUCCESS)) - return err; - flags -= MDBX_CURRENT; - goto skip_check_samedata; - } - } else if (unlikely(node_size(key, data) > env->me_leaf_nodemax)) { - err = cursor_del(mc, 0); - if (unlikely(err != MDBX_SUCCESS)) - return err; - flags -= MDBX_CURRENT; - goto skip_check_samedata; - } - } - if (!(flags & MDBX_RESERVE) && - unlikely(cmp_lenfast(&current_data, data) == 0)) - return MDBX_SUCCESS /* the same data, nothing to update */; - skip_check_samedata:; - } - - int rc = MDBX_SUCCESS; - if (mc->mc_db->md_root == P_INVALID) { - /* new database, cursor has nothing to point to */ - mc->mc_snum = 0; - mc->mc_top = 0; - mc->mc_flags &= ~C_INITIALIZED; - rc = MDBX_NO_ROOT; - } else if ((flags & MDBX_CURRENT) == 0) { - bool exact = false; - MDBX_val last_key, old_data; - if ((flags & MDBX_APPEND) && mc->mc_db->md_entries > 0) { - rc = cursor_last(mc, &last_key, &old_data); - if (likely(rc == MDBX_SUCCESS)) { - const int cmp = mc->mc_dbx->md_cmp(key, &last_key); - if (likely(cmp > 0)) { - mc->mc_ki[mc->mc_top]++; /* step forward for appending */ - rc = MDBX_NOTFOUND; - } else if (unlikely(cmp != 0)) { - /* new-key <
last-key */ - return MDBX_EKEYMISMATCH; - } else { - rc = MDBX_SUCCESS; - exact = true; - } - } - } else { - struct cursor_set_result csr = - /* olddata may not be updated in case LEAF2-page of dupfixed-subDB */ - cursor_set(mc, (MDBX_val *)key, &old_data, MDBX_SET); - rc = csr.err; - exact = csr.exact; - } - if (likely(rc == MDBX_SUCCESS)) { - if (exact) { - if (unlikely(flags & MDBX_NOOVERWRITE)) { - DEBUG("duplicate key [%s]", DKEY_DEBUG(key)); - *data = old_data; - return MDBX_KEYEXIST; - } - if (unlikely(mc->mc_flags & C_SUB)) { - /* nested subtree of DUPSORT-database with the same key, - * nothing to update */ - eASSERT(env, data->iov_len == 0 && - (old_data.iov_len == 0 || - /* olddata may not be updated in case LEAF2-page - of dupfixed-subDB */ - (mc->mc_db->md_flags & MDBX_DUPFIXED))); - return MDBX_SUCCESS; - } - if (unlikely(flags & MDBX_ALLDUPS) && mc->mc_xcursor && - (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) { - err = cursor_del(mc, MDBX_ALLDUPS); - if (unlikely(err != MDBX_SUCCESS)) - return err; - flags -= MDBX_ALLDUPS; - rc = mc->mc_snum ? 
MDBX_NOTFOUND : MDBX_NO_ROOT; - exact = false; - } else if (!(flags & (MDBX_RESERVE | MDBX_MULTIPLE))) { - /* checking for early exit without dirtying pages */ - if (unlikely(eq_fast(data, &old_data))) { - cASSERT(mc, mc->mc_dbx->md_dcmp(data, &old_data) == 0); - if (mc->mc_xcursor) { - if (flags & MDBX_NODUPDATA) - return MDBX_KEYEXIST; - if (flags & MDBX_APPENDDUP) - return MDBX_EKEYMISMATCH; - } - /* the same data, nothing to update */ - return MDBX_SUCCESS; - } - cASSERT(mc, mc->mc_dbx->md_dcmp(data, &old_data) != 0); - } - } - } else if (unlikely(rc != MDBX_NOTFOUND)) - return rc; - } - - mc->mc_flags &= ~C_DEL; - MDBX_val xdata, *ref_data = data; - size_t *batch_dupfixed_done = nullptr, batch_dupfixed_given = 0; - if (unlikely(flags & MDBX_MULTIPLE)) { - batch_dupfixed_given = data[1].iov_len; - batch_dupfixed_done = &data[1].iov_len; - *batch_dupfixed_done = 0; - } - - /* Cursor is positioned, check for room in the dirty list */ - err = cursor_touch(mc, key, ref_data); - if (unlikely(err)) - return err; - - if (unlikely(rc == MDBX_NO_ROOT)) { - /* new database, write a root leaf page */ - DEBUG("%s", "allocating new root leaf page"); - pgr_t npr = page_new(mc, P_LEAF); - if (unlikely(npr.err != MDBX_SUCCESS)) - return npr.err; - npr.err = cursor_push(mc, npr.page); - if (unlikely(npr.err != MDBX_SUCCESS)) - return npr.err; - mc->mc_db->md_root = npr.page->mp_pgno; - mc->mc_db->md_depth++; - if (mc->mc_db->md_flags & MDBX_INTEGERKEY) { - assert(key->iov_len >= mc->mc_dbx->md_klen_min && - key->iov_len <= mc->mc_dbx->md_klen_max); - mc->mc_dbx->md_klen_min = mc->mc_dbx->md_klen_max = key->iov_len; - } - if (mc->mc_db->md_flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) { - assert(data->iov_len >= mc->mc_dbx->md_vlen_min && - data->iov_len <= mc->mc_dbx->md_vlen_max); - assert(mc->mc_xcursor != NULL); - mc->mc_db->md_xsize = mc->mc_xcursor->mx_db.md_xsize = - (unsigned)(mc->mc_dbx->md_vlen_min = mc->mc_dbx->md_vlen_max = - mc->mc_xcursor->mx_dbx.md_klen_min = - 
mc->mc_xcursor->mx_dbx.md_klen_max = - data->iov_len); - if (mc->mc_flags & C_SUB) - npr.page->mp_flags |= P_LEAF2; - } - mc->mc_flags |= C_INITIALIZED; - } - - MDBX_val old_singledup, old_data; - MDBX_db nested_dupdb; - MDBX_page *sub_root = nullptr; - bool insert_key, insert_data; - uint16_t fp_flags = P_LEAF; - MDBX_page *fp = env->me_pbuf; - fp->mp_txnid = mc->mc_txn->mt_front; - insert_key = insert_data = (rc != MDBX_SUCCESS); - old_singledup.iov_base = nullptr; - if (insert_key) { - /* The key does not exist */ - DEBUG("inserting key at index %i", mc->mc_ki[mc->mc_top]); - if ((mc->mc_db->md_flags & MDBX_DUPSORT) && - node_size(key, data) > env->me_leaf_nodemax) { - /* Too big for a node, insert in sub-DB. Set up an empty - * "old sub-page" for convert_to_subtree to expand to a full page. */ - fp->mp_leaf2_ksize = - (mc->mc_db->md_flags & MDBX_DUPFIXED) ? (uint16_t)data->iov_len : 0; - fp->mp_lower = fp->mp_upper = 0; - old_data.iov_len = PAGEHDRSZ; - goto convert_to_subtree; - } - } else { - /* there's only a key anyway, so this is a no-op */ - if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { - size_t ksize = mc->mc_db->md_xsize; - if (unlikely(key->iov_len != ksize)) - return MDBX_BAD_VALSIZE; - void *ptr = - page_leaf2key(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], ksize); - memcpy(ptr, key->iov_base, ksize); - fix_parent: - /* if overwriting slot 0 of leaf, need to - * update branch key if there is a parent page */ - if (mc->mc_top && !mc->mc_ki[mc->mc_top]) { - size_t dtop = 1; - mc->mc_top--; - /* slot 0 is always an empty key, find real slot */ - while (mc->mc_top && !mc->mc_ki[mc->mc_top]) { - mc->mc_top--; - dtop++; - } - err = MDBX_SUCCESS; - if (mc->mc_ki[mc->mc_top]) - err = update_key(mc, key); - cASSERT(mc, mc->mc_top + dtop < UINT16_MAX); - mc->mc_top += (uint8_t)dtop; - if (unlikely(err != MDBX_SUCCESS)) - return err; - } - - if (AUDIT_ENABLED()) { - err = cursor_check(mc); - if (unlikely(err != MDBX_SUCCESS)) - return err; - } - return MDBX_SUCCESS; 
- } - - more:; - if (AUDIT_ENABLED()) { - err = cursor_check(mc); - if (unlikely(err != MDBX_SUCCESS)) - return err; - } - MDBX_node *const node = - page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); - - /* Large/Overflow page overwrites need special handling */ - if (unlikely(node_flags(node) & F_BIGDATA)) { - const size_t dpages = (node_size(key, data) > env->me_leaf_nodemax) - ? number_of_ovpages(env, data->iov_len) - : 0; - - const pgno_t pgno = node_largedata_pgno(node); - pgr_t lp = page_get_large(mc, pgno, mc->mc_pg[mc->mc_top]->mp_txnid); - if (unlikely(lp.err != MDBX_SUCCESS)) - return lp.err; - cASSERT(mc, PAGETYPE_WHOLE(lp.page) == P_OVERFLOW); - - /* Is the ov page from this txn (or a parent) and big enough? */ - const size_t ovpages = lp.page->mp_pages; - const size_t extra_threshold = - (mc->mc_dbi == FREE_DBI) - ? 1 - : /* LY: add configurable threshold to keep reserve space */ 0; - if (!IS_FROZEN(mc->mc_txn, lp.page) && ovpages >= dpages && - ovpages <= dpages + extra_threshold) { - /* yes, overwrite it. 
*/ - if (!IS_MODIFIABLE(mc->mc_txn, lp.page)) { - if (IS_SPILLED(mc->mc_txn, lp.page)) { - lp = /* TODO: avoid search and get txn & spill-index from - page_result */ - page_unspill(mc->mc_txn, lp.page); - if (unlikely(lp.err)) - return lp.err; - } else { - if (unlikely(!mc->mc_txn->mt_parent)) { - ERROR("Unexpected not frozen/modifiable/spilled but shadowed %s " - "page %" PRIaPGNO " mod-txnid %" PRIaTXN "," - " without parent transaction, current txn %" PRIaTXN - " front %" PRIaTXN, - "overflow/large", pgno, lp.page->mp_txnid, - mc->mc_txn->mt_txnid, mc->mc_txn->mt_front); - return MDBX_PROBLEM; - } - - /* It is writable only in a parent txn */ - MDBX_page *np = page_malloc(mc->mc_txn, ovpages); - if (unlikely(!np)) - return MDBX_ENOMEM; - - memcpy(np, lp.page, PAGEHDRSZ); /* Copy header of page */ - err = page_dirty(mc->mc_txn, lp.page = np, ovpages); - if (unlikely(err != MDBX_SUCCESS)) - return err; - -#if MDBX_ENABLE_PGOP_STAT - mc->mc_txn->mt_env->me_lck->mti_pgop_stat.clone.weak += ovpages; -#endif /* MDBX_ENABLE_PGOP_STAT */ - cASSERT(mc, dirtylist_check(mc->mc_txn)); - } - } - node_set_ds(node, data->iov_len); - if (flags & MDBX_RESERVE) - data->iov_base = page_data(lp.page); - else - memcpy(page_data(lp.page), data->iov_base, data->iov_len); - - if (AUDIT_ENABLED()) { - err = cursor_check(mc); - if (unlikely(err != MDBX_SUCCESS)) - return err; - } - return MDBX_SUCCESS; - } - - if ((err = page_retire(mc, lp.page)) != MDBX_SUCCESS) - return err; - } else { - old_data.iov_len = node_ds(node); - old_data.iov_base = node_data(node); - cASSERT(mc, ptr_disp(old_data.iov_base, old_data.iov_len) <= - ptr_disp(mc->mc_pg[mc->mc_top], env->me_psize)); - - /* DB has dups? */ - if (mc->mc_db->md_flags & MDBX_DUPSORT) { - /* Prepare (sub-)page/sub-DB to accept the new item, if needed. - * fp: old sub-page or a header faking it. - * mp: new (sub-)page. - * xdata: node data with new sub-page or sub-DB. 
*/ - size_t growth = 0; /* growth in page size.*/ - MDBX_page *mp = fp = xdata.iov_base = env->me_pbuf; - mp->mp_pgno = mc->mc_pg[mc->mc_top]->mp_pgno; - - /* Was a single item before, must convert now */ - if (!(node_flags(node) & F_DUPDATA)) { - /* does data match? */ - if (flags & MDBX_APPENDDUP) { - const int cmp = mc->mc_dbx->md_dcmp(data, &old_data); - cASSERT(mc, cmp != 0 || eq_fast(data, &old_data)); - if (unlikely(cmp <= 0)) - return MDBX_EKEYMISMATCH; - } else if (eq_fast(data, &old_data)) { - cASSERT(mc, mc->mc_dbx->md_dcmp(data, &old_data) == 0); - if (flags & MDBX_NODUPDATA) - return MDBX_KEYEXIST; - /* data is match exactly byte-to-byte, nothing to update */ - rc = MDBX_SUCCESS; - if (unlikely(batch_dupfixed_done)) - goto batch_dupfixed_continue; - return rc; - } - - /* Just overwrite the current item */ - if (flags & MDBX_CURRENT) { - cASSERT(mc, node_size(key, data) <= env->me_leaf_nodemax); - goto current; - } - - /* Back up original data item */ - memcpy(old_singledup.iov_base = fp + 1, old_data.iov_base, - old_singledup.iov_len = old_data.iov_len); - - /* Make sub-page header for the dup items, with dummy body */ - fp->mp_flags = P_LEAF | P_SUBP; - fp->mp_lower = 0; - xdata.iov_len = PAGEHDRSZ + old_data.iov_len + data->iov_len; - if (mc->mc_db->md_flags & MDBX_DUPFIXED) { - fp->mp_flags |= P_LEAF2; - fp->mp_leaf2_ksize = (uint16_t)data->iov_len; - /* Будем создавать LEAF2-страницу, как минимум с двумя элементами. - * При коротких значениях и наличии свободного места можно сделать - * некоторое резервирование места, чтобы при последующих добавлениях - * не сразу расширять созданную под-страницу. - * Резервирование в целом сомнительно (см ниже), но может сработать - * в плюс (а если в минус то несущественный) при коротких ключах. 
*/ - xdata.iov_len += leaf2_reserve( - env, page_room(mc->mc_pg[mc->mc_top]) + old_data.iov_len, - xdata.iov_len, data->iov_len); - cASSERT(mc, (xdata.iov_len & 1) == 0); - } else { - xdata.iov_len += 2 * (sizeof(indx_t) + NODESIZE) + - (old_data.iov_len & 1) + (data->iov_len & 1); - } - cASSERT(mc, (xdata.iov_len & 1) == 0); - fp->mp_upper = (uint16_t)(xdata.iov_len - PAGEHDRSZ); - old_data.iov_len = xdata.iov_len; /* pretend olddata is fp */ - } else if (node_flags(node) & F_SUBDATA) { - /* Data is on sub-DB, just store it */ - flags |= F_DUPDATA | F_SUBDATA; - goto dupsort_put; - } else { - /* Data is on sub-page */ - fp = old_data.iov_base; - switch (flags) { - default: - growth = IS_LEAF2(fp) ? fp->mp_leaf2_ksize - : (node_size(data, nullptr) + sizeof(indx_t)); - if (page_room(fp) >= growth) { - /* На текущей под-странице есть место для добавления элемента. - * Оптимальнее продолжить использовать эту страницу, ибо - * добавление вложенного дерева увеличит WAF на одну страницу. */ - goto continue_subpage; - } - /* На текущей под-странице нет места для еще одного элемента. - * Можно либо увеличить эту под-страницу, либо вынести куст - * значений во вложенное дерево. - * - * Продолжать использовать текущую под-страницу возможно - * только пока и если размер после добавления элемента будет - * меньше me_leaf_nodemax. Соответственно, при превышении - * просто сразу переходим на вложенное дерево. */ - xdata.iov_len = old_data.iov_len + (growth += growth & 1); - if (xdata.iov_len > env->me_subpage_limit) - goto convert_to_subtree; - - /* Можно либо увеличить под-страницу, в том числе с некоторым - * запасом, либо перейти на вложенное поддерево. 
- * - * Резервирование места на под-странице представляется сомнительным: - * - Резервирование увеличит рыхлость страниц, в том числе - * вероятность разделения основной/гнездовой страницы; - * - Сложно предсказать полезный размер резервирования, - * особенно для не-MDBX_DUPFIXED; - * - Наличие резерва позволяет съекономить только на перемещении - * части элементов основной/гнездовой страницы при последующих - * добавлениях в нее элементов. Причем после первого изменения - * размера под-страницы, её тело будет примыкать - * к неиспользуемому месту на основной/гнездовой странице, - * поэтому последующие последовательные добавления потребуют - * только передвижения в mp_ptrs[]. - * - * Соответственно, более важным/определяющим представляется - * своевременный переход к вложеному дереву, но тут достаточно - * сложный конфликт интересов: - * - При склонности к переходу к вложенным деревьям, суммарно - * в БД будет большее кол-во более рыхлых страниц. Это увеличит - * WAF, а также RAF при последовательных чтениях большой БД. - * Однако, при коротких ключах и большом кол-ве - * дубликатов/мультизначений, плотность ключей в листовых - * страницах основного дерева будет выше. Соответственно, будет - * пропорционально меньше branch-страниц. Поэтому будет выше - * вероятность оседания/не-вымывания страниц основного дерева из - * LRU-кэша, а также попадания в write-back кэш при записи. - * - Наоботот, при склонности к использованию под-страниц, будут - * наблюдаться обратные эффекты. Плюс некоторые накладные расходы - * на лишнее копирование данных под-страниц в сценариях - * нескольких обонвлений дубликатов одного куста в одной - * транзакции. 
- * - * Суммарно наиболее рациональным представляется такая тактика: - * - Вводим три порога subpage_limit, subpage_room_threshold - * и subpage_reserve_prereq, которые могут быть - * заданы/скорректированы пользователем в ‰ от me_leaf_nodemax; - * - Используем под-страницу пока её размер меньше subpage_limit - * и на основной/гнездовой странице не-менее - * subpage_room_threshold свободного места; - * - Резервируем место только для 1-3 коротких dupfixed-элементов, - * расширяя размер под-страницы на размер кэш-линии ЦПУ, но - * только если на странице не менее subpage_reserve_prereq - * свободного места. - * - По-умолчанию устанавливаем: - * subpage_limit = me_leaf_nodemax (1000‰); - * subpage_room_threshold = 0; - * subpage_reserve_prereq = me_leaf_nodemax (1000‰). - */ - if (IS_LEAF2(fp)) - growth += leaf2_reserve( - env, page_room(mc->mc_pg[mc->mc_top]) + old_data.iov_len, - xdata.iov_len, data->iov_len); - break; - - case MDBX_CURRENT | MDBX_NODUPDATA: - case MDBX_CURRENT: - continue_subpage: - fp->mp_txnid = mc->mc_txn->mt_front; - fp->mp_pgno = mp->mp_pgno; - mc->mc_xcursor->mx_cursor.mc_pg[0] = fp; - flags |= F_DUPDATA; - goto dupsort_put; - } - xdata.iov_len = old_data.iov_len + growth; - cASSERT(mc, (xdata.iov_len & 1) == 0); - } - - fp_flags = fp->mp_flags; - if (xdata.iov_len > env->me_subpage_limit || - node_size_len(node_ks(node), xdata.iov_len) > - env->me_leaf_nodemax || - (env->me_subpage_room_threshold && - page_room(mc->mc_pg[mc->mc_top]) + - node_size_len(node_ks(node), old_data.iov_len) < - env->me_subpage_room_threshold + - node_size_len(node_ks(node), xdata.iov_len))) { - /* Too big for a sub-page, convert to sub-DB */ - convert_to_subtree: - fp_flags &= ~P_SUBP; - nested_dupdb.md_xsize = 0; - nested_dupdb.md_flags = flags_db2sub(mc->mc_db->md_flags); - if (mc->mc_db->md_flags & MDBX_DUPFIXED) { - fp_flags |= P_LEAF2; - nested_dupdb.md_xsize = fp->mp_leaf2_ksize; - } - nested_dupdb.md_depth = 1; - nested_dupdb.md_branch_pages = 0; - 
nested_dupdb.md_leaf_pages = 1; - nested_dupdb.md_overflow_pages = 0; - nested_dupdb.md_entries = page_numkeys(fp); - xdata.iov_len = sizeof(nested_dupdb); - xdata.iov_base = &nested_dupdb; - const pgr_t par = page_alloc(mc); - mp = par.page; - if (unlikely(par.err != MDBX_SUCCESS)) - return par.err; - mc->mc_db->md_leaf_pages += 1; - cASSERT(mc, env->me_psize > old_data.iov_len); - growth = env->me_psize - (unsigned)old_data.iov_len; - cASSERT(mc, (growth & 1) == 0); - flags |= F_DUPDATA | F_SUBDATA; - nested_dupdb.md_root = mp->mp_pgno; - nested_dupdb.md_seq = 0; - nested_dupdb.md_mod_txnid = mc->mc_txn->mt_txnid; - sub_root = mp; - } - if (mp != fp) { - mp->mp_flags = fp_flags; - mp->mp_txnid = mc->mc_txn->mt_front; - mp->mp_leaf2_ksize = fp->mp_leaf2_ksize; - mp->mp_lower = fp->mp_lower; - cASSERT(mc, fp->mp_upper + growth < UINT16_MAX); - mp->mp_upper = fp->mp_upper + (indx_t)growth; - if (unlikely(fp_flags & P_LEAF2)) { - memcpy(page_data(mp), page_data(fp), - page_numkeys(fp) * fp->mp_leaf2_ksize); - cASSERT(mc, - (((mp->mp_leaf2_ksize & page_numkeys(mp)) ^ mp->mp_upper) & - 1) == 0); - } else { - cASSERT(mc, (mp->mp_upper & 1) == 0); - memcpy(ptr_disp(mp, mp->mp_upper + PAGEHDRSZ), - ptr_disp(fp, fp->mp_upper + PAGEHDRSZ), - old_data.iov_len - fp->mp_upper - PAGEHDRSZ); - memcpy(mp->mp_ptrs, fp->mp_ptrs, - page_numkeys(fp) * sizeof(mp->mp_ptrs[0])); - for (size_t i = 0; i < page_numkeys(fp); i++) { - cASSERT(mc, mp->mp_ptrs[i] + growth <= UINT16_MAX); - mp->mp_ptrs[i] += (indx_t)growth; - } - } - } - - if (!insert_key) - node_del(mc, 0); - ref_data = &xdata; - flags |= F_DUPDATA; - goto insert_node; - } - - /* MDBX passes F_SUBDATA in 'flags' to write a DB record */ - if (unlikely((node_flags(node) ^ flags) & F_SUBDATA)) - return MDBX_INCOMPATIBLE; - - current: - if (data->iov_len == old_data.iov_len) { - cASSERT(mc, EVEN(key->iov_len) == EVEN(node_ks(node))); - /* same size, just replace it. 
Note that we could - * also reuse this node if the new data is smaller, - * but instead we opt to shrink the node in that case. */ - if (flags & MDBX_RESERVE) - data->iov_base = old_data.iov_base; - else if (!(mc->mc_flags & C_SUB)) - memcpy(old_data.iov_base, data->iov_base, data->iov_len); - else { - cASSERT(mc, page_numkeys(mc->mc_pg[mc->mc_top]) == 1); - cASSERT(mc, PAGETYPE_COMPAT(mc->mc_pg[mc->mc_top]) == P_LEAF); - cASSERT(mc, node_ds(node) == 0); - cASSERT(mc, node_flags(node) == 0); - cASSERT(mc, key->iov_len < UINT16_MAX); - node_set_ks(node, key->iov_len); - memcpy(node_key(node), key->iov_base, key->iov_len); - cASSERT(mc, ptr_disp(node_key(node), node_ds(node)) < - ptr_disp(mc->mc_pg[mc->mc_top], env->me_psize)); - goto fix_parent; - } - - if (AUDIT_ENABLED()) { - err = cursor_check(mc); - if (unlikely(err != MDBX_SUCCESS)) - return err; - } - return MDBX_SUCCESS; - } - } - node_del(mc, 0); - } - - ref_data = data; - -insert_node:; - const unsigned naf = flags & NODE_ADD_FLAGS; - size_t nsize = IS_LEAF2(mc->mc_pg[mc->mc_top]) - ? key->iov_len - : leaf_size(env, key, ref_data); - if (page_room(mc->mc_pg[mc->mc_top]) < nsize) { - rc = page_split(mc, key, ref_data, P_INVALID, - insert_key ? naf : naf | MDBX_SPLIT_REPLACE); - if (rc == MDBX_SUCCESS && AUDIT_ENABLED()) - rc = insert_key ? cursor_check(mc) : cursor_check_updating(mc); - } else { - /* There is room already in this leaf page. */ - if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { - cASSERT(mc, !(naf & (F_BIGDATA | F_SUBDATA | F_DUPDATA)) && - ref_data->iov_len == 0); - rc = node_add_leaf2(mc, mc->mc_ki[mc->mc_top], key); - } else - rc = node_add_leaf(mc, mc->mc_ki[mc->mc_top], key, ref_data, naf); - if (likely(rc == 0)) { - /* Adjust other cursors pointing to mp */ - const MDBX_dbi dbi = mc->mc_dbi; - const size_t top = mc->mc_top; - MDBX_page *const mp = mc->mc_pg[top]; - for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[dbi]; m2; - m2 = m2->mc_next) { - MDBX_cursor *m3 = - (mc->mc_flags & C_SUB) ? 
&m2->mc_xcursor->mx_cursor : m2; - if (m3 == mc || m3->mc_snum < mc->mc_snum || m3->mc_pg[top] != mp) - continue; - if (m3->mc_ki[top] >= mc->mc_ki[top]) - m3->mc_ki[top] += insert_key; - if (XCURSOR_INITED(m3)) - XCURSOR_REFRESH(m3, mp, m3->mc_ki[top]); - } - } - } - - if (likely(rc == MDBX_SUCCESS)) { - /* Now store the actual data in the child DB. Note that we're - * storing the user data in the keys field, so there are strict - * size limits on dupdata. The actual data fields of the child - * DB are all zero size. */ - if (flags & F_DUPDATA) { - MDBX_val empty; - dupsort_put: - empty.iov_len = 0; - empty.iov_base = nullptr; - MDBX_node *node = page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); -#define SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE 1 - STATIC_ASSERT( - (MDBX_NODUPDATA >> SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE) == - MDBX_NOOVERWRITE); - unsigned xflags = - MDBX_CURRENT | ((flags & MDBX_NODUPDATA) >> - SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE); - if ((flags & MDBX_CURRENT) == 0) { - xflags -= MDBX_CURRENT; - err = cursor_xinit1(mc, node, mc->mc_pg[mc->mc_top]); - if (unlikely(err != MDBX_SUCCESS)) - return err; - } - if (sub_root) - mc->mc_xcursor->mx_cursor.mc_pg[0] = sub_root; - /* converted, write the original data first */ - if (old_singledup.iov_base) { - rc = cursor_put_nochecklen(&mc->mc_xcursor->mx_cursor, &old_singledup, - &empty, xflags); - if (unlikely(rc)) - goto dupsort_error; - } - if (!(node_flags(node) & F_SUBDATA) || sub_root) { - /* Adjust other cursors pointing to mp */ - MDBX_xcursor *const mx = mc->mc_xcursor; - const size_t top = mc->mc_top; - MDBX_page *const mp = mc->mc_pg[top]; - const intptr_t nkeys = page_numkeys(mp); - - for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; - m2 = m2->mc_next) { - if (m2 == mc || m2->mc_snum < mc->mc_snum) - continue; - if (!(m2->mc_flags & C_INITIALIZED)) - continue; - if (m2->mc_pg[top] == mp) { - if (m2->mc_ki[top] == mc->mc_ki[top]) { - err = cursor_xinit2(m2, mx, 
old_singledup.iov_base != nullptr); - if (unlikely(err != MDBX_SUCCESS)) - return err; - } else if (!insert_key && m2->mc_ki[top] < nkeys) { - XCURSOR_REFRESH(m2, mp, m2->mc_ki[top]); - } - } - } - } - cASSERT(mc, mc->mc_xcursor->mx_db.md_entries < PTRDIFF_MAX); - const size_t probe = (size_t)mc->mc_xcursor->mx_db.md_entries; -#define SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND 1 - STATIC_ASSERT((MDBX_APPENDDUP >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND) == - MDBX_APPEND); - xflags |= (flags & MDBX_APPENDDUP) >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND; - rc = cursor_put_nochecklen(&mc->mc_xcursor->mx_cursor, data, &empty, - xflags); - if (flags & F_SUBDATA) { - void *db = node_data(node); - mc->mc_xcursor->mx_db.md_mod_txnid = mc->mc_txn->mt_txnid; - memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDBX_db)); - } - insert_data = (probe != (size_t)mc->mc_xcursor->mx_db.md_entries); - } - /* Increment count unless we just replaced an existing item. */ - if (insert_data) - mc->mc_db->md_entries++; - if (insert_key) { - if (unlikely(rc != MDBX_SUCCESS)) - goto dupsort_error; - /* If we succeeded and the key didn't exist before, - * make sure the cursor is marked valid. 
*/ - mc->mc_flags |= C_INITIALIZED; - } - if (likely(rc == MDBX_SUCCESS)) { - if (unlikely(batch_dupfixed_done)) { - batch_dupfixed_continue: - /* let caller know how many succeeded, if any */ - if ((*batch_dupfixed_done += 1) < batch_dupfixed_given) { - data[0].iov_base = ptr_disp(data[0].iov_base, data[0].iov_len); - insert_key = insert_data = false; - old_singledup.iov_base = nullptr; - goto more; - } - } - if (AUDIT_ENABLED()) - rc = cursor_check(mc); - } - return rc; - - dupsort_error: - if (unlikely(rc == MDBX_KEYEXIST)) { - /* should not happen, we deleted that item */ - ERROR("Unexpected %i error while put to nested dupsort's hive", rc); - rc = MDBX_PROBLEM; - } - } - mc->mc_txn->mt_flags |= MDBX_TXN_ERROR; - return rc; -} - -static __hot int cursor_put_checklen(MDBX_cursor *mc, const MDBX_val *key, - MDBX_val *data, unsigned flags) { - cASSERT(mc, (mc->mc_flags & C_SUB) == 0); - uint64_t aligned_keybytes, aligned_databytes; - MDBX_val aligned_key, aligned_data; - if (unlikely(key->iov_len < mc->mc_dbx->md_klen_min || - key->iov_len > mc->mc_dbx->md_klen_max)) { - cASSERT(mc, !"Invalid key-size"); - return MDBX_BAD_VALSIZE; - } - if (unlikely(data->iov_len < mc->mc_dbx->md_vlen_min || - data->iov_len > mc->mc_dbx->md_vlen_max)) { - cASSERT(mc, !"Invalid data-size"); - return MDBX_BAD_VALSIZE; - } - - if (mc->mc_db->md_flags & MDBX_INTEGERKEY) { - switch (key->iov_len) { - default: - cASSERT(mc, !"key-size is invalid for MDBX_INTEGERKEY"); - return MDBX_BAD_VALSIZE; - case 4: - if (unlikely(3 & (uintptr_t)key->iov_base)) { - /* copy instead of return error to avoid break compatibility */ - aligned_key.iov_base = - memcpy(&aligned_keybytes, key->iov_base, aligned_key.iov_len = 4); - key = &aligned_key; - } - break; - case 8: - if (unlikely(7 & (uintptr_t)key->iov_base)) { - /* copy instead of return error to avoid break compatibility */ - aligned_key.iov_base = - memcpy(&aligned_keybytes, key->iov_base, aligned_key.iov_len = 8); - key = &aligned_key; - } - 
break; - } - } - if (mc->mc_db->md_flags & MDBX_INTEGERDUP) { - switch (data->iov_len) { - default: - cASSERT(mc, !"data-size is invalid for MDBX_INTEGERKEY"); - return MDBX_BAD_VALSIZE; - case 4: - if (unlikely(3 & (uintptr_t)data->iov_base)) { - if (unlikely(flags & MDBX_MULTIPLE)) - return MDBX_BAD_VALSIZE; - /* copy instead of return error to avoid break compatibility */ - aligned_data.iov_base = memcpy(&aligned_databytes, data->iov_base, - aligned_data.iov_len = 4); - data = &aligned_data; - } - break; - case 8: - if (unlikely(7 & (uintptr_t)data->iov_base)) { - if (unlikely(flags & MDBX_MULTIPLE)) - return MDBX_BAD_VALSIZE; - /* copy instead of return error to avoid break compatibility */ - aligned_data.iov_base = memcpy(&aligned_databytes, data->iov_base, - aligned_data.iov_len = 8); - data = &aligned_data; - } - break; - } - } - return cursor_put_nochecklen(mc, key, data, flags); -} - -int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, - MDBX_put_flags_t flags) { - if (unlikely(mc == NULL || key == NULL || data == NULL)) - return MDBX_EINVAL; - - if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL - : MDBX_EBADSIGN; - - int rc = check_txn_rw(mc->mc_txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(dbi_changed(mc->mc_txn, mc->mc_dbi))) - return MDBX_BAD_DBI; - - cASSERT(mc, cursor_is_tracked(mc)); - - /* Check this first so counter will always be zero on any early failures. 
*/ - if (unlikely(flags & MDBX_MULTIPLE)) { - if (unlikely(flags & MDBX_RESERVE)) - return MDBX_EINVAL; - if (unlikely(!(mc->mc_db->md_flags & MDBX_DUPFIXED))) - return MDBX_INCOMPATIBLE; - const size_t dcount = data[1].iov_len; - if (unlikely(dcount < 2 || data->iov_len == 0)) - return MDBX_BAD_VALSIZE; - if (unlikely(mc->mc_db->md_xsize != data->iov_len) && mc->mc_db->md_xsize) - return MDBX_BAD_VALSIZE; - if (unlikely(dcount > MAX_MAPSIZE / 2 / - (BRANCH_NODE_MAX(MAX_PAGESIZE) - NODESIZE))) { - /* checking for multiplication overflow */ - if (unlikely(dcount > MAX_MAPSIZE / 2 / data->iov_len)) - return MDBX_TOO_LARGE; - } - } - - if (flags & MDBX_RESERVE) { - if (unlikely(mc->mc_db->md_flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | - MDBX_INTEGERDUP | MDBX_DUPFIXED))) - return MDBX_INCOMPATIBLE; - data->iov_base = nullptr; - } - - if (unlikely(mc->mc_txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) - return (mc->mc_txn->mt_flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS - : MDBX_BAD_TXN; - - return cursor_put_checklen(mc, key, data, flags); -} - -int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) { - if (unlikely(!mc)) - return MDBX_EINVAL; - - if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? 
MDBX_EINVAL - : MDBX_EBADSIGN; - - int rc = check_txn_rw(mc->mc_txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(dbi_changed(mc->mc_txn, mc->mc_dbi))) - return MDBX_BAD_DBI; - - if (unlikely(!(mc->mc_flags & C_INITIALIZED))) - return MDBX_ENODATA; - - if (unlikely(mc->mc_ki[mc->mc_top] >= page_numkeys(mc->mc_pg[mc->mc_top]))) - return MDBX_NOTFOUND; - - return cursor_del(mc, flags); -} - -static __hot int cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) { - cASSERT(mc, mc->mc_flags & C_INITIALIZED); - cASSERT(mc, mc->mc_ki[mc->mc_top] < page_numkeys(mc->mc_pg[mc->mc_top])); - - int rc = cursor_touch(mc, nullptr, nullptr); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - MDBX_page *mp = mc->mc_pg[mc->mc_top]; - cASSERT(mc, IS_MODIFIABLE(mc->mc_txn, mp)); - if (!MDBX_DISABLE_VALIDATION && unlikely(!CHECK_LEAF_TYPE(mc, mp))) { - ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", - mp->mp_pgno, mp->mp_flags); - return MDBX_CORRUPTED; - } - if (IS_LEAF2(mp)) - goto del_key; - - MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); - if (node_flags(node) & F_DUPDATA) { - if (flags & (MDBX_ALLDUPS | /* for compatibility */ MDBX_NODUPDATA)) { - /* will subtract the final entry later */ - mc->mc_db->md_entries -= mc->mc_xcursor->mx_db.md_entries - 1; - mc->mc_xcursor->mx_cursor.mc_flags &= ~C_INITIALIZED; - } else { - if (!(node_flags(node) & F_SUBDATA)) - mc->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node); - rc = cursor_del(&mc->mc_xcursor->mx_cursor, 0); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - /* If sub-DB still has entries, we're done */ - if (mc->mc_xcursor->mx_db.md_entries) { - if (node_flags(node) & F_SUBDATA) { - /* update subDB info */ - mc->mc_xcursor->mx_db.md_mod_txnid = mc->mc_txn->mt_txnid; - memcpy(node_data(node), &mc->mc_xcursor->mx_db, sizeof(MDBX_db)); - } else { - /* shrink sub-page */ - node = node_shrink(mp, mc->mc_ki[mc->mc_top], node); - 
mc->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node); - /* fix other sub-DB cursors pointed at sub-pages on this page */ - for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; - m2 = m2->mc_next) { - if (m2 == mc || m2->mc_snum < mc->mc_snum) - continue; - if (!(m2->mc_flags & C_INITIALIZED)) - continue; - if (m2->mc_pg[mc->mc_top] == mp) { - MDBX_node *inner = node; - if (m2->mc_ki[mc->mc_top] >= page_numkeys(mp)) - continue; - if (m2->mc_ki[mc->mc_top] != mc->mc_ki[mc->mc_top]) { - inner = page_node(mp, m2->mc_ki[mc->mc_top]); - if (node_flags(inner) & F_SUBDATA) - continue; - } - m2->mc_xcursor->mx_cursor.mc_pg[0] = node_data(inner); - } - } - } - mc->mc_db->md_entries--; - cASSERT(mc, mc->mc_db->md_entries > 0 && mc->mc_db->md_depth > 0 && - mc->mc_db->md_root != P_INVALID); - return rc; - } else { - mc->mc_xcursor->mx_cursor.mc_flags &= ~C_INITIALIZED; - } - /* otherwise fall thru and delete the sub-DB */ - } - - if (node_flags(node) & F_SUBDATA) { - /* add all the child DB's pages to the free list */ - rc = drop_tree(&mc->mc_xcursor->mx_cursor, false); - if (unlikely(rc)) - goto fail; - } - } - /* MDBX passes F_SUBDATA in 'flags' to delete a DB record */ - else if (unlikely((node_flags(node) ^ flags) & F_SUBDATA)) - return MDBX_INCOMPATIBLE; - - /* add large/overflow pages to free list */ - if (node_flags(node) & F_BIGDATA) { - pgr_t lp = page_get_large(mc, node_largedata_pgno(node), mp->mp_txnid); - if (unlikely((rc = lp.err) || (rc = page_retire(mc, lp.page)))) - goto fail; - } - -del_key: - mc->mc_db->md_entries--; - const MDBX_dbi dbi = mc->mc_dbi; - indx_t ki = mc->mc_ki[mc->mc_top]; - mp = mc->mc_pg[mc->mc_top]; - cASSERT(mc, IS_LEAF(mp)); - node_del(mc, mc->mc_db->md_xsize); - - /* Adjust other cursors pointing to mp */ - for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { - MDBX_cursor *m3 = (mc->mc_flags & C_SUB) ? 
&m2->mc_xcursor->mx_cursor : m2; - if (m3 == mc || !(m2->mc_flags & m3->mc_flags & C_INITIALIZED)) - continue; - if (m3->mc_snum < mc->mc_snum) - continue; - if (m3->mc_pg[mc->mc_top] == mp) { - if (m3->mc_ki[mc->mc_top] == ki) { - m3->mc_flags |= C_DEL; - if (mc->mc_db->md_flags & MDBX_DUPSORT) { - /* Sub-cursor referred into dataset which is gone */ - m3->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); - } - continue; - } else if (m3->mc_ki[mc->mc_top] > ki) { - m3->mc_ki[mc->mc_top]--; - } - if (XCURSOR_INITED(m3)) - XCURSOR_REFRESH(m3, m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]); - } - } - - rc = rebalance(mc); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - - if (unlikely(!mc->mc_snum)) { - /* DB is totally empty now, just bail out. - * Other cursors adjustments were already done - * by rebalance and aren't needed here. */ - cASSERT(mc, mc->mc_db->md_entries == 0 && mc->mc_db->md_depth == 0 && - mc->mc_db->md_root == P_INVALID); - mc->mc_flags |= C_EOF; - return MDBX_SUCCESS; - } - - ki = mc->mc_ki[mc->mc_top]; - mp = mc->mc_pg[mc->mc_top]; - cASSERT(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); - size_t nkeys = page_numkeys(mp); - cASSERT(mc, (mc->mc_db->md_entries > 0 && nkeys > 0) || - ((mc->mc_flags & C_SUB) && mc->mc_db->md_entries == 0 && - nkeys == 0)); - - /* Adjust this and other cursors pointing to mp */ - for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { - MDBX_cursor *m3 = (mc->mc_flags & C_SUB) ? 
&m2->mc_xcursor->mx_cursor : m2; - if (!(m2->mc_flags & m3->mc_flags & C_INITIALIZED)) - continue; - if (m3->mc_snum < mc->mc_snum) - continue; - if (m3->mc_pg[mc->mc_top] == mp) { - /* if m3 points past last node in page, find next sibling */ - if (m3->mc_ki[mc->mc_top] >= nkeys) { - rc = cursor_sibling(m3, SIBLING_RIGHT); - if (rc == MDBX_NOTFOUND) { - m3->mc_flags |= C_EOF; - rc = MDBX_SUCCESS; - continue; - } - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - } - if (m3->mc_ki[mc->mc_top] >= ki || - /* moved to right sibling */ m3->mc_pg[mc->mc_top] != mp) { - if (m3->mc_xcursor && !(m3->mc_flags & C_EOF)) { - node = page_node(m3->mc_pg[m3->mc_top], m3->mc_ki[m3->mc_top]); - /* If this node has dupdata, it may need to be reinited - * because its data has moved. - * If the xcursor was not inited it must be reinited. - * Else if node points to a subDB, nothing is needed. */ - if (node_flags(node) & F_DUPDATA) { - if (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { - if (!(node_flags(node) & F_SUBDATA)) - m3->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node); - } else { - rc = cursor_xinit1(m3, node, m3->mc_pg[m3->mc_top]); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - rc = cursor_first(&m3->mc_xcursor->mx_cursor, NULL, NULL); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - } - } - m3->mc_xcursor->mx_cursor.mc_flags |= C_DEL; - } - m3->mc_flags |= C_DEL; - } - } - } - - cASSERT(mc, rc == MDBX_SUCCESS); - if (AUDIT_ENABLED()) - rc = cursor_check(mc); - return rc; - -fail: - mc->mc_txn->mt_flags |= MDBX_TXN_ERROR; - return rc; -} - -/* Allocate and initialize new pages for a database. - * Set MDBX_TXN_ERROR on failure. 
*/ -static pgr_t page_new(MDBX_cursor *mc, const unsigned flags) { - cASSERT(mc, (flags & P_OVERFLOW) == 0); - pgr_t ret = page_alloc(mc); - if (unlikely(ret.err != MDBX_SUCCESS)) - return ret; - - DEBUG("db %u allocated new page %" PRIaPGNO, mc->mc_dbi, ret.page->mp_pgno); - ret.page->mp_flags = (uint16_t)flags; - cASSERT(mc, *mc->mc_dbi_state & DBI_DIRTY); - cASSERT(mc, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY); -#if MDBX_ENABLE_PGOP_STAT - mc->mc_txn->mt_env->me_lck->mti_pgop_stat.newly.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - - STATIC_ASSERT(P_BRANCH == 1); - const unsigned is_branch = flags & P_BRANCH; - - ret.page->mp_lower = 0; - ret.page->mp_upper = (indx_t)(mc->mc_txn->mt_env->me_psize - PAGEHDRSZ); - mc->mc_db->md_branch_pages += is_branch; - mc->mc_db->md_leaf_pages += 1 - is_branch; - if (unlikely(mc->mc_flags & C_SUB)) { - MDBX_db *outer = outer_db(mc); - outer->md_branch_pages += is_branch; - outer->md_leaf_pages += 1 - is_branch; - } - return ret; -} - -static pgr_t page_new_large(MDBX_cursor *mc, const size_t npages) { - pgr_t ret = likely(npages == 1) - ? 
page_alloc(mc) - : page_alloc_slowpath(mc, npages, MDBX_ALLOC_DEFAULT); - if (unlikely(ret.err != MDBX_SUCCESS)) - return ret; - - DEBUG("db %u allocated new large-page %" PRIaPGNO ", num %zu", mc->mc_dbi, - ret.page->mp_pgno, npages); - ret.page->mp_flags = P_OVERFLOW; - cASSERT(mc, *mc->mc_dbi_state & DBI_DIRTY); - cASSERT(mc, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY); -#if MDBX_ENABLE_PGOP_STAT - mc->mc_txn->mt_env->me_lck->mti_pgop_stat.newly.weak += npages; -#endif /* MDBX_ENABLE_PGOP_STAT */ - - mc->mc_db->md_overflow_pages += (pgno_t)npages; - ret.page->mp_pages = (pgno_t)npages; - cASSERT(mc, !(mc->mc_flags & C_SUB)); - return ret; -} - -__hot static int __must_check_result node_add_leaf2(MDBX_cursor *mc, - size_t indx, - const MDBX_val *key) { - MDBX_page *mp = mc->mc_pg[mc->mc_top]; - MDBX_ANALYSIS_ASSUME(key != nullptr); - DKBUF_DEBUG; - DEBUG("add to leaf2-%spage %" PRIaPGNO " index %zi, " - " key size %" PRIuPTR " [%s]", - IS_SUBP(mp) ? "sub-" : "", mp->mp_pgno, indx, key ? key->iov_len : 0, - DKEY_DEBUG(key)); - - cASSERT(mc, key); - cASSERT(mc, PAGETYPE_COMPAT(mp) == (P_LEAF | P_LEAF2)); - const size_t ksize = mc->mc_db->md_xsize; - cASSERT(mc, ksize == key->iov_len); - const size_t nkeys = page_numkeys(mp); - cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->mp_upper) & 1) == 0); - - /* Just using these for counting */ - const intptr_t lower = mp->mp_lower + sizeof(indx_t); - const intptr_t upper = mp->mp_upper - (ksize - sizeof(indx_t)); - if (unlikely(lower > upper)) { - mc->mc_txn->mt_flags |= MDBX_TXN_ERROR; - return MDBX_PAGE_FULL; - } - mp->mp_lower = (indx_t)lower; - mp->mp_upper = (indx_t)upper; - - void *const ptr = page_leaf2key(mp, indx, ksize); - cASSERT(mc, nkeys >= indx); - const size_t diff = nkeys - indx; - if (likely(diff > 0)) - /* Move higher keys up one slot. 
*/ - memmove(ptr_disp(ptr, ksize), ptr, diff * ksize); - /* insert new key */ - memcpy(ptr, key->iov_base, ksize); - - cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->mp_upper) & 1) == 0); - return MDBX_SUCCESS; -} - -static int __must_check_result node_add_branch(MDBX_cursor *mc, size_t indx, - const MDBX_val *key, - pgno_t pgno) { - MDBX_page *mp = mc->mc_pg[mc->mc_top]; - DKBUF_DEBUG; - DEBUG("add to branch-%spage %" PRIaPGNO " index %zi, node-pgno %" PRIaPGNO - " key size %" PRIuPTR " [%s]", - IS_SUBP(mp) ? "sub-" : "", mp->mp_pgno, indx, pgno, - key ? key->iov_len : 0, DKEY_DEBUG(key)); - - cASSERT(mc, PAGETYPE_WHOLE(mp) == P_BRANCH); - STATIC_ASSERT(NODESIZE % 2 == 0); - - /* Move higher pointers up one slot. */ - const size_t nkeys = page_numkeys(mp); - cASSERT(mc, nkeys >= indx); - for (size_t i = nkeys; i > indx; --i) - mp->mp_ptrs[i] = mp->mp_ptrs[i - 1]; - - /* Adjust free space offsets. */ - const size_t branch_bytes = branch_size(mc->mc_txn->mt_env, key); - const intptr_t lower = mp->mp_lower + sizeof(indx_t); - const intptr_t upper = mp->mp_upper - (branch_bytes - sizeof(indx_t)); - if (unlikely(lower > upper)) { - mc->mc_txn->mt_flags |= MDBX_TXN_ERROR; - return MDBX_PAGE_FULL; - } - mp->mp_lower = (indx_t)lower; - mp->mp_ptrs[indx] = mp->mp_upper = (indx_t)upper; - - /* Write the node data. 
*/ - MDBX_node *node = page_node(mp, indx); - node_set_pgno(node, pgno); - node_set_flags(node, 0); - UNALIGNED_POKE_8(node, MDBX_node, mn_extra, 0); - node_set_ks(node, 0); - if (likely(key != NULL)) { - node_set_ks(node, key->iov_len); - memcpy(node_key(node), key->iov_base, key->iov_len); - } - return MDBX_SUCCESS; -} - -__hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx, - const MDBX_val *key, - MDBX_val *data, - unsigned flags) { - MDBX_ANALYSIS_ASSUME(key != nullptr); - MDBX_ANALYSIS_ASSUME(data != nullptr); - MDBX_page *mp = mc->mc_pg[mc->mc_top]; - DKBUF_DEBUG; - DEBUG("add to leaf-%spage %" PRIaPGNO " index %zi, data size %" PRIuPTR - " key size %" PRIuPTR " [%s]", - IS_SUBP(mp) ? "sub-" : "", mp->mp_pgno, indx, data ? data->iov_len : 0, - key ? key->iov_len : 0, DKEY_DEBUG(key)); - cASSERT(mc, key != NULL && data != NULL); - cASSERT(mc, PAGETYPE_COMPAT(mp) == P_LEAF); - MDBX_page *largepage = NULL; - - size_t node_bytes; - if (unlikely(flags & F_BIGDATA)) { - /* Data already on large/overflow page. */ - STATIC_ASSERT(sizeof(pgno_t) % 2 == 0); - node_bytes = - node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t); - cASSERT(mc, page_room(mp) >= node_bytes); - } else if (unlikely(node_size(key, data) > - mc->mc_txn->mt_env->me_leaf_nodemax)) { - /* Put data on large/overflow page. 
*/ - if (unlikely(mc->mc_db->md_flags & MDBX_DUPSORT)) { - ERROR("Unexpected target %s flags 0x%x for large data-item", "dupsort-db", - mc->mc_db->md_flags); - return MDBX_PROBLEM; - } - if (unlikely(flags & (F_DUPDATA | F_SUBDATA))) { - ERROR("Unexpected target %s flags 0x%x for large data-item", "node", - flags); - return MDBX_PROBLEM; - } - cASSERT(mc, page_room(mp) >= leaf_size(mc->mc_txn->mt_env, key, data)); - const pgno_t ovpages = number_of_ovpages(mc->mc_txn->mt_env, data->iov_len); - const pgr_t npr = page_new_large(mc, ovpages); - if (unlikely(npr.err != MDBX_SUCCESS)) - return npr.err; - largepage = npr.page; - DEBUG("allocated %u large/overflow page(s) %" PRIaPGNO "for %" PRIuPTR - " data bytes", - largepage->mp_pages, largepage->mp_pgno, data->iov_len); - flags |= F_BIGDATA; - node_bytes = - node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t); - cASSERT(mc, node_bytes == leaf_size(mc->mc_txn->mt_env, key, data)); - } else { - cASSERT(mc, page_room(mp) >= leaf_size(mc->mc_txn->mt_env, key, data)); - node_bytes = node_size(key, data) + sizeof(indx_t); - cASSERT(mc, node_bytes == leaf_size(mc->mc_txn->mt_env, key, data)); - } - - /* Move higher pointers up one slot. */ - const size_t nkeys = page_numkeys(mp); - cASSERT(mc, nkeys >= indx); - for (size_t i = nkeys; i > indx; --i) - mp->mp_ptrs[i] = mp->mp_ptrs[i - 1]; - - /* Adjust free space offsets. */ - const intptr_t lower = mp->mp_lower + sizeof(indx_t); - const intptr_t upper = mp->mp_upper - (node_bytes - sizeof(indx_t)); - if (unlikely(lower > upper)) { - mc->mc_txn->mt_flags |= MDBX_TXN_ERROR; - return MDBX_PAGE_FULL; - } - mp->mp_lower = (indx_t)lower; - mp->mp_ptrs[indx] = mp->mp_upper = (indx_t)upper; - - /* Write the node data. 
*/ - MDBX_node *node = page_node(mp, indx); - node_set_ks(node, key->iov_len); - node_set_flags(node, (uint8_t)flags); - UNALIGNED_POKE_8(node, MDBX_node, mn_extra, 0); - node_set_ds(node, data->iov_len); - memcpy(node_key(node), key->iov_base, key->iov_len); - - void *nodedata = node_data(node); - if (likely(largepage == NULL)) { - if (unlikely(flags & F_BIGDATA)) { - memcpy(nodedata, data->iov_base, sizeof(pgno_t)); - return MDBX_SUCCESS; - } - } else { - poke_pgno(nodedata, largepage->mp_pgno); - nodedata = page_data(largepage); - } - if (unlikely(flags & MDBX_RESERVE)) - data->iov_base = nodedata; - else if (likely(nodedata != data->iov_base && - data->iov_len /* to avoid UBSAN traps*/ != 0)) - memcpy(nodedata, data->iov_base, data->iov_len); - return MDBX_SUCCESS; -} - -/* Delete the specified node from a page. - * [in] mc Cursor pointing to the node to delete. - * [in] ksize The size of a node. Only used if the page is - * part of a MDBX_DUPFIXED database. */ -__hot static void node_del(MDBX_cursor *mc, size_t ksize) { - MDBX_page *mp = mc->mc_pg[mc->mc_top]; - const size_t hole = mc->mc_ki[mc->mc_top]; - const size_t nkeys = page_numkeys(mp); - - DEBUG("delete node %zu on %s page %" PRIaPGNO, hole, - IS_LEAF(mp) ? 
"leaf" : "branch", mp->mp_pgno); - cASSERT(mc, hole < nkeys); - - if (IS_LEAF2(mp)) { - cASSERT(mc, ksize >= sizeof(indx_t)); - size_t diff = nkeys - 1 - hole; - void *const base = page_leaf2key(mp, hole, ksize); - if (diff) - memmove(base, ptr_disp(base, ksize), diff * ksize); - cASSERT(mc, mp->mp_lower >= sizeof(indx_t)); - mp->mp_lower -= sizeof(indx_t); - cASSERT(mc, (size_t)UINT16_MAX - mp->mp_upper >= ksize - sizeof(indx_t)); - mp->mp_upper += (indx_t)(ksize - sizeof(indx_t)); - cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->mp_upper) & 1) == 0); - return; - } - - MDBX_node *node = page_node(mp, hole); - cASSERT(mc, !IS_BRANCH(mp) || hole || node_ks(node) == 0); - size_t hole_size = NODESIZE + node_ks(node); - if (IS_LEAF(mp)) - hole_size += - (node_flags(node) & F_BIGDATA) ? sizeof(pgno_t) : node_ds(node); - hole_size = EVEN(hole_size); - - const indx_t hole_offset = mp->mp_ptrs[hole]; - size_t r, w; - for (r = w = 0; r < nkeys; r++) - if (r != hole) - mp->mp_ptrs[w++] = (mp->mp_ptrs[r] < hole_offset) - ? mp->mp_ptrs[r] + (indx_t)hole_size - : mp->mp_ptrs[r]; - - void *const base = ptr_disp(mp, mp->mp_upper + PAGEHDRSZ); - memmove(ptr_disp(base, hole_size), base, hole_offset - mp->mp_upper); - - cASSERT(mc, mp->mp_lower >= sizeof(indx_t)); - mp->mp_lower -= sizeof(indx_t); - cASSERT(mc, (size_t)UINT16_MAX - mp->mp_upper >= hole_size); - mp->mp_upper += (indx_t)hole_size; - - if (AUDIT_ENABLED()) { - const uint8_t checking = mc->mc_checking; - mc->mc_checking |= CC_UPDATING; - const int page_check_err = page_check(mc, mp); - mc->mc_checking = checking; - cASSERT(mc, page_check_err == MDBX_SUCCESS); - } -} - -/* Compact the main page after deleting a node on a subpage. - * [in] mp The main page to operate on. - * [in] indx The index of the subpage on the main page. 
*/ -static MDBX_node *node_shrink(MDBX_page *mp, size_t indx, MDBX_node *node) { - assert(node = page_node(mp, indx)); - MDBX_page *sp = (MDBX_page *)node_data(node); - assert(IS_SUBP(sp) && page_numkeys(sp) > 0); - const size_t delta = - EVEN_FLOOR(page_room(sp) /* avoid the node uneven-sized */); - if (unlikely(delta) == 0) - return node; - - /* Prepare to shift upward, set len = length(subpage part to shift) */ - size_t nsize = node_ds(node) - delta, len = nsize; - assert(nsize % 1 == 0); - if (!IS_LEAF2(sp)) { - len = PAGEHDRSZ; - MDBX_page *xp = ptr_disp(sp, delta); /* destination subpage */ - for (intptr_t i = page_numkeys(sp); --i >= 0;) { - assert(sp->mp_ptrs[i] >= delta); - xp->mp_ptrs[i] = (indx_t)(sp->mp_ptrs[i] - delta); - } - } - assert(sp->mp_upper >= sp->mp_lower + delta); - sp->mp_upper -= (indx_t)delta; - sp->mp_pgno = mp->mp_pgno; - node_set_ds(node, nsize); - - /* Shift upward */ - void *const base = ptr_disp(mp, mp->mp_upper + PAGEHDRSZ); - memmove(ptr_disp(base, delta), base, ptr_dist(sp, base) + len); - - const size_t pivot = mp->mp_ptrs[indx]; - for (intptr_t i = page_numkeys(mp); --i >= 0;) { - if (mp->mp_ptrs[i] <= pivot) { - assert((size_t)UINT16_MAX - mp->mp_ptrs[i] >= delta); - mp->mp_ptrs[i] += (indx_t)delta; - } - } - assert((size_t)UINT16_MAX - mp->mp_upper >= delta); - mp->mp_upper += (indx_t)delta; - - return ptr_disp(node, delta); -} - -/* Initial setup of a sorted-dups cursor. - * - * Sorted duplicates are implemented as a sub-database for the given key. - * The duplicate data items are actually keys of the sub-database. - * Operations on the duplicate data items are performed using a sub-cursor - * initialized when the sub-database is first accessed. This function does - * the preliminary setup of the sub-cursor, filling in the fields that - * depend only on the parent DB. - * - * [in] mc The main cursor whose sorted-dups cursor is to be initialized. 
*/ -static int cursor_xinit0(MDBX_cursor *mc) { - MDBX_xcursor *mx = mc->mc_xcursor; - if (!MDBX_DISABLE_VALIDATION && unlikely(mx == nullptr)) { - ERROR("unexpected dupsort-page for non-dupsort db/cursor (dbi %u)", - mc->mc_dbi); - return MDBX_CORRUPTED; - } - - mx->mx_cursor.mc_xcursor = NULL; - mx->mx_cursor.mc_next = NULL; - mx->mx_cursor.mc_txn = mc->mc_txn; - mx->mx_cursor.mc_db = &mx->mx_db; - mx->mx_cursor.mc_dbx = &mx->mx_dbx; - mx->mx_cursor.mc_dbi = mc->mc_dbi; - mx->mx_cursor.mc_dbi_state = mc->mc_dbi_state; - mx->mx_cursor.mc_snum = 0; - mx->mx_cursor.mc_top = 0; - mx->mx_cursor.mc_flags = C_SUB; - STATIC_ASSERT(MDBX_DUPFIXED * 2 == P_LEAF2); - cASSERT(mc, (mc->mc_checking & (P_BRANCH | P_LEAF | P_LEAF2)) == P_LEAF); - mx->mx_cursor.mc_checking = - mc->mc_checking + ((mc->mc_db->md_flags & MDBX_DUPFIXED) << 1); - mx->mx_dbx.md_name.iov_len = 0; - mx->mx_dbx.md_name.iov_base = NULL; - mx->mx_dbx.md_cmp = mc->mc_dbx->md_dcmp; - mx->mx_dbx.md_dcmp = NULL; - mx->mx_dbx.md_klen_min = INT_MAX; - mx->mx_dbx.md_vlen_min = mx->mx_dbx.md_klen_max = mx->mx_dbx.md_vlen_max = 0; - return MDBX_SUCCESS; -} - -/* Final setup of a sorted-dups cursor. - * Sets up the fields that depend on the data from the main cursor. - * [in] mc The main cursor whose sorted-dups cursor is to be initialized. - * [in] node The data containing the MDBX_db record for the sorted-dup database. 
- */ -static int cursor_xinit1(MDBX_cursor *mc, const MDBX_node *node, - const MDBX_page *mp) { - MDBX_xcursor *mx = mc->mc_xcursor; - if (!MDBX_DISABLE_VALIDATION && unlikely(mx == nullptr)) { - ERROR("unexpected dupsort-page for non-dupsort db/cursor (dbi %u)", - mc->mc_dbi); - return MDBX_CORRUPTED; - } - - const uint8_t flags = node_flags(node); - switch (flags) { - default: - ERROR("invalid node flags %u", flags); - return MDBX_CORRUPTED; - case F_DUPDATA | F_SUBDATA: - if (!MDBX_DISABLE_VALIDATION && - unlikely(node_ds(node) != sizeof(MDBX_db))) { - ERROR("invalid nested-db record size (%zu, expect %zu)", node_ds(node), - sizeof(MDBX_db)); - return MDBX_CORRUPTED; - } - memcpy(&mx->mx_db, node_data(node), sizeof(MDBX_db)); - const txnid_t pp_txnid = mp->mp_txnid; - if (!MDBX_DISABLE_VALIDATION && - unlikely(mx->mx_db.md_mod_txnid > pp_txnid)) { - ERROR("nested-db.md_mod_txnid (%" PRIaTXN ") > page-txnid (%" PRIaTXN ")", - mx->mx_db.md_mod_txnid, pp_txnid); - return MDBX_CORRUPTED; - } - mx->mx_cursor.mc_pg[0] = 0; - mx->mx_cursor.mc_snum = 0; - mx->mx_cursor.mc_top = 0; - mx->mx_cursor.mc_flags = C_SUB; - break; - case F_DUPDATA: - if (!MDBX_DISABLE_VALIDATION && unlikely(node_ds(node) <= PAGEHDRSZ)) { - ERROR("invalid nested-page size %zu", node_ds(node)); - return MDBX_CORRUPTED; - } - MDBX_page *fp = node_data(node); - mx->mx_db.md_depth = 1; - mx->mx_db.md_branch_pages = 0; - mx->mx_db.md_leaf_pages = 1; - mx->mx_db.md_overflow_pages = 0; - mx->mx_db.md_entries = page_numkeys(fp); - mx->mx_db.md_root = fp->mp_pgno; - mx->mx_db.md_mod_txnid = mp->mp_txnid; - mx->mx_cursor.mc_snum = 1; - mx->mx_cursor.mc_top = 0; - mx->mx_cursor.mc_flags = C_SUB | C_INITIALIZED; - mx->mx_cursor.mc_pg[0] = fp; - mx->mx_cursor.mc_ki[0] = 0; - mx->mx_db.md_flags = flags_db2sub(mc->mc_db->md_flags); - mx->mx_db.md_xsize = - (mc->mc_db->md_flags & MDBX_DUPFIXED) ? 
fp->mp_leaf2_ksize : 0; - break; - } - - if (unlikely(mx->mx_db.md_xsize != mc->mc_db->md_xsize)) { - if (!MDBX_DISABLE_VALIDATION && unlikely(mc->mc_db->md_xsize != 0)) { - ERROR("cursor mismatched nested-db md_xsize %u", mc->mc_db->md_xsize); - return MDBX_CORRUPTED; - } - if (!MDBX_DISABLE_VALIDATION && - unlikely((mc->mc_db->md_flags & MDBX_DUPFIXED) == 0)) { - ERROR("mismatched nested-db md_flags %u", mc->mc_db->md_flags); - return MDBX_CORRUPTED; - } - if (!MDBX_DISABLE_VALIDATION && - unlikely(mx->mx_db.md_xsize < mc->mc_dbx->md_vlen_min || - mx->mx_db.md_xsize > mc->mc_dbx->md_vlen_max)) { - ERROR("mismatched nested-db.md_xsize (%u) <> min/max value-length " - "(%zu/%zu)", - mx->mx_db.md_xsize, mc->mc_dbx->md_vlen_min, - mc->mc_dbx->md_vlen_max); - return MDBX_CORRUPTED; - } - mc->mc_db->md_xsize = mx->mx_db.md_xsize; - mc->mc_dbx->md_vlen_min = mc->mc_dbx->md_vlen_max = mx->mx_db.md_xsize; - } - mx->mx_dbx.md_klen_min = mc->mc_dbx->md_vlen_min; - mx->mx_dbx.md_klen_max = mc->mc_dbx->md_vlen_max; - - DEBUG("Sub-db -%u root page %" PRIaPGNO, mx->mx_cursor.mc_dbi, - mx->mx_db.md_root); - return MDBX_SUCCESS; -} - -/* Fixup a sorted-dups cursor due to underlying update. - * Sets up some fields that depend on the data from the main cursor. - * Almost the same as init1, but skips initialization steps if the - * xcursor had already been used. - * [in] mc The main cursor whose sorted-dups cursor is to be fixed up. - * [in] src_mx The xcursor of an up-to-date cursor. - * [in] new_dupdata True if converting from a non-F_DUPDATA item. 
*/ -static int cursor_xinit2(MDBX_cursor *mc, MDBX_xcursor *src_mx, - bool new_dupdata) { - MDBX_xcursor *mx = mc->mc_xcursor; - if (!MDBX_DISABLE_VALIDATION && unlikely(mx == nullptr)) { - ERROR("unexpected dupsort-page for non-dupsort db/cursor (dbi %u)", - mc->mc_dbi); - return MDBX_CORRUPTED; - } - - if (new_dupdata) { - mx->mx_cursor.mc_snum = 1; - mx->mx_cursor.mc_top = 0; - mx->mx_cursor.mc_flags = C_SUB | C_INITIALIZED; - mx->mx_cursor.mc_ki[0] = 0; - } - - mx->mx_dbx.md_klen_min = src_mx->mx_dbx.md_klen_min; - mx->mx_dbx.md_klen_max = src_mx->mx_dbx.md_klen_max; - mx->mx_dbx.md_cmp = src_mx->mx_dbx.md_cmp; - mx->mx_db = src_mx->mx_db; - mx->mx_cursor.mc_pg[0] = src_mx->mx_cursor.mc_pg[0]; - if (mx->mx_cursor.mc_flags & C_INITIALIZED) { - DEBUG("Sub-db -%u root page %" PRIaPGNO, mx->mx_cursor.mc_dbi, - mx->mx_db.md_root); - } - return MDBX_SUCCESS; -} - -static __inline int couple_init(MDBX_cursor_couple *couple, const size_t dbi, - const MDBX_txn *const txn, MDBX_db *const db, - MDBX_dbx *const dbx, uint8_t *const dbi_state) { - tASSERT(txn, F_ISSET(*dbi_state, DBI_VALID | DBI_LINDO)); - couple->outer.mc_signature = MDBX_MC_LIVE; - couple->outer.mc_next = NULL; - couple->outer.mc_backup = NULL; - couple->outer.mc_dbi = (MDBX_dbi)dbi; - couple->outer.mc_txn = (MDBX_txn *)txn; - couple->outer.mc_db = db; - couple->outer.mc_dbx = dbx; - couple->outer.mc_dbi_state = dbi_state; - couple->outer.mc_snum = 0; - couple->outer.mc_top = 0; - couple->outer.mc_pg[0] = 0; - couple->outer.mc_flags = 0; - STATIC_ASSERT(CC_BRANCH == P_BRANCH && CC_LEAF == P_LEAF && - CC_OVERFLOW == P_OVERFLOW && CC_LEAF2 == P_LEAF2); - couple->outer.mc_checking = - (AUDIT_ENABLED() || (txn->mt_env->me_flags & MDBX_VALIDATION)) - ? 
CC_PAGECHECK | CC_LEAF - : CC_LEAF; - couple->outer.mc_ki[0] = 0; - couple->outer.mc_xcursor = NULL; - - int rc = MDBX_SUCCESS; - if (unlikely(*couple->outer.mc_dbi_state & DBI_STALE)) { - rc = page_search(&couple->outer, NULL, MDBX_PS_ROOTONLY); - rc = (rc != MDBX_NOTFOUND) ? rc : MDBX_SUCCESS; - } else if (unlikely(dbx->md_klen_max == 0)) { - rc = setup_sdb(dbx, db, txn->mt_env->me_psize); - } - - if (couple->outer.mc_db->md_flags & MDBX_DUPSORT) { - couple->inner.mx_cursor.mc_signature = MDBX_MC_LIVE; - couple->outer.mc_xcursor = &couple->inner; - rc = cursor_xinit0(&couple->outer); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - couple->inner.mx_dbx.md_klen_min = couple->outer.mc_dbx->md_vlen_min; - couple->inner.mx_dbx.md_klen_max = couple->outer.mc_dbx->md_vlen_max; - } - return rc; -} - -/* Initialize a cursor for a given transaction and database. */ -static int cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, size_t dbi) { - STATIC_ASSERT(offsetof(MDBX_cursor_couple, outer) == 0); - int rc = dbi_check(txn, dbi); - if (likely(rc == MDBX_SUCCESS)) - rc = couple_init(container_of(mc, MDBX_cursor_couple, outer), dbi, txn, - &txn->mt_dbs[dbi], &txn->mt_env->me_dbxs[dbi], - &txn->mt_dbi_state[dbi]); - return rc; -} - -MDBX_cursor *mdbx_cursor_create(void *context) { - MDBX_cursor_couple *couple = osal_calloc(1, sizeof(MDBX_cursor_couple)); - if (unlikely(!couple)) - return nullptr; - - couple->outer.mc_signature = MDBX_MC_READY4CLOSE; - couple->outer.mc_dbi = UINT_MAX; - couple->mc_userctx = context; - return &couple->outer; -} - -int mdbx_cursor_set_userctx(MDBX_cursor *mc, void *ctx) { - if (unlikely(!mc)) - return MDBX_EINVAL; - - if (unlikely(mc->mc_signature != MDBX_MC_READY4CLOSE && - mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; - - MDBX_cursor_couple *couple = container_of(mc, MDBX_cursor_couple, outer); - couple->mc_userctx = ctx; - return MDBX_SUCCESS; -} - -void *mdbx_cursor_get_userctx(const MDBX_cursor *mc) { - if (unlikely(!mc)) - 
return nullptr; - - if (unlikely(mc->mc_signature != MDBX_MC_READY4CLOSE && - mc->mc_signature != MDBX_MC_LIVE)) - return nullptr; - - MDBX_cursor_couple *couple = container_of(mc, MDBX_cursor_couple, outer); - return couple->mc_userctx; -} - -int mdbx_cursor_unbind(MDBX_cursor *mc) { - if (unlikely(!mc)) - return MDBX_EINVAL; - - if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_SUCCESS - : MDBX_EBADSIGN; - - if (unlikely(mc->mc_backup)) /* Cursor from parent transaction */ - return MDBX_EINVAL; - - eASSERT(nullptr, mc->mc_txn && mc->mc_txn->mt_signature == MDBX_MT_SIGNATURE); - cASSERT(mc, mc->mc_signature == MDBX_MC_LIVE); - cASSERT(mc, !mc->mc_backup); - if (unlikely(!mc->mc_txn || mc->mc_txn->mt_signature != MDBX_MT_SIGNATURE)) { - ERROR("Wrong cursor's transaction %p 0x%x", - __Wpedantic_format_voidptr(mc->mc_txn), - mc->mc_txn ? mc->mc_txn->mt_signature : 0); - return MDBX_PROBLEM; - } - if (mc->mc_flags & C_UNTRACK) { - MDBX_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi]; - while (*prev && *prev != mc) - prev = &(*prev)->mc_next; - cASSERT(mc, *prev == mc); - *prev = mc->mc_next; - } - mc->mc_signature = MDBX_MC_READY4CLOSE; - mc->mc_flags = 0; - return MDBX_SUCCESS; -} - -int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { - if (unlikely(!mc)) - return MDBX_EINVAL; - - if (unlikely(mc->mc_signature != MDBX_MC_READY4CLOSE && - mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; - - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - rc = dbi_check(txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(dbi == FREE_DBI && !(txn->mt_flags & MDBX_TXN_RDONLY))) - return MDBX_EACCESS; - - if (unlikely(mc->mc_backup)) /* Cursor from parent transaction */ { - cASSERT(mc, mc->mc_signature == MDBX_MC_LIVE); - if (unlikely(mc->mc_dbi != dbi || - /* paranoia */ mc->mc_signature != MDBX_MC_LIVE || - mc->mc_txn != 
txn)) - return MDBX_EINVAL; - - cASSERT(mc, mc->mc_db == &txn->mt_dbs[dbi]); - cASSERT(mc, mc->mc_dbx == &txn->mt_env->me_dbxs[dbi]); - cASSERT(mc, mc->mc_dbi == dbi); - cASSERT(mc, mc->mc_dbi_state == &txn->mt_dbi_state[dbi]); - return likely(mc->mc_dbi == dbi && - /* paranoia */ mc->mc_signature == MDBX_MC_LIVE && - mc->mc_txn == txn) - ? MDBX_SUCCESS - : MDBX_EINVAL /* Disallow change DBI in nested transactions */; - } - - if (mc->mc_signature == MDBX_MC_LIVE) { - rc = mdbx_cursor_unbind(mc); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - cASSERT(mc, !(mc->mc_flags & C_UNTRACK)); - - rc = cursor_init(mc, txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - mc->mc_next = txn->mt_cursors[dbi]; - txn->mt_cursors[dbi] = mc; - mc->mc_flags |= C_UNTRACK; - - return MDBX_SUCCESS; -} - -int mdbx_cursor_open(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **ret) { - if (unlikely(!ret)) - return MDBX_EINVAL; - *ret = NULL; - - MDBX_cursor *const mc = mdbx_cursor_create(nullptr); - if (unlikely(!mc)) - return MDBX_ENOMEM; - - int rc = mdbx_cursor_bind(txn, mc, dbi); - if (unlikely(rc != MDBX_SUCCESS)) { - mdbx_cursor_close(mc); - return rc; - } - - *ret = mc; - return MDBX_SUCCESS; -} - -int mdbx_cursor_renew(const MDBX_txn *txn, MDBX_cursor *mc) { - return likely(mc) ? mdbx_cursor_bind(txn, mc, mc->mc_dbi) : MDBX_EINVAL; -} - -int mdbx_cursor_compare(const MDBX_cursor *l, const MDBX_cursor *r, - bool ignore_multival) { - const int incomparable = INT16_MAX + 1; - if (unlikely(!l)) - return r ? -incomparable * 9 : 0; - else if (unlikely(!r)) - return incomparable * 9; - - if (unlikely(l->mc_signature != MDBX_MC_LIVE)) - return (r->mc_signature == MDBX_MC_LIVE) ? -incomparable * 8 : 0; - if (unlikely(r->mc_signature != MDBX_MC_LIVE)) - return (l->mc_signature == MDBX_MC_LIVE) ? incomparable * 8 : 0; - - if (unlikely(l->mc_dbx != r->mc_dbx)) { - if (l->mc_txn->mt_env != r->mc_txn->mt_env) - return (l->mc_txn->mt_env > r->mc_txn->mt_env) ? 
incomparable * 7 - : -incomparable * 7; - if (l->mc_txn->mt_txnid != r->mc_txn->mt_txnid) - return (l->mc_txn->mt_txnid > r->mc_txn->mt_txnid) ? incomparable * 6 - : -incomparable * 6; - return (l->mc_dbx > r->mc_dbx) ? incomparable * 5 : -incomparable * 5; - } - assert(l->mc_dbi == r->mc_dbi); - - int diff = (l->mc_flags & C_INITIALIZED) - (l->mc_flags & C_INITIALIZED); - if (unlikely(diff)) - return (diff > 0) ? incomparable * 4 : -incomparable * 4; - if (unlikely((l->mc_flags & C_INITIALIZED) == 0)) - return 0; - - size_t detent = (l->mc_snum <= r->mc_snum) ? l->mc_snum : r->mc_snum; - for (size_t i = 0; i < detent; ++i) { - diff = l->mc_ki[i] - r->mc_ki[i]; - if (diff) - return diff; - } - if (unlikely(l->mc_snum != r->mc_snum)) - return (l->mc_snum > r->mc_snum) ? incomparable * 3 : -incomparable * 3; - - assert((l->mc_xcursor != nullptr) == (r->mc_xcursor != nullptr)); - if (unlikely((l->mc_xcursor != nullptr) != (r->mc_xcursor != nullptr))) - return l->mc_xcursor ? incomparable * 2 : -incomparable * 2; - if (ignore_multival || !l->mc_xcursor) - return 0; - -#if MDBX_DEBUG - if (l->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { - const MDBX_page *mp = l->mc_pg[l->mc_top]; - const MDBX_node *node = page_node(mp, l->mc_ki[l->mc_top]); - assert(node_flags(node) & F_DUPDATA); - } - if (l->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { - const MDBX_page *mp = r->mc_pg[r->mc_top]; - const MDBX_node *node = page_node(mp, r->mc_ki[r->mc_top]); - assert(node_flags(node) & F_DUPDATA); - } -#endif /* MDBX_DEBUG */ - - l = &l->mc_xcursor->mx_cursor; - r = &r->mc_xcursor->mx_cursor; - diff = (l->mc_flags & C_INITIALIZED) - (l->mc_flags & C_INITIALIZED); - if (unlikely(diff)) - return (diff > 0) ? incomparable * 2 : -incomparable * 2; - if (unlikely((l->mc_flags & C_INITIALIZED) == 0)) - return 0; - - detent = (l->mc_snum <= r->mc_snum) ? 
l->mc_snum : r->mc_snum; - for (size_t i = 0; i < detent; ++i) { - diff = l->mc_ki[i] - r->mc_ki[i]; - if (diff) - return diff; - } - if (unlikely(l->mc_snum != r->mc_snum)) - return (l->mc_snum > r->mc_snum) ? incomparable : -incomparable; - return 0; -} - -int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest) { - if (unlikely(!src)) - return MDBX_EINVAL; - if (unlikely(src->mc_signature != MDBX_MC_LIVE)) - return (src->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL - : MDBX_EBADSIGN; - - int rc = mdbx_cursor_bind(src->mc_txn, dest, src->mc_dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - assert(dest->mc_db == src->mc_db); - assert(dest->mc_dbi == src->mc_dbi); - assert(dest->mc_dbx == src->mc_dbx); - assert(dest->mc_dbi_state == src->mc_dbi_state); -again: - assert(dest->mc_txn == src->mc_txn); - dest->mc_flags ^= (dest->mc_flags ^ src->mc_flags) & ~C_UNTRACK; - dest->mc_top = src->mc_top; - dest->mc_snum = src->mc_snum; - for (size_t i = 0; i < src->mc_snum; ++i) { - dest->mc_ki[i] = src->mc_ki[i]; - dest->mc_pg[i] = src->mc_pg[i]; - } - - if (src->mc_xcursor) { - dest->mc_xcursor->mx_db = src->mc_xcursor->mx_db; - dest->mc_xcursor->mx_dbx = src->mc_xcursor->mx_dbx; - src = &src->mc_xcursor->mx_cursor; - dest = &dest->mc_xcursor->mx_cursor; - goto again; - } - - return MDBX_SUCCESS; -} - -void mdbx_cursor_close(MDBX_cursor *mc) { - if (likely(mc)) { - ENSURE(NULL, mc->mc_signature == MDBX_MC_LIVE || - mc->mc_signature == MDBX_MC_READY4CLOSE); - MDBX_txn *const txn = mc->mc_txn; - if (!mc->mc_backup) { - mc->mc_txn = NULL; - /* Unlink from txn, if tracked. 
*/ - if (mc->mc_flags & C_UNTRACK) { - ENSURE(txn->mt_env, check_txn(txn, 0) == MDBX_SUCCESS); - MDBX_cursor **prev = &txn->mt_cursors[mc->mc_dbi]; - while (*prev && *prev != mc) - prev = &(*prev)->mc_next; - tASSERT(txn, *prev == mc); - *prev = mc->mc_next; - } - mc->mc_signature = 0; - mc->mc_next = mc; - osal_free(mc); - } else { - /* Cursor closed before nested txn ends */ - tASSERT(txn, mc->mc_signature == MDBX_MC_LIVE); - ENSURE(txn->mt_env, check_txn_rw(txn, 0) == MDBX_SUCCESS); - mc->mc_signature = MDBX_MC_WAIT4EOT; - } - } -} - -int mdbx_txn_release_all_cursors(const MDBX_txn *txn, bool unbind) { - int rc = check_txn(txn, MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD); - if (likely(rc == MDBX_SUCCESS)) { - TXN_FOREACH_DBI_FROM(txn, i, MAIN_DBI) { - while (txn->mt_cursors[i]) { - MDBX_cursor *mc = txn->mt_cursors[i]; - ENSURE(NULL, mc->mc_signature == MDBX_MC_LIVE && - (mc->mc_flags & C_UNTRACK) && !mc->mc_backup); - rc = likely(rc < INT_MAX) ? rc + 1 : rc; - txn->mt_cursors[i] = mc->mc_next; - if (unbind) { - mc->mc_signature = MDBX_MC_READY4CLOSE; - mc->mc_flags = 0; - } else { - mc->mc_signature = 0; - mc->mc_next = mc; - osal_free(mc); - } - } - } - } else { - eASSERT(nullptr, rc < 0); - } - return rc; -} - -MDBX_txn *mdbx_cursor_txn(const MDBX_cursor *mc) { - if (unlikely(!mc || mc->mc_signature != MDBX_MC_LIVE)) - return NULL; - MDBX_txn *txn = mc->mc_txn; - if (unlikely(!txn || txn->mt_signature != MDBX_MT_SIGNATURE)) - return NULL; - if (unlikely(txn->mt_flags & MDBX_TXN_FINISHED)) - return NULL; - return txn; -} - -MDBX_dbi mdbx_cursor_dbi(const MDBX_cursor *mc) { - if (unlikely(!mc || mc->mc_signature != MDBX_MC_LIVE)) - return UINT_MAX; - return mc->mc_dbi; -} - -/* Return the count of duplicate data items for the current key */ -int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) { - if (unlikely(mc == NULL)) - return MDBX_EINVAL; - - if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? 
MDBX_EINVAL - : MDBX_EBADSIGN; - - int rc = check_txn(mc->mc_txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(countp == NULL || !(mc->mc_flags & C_INITIALIZED))) - return MDBX_EINVAL; - - if (!mc->mc_snum) { - *countp = 0; - return MDBX_NOTFOUND; - } - - MDBX_page *mp = mc->mc_pg[mc->mc_top]; - if ((mc->mc_flags & C_EOF) && mc->mc_ki[mc->mc_top] >= page_numkeys(mp)) { - *countp = 0; - return MDBX_NOTFOUND; - } - - *countp = 1; - if (mc->mc_xcursor != NULL) { - MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); - if (node_flags(node) & F_DUPDATA) { - cASSERT(mc, mc->mc_xcursor && - (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)); - *countp = unlikely(mc->mc_xcursor->mx_db.md_entries > PTRDIFF_MAX) - ? PTRDIFF_MAX - : (size_t)mc->mc_xcursor->mx_db.md_entries; - } - } - return MDBX_SUCCESS; -} - -/* Replace the key for a branch node with a new key. - * Set MDBX_TXN_ERROR on failure. - * [in] mc Cursor pointing to the node to operate on. - * [in] key The new key to use. - * Returns 0 on success, non-zero on failure. */ -static int update_key(MDBX_cursor *mc, const MDBX_val *key) { - MDBX_page *mp; - MDBX_node *node; - size_t len; - ptrdiff_t delta, ksize, oksize; - intptr_t ptr, i, nkeys, indx; - DKBUF_DEBUG; - - cASSERT(mc, cursor_is_tracked(mc)); - indx = mc->mc_ki[mc->mc_top]; - mp = mc->mc_pg[mc->mc_top]; - node = page_node(mp, indx); - ptr = mp->mp_ptrs[indx]; -#if MDBX_DEBUG - MDBX_val k2; - k2.iov_base = node_key(node); - k2.iov_len = node_ks(node); - DEBUG("update key %zi (offset %zu) [%s] to [%s] on page %" PRIaPGNO, indx, - ptr, DVAL_DEBUG(&k2), DKEY_DEBUG(key), mp->mp_pgno); -#endif /* MDBX_DEBUG */ - - /* Sizes must be 2-byte aligned. */ - ksize = EVEN(key->iov_len); - oksize = EVEN(node_ks(node)); - delta = ksize - oksize; - - /* Shift node contents if EVEN(key length) changed. 
*/ - if (delta) { - if (delta > (int)page_room(mp)) { - /* not enough space left, do a delete and split */ - DEBUG("Not enough room, delta = %zd, splitting...", delta); - pgno_t pgno = node_pgno(node); - node_del(mc, 0); - int err = page_split(mc, key, NULL, pgno, MDBX_SPLIT_REPLACE); - if (err == MDBX_SUCCESS && AUDIT_ENABLED()) - err = cursor_check_updating(mc); - return err; - } - - nkeys = page_numkeys(mp); - for (i = 0; i < nkeys; i++) { - if (mp->mp_ptrs[i] <= ptr) { - cASSERT(mc, mp->mp_ptrs[i] >= delta); - mp->mp_ptrs[i] -= (indx_t)delta; - } - } - - void *const base = ptr_disp(mp, mp->mp_upper + PAGEHDRSZ); - len = ptr - mp->mp_upper + NODESIZE; - memmove(ptr_disp(base, -delta), base, len); - cASSERT(mc, mp->mp_upper >= delta); - mp->mp_upper -= (indx_t)delta; - - node = page_node(mp, indx); - } - - /* But even if no shift was needed, update ksize */ - node_set_ks(node, key->iov_len); - - if (likely(key->iov_len /* to avoid UBSAN traps*/ != 0)) - memcpy(node_key(node), key->iov_base, key->iov_len); - return MDBX_SUCCESS; -} - -/* Move a node from csrc to cdst. 
*/ -static int node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, bool fromleft) { - int rc; - DKBUF_DEBUG; - - MDBX_page *psrc = csrc->mc_pg[csrc->mc_top]; - MDBX_page *pdst = cdst->mc_pg[cdst->mc_top]; - cASSERT(csrc, PAGETYPE_WHOLE(psrc) == PAGETYPE_WHOLE(pdst)); - cASSERT(csrc, csrc->mc_dbi == cdst->mc_dbi); - cASSERT(csrc, csrc->mc_top == cdst->mc_top); - if (unlikely(PAGETYPE_WHOLE(psrc) != PAGETYPE_WHOLE(pdst))) { - bailout: - ERROR("Wrong or mismatch pages's types (src %d, dst %d) to move node", - PAGETYPE_WHOLE(psrc), PAGETYPE_WHOLE(pdst)); - csrc->mc_txn->mt_flags |= MDBX_TXN_ERROR; - return MDBX_PROBLEM; - } - - MDBX_val key4move; - switch (PAGETYPE_WHOLE(psrc)) { - case P_BRANCH: { - const MDBX_node *srcnode = page_node(psrc, csrc->mc_ki[csrc->mc_top]); - cASSERT(csrc, node_flags(srcnode) == 0); - const pgno_t srcpg = node_pgno(srcnode); - key4move.iov_len = node_ks(srcnode); - key4move.iov_base = node_key(srcnode); - - if (csrc->mc_ki[csrc->mc_top] == 0) { - const size_t snum = csrc->mc_snum; - cASSERT(csrc, snum > 0); - /* must find the lowest key below src */ - rc = page_search_lowest(csrc); - MDBX_page *lowest_page = csrc->mc_pg[csrc->mc_top]; - if (unlikely(rc)) - return rc; - cASSERT(csrc, IS_LEAF(lowest_page)); - if (unlikely(!IS_LEAF(lowest_page))) - goto bailout; - if (IS_LEAF2(lowest_page)) { - key4move.iov_len = csrc->mc_db->md_xsize; - key4move.iov_base = page_leaf2key(lowest_page, 0, key4move.iov_len); - } else { - const MDBX_node *lowest_node = page_node(lowest_page, 0); - key4move.iov_len = node_ks(lowest_node); - key4move.iov_base = node_key(lowest_node); - } - - /* restore cursor after mdbx_page_search_lowest() */ - csrc->mc_snum = (uint8_t)snum; - csrc->mc_top = (uint8_t)snum - 1; - csrc->mc_ki[csrc->mc_top] = 0; - - /* paranoia */ - cASSERT(csrc, psrc == csrc->mc_pg[csrc->mc_top]); - cASSERT(csrc, IS_BRANCH(psrc)); - if (unlikely(!IS_BRANCH(psrc))) - goto bailout; - } - - if (cdst->mc_ki[cdst->mc_top] == 0) { - const size_t snum = 
cdst->mc_snum; - cASSERT(csrc, snum > 0); - MDBX_cursor mn; - cursor_copy(cdst, &mn); - /* must find the lowest key below dst */ - rc = page_search_lowest(&mn); - if (unlikely(rc)) - return rc; - MDBX_page *const lowest_page = mn.mc_pg[mn.mc_top]; - cASSERT(cdst, IS_LEAF(lowest_page)); - if (unlikely(!IS_LEAF(lowest_page))) - goto bailout; - MDBX_val key; - if (IS_LEAF2(lowest_page)) { - key.iov_len = mn.mc_db->md_xsize; - key.iov_base = page_leaf2key(lowest_page, 0, key.iov_len); - } else { - MDBX_node *lowest_node = page_node(lowest_page, 0); - key.iov_len = node_ks(lowest_node); - key.iov_base = node_key(lowest_node); - } - - /* restore cursor after mdbx_page_search_lowest() */ - mn.mc_snum = (uint8_t)snum; - mn.mc_top = (uint8_t)snum - 1; - mn.mc_ki[mn.mc_top] = 0; - - const intptr_t delta = - EVEN(key.iov_len) - EVEN(node_ks(page_node(mn.mc_pg[mn.mc_top], 0))); - const intptr_t needed = - branch_size(cdst->mc_txn->mt_env, &key4move) + delta; - const intptr_t have = page_room(pdst); - if (unlikely(needed > have)) - return MDBX_RESULT_TRUE; - - if (unlikely((rc = page_touch(csrc)) || (rc = page_touch(cdst)))) - return rc; - psrc = csrc->mc_pg[csrc->mc_top]; - pdst = cdst->mc_pg[cdst->mc_top]; - - WITH_CURSOR_TRACKING(mn, rc = update_key(&mn, &key)); - if (unlikely(rc)) - return rc; - } else { - const size_t needed = branch_size(cdst->mc_txn->mt_env, &key4move); - const size_t have = page_room(pdst); - if (unlikely(needed > have)) - return MDBX_RESULT_TRUE; - - if (unlikely((rc = page_touch(csrc)) || (rc = page_touch(cdst)))) - return rc; - psrc = csrc->mc_pg[csrc->mc_top]; - pdst = cdst->mc_pg[cdst->mc_top]; - } - - DEBUG("moving %s-node %u [%s] on page %" PRIaPGNO - " to node %u on page %" PRIaPGNO, - "branch", csrc->mc_ki[csrc->mc_top], DKEY_DEBUG(&key4move), - psrc->mp_pgno, cdst->mc_ki[cdst->mc_top], pdst->mp_pgno); - /* Add the node to the destination page. 
*/ - rc = node_add_branch(cdst, cdst->mc_ki[cdst->mc_top], &key4move, srcpg); - } break; - - case P_LEAF: { - /* Mark src and dst as dirty. */ - if (unlikely((rc = page_touch(csrc)) || (rc = page_touch(cdst)))) - return rc; - psrc = csrc->mc_pg[csrc->mc_top]; - pdst = cdst->mc_pg[cdst->mc_top]; - const MDBX_node *srcnode = page_node(psrc, csrc->mc_ki[csrc->mc_top]); - MDBX_val data; - data.iov_len = node_ds(srcnode); - data.iov_base = node_data(srcnode); - key4move.iov_len = node_ks(srcnode); - key4move.iov_base = node_key(srcnode); - DEBUG("moving %s-node %u [%s] on page %" PRIaPGNO - " to node %u on page %" PRIaPGNO, - "leaf", csrc->mc_ki[csrc->mc_top], DKEY_DEBUG(&key4move), - psrc->mp_pgno, cdst->mc_ki[cdst->mc_top], pdst->mp_pgno); - /* Add the node to the destination page. */ - rc = node_add_leaf(cdst, cdst->mc_ki[cdst->mc_top], &key4move, &data, - node_flags(srcnode)); - } break; - - case P_LEAF | P_LEAF2: { - /* Mark src and dst as dirty. */ - if (unlikely((rc = page_touch(csrc)) || (rc = page_touch(cdst)))) - return rc; - psrc = csrc->mc_pg[csrc->mc_top]; - pdst = cdst->mc_pg[cdst->mc_top]; - key4move.iov_len = csrc->mc_db->md_xsize; - key4move.iov_base = - page_leaf2key(psrc, csrc->mc_ki[csrc->mc_top], key4move.iov_len); - DEBUG("moving %s-node %u [%s] on page %" PRIaPGNO - " to node %u on page %" PRIaPGNO, - "leaf2", csrc->mc_ki[csrc->mc_top], DKEY_DEBUG(&key4move), - psrc->mp_pgno, cdst->mc_ki[cdst->mc_top], pdst->mp_pgno); - /* Add the node to the destination page. */ - rc = node_add_leaf2(cdst, cdst->mc_ki[cdst->mc_top], &key4move); - } break; - - default: - assert(false); - goto bailout; - } - - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - /* Delete the node from the source page. 
*/ - node_del(csrc, key4move.iov_len); - - cASSERT(csrc, psrc == csrc->mc_pg[csrc->mc_top]); - cASSERT(cdst, pdst == cdst->mc_pg[cdst->mc_top]); - cASSERT(csrc, PAGETYPE_WHOLE(psrc) == PAGETYPE_WHOLE(pdst)); - - { - /* Adjust other cursors pointing to mp */ - MDBX_cursor *m2, *m3; - const MDBX_dbi dbi = csrc->mc_dbi; - cASSERT(csrc, csrc->mc_top == cdst->mc_top); - if (fromleft) { - /* If we're adding on the left, bump others up */ - for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { - m3 = (csrc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; - if (!(m3->mc_flags & C_INITIALIZED) || m3->mc_top < csrc->mc_top) - continue; - if (m3 != cdst && m3->mc_pg[csrc->mc_top] == pdst && - m3->mc_ki[csrc->mc_top] >= cdst->mc_ki[csrc->mc_top]) { - m3->mc_ki[csrc->mc_top]++; - } - if (m3 != csrc && m3->mc_pg[csrc->mc_top] == psrc && - m3->mc_ki[csrc->mc_top] == csrc->mc_ki[csrc->mc_top]) { - m3->mc_pg[csrc->mc_top] = pdst; - m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top]; - cASSERT(csrc, csrc->mc_top > 0); - m3->mc_ki[csrc->mc_top - 1]++; - } - if (XCURSOR_INITED(m3) && IS_LEAF(psrc)) - XCURSOR_REFRESH(m3, m3->mc_pg[csrc->mc_top], m3->mc_ki[csrc->mc_top]); - } - } else { - /* Adding on the right, bump others down */ - for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { - m3 = (csrc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; - if (m3 == csrc) - continue; - if (!(m3->mc_flags & C_INITIALIZED) || m3->mc_top < csrc->mc_top) - continue; - if (m3->mc_pg[csrc->mc_top] == psrc) { - if (!m3->mc_ki[csrc->mc_top]) { - m3->mc_pg[csrc->mc_top] = pdst; - m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top]; - cASSERT(csrc, csrc->mc_top > 0); - m3->mc_ki[csrc->mc_top - 1]--; - } else { - m3->mc_ki[csrc->mc_top]--; - } - if (XCURSOR_INITED(m3) && IS_LEAF(psrc)) - XCURSOR_REFRESH(m3, m3->mc_pg[csrc->mc_top], - m3->mc_ki[csrc->mc_top]); - } - } - } - } - - /* Update the parent separators. 
*/ - if (csrc->mc_ki[csrc->mc_top] == 0) { - cASSERT(csrc, csrc->mc_top > 0); - if (csrc->mc_ki[csrc->mc_top - 1] != 0) { - MDBX_val key; - if (IS_LEAF2(psrc)) { - key.iov_len = psrc->mp_leaf2_ksize; - key.iov_base = page_leaf2key(psrc, 0, key.iov_len); - } else { - MDBX_node *srcnode = page_node(psrc, 0); - key.iov_len = node_ks(srcnode); - key.iov_base = node_key(srcnode); - } - DEBUG("update separator for source page %" PRIaPGNO " to [%s]", - psrc->mp_pgno, DKEY_DEBUG(&key)); - MDBX_cursor mn; - cursor_copy(csrc, &mn); - cASSERT(csrc, mn.mc_snum > 0); - mn.mc_snum--; - mn.mc_top--; - /* We want rebalance to find mn when doing fixups */ - WITH_CURSOR_TRACKING(mn, rc = update_key(&mn, &key)); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - if (IS_BRANCH(psrc)) { - const MDBX_val nullkey = {0, 0}; - const indx_t ix = csrc->mc_ki[csrc->mc_top]; - csrc->mc_ki[csrc->mc_top] = 0; - rc = update_key(csrc, &nullkey); - csrc->mc_ki[csrc->mc_top] = ix; - cASSERT(csrc, rc == MDBX_SUCCESS); - } - } - - if (cdst->mc_ki[cdst->mc_top] == 0) { - cASSERT(cdst, cdst->mc_top > 0); - if (cdst->mc_ki[cdst->mc_top - 1] != 0) { - MDBX_val key; - if (IS_LEAF2(pdst)) { - key.iov_len = pdst->mp_leaf2_ksize; - key.iov_base = page_leaf2key(pdst, 0, key.iov_len); - } else { - MDBX_node *srcnode = page_node(pdst, 0); - key.iov_len = node_ks(srcnode); - key.iov_base = node_key(srcnode); - } - DEBUG("update separator for destination page %" PRIaPGNO " to [%s]", - pdst->mp_pgno, DKEY_DEBUG(&key)); - MDBX_cursor mn; - cursor_copy(cdst, &mn); - cASSERT(cdst, mn.mc_snum > 0); - mn.mc_snum--; - mn.mc_top--; - /* We want rebalance to find mn when doing fixups */ - WITH_CURSOR_TRACKING(mn, rc = update_key(&mn, &key)); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - if (IS_BRANCH(pdst)) { - const MDBX_val nullkey = {0, 0}; - const indx_t ix = cdst->mc_ki[cdst->mc_top]; - cdst->mc_ki[cdst->mc_top] = 0; - rc = update_key(cdst, &nullkey); - cdst->mc_ki[cdst->mc_top] = ix; - cASSERT(cdst, rc 
== MDBX_SUCCESS); - } - } - - return MDBX_SUCCESS; -} - -/* Merge one page into another. - * - * The nodes from the page pointed to by csrc will be copied to the page - * pointed to by cdst and then the csrc page will be freed. - * - * [in] csrc Cursor pointing to the source page. - * [in] cdst Cursor pointing to the destination page. - * - * Returns 0 on success, non-zero on failure. */ -static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { - MDBX_val key; - int rc; - - cASSERT(csrc, csrc != cdst); - cASSERT(csrc, cursor_is_tracked(csrc)); - cASSERT(cdst, cursor_is_tracked(cdst)); - const MDBX_page *const psrc = csrc->mc_pg[csrc->mc_top]; - MDBX_page *pdst = cdst->mc_pg[cdst->mc_top]; - DEBUG("merging page %" PRIaPGNO " into %" PRIaPGNO, psrc->mp_pgno, - pdst->mp_pgno); - - cASSERT(csrc, PAGETYPE_WHOLE(psrc) == PAGETYPE_WHOLE(pdst)); - cASSERT(csrc, csrc->mc_dbi == cdst->mc_dbi && csrc->mc_db == cdst->mc_db); - cASSERT(csrc, csrc->mc_snum > 1); /* can't merge root page */ - cASSERT(cdst, cdst->mc_snum > 1); - cASSERT(cdst, cdst->mc_snum < cdst->mc_db->md_depth || - IS_LEAF(cdst->mc_pg[cdst->mc_db->md_depth - 1])); - cASSERT(csrc, csrc->mc_snum < csrc->mc_db->md_depth || - IS_LEAF(csrc->mc_pg[csrc->mc_db->md_depth - 1])); - cASSERT(cdst, csrc->mc_txn->mt_env->me_options.prefer_waf_insteadof_balance || - page_room(pdst) >= page_used(cdst->mc_txn->mt_env, psrc)); - const int pagetype = PAGETYPE_WHOLE(psrc); - - /* Move all nodes from src to dst */ - const size_t dst_nkeys = page_numkeys(pdst); - const size_t src_nkeys = page_numkeys(psrc); - cASSERT(cdst, dst_nkeys + src_nkeys >= (IS_LEAF(psrc) ? 1u : 2u)); - if (likely(src_nkeys)) { - size_t j = dst_nkeys; - if (unlikely(pagetype & P_LEAF2)) { - /* Mark dst as dirty. 
*/ - rc = page_touch(cdst); - cASSERT(cdst, rc != MDBX_RESULT_TRUE); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - key.iov_len = csrc->mc_db->md_xsize; - key.iov_base = page_data(psrc); - size_t i = 0; - do { - rc = node_add_leaf2(cdst, j++, &key); - cASSERT(cdst, rc != MDBX_RESULT_TRUE); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - key.iov_base = ptr_disp(key.iov_base, key.iov_len); - } while (++i != src_nkeys); - } else { - MDBX_node *srcnode = page_node(psrc, 0); - key.iov_len = node_ks(srcnode); - key.iov_base = node_key(srcnode); - if (pagetype & P_BRANCH) { - MDBX_cursor mn; - cursor_copy(csrc, &mn); - /* must find the lowest key below src */ - rc = page_search_lowest(&mn); - cASSERT(csrc, rc != MDBX_RESULT_TRUE); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - const MDBX_page *mp = mn.mc_pg[mn.mc_top]; - if (likely(!IS_LEAF2(mp))) { - cASSERT(&mn, IS_LEAF(mp)); - const MDBX_node *lowest = page_node(mp, 0); - key.iov_len = node_ks(lowest); - key.iov_base = node_key(lowest); - } else { - cASSERT(&mn, mn.mc_top > csrc->mc_top); - key.iov_len = mp->mp_leaf2_ksize; - key.iov_base = page_leaf2key(mp, mn.mc_ki[mn.mc_top], key.iov_len); - } - cASSERT(&mn, key.iov_len >= csrc->mc_dbx->md_klen_min); - cASSERT(&mn, key.iov_len <= csrc->mc_dbx->md_klen_max); - - const size_t dst_room = page_room(pdst); - const size_t src_used = page_used(cdst->mc_txn->mt_env, psrc); - const size_t space_needed = src_used - node_ks(srcnode) + key.iov_len; - if (unlikely(space_needed > dst_room)) - return MDBX_RESULT_TRUE; - } - - /* Mark dst as dirty. 
*/ - rc = page_touch(cdst); - cASSERT(cdst, rc != MDBX_RESULT_TRUE); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - size_t i = 0; - while (true) { - if (pagetype & P_LEAF) { - MDBX_val data; - data.iov_len = node_ds(srcnode); - data.iov_base = node_data(srcnode); - rc = node_add_leaf(cdst, j++, &key, &data, node_flags(srcnode)); - } else { - cASSERT(csrc, node_flags(srcnode) == 0); - rc = node_add_branch(cdst, j++, &key, node_pgno(srcnode)); - } - cASSERT(cdst, rc != MDBX_RESULT_TRUE); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (++i == src_nkeys) - break; - srcnode = page_node(psrc, i); - key.iov_len = node_ks(srcnode); - key.iov_base = node_key(srcnode); - } - } - - pdst = cdst->mc_pg[cdst->mc_top]; - DEBUG("dst page %" PRIaPGNO " now has %zu keys (%.1f%% filled)", - pdst->mp_pgno, page_numkeys(pdst), - page_fill(cdst->mc_txn->mt_env, pdst)); - - cASSERT(csrc, psrc == csrc->mc_pg[csrc->mc_top]); - cASSERT(cdst, pdst == cdst->mc_pg[cdst->mc_top]); - } - - /* Unlink the src page from parent and add to free list. */ - csrc->mc_top--; - node_del(csrc, 0); - if (csrc->mc_ki[csrc->mc_top] == 0) { - const MDBX_val nullkey = {0, 0}; - rc = update_key(csrc, &nullkey); - cASSERT(csrc, rc != MDBX_RESULT_TRUE); - if (unlikely(rc != MDBX_SUCCESS)) { - csrc->mc_top++; - return rc; - } - } - csrc->mc_top++; - - cASSERT(csrc, psrc == csrc->mc_pg[csrc->mc_top]); - cASSERT(cdst, pdst == cdst->mc_pg[cdst->mc_top]); - - { - /* Adjust other cursors pointing to mp */ - MDBX_cursor *m2, *m3; - const MDBX_dbi dbi = csrc->mc_dbi; - const size_t top = csrc->mc_top; - - for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { - m3 = (csrc->mc_flags & C_SUB) ? 
&m2->mc_xcursor->mx_cursor : m2; - if (m3 == csrc || top >= m3->mc_snum) - continue; - if (m3->mc_pg[top] == psrc) { - m3->mc_pg[top] = pdst; - cASSERT(m3, dst_nkeys + m3->mc_ki[top] <= UINT16_MAX); - m3->mc_ki[top] += (indx_t)dst_nkeys; - m3->mc_ki[top - 1] = cdst->mc_ki[top - 1]; - } else if (m3->mc_pg[top - 1] == csrc->mc_pg[top - 1] && - m3->mc_ki[top - 1] > csrc->mc_ki[top - 1]) { - m3->mc_ki[top - 1]--; - } - if (XCURSOR_INITED(m3) && IS_LEAF(psrc)) - XCURSOR_REFRESH(m3, m3->mc_pg[top], m3->mc_ki[top]); - } - } - - rc = page_retire(csrc, (MDBX_page *)psrc); - cASSERT(csrc, rc != MDBX_RESULT_TRUE); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - cASSERT(cdst, cdst->mc_db->md_entries > 0); - cASSERT(cdst, cdst->mc_snum <= cdst->mc_db->md_depth); - cASSERT(cdst, cdst->mc_top > 0); - cASSERT(cdst, cdst->mc_snum == cdst->mc_top + 1); - MDBX_page *const top_page = cdst->mc_pg[cdst->mc_top]; - const indx_t top_indx = cdst->mc_ki[cdst->mc_top]; - const unsigned save_snum = cdst->mc_snum; - const uint16_t save_depth = cdst->mc_db->md_depth; - cursor_pop(cdst); - rc = rebalance(cdst); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - cASSERT(cdst, cdst->mc_db->md_entries > 0); - cASSERT(cdst, cdst->mc_snum <= cdst->mc_db->md_depth); - cASSERT(cdst, cdst->mc_snum == cdst->mc_top + 1); - -#if MDBX_ENABLE_PGOP_STAT - cdst->mc_txn->mt_env->me_lck->mti_pgop_stat.merge.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - - if (IS_LEAF(cdst->mc_pg[cdst->mc_top])) { - /* LY: don't touch cursor if top-page is a LEAF */ - cASSERT(cdst, IS_LEAF(cdst->mc_pg[cdst->mc_top]) || - PAGETYPE_WHOLE(cdst->mc_pg[cdst->mc_top]) == pagetype); - return MDBX_SUCCESS; - } - - cASSERT(cdst, page_numkeys(top_page) == dst_nkeys + src_nkeys); - - if (unlikely(pagetype != PAGETYPE_WHOLE(top_page))) { - /* LY: LEAF-page becomes BRANCH, unable restore cursor's stack */ - goto bailout; - } - - if (top_page == cdst->mc_pg[cdst->mc_top]) { - /* LY: don't touch cursor if prev top-page already on the 
top */ - cASSERT(cdst, cdst->mc_ki[cdst->mc_top] == top_indx); - cASSERT(cdst, IS_LEAF(cdst->mc_pg[cdst->mc_top]) || - PAGETYPE_WHOLE(cdst->mc_pg[cdst->mc_top]) == pagetype); - return MDBX_SUCCESS; - } - - const int new_snum = save_snum - save_depth + cdst->mc_db->md_depth; - if (unlikely(new_snum < 1 || new_snum > cdst->mc_db->md_depth)) { - /* LY: out of range, unable restore cursor's stack */ - goto bailout; - } - - if (top_page == cdst->mc_pg[new_snum - 1]) { - cASSERT(cdst, cdst->mc_ki[new_snum - 1] == top_indx); - /* LY: restore cursor stack */ - cdst->mc_snum = (uint8_t)new_snum; - cdst->mc_top = (uint8_t)new_snum - 1; - cASSERT(cdst, cdst->mc_snum < cdst->mc_db->md_depth || - IS_LEAF(cdst->mc_pg[cdst->mc_db->md_depth - 1])); - cASSERT(cdst, IS_LEAF(cdst->mc_pg[cdst->mc_top]) || - PAGETYPE_WHOLE(cdst->mc_pg[cdst->mc_top]) == pagetype); - return MDBX_SUCCESS; - } - - MDBX_page *const stub_page = (MDBX_page *)(~(uintptr_t)top_page); - const indx_t stub_indx = top_indx; - if (save_depth > cdst->mc_db->md_depth && - ((cdst->mc_pg[save_snum - 1] == top_page && - cdst->mc_ki[save_snum - 1] == top_indx) || - (cdst->mc_pg[save_snum - 1] == stub_page && - cdst->mc_ki[save_snum - 1] == stub_indx))) { - /* LY: restore cursor stack */ - cdst->mc_pg[new_snum - 1] = top_page; - cdst->mc_ki[new_snum - 1] = top_indx; - cdst->mc_pg[new_snum] = (MDBX_page *)(~(uintptr_t)cdst->mc_pg[new_snum]); - cdst->mc_ki[new_snum] = ~cdst->mc_ki[new_snum]; - cdst->mc_snum = (uint8_t)new_snum; - cdst->mc_top = (uint8_t)new_snum - 1; - cASSERT(cdst, cdst->mc_snum < cdst->mc_db->md_depth || - IS_LEAF(cdst->mc_pg[cdst->mc_db->md_depth - 1])); - cASSERT(cdst, IS_LEAF(cdst->mc_pg[cdst->mc_top]) || - PAGETYPE_WHOLE(cdst->mc_pg[cdst->mc_top]) == pagetype); - return MDBX_SUCCESS; - } - -bailout: - /* LY: unable restore cursor's stack */ - cdst->mc_flags &= ~C_INITIALIZED; - return MDBX_CURSOR_FULL; -} - -static void cursor_restore(const MDBX_cursor *csrc, MDBX_cursor *cdst) { - cASSERT(cdst, 
cdst->mc_dbi == csrc->mc_dbi); - cASSERT(cdst, cdst->mc_txn == csrc->mc_txn); - cASSERT(cdst, cdst->mc_db == csrc->mc_db); - cASSERT(cdst, cdst->mc_dbx == csrc->mc_dbx); - cASSERT(cdst, cdst->mc_dbi_state == csrc->mc_dbi_state); - cdst->mc_snum = csrc->mc_snum; - cdst->mc_top = csrc->mc_top; - cdst->mc_flags = csrc->mc_flags; - cdst->mc_checking = csrc->mc_checking; - - for (size_t i = 0; i < csrc->mc_snum; i++) { - cdst->mc_pg[i] = csrc->mc_pg[i]; - cdst->mc_ki[i] = csrc->mc_ki[i]; - } -} - -/* Copy the contents of a cursor. - * [in] csrc The cursor to copy from. - * [out] cdst The cursor to copy to. */ -static void cursor_copy(const MDBX_cursor *csrc, MDBX_cursor *cdst) { - cASSERT(csrc, csrc->mc_txn->mt_txnid >= - csrc->mc_txn->mt_env->me_lck->mti_oldest_reader.weak); - cdst->mc_dbi = csrc->mc_dbi; - cdst->mc_next = NULL; - cdst->mc_backup = NULL; - cdst->mc_xcursor = NULL; - cdst->mc_txn = csrc->mc_txn; - cdst->mc_db = csrc->mc_db; - cdst->mc_dbx = csrc->mc_dbx; - cdst->mc_dbi_state = csrc->mc_dbi_state; - cursor_restore(csrc, cdst); -} - -/* Rebalance the tree after a delete operation. - * [in] mc Cursor pointing to the page where rebalancing should begin. - * Returns 0 on success, non-zero on failure. */ -static int rebalance(MDBX_cursor *mc) { - cASSERT(mc, cursor_is_tracked(mc)); - cASSERT(mc, mc->mc_snum > 0); - cASSERT(mc, mc->mc_snum < mc->mc_db->md_depth || - IS_LEAF(mc->mc_pg[mc->mc_db->md_depth - 1])); - const int pagetype = PAGETYPE_WHOLE(mc->mc_pg[mc->mc_top]); - - STATIC_ASSERT(P_BRANCH == 1); - const size_t minkeys = (pagetype & P_BRANCH) + (size_t)1; - - /* Pages emptier than this are candidates for merging. */ - size_t room_threshold = likely(mc->mc_dbi != FREE_DBI) - ? 
mc->mc_txn->mt_env->me_merge_threshold - : mc->mc_txn->mt_env->me_merge_threshold_gc; - - const MDBX_page *const tp = mc->mc_pg[mc->mc_top]; - const size_t numkeys = page_numkeys(tp); - const size_t room = page_room(tp); - DEBUG("rebalancing %s page %" PRIaPGNO - " (has %zu keys, full %.1f%%, used %zu, room %zu bytes )", - (pagetype & P_LEAF) ? "leaf" : "branch", tp->mp_pgno, numkeys, - page_fill(mc->mc_txn->mt_env, tp), page_used(mc->mc_txn->mt_env, tp), - room); - cASSERT(mc, IS_MODIFIABLE(mc->mc_txn, tp)); - - if (unlikely(numkeys < minkeys)) { - DEBUG("page %" PRIaPGNO " must be merged due keys < %zu threshold", - tp->mp_pgno, minkeys); - } else if (unlikely(room > room_threshold)) { - DEBUG("page %" PRIaPGNO " should be merged due room %zu > %zu threshold", - tp->mp_pgno, room, room_threshold); - } else { - DEBUG("no need to rebalance page %" PRIaPGNO ", room %zu < %zu threshold", - tp->mp_pgno, room, room_threshold); - cASSERT(mc, mc->mc_db->md_entries > 0); - return MDBX_SUCCESS; - } - - int rc; - if (mc->mc_snum < 2) { - MDBX_page *const mp = mc->mc_pg[0]; - const size_t nkeys = page_numkeys(mp); - cASSERT(mc, (mc->mc_db->md_entries == 0) == (nkeys == 0)); - if (IS_SUBP(mp)) { - DEBUG("%s", "Can't rebalance a subpage, ignoring"); - cASSERT(mc, pagetype & P_LEAF); - return MDBX_SUCCESS; - } - if (nkeys == 0) { - cASSERT(mc, IS_LEAF(mp)); - DEBUG("%s", "tree is completely empty"); - cASSERT(mc, (*mc->mc_dbi_state & DBI_DIRTY) != 0); - mc->mc_db->md_root = P_INVALID; - mc->mc_db->md_depth = 0; - cASSERT(mc, mc->mc_db->md_branch_pages == 0 && - mc->mc_db->md_overflow_pages == 0 && - mc->mc_db->md_leaf_pages == 1); - /* Adjust cursors pointing to mp */ - for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; - m2 = m2->mc_next) { - MDBX_cursor *m3 = - (mc->mc_flags & C_SUB) ? 
&m2->mc_xcursor->mx_cursor : m2; - if (m3 == mc || !(m3->mc_flags & C_INITIALIZED)) - continue; - if (m3->mc_pg[0] == mp) { - m3->mc_snum = 0; - m3->mc_top = 0; - m3->mc_flags &= ~C_INITIALIZED; - } - } - mc->mc_snum = 0; - mc->mc_top = 0; - mc->mc_flags &= ~C_INITIALIZED; - return page_retire(mc, mp); - } - if (IS_BRANCH(mp) && nkeys == 1) { - DEBUG("%s", "collapsing root page!"); - mc->mc_db->md_root = node_pgno(page_node(mp, 0)); - rc = page_get(mc, mc->mc_db->md_root, &mc->mc_pg[0], mp->mp_txnid); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - mc->mc_db->md_depth--; - mc->mc_ki[0] = mc->mc_ki[1]; - for (int i = 1; i < mc->mc_db->md_depth; i++) { - mc->mc_pg[i] = mc->mc_pg[i + 1]; - mc->mc_ki[i] = mc->mc_ki[i + 1]; - } - - /* Adjust other cursors pointing to mp */ - for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; - m2 = m2->mc_next) { - MDBX_cursor *m3 = - (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; - if (m3 == mc || !(m3->mc_flags & C_INITIALIZED)) - continue; - if (m3->mc_pg[0] == mp) { - for (int i = 0; i < mc->mc_db->md_depth; i++) { - m3->mc_pg[i] = m3->mc_pg[i + 1]; - m3->mc_ki[i] = m3->mc_ki[i + 1]; - } - m3->mc_snum--; - m3->mc_top--; - } - } - cASSERT(mc, IS_LEAF(mc->mc_pg[mc->mc_top]) || - PAGETYPE_WHOLE(mc->mc_pg[mc->mc_top]) == pagetype); - cASSERT(mc, mc->mc_snum < mc->mc_db->md_depth || - IS_LEAF(mc->mc_pg[mc->mc_db->md_depth - 1])); - return page_retire(mc, mp); - } - DEBUG("root page %" PRIaPGNO " doesn't need rebalancing (flags 0x%x)", - mp->mp_pgno, mp->mp_flags); - return MDBX_SUCCESS; - } - - /* The parent (branch page) must have at least 2 pointers, - * otherwise the tree is invalid. */ - const size_t pre_top = mc->mc_top - 1; - cASSERT(mc, IS_BRANCH(mc->mc_pg[pre_top])); - cASSERT(mc, !IS_SUBP(mc->mc_pg[0])); - cASSERT(mc, page_numkeys(mc->mc_pg[pre_top]) > 1); - - /* Leaf page fill factor is below the threshold. - * Try to move keys from left or right neighbor, or - * merge with a neighbor page. 
*/ - - /* Find neighbors. */ - MDBX_cursor mn; - cursor_copy(mc, &mn); - - MDBX_page *left = nullptr, *right = nullptr; - if (mn.mc_ki[pre_top] > 0) { - rc = page_get( - &mn, node_pgno(page_node(mn.mc_pg[pre_top], mn.mc_ki[pre_top] - 1)), - &left, mc->mc_pg[mc->mc_top]->mp_txnid); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - cASSERT(mc, PAGETYPE_WHOLE(left) == PAGETYPE_WHOLE(mc->mc_pg[mc->mc_top])); - } - if (mn.mc_ki[pre_top] + (size_t)1 < page_numkeys(mn.mc_pg[pre_top])) { - rc = page_get( - &mn, - node_pgno(page_node(mn.mc_pg[pre_top], mn.mc_ki[pre_top] + (size_t)1)), - &right, mc->mc_pg[mc->mc_top]->mp_txnid); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - cASSERT(mc, PAGETYPE_WHOLE(right) == PAGETYPE_WHOLE(mc->mc_pg[mc->mc_top])); - } - cASSERT(mc, left || right); - - const size_t ki_top = mc->mc_ki[mc->mc_top]; - const size_t ki_pre_top = mn.mc_ki[pre_top]; - const size_t nkeys = page_numkeys(mn.mc_pg[mn.mc_top]); - - const size_t left_room = left ? page_room(left) : 0; - const size_t right_room = right ? page_room(right) : 0; - const size_t left_nkeys = left ? page_numkeys(left) : 0; - const size_t right_nkeys = right ? 
page_numkeys(right) : 0; - bool involve = false; -retry: - cASSERT(mc, mc->mc_snum > 1); - if (left_room > room_threshold && left_room >= right_room && - (IS_MODIFIABLE(mc->mc_txn, left) || involve)) { - /* try merge with left */ - cASSERT(mc, left_nkeys >= minkeys); - mn.mc_pg[mn.mc_top] = left; - mn.mc_ki[mn.mc_top - 1] = (indx_t)(ki_pre_top - 1); - mn.mc_ki[mn.mc_top] = (indx_t)(left_nkeys - 1); - mc->mc_ki[mc->mc_top] = 0; - const size_t new_ki = ki_top + left_nkeys; - mn.mc_ki[mn.mc_top] += mc->mc_ki[mn.mc_top] + 1; - /* We want rebalance to find mn when doing fixups */ - WITH_CURSOR_TRACKING(mn, rc = page_merge(mc, &mn)); - if (likely(rc != MDBX_RESULT_TRUE)) { - cursor_restore(&mn, mc); - mc->mc_ki[mc->mc_top] = (indx_t)new_ki; - cASSERT(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys); - return rc; - } - } - if (right_room > room_threshold && - (IS_MODIFIABLE(mc->mc_txn, right) || involve)) { - /* try merge with right */ - cASSERT(mc, right_nkeys >= minkeys); - mn.mc_pg[mn.mc_top] = right; - mn.mc_ki[mn.mc_top - 1] = (indx_t)(ki_pre_top + 1); - mn.mc_ki[mn.mc_top] = 0; - mc->mc_ki[mc->mc_top] = (indx_t)nkeys; - WITH_CURSOR_TRACKING(mn, rc = page_merge(&mn, mc)); - if (likely(rc != MDBX_RESULT_TRUE)) { - mc->mc_ki[mc->mc_top] = (indx_t)ki_top; - cASSERT(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys); - return rc; - } - } - - if (left_nkeys > minkeys && - (right_nkeys <= left_nkeys || right_room >= left_room) && - (IS_MODIFIABLE(mc->mc_txn, left) || involve)) { - /* try move from left */ - mn.mc_pg[mn.mc_top] = left; - mn.mc_ki[mn.mc_top - 1] = (indx_t)(ki_pre_top - 1); - mn.mc_ki[mn.mc_top] = (indx_t)(left_nkeys - 1); - mc->mc_ki[mc->mc_top] = 0; - WITH_CURSOR_TRACKING(mn, rc = node_move(&mn, mc, true)); - if (likely(rc != MDBX_RESULT_TRUE)) { - mc->mc_ki[mc->mc_top] = (indx_t)(ki_top + 1); - cASSERT(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys); - return rc; - } - } - if (right_nkeys > minkeys && (IS_MODIFIABLE(mc->mc_txn, 
right) || involve)) { - /* try move from right */ - mn.mc_pg[mn.mc_top] = right; - mn.mc_ki[mn.mc_top - 1] = (indx_t)(ki_pre_top + 1); - mn.mc_ki[mn.mc_top] = 0; - mc->mc_ki[mc->mc_top] = (indx_t)nkeys; - WITH_CURSOR_TRACKING(mn, rc = node_move(&mn, mc, false)); - if (likely(rc != MDBX_RESULT_TRUE)) { - mc->mc_ki[mc->mc_top] = (indx_t)ki_top; - cASSERT(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys); - return rc; - } - } - - if (nkeys >= minkeys) { - mc->mc_ki[mc->mc_top] = (indx_t)ki_top; - if (AUDIT_ENABLED()) - return cursor_check_updating(mc); - return MDBX_SUCCESS; - } - - if (mc->mc_txn->mt_env->me_options.prefer_waf_insteadof_balance && - likely(room_threshold > 0)) { - room_threshold = 0; - goto retry; - } - if (likely(!involve) && - (likely(mc->mc_dbi != FREE_DBI) || mc->mc_txn->tw.loose_pages || - MDBX_PNL_GETSIZE(mc->mc_txn->tw.relist) || (mc->mc_flags & C_GCU) || - (mc->mc_txn->mt_flags & MDBX_TXN_DRAINED_GC) || room_threshold)) { - involve = true; - goto retry; - } - if (likely(room_threshold > 0)) { - room_threshold = 0; - goto retry; - } - ERROR("Unable to merge/rebalance %s page %" PRIaPGNO - " (has %zu keys, full %.1f%%, used %zu, room %zu bytes )", - (pagetype & P_LEAF) ? "leaf" : "branch", tp->mp_pgno, numkeys, - page_fill(mc->mc_txn->mt_env, tp), page_used(mc->mc_txn->mt_env, tp), - room); - return MDBX_PROBLEM; -} - -__cold static int page_check(const MDBX_cursor *const mc, - const MDBX_page *const mp) { - DKBUF; - int rc = MDBX_SUCCESS; - if (unlikely(mp->mp_pgno < MIN_PAGENO || mp->mp_pgno > MAX_PAGENO)) - rc = bad_page(mp, "invalid pgno (%u)\n", mp->mp_pgno); - - MDBX_env *const env = mc->mc_txn->mt_env; - const ptrdiff_t offset = ptr_dist(mp, env->me_map); - unsigned flags_mask = P_ILL_BITS; - unsigned flags_expected = 0; - if (offset < 0 || - offset > (ptrdiff_t)(pgno2bytes(env, mc->mc_txn->mt_next_pgno) - - ((mp->mp_flags & P_SUBP) ? 
PAGEHDRSZ + 1 - : env->me_psize))) { - /* should be dirty page without MDBX_WRITEMAP, or a subpage of. */ - flags_mask -= P_SUBP; - if ((env->me_flags & MDBX_WRITEMAP) != 0 || - (!IS_SHADOWED(mc->mc_txn, mp) && !(mp->mp_flags & P_SUBP))) - rc = bad_page(mp, "invalid page-address %p, offset %zi\n", - __Wpedantic_format_voidptr(mp), offset); - } else if (offset & (env->me_psize - 1)) - flags_expected = P_SUBP; - - if (unlikely((mp->mp_flags & flags_mask) != flags_expected)) - rc = bad_page(mp, "unknown/extra page-flags (have 0x%x, expect 0x%x)\n", - mp->mp_flags & flags_mask, flags_expected); - - cASSERT(mc, (mc->mc_checking & CC_LEAF2) == 0 || (mc->mc_flags & C_SUB) != 0); - const uint8_t type = PAGETYPE_WHOLE(mp); - switch (type) { - default: - return bad_page(mp, "invalid type (%u)\n", type); - case P_OVERFLOW: - if (unlikely(mc->mc_flags & C_SUB)) - rc = bad_page(mp, "unexpected %s-page for %s (db-flags 0x%x)\n", "large", - "nested dupsort tree", mc->mc_db->md_flags); - const pgno_t npages = mp->mp_pages; - if (unlikely(npages < 1 || npages >= MAX_PAGENO / 2)) - rc = bad_page(mp, "invalid n-pages (%u) for large-page\n", npages); - if (unlikely(mp->mp_pgno + npages > mc->mc_txn->mt_next_pgno)) - rc = bad_page( - mp, "end of large-page beyond (%u) allocated space (%u next-pgno)\n", - mp->mp_pgno + npages, mc->mc_txn->mt_next_pgno); - return rc; //-------------------------- end of large/overflow page handling - case P_LEAF | P_SUBP: - if (unlikely(mc->mc_db->md_depth != 1)) - rc = bad_page(mp, "unexpected %s-page for %s (db-flags 0x%x)\n", - "leaf-sub", "nested dupsort db", mc->mc_db->md_flags); - /* fall through */ - __fallthrough; - case P_LEAF: - if (unlikely((mc->mc_checking & CC_LEAF2) != 0)) - rc = bad_page( - mp, "unexpected leaf-page for dupfixed subtree (db-lags 0x%x)\n", - mc->mc_db->md_flags); - break; - case P_LEAF | P_LEAF2 | P_SUBP: - if (unlikely(mc->mc_db->md_depth != 1)) - rc = bad_page(mp, "unexpected %s-page for %s (db-flags 0x%x)\n", - 
"leaf2-sub", "nested dupsort db", mc->mc_db->md_flags); - /* fall through */ - __fallthrough; - case P_LEAF | P_LEAF2: - if (unlikely((mc->mc_checking & CC_LEAF2) == 0)) - rc = bad_page( - mp, - "unexpected leaf2-page for non-dupfixed (sub)tree (db-flags 0x%x)\n", - mc->mc_db->md_flags); - break; - case P_BRANCH: - break; - } - - if (unlikely(mp->mp_upper < mp->mp_lower || (mp->mp_lower & 1) || - PAGEHDRSZ + mp->mp_upper > env->me_psize)) - rc = bad_page(mp, "invalid page lower(%u)/upper(%u) with limit %zu\n", - mp->mp_lower, mp->mp_upper, page_space(env)); - - const char *const end_of_page = ptr_disp(mp, env->me_psize); - const size_t nkeys = page_numkeys(mp); - STATIC_ASSERT(P_BRANCH == 1); - if (unlikely(nkeys <= (uint8_t)(mp->mp_flags & P_BRANCH))) { - if ((!(mc->mc_flags & C_SUB) || mc->mc_db->md_entries) && - (!(mc->mc_checking & CC_UPDATING) || - !(IS_MODIFIABLE(mc->mc_txn, mp) || (mp->mp_flags & P_SUBP)))) - rc = - bad_page(mp, "%s-page nkeys (%zu) < %u\n", - IS_BRANCH(mp) ? "branch" : "leaf", nkeys, 1 + IS_BRANCH(mp)); - } - - const size_t ksize_max = keysize_max(env->me_psize, 0); - const size_t leaf2_ksize = mp->mp_leaf2_ksize; - if (IS_LEAF2(mp)) { - if (unlikely((mc->mc_flags & C_SUB) == 0 || - (mc->mc_db->md_flags & MDBX_DUPFIXED) == 0)) - rc = bad_page(mp, "unexpected leaf2-page (db-flags 0x%x)\n", - mc->mc_db->md_flags); - else if (unlikely(leaf2_ksize != mc->mc_db->md_xsize)) - rc = bad_page(mp, "invalid leaf2_ksize %zu\n", leaf2_ksize); - else if (unlikely(((leaf2_ksize & nkeys) ^ mp->mp_upper) & 1)) - rc = bad_page( - mp, "invalid page upper (%u) for nkeys %zu with leaf2-length %zu\n", - mp->mp_upper, nkeys, leaf2_ksize); - } else { - if (unlikely((mp->mp_upper & 1) || PAGEHDRSZ + mp->mp_upper + - nkeys * sizeof(MDBX_node) + - nkeys - 1 > - env->me_psize)) - rc = - bad_page(mp, "invalid page upper (%u) for nkeys %zu with limit %zu\n", - mp->mp_upper, nkeys, page_space(env)); - } - - MDBX_val here, prev = {0, 0}; - for (size_t i = 0; i < nkeys; 
++i) { - if (IS_LEAF2(mp)) { - const char *const key = page_leaf2key(mp, i, leaf2_ksize); - if (unlikely(end_of_page < key + leaf2_ksize)) { - rc = bad_page(mp, "leaf2-item beyond (%zu) page-end\n", - key + leaf2_ksize - end_of_page); - continue; - } - - if (unlikely(leaf2_ksize != mc->mc_dbx->md_klen_min)) { - if (unlikely(leaf2_ksize < mc->mc_dbx->md_klen_min || - leaf2_ksize > mc->mc_dbx->md_klen_max)) - rc = bad_page( - mp, "leaf2-item size (%zu) <> min/max length (%zu/%zu)\n", - leaf2_ksize, mc->mc_dbx->md_klen_min, mc->mc_dbx->md_klen_max); - else - mc->mc_dbx->md_klen_min = mc->mc_dbx->md_klen_max = leaf2_ksize; - } - if ((mc->mc_checking & CC_SKIPORD) == 0) { - here.iov_base = (void *)key; - here.iov_len = leaf2_ksize; - if (prev.iov_base && unlikely(mc->mc_dbx->md_cmp(&prev, &here) >= 0)) - rc = bad_page(mp, "leaf2-item #%zu wrong order (%s >= %s)\n", i, - DKEY(&prev), DVAL(&here)); - prev = here; - } - } else { - const MDBX_node *const node = page_node(mp, i); - const char *const node_end = ptr_disp(node, NODESIZE); - if (unlikely(node_end > end_of_page)) { - rc = bad_page(mp, "node[%zu] (%zu) beyond page-end\n", i, - node_end - end_of_page); - continue; - } - const size_t ksize = node_ks(node); - if (unlikely(ksize > ksize_max)) - rc = bad_page(mp, "node[%zu] too long key (%zu)\n", i, ksize); - const char *const key = node_key(node); - if (unlikely(end_of_page < key + ksize)) { - rc = bad_page(mp, "node[%zu] key (%zu) beyond page-end\n", i, - key + ksize - end_of_page); - continue; - } - if ((IS_LEAF(mp) || i > 0)) { - if (unlikely(ksize < mc->mc_dbx->md_klen_min || - ksize > mc->mc_dbx->md_klen_max)) - rc = bad_page( - mp, "node[%zu] key size (%zu) <> min/max key-length (%zu/%zu)\n", - i, ksize, mc->mc_dbx->md_klen_min, mc->mc_dbx->md_klen_max); - if ((mc->mc_checking & CC_SKIPORD) == 0) { - here.iov_base = (void *)key; - here.iov_len = ksize; - if (prev.iov_base && unlikely(mc->mc_dbx->md_cmp(&prev, &here) >= 0)) - rc = bad_page(mp, "node[%zu] key 
wrong order (%s >= %s)\n", i, - DKEY(&prev), DVAL(&here)); - prev = here; - } - } - if (IS_BRANCH(mp)) { - if ((mc->mc_checking & CC_UPDATING) == 0 && i == 0 && - unlikely(ksize != 0)) - rc = bad_page(mp, "branch-node[%zu] wrong 0-node key-length (%zu)\n", - i, ksize); - const pgno_t ref = node_pgno(node); - if (unlikely(ref < MIN_PAGENO) || - (unlikely(ref >= mc->mc_txn->mt_next_pgno) && - (unlikely(ref >= mc->mc_txn->mt_geo.now) || - !(mc->mc_checking & CC_RETIRING)))) - rc = bad_page(mp, "branch-node[%zu] wrong pgno (%u)\n", i, ref); - if (unlikely(node_flags(node))) - rc = bad_page(mp, "branch-node[%zu] wrong flags (%u)\n", i, - node_flags(node)); - continue; - } - - switch (node_flags(node)) { - default: - rc = - bad_page(mp, "invalid node[%zu] flags (%u)\n", i, node_flags(node)); - break; - case F_BIGDATA /* data on large-page */: - case 0 /* usual */: - case F_SUBDATA /* sub-db */: - case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */: - case F_DUPDATA /* short sub-page */: - break; - } - - const size_t dsize = node_ds(node); - const char *const data = node_data(node); - if (node_flags(node) & F_BIGDATA) { - if (unlikely(end_of_page < data + sizeof(pgno_t))) { - rc = bad_page( - mp, "node-%s(%zu of %zu, %zu bytes) beyond (%zu) page-end\n", - "bigdata-pgno", i, nkeys, dsize, data + dsize - end_of_page); - continue; - } - if (unlikely(dsize <= mc->mc_dbx->md_vlen_min || - dsize > mc->mc_dbx->md_vlen_max)) - rc = bad_page( - mp, - "big-node data size (%zu) <> min/max value-length (%zu/%zu)\n", - dsize, mc->mc_dbx->md_vlen_min, mc->mc_dbx->md_vlen_max); - if (unlikely(node_size_len(node_ks(node), dsize) <= - mc->mc_txn->mt_env->me_leaf_nodemax) && - mc->mc_dbi != FREE_DBI) - poor_page(mp, "too small data (%zu bytes) for bigdata-node", dsize); - - if ((mc->mc_checking & CC_RETIRING) == 0) { - const pgr_t lp = - page_get_large(mc, node_largedata_pgno(node), mp->mp_txnid); - if (unlikely(lp.err != MDBX_SUCCESS)) - return lp.err; - cASSERT(mc, 
PAGETYPE_WHOLE(lp.page) == P_OVERFLOW); - const unsigned npages = number_of_ovpages(env, dsize); - if (unlikely(lp.page->mp_pages != npages)) { - if (lp.page->mp_pages < npages) - rc = bad_page(lp.page, - "too less n-pages %u for bigdata-node (%zu bytes)", - lp.page->mp_pages, dsize); - else if (mc->mc_dbi != FREE_DBI) - poor_page(lp.page, - "extra n-pages %u for bigdata-node (%zu bytes)", - lp.page->mp_pages, dsize); - } - } - continue; - } - - if (unlikely(end_of_page < data + dsize)) { - rc = bad_page(mp, - "node-%s(%zu of %zu, %zu bytes) beyond (%zu) page-end\n", - "data", i, nkeys, dsize, data + dsize - end_of_page); - continue; - } - - switch (node_flags(node)) { - default: - /* wrong, but already handled */ - continue; - case 0 /* usual */: - if (unlikely(dsize < mc->mc_dbx->md_vlen_min || - dsize > mc->mc_dbx->md_vlen_max)) { - rc = bad_page( - mp, "node-data size (%zu) <> min/max value-length (%zu/%zu)\n", - dsize, mc->mc_dbx->md_vlen_min, mc->mc_dbx->md_vlen_max); - continue; - } - break; - case F_SUBDATA /* sub-db */: - if (unlikely(dsize != sizeof(MDBX_db))) { - rc = bad_page(mp, "invalid sub-db record size (%zu)\n", dsize); - continue; - } - break; - case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */: - if (unlikely(dsize != sizeof(MDBX_db))) { - rc = bad_page(mp, "invalid nested-db record size (%zu, expect %zu)\n", - dsize, sizeof(MDBX_db)); - continue; - } - break; - case F_DUPDATA /* short sub-page */: - if (unlikely(dsize <= PAGEHDRSZ)) { - rc = bad_page(mp, "invalid nested/sub-page record size (%zu)\n", - dsize); - continue; - } else { - const MDBX_page *const sp = (MDBX_page *)data; - switch (sp->mp_flags & - /* ignore legacy P_DIRTY flag */ ~P_LEGACY_DIRTY) { - case P_LEAF | P_SUBP: - case P_LEAF | P_LEAF2 | P_SUBP: - break; - default: - rc = bad_page(mp, "invalid nested/sub-page flags (0x%02x)\n", - sp->mp_flags); - continue; - } - - const char *const end_of_subpage = data + dsize; - const intptr_t nsubkeys = page_numkeys(sp); - if 
(unlikely(nsubkeys == 0) && !(mc->mc_checking & CC_UPDATING) && - mc->mc_db->md_entries) - rc = bad_page(mp, "no keys on a %s-page\n", - IS_LEAF2(sp) ? "leaf2-sub" : "leaf-sub"); - - MDBX_val sub_here, sub_prev = {0, 0}; - for (int j = 0; j < nsubkeys; j++) { - if (IS_LEAF2(sp)) { - /* LEAF2 pages have no mp_ptrs[] or node headers */ - const size_t sub_ksize = sp->mp_leaf2_ksize; - const char *const sub_key = page_leaf2key(sp, j, sub_ksize); - if (unlikely(end_of_subpage < sub_key + sub_ksize)) { - rc = bad_page(mp, "nested-leaf2-key beyond (%zu) nested-page\n", - sub_key + sub_ksize - end_of_subpage); - continue; - } - - if (unlikely(sub_ksize != mc->mc_dbx->md_vlen_min)) { - if (unlikely(sub_ksize < mc->mc_dbx->md_vlen_min || - sub_ksize > mc->mc_dbx->md_vlen_max)) - rc = bad_page(mp, - "nested-leaf2-key size (%zu) <> min/max " - "value-length (%zu/%zu)\n", - sub_ksize, mc->mc_dbx->md_vlen_min, - mc->mc_dbx->md_vlen_max); - else - mc->mc_dbx->md_vlen_min = mc->mc_dbx->md_vlen_max = sub_ksize; - } - if ((mc->mc_checking & CC_SKIPORD) == 0) { - sub_here.iov_base = (void *)sub_key; - sub_here.iov_len = sub_ksize; - if (sub_prev.iov_base && - unlikely(mc->mc_dbx->md_dcmp(&sub_prev, &sub_here) >= 0)) - rc = bad_page(mp, - "nested-leaf2-key #%u wrong order (%s >= %s)\n", - j, DKEY(&sub_prev), DVAL(&sub_here)); - sub_prev = sub_here; - } - } else { - const MDBX_node *const sub_node = page_node(sp, j); - const char *const sub_node_end = ptr_disp(sub_node, NODESIZE); - if (unlikely(sub_node_end > end_of_subpage)) { - rc = bad_page(mp, "nested-node beyond (%zu) nested-page\n", - end_of_subpage - sub_node_end); - continue; - } - if (unlikely(node_flags(sub_node) != 0)) - rc = bad_page(mp, "nested-node invalid flags (%u)\n", - node_flags(sub_node)); - - const size_t sub_ksize = node_ks(sub_node); - const char *const sub_key = node_key(sub_node); - const size_t sub_dsize = node_ds(sub_node); - /* char *sub_data = node_data(sub_node); */ - - if (unlikely(sub_ksize < 
mc->mc_dbx->md_vlen_min || - sub_ksize > mc->mc_dbx->md_vlen_max)) - rc = bad_page(mp, - "nested-node-key size (%zu) <> min/max " - "value-length (%zu/%zu)\n", - sub_ksize, mc->mc_dbx->md_vlen_min, - mc->mc_dbx->md_vlen_max); - if ((mc->mc_checking & CC_SKIPORD) == 0) { - sub_here.iov_base = (void *)sub_key; - sub_here.iov_len = sub_ksize; - if (sub_prev.iov_base && - unlikely(mc->mc_dbx->md_dcmp(&sub_prev, &sub_here) >= 0)) - rc = bad_page(mp, - "nested-node-key #%u wrong order (%s >= %s)\n", - j, DKEY(&sub_prev), DVAL(&sub_here)); - sub_prev = sub_here; - } - if (unlikely(sub_dsize != 0)) - rc = bad_page(mp, "nested-node non-empty data size (%zu)\n", - sub_dsize); - if (unlikely(end_of_subpage < sub_key + sub_ksize)) - rc = bad_page(mp, "nested-node-key beyond (%zu) nested-page\n", - sub_key + sub_ksize - end_of_subpage); - } - } - } - break; - } - } - } - return rc; -} - -__cold static int cursor_check(const MDBX_cursor *mc) { - if (!mc->mc_txn->tw.dirtylist) { - cASSERT(mc, - (mc->mc_txn->mt_flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); - } else { - cASSERT(mc, - (mc->mc_txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - cASSERT(mc, mc->mc_txn->tw.dirtyroom + mc->mc_txn->tw.dirtylist->length == - (mc->mc_txn->mt_parent - ? mc->mc_txn->mt_parent->tw.dirtyroom - : mc->mc_txn->mt_env->me_options.dp_limit)); - } - cASSERT(mc, mc->mc_top == mc->mc_snum - 1 || (mc->mc_checking & CC_UPDATING)); - if (unlikely(mc->mc_top != mc->mc_snum - 1) && - (mc->mc_checking & CC_UPDATING) == 0) - return MDBX_CURSOR_FULL; - cASSERT(mc, (mc->mc_checking & CC_UPDATING) - ? mc->mc_snum <= mc->mc_db->md_depth - : mc->mc_snum == mc->mc_db->md_depth); - if (unlikely((mc->mc_checking & CC_UPDATING) - ? 
mc->mc_snum > mc->mc_db->md_depth - : mc->mc_snum != mc->mc_db->md_depth)) - return MDBX_CURSOR_FULL; - - for (int n = 0; n < (int)mc->mc_snum; ++n) { - MDBX_page *mp = mc->mc_pg[n]; - const size_t nkeys = page_numkeys(mp); - const bool expect_branch = (n < mc->mc_db->md_depth - 1) ? true : false; - const bool expect_nested_leaf = - (n + 1 == mc->mc_db->md_depth - 1) ? true : false; - const bool branch = IS_BRANCH(mp) ? true : false; - cASSERT(mc, branch == expect_branch); - if (unlikely(branch != expect_branch)) - return MDBX_CURSOR_FULL; - if ((mc->mc_checking & CC_UPDATING) == 0) { - cASSERT(mc, nkeys > mc->mc_ki[n] || (!branch && nkeys == mc->mc_ki[n] && - (mc->mc_flags & C_EOF) != 0)); - if (unlikely(nkeys <= mc->mc_ki[n] && - !(!branch && nkeys == mc->mc_ki[n] && - (mc->mc_flags & C_EOF) != 0))) - return MDBX_CURSOR_FULL; - } else { - cASSERT(mc, nkeys + 1 >= mc->mc_ki[n]); - if (unlikely(nkeys + 1 < mc->mc_ki[n])) - return MDBX_CURSOR_FULL; - } - - int err = page_check(mc, mp); - if (unlikely(err != MDBX_SUCCESS)) - return err; - - for (size_t i = 0; i < nkeys; ++i) { - if (branch) { - MDBX_node *node = page_node(mp, i); - cASSERT(mc, node_flags(node) == 0); - if (unlikely(node_flags(node) != 0)) - return MDBX_CURSOR_FULL; - pgno_t pgno = node_pgno(node); - MDBX_page *np; - err = page_get(mc, pgno, &np, mp->mp_txnid); - cASSERT(mc, err == MDBX_SUCCESS); - if (unlikely(err != MDBX_SUCCESS)) - return err; - const bool nested_leaf = IS_LEAF(np) ? 
true : false; - cASSERT(mc, nested_leaf == expect_nested_leaf); - if (unlikely(nested_leaf != expect_nested_leaf)) - return MDBX_CURSOR_FULL; - err = page_check(mc, np); - if (unlikely(err != MDBX_SUCCESS)) - return err; - } - } - } - return MDBX_SUCCESS; -} - -__cold static int cursor_check_updating(MDBX_cursor *mc) { - const uint8_t checking = mc->mc_checking; - mc->mc_checking |= CC_UPDATING; - const int rc = cursor_check(mc); - mc->mc_checking = checking; - return rc; -} - -int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, - const MDBX_val *data) { - int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!key)) - return MDBX_EINVAL; - - if (unlikely(dbi <= FREE_DBI)) - return MDBX_BAD_DBI; - - if (unlikely(txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) - return (txn->mt_flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS : MDBX_BAD_TXN; - - return delete (txn, dbi, key, data, 0); -} - -static int delete(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, - const MDBX_val *data, unsigned flags) { - MDBX_cursor_couple cx; - MDBX_cursor_op op; - MDBX_val rdata; - int rc; - DKBUF_DEBUG; - - DEBUG("====> delete db %u key [%s], data [%s]", dbi, DKEY_DEBUG(key), - DVAL_DEBUG(data)); - - rc = cursor_init(&cx.outer, txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (data) { - op = MDBX_GET_BOTH; - rdata = *data; - data = &rdata; - } else { - op = MDBX_SET; - flags |= MDBX_ALLDUPS; - } - rc = cursor_set(&cx.outer, (MDBX_val *)key, (MDBX_val *)data, op).err; - if (likely(rc == MDBX_SUCCESS)) { - /* let mdbx_page_split know about this cursor if needed: - * delete will trigger a rebalance; if it needs to move - * a node from one page to another, it will have to - * update the parent's separator key(s). If the new sepkey - * is larger than the current one, the parent page may - * run out of space, triggering a split. We need this - * cursor to be consistent until the end of the rebalance. 
*/ - cx.outer.mc_next = txn->mt_cursors[dbi]; - txn->mt_cursors[dbi] = &cx.outer; - rc = cursor_del(&cx.outer, flags); - txn->mt_cursors[dbi] = cx.outer.mc_next; - } - return rc; -} - -/* Split a page and insert a new node. - * Set MDBX_TXN_ERROR on failure. - * [in,out] mc Cursor pointing to the page and desired insertion index. - * The cursor will be updated to point to the actual page and index where - * the node got inserted after the split. - * [in] newkey The key for the newly inserted node. - * [in] newdata The data for the newly inserted node. - * [in] newpgno The page number, if the new node is a branch node. - * [in] naf The NODE_ADD_FLAGS for the new node. - * Returns 0 on success, non-zero on failure. */ -static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, - MDBX_val *const newdata, pgno_t newpgno, - const unsigned naf) { - unsigned flags; - int rc = MDBX_SUCCESS, foliage = 0; - size_t i, ptop; - MDBX_env *const env = mc->mc_txn->mt_env; - MDBX_val rkey, xdata; - MDBX_page *tmp_ki_copy = NULL; - DKBUF; - - MDBX_page *const mp = mc->mc_pg[mc->mc_top]; - cASSERT(mc, (mp->mp_flags & P_ILL_BITS) == 0); - - const size_t newindx = mc->mc_ki[mc->mc_top]; - size_t nkeys = page_numkeys(mp); - if (AUDIT_ENABLED()) { - rc = cursor_check_updating(mc); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - STATIC_ASSERT(P_BRANCH == 1); - const size_t minkeys = (mp->mp_flags & P_BRANCH) + (size_t)1; - - DEBUG(">> splitting %s-page %" PRIaPGNO - " and adding %zu+%zu [%s] at %i, nkeys %zi", - IS_LEAF(mp) ? "leaf" : "branch", mp->mp_pgno, newkey->iov_len, - newdata ? newdata->iov_len : 0, DKEY_DEBUG(newkey), - mc->mc_ki[mc->mc_top], nkeys); - cASSERT(mc, nkeys + 1 >= minkeys * 2); - - /* Create a new sibling page. 
*/ - pgr_t npr = page_new(mc, mp->mp_flags); - if (unlikely(npr.err != MDBX_SUCCESS)) - return npr.err; - MDBX_page *const sister = npr.page; - sister->mp_leaf2_ksize = mp->mp_leaf2_ksize; - DEBUG("new sibling: page %" PRIaPGNO, sister->mp_pgno); - - /* Usually when splitting the root page, the cursor - * height is 1. But when called from update_key, - * the cursor height may be greater because it walks - * up the stack while finding the branch slot to update. */ - if (mc->mc_top < 1) { - npr = page_new(mc, P_BRANCH); - rc = npr.err; - if (unlikely(rc != MDBX_SUCCESS)) - goto done; - MDBX_page *const pp = npr.page; - /* shift current top to make room for new parent */ - cASSERT(mc, mc->mc_snum < 2 && mc->mc_db->md_depth > 0); -#if MDBX_DEBUG - memset(mc->mc_pg + 3, 0, sizeof(mc->mc_pg) - sizeof(mc->mc_pg[0]) * 3); - memset(mc->mc_ki + 3, -1, sizeof(mc->mc_ki) - sizeof(mc->mc_ki[0]) * 3); -#endif - mc->mc_pg[2] = mc->mc_pg[1]; - mc->mc_ki[2] = mc->mc_ki[1]; - mc->mc_pg[1] = mc->mc_pg[0]; - mc->mc_ki[1] = mc->mc_ki[0]; - mc->mc_pg[0] = pp; - mc->mc_ki[0] = 0; - mc->mc_db->md_root = pp->mp_pgno; - DEBUG("root split! new root = %" PRIaPGNO, pp->mp_pgno); - foliage = mc->mc_db->md_depth++; - - /* Add left (implicit) pointer. */ - rc = node_add_branch(mc, 0, NULL, mp->mp_pgno); - if (unlikely(rc != MDBX_SUCCESS)) { - /* undo the pre-push */ - mc->mc_pg[0] = mc->mc_pg[1]; - mc->mc_ki[0] = mc->mc_ki[1]; - mc->mc_db->md_root = mp->mp_pgno; - mc->mc_db->md_depth--; - goto done; - } - mc->mc_snum++; - mc->mc_top++; - ptop = 0; - if (AUDIT_ENABLED()) { - rc = cursor_check_updating(mc); - if (unlikely(rc != MDBX_SUCCESS)) - goto done; - } - } else { - ptop = mc->mc_top - 1; - DEBUG("parent branch page is %" PRIaPGNO, mc->mc_pg[ptop]->mp_pgno); - } - - MDBX_cursor mn; - cursor_copy(mc, &mn); - mn.mc_pg[mn.mc_top] = sister; - mn.mc_ki[mn.mc_top] = 0; - mn.mc_ki[ptop] = mc->mc_ki[ptop] + 1; - - size_t split_indx = - (newindx < nkeys) - ? 
/* split at the middle */ (nkeys + 1) >> 1 - : /* split at the end (i.e. like append-mode ) */ nkeys - minkeys + 1; - eASSERT(env, split_indx >= minkeys && split_indx <= nkeys - minkeys + 1); - - cASSERT(mc, !IS_BRANCH(mp) || newindx > 0); - MDBX_val sepkey = {nullptr, 0}; - /* It is reasonable and possible to split the page at the begin */ - if (unlikely(newindx < minkeys)) { - split_indx = minkeys; - if (newindx == 0 && !(naf & MDBX_SPLIT_REPLACE)) { - split_indx = 0; - /* Checking for ability of splitting by the left-side insertion - * of a pure page with the new key */ - for (i = 0; i < mc->mc_top; ++i) - if (mc->mc_ki[i]) { - get_key(page_node(mc->mc_pg[i], mc->mc_ki[i]), &sepkey); - if (mc->mc_dbx->md_cmp(newkey, &sepkey) >= 0) - split_indx = minkeys; - break; - } - if (split_indx == 0) { - /* Save the current first key which was omitted on the parent branch - * page and should be updated if the new first entry will be added */ - if (IS_LEAF2(mp)) { - sepkey.iov_len = mp->mp_leaf2_ksize; - sepkey.iov_base = page_leaf2key(mp, 0, sepkey.iov_len); - } else - get_key(page_node(mp, 0), &sepkey); - cASSERT(mc, mc->mc_dbx->md_cmp(newkey, &sepkey) < 0); - /* Avoiding rare complex cases of nested split the parent page(s) */ - if (page_room(mc->mc_pg[ptop]) < branch_size(env, &sepkey)) - split_indx = minkeys; - } - if (foliage) { - TRACE("pure-left: foliage %u, top %i, ptop %zu, split_indx %zi, " - "minkeys %zi, sepkey %s, parent-room %zu, need4split %zu", - foliage, mc->mc_top, ptop, split_indx, minkeys, - DKEY_DEBUG(&sepkey), page_room(mc->mc_pg[ptop]), - branch_size(env, &sepkey)); - TRACE("pure-left: newkey %s, newdata %s, newindx %zu", - DKEY_DEBUG(newkey), DVAL_DEBUG(newdata), newindx); - } - } - } - - const bool pure_right = split_indx == nkeys; - const bool pure_left = split_indx == 0; - if (unlikely(pure_right)) { - /* newindx == split_indx == nkeys */ - TRACE("no-split, but add new pure page at the %s", "right/after"); - cASSERT(mc, newindx == nkeys && 
split_indx == nkeys && minkeys == 1); - sepkey = *newkey; - } else if (unlikely(pure_left)) { - /* newindx == split_indx == 0 */ - TRACE("pure-left: no-split, but add new pure page at the %s", - "left/before"); - cASSERT(mc, newindx == 0 && split_indx == 0 && minkeys == 1); - TRACE("pure-left: old-first-key is %s", DKEY_DEBUG(&sepkey)); - } else { - if (IS_LEAF2(sister)) { - /* Move half of the keys to the right sibling */ - const intptr_t distance = mc->mc_ki[mc->mc_top] - split_indx; - size_t ksize = mc->mc_db->md_xsize; - void *const split = page_leaf2key(mp, split_indx, ksize); - size_t rsize = (nkeys - split_indx) * ksize; - size_t lsize = (nkeys - split_indx) * sizeof(indx_t); - cASSERT(mc, mp->mp_lower >= lsize); - mp->mp_lower -= (indx_t)lsize; - cASSERT(mc, sister->mp_lower + lsize <= UINT16_MAX); - sister->mp_lower += (indx_t)lsize; - cASSERT(mc, mp->mp_upper + rsize - lsize <= UINT16_MAX); - mp->mp_upper += (indx_t)(rsize - lsize); - cASSERT(mc, sister->mp_upper >= rsize - lsize); - sister->mp_upper -= (indx_t)(rsize - lsize); - sepkey.iov_len = ksize; - sepkey.iov_base = (newindx != split_indx) ? 
split : newkey->iov_base; - if (distance < 0) { - cASSERT(mc, ksize >= sizeof(indx_t)); - void *const ins = page_leaf2key(mp, mc->mc_ki[mc->mc_top], ksize); - memcpy(sister->mp_ptrs, split, rsize); - sepkey.iov_base = sister->mp_ptrs; - memmove(ptr_disp(ins, ksize), ins, - (split_indx - mc->mc_ki[mc->mc_top]) * ksize); - memcpy(ins, newkey->iov_base, ksize); - cASSERT(mc, UINT16_MAX - mp->mp_lower >= (int)sizeof(indx_t)); - mp->mp_lower += sizeof(indx_t); - cASSERT(mc, mp->mp_upper >= ksize - sizeof(indx_t)); - mp->mp_upper -= (indx_t)(ksize - sizeof(indx_t)); - cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->mp_upper) & 1) == 0); - } else { - memcpy(sister->mp_ptrs, split, distance * ksize); - void *const ins = page_leaf2key(sister, distance, ksize); - memcpy(ins, newkey->iov_base, ksize); - memcpy(ptr_disp(ins, ksize), ptr_disp(split, distance * ksize), - rsize - distance * ksize); - cASSERT(mc, UINT16_MAX - sister->mp_lower >= (int)sizeof(indx_t)); - sister->mp_lower += sizeof(indx_t); - cASSERT(mc, sister->mp_upper >= ksize - sizeof(indx_t)); - sister->mp_upper -= (indx_t)(ksize - sizeof(indx_t)); - cASSERT(mc, distance <= (int)UINT16_MAX); - mc->mc_ki[mc->mc_top] = (indx_t)distance; - cASSERT(mc, - (((ksize & page_numkeys(sister)) ^ sister->mp_upper) & 1) == 0); - } - - if (AUDIT_ENABLED()) { - rc = cursor_check_updating(mc); - if (unlikely(rc != MDBX_SUCCESS)) - goto done; - rc = cursor_check_updating(&mn); - if (unlikely(rc != MDBX_SUCCESS)) - goto done; - } - } else { - /* grab a page to hold a temporary copy */ - tmp_ki_copy = page_malloc(mc->mc_txn, 1); - if (unlikely(tmp_ki_copy == NULL)) { - rc = MDBX_ENOMEM; - goto done; - } - - const size_t max_space = page_space(env); - const size_t new_size = IS_LEAF(mp) ? 
leaf_size(env, newkey, newdata) - : branch_size(env, newkey); - - /* prepare to insert */ - for (i = 0; i < newindx; ++i) - tmp_ki_copy->mp_ptrs[i] = mp->mp_ptrs[i]; - tmp_ki_copy->mp_ptrs[i] = (indx_t)-1; - while (++i <= nkeys) - tmp_ki_copy->mp_ptrs[i] = mp->mp_ptrs[i - 1]; - tmp_ki_copy->mp_pgno = mp->mp_pgno; - tmp_ki_copy->mp_flags = mp->mp_flags; - tmp_ki_copy->mp_txnid = INVALID_TXNID; - tmp_ki_copy->mp_lower = 0; - tmp_ki_copy->mp_upper = (indx_t)max_space; - - /* Добавляемый узел может не поместиться в страницу-половину вместе - * с количественной половиной узлов из исходной страницы. В худшем случае, - * в страницу-половину с добавляемым узлом могут попасть самые больше узлы - * из исходной страницы, а другую половину только узлы с самыми короткими - * ключами и с пустыми данными. Поэтому, чтобы найти подходящую границу - * разреза требуется итерировать узлы и считая их объем. - * - * Однако, при простом количественном делении (без учета размера ключей - * и данных) на страницах-половинах будет примерно вдвое меньше узлов. - * Поэтому добавляемый узел точно поместится, если его размер не больше - * чем место "освобождающееся" от заголовков узлов, которые переедут - * в другую страницу-половину. Кроме этого, как минимум по одному байту - * будет в каждом ключе, в худшем случае кроме одного, который может быть - * нулевого размера. */ - - if (newindx == split_indx && nkeys >= 5) { - STATIC_ASSERT(P_BRANCH == 1); - split_indx += mp->mp_flags & P_BRANCH; - } - eASSERT(env, split_indx >= minkeys && split_indx <= nkeys + 1 - minkeys); - const size_t dim_nodes = - (newindx >= split_indx) ? split_indx : nkeys - split_indx; - const size_t dim_used = (sizeof(indx_t) + NODESIZE + 1) * dim_nodes; - if (new_size >= dim_used) { - /* Search for best acceptable split point */ - i = (newindx < split_indx) ? 0 : nkeys; - intptr_t dir = (newindx < split_indx) ? 
1 : -1; - size_t before = 0, after = new_size + page_used(env, mp); - size_t best_split = split_indx; - size_t best_shift = INT_MAX; - - TRACE("seek separator from %zu, step %zi, default %zu, new-idx %zu, " - "new-size %zu", - i, dir, split_indx, newindx, new_size); - do { - cASSERT(mc, i <= nkeys); - size_t size = new_size; - if (i != newindx) { - MDBX_node *node = ptr_disp(mp, tmp_ki_copy->mp_ptrs[i] + PAGEHDRSZ); - size = NODESIZE + node_ks(node) + sizeof(indx_t); - if (IS_LEAF(mp)) - size += (node_flags(node) & F_BIGDATA) ? sizeof(pgno_t) - : node_ds(node); - size = EVEN(size); - } - - before += size; - after -= size; - TRACE("step %zu, size %zu, before %zu, after %zu, max %zu", i, size, - before, after, max_space); - - if (before <= max_space && after <= max_space) { - const size_t split = i + (dir > 0); - if (split >= minkeys && split <= nkeys + 1 - minkeys) { - const size_t shift = branchless_abs(split_indx - split); - if (shift >= best_shift) - break; - best_shift = shift; - best_split = split; - if (!best_shift) - break; - } - } - i += dir; - } while (i < nkeys); - - split_indx = best_split; - TRACE("chosen %zu", split_indx); - } - eASSERT(env, split_indx >= minkeys && split_indx <= nkeys + 1 - minkeys); - - sepkey = *newkey; - if (split_indx != newindx) { - MDBX_node *node = - ptr_disp(mp, tmp_ki_copy->mp_ptrs[split_indx] + PAGEHDRSZ); - sepkey.iov_len = node_ks(node); - sepkey.iov_base = node_key(node); - } - } - } - DEBUG("separator is %zd [%s]", split_indx, DKEY_DEBUG(&sepkey)); - - bool did_split_parent = false; - /* Copy separator key to the parent. 
*/ - if (page_room(mn.mc_pg[ptop]) < branch_size(env, &sepkey)) { - TRACE("need split parent branch-page for key %s", DKEY_DEBUG(&sepkey)); - cASSERT(mc, page_numkeys(mn.mc_pg[ptop]) > 2); - cASSERT(mc, !pure_left); - const int snum = mc->mc_snum; - const int depth = mc->mc_db->md_depth; - mn.mc_snum--; - mn.mc_top--; - did_split_parent = true; - /* We want other splits to find mn when doing fixups */ - WITH_CURSOR_TRACKING( - mn, rc = page_split(&mn, &sepkey, NULL, sister->mp_pgno, 0)); - if (unlikely(rc != MDBX_SUCCESS)) - goto done; - cASSERT(mc, (int)mc->mc_snum - snum == mc->mc_db->md_depth - depth); - if (AUDIT_ENABLED()) { - rc = cursor_check_updating(mc); - if (unlikely(rc != MDBX_SUCCESS)) - goto done; - } - - /* root split? */ - ptop += mc->mc_snum - (size_t)snum; - - /* Right page might now have changed parent. - * Check if left page also changed parent. */ - if (mn.mc_pg[ptop] != mc->mc_pg[ptop] && - mc->mc_ki[ptop] >= page_numkeys(mc->mc_pg[ptop])) { - for (i = 0; i < ptop; i++) { - mc->mc_pg[i] = mn.mc_pg[i]; - mc->mc_ki[i] = mn.mc_ki[i]; - } - mc->mc_pg[ptop] = mn.mc_pg[ptop]; - if (mn.mc_ki[ptop]) { - mc->mc_ki[ptop] = mn.mc_ki[ptop] - 1; - } else { - /* find right page's left sibling */ - mc->mc_ki[ptop] = mn.mc_ki[ptop]; - rc = cursor_sibling(mc, SIBLING_LEFT); - if (unlikely(rc != MDBX_SUCCESS)) { - if (rc == MDBX_NOTFOUND) /* improper mdbx_cursor_sibling() result */ { - ERROR("unexpected %i error going left sibling", rc); - rc = MDBX_PROBLEM; - } - goto done; - } - } - } - } else if (unlikely(pure_left)) { - MDBX_page *ptop_page = mc->mc_pg[ptop]; - TRACE("pure-left: adding to parent page %u node[%u] left-leaf page #%u key " - "%s", - ptop_page->mp_pgno, mc->mc_ki[ptop], sister->mp_pgno, - DKEY(mc->mc_ki[ptop] ? newkey : NULL)); - assert(mc->mc_top == ptop + 1); - mc->mc_top = (uint8_t)ptop; - rc = node_add_branch(mc, mc->mc_ki[ptop], mc->mc_ki[ptop] ? 
newkey : NULL, - sister->mp_pgno); - cASSERT(mc, mp == mc->mc_pg[ptop + 1] && newindx == mc->mc_ki[ptop + 1] && - ptop == mc->mc_top); - - if (likely(rc == MDBX_SUCCESS) && mc->mc_ki[ptop] == 0) { - MDBX_node *node = page_node(mc->mc_pg[ptop], 1); - TRACE("pure-left: update prev-first key on parent to %s", DKEY(&sepkey)); - cASSERT(mc, node_ks(node) == 0 && node_pgno(node) == mp->mp_pgno); - cASSERT(mc, mc->mc_top == ptop && mc->mc_ki[ptop] == 0); - mc->mc_ki[ptop] = 1; - rc = update_key(mc, &sepkey); - cASSERT(mc, mc->mc_top == ptop && mc->mc_ki[ptop] == 1); - cASSERT(mc, mp == mc->mc_pg[ptop + 1] && newindx == mc->mc_ki[ptop + 1]); - mc->mc_ki[ptop] = 0; - } else { - TRACE("pure-left: no-need-update prev-first key on parent %s", - DKEY(&sepkey)); - } - - mc->mc_top++; - if (unlikely(rc != MDBX_SUCCESS)) - goto done; - - MDBX_node *node = page_node(mc->mc_pg[ptop], mc->mc_ki[ptop] + (size_t)1); - cASSERT(mc, node_pgno(node) == mp->mp_pgno && mc->mc_pg[ptop] == ptop_page); - } else { - mn.mc_top--; - TRACE("add-to-parent the right-entry[%u] for new sibling-page", - mn.mc_ki[ptop]); - rc = node_add_branch(&mn, mn.mc_ki[ptop], &sepkey, sister->mp_pgno); - mn.mc_top++; - if (unlikely(rc != MDBX_SUCCESS)) - goto done; - } - - if (unlikely(pure_left | pure_right)) { - mc->mc_pg[mc->mc_top] = sister; - mc->mc_ki[mc->mc_top] = 0; - switch (PAGETYPE_WHOLE(sister)) { - case P_LEAF: { - cASSERT(mc, newpgno == 0 || newpgno == P_INVALID); - rc = node_add_leaf(mc, 0, newkey, newdata, naf); - } break; - case P_LEAF | P_LEAF2: { - cASSERT(mc, (naf & (F_BIGDATA | F_SUBDATA | F_DUPDATA)) == 0); - cASSERT(mc, newpgno == 0 || newpgno == P_INVALID); - rc = node_add_leaf2(mc, 0, newkey); - } break; - default: - rc = bad_page(sister, "wrong page-type %u\n", PAGETYPE_WHOLE(sister)); - } - if (unlikely(rc != MDBX_SUCCESS)) - goto done; - - if (pure_right) { - for (i = 0; i < mc->mc_top; i++) - mc->mc_ki[i] = mn.mc_ki[i]; - } else if (mc->mc_ki[mc->mc_top - 1] == 0) { - for (i = 2; i <= 
mc->mc_top; ++i) - if (mc->mc_ki[mc->mc_top - i]) { - get_key( - page_node(mc->mc_pg[mc->mc_top - i], mc->mc_ki[mc->mc_top - i]), - &sepkey); - if (mc->mc_dbx->md_cmp(newkey, &sepkey) < 0) { - mc->mc_top -= (uint8_t)i; - DEBUG("pure-left: update new-first on parent [%i] page %u key %s", - mc->mc_ki[mc->mc_top], mc->mc_pg[mc->mc_top]->mp_pgno, - DKEY(newkey)); - rc = update_key(mc, newkey); - mc->mc_top += (uint8_t)i; - if (unlikely(rc != MDBX_SUCCESS)) - goto done; - } - break; - } - } - } else if (tmp_ki_copy) { /* !IS_LEAF2(mp) */ - /* Move nodes */ - mc->mc_pg[mc->mc_top] = sister; - i = split_indx; - size_t n = 0; - do { - TRACE("i %zu, nkeys %zu => n %zu, rp #%u", i, nkeys, n, sister->mp_pgno); - pgno_t pgno = 0; - MDBX_val *rdata = NULL; - if (i == newindx) { - rkey = *newkey; - if (IS_LEAF(mp)) - rdata = newdata; - else - pgno = newpgno; - flags = naf; - /* Update index for the new key. */ - mc->mc_ki[mc->mc_top] = (indx_t)n; - } else { - MDBX_node *node = ptr_disp(mp, tmp_ki_copy->mp_ptrs[i] + PAGEHDRSZ); - rkey.iov_base = node_key(node); - rkey.iov_len = node_ks(node); - if (IS_LEAF(mp)) { - xdata.iov_base = node_data(node); - xdata.iov_len = node_ds(node); - rdata = &xdata; - } else - pgno = node_pgno(node); - flags = node_flags(node); - } - - switch (PAGETYPE_WHOLE(sister)) { - case P_BRANCH: { - cASSERT(mc, 0 == (uint16_t)flags); - /* First branch index doesn't need key data. */ - rc = node_add_branch(mc, n, n ? 
&rkey : NULL, pgno); - } break; - case P_LEAF: { - cASSERT(mc, pgno == 0); - cASSERT(mc, rdata != NULL); - rc = node_add_leaf(mc, n, &rkey, rdata, flags); - } break; - /* case P_LEAF | P_LEAF2: { - cASSERT(mc, (nflags & (F_BIGDATA | F_SUBDATA | F_DUPDATA)) == 0); - cASSERT(mc, gno == 0); - rc = mdbx_node_add_leaf2(mc, n, &rkey); - } break; */ - default: - rc = bad_page(sister, "wrong page-type %u\n", PAGETYPE_WHOLE(sister)); - } - if (unlikely(rc != MDBX_SUCCESS)) - goto done; - - ++n; - if (++i > nkeys) { - i = 0; - n = 0; - mc->mc_pg[mc->mc_top] = tmp_ki_copy; - TRACE("switch to mp #%u", tmp_ki_copy->mp_pgno); - } - } while (i != split_indx); - - TRACE("i %zu, nkeys %zu, n %zu, pgno #%u", i, nkeys, n, - mc->mc_pg[mc->mc_top]->mp_pgno); - - nkeys = page_numkeys(tmp_ki_copy); - for (i = 0; i < nkeys; i++) - mp->mp_ptrs[i] = tmp_ki_copy->mp_ptrs[i]; - mp->mp_lower = tmp_ki_copy->mp_lower; - mp->mp_upper = tmp_ki_copy->mp_upper; - memcpy(page_node(mp, nkeys - 1), page_node(tmp_ki_copy, nkeys - 1), - env->me_psize - tmp_ki_copy->mp_upper - PAGEHDRSZ); - - /* reset back to original page */ - if (newindx < split_indx) { - mc->mc_pg[mc->mc_top] = mp; - } else { - mc->mc_pg[mc->mc_top] = sister; - mc->mc_ki[ptop]++; - /* Make sure mc_ki is still valid. */ - if (mn.mc_pg[ptop] != mc->mc_pg[ptop] && - mc->mc_ki[ptop] >= page_numkeys(mc->mc_pg[ptop])) { - for (i = 0; i <= ptop; i++) { - mc->mc_pg[i] = mn.mc_pg[i]; - mc->mc_ki[i] = mn.mc_ki[i]; - } - } - } - } else if (newindx >= split_indx) { - mc->mc_pg[mc->mc_top] = sister; - mc->mc_ki[ptop]++; - /* Make sure mc_ki is still valid. 
*/ - if (mn.mc_pg[ptop] != mc->mc_pg[ptop] && - mc->mc_ki[ptop] >= page_numkeys(mc->mc_pg[ptop])) { - for (i = 0; i <= ptop; i++) { - mc->mc_pg[i] = mn.mc_pg[i]; - mc->mc_ki[i] = mn.mc_ki[i]; - } - } - } - - /* Adjust other cursors pointing to mp and/or to parent page */ - nkeys = page_numkeys(mp); - for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; - m2 = m2->mc_next) { - MDBX_cursor *m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; - if (m3 == mc) - continue; - if (!(m2->mc_flags & m3->mc_flags & C_INITIALIZED)) - continue; - if (foliage) { - /* sub cursors may be on different DB */ - if (m3->mc_pg[0] != mp) - continue; - /* root split */ - for (int k = foliage; k >= 0; k--) { - m3->mc_ki[k + 1] = m3->mc_ki[k]; - m3->mc_pg[k + 1] = m3->mc_pg[k]; - } - m3->mc_ki[0] = m3->mc_ki[0] >= nkeys + pure_left; - m3->mc_pg[0] = mc->mc_pg[0]; - m3->mc_snum++; - m3->mc_top++; - } - - if (m3->mc_top >= mc->mc_top && m3->mc_pg[mc->mc_top] == mp && !pure_left) { - if (m3->mc_ki[mc->mc_top] >= newindx && !(naf & MDBX_SPLIT_REPLACE)) - m3->mc_ki[mc->mc_top]++; - if (m3->mc_ki[mc->mc_top] >= nkeys) { - m3->mc_pg[mc->mc_top] = sister; - cASSERT(mc, m3->mc_ki[mc->mc_top] >= nkeys); - m3->mc_ki[mc->mc_top] -= (indx_t)nkeys; - for (i = 0; i < mc->mc_top; i++) { - m3->mc_ki[i] = mn.mc_ki[i]; - m3->mc_pg[i] = mn.mc_pg[i]; - } - } - } else if (!did_split_parent && m3->mc_top >= ptop && - m3->mc_pg[ptop] == mc->mc_pg[ptop] && - m3->mc_ki[ptop] >= mc->mc_ki[ptop]) { - m3->mc_ki[ptop]++; /* also for the `pure-left` case */ - } - if (XCURSOR_INITED(m3) && IS_LEAF(mp)) - XCURSOR_REFRESH(m3, m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]); - } - TRACE("mp #%u left: %zd, sister #%u left: %zd", mp->mp_pgno, page_room(mp), - sister->mp_pgno, page_room(sister)); - -done: - if (tmp_ki_copy) - dpage_free(env, tmp_ki_copy, 1); - - if (unlikely(rc != MDBX_SUCCESS)) - mc->mc_txn->mt_flags |= MDBX_TXN_ERROR; - else { - if (AUDIT_ENABLED()) - rc = cursor_check_updating(mc); - if 
(unlikely(naf & MDBX_RESERVE)) { - MDBX_node *node = page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); - if (!(node_flags(node) & F_BIGDATA)) - newdata->iov_base = node_data(node); - } -#if MDBX_ENABLE_PGOP_STAT - env->me_lck->mti_pgop_stat.split.weak += 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - } - - DEBUG("<< mp #%u, rc %d", mp->mp_pgno, rc); - return rc; -} - -int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, - MDBX_put_flags_t flags) { - int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!key || !data)) - return MDBX_EINVAL; - - if (unlikely(dbi <= FREE_DBI)) - return MDBX_BAD_DBI; - - if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | - MDBX_ALLDUPS | MDBX_RESERVE | MDBX_APPEND | - MDBX_APPENDDUP | MDBX_CURRENT | MDBX_MULTIPLE))) - return MDBX_EINVAL; - - if (unlikely(txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) - return (txn->mt_flags & MDBX_TXN_RDONLY) ? 
MDBX_EACCESS : MDBX_BAD_TXN; - - MDBX_cursor_couple cx; - rc = cursor_init(&cx.outer, txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - cx.outer.mc_next = txn->mt_cursors[dbi]; - txn->mt_cursors[dbi] = &cx.outer; - - /* LY: support for update (explicit overwrite) */ - if (flags & MDBX_CURRENT) { - rc = cursor_set(&cx.outer, (MDBX_val *)key, NULL, MDBX_SET).err; - if (likely(rc == MDBX_SUCCESS) && - (txn->mt_dbs[dbi].md_flags & MDBX_DUPSORT) && - (flags & MDBX_ALLDUPS) == 0) { - /* LY: allows update (explicit overwrite) only for unique keys */ - MDBX_node *node = page_node(cx.outer.mc_pg[cx.outer.mc_top], - cx.outer.mc_ki[cx.outer.mc_top]); - if (node_flags(node) & F_DUPDATA) { - tASSERT(txn, XCURSOR_INITED(&cx.outer) && - cx.outer.mc_xcursor->mx_db.md_entries > 1); - rc = MDBX_EMULTIVAL; - if ((flags & MDBX_NOOVERWRITE) == 0) { - flags -= MDBX_CURRENT; - rc = cursor_del(&cx.outer, MDBX_ALLDUPS); - } - } - } - } - - if (likely(rc == MDBX_SUCCESS)) - rc = cursor_put_checklen(&cx.outer, key, data, flags); - txn->mt_cursors[dbi] = cx.outer.mc_next; - - return rc; -} - -/**** COPYING *****************************************************************/ - -/* State needed for a double-buffering compacting copy. */ -typedef struct mdbx_compacting_ctx { - MDBX_env *mc_env; - MDBX_txn *mc_txn; - osal_condpair_t mc_condpair; - uint8_t *mc_wbuf[2]; - size_t mc_wlen[2]; - mdbx_filehandle_t mc_fd; - /* Error code. Never cleared if set. Both threads can set nonzero - * to fail the copy. Not mutex-protected, MDBX expects atomic int. */ - volatile int mc_error; - pgno_t mc_next_pgno; - volatile unsigned mc_head; - volatile unsigned mc_tail; -} mdbx_compacting_ctx; - -/* Dedicated writer thread for compacting copy. 
*/ -__cold static THREAD_RESULT THREAD_CALL compacting_write_thread(void *arg) { - mdbx_compacting_ctx *const ctx = arg; - -#if defined(EPIPE) && !(defined(_WIN32) || defined(_WIN64)) - sigset_t sigset; - sigemptyset(&sigset); - sigaddset(&sigset, SIGPIPE); - ctx->mc_error = pthread_sigmask(SIG_BLOCK, &sigset, NULL); -#endif /* EPIPE */ - - osal_condpair_lock(&ctx->mc_condpair); - while (!ctx->mc_error) { - while (ctx->mc_tail == ctx->mc_head && !ctx->mc_error) { - int err = osal_condpair_wait(&ctx->mc_condpair, true); - if (err != MDBX_SUCCESS) { - ctx->mc_error = err; - goto bailout; - } - } - const unsigned toggle = ctx->mc_tail & 1; - size_t wsize = ctx->mc_wlen[toggle]; - if (wsize == 0) { - ctx->mc_tail += 1; - break /* EOF */; - } - ctx->mc_wlen[toggle] = 0; - uint8_t *ptr = ctx->mc_wbuf[toggle]; - if (!ctx->mc_error) { - int err = osal_write(ctx->mc_fd, ptr, wsize); - if (err != MDBX_SUCCESS) { -#if defined(EPIPE) && !(defined(_WIN32) || defined(_WIN64)) - if (err == EPIPE) { - /* Collect the pending SIGPIPE, - * otherwise at least OS X gives it to the process on thread-exit. */ - int unused; - sigwait(&sigset, &unused); - } -#endif /* EPIPE */ - ctx->mc_error = err; - goto bailout; - } - } - ctx->mc_tail += 1; - osal_condpair_signal(&ctx->mc_condpair, false); - } -bailout: - osal_condpair_unlock(&ctx->mc_condpair); - return (THREAD_RESULT)0; -} - -/* Give buffer and/or MDBX_EOF to writer thread, await unused buffer. 
*/ -__cold static int compacting_toggle_write_buffers(mdbx_compacting_ctx *ctx) { - osal_condpair_lock(&ctx->mc_condpair); - eASSERT(ctx->mc_env, ctx->mc_head - ctx->mc_tail < 2 || ctx->mc_error); - ctx->mc_head += 1; - osal_condpair_signal(&ctx->mc_condpair, true); - while (!ctx->mc_error && - ctx->mc_head - ctx->mc_tail == 2 /* both buffers in use */) { - int err = osal_condpair_wait(&ctx->mc_condpair, false); - if (err != MDBX_SUCCESS) - ctx->mc_error = err; - } - osal_condpair_unlock(&ctx->mc_condpair); - return ctx->mc_error; -} - -__cold static int compacting_walk_sdb(mdbx_compacting_ctx *ctx, MDBX_db *sdb); - -static int compacting_put_bytes(mdbx_compacting_ctx *ctx, const void *src, - size_t bytes, pgno_t pgno, pgno_t npages) { - assert(pgno == 0 || bytes > PAGEHDRSZ); - while (bytes > 0) { - const size_t side = ctx->mc_head & 1; - const size_t left = MDBX_ENVCOPY_WRITEBUF - ctx->mc_wlen[side]; - if (left < (pgno ? PAGEHDRSZ : 1)) { - int err = compacting_toggle_write_buffers(ctx); - if (unlikely(err != MDBX_SUCCESS)) - return err; - continue; - } - const size_t chunk = (bytes < left) ? 
bytes : left; - void *const dst = ctx->mc_wbuf[side] + ctx->mc_wlen[side]; - if (src) { - memcpy(dst, src, chunk); - if (pgno) { - assert(chunk > PAGEHDRSZ); - MDBX_page *mp = dst; - mp->mp_pgno = pgno; - if (mp->mp_txnid == 0) - mp->mp_txnid = ctx->mc_txn->mt_txnid; - if (mp->mp_flags == P_OVERFLOW) { - assert(bytes <= pgno2bytes(ctx->mc_env, npages)); - mp->mp_pages = npages; - } - pgno = 0; - } - src = ptr_disp(src, chunk); - } else - memset(dst, 0, chunk); - bytes -= chunk; - ctx->mc_wlen[side] += chunk; - } - return MDBX_SUCCESS; -} - -static int compacting_put_page(mdbx_compacting_ctx *ctx, const MDBX_page *mp, - const size_t head_bytes, const size_t tail_bytes, - const pgno_t npages) { - if (tail_bytes) { - assert(head_bytes + tail_bytes <= ctx->mc_env->me_psize); - assert(npages == 1 && - (PAGETYPE_WHOLE(mp) == P_BRANCH || PAGETYPE_WHOLE(mp) == P_LEAF)); - } else { - assert(head_bytes <= pgno2bytes(ctx->mc_env, npages)); - assert((npages == 1 && PAGETYPE_WHOLE(mp) == (P_LEAF | P_LEAF2)) || - PAGETYPE_WHOLE(mp) == P_OVERFLOW); - } - - const pgno_t pgno = ctx->mc_next_pgno; - ctx->mc_next_pgno += npages; - int err = compacting_put_bytes(ctx, mp, head_bytes, pgno, npages); - if (unlikely(err != MDBX_SUCCESS)) - return err; - err = compacting_put_bytes( - ctx, nullptr, pgno2bytes(ctx->mc_env, npages) - (head_bytes + tail_bytes), - 0, 0); - if (unlikely(err != MDBX_SUCCESS)) - return err; - return compacting_put_bytes( - ctx, ptr_disp(mp, ctx->mc_env->me_psize - tail_bytes), tail_bytes, 0, 0); -} - -__cold static int compacting_walk_tree(mdbx_compacting_ctx *ctx, - MDBX_cursor *mc, pgno_t *root, - txnid_t parent_txnid) { - mc->mc_snum = 1; - int rc = page_get(mc, *root, &mc->mc_pg[0], parent_txnid); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - rc = page_search_root(mc, nullptr, MDBX_PS_FIRST); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - /* Make cursor pages writable */ - void *const buf = osal_malloc(pgno2bytes(ctx->mc_env, mc->mc_snum)); - if 
(buf == NULL) - return MDBX_ENOMEM; - - void *ptr = buf; - for (size_t i = 0; i < mc->mc_top; i++) { - page_copy(ptr, mc->mc_pg[i], ctx->mc_env->me_psize); - mc->mc_pg[i] = ptr; - ptr = ptr_disp(ptr, ctx->mc_env->me_psize); - } - /* This is writable space for a leaf page. Usually not needed. */ - MDBX_page *const leaf = ptr; - - while (mc->mc_snum > 0) { - MDBX_page *mp = mc->mc_pg[mc->mc_top]; - size_t n = page_numkeys(mp); - - if (IS_LEAF(mp)) { - if (!(mc->mc_flags & - C_SUB) /* may have nested F_SUBDATA or F_BIGDATA nodes */) { - for (size_t i = 0; i < n; i++) { - MDBX_node *node = page_node(mp, i); - if (node_flags(node) == F_BIGDATA) { - /* Need writable leaf */ - if (mp != leaf) { - mc->mc_pg[mc->mc_top] = leaf; - page_copy(leaf, mp, ctx->mc_env->me_psize); - mp = leaf; - node = page_node(mp, i); - } - - const pgr_t lp = - page_get_large(mc, node_largedata_pgno(node), mp->mp_txnid); - if (unlikely((rc = lp.err) != MDBX_SUCCESS)) - goto done; - const size_t datasize = node_ds(node); - const pgno_t npages = number_of_ovpages(ctx->mc_env, datasize); - poke_pgno(node_data(node), ctx->mc_next_pgno); - rc = compacting_put_page(ctx, lp.page, PAGEHDRSZ + datasize, 0, - npages); - if (unlikely(rc != MDBX_SUCCESS)) - goto done; - } else if (node_flags(node) & F_SUBDATA) { - if (!MDBX_DISABLE_VALIDATION && - unlikely(node_ds(node) != sizeof(MDBX_db))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid dupsort sub-tree node size", - (unsigned)node_ds(node)); - rc = MDBX_CORRUPTED; - goto done; - } - - /* Need writable leaf */ - if (mp != leaf) { - mc->mc_pg[mc->mc_top] = leaf; - page_copy(leaf, mp, ctx->mc_env->me_psize); - mp = leaf; - node = page_node(mp, i); - } - - MDBX_db *nested = nullptr; - if (node_flags(node) & F_DUPDATA) { - rc = cursor_xinit1(mc, node, mp); - if (likely(rc == MDBX_SUCCESS)) { - nested = &mc->mc_xcursor->mx_db; - rc = compacting_walk_tree(ctx, &mc->mc_xcursor->mx_cursor, - &nested->md_root, mp->mp_txnid); - } - } else { - 
cASSERT(mc, (mc->mc_flags & C_SUB) == 0 && mc->mc_xcursor == 0); - MDBX_cursor_couple *couple = - container_of(mc, MDBX_cursor_couple, outer); - cASSERT(mc, - couple->inner.mx_cursor.mc_signature == ~MDBX_MC_LIVE && - !couple->inner.mx_cursor.mc_flags && - !couple->inner.mx_cursor.mc_db && - !couple->inner.mx_cursor.mc_dbx); - nested = &couple->inner.mx_db; - memcpy(nested, node_data(node), sizeof(MDBX_db)); - rc = compacting_walk_sdb(ctx, nested); - } - if (unlikely(rc != MDBX_SUCCESS)) - goto done; - memcpy(node_data(node), nested, sizeof(MDBX_db)); - } - } - } - } else { - mc->mc_ki[mc->mc_top]++; - if (mc->mc_ki[mc->mc_top] < n) { - while (1) { - const MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); - rc = page_get(mc, node_pgno(node), &mp, mp->mp_txnid); - if (unlikely(rc != MDBX_SUCCESS)) - goto done; - mc->mc_top++; - mc->mc_snum++; - mc->mc_ki[mc->mc_top] = 0; - if (!IS_BRANCH(mp)) { - mc->mc_pg[mc->mc_top] = mp; - break; - } - /* Whenever we advance to a sibling branch page, - * we must proceed all the way down to its first leaf. 
*/ - page_copy(mc->mc_pg[mc->mc_top], mp, ctx->mc_env->me_psize); - } - continue; - } - } - - const pgno_t pgno = ctx->mc_next_pgno; - if (likely(!IS_LEAF2(mp))) { - rc = compacting_put_page( - ctx, mp, PAGEHDRSZ + mp->mp_lower, - ctx->mc_env->me_psize - (PAGEHDRSZ + mp->mp_upper), 1); - } else { - rc = compacting_put_page( - ctx, mp, PAGEHDRSZ + page_numkeys(mp) * mp->mp_leaf2_ksize, 0, 1); - } - if (unlikely(rc != MDBX_SUCCESS)) - goto done; - - if (mc->mc_top) { - /* Update parent if there is one */ - node_set_pgno( - page_node(mc->mc_pg[mc->mc_top - 1], mc->mc_ki[mc->mc_top - 1]), - pgno); - cursor_pop(mc); - } else { - /* Otherwise we're done */ - *root = pgno; - break; - } - } -done: - osal_free(buf); - return rc; -} - -__cold static int compacting_walk_sdb(mdbx_compacting_ctx *ctx, MDBX_db *sdb) { - if (unlikely(sdb->md_root == P_INVALID)) - return MDBX_SUCCESS; /* empty db */ - - MDBX_cursor_couple couple; - memset(&couple, 0, sizeof(couple)); - couple.inner.mx_cursor.mc_signature = ~MDBX_MC_LIVE; - MDBX_dbx dbx = {.md_klen_min = INT_MAX}; - uint8_t dbi_state = DBI_LINDO | DBI_VALID; - int rc = couple_init(&couple, ~0u, ctx->mc_txn, sdb, &dbx, &dbi_state); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - couple.outer.mc_checking |= CC_SKIPORD | CC_PAGECHECK; - couple.inner.mx_cursor.mc_checking |= CC_SKIPORD | CC_PAGECHECK; - if (!sdb->md_mod_txnid) - sdb->md_mod_txnid = ctx->mc_txn->mt_txnid; - return compacting_walk_tree(ctx, &couple.outer, &sdb->md_root, - sdb->md_mod_txnid); -} - -__cold static void compacting_fixup_meta(MDBX_env *env, MDBX_meta *meta) { - eASSERT(env, meta->mm_dbs[FREE_DBI].md_mod_txnid || - meta->mm_dbs[FREE_DBI].md_root == P_INVALID); - eASSERT(env, meta->mm_dbs[MAIN_DBI].md_mod_txnid || - meta->mm_dbs[MAIN_DBI].md_root == P_INVALID); - - /* Calculate filesize taking in account shrink/growing thresholds */ - if (meta->mm_geo.next != meta->mm_geo.now) { - meta->mm_geo.now = meta->mm_geo.next; - const size_t aligner = pv2pages( - 
meta->mm_geo.grow_pv ? meta->mm_geo.grow_pv : meta->mm_geo.shrink_pv); - if (aligner) { - const pgno_t aligned = pgno_align2os_pgno( - env, meta->mm_geo.next + aligner - meta->mm_geo.next % aligner); - meta->mm_geo.now = aligned; - } - } - - if (meta->mm_geo.now < meta->mm_geo.lower) - meta->mm_geo.now = meta->mm_geo.lower; - if (meta->mm_geo.now > meta->mm_geo.upper) - meta->mm_geo.now = meta->mm_geo.upper; - - /* Update signature */ - assert(meta->mm_geo.now >= meta->mm_geo.next); - unaligned_poke_u64(4, meta->mm_sign, meta_sign(meta)); -} - -/* Make resizable */ -__cold static void meta_make_sizeable(MDBX_meta *meta) { - meta->mm_geo.lower = MIN_PAGENO; - if (meta->mm_geo.grow_pv == 0) { - const pgno_t step = 1 + (meta->mm_geo.upper - meta->mm_geo.lower) / 42; - meta->mm_geo.grow_pv = pages2pv(step); - } - if (meta->mm_geo.shrink_pv == 0) { - const pgno_t step = pv2pages(meta->mm_geo.grow_pv) << 1; - meta->mm_geo.shrink_pv = pages2pv(step); - } -} - -/* Copy environment with compaction. */ -__cold static int env_compact(MDBX_env *env, MDBX_txn *read_txn, - mdbx_filehandle_t fd, uint8_t *buffer, - const bool dest_is_pipe, - const MDBX_copy_flags_t flags) { - const size_t meta_bytes = pgno2bytes(env, NUM_METAS); - uint8_t *const data_buffer = - buffer + ceil_powerof2(meta_bytes, env->me_os_psize); - MDBX_meta *const meta = init_metas(env, buffer); - meta_set_txnid(env, meta, read_txn->mt_txnid); - - if (flags & MDBX_CP_FORCE_DYNAMIC_SIZE) - meta_make_sizeable(meta); - - /* copy canary sequences if present */ - if (read_txn->mt_canary.v) { - meta->mm_canary = read_txn->mt_canary; - meta->mm_canary.v = constmeta_txnid(meta); - } - - if (read_txn->mt_dbs[MAIN_DBI].md_root == P_INVALID) { - /* When the DB is empty, handle it specially to - * fix any breakage like page leaks from ITS#8174. 
*/ - meta->mm_dbs[MAIN_DBI].md_flags = read_txn->mt_dbs[MAIN_DBI].md_flags; - compacting_fixup_meta(env, meta); - if (dest_is_pipe) { - int rc = osal_write(fd, buffer, meta_bytes); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - } else { - /* Count free pages + GC pages. */ - MDBX_cursor_couple couple; - int rc = cursor_init(&couple.outer, read_txn, FREE_DBI); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - pgno_t gc = read_txn->mt_dbs[FREE_DBI].md_branch_pages + - read_txn->mt_dbs[FREE_DBI].md_leaf_pages + - read_txn->mt_dbs[FREE_DBI].md_overflow_pages; - MDBX_val key, data; - while ((rc = cursor_get(&couple.outer, &key, &data, MDBX_NEXT)) == - MDBX_SUCCESS) { - const MDBX_PNL pnl = data.iov_base; - if (unlikely(data.iov_len % sizeof(pgno_t) || - data.iov_len < MDBX_PNL_SIZEOF(pnl))) { - ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid GC-record length", data.iov_len); - return MDBX_CORRUPTED; - } - if (unlikely(!pnl_check(pnl, read_txn->mt_next_pgno))) { - ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid GC-record content"); - return MDBX_CORRUPTED; - } - gc += MDBX_PNL_GETSIZE(pnl); - } - if (unlikely(rc != MDBX_NOTFOUND)) - return rc; - - /* Substract GC-pages from mt_next_pgno to find the new mt_next_pgno. 
*/ - meta->mm_geo.next = read_txn->mt_next_pgno - gc; - /* Set with current main DB */ - meta->mm_dbs[MAIN_DBI] = read_txn->mt_dbs[MAIN_DBI]; - - mdbx_compacting_ctx ctx; - memset(&ctx, 0, sizeof(ctx)); - rc = osal_condpair_init(&ctx.mc_condpair); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - memset(data_buffer, 0, 2 * (size_t)MDBX_ENVCOPY_WRITEBUF); - ctx.mc_wbuf[0] = data_buffer; - ctx.mc_wbuf[1] = data_buffer + (size_t)MDBX_ENVCOPY_WRITEBUF; - ctx.mc_next_pgno = NUM_METAS; - ctx.mc_env = env; - ctx.mc_fd = fd; - ctx.mc_txn = read_txn; - - osal_thread_t thread; - int thread_err = osal_thread_create(&thread, compacting_write_thread, &ctx); - if (likely(thread_err == MDBX_SUCCESS)) { - if (dest_is_pipe) { - if (!meta->mm_dbs[MAIN_DBI].md_mod_txnid) - meta->mm_dbs[MAIN_DBI].md_mod_txnid = read_txn->mt_txnid; - compacting_fixup_meta(env, meta); - rc = osal_write(fd, buffer, meta_bytes); - } - if (likely(rc == MDBX_SUCCESS)) - rc = compacting_walk_sdb(&ctx, &meta->mm_dbs[MAIN_DBI]); - if (ctx.mc_wlen[ctx.mc_head & 1]) - /* toggle to flush non-empty buffers */ - compacting_toggle_write_buffers(&ctx); - - if (likely(rc == MDBX_SUCCESS) && - unlikely(meta->mm_geo.next != ctx.mc_next_pgno)) { - if (ctx.mc_next_pgno > meta->mm_geo.next) { - ERROR("the source DB %s: post-compactification used pages %" PRIaPGNO - " %c expected %" PRIaPGNO, - "has double-used pages or other corruption", ctx.mc_next_pgno, - '>', meta->mm_geo.next); - rc = MDBX_CORRUPTED; /* corrupted DB */ - } - if (ctx.mc_next_pgno < meta->mm_geo.next) { - WARNING( - "the source DB %s: post-compactification used pages %" PRIaPGNO - " %c expected %" PRIaPGNO, - "has page leak(s)", ctx.mc_next_pgno, '<', meta->mm_geo.next); - if (dest_is_pipe) - /* the root within already written meta-pages is wrong */ - rc = MDBX_CORRUPTED; - } - /* fixup meta */ - meta->mm_geo.next = ctx.mc_next_pgno; - } - - /* toggle with empty buffers to exit thread's loop */ - eASSERT(env, (ctx.mc_wlen[ctx.mc_head & 1]) == 0); - 
compacting_toggle_write_buffers(&ctx); - thread_err = osal_thread_join(thread); - eASSERT(env, (ctx.mc_tail == ctx.mc_head && - ctx.mc_wlen[ctx.mc_head & 1] == 0) || - ctx.mc_error); - osal_condpair_destroy(&ctx.mc_condpair); - } - if (unlikely(thread_err != MDBX_SUCCESS)) - return thread_err; - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - if (unlikely(ctx.mc_error != MDBX_SUCCESS)) - return ctx.mc_error; - if (!dest_is_pipe) - compacting_fixup_meta(env, meta); - } - - /* Extend file if required */ - if (meta->mm_geo.now != meta->mm_geo.next) { - const size_t whole_size = pgno2bytes(env, meta->mm_geo.now); - if (!dest_is_pipe) - return osal_ftruncate(fd, whole_size); - - const size_t used_size = pgno2bytes(env, meta->mm_geo.next); - memset(data_buffer, 0, (size_t)MDBX_ENVCOPY_WRITEBUF); - for (size_t offset = used_size; offset < whole_size;) { - const size_t chunk = ((size_t)MDBX_ENVCOPY_WRITEBUF < whole_size - offset) - ? (size_t)MDBX_ENVCOPY_WRITEBUF - : whole_size - offset; - int rc = osal_write(fd, data_buffer, chunk); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - offset += chunk; - } - } - return MDBX_SUCCESS; -} - -/* Copy environment as-is. 
*/ -__cold static int env_copy_asis(MDBX_env *env, MDBX_txn *read_txn, - mdbx_filehandle_t fd, uint8_t *buffer, - const bool dest_is_pipe, - const MDBX_copy_flags_t flags) { - /* We must start the actual read txn after blocking writers */ - int rc = txn_end(read_txn, TXN_END_RESET_TMP); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - /* Temporarily block writers until we snapshot the meta pages */ - rc = osal_txn_lock(env, false); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - rc = txn_renew(read_txn, MDBX_TXN_RDONLY); - if (unlikely(rc != MDBX_SUCCESS)) { - osal_txn_unlock(env); - return rc; - } - - jitter4testing(false); - const size_t meta_bytes = pgno2bytes(env, NUM_METAS); - const meta_troika_t troika = meta_tap(env); - /* Make a snapshot of meta-pages, - * but writing ones after the data was flushed */ - memcpy(buffer, env->me_map, meta_bytes); - MDBX_meta *const headcopy = /* LY: get pointer to the snapshot copy */ - ptr_disp(buffer, ptr_dist(meta_recent(env, &troika).ptr_c, env->me_map)); - osal_txn_unlock(env); - - if (flags & MDBX_CP_FORCE_DYNAMIC_SIZE) - meta_make_sizeable(headcopy); - /* Update signature to steady */ - unaligned_poke_u64(4, headcopy->mm_sign, meta_sign(headcopy)); - - /* Copy the data */ - const size_t whole_size = pgno_align2os_bytes(env, read_txn->mt_end_pgno); - const size_t used_size = pgno2bytes(env, read_txn->mt_next_pgno); - jitter4testing(false); - - if (dest_is_pipe) - rc = osal_write(fd, buffer, meta_bytes); - - uint8_t *const data_buffer = - buffer + ceil_powerof2(meta_bytes, env->me_os_psize); -#if MDBX_USE_COPYFILERANGE - static bool copyfilerange_unavailable; - bool not_the_same_filesystem = false; - struct statfs statfs_info; - if (fstatfs(fd, &statfs_info) || - statfs_info.f_type == /* ECRYPTFS_SUPER_MAGIC */ 0xf15f) - /* avoid use copyfilerange_unavailable() to ecryptfs due bugs */ - not_the_same_filesystem = true; -#endif /* MDBX_USE_COPYFILERANGE */ - for (size_t offset = meta_bytes; rc == MDBX_SUCCESS && 
offset < used_size;) { -#if MDBX_USE_SENDFILE - static bool sendfile_unavailable; - if (dest_is_pipe && likely(!sendfile_unavailable)) { - off_t in_offset = offset; - const ssize_t written = - sendfile(fd, env->me_lazy_fd, &in_offset, used_size - offset); - if (likely(written > 0)) { - offset = in_offset; - continue; - } - rc = MDBX_ENODATA; - if (written == 0 || ignore_enosys(rc = errno) != MDBX_RESULT_TRUE) - break; - sendfile_unavailable = true; - } -#endif /* MDBX_USE_SENDFILE */ - -#if MDBX_USE_COPYFILERANGE - if (!dest_is_pipe && !not_the_same_filesystem && - likely(!copyfilerange_unavailable)) { - off_t in_offset = offset, out_offset = offset; - ssize_t bytes_copied = copy_file_range( - env->me_lazy_fd, &in_offset, fd, &out_offset, used_size - offset, 0); - if (likely(bytes_copied > 0)) { - offset = in_offset; - continue; - } - rc = MDBX_ENODATA; - if (bytes_copied == 0) - break; - rc = errno; - if (rc == EXDEV || rc == /* workaround for ecryptfs bug(s), - maybe useful for others FS */ - EINVAL) - not_the_same_filesystem = true; - else if (ignore_enosys(rc) == MDBX_RESULT_TRUE) - copyfilerange_unavailable = true; - else - break; - } -#endif /* MDBX_USE_COPYFILERANGE */ - - /* fallback to portable */ - const size_t chunk = ((size_t)MDBX_ENVCOPY_WRITEBUF < used_size - offset) - ? (size_t)MDBX_ENVCOPY_WRITEBUF - : used_size - offset; - /* copy to avoid EFAULT in case swapped-out */ - memcpy(data_buffer, ptr_disp(env->me_map, offset), chunk); - rc = osal_write(fd, data_buffer, chunk); - offset += chunk; - } - - /* Extend file if required */ - if (likely(rc == MDBX_SUCCESS) && whole_size != used_size) { - if (!dest_is_pipe) - rc = osal_ftruncate(fd, whole_size); - else { - memset(data_buffer, 0, (size_t)MDBX_ENVCOPY_WRITEBUF); - for (size_t offset = used_size; - rc == MDBX_SUCCESS && offset < whole_size;) { - const size_t chunk = - ((size_t)MDBX_ENVCOPY_WRITEBUF < whole_size - offset) - ? 
(size_t)MDBX_ENVCOPY_WRITEBUF - : whole_size - offset; - rc = osal_write(fd, data_buffer, chunk); - offset += chunk; - } - } - } - - return rc; -} - -__cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, - MDBX_copy_flags_t flags) { - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - const int dest_is_pipe = osal_is_pipe(fd); - if (MDBX_IS_ERROR(dest_is_pipe)) - return dest_is_pipe; - - if (!dest_is_pipe) { - rc = osal_fseek(fd, 0); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - - const size_t buffer_size = - pgno_align2os_bytes(env, NUM_METAS) + - ceil_powerof2(((flags & MDBX_CP_COMPACT) - ? 2 * (size_t)MDBX_ENVCOPY_WRITEBUF - : (size_t)MDBX_ENVCOPY_WRITEBUF), - env->me_os_psize); - - uint8_t *buffer = NULL; - rc = osal_memalign_alloc(env->me_os_psize, buffer_size, (void **)&buffer); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - MDBX_txn *read_txn = NULL; - /* Do the lock/unlock of the reader mutex before starting the - * write txn. Otherwise other read txns could block writers. */ - rc = mdbx_txn_begin(env, NULL, MDBX_TXN_RDONLY, &read_txn); - if (unlikely(rc != MDBX_SUCCESS)) { - osal_memalign_free(buffer); - return rc; - } - - if (!dest_is_pipe) { - /* Firstly write a stub to meta-pages. - * Now we sure to incomplete copy will not be used. */ - memset(buffer, -1, pgno2bytes(env, NUM_METAS)); - rc = osal_write(fd, buffer, pgno2bytes(env, NUM_METAS)); - } - - if (likely(rc == MDBX_SUCCESS)) { - memset(buffer, 0, pgno2bytes(env, NUM_METAS)); - rc = ((flags & MDBX_CP_COMPACT) ? 
env_compact : env_copy_asis)( - env, read_txn, fd, buffer, dest_is_pipe, flags); - } - mdbx_txn_abort(read_txn); - - if (!dest_is_pipe) { - if (likely(rc == MDBX_SUCCESS)) - rc = osal_fsync(fd, MDBX_SYNC_DATA | MDBX_SYNC_SIZE); - - /* Write actual meta */ - if (likely(rc == MDBX_SUCCESS)) - rc = osal_pwrite(fd, buffer, pgno2bytes(env, NUM_METAS), 0); - - if (likely(rc == MDBX_SUCCESS)) - rc = osal_fsync(fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); - } - - osal_memalign_free(buffer); - return rc; -} - -__cold int mdbx_env_copy(MDBX_env *env, const char *dest_path, - MDBX_copy_flags_t flags) { -#if defined(_WIN32) || defined(_WIN64) - wchar_t *dest_pathW = nullptr; - int rc = osal_mb2w(dest_path, &dest_pathW); - if (likely(rc == MDBX_SUCCESS)) { - rc = mdbx_env_copyW(env, dest_pathW, flags); - osal_free(dest_pathW); - } - return rc; -} - -__cold int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest_path, - MDBX_copy_flags_t flags) { -#endif /* Windows */ - - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!dest_path)) - return MDBX_EINVAL; - - /* The destination path must exist, but the destination file must not. - * We don't want the OS to cache the writes, since the source data is - * already in the OS cache. 
*/ - mdbx_filehandle_t newfd; - rc = osal_openfile(MDBX_OPEN_COPY, env, dest_path, &newfd, -#if defined(_WIN32) || defined(_WIN64) - (mdbx_mode_t)-1 -#else - S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP -#endif - ); - -#if defined(_WIN32) || defined(_WIN64) - /* no locking required since the file opened with ShareMode == 0 */ -#else - if (rc == MDBX_SUCCESS) { - MDBX_STRUCT_FLOCK lock_op; - memset(&lock_op, 0, sizeof(lock_op)); - lock_op.l_type = F_WRLCK; - lock_op.l_whence = SEEK_SET; - lock_op.l_start = 0; - lock_op.l_len = OFF_T_MAX; - if (MDBX_FCNTL(newfd, MDBX_F_SETLK, &lock_op) -#if (defined(__linux__) || defined(__gnu_linux__)) && defined(LOCK_EX) && \ - (!defined(__ANDROID_API__) || __ANDROID_API__ >= 24) - || flock(newfd, LOCK_EX | LOCK_NB) -#endif /* Linux */ - ) - rc = errno; - } -#endif /* Windows / POSIX */ - - if (rc == MDBX_SUCCESS) - rc = mdbx_env_copy2fd(env, newfd, flags); - - if (newfd != INVALID_HANDLE_VALUE) { - int err = osal_closefile(newfd); - if (rc == MDBX_SUCCESS && err != rc) - rc = err; - if (rc != MDBX_SUCCESS) - (void)osal_removefile(dest_path); - } - - return rc; -} - -/******************************************************************************/ - -__cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, - bool onoff) { - int rc = check_env(env, false); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(flags & - ((env->me_flags & MDBX_ENV_ACTIVE) ? 
~ENV_CHANGEABLE_FLAGS - : ~ENV_USABLE_FLAGS))) - return MDBX_EPERM; - - if (unlikely(env->me_flags & MDBX_RDONLY)) - return MDBX_EACCESS; - - const bool lock_needed = - (env->me_flags & MDBX_ENV_ACTIVE) && !env_txn0_owned(env); - bool should_unlock = false; - if (lock_needed) { - rc = osal_txn_lock(env, false); - if (unlikely(rc)) - return rc; - should_unlock = true; - } - - if (onoff) - env->me_flags = merge_sync_flags(env->me_flags, flags); - else - env->me_flags &= ~flags; - - if (should_unlock) - osal_txn_unlock(env); - return MDBX_SUCCESS; -} - -__cold int mdbx_env_get_flags(const MDBX_env *env, unsigned *arg) { - int rc = check_env(env, false); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!arg)) - return MDBX_EINVAL; - - *arg = env->me_flags & ENV_USABLE_FLAGS; - return MDBX_SUCCESS; -} - -__cold int mdbx_env_set_userctx(MDBX_env *env, void *ctx) { - int rc = check_env(env, false); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - env->me_userctx = ctx; - return MDBX_SUCCESS; -} - -__cold void *mdbx_env_get_userctx(const MDBX_env *env) { - return env ? 
env->me_userctx : NULL; -} - -__cold int mdbx_env_set_assert(MDBX_env *env, MDBX_assert_func *func) { - int rc = check_env(env, false); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - -#if MDBX_DEBUG - env->me_assert_func = func; - return MDBX_SUCCESS; -#else - (void)func; - return MDBX_ENOSYS; -#endif -} - -#if defined(_WIN32) || defined(_WIN64) -__cold int mdbx_env_get_pathW(const MDBX_env *env, const wchar_t **arg) { - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!arg)) - return MDBX_EINVAL; - - *arg = env->me_pathname.specified; - return MDBX_SUCCESS; -} -#endif /* Windows */ - -__cold int mdbx_env_get_path(const MDBX_env *env, const char **arg) { - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!arg)) - return MDBX_EINVAL; - -#if defined(_WIN32) || defined(_WIN64) - if (!env->me_pathname_char) { - *arg = nullptr; - DWORD flags = /* WC_ERR_INVALID_CHARS */ 0x80; - size_t mb_len = - WideCharToMultiByte(CP_THREAD_ACP, flags, env->me_pathname.specified, - -1, nullptr, 0, nullptr, nullptr); - rc = mb_len ? MDBX_SUCCESS : (int)GetLastError(); - if (rc == ERROR_INVALID_FLAGS) { - mb_len = WideCharToMultiByte(CP_THREAD_ACP, flags = 0, - env->me_pathname.specified, -1, nullptr, 0, - nullptr, nullptr); - rc = mb_len ? 
MDBX_SUCCESS : (int)GetLastError(); - } - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - char *const mb_pathname = osal_malloc(mb_len); - if (!mb_pathname) - return MDBX_ENOMEM; - if (mb_len != (size_t)WideCharToMultiByte( - CP_THREAD_ACP, flags, env->me_pathname.specified, -1, - mb_pathname, (int)mb_len, nullptr, nullptr)) { - rc = (int)GetLastError(); - osal_free(mb_pathname); - return rc; - } - if (env->me_pathname_char || - InterlockedCompareExchangePointer( - (PVOID volatile *)&env->me_pathname_char, mb_pathname, nullptr)) - osal_free(mb_pathname); - } - *arg = env->me_pathname_char; -#else - *arg = env->me_pathname.specified; -#endif /* Windows */ - return MDBX_SUCCESS; -} - -__cold int mdbx_env_get_fd(const MDBX_env *env, mdbx_filehandle_t *arg) { - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!arg)) - return MDBX_EINVAL; - - *arg = env->me_lazy_fd; - return MDBX_SUCCESS; -} - -static void stat_get(const MDBX_db *db, MDBX_stat *st, size_t bytes) { - st->ms_depth = db->md_depth; - st->ms_branch_pages = db->md_branch_pages; - st->ms_leaf_pages = db->md_leaf_pages; - st->ms_overflow_pages = db->md_overflow_pages; - st->ms_entries = db->md_entries; - if (likely(bytes >= - offsetof(MDBX_stat, ms_mod_txnid) + sizeof(st->ms_mod_txnid))) - st->ms_mod_txnid = db->md_mod_txnid; -} - -static void stat_add(const MDBX_db *db, MDBX_stat *const st, - const size_t bytes) { - st->ms_depth += db->md_depth; - st->ms_branch_pages += db->md_branch_pages; - st->ms_leaf_pages += db->md_leaf_pages; - st->ms_overflow_pages += db->md_overflow_pages; - st->ms_entries += db->md_entries; - if (likely(bytes >= - offsetof(MDBX_stat, ms_mod_txnid) + sizeof(st->ms_mod_txnid))) - st->ms_mod_txnid = (st->ms_mod_txnid > db->md_mod_txnid) ? 
st->ms_mod_txnid - : db->md_mod_txnid; -} - -__cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { - int err = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(err != MDBX_SUCCESS)) - return err; - - MDBX_cursor_couple cx; - err = cursor_init(&cx.outer, (MDBX_txn *)txn, MAIN_DBI); - if (unlikely(err != MDBX_SUCCESS)) - return err; - - const MDBX_env *const env = txn->mt_env; - st->ms_psize = env->me_psize; - TXN_FOREACH_DBI_FROM( - txn, dbi, - /* assuming GC is internal and not subject for accounting */ MAIN_DBI) { - if ((txn->mt_dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID) - stat_add(txn->mt_dbs + dbi, st, bytes); - } - - if (!(txn->mt_dbs[MAIN_DBI].md_flags & MDBX_DUPSORT) && - txn->mt_dbs[MAIN_DBI].md_entries /* TODO: use `md_subs` field */) { - - /* scan and account not opened named subDBs */ - err = page_search(&cx.outer, NULL, MDBX_PS_FIRST); - while (err == MDBX_SUCCESS) { - const MDBX_page *mp = cx.outer.mc_pg[cx.outer.mc_top]; - for (size_t i = 0; i < page_numkeys(mp); i++) { - const MDBX_node *node = page_node(mp, i); - if (node_flags(node) != F_SUBDATA) - continue; - if (unlikely(node_ds(node) != sizeof(MDBX_db))) { - ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid subDb node size", node_ds(node)); - return MDBX_CORRUPTED; - } - - /* skip opened and already accounted */ - const MDBX_val name = {node_key(node), node_ks(node)}; - TXN_FOREACH_DBI_USER(txn, dbi) { - if ((txn->mt_dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID && - env->me_dbxs[MAIN_DBI].md_cmp(&name, - &env->me_dbxs[dbi].md_name) == 0) { - node = NULL; - break; - } - } - - if (node) { - MDBX_db db; - memcpy(&db, node_data(node), sizeof(db)); - stat_add(&db, st, bytes); - } - } - err = cursor_sibling(&cx.outer, SIBLING_RIGHT); - } - if (unlikely(err != MDBX_NOTFOUND)) - return err; - } - - return MDBX_SUCCESS; -} - -__cold int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, - MDBX_stat *dest, size_t bytes) { - if 
(unlikely(!dest)) - return MDBX_EINVAL; - const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid); - if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) - return MDBX_EINVAL; - - if (likely(txn)) { - if (env && unlikely(txn->mt_env != env)) - return MDBX_EINVAL; - return stat_acc(txn, dest, bytes); - } - - int err = check_env(env, true); - if (unlikely(err != MDBX_SUCCESS)) - return err; - - if (env->me_txn && env_txn0_owned(env)) - /* inside write-txn */ - return stat_acc(env->me_txn, dest, bytes); - - MDBX_txn *tmp_txn; - err = mdbx_txn_begin((MDBX_env *)env, NULL, MDBX_TXN_RDONLY, &tmp_txn); - if (unlikely(err != MDBX_SUCCESS)) - return err; - - const int rc = stat_acc(tmp_txn, dest, bytes); - err = mdbx_txn_abort(tmp_txn); - if (unlikely(err != MDBX_SUCCESS)) - return err; - return rc; -} - -__cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, - uint32_t *mask) { - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!mask)) - return MDBX_EINVAL; - - MDBX_cursor_couple cx; - rc = cursor_init(&cx.outer, txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - if ((cx.outer.mc_db->md_flags & MDBX_DUPSORT) == 0) - return MDBX_RESULT_TRUE; - - MDBX_val key, data; - rc = cursor_first(&cx.outer, &key, &data); - *mask = 0; - while (rc == MDBX_SUCCESS) { - const MDBX_node *node = page_node(cx.outer.mc_pg[cx.outer.mc_top], - cx.outer.mc_ki[cx.outer.mc_top]); - const MDBX_db *db = node_data(node); - const unsigned flags = node_flags(node); - switch (flags) { - case F_BIGDATA: - case 0: - /* single-value entry, deep = 0 */ - *mask |= 1 << 0; - break; - case F_DUPDATA: - /* single sub-page, deep = 1 */ - *mask |= 1 << 1; - break; - case F_DUPDATA | F_SUBDATA: - /* sub-tree */ - *mask |= 1 << UNALIGNED_PEEK_16(db, MDBX_db, md_depth); - break; - default: - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid node-size", flags); - return 
MDBX_CORRUPTED; - } - rc = cursor_next(&cx.outer, &key, &data, MDBX_NEXT_NODUP); - } - - return (rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc; -} - -__cold static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, - MDBX_envinfo *out, const size_t bytes, - meta_troika_t *const troika) { - const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); - const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); - if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) - return MDBX_PANIC; - - /* is the environment open? - * (https://libmdbx.dqdkfa.ru/dead-github/issues/171) */ - if (unlikely(!env->me_map)) { - /* environment not yet opened */ -#if 1 - /* default behavior: returns the available info but zeroed the rest */ - memset(out, 0, bytes); - out->mi_geo.lower = env->me_dbgeo.lower; - out->mi_geo.upper = env->me_dbgeo.upper; - out->mi_geo.shrink = env->me_dbgeo.shrink; - out->mi_geo.grow = env->me_dbgeo.grow; - out->mi_geo.current = env->me_dbgeo.now; - out->mi_maxreaders = env->me_maxreaders; - out->mi_dxb_pagesize = env->me_psize; - out->mi_sys_pagesize = env->me_os_psize; - if (likely(bytes > size_before_bootid)) { - out->mi_bootid.current.x = bootid.x; - out->mi_bootid.current.y = bootid.y; - } - return MDBX_SUCCESS; -#else - /* some users may prefer this behavior: return appropriate error */ - return MDBX_EPERM; -#endif - } - - *troika = (txn && !(txn->mt_flags & MDBX_TXN_RDONLY)) ? 
txn->tw.troika - : meta_tap(env); - const meta_ptr_t head = meta_recent(env, troika); - const MDBX_meta *const meta0 = METAPAGE(env, 0); - const MDBX_meta *const meta1 = METAPAGE(env, 1); - const MDBX_meta *const meta2 = METAPAGE(env, 2); - out->mi_recent_txnid = head.txnid; - out->mi_meta_txnid[0] = troika->txnid[0]; - out->mi_meta_sign[0] = unaligned_peek_u64(4, meta0->mm_sign); - out->mi_meta_txnid[1] = troika->txnid[1]; - out->mi_meta_sign[1] = unaligned_peek_u64(4, meta1->mm_sign); - out->mi_meta_txnid[2] = troika->txnid[2]; - out->mi_meta_sign[2] = unaligned_peek_u64(4, meta2->mm_sign); - if (likely(bytes > size_before_bootid)) { - memcpy(&out->mi_bootid.meta[0], &meta0->mm_bootid, 16); - memcpy(&out->mi_bootid.meta[1], &meta1->mm_bootid, 16); - memcpy(&out->mi_bootid.meta[2], &meta2->mm_bootid, 16); - } - - const volatile MDBX_meta *txn_meta = head.ptr_v; - out->mi_last_pgno = txn_meta->mm_geo.next - 1; - out->mi_geo.current = pgno2bytes(env, txn_meta->mm_geo.now); - if (txn) { - out->mi_last_pgno = txn->mt_next_pgno - 1; - out->mi_geo.current = pgno2bytes(env, txn->mt_end_pgno); - - const txnid_t wanna_meta_txnid = (txn->mt_flags & MDBX_TXN_RDONLY) - ? txn->mt_txnid - : txn->mt_txnid - xMDBX_TXNID_STEP; - txn_meta = (out->mi_meta_txnid[0] == wanna_meta_txnid) ? meta0 : txn_meta; - txn_meta = (out->mi_meta_txnid[1] == wanna_meta_txnid) ? meta1 : txn_meta; - txn_meta = (out->mi_meta_txnid[2] == wanna_meta_txnid) ? meta2 : txn_meta; - } - out->mi_geo.lower = pgno2bytes(env, txn_meta->mm_geo.lower); - out->mi_geo.upper = pgno2bytes(env, txn_meta->mm_geo.upper); - out->mi_geo.shrink = pgno2bytes(env, pv2pages(txn_meta->mm_geo.shrink_pv)); - out->mi_geo.grow = pgno2bytes(env, pv2pages(txn_meta->mm_geo.grow_pv)); - out->mi_mapsize = env->me_dxb_mmap.limit; - - const MDBX_lockinfo *const lck = env->me_lck; - out->mi_maxreaders = env->me_maxreaders; - out->mi_numreaders = env->me_lck_mmap.lck - ? 
atomic_load32(&lck->mti_numreaders, mo_Relaxed) - : INT32_MAX; - out->mi_dxb_pagesize = env->me_psize; - out->mi_sys_pagesize = env->me_os_psize; - - if (likely(bytes > size_before_bootid)) { - const uint64_t unsynced_pages = - atomic_load64(&lck->mti_unsynced_pages, mo_Relaxed) + - ((uint32_t)out->mi_recent_txnid != - atomic_load32(&lck->mti_meta_sync_txnid, mo_Relaxed)); - out->mi_unsync_volume = pgno2bytes(env, (size_t)unsynced_pages); - const uint64_t monotime_now = osal_monotime(); - uint64_t ts = atomic_load64(&lck->mti_eoos_timestamp, mo_Relaxed); - out->mi_since_sync_seconds16dot16 = - ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0; - ts = atomic_load64(&lck->mti_reader_check_timestamp, mo_Relaxed); - out->mi_since_reader_check_seconds16dot16 = - ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0; - out->mi_autosync_threshold = pgno2bytes( - env, atomic_load32(&lck->mti_autosync_threshold, mo_Relaxed)); - out->mi_autosync_period_seconds16dot16 = - osal_monotime_to_16dot16_noUnderflow( - atomic_load64(&lck->mti_autosync_period, mo_Relaxed)); - out->mi_bootid.current.x = bootid.x; - out->mi_bootid.current.y = bootid.y; - out->mi_mode = env->me_lck_mmap.lck ? 
lck->mti_envmode.weak : env->me_flags; - } - - if (likely(bytes > size_before_pgop_stat)) { -#if MDBX_ENABLE_PGOP_STAT - out->mi_pgop_stat.newly = - atomic_load64(&lck->mti_pgop_stat.newly, mo_Relaxed); - out->mi_pgop_stat.cow = atomic_load64(&lck->mti_pgop_stat.cow, mo_Relaxed); - out->mi_pgop_stat.clone = - atomic_load64(&lck->mti_pgop_stat.clone, mo_Relaxed); - out->mi_pgop_stat.split = - atomic_load64(&lck->mti_pgop_stat.split, mo_Relaxed); - out->mi_pgop_stat.merge = - atomic_load64(&lck->mti_pgop_stat.merge, mo_Relaxed); - out->mi_pgop_stat.spill = - atomic_load64(&lck->mti_pgop_stat.spill, mo_Relaxed); - out->mi_pgop_stat.unspill = - atomic_load64(&lck->mti_pgop_stat.unspill, mo_Relaxed); - out->mi_pgop_stat.wops = - atomic_load64(&lck->mti_pgop_stat.wops, mo_Relaxed); - out->mi_pgop_stat.prefault = - atomic_load64(&lck->mti_pgop_stat.prefault, mo_Relaxed); - out->mi_pgop_stat.mincore = - atomic_load64(&lck->mti_pgop_stat.mincore, mo_Relaxed); - out->mi_pgop_stat.msync = - atomic_load64(&lck->mti_pgop_stat.msync, mo_Relaxed); - out->mi_pgop_stat.fsync = - atomic_load64(&lck->mti_pgop_stat.fsync, mo_Relaxed); -#else - memset(&out->mi_pgop_stat, 0, sizeof(out->mi_pgop_stat)); -#endif /* MDBX_ENABLE_PGOP_STAT*/ - } - - txnid_t overall_latter_reader_txnid = out->mi_recent_txnid; - txnid_t self_latter_reader_txnid = overall_latter_reader_txnid; - if (env->me_lck_mmap.lck) { - for (size_t i = 0; i < out->mi_numreaders; ++i) { - const uint32_t pid = - atomic_load32(&lck->mti_readers[i].mr_pid, mo_AcquireRelease); - if (pid) { - const txnid_t txnid = safe64_read(&lck->mti_readers[i].mr_txnid); - if (overall_latter_reader_txnid > txnid) - overall_latter_reader_txnid = txnid; - if (pid == env->me_pid && self_latter_reader_txnid > txnid) - self_latter_reader_txnid = txnid; - } - } - } - out->mi_self_latter_reader_txnid = self_latter_reader_txnid; - out->mi_latter_reader_txnid = overall_latter_reader_txnid; - - osal_compiler_barrier(); - return MDBX_SUCCESS; -} - 
-__cold int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, - size_t bytes, meta_troika_t *troika) { - MDBX_envinfo snap; - int rc = env_info_snap(env, txn, &snap, sizeof(snap), troika); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - eASSERT(env, sizeof(snap) >= bytes); - while (1) { - rc = env_info_snap(env, txn, out, bytes, troika); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - snap.mi_since_sync_seconds16dot16 = out->mi_since_sync_seconds16dot16; - snap.mi_since_reader_check_seconds16dot16 = - out->mi_since_reader_check_seconds16dot16; - if (likely(memcmp(&snap, out, bytes) == 0)) - return MDBX_SUCCESS; - memcpy(&snap, out, bytes); - } -} - -__cold int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *out, - size_t bytes) { -#if defined(_WIN32) || defined(_WIN64) - wchar_t *pathnameW = nullptr; - int rc = osal_mb2w(pathname, &pathnameW); - if (likely(rc == MDBX_SUCCESS)) { - rc = mdbx_preopen_snapinfoW(pathnameW, out, bytes); - osal_free(pathnameW); - } - return rc; -} - -__cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, - size_t bytes) { -#endif /* Windows */ - if (unlikely(!out)) - return MDBX_EINVAL; - - const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); - const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); - if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && - bytes != size_before_pgop_stat) - return MDBX_EINVAL; - - memset(out, 0, bytes); - if (likely(bytes > size_before_bootid)) { - out->mi_bootid.current.x = bootid.x; - out->mi_bootid.current.y = bootid.y; - } - - MDBX_env env; - memset(&env, 0, sizeof(env)); - env.me_pid = osal_getpid(); - const size_t os_psize = osal_syspagesize(); - if (unlikely(!is_powerof2(os_psize) || os_psize < MIN_PAGESIZE)) { - ERROR("unsuitable system pagesize %" PRIuPTR, os_psize); - return MDBX_INCOMPATIBLE; - } - out->mi_sys_pagesize = env.me_os_psize = (unsigned)os_psize; - env.me_flags = 
MDBX_RDONLY | MDBX_NORDAHEAD | MDBX_ACCEDE | MDBX_VALIDATION; - env.me_stuck_meta = -1; - env.me_lfd = INVALID_HANDLE_VALUE; - env.me_lazy_fd = INVALID_HANDLE_VALUE; - env.me_dsync_fd = INVALID_HANDLE_VALUE; - env.me_fd4meta = INVALID_HANDLE_VALUE; -#if defined(_WIN32) || defined(_WIN64) - env.me_data_lock_event = INVALID_HANDLE_VALUE; - env.me_overlapped_fd = INVALID_HANDLE_VALUE; -#endif /* Windows */ - - int rc = env_handle_pathname(&env, pathname, 0); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - rc = osal_openfile(MDBX_OPEN_DXB_READ, &env, env.me_pathname.dxb, - &env.me_lazy_fd, 0); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - - MDBX_meta header; - rc = read_header(&env, &header, 0, 0); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - - setup_pagesize(&env, header.mm_psize); - out->mi_dxb_pagesize = env.me_psize; - out->mi_geo.lower = pgno2bytes(&env, header.mm_geo.lower); - out->mi_geo.upper = pgno2bytes(&env, header.mm_geo.upper); - out->mi_geo.shrink = pgno2bytes(&env, pv2pages(header.mm_geo.shrink_pv)); - out->mi_geo.grow = pgno2bytes(&env, pv2pages(header.mm_geo.grow_pv)); - out->mi_geo.current = pgno2bytes(&env, header.mm_geo.now); - out->mi_last_pgno = header.mm_geo.next - 1; - - const unsigned n = 0; - out->mi_recent_txnid = constmeta_txnid(&header); - out->mi_meta_sign[n] = unaligned_peek_u64(4, &header.mm_sign); - if (likely(bytes > size_before_bootid)) - memcpy(&out->mi_bootid.meta[n], &header.mm_bootid, 16); - -bailout: - env_close(&env, false); - return rc; -} - -__cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, - MDBX_envinfo *arg, size_t bytes) { - if (unlikely((env == NULL && txn == NULL) || arg == NULL)) - return MDBX_EINVAL; - - const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); - const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); - if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && - bytes != size_before_pgop_stat) - return 
MDBX_EINVAL; - - if (txn) { - int err = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); - if (unlikely(err != MDBX_SUCCESS)) - return err; - } - if (env) { - int err = check_env(env, false); - if (unlikely(err != MDBX_SUCCESS)) - return err; - if (txn && unlikely(txn->mt_env != env)) - return MDBX_EINVAL; - } else { - env = txn->mt_env; - } - - meta_troika_t troika; - return env_info(env, txn, arg, bytes, &troika); -} - -static __inline MDBX_cmp_func *get_default_keycmp(MDBX_db_flags_t flags) { - return (flags & MDBX_REVERSEKEY) ? cmp_reverse - : (flags & MDBX_INTEGERKEY) ? cmp_int_align2 - : cmp_lexical; -} - -static __inline MDBX_cmp_func *get_default_datacmp(MDBX_db_flags_t flags) { - return !(flags & MDBX_DUPSORT) - ? cmp_lenfast - : ((flags & MDBX_INTEGERDUP) - ? cmp_int_unaligned - : ((flags & MDBX_REVERSEDUP) ? cmp_reverse : cmp_lexical)); -} - -static int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, - MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { - const MDBX_env *const env = txn->mt_env; - eASSERT(env, dbi < txn->mt_numdbs && dbi < env->me_numdbs); - eASSERT(env, dbi_state(txn, dbi) & DBI_LINDO); - eASSERT(env, env->me_db_flags[dbi] != DB_POISON); - if ((env->me_db_flags[dbi] & DB_VALID) == 0) { - eASSERT(env, !env->me_dbxs[dbi].md_cmp && !env->me_dbxs[dbi].md_dcmp && - !env->me_dbxs[dbi].md_name.iov_len && - !env->me_dbxs[dbi].md_name.iov_base && - !env->me_dbxs[dbi].md_klen_max && - !env->me_dbxs[dbi].md_klen_min && - !env->me_dbxs[dbi].md_vlen_max && - !env->me_dbxs[dbi].md_vlen_min); - } else { - eASSERT(env, !(txn->mt_dbi_state[dbi] & DBI_VALID) || - (txn->mt_dbs[dbi].md_flags | DB_VALID) == - env->me_db_flags[dbi]); - eASSERT(env, env->me_dbxs[dbi].md_name.iov_base || dbi < CORE_DBS); - } - - /* Если dbi уже использовался, то корректными считаем четыре варианта: - * 1) user_flags равны MDBX_DB_ACCEDE - * = предполагаем что пользователь открывает существующую subDb, - * при этом код проверки не позволит установить другие 
компараторы. - * 2) user_flags нулевые, а оба компаратора пустые/нулевые или равны текущим - * = предполагаем что пользователь открывает существующую subDb - * старым способом с нулевыми с флагами по-умолчанию. - * 3) user_flags совпадают, а компараторы не заданы или те же - * = предполагаем что пользователь открывает subDb указывая все параметры; - * 4) user_flags отличаются, но subDb пустая и задан флаг MDBX_CREATE - * = предполагаем что пользователь пересоздает subDb; - */ - if ((user_flags & ~MDBX_CREATE) != - (unsigned)(env->me_db_flags[dbi] & DB_PERSISTENT_FLAGS)) { - /* flags are differs, check other conditions */ - if ((!user_flags && (!keycmp || keycmp == env->me_dbxs[dbi].md_cmp) && - (!datacmp || datacmp == env->me_dbxs[dbi].md_dcmp)) || - user_flags == MDBX_DB_ACCEDE) { - user_flags = env->me_db_flags[dbi] & DB_PERSISTENT_FLAGS; - } else if ((user_flags & MDBX_CREATE) == 0) - return /* FIXME: return extended info */ MDBX_INCOMPATIBLE; - else { - eASSERT(env, env->me_db_flags[dbi] & DB_VALID); - if (txn->mt_dbi_state[dbi] & DBI_STALE) { - int err = fetch_sdb(txn, dbi); - if (unlikely(err == MDBX_SUCCESS)) - return err; - } - eASSERT(env, - (txn->mt_dbi_state[dbi] & (DBI_LINDO | DBI_VALID | DBI_STALE)) == - (DBI_LINDO | DBI_VALID)); - if (unlikely(txn->mt_dbs[dbi].md_leaf_pages)) - return /* FIXME: return extended info */ MDBX_INCOMPATIBLE; - - /* Пересоздаём subDB если там пусто */ - if (unlikely(txn->mt_cursors[dbi])) - return MDBX_DANGLING_DBI; - env->me_db_flags[dbi] = DB_POISON; - atomic_store32(&env->me_dbi_seqs[dbi], dbi_seq_next(env, MAIN_DBI), - mo_AcquireRelease); - - const uint32_t seq = dbi_seq_next(env, dbi); - const uint16_t db_flags = user_flags & DB_PERSISTENT_FLAGS; - eASSERT(env, txn->mt_dbs[dbi].md_depth == 0 && - txn->mt_dbs[dbi].md_entries == 0 && - txn->mt_dbs[dbi].md_root == P_INVALID); - env->me_dbxs[dbi].md_cmp = - keycmp ? keycmp : get_default_keycmp(user_flags); - env->me_dbxs[dbi].md_dcmp = - datacmp ? 
datacmp : get_default_datacmp(user_flags); - txn->mt_dbs[dbi].md_flags = db_flags; - txn->mt_dbs[dbi].md_xsize = 0; - if (unlikely(setup_sdb(&env->me_dbxs[dbi], &txn->mt_dbs[dbi], - env->me_psize))) { - txn->mt_dbi_state[dbi] = DBI_LINDO; - txn->mt_flags |= MDBX_TXN_ERROR; - return MDBX_PROBLEM; - } - - env->me_db_flags[dbi] = db_flags | DB_VALID; - atomic_store32(&env->me_dbi_seqs[dbi], seq, mo_AcquireRelease); - txn->mt_dbi_seqs[dbi] = seq; - txn->mt_dbi_state[dbi] = DBI_LINDO | DBI_VALID | DBI_CREAT | DBI_DIRTY; - txn->mt_flags |= MDBX_TXN_DIRTY; - } - } - - if (!keycmp) - keycmp = (env->me_db_flags[dbi] & DB_VALID) - ? env->me_dbxs[dbi].md_cmp - : get_default_keycmp(user_flags); - if (env->me_dbxs[dbi].md_cmp != keycmp) { - if (env->me_db_flags[dbi] & DB_VALID) - return MDBX_EINVAL; - env->me_dbxs[dbi].md_cmp = keycmp; - } - - if (!datacmp) - datacmp = (env->me_db_flags[dbi] & DB_VALID) - ? env->me_dbxs[dbi].md_dcmp - : get_default_datacmp(user_flags); - if (env->me_dbxs[dbi].md_dcmp != datacmp) { - if (env->me_db_flags[dbi] & DB_VALID) - return MDBX_EINVAL; - env->me_dbxs[dbi].md_dcmp = datacmp; - } - - return MDBX_SUCCESS; -} - -static __inline size_t dbi_namelen(const MDBX_val name) { - return (name.iov_len > sizeof(struct mdbx_defer_free_item)) - ? name.iov_len - : sizeof(struct mdbx_defer_free_item); -} - -static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, - MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp, - MDBX_val name) { - MDBX_env *const env = txn->mt_env; - - /* Cannot mix named table(s) with DUPSORT flags */ - tASSERT(txn, - (txn->mt_dbi_state[MAIN_DBI] & (DBI_LINDO | DBI_VALID | DBI_STALE)) == - (DBI_LINDO | DBI_VALID)); - if (unlikely(txn->mt_dbs[MAIN_DBI].md_flags & MDBX_DUPSORT)) { - if (unlikely((user_flags & MDBX_CREATE) == 0)) - return MDBX_NOTFOUND; - if (unlikely(txn->mt_dbs[MAIN_DBI].md_leaf_pages)) - /* В MainDB есть записи, либо она уже использовалась. 
*/ - return MDBX_INCOMPATIBLE; - - /* Пересоздаём MainDB когда там пусто. */ - tASSERT(txn, txn->mt_dbs[MAIN_DBI].md_depth == 0 && - txn->mt_dbs[MAIN_DBI].md_entries == 0 && - txn->mt_dbs[MAIN_DBI].md_root == P_INVALID); - if (unlikely(txn->mt_cursors[MAIN_DBI])) - return MDBX_DANGLING_DBI; - env->me_db_flags[MAIN_DBI] = DB_POISON; - atomic_store32(&env->me_dbi_seqs[MAIN_DBI], dbi_seq_next(env, MAIN_DBI), - mo_AcquireRelease); - - const uint32_t seq = dbi_seq_next(env, MAIN_DBI); - const uint16_t main_flags = - txn->mt_dbs[MAIN_DBI].md_flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY); - env->me_dbxs[MAIN_DBI].md_cmp = get_default_keycmp(main_flags); - env->me_dbxs[MAIN_DBI].md_dcmp = get_default_datacmp(main_flags); - txn->mt_dbs[MAIN_DBI].md_flags = main_flags; - txn->mt_dbs[MAIN_DBI].md_xsize = 0; - if (unlikely(setup_sdb(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], - env->me_psize) != MDBX_SUCCESS)) { - txn->mt_dbi_state[MAIN_DBI] = DBI_LINDO; - txn->mt_flags |= MDBX_TXN_ERROR; - env->me_flags |= MDBX_FATAL_ERROR; - return MDBX_FATAL_ERROR; - } - env->me_db_flags[MAIN_DBI] = main_flags | DB_VALID; - txn->mt_dbi_seqs[MAIN_DBI] = - atomic_store32(&env->me_dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease); - txn->mt_dbi_state[MAIN_DBI] |= DBI_DIRTY; - txn->mt_flags |= MDBX_TXN_DIRTY; - } - - tASSERT(txn, env->me_dbxs[MAIN_DBI].md_cmp); - - /* Is the DB already open? */ - size_t slot = env->me_numdbs; - for (size_t scan = CORE_DBS; scan < env->me_numdbs; ++scan) { - if ((env->me_db_flags[scan] & DB_VALID) == 0) { - /* Remember this free slot */ - slot = (slot < scan) ? 
slot : scan; - continue; - } - if (!env->me_dbxs[MAIN_DBI].md_cmp(&name, &env->me_dbxs[scan].md_name)) { - slot = scan; - int err = dbi_check(txn, slot); - if (err == MDBX_BAD_DBI && - txn->mt_dbi_state[slot] == (DBI_OLDEN | DBI_LINDO)) { - /* хендл использовался, стал невалидным, - * но теперь явно пере-открывается в этой транзакци */ - eASSERT(env, !txn->mt_cursors[slot]); - txn->mt_dbi_state[slot] = DBI_LINDO; - err = dbi_check(txn, slot); - } - if (err == MDBX_SUCCESS) { - err = dbi_bind(txn, slot, user_flags, keycmp, datacmp); - if (likely(err == MDBX_SUCCESS)) { - goto done; - } - } - return err; - } - } - - /* Fail, if no free slot and max hit */ - if (unlikely(slot >= env->me_maxdbs)) - return MDBX_DBS_FULL; - - if (env->me_numdbs == slot) - eASSERT(env, !env->me_db_flags[slot] && - !env->me_dbxs[slot].md_name.iov_len && - !env->me_dbxs[slot].md_name.iov_base); - - env->me_db_flags[slot] = DB_POISON; - atomic_store32(&env->me_dbi_seqs[slot], dbi_seq_next(env, slot), - mo_AcquireRelease); - memset(&env->me_dbxs[slot], 0, sizeof(env->me_dbxs[slot])); - if (env->me_numdbs == slot) - env->me_numdbs = (unsigned)slot + 1; - eASSERT(env, slot < env->me_numdbs); - - int err = dbi_check(txn, slot); - eASSERT(env, err == MDBX_BAD_DBI); - if (err != MDBX_BAD_DBI) - return MDBX_PROBLEM; - - /* Find the DB info */ - MDBX_val body; - MDBX_cursor_couple cx; - int rc = cursor_init(&cx.outer, txn, MAIN_DBI); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - rc = cursor_set(&cx.outer, &name, &body, MDBX_SET).err; - if (unlikely(rc != MDBX_SUCCESS)) { - if (rc != MDBX_NOTFOUND || !(user_flags & MDBX_CREATE)) - return rc; - } else { - /* make sure this is actually a table */ - MDBX_node *node = page_node(cx.outer.mc_pg[cx.outer.mc_top], - cx.outer.mc_ki[cx.outer.mc_top]); - if (unlikely((node_flags(node) & (F_DUPDATA | F_SUBDATA)) != F_SUBDATA)) - return MDBX_INCOMPATIBLE; - if (!MDBX_DISABLE_VALIDATION && unlikely(body.iov_len != sizeof(MDBX_db))) { - ERROR("%s/%d: %s %zu", 
"MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid subDb node size", body.iov_len); - return MDBX_CORRUPTED; - } - memcpy(&txn->mt_dbs[slot], body.iov_base, sizeof(MDBX_db)); - } - - /* Done here so we cannot fail after creating a new DB */ - void *clone = nullptr; - if (name.iov_len) { - clone = osal_malloc(dbi_namelen(name)); - if (unlikely(!clone)) - return MDBX_ENOMEM; - name.iov_base = memcpy(clone, name.iov_base, name.iov_len); - } else - name.iov_base = ""; - - uint8_t dbi_state = DBI_LINDO | DBI_VALID | DBI_FRESH; - if (unlikely(rc)) { - /* MDBX_NOTFOUND and MDBX_CREATE: Create new DB */ - tASSERT(txn, rc == MDBX_NOTFOUND); - body.iov_base = - memset(&txn->mt_dbs[slot], 0, body.iov_len = sizeof(MDBX_db)); - txn->mt_dbs[slot].md_root = P_INVALID; - txn->mt_dbs[slot].md_mod_txnid = txn->mt_txnid; - txn->mt_dbs[slot].md_flags = user_flags & DB_PERSISTENT_FLAGS; - WITH_CURSOR_TRACKING( - cx.outer, rc = cursor_put_checklen(&cx.outer, &name, &body, - F_SUBDATA | MDBX_NOOVERWRITE)); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - - dbi_state |= DBI_DIRTY | DBI_CREAT; - txn->mt_flags |= MDBX_TXN_DIRTY; - tASSERT(txn, (txn->mt_dbi_state[MAIN_DBI] & DBI_DIRTY) != 0); - } - - /* Got info, register DBI in this txn */ - const uint32_t seq = dbi_seq_next(env, slot); - eASSERT(env, - env->me_db_flags[slot] == DB_POISON && !txn->mt_cursors[slot] && - (txn->mt_dbi_state[slot] & (DBI_LINDO | DBI_VALID)) == DBI_LINDO); - txn->mt_dbi_state[slot] = dbi_state; - memcpy(&txn->mt_dbs[slot], body.iov_base, sizeof(txn->mt_dbs[slot])); - env->me_db_flags[slot] = txn->mt_dbs[slot].md_flags; - rc = dbi_bind(txn, slot, user_flags, keycmp, datacmp); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - - env->me_dbxs[slot].md_name = name; - env->me_db_flags[slot] = txn->mt_dbs[slot].md_flags | DB_VALID; - txn->mt_dbi_seqs[slot] = - atomic_store32(&env->me_dbi_seqs[slot], seq, mo_AcquireRelease); - -done: - *dbi = (MDBX_dbi)slot; - tASSERT(txn, - slot < txn->mt_numdbs && 
(env->me_db_flags[slot] & DB_VALID) != 0); - eASSERT(env, dbi_check(txn, slot) == MDBX_SUCCESS); - return MDBX_SUCCESS; - -bailout: - eASSERT(env, !txn->mt_cursors[slot] && !env->me_dbxs[slot].md_name.iov_len && - !env->me_dbxs[slot].md_name.iov_base); - txn->mt_dbi_state[slot] &= DBI_LINDO | DBI_OLDEN; - env->me_db_flags[slot] = 0; - osal_free(clone); - if (slot + 1 == env->me_numdbs) - txn->mt_numdbs = env->me_numdbs = (unsigned)slot; - return rc; -} - -static int dbi_open(MDBX_txn *txn, const MDBX_val *const name, - unsigned user_flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, - MDBX_cmp_func *datacmp) { - if (unlikely(!dbi)) - return MDBX_EINVAL; - *dbi = 0; - if (unlikely((user_flags & ~DB_USABLE_FLAGS) != 0)) - return MDBX_EINVAL; - - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if ((user_flags & MDBX_CREATE) && unlikely(txn->mt_flags & MDBX_TXN_RDONLY)) - return MDBX_EACCESS; - - switch (user_flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED | MDBX_DUPSORT | - MDBX_REVERSEDUP | MDBX_ACCEDE)) { - case MDBX_ACCEDE: - if ((user_flags & MDBX_CREATE) == 0) - break; - __fallthrough /* fall through */; - default: - return MDBX_EINVAL; - - case MDBX_DUPSORT: - case MDBX_DUPSORT | MDBX_REVERSEDUP: - case MDBX_DUPSORT | MDBX_DUPFIXED: - case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP: - case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP: - case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: - case MDBX_DB_DEFAULTS: - break; - } - tASSERT(txn, db_check_flags((uint16_t)user_flags)); - - /* main table? 
*/ - if (unlikely(name == MDBX_CHK_MAIN || name->iov_base == MDBX_CHK_MAIN)) { - rc = dbi_bind(txn, MAIN_DBI, user_flags, keycmp, datacmp); - if (likely(rc == MDBX_SUCCESS)) - *dbi = MAIN_DBI; - return rc; - } - if (unlikely(name == MDBX_CHK_GC || name->iov_base == MDBX_CHK_GC)) { - rc = dbi_bind(txn, FREE_DBI, user_flags, keycmp, datacmp); - if (likely(rc == MDBX_SUCCESS)) - *dbi = FREE_DBI; - return rc; - } - if (unlikely(name == MDBX_CHK_META || name->iov_base == MDBX_CHK_META)) - return MDBX_EINVAL; - if (unlikely(name->iov_len > - txn->mt_env->me_leaf_nodemax - NODESIZE - sizeof(MDBX_db))) - return MDBX_EINVAL; - -#if MDBX_ENABLE_DBI_LOCKFREE - /* Is the DB already open? */ - const MDBX_env *const env = txn->mt_env; - size_t free_slot = env->me_numdbs; - for (size_t i = CORE_DBS; i < env->me_numdbs; ++i) { - retry: - if ((env->me_db_flags[i] & DB_VALID) == 0) { - free_slot = i; - continue; - } - - const uint32_t snap_seq = - atomic_load32(&env->me_dbi_seqs[i], mo_AcquireRelease); - const uint16_t snap_flags = env->me_db_flags[i]; - const MDBX_val snap_name = env->me_dbxs[i].md_name; - if (user_flags != MDBX_ACCEDE && - (((user_flags ^ snap_flags) & DB_PERSISTENT_FLAGS) || - (keycmp && keycmp != env->me_dbxs[i].md_cmp) || - (datacmp && datacmp != env->me_dbxs[i].md_dcmp))) - continue; - const uint32_t main_seq = - atomic_load32(&env->me_dbi_seqs[MAIN_DBI], mo_AcquireRelease); - MDBX_cmp_func *const snap_cmp = env->me_dbxs[MAIN_DBI].md_cmp; - if (unlikely(!(snap_flags & DB_VALID) || !snap_name.iov_base || - !snap_name.iov_len || !snap_cmp)) - continue; - - const bool name_match = snap_cmp(&snap_name, name) == 0; - osal_flush_incoherent_cpu_writeback(); - if (unlikely(snap_seq != - atomic_load32(&env->me_dbi_seqs[i], mo_AcquireRelease) || - main_seq != atomic_load32(&env->me_dbi_seqs[MAIN_DBI], - mo_AcquireRelease) || - snap_flags != env->me_db_flags[i] || - snap_name.iov_base != env->me_dbxs[i].md_name.iov_base || - snap_name.iov_len != 
env->me_dbxs[i].md_name.iov_len)) - goto retry; - if (name_match) { - rc = dbi_check(txn, i); - if (rc == MDBX_BAD_DBI && - txn->mt_dbi_state[i] == (DBI_OLDEN | DBI_LINDO)) { - /* хендл использовался, стал невалидным, - * но теперь явно пере-открывается в этой транзакци */ - eASSERT(env, !txn->mt_cursors[i]); - txn->mt_dbi_state[i] = DBI_LINDO; - rc = dbi_check(txn, i); - } - if (likely(rc == MDBX_SUCCESS)) { - rc = dbi_bind(txn, i, user_flags, keycmp, datacmp); - if (likely(rc == MDBX_SUCCESS)) - *dbi = (MDBX_dbi)i; - } - return rc; - } - } - - /* Fail, if no free slot and max hit */ - if (unlikely(free_slot >= env->me_maxdbs)) - return MDBX_DBS_FULL; -#endif /* MDBX_ENABLE_DBI_LOCKFREE */ - - rc = osal_fastmutex_acquire(&txn->mt_env->me_dbi_lock); - if (likely(rc == MDBX_SUCCESS)) { - rc = dbi_open_locked(txn, user_flags, dbi, keycmp, datacmp, *name); - ENSURE(txn->mt_env, - osal_fastmutex_release(&txn->mt_env->me_dbi_lock) == MDBX_SUCCESS); - } - return rc; -} - -static int dbi_open_cstr(MDBX_txn *txn, const char *name_cstr, - MDBX_db_flags_t flags, MDBX_dbi *dbi, - MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { - MDBX_val thunk, *name; - if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || - name_cstr == MDBX_CHK_META) - name = (void *)name_cstr; - else { - thunk.iov_len = strlen(name_cstr); - thunk.iov_base = (void *)name_cstr; - name = &thunk; - } - return dbi_open(txn, name, flags, dbi, keycmp, datacmp); -} - -int mdbx_dbi_open(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, - MDBX_dbi *dbi) { - return dbi_open_cstr(txn, name, flags, dbi, nullptr, nullptr); -} - -int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, - MDBX_dbi *dbi) { - return dbi_open(txn, name, flags, dbi, nullptr, nullptr); -} - -int mdbx_dbi_open_ex(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, - MDBX_dbi *dbi, MDBX_cmp_func *keycmp, - MDBX_cmp_func *datacmp) { - return dbi_open_cstr(txn, name, flags, dbi, keycmp, datacmp); -} - -int 
mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, - MDBX_db_flags_t flags, MDBX_dbi *dbi, - MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { - return dbi_open(txn, name, flags, dbi, keycmp, datacmp); -} - -__cold int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name_cstr) { - MDBX_val thunk, *name; - if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || - name_cstr == MDBX_CHK_META) - name = (void *)name_cstr; - else { - thunk.iov_len = strlen(name_cstr); - thunk.iov_base = (void *)name_cstr; - name = &thunk; - } - return mdbx_dbi_rename2(txn, dbi, name); -} - -struct dbi_rename_result { - struct mdbx_defer_free_item *defer; - int err; -}; - -__cold static struct dbi_rename_result -dbi_rename_locked(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val new_name) { - struct dbi_rename_result pair; - pair.defer = nullptr; - pair.err = dbi_check(txn, dbi); - if (unlikely(pair.err != MDBX_SUCCESS)) - return pair; - - MDBX_env *const env = txn->mt_env; - MDBX_val old_name = env->me_dbxs[dbi].md_name; - if (env->me_dbxs[MAIN_DBI].md_cmp(&new_name, &old_name) == 0 && - MDBX_DEBUG == 0) - return pair; - - MDBX_cursor_couple cx; - pair.err = cursor_init(&cx.outer, txn, MAIN_DBI); - if (unlikely(pair.err != MDBX_SUCCESS)) - return pair; - pair.err = cursor_set(&cx.outer, &new_name, nullptr, MDBX_SET).err; - if (unlikely(pair.err != MDBX_NOTFOUND)) { - pair.err = (pair.err == MDBX_SUCCESS) ? 
MDBX_KEYEXIST : pair.err; - return pair; - } - - pair.defer = osal_malloc(dbi_namelen(new_name)); - if (unlikely(!pair.defer)) { - pair.err = MDBX_ENOMEM; - return pair; - } - new_name.iov_base = memcpy(pair.defer, new_name.iov_base, new_name.iov_len); - - cx.outer.mc_next = txn->mt_cursors[MAIN_DBI]; - txn->mt_cursors[MAIN_DBI] = &cx.outer; - - MDBX_val data = {&txn->mt_dbs[dbi], sizeof(MDBX_db)}; - pair.err = cursor_put_checklen(&cx.outer, &new_name, &data, - F_SUBDATA | MDBX_NOOVERWRITE); - if (likely(pair.err == MDBX_SUCCESS)) { - pair.err = cursor_set(&cx.outer, &old_name, nullptr, MDBX_SET).err; - if (likely(pair.err == MDBX_SUCCESS)) - pair.err = cursor_del(&cx.outer, F_SUBDATA); - if (likely(pair.err == MDBX_SUCCESS)) { - pair.defer = env->me_dbxs[dbi].md_name.iov_base; - env->me_dbxs[dbi].md_name = new_name; - } else - txn->mt_flags |= MDBX_TXN_ERROR; - } - - txn->mt_cursors[MAIN_DBI] = cx.outer.mc_next; - return pair; -} - -__cold int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, - const MDBX_val *new_name) { - int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(new_name == MDBX_CHK_MAIN || - new_name->iov_base == MDBX_CHK_MAIN || new_name == MDBX_CHK_GC || - new_name->iov_base == MDBX_CHK_GC || new_name == MDBX_CHK_META || - new_name->iov_base == MDBX_CHK_META)) - return MDBX_EINVAL; - - if (unlikely(dbi < CORE_DBS)) - return MDBX_EINVAL; - rc = dbi_check(txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - rc = osal_fastmutex_acquire(&txn->mt_env->me_dbi_lock); - if (likely(rc == MDBX_SUCCESS)) { - struct dbi_rename_result pair = dbi_rename_locked(txn, dbi, *new_name); - if (pair.defer) - pair.defer->next = nullptr; - env_defer_free_and_release(txn->mt_env, pair.defer); - rc = pair.err; - } - return rc; -} - -__cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, - size_t bytes) { - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - 
return rc; - - if (unlikely(!dest)) - return MDBX_EINVAL; - - rc = dbi_check(txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid); - if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) - return MDBX_EINVAL; - - if (unlikely(txn->mt_flags & MDBX_TXN_BLOCKED)) - return MDBX_BAD_TXN; - - if (unlikely(txn->mt_dbi_state[dbi] & DBI_STALE)) { - rc = fetch_sdb((MDBX_txn *)txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - - dest->ms_psize = txn->mt_env->me_psize; - stat_get(&txn->mt_dbs[dbi], dest, bytes); - return MDBX_SUCCESS; -} - -static struct mdbx_defer_free_item *dbi_close_locked(MDBX_env *env, - MDBX_dbi dbi) { - eASSERT(env, dbi >= CORE_DBS); - if (unlikely(dbi >= env->me_numdbs)) - return nullptr; - - const uint32_t seq = dbi_seq_next(env, dbi); - struct mdbx_defer_free_item *defer_item = env->me_dbxs[dbi].md_name.iov_base; - if (likely(defer_item)) { - env->me_db_flags[dbi] = 0; - env->me_dbxs[dbi].md_name.iov_len = 0; - env->me_dbxs[dbi].md_name.iov_base = nullptr; - atomic_store32(&env->me_dbi_seqs[dbi], seq, mo_AcquireRelease); - osal_flush_incoherent_cpu_writeback(); - defer_item->next = nullptr; - - if (env->me_numdbs == dbi + 1) { - size_t i = env->me_numdbs; - do { - --i; - eASSERT(env, i >= CORE_DBS); - eASSERT(env, !env->me_db_flags[i] && !env->me_dbxs[i].md_name.iov_len && - !env->me_dbxs[i].md_name.iov_base); - } while (i > CORE_DBS && !env->me_dbxs[i - 1].md_name.iov_base); - env->me_numdbs = (unsigned)i; - } - } - - return defer_item; -} - -int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(dbi < CORE_DBS)) - return (dbi == MAIN_DBI) ? 
MDBX_SUCCESS : MDBX_BAD_DBI; - - if (unlikely(dbi >= env->me_maxdbs)) - return MDBX_BAD_DBI; - - if (unlikely(dbi < CORE_DBS || dbi >= env->me_maxdbs)) - return MDBX_BAD_DBI; - - rc = osal_fastmutex_acquire(&env->me_dbi_lock); - if (likely(rc == MDBX_SUCCESS)) - rc = env_defer_free_and_release(env, dbi_close_locked(env, dbi)); - return rc; -} - -int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, - unsigned *state) { - int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!flags || !state)) - return MDBX_EINVAL; - - rc = dbi_check(txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - *flags = txn->mt_dbs[dbi].md_flags & DB_PERSISTENT_FLAGS; - *state = - txn->mt_dbi_state[dbi] & (DBI_FRESH | DBI_CREAT | DBI_DIRTY | DBI_STALE); - - return MDBX_SUCCESS; -} - -static int drop_tree(MDBX_cursor *mc, const bool may_have_subDBs) { - int rc = page_search(mc, NULL, MDBX_PS_FIRST); - if (likely(rc == MDBX_SUCCESS)) { - MDBX_txn *txn = mc->mc_txn; - - /* DUPSORT sub-DBs have no ovpages/DBs. Omit scanning leaves. - * This also avoids any P_LEAF2 pages, which have no nodes. - * Also if the DB doesn't have sub-DBs and has no large/overflow - * pages, omit scanning leaves. 
*/ - if (!(may_have_subDBs | mc->mc_db->md_overflow_pages)) - cursor_pop(mc); - - rc = pnl_need(&txn->tw.retired_pages, - (size_t)mc->mc_db->md_branch_pages + - (size_t)mc->mc_db->md_leaf_pages + - (size_t)mc->mc_db->md_overflow_pages); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - - MDBX_cursor mx; - cursor_copy(mc, &mx); - while (mc->mc_snum > 0) { - MDBX_page *const mp = mc->mc_pg[mc->mc_top]; - const size_t nkeys = page_numkeys(mp); - if (IS_LEAF(mp)) { - cASSERT(mc, mc->mc_snum == mc->mc_db->md_depth); - for (size_t i = 0; i < nkeys; i++) { - MDBX_node *node = page_node(mp, i); - if (node_flags(node) & F_BIGDATA) { - rc = page_retire_ex(mc, node_largedata_pgno(node), nullptr, 0); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - if (!(may_have_subDBs | mc->mc_db->md_overflow_pages)) - goto pop; - } else if (node_flags(node) & F_SUBDATA) { - if (unlikely((node_flags(node) & F_DUPDATA) == 0)) { - rc = /* disallowing implicit subDB deletion */ MDBX_INCOMPATIBLE; - goto bailout; - } - rc = cursor_xinit1(mc, node, mp); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - rc = drop_tree(&mc->mc_xcursor->mx_cursor, false); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - } - } else { - cASSERT(mc, mc->mc_snum < mc->mc_db->md_depth); - mc->mc_checking |= CC_RETIRING; - const unsigned pagetype = (IS_FROZEN(txn, mp) ? P_FROZEN : 0) + - ((mc->mc_snum + 1 == mc->mc_db->md_depth) - ? 
(mc->mc_checking & (P_LEAF | P_LEAF2)) - : P_BRANCH); - for (size_t i = 0; i < nkeys; i++) { - MDBX_node *node = page_node(mp, i); - tASSERT(txn, (node_flags(node) & - (F_BIGDATA | F_SUBDATA | F_DUPDATA)) == 0); - const pgno_t pgno = node_pgno(node); - rc = page_retire_ex(mc, pgno, nullptr, pagetype); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - mc->mc_checking -= CC_RETIRING; - } - if (!mc->mc_top) - break; - cASSERT(mc, nkeys > 0); - mc->mc_ki[mc->mc_top] = (indx_t)nkeys; - rc = cursor_sibling(mc, SIBLING_RIGHT); - if (unlikely(rc != MDBX_SUCCESS)) { - if (unlikely(rc != MDBX_NOTFOUND)) - goto bailout; - /* no more siblings, go back to beginning - * of previous level. */ - pop: - cursor_pop(mc); - mc->mc_ki[0] = 0; - for (size_t i = 1; i < mc->mc_snum; i++) { - mc->mc_ki[i] = 0; - mc->mc_pg[i] = mx.mc_pg[i]; - } - } - } - rc = page_retire(mc, mc->mc_pg[0]); - bailout: - if (unlikely(rc != MDBX_SUCCESS)) - txn->mt_flags |= MDBX_TXN_ERROR; - } else if (rc == MDBX_NOTFOUND) { - rc = MDBX_SUCCESS; - } - mc->mc_flags &= ~C_INITIALIZED; - return rc; -} - -__cold int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { - int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - MDBX_cursor *mc; - rc = mdbx_cursor_open(txn, dbi, &mc); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - rc = drop_tree(mc, - dbi == MAIN_DBI || (mc->mc_db->md_flags & MDBX_DUPSORT) != 0); - /* Invalidate the dropped DB's cursors */ - for (MDBX_cursor *m2 = txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) - m2->mc_flags &= ~(C_INITIALIZED | C_EOF); - if (unlikely(rc)) - goto bailout; - - /* Can't delete the main DB */ - if (del && dbi >= CORE_DBS) { - rc = delete(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL, F_SUBDATA); - if (likely(rc == MDBX_SUCCESS)) { - tASSERT(txn, txn->mt_dbi_state[MAIN_DBI] & DBI_DIRTY); - tASSERT(txn, txn->mt_flags & MDBX_TXN_DIRTY); - txn->mt_dbi_state[dbi] = DBI_LINDO | DBI_OLDEN; - MDBX_env *const env = txn->mt_env; - rc 
= osal_fastmutex_acquire(&env->me_dbi_lock); - if (likely(rc == MDBX_SUCCESS)) { - rc = env_defer_free_and_release(env, dbi_close_locked(env, dbi)); - goto bailout; - } - } - txn->mt_flags |= MDBX_TXN_ERROR; - } else { - /* reset the DB record, mark it dirty */ - txn->mt_dbi_state[dbi] |= DBI_DIRTY; - txn->mt_dbs[dbi].md_depth = 0; - txn->mt_dbs[dbi].md_branch_pages = 0; - txn->mt_dbs[dbi].md_leaf_pages = 0; - txn->mt_dbs[dbi].md_overflow_pages = 0; - txn->mt_dbs[dbi].md_entries = 0; - txn->mt_dbs[dbi].md_root = P_INVALID; - txn->mt_dbs[dbi].md_seq = 0; - txn->mt_flags |= MDBX_TXN_DIRTY; - } - -bailout: - mdbx_cursor_close(mc); - return rc; -} - -__cold int mdbx_reader_list(const MDBX_env *env, MDBX_reader_list_func *func, - void *ctx) { - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!func)) - return MDBX_EINVAL; - - rc = MDBX_RESULT_TRUE; - int serial = 0; - MDBX_lockinfo *const lck = env->me_lck_mmap.lck; - if (likely(lck)) { - const size_t snap_nreaders = - atomic_load32(&lck->mti_numreaders, mo_AcquireRelease); - for (size_t i = 0; i < snap_nreaders; i++) { - const MDBX_reader *r = lck->mti_readers + i; - retry_reader:; - const uint32_t pid = atomic_load32(&r->mr_pid, mo_AcquireRelease); - if (!pid) - continue; - txnid_t txnid = safe64_read(&r->mr_txnid); - const uint64_t tid = atomic_load64(&r->mr_tid, mo_Relaxed); - const pgno_t pages_used = - atomic_load32(&r->mr_snapshot_pages_used, mo_Relaxed); - const uint64_t reader_pages_retired = - atomic_load64(&r->mr_snapshot_pages_retired, mo_Relaxed); - if (unlikely( - txnid != safe64_read(&r->mr_txnid) || - pid != atomic_load32(&r->mr_pid, mo_AcquireRelease) || - tid != atomic_load64(&r->mr_tid, mo_Relaxed) || - pages_used != - atomic_load32(&r->mr_snapshot_pages_used, mo_Relaxed) || - reader_pages_retired != - atomic_load64(&r->mr_snapshot_pages_retired, mo_Relaxed))) - goto retry_reader; - - eASSERT(env, txnid > 0); - if (txnid >= SAFE64_INVALID_THRESHOLD) - 
txnid = 0; - - size_t bytes_used = 0; - size_t bytes_retained = 0; - uint64_t lag = 0; - if (txnid) { - meta_troika_t troika = meta_tap(env); - retry_header:; - const meta_ptr_t head = meta_recent(env, &troika); - const uint64_t head_pages_retired = - unaligned_peek_u64_volatile(4, head.ptr_v->mm_pages_retired); - if (unlikely(meta_should_retry(env, &troika) || - head_pages_retired != - unaligned_peek_u64_volatile( - 4, head.ptr_v->mm_pages_retired))) - goto retry_header; - - lag = (head.txnid - txnid) / xMDBX_TXNID_STEP; - bytes_used = pgno2bytes(env, pages_used); - bytes_retained = (head_pages_retired > reader_pages_retired) - ? pgno2bytes(env, (pgno_t)(head_pages_retired - - reader_pages_retired)) - : 0; - } - rc = func(ctx, ++serial, (unsigned)i, pid, (mdbx_tid_t)((intptr_t)tid), - txnid, lag, bytes_used, bytes_retained); - if (unlikely(rc != MDBX_SUCCESS)) - break; - } - } - - return rc; -} - -/* Insert pid into list if not already present. - * return -1 if already present. */ -__cold static bool pid_insert(uint32_t *ids, uint32_t pid) { - /* binary search of pid in list */ - size_t base = 0; - size_t cursor = 1; - int val = 0; - size_t n = ids[0]; - - while (n > 0) { - size_t pivot = n >> 1; - cursor = base + pivot + 1; - val = pid - ids[cursor]; - - if (val < 0) { - n = pivot; - } else if (val > 0) { - base = cursor; - n -= pivot + 1; - } else { - /* found, so it's a duplicate */ - return false; - } - } - - if (val > 0) - ++cursor; - - ids[0]++; - for (n = ids[0]; n > cursor; n--) - ids[n] = ids[n - 1]; - ids[n] = pid; - return true; -} - -__cold int mdbx_reader_check(MDBX_env *env, int *dead) { - if (dead) - *dead = 0; - return cleanup_dead_readers(env, false, dead); -} - -/* Return: - * MDBX_RESULT_TRUE - done and mutex recovered - * MDBX_SUCCESS - done - * Otherwise errcode. 
*/ -__cold MDBX_INTERNAL_FUNC int cleanup_dead_readers(MDBX_env *env, - int rdt_locked, int *dead) { - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - eASSERT(env, rdt_locked >= 0); - MDBX_lockinfo *const lck = env->me_lck_mmap.lck; - if (unlikely(lck == NULL)) { - /* exclusive mode */ - if (dead) - *dead = 0; - return MDBX_SUCCESS; - } - - const size_t snap_nreaders = - atomic_load32(&lck->mti_numreaders, mo_AcquireRelease); - uint32_t pidsbuf_onstask[142]; - uint32_t *const pids = - (snap_nreaders < ARRAY_LENGTH(pidsbuf_onstask)) - ? pidsbuf_onstask - : osal_malloc((snap_nreaders + 1) * sizeof(uint32_t)); - if (unlikely(!pids)) - return MDBX_ENOMEM; - - pids[0] = 0; - int count = 0; - for (size_t i = 0; i < snap_nreaders; i++) { - const uint32_t pid = - atomic_load32(&lck->mti_readers[i].mr_pid, mo_AcquireRelease); - if (pid == 0) - continue /* skip empty */; - if (pid == env->me_pid) - continue /* skip self */; - if (!pid_insert(pids, pid)) - continue /* such pid already processed */; - - int err = osal_rpid_check(env, pid); - if (err == MDBX_RESULT_TRUE) - continue /* reader is live */; - - if (err != MDBX_SUCCESS) { - rc = err; - break /* osal_rpid_check() failed */; - } - - /* stale reader found */ - if (!rdt_locked) { - err = osal_rdt_lock(env); - if (MDBX_IS_ERROR(err)) { - rc = err; - break; - } - - rdt_locked = -1; - if (err == MDBX_RESULT_TRUE) { - /* mutex recovered, the mdbx_ipclock_failed() checked all readers */ - rc = MDBX_RESULT_TRUE; - break; - } - - /* a other process may have clean and reused slot, recheck */ - if (lck->mti_readers[i].mr_pid.weak != pid) - continue; - - err = osal_rpid_check(env, pid); - if (MDBX_IS_ERROR(err)) { - rc = err; - break; - } - - if (err != MDBX_SUCCESS) - continue /* the race with other process, slot reused */; - } - - /* clean it */ - for (size_t j = i; j < snap_nreaders; j++) { - if (lck->mti_readers[j].mr_pid.weak == pid) { - DEBUG("clear stale reader pid %" PRIuPTR " txn %" 
PRIaTXN, (size_t)pid, - lck->mti_readers[j].mr_txnid.weak); - atomic_store32(&lck->mti_readers[j].mr_pid, 0, mo_Relaxed); - atomic_store32(&lck->mti_readers_refresh_flag, true, mo_AcquireRelease); - count++; - } - } - } - - if (likely(!MDBX_IS_ERROR(rc))) - atomic_store64(&lck->mti_reader_check_timestamp, osal_monotime(), - mo_Relaxed); - - if (rdt_locked < 0) - osal_rdt_unlock(env); - - if (pids != pidsbuf_onstask) - osal_free(pids); - - if (dead) - *dead = count; - return rc; -} - -__cold static int setup_debug(MDBX_log_level_t level, MDBX_debug_flags_t flags, - union logger_union logger, char *buffer, - size_t buffer_size) { - ENSURE(nullptr, osal_fastmutex_acquire(&debug_lock) == 0); - - const int rc = mdbx_static.flags | (mdbx_static.loglevel << 16); - if (level != MDBX_LOG_DONTCHANGE) - mdbx_static.loglevel = (uint8_t)level; - - if (flags != MDBX_DBG_DONTCHANGE) { - flags &= -#if MDBX_DEBUG - MDBX_DBG_ASSERT | MDBX_DBG_AUDIT | MDBX_DBG_JITTER | -#endif - MDBX_DBG_DUMP | MDBX_DBG_LEGACY_MULTIOPEN | MDBX_DBG_LEGACY_OVERLAP | - MDBX_DBG_DONT_UPGRADE; - mdbx_static.flags = (uint8_t)flags; - } - - assert(MDBX_LOGGER_DONTCHANGE == ((MDBX_debug_func *)(intptr_t)-1)); - if (logger.ptr != (void *)((intptr_t)-1)) { - mdbx_static.logger.ptr = logger.ptr; - mdbx_static.logger_buffer = buffer; - mdbx_static.logger_buffer_size = buffer_size; - } - - ENSURE(nullptr, osal_fastmutex_release(&debug_lock) == 0); - return rc; -} - -__cold int mdbx_setup_debug_nofmt(MDBX_log_level_t level, - MDBX_debug_flags_t flags, - MDBX_debug_func_nofmt *logger, char *buffer, - size_t buffer_size) { - union logger_union thunk; - thunk.nofmt = - (logger && buffer && buffer_size) ? 
logger : MDBX_LOGGER_NOFMT_DONTCHANGE; - return setup_debug(level, flags, thunk, buffer, buffer_size); -} - -__cold int mdbx_setup_debug(MDBX_log_level_t level, MDBX_debug_flags_t flags, - MDBX_debug_func *logger) { - union logger_union thunk; - thunk.fmt = logger; - return setup_debug(level, flags, thunk, nullptr, 0); -} - -__cold static txnid_t kick_longlived_readers(MDBX_env *env, - const txnid_t laggard) { - DEBUG("DB size maxed out by reading #%" PRIaTXN, laggard); - osal_memory_fence(mo_AcquireRelease, false); - MDBX_hsr_func *const callback = env->me_hsr_callback; - txnid_t oldest = 0; - bool notify_eof_of_loop = false; - int retry = 0; - do { - const txnid_t steady = - env->me_txn->tw.troika.txnid[env->me_txn->tw.troika.prefer_steady]; - env->me_lck->mti_readers_refresh_flag.weak = /* force refresh */ true; - oldest = find_oldest_reader(env, steady); - eASSERT(env, oldest < env->me_txn0->mt_txnid); - eASSERT(env, oldest >= laggard); - eASSERT(env, oldest >= env->me_lck->mti_oldest_reader.weak); - - MDBX_lockinfo *const lck = env->me_lck_mmap.lck; - if (oldest == steady || oldest > laggard || /* without-LCK mode */ !lck) - break; - - if (MDBX_IS_ERROR(cleanup_dead_readers(env, false, NULL))) - break; - - if (!callback) - break; - - MDBX_reader *stucked = nullptr; - uint64_t hold_retired = 0; - for (size_t i = 0; i < lck->mti_numreaders.weak; ++i) { - const uint64_t snap_retired = atomic_load64( - &lck->mti_readers[i].mr_snapshot_pages_retired, mo_Relaxed); - const txnid_t rtxn = safe64_read(&lck->mti_readers[i].mr_txnid); - if (rtxn == laggard && - atomic_load32(&lck->mti_readers[i].mr_pid, mo_AcquireRelease)) { - hold_retired = snap_retired; - stucked = &lck->mti_readers[i]; - } - } - - if (!stucked) - break; - - uint32_t pid = atomic_load32(&stucked->mr_pid, mo_AcquireRelease); - uint64_t tid = atomic_load64(&stucked->mr_tid, mo_AcquireRelease); - if (safe64_read(&stucked->mr_txnid) != laggard || !pid || - stucked->mr_snapshot_pages_retired.weak != 
hold_retired) - continue; - - const meta_ptr_t head = meta_recent(env, &env->me_txn->tw.troika); - const txnid_t gap = (head.txnid - laggard) / xMDBX_TXNID_STEP; - const uint64_t head_retired = - unaligned_peek_u64(4, head.ptr_c->mm_pages_retired); - const size_t space = - (head_retired > hold_retired) - ? pgno2bytes(env, (pgno_t)(head_retired - hold_retired)) - : 0; - int rc = - callback(env, env->me_txn, pid, (mdbx_tid_t)((intptr_t)tid), laggard, - (gap < UINT_MAX) ? (unsigned)gap : UINT_MAX, space, retry); - if (rc < 0) - /* hsr returned error and/or agree MDBX_MAP_FULL error */ - break; - - if (rc > 0) { - if (rc == 1) { - /* hsr reported transaction (will be) aborted asynchronous */ - safe64_reset_compare(&stucked->mr_txnid, laggard); - } else { - /* hsr reported reader process was killed and slot should be cleared */ - safe64_reset(&stucked->mr_txnid, true); - atomic_store64(&stucked->mr_tid, 0, mo_Relaxed); - atomic_store32(&stucked->mr_pid, 0, mo_AcquireRelease); - } - } else if (!notify_eof_of_loop) { -#if MDBX_ENABLE_PROFGC - env->me_lck->mti_pgop_stat.gc_prof.kicks += 1; -#endif /* MDBX_ENABLE_PROFGC */ - notify_eof_of_loop = true; - } - - } while (++retry < INT_MAX); - - if (notify_eof_of_loop) { - /* notify end of hsr-loop */ - const txnid_t turn = oldest - laggard; - if (turn) - NOTICE("hsr-kick: done turn %" PRIaTXN " -> %" PRIaTXN " +%" PRIaTXN, - laggard, oldest, turn); - callback(env, env->me_txn, 0, 0, laggard, - (turn < UINT_MAX) ? (unsigned)turn : UINT_MAX, 0, -retry); - } - return oldest; -} - -__cold int mdbx_env_set_hsr(MDBX_env *env, MDBX_hsr_func *hsr) { - int rc = check_env(env, false); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - env->me_hsr_callback = hsr; - return MDBX_SUCCESS; -} - -__cold MDBX_hsr_func *mdbx_env_get_hsr(const MDBX_env *env) { - return likely(env && env->me_signature.weak == MDBX_ME_SIGNATURE) - ? 
env->me_hsr_callback - : NULL; -} - -#ifdef __SANITIZE_THREAD__ -/* LY: avoid tsan-trap by me_txn, mm_last_pg and mt_next_pgno */ -__attribute__((__no_sanitize_thread__, __noinline__)) -#endif -int mdbx_txn_straggler(const MDBX_txn *txn, int *percent) -{ - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return (rc > 0) ? -rc : rc; - - MDBX_env *env = txn->mt_env; - if (unlikely((txn->mt_flags & MDBX_TXN_RDONLY) == 0)) { - if (percent) - *percent = - (int)((txn->mt_next_pgno * UINT64_C(100) + txn->mt_end_pgno / 2) / - txn->mt_end_pgno); - return 0; - } - - txnid_t lag; - meta_troika_t troika = meta_tap(env); - do { - const meta_ptr_t head = meta_recent(env, &troika); - if (percent) { - const pgno_t maxpg = head.ptr_v->mm_geo.now; - *percent = - (int)((head.ptr_v->mm_geo.next * UINT64_C(100) + maxpg / 2) / maxpg); - } - lag = (head.txnid - txn->mt_txnid) / xMDBX_TXNID_STEP; - } while (unlikely(meta_should_retry(env, &troika))); - - return (lag > INT_MAX) ? INT_MAX : (int)lag; -} - -typedef struct mdbx_walk_ctx { - void *mw_user; - MDBX_pgvisitor_func *mw_visitor; - MDBX_txn *mw_txn; - MDBX_cursor *mw_cursor; - bool mw_dont_check_keys_ordering; -} mdbx_walk_ctx_t; - -__cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_walk_sdb_t *sdb, - int deep); - -static MDBX_page_type_t walk_page_type(const MDBX_page *mp) { - if (mp) - switch (mp->mp_flags & ~P_SPILLED) { - case P_BRANCH: - return MDBX_page_branch; - case P_LEAF: - return MDBX_page_leaf; - case P_LEAF | P_LEAF2: - return MDBX_page_dupfixed_leaf; - case P_OVERFLOW: - return MDBX_page_large; - } - return MDBX_page_broken; -} - -/* Depth-first tree traversal. 
*/ -__cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, - MDBX_walk_sdb_t *sdb, int deep, - txnid_t parent_txnid) { - assert(pgno != P_INVALID); - MDBX_page *mp = nullptr; - int err = page_get(ctx->mw_cursor, pgno, &mp, parent_txnid); - - MDBX_page_type_t type = walk_page_type(mp); - const size_t nentries = mp ? page_numkeys(mp) : 0; - unsigned npages = 1; - size_t pagesize = pgno2bytes(ctx->mw_txn->mt_env, npages); - size_t header_size = - (mp && !IS_LEAF2(mp)) ? PAGEHDRSZ + mp->mp_lower : PAGEHDRSZ; - size_t payload_size = 0; - size_t unused_size = - (mp ? page_room(mp) : pagesize - header_size) - payload_size; - size_t align_bytes = 0; - - for (size_t i = 0; err == MDBX_SUCCESS && i < nentries; ++i) { - if (type == MDBX_page_dupfixed_leaf) { - /* LEAF2 pages have no mp_ptrs[] or node headers */ - payload_size += mp->mp_leaf2_ksize; - continue; - } - - const MDBX_node *node = page_node(mp, i); - header_size += NODESIZE; - const size_t node_key_size = node_ks(node); - payload_size += node_key_size; - - if (type == MDBX_page_branch) { - assert(i > 0 || node_ks(node) == 0); - align_bytes += node_key_size & 1; - continue; - } - - const size_t node_data_size = node_ds(node); - assert(type == MDBX_page_leaf); - switch (node_flags(node)) { - case 0 /* usual node */: - payload_size += node_data_size; - align_bytes += (node_key_size + node_data_size) & 1; - break; - - case F_BIGDATA /* long data on the large/overflow page */: { - const pgno_t large_pgno = node_largedata_pgno(node); - const size_t over_payload = node_data_size; - const size_t over_header = PAGEHDRSZ; - npages = 1; - - assert(err == MDBX_SUCCESS); - pgr_t lp = page_get_large(ctx->mw_cursor, large_pgno, mp->mp_txnid); - err = lp.err; - if (err == MDBX_SUCCESS) { - cASSERT(ctx->mw_cursor, PAGETYPE_WHOLE(lp.page) == P_OVERFLOW); - npages = lp.page->mp_pages; - } - - pagesize = pgno2bytes(ctx->mw_txn->mt_env, npages); - const size_t over_unused = pagesize - over_payload - over_header; - const 
int rc = ctx->mw_visitor(large_pgno, npages, ctx->mw_user, deep, - sdb, pagesize, MDBX_page_large, err, 1, - over_payload, over_header, over_unused); - if (unlikely(rc != MDBX_SUCCESS)) - return (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc; - payload_size += sizeof(pgno_t); - align_bytes += node_key_size & 1; - } break; - - case F_SUBDATA /* sub-db */: { - if (unlikely(node_data_size != sizeof(MDBX_db))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid subDb node size", (unsigned)node_data_size); - assert(err == MDBX_CORRUPTED); - err = MDBX_CORRUPTED; - } - header_size += node_data_size; - align_bytes += (node_key_size + node_data_size) & 1; - } break; - - case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */: - if (unlikely(node_data_size != sizeof(MDBX_db))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid sub-tree node size", (unsigned)node_data_size); - assert(err == MDBX_CORRUPTED); - err = MDBX_CORRUPTED; - } - header_size += node_data_size; - align_bytes += (node_key_size + node_data_size) & 1; - break; - - case F_DUPDATA /* short sub-page */: { - if (unlikely(node_data_size <= PAGEHDRSZ || (node_data_size & 1))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid sub-page node size", (unsigned)node_data_size); - assert(err == MDBX_CORRUPTED); - err = MDBX_CORRUPTED; - break; - } - - MDBX_page *sp = node_data(node); - const size_t nsubkeys = page_numkeys(sp); - size_t subheader_size = - IS_LEAF2(sp) ? 
PAGEHDRSZ : PAGEHDRSZ + sp->mp_lower; - size_t subunused_size = page_room(sp); - size_t subpayload_size = 0; - size_t subalign_bytes = 0; - MDBX_page_type_t subtype; - - switch (sp->mp_flags & /* ignore legacy P_DIRTY flag */ ~P_LEGACY_DIRTY) { - case P_LEAF | P_SUBP: - subtype = MDBX_subpage_leaf; - break; - case P_LEAF | P_LEAF2 | P_SUBP: - subtype = MDBX_subpage_dupfixed_leaf; - break; - default: - ERROR("%s/%d: %s 0x%x", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid sub-page flags", sp->mp_flags); - assert(err == MDBX_CORRUPTED); - subtype = MDBX_subpage_broken; - err = MDBX_CORRUPTED; - } - - for (size_t j = 0; err == MDBX_SUCCESS && j < nsubkeys; ++j) { - if (subtype == MDBX_subpage_dupfixed_leaf) { - /* LEAF2 pages have no mp_ptrs[] or node headers */ - subpayload_size += sp->mp_leaf2_ksize; - } else { - assert(subtype == MDBX_subpage_leaf); - const MDBX_node *subnode = page_node(sp, j); - const size_t subnode_size = node_ks(subnode) + node_ds(subnode); - subheader_size += NODESIZE; - subpayload_size += subnode_size; - subalign_bytes += subnode_size & 1; - if (unlikely(node_flags(subnode) != 0)) { - ERROR("%s/%d: %s 0x%x", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "unexpected sub-node flags", node_flags(subnode)); - assert(err == MDBX_CORRUPTED); - err = MDBX_CORRUPTED; - } - } - } - - const int rc = - ctx->mw_visitor(pgno, 0, ctx->mw_user, deep + 1, sdb, node_data_size, - subtype, err, nsubkeys, subpayload_size, - subheader_size, subunused_size + subalign_bytes); - if (unlikely(rc != MDBX_SUCCESS)) - return (rc == MDBX_RESULT_TRUE) ? 
MDBX_SUCCESS : rc; - header_size += subheader_size; - unused_size += subunused_size; - payload_size += subpayload_size; - align_bytes += subalign_bytes + (node_key_size & 1); - } break; - - default: - ERROR("%s/%d: %s 0x%x", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid node flags", node_flags(node)); - assert(err == MDBX_CORRUPTED); - err = MDBX_CORRUPTED; - } - } - - const int rc = ctx->mw_visitor( - pgno, 1, ctx->mw_user, deep, sdb, ctx->mw_txn->mt_env->me_psize, type, - err, nentries, payload_size, header_size, unused_size + align_bytes); - if (unlikely(rc != MDBX_SUCCESS)) - return (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc; - - for (size_t i = 0; err == MDBX_SUCCESS && i < nentries; ++i) { - if (type == MDBX_page_dupfixed_leaf) - continue; - - MDBX_node *node = page_node(mp, i); - if (type == MDBX_page_branch) { - assert(err == MDBX_SUCCESS); - err = walk_tree(ctx, node_pgno(node), sdb, deep + 1, mp->mp_txnid); - if (unlikely(err != MDBX_SUCCESS)) { - if (err == MDBX_RESULT_TRUE) - break; - return err; - } - continue; - } - - assert(type == MDBX_page_leaf); - switch (node_flags(node)) { - default: - continue; - - case F_SUBDATA /* sub-db */: - if (unlikely(node_ds(node) != sizeof(MDBX_db))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid sub-tree node size", (unsigned)node_ds(node)); - assert(err == MDBX_CORRUPTED); - err = MDBX_CORRUPTED; - } else { - MDBX_db aligned_db; - memcpy(&aligned_db, node_data(node), sizeof(aligned_db)); - MDBX_walk_sdb_t sdb_info = { - {node_key(node), node_ks(node)}, nullptr, nullptr}; - sdb_info.internal = &aligned_db; - assert(err == MDBX_SUCCESS); - err = walk_sdb(ctx, &sdb_info, deep + 1); - } - break; - - case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */: - if (unlikely(node_ds(node) != sizeof(MDBX_db))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid dupsort sub-tree node size", (unsigned)node_ds(node)); - assert(err == MDBX_CORRUPTED); - err = MDBX_CORRUPTED; - } else if 
(unlikely(!ctx->mw_cursor->mc_xcursor)) { - ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "unexpected dupsort sub-tree node for non-dupsort subDB"); - assert(err == MDBX_CORRUPTED); - err = MDBX_CORRUPTED; - } else { - MDBX_db aligned_db; - memcpy(&aligned_db, node_data(node), sizeof(aligned_db)); - assert(ctx->mw_cursor->mc_xcursor == - &container_of(ctx->mw_cursor, MDBX_cursor_couple, outer)->inner); - assert(err == MDBX_SUCCESS); - err = cursor_xinit1(ctx->mw_cursor, node, mp); - if (likely(err == MDBX_SUCCESS)) { - ctx->mw_cursor = &ctx->mw_cursor->mc_xcursor->mx_cursor; - sdb->nested = &aligned_db; - err = walk_tree(ctx, aligned_db.md_root, sdb, deep + 1, mp->mp_txnid); - sdb->nested = nullptr; - MDBX_xcursor *inner_xcursor = - container_of(ctx->mw_cursor, MDBX_xcursor, mx_cursor); - MDBX_cursor_couple *couple = - container_of(inner_xcursor, MDBX_cursor_couple, inner); - ctx->mw_cursor = &couple->outer; - } - } - break; - } - } - - return MDBX_SUCCESS; -} - -__cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_walk_sdb_t *sdb, - int deep) { - struct MDBX_db *const db = sdb->internal; - if (unlikely(db->md_root == P_INVALID)) - return MDBX_SUCCESS; /* empty db */ - - MDBX_cursor_couple couple; - MDBX_dbx dbx = {.md_klen_min = INT_MAX}; - uint8_t dbi_state = DBI_LINDO | DBI_VALID; - int rc = couple_init(&couple, ~0u, ctx->mw_txn, db, &dbx, &dbi_state); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - couple.outer.mc_checking |= ctx->mw_dont_check_keys_ordering - ? CC_SKIPORD | CC_PAGECHECK - : CC_PAGECHECK; - couple.inner.mx_cursor.mc_checking |= ctx->mw_dont_check_keys_ordering - ? CC_SKIPORD | CC_PAGECHECK - : CC_PAGECHECK; - couple.outer.mc_next = ctx->mw_cursor; - ctx->mw_cursor = &couple.outer; - rc = walk_tree(ctx, db->md_root, sdb, deep, - db->md_mod_txnid ? 
db->md_mod_txnid : ctx->mw_txn->mt_txnid); - ctx->mw_cursor = couple.outer.mc_next; - return rc; -} - -__cold int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor, - void *user, bool dont_check_keys_ordering) { - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - mdbx_walk_ctx_t ctx; - memset(&ctx, 0, sizeof(ctx)); - ctx.mw_txn = txn; - ctx.mw_user = user; - ctx.mw_visitor = visitor; - ctx.mw_dont_check_keys_ordering = dont_check_keys_ordering; - - MDBX_walk_sdb_t sdb = {{MDBX_CHK_GC, 0}, &txn->mt_dbs[FREE_DBI], nullptr}; - rc = walk_sdb(&ctx, &sdb, 0); - if (!MDBX_IS_ERROR(rc)) { - sdb.name.iov_base = MDBX_CHK_MAIN; - sdb.internal = &txn->mt_dbs[MAIN_DBI]; - rc = walk_sdb(&ctx, &sdb, 0); - } - return rc; -} - -int mdbx_canary_put(MDBX_txn *txn, const MDBX_canary *canary) { - int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (likely(canary)) { - if (txn->mt_canary.x == canary->x && txn->mt_canary.y == canary->y && - txn->mt_canary.z == canary->z) - return MDBX_SUCCESS; - txn->mt_canary.x = canary->x; - txn->mt_canary.y = canary->y; - txn->mt_canary.z = canary->z; - } - txn->mt_canary.v = txn->mt_txnid; - txn->mt_flags |= MDBX_TXN_DIRTY; - - return MDBX_SUCCESS; -} - -int mdbx_canary_get(const MDBX_txn *txn, MDBX_canary *canary) { - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(canary == NULL)) - return MDBX_EINVAL; - - *canary = txn->mt_canary; - return MDBX_SUCCESS; -} - -int mdbx_cursor_on_first(const MDBX_cursor *mc) { - if (unlikely(mc == NULL)) - return MDBX_EINVAL; - - if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL - : MDBX_EBADSIGN; - - if (!(mc->mc_flags & C_INITIALIZED)) - return mc->mc_db->md_entries ? 
MDBX_RESULT_FALSE : MDBX_RESULT_TRUE; - - for (size_t i = 0; i < mc->mc_snum; ++i) { - if (mc->mc_ki[i]) - return MDBX_RESULT_FALSE; - } - - return MDBX_RESULT_TRUE; -} - -int mdbx_cursor_on_first_dup(const MDBX_cursor *mc) { - if (unlikely(mc == NULL)) - return MDBX_EINVAL; - - if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL - : MDBX_EBADSIGN; - - if (!(mc->mc_flags & C_INITIALIZED)) - return MDBX_RESULT_TRUE; - - if (!mc->mc_xcursor) - return MDBX_RESULT_TRUE; - - mc = &mc->mc_xcursor->mx_cursor; - for (size_t i = 0; i < mc->mc_snum; ++i) { - if (mc->mc_ki[i]) - return MDBX_RESULT_FALSE; - } - - return MDBX_RESULT_TRUE; -} - -int mdbx_cursor_on_last(const MDBX_cursor *mc) { - if (unlikely(mc == NULL)) - return MDBX_EINVAL; - - if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL - : MDBX_EBADSIGN; - - if (!(mc->mc_flags & C_INITIALIZED)) - return mc->mc_db->md_entries ? MDBX_RESULT_FALSE : MDBX_RESULT_TRUE; - - for (size_t i = 0; i < mc->mc_snum; ++i) { - size_t nkeys = page_numkeys(mc->mc_pg[i]); - if (mc->mc_ki[i] < nkeys - 1) - return MDBX_RESULT_FALSE; - } - - return MDBX_RESULT_TRUE; -} - -int mdbx_cursor_on_last_dup(const MDBX_cursor *mc) { - if (unlikely(mc == NULL)) - return MDBX_EINVAL; - - if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? 
MDBX_EINVAL - : MDBX_EBADSIGN; - - if (!(mc->mc_flags & C_INITIALIZED)) - return MDBX_RESULT_FALSE; - - if (!mc->mc_xcursor) - return MDBX_RESULT_TRUE; - - mc = &mc->mc_xcursor->mx_cursor; - for (size_t i = 0; i < mc->mc_snum; ++i) { - size_t nkeys = page_numkeys(mc->mc_pg[i]); - if (mc->mc_ki[i] < nkeys - 1) - return MDBX_RESULT_FALSE; - } - - return MDBX_RESULT_TRUE; -} - -int mdbx_cursor_eof(const MDBX_cursor *mc) { - if (unlikely(mc == NULL)) - return MDBX_EINVAL; - - if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL - : MDBX_EBADSIGN; - - return ((mc->mc_flags & (C_INITIALIZED | C_EOF)) == C_INITIALIZED && - mc->mc_snum && - mc->mc_ki[mc->mc_top] < page_numkeys(mc->mc_pg[mc->mc_top])) - ? MDBX_RESULT_FALSE - : MDBX_RESULT_TRUE; -} - -//------------------------------------------------------------------------------ - -struct diff_result { - ptrdiff_t diff; - size_t level; - ptrdiff_t root_nkeys; -}; - -/* calculates: r = x - y */ -__hot static int cursor_diff(const MDBX_cursor *const __restrict x, - const MDBX_cursor *const __restrict y, - struct diff_result *const __restrict r) { - r->diff = 0; - r->level = 0; - r->root_nkeys = 0; - - if (unlikely(x->mc_signature != MDBX_MC_LIVE)) - return (x->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL - : MDBX_EBADSIGN; - - if (unlikely(y->mc_signature != MDBX_MC_LIVE)) - return (y->mc_signature == MDBX_MC_READY4CLOSE) ? 
MDBX_EINVAL - : MDBX_EBADSIGN; - - int rc = check_txn(x->mc_txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(x->mc_txn != y->mc_txn)) - return MDBX_BAD_TXN; - - if (unlikely(y->mc_dbi != x->mc_dbi)) - return MDBX_EINVAL; - - if (unlikely(!(y->mc_flags & x->mc_flags & C_INITIALIZED))) - return MDBX_ENODATA; - - while (likely(r->level < y->mc_snum && r->level < x->mc_snum)) { - if (unlikely(y->mc_pg[r->level] != x->mc_pg[r->level])) { - ERROR("Mismatch cursors's pages at %zu level", r->level); - return MDBX_PROBLEM; - } - - intptr_t nkeys = page_numkeys(y->mc_pg[r->level]); - assert(nkeys > 0); - if (r->level == 0) - r->root_nkeys = nkeys; - - const intptr_t limit_ki = nkeys - 1; - const intptr_t x_ki = x->mc_ki[r->level]; - const intptr_t y_ki = y->mc_ki[r->level]; - r->diff = ((x_ki < limit_ki) ? x_ki : limit_ki) - - ((y_ki < limit_ki) ? y_ki : limit_ki); - if (r->diff == 0) { - r->level += 1; - continue; - } - - while (unlikely(r->diff == 1) && - likely(r->level + 1 < y->mc_snum && r->level + 1 < x->mc_snum)) { - r->level += 1; - /* DB'PAGEs: 0------------------>MAX - * - * CURSORs: y < x - * STACK[i ]: | - * STACK[+1]: ...y++N|0++x... - */ - nkeys = page_numkeys(y->mc_pg[r->level]); - r->diff = (nkeys - y->mc_ki[r->level]) + x->mc_ki[r->level]; - assert(r->diff > 0); - } - - while (unlikely(r->diff == -1) && - likely(r->level + 1 < y->mc_snum && r->level + 1 < x->mc_snum)) { - r->level += 1; - /* DB'PAGEs: 0------------------>MAX - * - * CURSORs: x < y - * STACK[i ]: | - * STACK[+1]: ...x--N|0--y... 
- */ - nkeys = page_numkeys(x->mc_pg[r->level]); - r->diff = -(nkeys - x->mc_ki[r->level]) - y->mc_ki[r->level]; - assert(r->diff < 0); - } - - return MDBX_SUCCESS; - } - - r->diff = CMP2INT(x->mc_flags & C_EOF, y->mc_flags & C_EOF); - return MDBX_SUCCESS; -} - -__hot static ptrdiff_t estimate(const MDBX_db *db, - struct diff_result *const __restrict dr) { - /* root: branch-page => scale = leaf-factor * branch-factor^(N-1) - * level-1: branch-page(s) => scale = leaf-factor * branch-factor^2 - * level-2: branch-page(s) => scale = leaf-factor * branch-factor - * level-N: branch-page(s) => scale = leaf-factor - * leaf-level: leaf-page(s) => scale = 1 - */ - ptrdiff_t btree_power = (ptrdiff_t)db->md_depth - 2 - (ptrdiff_t)dr->level; - if (btree_power < 0) - return dr->diff; - - ptrdiff_t estimated = - (ptrdiff_t)db->md_entries * dr->diff / (ptrdiff_t)db->md_leaf_pages; - if (btree_power == 0) - return estimated; - - if (db->md_depth < 4) { - assert(dr->level == 0 && btree_power == 1); - return (ptrdiff_t)db->md_entries * dr->diff / (ptrdiff_t)dr->root_nkeys; - } - - /* average_branchpage_fillfactor = total(branch_entries) / branch_pages - total(branch_entries) = leaf_pages + branch_pages - 1 (root page) */ - const size_t log2_fixedpoint = sizeof(size_t) - 1; - const size_t half = UINT64_C(1) << (log2_fixedpoint - 1); - const size_t factor = - ((db->md_leaf_pages + db->md_branch_pages - 1) << log2_fixedpoint) / - db->md_branch_pages; - while (1) { - switch ((size_t)btree_power) { - default: { - const size_t square = (factor * factor + half) >> log2_fixedpoint; - const size_t quad = (square * square + half) >> log2_fixedpoint; - do { - estimated = estimated * quad + half; - estimated >>= log2_fixedpoint; - btree_power -= 4; - } while (btree_power >= 4); - continue; - } - case 3: - estimated = estimated * factor + half; - estimated >>= log2_fixedpoint; - __fallthrough /* fall through */; - case 2: - estimated = estimated * factor + half; - estimated >>= log2_fixedpoint; - 
__fallthrough /* fall through */; - case 1: - estimated = estimated * factor + half; - estimated >>= log2_fixedpoint; - __fallthrough /* fall through */; - case 0: - if (unlikely(estimated > (ptrdiff_t)db->md_entries)) - return (ptrdiff_t)db->md_entries; - if (unlikely(estimated < -(ptrdiff_t)db->md_entries)) - return -(ptrdiff_t)db->md_entries; - return estimated; - } - } -} - -int mdbx_estimate_distance(const MDBX_cursor *first, const MDBX_cursor *last, - ptrdiff_t *distance_items) { - if (unlikely(first == NULL || last == NULL || distance_items == NULL)) - return MDBX_EINVAL; - - *distance_items = 0; - struct diff_result dr; - int rc = cursor_diff(last, first, &dr); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(dr.diff == 0) && - F_ISSET(first->mc_db->md_flags & last->mc_db->md_flags, - MDBX_DUPSORT | C_INITIALIZED)) { - first = &first->mc_xcursor->mx_cursor; - last = &last->mc_xcursor->mx_cursor; - rc = cursor_diff(first, last, &dr); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - - if (likely(dr.diff != 0)) - *distance_items = estimate(first->mc_db, &dr); - - return MDBX_SUCCESS; -} - -int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, - MDBX_cursor_op move_op, ptrdiff_t *distance_items) { - if (unlikely(cursor == NULL || distance_items == NULL || - move_op == MDBX_GET_CURRENT || move_op == MDBX_GET_MULTIPLE)) - return MDBX_EINVAL; - - if (unlikely(cursor->mc_signature != MDBX_MC_LIVE)) - return (cursor->mc_signature == MDBX_MC_READY4CLOSE) ? 
MDBX_EINVAL - : MDBX_EBADSIGN; - - int rc = check_txn(cursor->mc_txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (!(cursor->mc_flags & C_INITIALIZED)) - return MDBX_ENODATA; - - MDBX_cursor_couple next; - cursor_copy(cursor, &next.outer); - if (cursor->mc_db->md_flags & MDBX_DUPSORT) { - next.outer.mc_xcursor = &next.inner; - rc = cursor_xinit0(&next.outer); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - MDBX_xcursor *mx = &container_of(cursor, MDBX_cursor_couple, outer)->inner; - cursor_copy(&mx->mx_cursor, &next.inner.mx_cursor); - } - - MDBX_val stub = {0, 0}; - if (data == NULL) { - const unsigned mask = - 1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | 1 << MDBX_SET_KEY; - if (unlikely(mask & (1 << move_op))) - return MDBX_EINVAL; - data = &stub; - } - - if (key == NULL) { - const unsigned mask = 1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | - 1 << MDBX_SET_KEY | 1 << MDBX_SET | - 1 << MDBX_SET_RANGE; - if (unlikely(mask & (1 << move_op))) - return MDBX_EINVAL; - key = &stub; - } - - next.outer.mc_signature = MDBX_MC_LIVE; - rc = cursor_get(&next.outer, key, data, move_op); - if (unlikely(rc != MDBX_SUCCESS && - (rc != MDBX_NOTFOUND || !(next.outer.mc_flags & C_INITIALIZED)))) - return rc; - - return mdbx_estimate_distance(cursor, &next.outer, distance_items); -} - -int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, - const MDBX_val *begin_key, const MDBX_val *begin_data, - const MDBX_val *end_key, const MDBX_val *end_data, - ptrdiff_t *size_items) { - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!size_items)) - return MDBX_EINVAL; - - if (unlikely(begin_data && (begin_key == NULL || begin_key == MDBX_EPSILON))) - return MDBX_EINVAL; - - if (unlikely(end_data && (end_key == NULL || end_key == MDBX_EPSILON))) - return MDBX_EINVAL; - - if (unlikely(begin_key == MDBX_EPSILON && end_key == MDBX_EPSILON)) - return MDBX_EINVAL; - - MDBX_cursor_couple begin; 
- /* LY: first, initialize cursor to refresh a DB in case it have DB_STALE */ - rc = cursor_init(&begin.outer, txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(begin.outer.mc_db->md_entries == 0)) { - *size_items = 0; - return MDBX_SUCCESS; - } - - MDBX_val stub; - if (!begin_key) { - if (unlikely(!end_key)) { - /* LY: FIRST..LAST case */ - *size_items = (ptrdiff_t)begin.outer.mc_db->md_entries; - return MDBX_SUCCESS; - } - rc = cursor_first(&begin.outer, &stub, &stub); - if (unlikely(end_key == MDBX_EPSILON)) { - /* LY: FIRST..+epsilon case */ - return (rc == MDBX_SUCCESS) - ? mdbx_cursor_count(&begin.outer, (size_t *)size_items) - : rc; - } - } else { - if (unlikely(begin_key == MDBX_EPSILON)) { - if (end_key == NULL) { - /* LY: -epsilon..LAST case */ - rc = cursor_last(&begin.outer, &stub, &stub); - return (rc == MDBX_SUCCESS) - ? mdbx_cursor_count(&begin.outer, (size_t *)size_items) - : rc; - } - /* LY: -epsilon..value case */ - assert(end_key != MDBX_EPSILON); - begin_key = end_key; - } else if (unlikely(end_key == MDBX_EPSILON)) { - /* LY: value..+epsilon case */ - assert(begin_key != MDBX_EPSILON); - end_key = begin_key; - } - if (end_key && !begin_data && !end_data && - (begin_key == end_key || - begin.outer.mc_dbx->md_cmp(begin_key, end_key) == 0)) { - /* LY: single key case */ - rc = cursor_set(&begin.outer, (MDBX_val *)begin_key, NULL, MDBX_SET).err; - if (unlikely(rc != MDBX_SUCCESS)) { - *size_items = 0; - return (rc == MDBX_NOTFOUND) ? 
MDBX_SUCCESS : rc; - } - *size_items = 1; - if (begin.outer.mc_xcursor != NULL) { - MDBX_node *node = page_node(begin.outer.mc_pg[begin.outer.mc_top], - begin.outer.mc_ki[begin.outer.mc_top]); - if (node_flags(node) & F_DUPDATA) { - /* LY: return the number of duplicates for given key */ - tASSERT(txn, begin.outer.mc_xcursor == &begin.inner && - (begin.inner.mx_cursor.mc_flags & C_INITIALIZED)); - *size_items = - (sizeof(*size_items) >= sizeof(begin.inner.mx_db.md_entries) || - begin.inner.mx_db.md_entries <= PTRDIFF_MAX) - ? (size_t)begin.inner.mx_db.md_entries - : PTRDIFF_MAX; - } - } - return MDBX_SUCCESS; - } else if (begin_data) { - stub = *begin_data; - rc = cursor_set(&begin.outer, (MDBX_val *)begin_key, &stub, - MDBX_GET_BOTH_RANGE) - .err; - } else { - stub = *begin_key; - rc = cursor_set(&begin.outer, &stub, nullptr, MDBX_SET_RANGE).err; - } - } - - if (unlikely(rc != MDBX_SUCCESS)) { - if (rc != MDBX_NOTFOUND || !(begin.outer.mc_flags & C_INITIALIZED)) - return rc; - } - - MDBX_cursor_couple end; - rc = cursor_init(&end.outer, txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - if (!end_key) - rc = cursor_last(&end.outer, &stub, &stub); - else if (end_data) { - stub = *end_data; - rc = cursor_set(&end.outer, (MDBX_val *)end_key, &stub, MDBX_GET_BOTH_RANGE) - .err; - } else { - stub = *end_key; - rc = cursor_set(&end.outer, &stub, nullptr, MDBX_SET_RANGE).err; - } - if (unlikely(rc != MDBX_SUCCESS)) { - if (rc != MDBX_NOTFOUND || !(end.outer.mc_flags & C_INITIALIZED)) - return rc; - } - - rc = mdbx_estimate_distance(&begin.outer, &end.outer, size_items); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - assert(*size_items >= -(ptrdiff_t)begin.outer.mc_db->md_entries && - *size_items <= (ptrdiff_t)begin.outer.mc_db->md_entries); - -#if 0 /* LY: Was decided to returns as-is (i.e. negative) the estimation \ - * results for an inverted ranges. 
*/ - - /* Commit 8ddfd1f34ad7cf7a3c4aa75d2e248ca7e639ed63 - Change-Id: If59eccf7311123ab6384c4b93f9b1fed5a0a10d1 */ - - if (*size_items < 0) { - /* LY: inverted range case */ - *size_items += (ptrdiff_t)begin.outer.mc_db->md_entries; - } else if (*size_items == 0 && begin_key && end_key) { - int cmp = begin.outer.mc_dbx->md_cmp(&origin_begin_key, &origin_end_key); - if (cmp == 0 && (begin.inner.mx_cursor.mc_flags & C_INITIALIZED) && - begin_data && end_data) - cmp = begin.outer.mc_dbx->md_dcmp(&origin_begin_data, &origin_end_data); - if (cmp > 0) { - /* LY: inverted range case with empty scope */ - *size_items = (ptrdiff_t)begin.outer.mc_db->md_entries; - } - } - assert(*size_items >= 0 && - *size_items <= (ptrdiff_t)begin.outer.mc_db->md_entries); -#endif - - return MDBX_SUCCESS; -} - -//------------------------------------------------------------------------------ - -/* Позволяет обновить или удалить существующую запись с получением - * в old_data предыдущего значения данных. При этом если new_data равен - * нулю, то выполняется удаление, иначе обновление/вставка. - * - * Текущее значение может находиться в уже измененной (грязной) странице. - * В этом случае страница будет перезаписана при обновлении, а само старое - * значение утрачено. Поэтому исходно в old_data должен быть передан - * дополнительный буфер для копирования старого значения. - * Если переданный буфер слишком мал, то функция вернет -1, установив - * old_data->iov_len в соответствующее значение. - * - * Для не-уникальных ключей также возможен второй сценарий использования, - * когда посредством old_data из записей с одинаковым ключом для - * удаления/обновления выбирается конкретная. Для выбора этого сценария - * во flags следует одновременно указать MDBX_CURRENT и MDBX_NOOVERWRITE. - * Именно эта комбинация выбрана, так как она лишена смысла, и этим позволяет - * идентифицировать запрос такого сценария. 
- * - * Функция может быть замещена соответствующими операциями с курсорами - * после двух доработок (TODO): - * - внешняя аллокация курсоров, в том числе на стеке (без malloc). - * - получения dirty-статуса страницы по адресу (знать о MUTABLE/WRITEABLE). - */ - -int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, - MDBX_val *new_data, MDBX_val *old_data, - MDBX_put_flags_t flags, MDBX_preserve_func preserver, - void *preserver_context) { - int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!key || !old_data || old_data == new_data)) - return MDBX_EINVAL; - - if (unlikely(old_data->iov_base == NULL && old_data->iov_len)) - return MDBX_EINVAL; - - if (unlikely(new_data == NULL && - (flags & (MDBX_CURRENT | MDBX_RESERVE)) != MDBX_CURRENT)) - return MDBX_EINVAL; - - if (unlikely(dbi <= FREE_DBI)) - return MDBX_BAD_DBI; - - if (unlikely(flags & - ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | - MDBX_RESERVE | MDBX_APPEND | MDBX_APPENDDUP | MDBX_CURRENT))) - return MDBX_EINVAL; - - MDBX_cursor_couple cx; - rc = cursor_init(&cx.outer, txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - cx.outer.mc_next = txn->mt_cursors[dbi]; - txn->mt_cursors[dbi] = &cx.outer; - - MDBX_val present_key = *key; - if (F_ISSET(flags, MDBX_CURRENT | MDBX_NOOVERWRITE)) { - /* в old_data значение для выбора конкретного дубликата */ - if (unlikely(!(txn->mt_dbs[dbi].md_flags & MDBX_DUPSORT))) { - rc = MDBX_EINVAL; - goto bailout; - } - - /* убираем лишний бит, он был признаком запрошенного режима */ - flags -= MDBX_NOOVERWRITE; - - rc = cursor_set(&cx.outer, &present_key, old_data, MDBX_GET_BOTH).err; - if (rc != MDBX_SUCCESS) - goto bailout; - } else { - /* в old_data буфер для сохранения предыдущего значения */ - if (unlikely(new_data && old_data->iov_base == new_data->iov_base)) - return MDBX_EINVAL; - MDBX_val present_data; - rc = cursor_set(&cx.outer, &present_key, &present_data, 
MDBX_SET_KEY).err; - if (unlikely(rc != MDBX_SUCCESS)) { - old_data->iov_base = NULL; - old_data->iov_len = 0; - if (rc != MDBX_NOTFOUND || (flags & MDBX_CURRENT)) - goto bailout; - } else if (flags & MDBX_NOOVERWRITE) { - rc = MDBX_KEYEXIST; - *old_data = present_data; - goto bailout; - } else { - MDBX_page *page = cx.outer.mc_pg[cx.outer.mc_top]; - if (txn->mt_dbs[dbi].md_flags & MDBX_DUPSORT) { - if (flags & MDBX_CURRENT) { - /* disallow update/delete for multi-values */ - MDBX_node *node = page_node(page, cx.outer.mc_ki[cx.outer.mc_top]); - if (node_flags(node) & F_DUPDATA) { - tASSERT(txn, XCURSOR_INITED(&cx.outer) && - cx.outer.mc_xcursor->mx_db.md_entries > 1); - if (cx.outer.mc_xcursor->mx_db.md_entries > 1) { - rc = MDBX_EMULTIVAL; - goto bailout; - } - } - /* В оригинальной LMDB флажок MDBX_CURRENT здесь приведет - * к замене данных без учета MDBX_DUPSORT сортировки, - * но здесь это в любом случае допустимо, так как мы - * проверили что для ключа есть только одно значение. */ - } - } - - if (IS_MODIFIABLE(txn, page)) { - if (new_data && cmp_lenfast(&present_data, new_data) == 0) { - /* если данные совпадают, то ничего делать не надо */ - *old_data = *new_data; - goto bailout; - } - rc = preserver ? 
preserver(preserver_context, old_data, - present_data.iov_base, present_data.iov_len) - : MDBX_SUCCESS; - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } else { - *old_data = present_data; - } - flags |= MDBX_CURRENT; - } - } - - if (likely(new_data)) - rc = cursor_put_checklen(&cx.outer, key, new_data, flags); - else - rc = cursor_del(&cx.outer, flags & MDBX_ALLDUPS); - -bailout: - txn->mt_cursors[dbi] = cx.outer.mc_next; - return rc; -} - -static int default_value_preserver(void *context, MDBX_val *target, - const void *src, size_t bytes) { - (void)context; - if (unlikely(target->iov_len < bytes)) { - target->iov_base = nullptr; - target->iov_len = bytes; - return MDBX_RESULT_TRUE; - } - memcpy(target->iov_base, src, target->iov_len = bytes); - return MDBX_SUCCESS; -} - -int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, - MDBX_val *new_data, MDBX_val *old_data, - MDBX_put_flags_t flags) { - return mdbx_replace_ex(txn, dbi, key, new_data, old_data, flags, - default_value_preserver, nullptr); -} - -/* Функция сообщает находится ли указанный адрес в "грязной" странице у - * заданной пишущей транзакции. В конечном счете это позволяет избавиться от - * лишнего копирования данных из НЕ-грязных страниц. - * - * "Грязные" страницы - это те, которые уже были изменены в ходе пишущей - * транзакции. Соответственно, какие-либо дальнейшие изменения могут привести - * к перезаписи таких страниц. Поэтому все функции, выполняющие изменения, в - * качестве аргументов НЕ должны получать указатели на данные в таких - * страницах. В свою очередь "НЕ грязные" страницы перед модификацией будут - * скопированы. - * - * Другими словами, данные из "грязных" страниц должны быть либо скопированы - * перед передачей в качестве аргументов для дальнейших модификаций, либо - * отвергнуты на стадии проверки корректности аргументов. - * - * Таким образом, функция позволяет как избавится от лишнего копирования, - * так и выполнить более полную проверку аргументов. 
- * - * ВАЖНО: Передаваемый указатель должен указывать на начало данных. Только - * так гарантируется что актуальный заголовок страницы будет физически - * расположен в той-же странице памяти, в том числе для многостраничных - * P_OVERFLOW страниц с длинными данными. */ -int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr) { - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - const MDBX_env *env = txn->mt_env; - const ptrdiff_t offset = ptr_dist(ptr, env->me_map); - if (offset >= 0) { - const pgno_t pgno = bytes2pgno(env, offset); - if (likely(pgno < txn->mt_next_pgno)) { - const MDBX_page *page = pgno2page(env, pgno); - if (unlikely(page->mp_pgno != pgno || - (page->mp_flags & P_ILL_BITS) != 0)) { - /* The ptr pointed into middle of a large page, - * not to the beginning of a data. */ - return MDBX_EINVAL; - } - return ((txn->mt_flags & MDBX_TXN_RDONLY) || !IS_MODIFIABLE(txn, page)) - ? MDBX_RESULT_FALSE - : MDBX_RESULT_TRUE; - } - if ((size_t)offset < env->me_dxb_mmap.limit) { - /* Указатель адресует что-то в пределах mmap, но за границей - * распределенных страниц. Такое может случится если mdbx_is_dirty() - * вызывается после операции, в ходе которой грязная страница была - * возвращена в нераспределенное пространство. */ - return (txn->mt_flags & MDBX_TXN_RDONLY) ? MDBX_EINVAL : MDBX_RESULT_TRUE; - } - } - - /* Страница вне используемого mmap-диапазона, т.е. либо в функцию был - * передан некорректный адрес, либо адрес в теневой странице, которая была - * выделена посредством malloc(). - * - * Для режима MDBX_WRITE_MAP режима страница однозначно "не грязная", - * а для режимов без MDBX_WRITE_MAP однозначно "не чистая". */ - return (txn->mt_flags & (MDBX_WRITEMAP | MDBX_TXN_RDONLY)) ? 
MDBX_EINVAL - : MDBX_RESULT_TRUE; -} - -int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, - uint64_t increment) { - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - rc = dbi_check(txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(txn->mt_dbi_state[dbi] & DBI_STALE)) { - rc = fetch_sdb(txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - - MDBX_db *dbs = &txn->mt_dbs[dbi]; - if (likely(result)) - *result = dbs->md_seq; - - if (likely(increment > 0)) { - if (unlikely(dbi == FREE_DBI || (txn->mt_flags & MDBX_TXN_RDONLY) != 0)) - return MDBX_EACCESS; - - uint64_t new = dbs->md_seq + increment; - if (unlikely(new < increment)) - return MDBX_RESULT_TRUE; - - tASSERT(txn, new > dbs->md_seq); - dbs->md_seq = new; - txn->mt_flags |= MDBX_TXN_DIRTY; - txn->mt_dbi_state[dbi] |= DBI_DIRTY; - } - - return MDBX_SUCCESS; -} - -/*----------------------------------------------------------------------------*/ - -__cold intptr_t mdbx_limits_dbsize_min(intptr_t pagesize) { - if (pagesize < 1) - pagesize = (intptr_t)mdbx_default_pagesize(); - else if (unlikely(pagesize < (intptr_t)MIN_PAGESIZE || - pagesize > (intptr_t)MAX_PAGESIZE || - !is_powerof2((size_t)pagesize))) - return -1; - - return MIN_PAGENO * pagesize; -} - -__cold intptr_t mdbx_limits_dbsize_max(intptr_t pagesize) { - if (pagesize < 1) - pagesize = (intptr_t)mdbx_default_pagesize(); - else if (unlikely(pagesize < (intptr_t)MIN_PAGESIZE || - pagesize > (intptr_t)MAX_PAGESIZE || - !is_powerof2((size_t)pagesize))) - return -1; - - STATIC_ASSERT(MAX_MAPSIZE < INTPTR_MAX); - const uint64_t limit = (1 + (uint64_t)MAX_PAGENO) * pagesize; - return (limit < MAX_MAPSIZE) ? 
(intptr_t)limit : (intptr_t)MAX_MAPSIZE; -} - -__cold intptr_t mdbx_limits_txnsize_max(intptr_t pagesize) { - if (pagesize < 1) - pagesize = (intptr_t)mdbx_default_pagesize(); - else if (unlikely(pagesize < (intptr_t)MIN_PAGESIZE || - pagesize > (intptr_t)MAX_PAGESIZE || - !is_powerof2((size_t)pagesize))) - return -1; - - STATIC_ASSERT(MAX_MAPSIZE < INTPTR_MAX); - const uint64_t pgl_limit = - pagesize * (uint64_t)(MDBX_PGL_LIMIT / MDBX_GOLD_RATIO_DBL); - const uint64_t map_limit = (uint64_t)(MAX_MAPSIZE / MDBX_GOLD_RATIO_DBL); - return (pgl_limit < map_limit) ? (intptr_t)pgl_limit : (intptr_t)map_limit; -} - -/*** Key-making functions to avoid custom comparators *************************/ - -static __always_inline double key2double(const int64_t key) { - union { - uint64_t u; - double f; - } casting; - - casting.u = (key < 0) ? key + UINT64_C(0x8000000000000000) - : UINT64_C(0xffffFFFFffffFFFF) - key; - return casting.f; -} - -static __always_inline uint64_t double2key(const double *const ptr) { - STATIC_ASSERT(sizeof(double) == sizeof(int64_t)); - const int64_t i = *(const int64_t *)ptr; - const uint64_t u = (i < 0) ? UINT64_C(0xffffFFFFffffFFFF) - i - : i + UINT64_C(0x8000000000000000); - if (ASSERT_ENABLED()) { - const double f = key2double(u); - assert(memcmp(&f, ptr, 8) == 0); - } - return u; -} - -static __always_inline float key2float(const int32_t key) { - union { - uint32_t u; - float f; - } casting; - - casting.u = - (key < 0) ? key + UINT32_C(0x80000000) : UINT32_C(0xffffFFFF) - key; - return casting.f; -} - -static __always_inline uint32_t float2key(const float *const ptr) { - STATIC_ASSERT(sizeof(float) == sizeof(int32_t)); - const int32_t i = *(const int32_t *)ptr; - const uint32_t u = - (i < 0) ? 
UINT32_C(0xffffFFFF) - i : i + UINT32_C(0x80000000); - if (ASSERT_ENABLED()) { - const float f = key2float(u); - assert(memcmp(&f, ptr, 4) == 0); - } - return u; -} - -uint64_t mdbx_key_from_double(const double ieee754_64bit) { - return double2key(&ieee754_64bit); -} - -uint64_t mdbx_key_from_ptrdouble(const double *const ieee754_64bit) { - return double2key(ieee754_64bit); -} - -uint32_t mdbx_key_from_float(const float ieee754_32bit) { - return float2key(&ieee754_32bit); -} - -uint32_t mdbx_key_from_ptrfloat(const float *const ieee754_32bit) { - return float2key(ieee754_32bit); -} - -#define IEEE754_DOUBLE_MANTISSA_SIZE 52 -#define IEEE754_DOUBLE_EXPONENTA_BIAS 0x3FF -#define IEEE754_DOUBLE_EXPONENTA_MAX 0x7FF -#define IEEE754_DOUBLE_IMPLICIT_LEAD UINT64_C(0x0010000000000000) -#define IEEE754_DOUBLE_MANTISSA_MASK UINT64_C(0x000FFFFFFFFFFFFF) -#define IEEE754_DOUBLE_MANTISSA_AMAX UINT64_C(0x001FFFFFFFFFFFFF) - -static __inline int clz64(uint64_t value) { -#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_clzl) - if (sizeof(value) == sizeof(int)) - return __builtin_clz(value); - if (sizeof(value) == sizeof(long)) - return __builtin_clzl(value); -#if (defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ == 8) || \ - __has_builtin(__builtin_clzll) - return __builtin_clzll(value); -#endif /* have(long long) && long long == uint64_t */ -#endif /* GNU C */ - -#if defined(_MSC_VER) - unsigned long index; -#if defined(_M_AMD64) || defined(_M_ARM64) || defined(_M_X64) - _BitScanReverse64(&index, value); - return 63 - index; -#else - if (value > UINT32_MAX) { - _BitScanReverse(&index, (uint32_t)(value >> 32)); - return 31 - index; - } - _BitScanReverse(&index, (uint32_t)value); - return 63 - index; -#endif -#endif /* MSVC */ - - value |= value >> 1; - value |= value >> 2; - value |= value >> 4; - value |= value >> 8; - value |= value >> 16; - value |= value >> 32; - static const uint8_t debruijn_clz64[64] = { - 63, 16, 62, 7, 15, 36, 61, 3, 6, 14, 22, 26, 35, 47, 60, 2, - 
9, 5, 28, 11, 13, 21, 42, 19, 25, 31, 34, 40, 46, 52, 59, 1, - 17, 8, 37, 4, 23, 27, 48, 10, 29, 12, 43, 20, 32, 41, 53, 18, - 38, 24, 49, 30, 44, 33, 54, 39, 50, 45, 55, 51, 56, 57, 58, 0}; - return debruijn_clz64[value * UINT64_C(0x03F79D71B4CB0A89) >> 58]; -} - -static __inline uint64_t round_mantissa(const uint64_t u64, int shift) { - assert(shift < 0 && u64 > 0); - shift = -shift; - const unsigned half = 1 << (shift - 1); - const unsigned lsb = 1 & (unsigned)(u64 >> shift); - const unsigned tie2even = 1 ^ lsb; - return (u64 + half - tie2even) >> shift; -} - -uint64_t mdbx_key_from_jsonInteger(const int64_t json_integer) { - const uint64_t bias = UINT64_C(0x8000000000000000); - if (json_integer > 0) { - const uint64_t u64 = json_integer; - int shift = clz64(u64) - (64 - IEEE754_DOUBLE_MANTISSA_SIZE - 1); - uint64_t mantissa = u64 << shift; - if (unlikely(shift < 0)) { - mantissa = round_mantissa(u64, shift); - if (mantissa > IEEE754_DOUBLE_MANTISSA_AMAX) - mantissa = round_mantissa(u64, --shift); - } - - assert(mantissa >= IEEE754_DOUBLE_IMPLICIT_LEAD && - mantissa <= IEEE754_DOUBLE_MANTISSA_AMAX); - const uint64_t exponent = (uint64_t)IEEE754_DOUBLE_EXPONENTA_BIAS + - IEEE754_DOUBLE_MANTISSA_SIZE - shift; - assert(exponent > 0 && exponent <= IEEE754_DOUBLE_EXPONENTA_MAX); - const uint64_t key = bias + (exponent << IEEE754_DOUBLE_MANTISSA_SIZE) + - (mantissa - IEEE754_DOUBLE_IMPLICIT_LEAD); -#if !defined(_MSC_VER) || \ - defined( \ - _DEBUG) /* Workaround for MSVC error LNK2019: unresolved external \ - symbol __except1 referenced in function __ftol3_except */ - assert(key == mdbx_key_from_double((double)json_integer)); -#endif /* Workaround for MSVC */ - return key; - } - - if (json_integer < 0) { - const uint64_t u64 = -json_integer; - int shift = clz64(u64) - (64 - IEEE754_DOUBLE_MANTISSA_SIZE - 1); - uint64_t mantissa = u64 << shift; - if (unlikely(shift < 0)) { - mantissa = round_mantissa(u64, shift); - if (mantissa > IEEE754_DOUBLE_MANTISSA_AMAX) - 
mantissa = round_mantissa(u64, --shift); - } - - assert(mantissa >= IEEE754_DOUBLE_IMPLICIT_LEAD && - mantissa <= IEEE754_DOUBLE_MANTISSA_AMAX); - const uint64_t exponent = (uint64_t)IEEE754_DOUBLE_EXPONENTA_BIAS + - IEEE754_DOUBLE_MANTISSA_SIZE - shift; - assert(exponent > 0 && exponent <= IEEE754_DOUBLE_EXPONENTA_MAX); - const uint64_t key = bias - 1 - (exponent << IEEE754_DOUBLE_MANTISSA_SIZE) - - (mantissa - IEEE754_DOUBLE_IMPLICIT_LEAD); -#if !defined(_MSC_VER) || \ - defined( \ - _DEBUG) /* Workaround for MSVC error LNK2019: unresolved external \ - symbol __except1 referenced in function __ftol3_except */ - assert(key == mdbx_key_from_double((double)json_integer)); -#endif /* Workaround for MSVC */ - return key; - } - - return bias; -} - -int64_t mdbx_jsonInteger_from_key(const MDBX_val v) { - assert(v.iov_len == 8); - const uint64_t key = unaligned_peek_u64(2, v.iov_base); - const uint64_t bias = UINT64_C(0x8000000000000000); - const uint64_t covalent = (key > bias) ? key - bias : bias - key - 1; - const int shift = IEEE754_DOUBLE_EXPONENTA_BIAS + 63 - - (IEEE754_DOUBLE_EXPONENTA_MAX & - (int)(covalent >> IEEE754_DOUBLE_MANTISSA_SIZE)); - if (unlikely(shift < 1)) - return (key < bias) ? INT64_MIN : INT64_MAX; - if (unlikely(shift > 63)) - return 0; - - const uint64_t unscaled = ((covalent & IEEE754_DOUBLE_MANTISSA_MASK) - << (63 - IEEE754_DOUBLE_MANTISSA_SIZE)) + - bias; - const int64_t absolute = unscaled >> shift; - const int64_t value = (key < bias) ? 
-absolute : absolute; - assert(key == mdbx_key_from_jsonInteger(value) || - (mdbx_key_from_jsonInteger(value - 1) < key && - key < mdbx_key_from_jsonInteger(value + 1))); - return value; -} - -double mdbx_double_from_key(const MDBX_val v) { - assert(v.iov_len == 8); - return key2double(unaligned_peek_u64(2, v.iov_base)); -} - -float mdbx_float_from_key(const MDBX_val v) { - assert(v.iov_len == 4); - return key2float(unaligned_peek_u32(2, v.iov_base)); -} - -int32_t mdbx_int32_from_key(const MDBX_val v) { - assert(v.iov_len == 4); - return (int32_t)(unaligned_peek_u32(2, v.iov_base) - UINT32_C(0x80000000)); -} - -int64_t mdbx_int64_from_key(const MDBX_val v) { - assert(v.iov_len == 8); - return (int64_t)(unaligned_peek_u64(2, v.iov_base) - - UINT64_C(0x8000000000000000)); -} - -__cold MDBX_cmp_func *mdbx_get_keycmp(MDBX_db_flags_t flags) { - return get_default_keycmp(flags); -} - -__cold MDBX_cmp_func *mdbx_get_datacmp(MDBX_db_flags_t flags) { - return get_default_datacmp(flags); -} - -__cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, - uint64_t value) { - int err = check_env(env, false); - if (unlikely(err != MDBX_SUCCESS)) - return err; - - const bool lock_needed = ((env->me_flags & MDBX_ENV_ACTIVE) && env->me_txn0 && - !env_txn0_owned(env)); - bool should_unlock = false; - switch (option) { - case MDBX_opt_sync_bytes: - if (value == /* default */ UINT64_MAX) - value = MAX_WRITE; - if (unlikely(env->me_flags & MDBX_RDONLY)) - return MDBX_EACCESS; - if (unlikely(!(env->me_flags & MDBX_ENV_ACTIVE))) - return MDBX_EPERM; - if (unlikely(value > SIZE_MAX - 65536)) - return MDBX_EINVAL; - value = bytes2pgno(env, (size_t)value + env->me_psize - 1); - if ((uint32_t)value != atomic_load32(&env->me_lck->mti_autosync_threshold, - mo_AcquireRelease) && - atomic_store32(&env->me_lck->mti_autosync_threshold, (uint32_t)value, - mo_Relaxed) - /* Дергаем sync(force=off) только если задано новое не-нулевое значение - * и мы вне транзакции */ - && 
lock_needed) { - err = env_sync(env, false, false); - if (err == /* нечего сбрасывать на диск */ MDBX_RESULT_TRUE) - err = MDBX_SUCCESS; - } - break; - - case MDBX_opt_sync_period: - if (value == /* default */ UINT64_MAX) - value = 2780315 /* 42.42424 секунды */; - if (unlikely(env->me_flags & MDBX_RDONLY)) - return MDBX_EACCESS; - if (unlikely(!(env->me_flags & MDBX_ENV_ACTIVE))) - return MDBX_EPERM; - if (unlikely(value > UINT32_MAX)) - return MDBX_EINVAL; - value = osal_16dot16_to_monotime((uint32_t)value); - if (value != atomic_load64(&env->me_lck->mti_autosync_period, - mo_AcquireRelease) && - atomic_store64(&env->me_lck->mti_autosync_period, value, mo_Relaxed) - /* Дергаем sync(force=off) только если задано новое не-нулевое значение - * и мы вне транзакции */ - && lock_needed) { - err = env_sync(env, false, false); - if (err == /* нечего сбрасывать на диск */ MDBX_RESULT_TRUE) - err = MDBX_SUCCESS; - } - break; - - case MDBX_opt_max_db: - if (value == /* default */ UINT64_MAX) - value = 42; - if (unlikely(value > MDBX_MAX_DBI)) - return MDBX_EINVAL; - if (unlikely(env->me_map)) - return MDBX_EPERM; - env->me_maxdbs = (unsigned)value + CORE_DBS; - break; - - case MDBX_opt_max_readers: - if (value == /* default */ UINT64_MAX) - value = MDBX_READERS_LIMIT; - if (unlikely(value < 1 || value > MDBX_READERS_LIMIT)) - return MDBX_EINVAL; - if (unlikely(env->me_map)) - return MDBX_EPERM; - env->me_maxreaders = (unsigned)value; - break; - - case MDBX_opt_dp_reserve_limit: - if (value == /* default */ UINT64_MAX) - value = INT_MAX; - if (unlikely(value > INT_MAX)) - return MDBX_EINVAL; - if (env->me_options.dp_reserve_limit != (unsigned)value) { - if (lock_needed) { - err = osal_txn_lock(env, false); - if (unlikely(err != MDBX_SUCCESS)) - return err; - should_unlock = true; - } - env->me_options.dp_reserve_limit = (unsigned)value; - while (env->me_dp_reserve_len > env->me_options.dp_reserve_limit) { - eASSERT(env, env->me_dp_reserve != NULL); - MDBX_page *dp = 
env->me_dp_reserve; - MDBX_ASAN_UNPOISON_MEMORY_REGION(dp, env->me_psize); - VALGRIND_MAKE_MEM_DEFINED(&mp_next(dp), sizeof(MDBX_page *)); - env->me_dp_reserve = mp_next(dp); - void *const ptr = ptr_disp(dp, -(ptrdiff_t)sizeof(size_t)); - osal_free(ptr); - env->me_dp_reserve_len -= 1; - } - } - break; - - case MDBX_opt_rp_augment_limit: - if (value == /* default */ UINT64_MAX) { - env->me_options.flags.non_auto.rp_augment_limit = 0; - env->me_options.rp_augment_limit = default_rp_augment_limit(env); - } else if (unlikely(value > MDBX_PGL_LIMIT)) - return MDBX_EINVAL; - else { - env->me_options.flags.non_auto.rp_augment_limit = 1; - env->me_options.rp_augment_limit = (unsigned)value; - } - break; - - case MDBX_opt_gc_time_limit: - if (value == /* default */ UINT64_MAX) - value = 0; - if (unlikely(value > UINT32_MAX)) - return MDBX_EINVAL; - if (unlikely(env->me_flags & MDBX_RDONLY)) - return MDBX_EACCESS; - value = osal_16dot16_to_monotime((uint32_t)value); - if (value != env->me_options.gc_time_limit) { - if (env->me_txn && lock_needed) - return MDBX_EPERM; - env->me_options.gc_time_limit = value; - if (!env->me_options.flags.non_auto.rp_augment_limit) - env->me_options.rp_augment_limit = default_rp_augment_limit(env); - } - break; - - case MDBX_opt_txn_dp_limit: - case MDBX_opt_txn_dp_initial: - if (value == /* default */ UINT64_MAX) - value = MDBX_PGL_LIMIT; - if (unlikely(value > MDBX_PGL_LIMIT || value < CURSOR_STACK * 4)) - return MDBX_EINVAL; - if (unlikely(env->me_flags & MDBX_RDONLY)) - return MDBX_EACCESS; - if (lock_needed) { - err = osal_txn_lock(env, false); - if (unlikely(err != MDBX_SUCCESS)) - return err; - should_unlock = true; - } - if (env->me_txn) - err = MDBX_EPERM /* unable change during transaction */; - else { - const pgno_t value32 = (pgno_t)value; - if (option == MDBX_opt_txn_dp_initial && - env->me_options.dp_initial != value32) { - env->me_options.dp_initial = value32; - if (env->me_options.dp_limit < value32) { - env->me_options.dp_limit 
= value32; - env->me_options.flags.non_auto.dp_limit = 1; - } - } - if (option == MDBX_opt_txn_dp_limit && - env->me_options.dp_limit != value32) { - env->me_options.dp_limit = value32; - env->me_options.flags.non_auto.dp_limit = 1; - if (env->me_options.dp_initial > value32) - env->me_options.dp_initial = value32; - } - } - break; - - case MDBX_opt_spill_max_denominator: - if (value == /* default */ UINT64_MAX) - value = 8; - if (unlikely(value > 255)) - return MDBX_EINVAL; - env->me_options.spill_max_denominator = (uint8_t)value; - break; - case MDBX_opt_spill_min_denominator: - if (value == /* default */ UINT64_MAX) - value = 8; - if (unlikely(value > 255)) - return MDBX_EINVAL; - env->me_options.spill_min_denominator = (uint8_t)value; - break; - case MDBX_opt_spill_parent4child_denominator: - if (value == /* default */ UINT64_MAX) - value = 0; - if (unlikely(value > 255)) - return MDBX_EINVAL; - env->me_options.spill_parent4child_denominator = (uint8_t)value; - break; - - case MDBX_opt_loose_limit: - if (value == /* default */ UINT64_MAX) - value = 64; - if (unlikely(value > 255)) - return MDBX_EINVAL; - env->me_options.dp_loose_limit = (uint8_t)value; - break; - - case MDBX_opt_merge_threshold_16dot16_percent: - if (value == /* default */ UINT64_MAX) - value = 65536 / 4 /* 25% */; - if (unlikely(value < 8192 || value > 32768)) - return MDBX_EINVAL; - env->me_options.merge_threshold_16dot16_percent = (unsigned)value; - recalculate_merge_threshold(env); - break; - - case MDBX_opt_writethrough_threshold: -#if defined(_WIN32) || defined(_WIN64) - /* позволяем "установить" значение по-умолчанию и совпадающее - * с поведением соответствующим текущей установке MDBX_NOMETASYNC */ - if (value == /* default */ UINT64_MAX && - value != ((env->me_flags & MDBX_NOMETASYNC) ? 
0 : UINT_MAX)) - err = MDBX_EINVAL; -#else - if (value == /* default */ UINT64_MAX) - value = MDBX_WRITETHROUGH_THRESHOLD_DEFAULT; - if (value != (unsigned)value) - err = MDBX_EINVAL; - else - env->me_options.writethrough_threshold = (unsigned)value; -#endif - break; - - case MDBX_opt_prefault_write_enable: - if (value == /* default */ UINT64_MAX) { - env->me_options.prefault_write = default_prefault_write(env); - env->me_options.flags.non_auto.prefault_write = false; - } else if (value > 1) - err = MDBX_EINVAL; - else { - env->me_options.prefault_write = value != 0; - env->me_options.flags.non_auto.prefault_write = true; - } - break; - - case MDBX_opt_prefer_waf_insteadof_balance: - if (value == /* default */ UINT64_MAX) - env->me_options.prefer_waf_insteadof_balance = - default_prefer_waf_insteadof_balance(env); - else if (value > 1) - err = MDBX_EINVAL; - else - env->me_options.prefer_waf_insteadof_balance = value != 0; - break; - - default: - return MDBX_EINVAL; - } - - if (should_unlock) - osal_txn_unlock(env); - return err; -} - -__cold int mdbx_env_get_option(const MDBX_env *env, const MDBX_option_t option, - uint64_t *pvalue) { - int err = check_env(env, false); - if (unlikely(err != MDBX_SUCCESS)) - return err; - if (unlikely(!pvalue)) - return MDBX_EINVAL; - - switch (option) { - case MDBX_opt_sync_bytes: - if (unlikely(!(env->me_flags & MDBX_ENV_ACTIVE))) - return MDBX_EPERM; - *pvalue = pgno2bytes( - env, atomic_load32(&env->me_lck->mti_autosync_threshold, mo_Relaxed)); - break; - - case MDBX_opt_sync_period: - if (unlikely(!(env->me_flags & MDBX_ENV_ACTIVE))) - return MDBX_EPERM; - *pvalue = osal_monotime_to_16dot16( - atomic_load64(&env->me_lck->mti_autosync_period, mo_Relaxed)); - break; - - case MDBX_opt_max_db: - *pvalue = env->me_maxdbs - CORE_DBS; - break; - - case MDBX_opt_max_readers: - *pvalue = env->me_maxreaders; - break; - - case MDBX_opt_dp_reserve_limit: - *pvalue = env->me_options.dp_reserve_limit; - break; - - case 
MDBX_opt_rp_augment_limit: - *pvalue = env->me_options.rp_augment_limit; - break; - - case MDBX_opt_gc_time_limit: - *pvalue = osal_monotime_to_16dot16(env->me_options.gc_time_limit); - break; - - case MDBX_opt_txn_dp_limit: - *pvalue = env->me_options.dp_limit; - break; - case MDBX_opt_txn_dp_initial: - *pvalue = env->me_options.dp_initial; - break; - - case MDBX_opt_spill_max_denominator: - *pvalue = env->me_options.spill_max_denominator; - break; - case MDBX_opt_spill_min_denominator: - *pvalue = env->me_options.spill_min_denominator; - break; - case MDBX_opt_spill_parent4child_denominator: - *pvalue = env->me_options.spill_parent4child_denominator; - break; - - case MDBX_opt_loose_limit: - *pvalue = env->me_options.dp_loose_limit; - break; - - case MDBX_opt_merge_threshold_16dot16_percent: - *pvalue = env->me_options.merge_threshold_16dot16_percent; - break; - - case MDBX_opt_writethrough_threshold: -#if defined(_WIN32) || defined(_WIN64) - *pvalue = (env->me_flags & MDBX_NOMETASYNC) ? 
0 : INT_MAX; -#else - *pvalue = env->me_options.writethrough_threshold; -#endif - break; - - case MDBX_opt_prefault_write_enable: - *pvalue = env->me_options.prefault_write; - break; - - case MDBX_opt_prefer_waf_insteadof_balance: - *pvalue = env->me_options.prefer_waf_insteadof_balance; - break; - - default: - return MDBX_EINVAL; - } - - return MDBX_SUCCESS; -} - -static size_t estimate_rss(size_t database_bytes) { - return database_bytes + database_bytes / 64 + - (512 + MDBX_WORDBITS * 16) * MEGABYTE; -} - -__cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, - MDBX_warmup_flags_t flags, - unsigned timeout_seconds_16dot16) { - if (unlikely(env == NULL && txn == NULL)) - return MDBX_EINVAL; - if (unlikely(flags > - (MDBX_warmup_force | MDBX_warmup_oomsafe | MDBX_warmup_lock | - MDBX_warmup_touchlimit | MDBX_warmup_release))) - return MDBX_EINVAL; - - if (txn) { - int err = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); - if (unlikely(err != MDBX_SUCCESS)) - return err; - } - if (env) { - int err = check_env(env, false); - if (unlikely(err != MDBX_SUCCESS)) - return err; - if (txn && unlikely(txn->mt_env != env)) - return MDBX_EINVAL; - } else { - env = txn->mt_env; - } - - const uint64_t timeout_monotime = - (timeout_seconds_16dot16 && (flags & MDBX_warmup_force)) - ? 
osal_monotime() + osal_16dot16_to_monotime(timeout_seconds_16dot16) - : 0; - - if (flags & MDBX_warmup_release) - munlock_all(env); - - pgno_t used_pgno; - if (txn) { - used_pgno = txn->mt_geo.next; - } else { - const meta_troika_t troika = meta_tap(env); - used_pgno = meta_recent(env, &troika).ptr_v->mm_geo.next; - } - const size_t used_range = pgno_align2os_bytes(env, used_pgno); - const pgno_t mlock_pgno = bytes2pgno(env, used_range); - - int rc = MDBX_SUCCESS; - if (flags & MDBX_warmup_touchlimit) { - const size_t estimated_rss = estimate_rss(used_range); -#if defined(_WIN32) || defined(_WIN64) - SIZE_T current_ws_lower, current_ws_upper; - if (GetProcessWorkingSetSize(GetCurrentProcess(), &current_ws_lower, - &current_ws_upper) && - current_ws_lower < estimated_rss) { - const SIZE_T ws_lower = estimated_rss; - const SIZE_T ws_upper = - (MDBX_WORDBITS == 32 && ws_lower > MEGABYTE * 2048) - ? ws_lower - : ws_lower + MDBX_WORDBITS * MEGABYTE * 32; - if (!SetProcessWorkingSetSize(GetCurrentProcess(), ws_lower, ws_upper)) { - rc = (int)GetLastError(); - WARNING("SetProcessWorkingSetSize(%zu, %zu) error %d", ws_lower, - ws_upper, rc); - } - } -#endif /* Windows */ -#ifdef RLIMIT_RSS - struct rlimit rss; - if (getrlimit(RLIMIT_RSS, &rss) == 0 && rss.rlim_cur < estimated_rss) { - rss.rlim_cur = estimated_rss; - if (rss.rlim_max < estimated_rss) - rss.rlim_max = estimated_rss; - if (setrlimit(RLIMIT_RSS, &rss)) { - rc = errno; - WARNING("setrlimit(%s, {%zu, %zu}) error %d", "RLIMIT_RSS", - (size_t)rss.rlim_cur, (size_t)rss.rlim_max, rc); - } - } -#endif /* RLIMIT_RSS */ -#ifdef RLIMIT_MEMLOCK - if (flags & MDBX_warmup_lock) { - struct rlimit memlock; - if (getrlimit(RLIMIT_MEMLOCK, &memlock) == 0 && - memlock.rlim_cur < estimated_rss) { - memlock.rlim_cur = estimated_rss; - if (memlock.rlim_max < estimated_rss) - memlock.rlim_max = estimated_rss; - if (setrlimit(RLIMIT_MEMLOCK, &memlock)) { - rc = errno; - WARNING("setrlimit(%s, {%zu, %zu}) error %d", "RLIMIT_MEMLOCK", - 
(size_t)memlock.rlim_cur, (size_t)memlock.rlim_max, rc); - } - } - } -#endif /* RLIMIT_MEMLOCK */ - (void)estimated_rss; - } - -#if defined(MLOCK_ONFAULT) && \ - ((defined(_GNU_SOURCE) && __GLIBC_PREREQ(2, 27)) || \ - (defined(__ANDROID_API__) && __ANDROID_API__ >= 30)) && \ - (defined(__linux__) || defined(__gnu_linux__)) - if ((flags & MDBX_warmup_lock) != 0 && linux_kernel_version >= 0x04040000 && - atomic_load32(&env->me_mlocked_pgno, mo_AcquireRelease) < mlock_pgno) { - if (mlock2(env->me_map, used_range, MLOCK_ONFAULT)) { - rc = errno; - WARNING("mlock2(%zu, %s) error %d", used_range, "MLOCK_ONFAULT", rc); - } else { - update_mlcnt(env, mlock_pgno, true); - rc = MDBX_SUCCESS; - } - if (rc != EINVAL) - flags -= MDBX_warmup_lock; - } -#endif /* MLOCK_ONFAULT */ - - int err = MDBX_ENOSYS; -#if MDBX_ENABLE_MADVISE - err = set_readahead(env, used_pgno, true, true); -#else -#if defined(_WIN32) || defined(_WIN64) - if (mdbx_PrefetchVirtualMemory) { - WIN32_MEMORY_RANGE_ENTRY hint; - hint.VirtualAddress = env->me_map; - hint.NumberOfBytes = used_range; - if (mdbx_PrefetchVirtualMemory(GetCurrentProcess(), 1, &hint, 0)) - err = MDBX_SUCCESS; - else { - err = (int)GetLastError(); - ERROR("%s(%zu) error %d", "PrefetchVirtualMemory", used_range, err); - } - } -#endif /* Windows */ - -#if defined(POSIX_MADV_WILLNEED) - err = posix_madvise(env->me_map, used_range, POSIX_MADV_WILLNEED) - ? ignore_enosys(errno) - : MDBX_SUCCESS; -#elif defined(MADV_WILLNEED) - err = madvise(env->me_map, used_range, MADV_WILLNEED) ? ignore_enosys(errno) - : MDBX_SUCCESS; -#endif - -#if defined(F_RDADVISE) - if (err) { - fcntl(env->me_lazy_fd, F_RDAHEAD, true); - struct radvisory hint; - hint.ra_offset = 0; - hint.ra_count = unlikely(used_range > INT_MAX && - sizeof(used_range) > sizeof(hint.ra_count)) - ? INT_MAX - : (int)used_range; - err = fcntl(env->me_lazy_fd, F_RDADVISE, &hint) ? 
ignore_enosys(errno) - : MDBX_SUCCESS; - if (err == ENOTTY) - err = MDBX_SUCCESS /* Ignore ENOTTY for DB on the ram-disk */; - } -#endif /* F_RDADVISE */ -#endif /* MDBX_ENABLE_MADVISE */ - if (err != MDBX_SUCCESS && rc == MDBX_SUCCESS) - rc = err; - - if ((flags & MDBX_warmup_force) != 0 && - (rc == MDBX_SUCCESS || rc == MDBX_ENOSYS)) { - const volatile uint8_t *ptr = env->me_map; - size_t offset = 0, unused = 42; -#if !(defined(_WIN32) || defined(_WIN64)) - if (flags & MDBX_warmup_oomsafe) { - const int null_fd = open("/dev/null", O_WRONLY); - if (unlikely(null_fd < 0)) - rc = errno; - else { - struct iovec iov[MDBX_AUXILARY_IOV_MAX]; - for (;;) { - unsigned i; - for (i = 0; i < MDBX_AUXILARY_IOV_MAX && offset < used_range; ++i) { - iov[i].iov_base = (void *)(ptr + offset); - iov[i].iov_len = 1; - offset += env->me_os_psize; - } - if (unlikely(writev(null_fd, iov, i) < 0)) { - rc = errno; - if (rc == EFAULT) - rc = ENOMEM; - break; - } - if (offset >= used_range) { - rc = MDBX_SUCCESS; - break; - } - if (timeout_seconds_16dot16 && osal_monotime() > timeout_monotime) { - rc = MDBX_RESULT_TRUE; - break; - } - } - close(null_fd); - } - } else -#endif /* Windows */ - for (;;) { - unused += ptr[offset]; - offset += env->me_os_psize; - if (offset >= used_range) { - rc = MDBX_SUCCESS; - break; - } - if (timeout_seconds_16dot16 && osal_monotime() > timeout_monotime) { - rc = MDBX_RESULT_TRUE; - break; - } - } - (void)unused; - } - - if ((flags & MDBX_warmup_lock) != 0 && - (rc == MDBX_SUCCESS || rc == MDBX_ENOSYS) && - atomic_load32(&env->me_mlocked_pgno, mo_AcquireRelease) < mlock_pgno) { -#if defined(_WIN32) || defined(_WIN64) - if (VirtualLock(env->me_map, used_range)) { - update_mlcnt(env, mlock_pgno, true); - rc = MDBX_SUCCESS; - } else { - rc = (int)GetLastError(); - WARNING("%s(%zu) error %d", "VirtualLock", used_range, rc); - } -#elif defined(_POSIX_MEMLOCK_RANGE) - if (mlock(env->me_map, used_range) == 0) { - update_mlcnt(env, mlock_pgno, true); - rc = 
MDBX_SUCCESS; - } else { - rc = errno; - WARNING("%s(%zu) error %d", "mlock", used_range, rc); - } -#else - rc = MDBX_ENOSYS; -#endif - } - - return rc; -} - -#if !defined(_WIN32) && !defined(_WIN64) -__cold static void rthc_afterfork(void) { - NOTICE("drown %d rthc entries", rthc_count); - for (size_t i = 0; i < rthc_count; ++i) { - MDBX_env *const env = rthc_table[i].env; - NOTICE("drown env %p", __Wpedantic_format_voidptr(env)); - if (env->me_lck_mmap.lck) - osal_munmap(&env->me_lck_mmap); - if (env->me_map) { - osal_munmap(&env->me_dxb_mmap); -#ifdef ENABLE_MEMCHECK - VALGRIND_DISCARD(env->me_valgrind_handle); - env->me_valgrind_handle = -1; -#endif /* ENABLE_MEMCHECK */ - } - env->me_lck = lckless_stub(env); - rthc_drown(env); - } - if (rthc_table != rthc_table_static) - osal_free(rthc_table); - rthc_count = 0; - rthc_table = rthc_table_static; - rthc_limit = RTHC_INITIAL_LIMIT; - rthc_pending.weak = 0; -} -#endif /* ! Windows */ - -__cold void global_ctor(void) { - ENSURE(nullptr, osal_fastmutex_init(&debug_lock) == 0); - osal_ctor(); - rthc_limit = RTHC_INITIAL_LIMIT; - rthc_table = rthc_table_static; -#if defined(_WIN32) || defined(_WIN64) - InitializeCriticalSection(&rthc_critical_section); -#else - ENSURE(nullptr, pthread_atfork(nullptr, nullptr, rthc_afterfork) == 0); - ENSURE(nullptr, pthread_key_create(&rthc_key, thread_dtor) == 0); - TRACE("pid %d, &mdbx_rthc_key = %p, value 0x%x", osal_getpid(), - __Wpedantic_format_voidptr(&rthc_key), (unsigned)rthc_key); -#endif - /* checking time conversion, this also avoids racing on 32-bit architectures - * during storing calculated 64-bit ratio(s) into memory. */ - uint32_t proba = UINT32_MAX; - while (true) { - unsigned time_conversion_checkup = - osal_monotime_to_16dot16(osal_16dot16_to_monotime(proba)); - unsigned one_more = (proba < UINT32_MAX) ? proba + 1 : proba; - unsigned one_less = (proba > 0) ? 
proba - 1 : proba; - ENSURE(nullptr, time_conversion_checkup >= one_less && - time_conversion_checkup <= one_more); - if (proba == 0) - break; - proba >>= 1; - } - - bootid = osal_bootid(); - -#if MDBX_DEBUG - for (size_t i = 0; i < 2 * 2 * 2 * 3 * 3 * 3; ++i) { - const bool s0 = (i >> 0) & 1; - const bool s1 = (i >> 1) & 1; - const bool s2 = (i >> 2) & 1; - const uint8_t c01 = (i / (8 * 1)) % 3; - const uint8_t c02 = (i / (8 * 3)) % 3; - const uint8_t c12 = (i / (8 * 9)) % 3; - - const uint8_t packed = meta_cmp2pack(c01, c02, c12, s0, s1, s2); - meta_troika_t troika; - troika.fsm = (uint8_t)i; - meta_troika_unpack(&troika, packed); - - const uint8_t tail = TROIKA_TAIL(&troika); - const bool strict = TROIKA_STRICT_VALID(&troika); - const bool valid = TROIKA_VALID(&troika); - - const uint8_t recent_chk = meta_cmp2recent(c01, s0, s1) - ? (meta_cmp2recent(c02, s0, s2) ? 0 : 2) - : (meta_cmp2recent(c12, s1, s2) ? 1 : 2); - const uint8_t prefer_steady_chk = - meta_cmp2steady(c01, s0, s1) ? (meta_cmp2steady(c02, s0, s2) ? 0 : 2) - : (meta_cmp2steady(c12, s1, s2) ? 1 : 2); - - uint8_t tail_chk; - if (recent_chk == 0) - tail_chk = meta_cmp2steady(c12, s1, s2) ? 2 : 1; - else if (recent_chk == 1) - tail_chk = meta_cmp2steady(c02, s0, s2) ? 2 : 0; - else - tail_chk = meta_cmp2steady(c01, s0, s1) ? 
1 : 0; - - const bool valid_chk = - c01 != 1 || s0 != s1 || c02 != 1 || s0 != s2 || c12 != 1 || s1 != s2; - const bool strict_chk = (c01 != 1 || s0 != s1) && (c02 != 1 || s0 != s2) && - (c12 != 1 || s1 != s2); - assert(troika.recent == recent_chk); - assert(troika.prefer_steady == prefer_steady_chk); - assert(tail == tail_chk); - assert(valid == valid_chk); - assert(strict == strict_chk); - // printf(" %d, ", packed); - assert(troika_fsm_map[troika.fsm] == packed); - } -#endif /* MDBX_DEBUG*/ - -#if 0 /* debug */ - for (size_t i = 0; i < 65536; ++i) { - size_t pages = pv2pages(i); - size_t x = pages2pv(pages); - size_t xp = pv2pages(x); - if (!(x == i || (x % 2 == 0 && x < 65536)) || pages != xp) - printf("%u => %zu => %u => %zu\n", i, pages, x, xp); - assert(pages == xp); - } - fflush(stdout); -#endif /* #if 0 */ -} - -/*------------------------------------------------------------------------------ - * Legacy API */ - -#ifndef LIBMDBX_NO_EXPORTS_LEGACY_API - -LIBMDBX_API int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent, - MDBX_txn_flags_t flags, MDBX_txn **ret) { - return __inline_mdbx_txn_begin(env, parent, flags, ret); -} - -LIBMDBX_API int mdbx_txn_commit(MDBX_txn *txn) { - return __inline_mdbx_txn_commit(txn); -} - -LIBMDBX_API __cold int mdbx_env_stat(const MDBX_env *env, MDBX_stat *stat, - size_t bytes) { - return __inline_mdbx_env_stat(env, stat, bytes); -} - -LIBMDBX_API __cold int mdbx_env_info(const MDBX_env *env, MDBX_envinfo *info, - size_t bytes) { - return __inline_mdbx_env_info(env, info, bytes); -} - -LIBMDBX_API int mdbx_dbi_flags(const MDBX_txn *txn, MDBX_dbi dbi, - unsigned *flags) { - return __inline_mdbx_dbi_flags(txn, dbi, flags); -} - -LIBMDBX_API __cold int mdbx_env_sync(MDBX_env *env) { - return __inline_mdbx_env_sync(env); -} - -LIBMDBX_API __cold int mdbx_env_sync_poll(MDBX_env *env) { - return __inline_mdbx_env_sync_poll(env); -} - -LIBMDBX_API __cold int mdbx_env_close(MDBX_env *env) { - return __inline_mdbx_env_close(env); -} - 
-LIBMDBX_API __cold int mdbx_env_set_mapsize(MDBX_env *env, size_t size) { - return __inline_mdbx_env_set_mapsize(env, size); -} - -LIBMDBX_API __cold int mdbx_env_set_maxdbs(MDBX_env *env, MDBX_dbi dbs) { - return __inline_mdbx_env_set_maxdbs(env, dbs); -} - -LIBMDBX_API __cold int mdbx_env_get_maxdbs(const MDBX_env *env, MDBX_dbi *dbs) { - return __inline_mdbx_env_get_maxdbs(env, dbs); -} - -LIBMDBX_API __cold int mdbx_env_set_maxreaders(MDBX_env *env, - unsigned readers) { - return __inline_mdbx_env_set_maxreaders(env, readers); -} - -LIBMDBX_API __cold int mdbx_env_get_maxreaders(const MDBX_env *env, - unsigned *readers) { - return __inline_mdbx_env_get_maxreaders(env, readers); -} - -LIBMDBX_API __cold int mdbx_env_set_syncbytes(MDBX_env *env, size_t threshold) { - return __inline_mdbx_env_set_syncbytes(env, threshold); -} - -LIBMDBX_API __cold int mdbx_env_get_syncbytes(const MDBX_env *env, - size_t *threshold) { - return __inline_mdbx_env_get_syncbytes(env, threshold); -} - -LIBMDBX_API __cold int mdbx_env_set_syncperiod(MDBX_env *env, - unsigned seconds_16dot16) { - return __inline_mdbx_env_set_syncperiod(env, seconds_16dot16); -} - -LIBMDBX_API __cold int mdbx_env_get_syncperiod(const MDBX_env *env, - unsigned *seconds_16dot16) { - return __inline_mdbx_env_get_syncperiod(env, seconds_16dot16); -} - -LIBMDBX_API __cold MDBX_NOTHROW_CONST_FUNCTION intptr_t -mdbx_limits_pgsize_min(void) { - return __inline_mdbx_limits_pgsize_min(); -} - -LIBMDBX_API __cold MDBX_NOTHROW_CONST_FUNCTION intptr_t -mdbx_limits_pgsize_max(void) { - return __inline_mdbx_limits_pgsize_max(); -} - -LIBMDBX_API MDBX_NOTHROW_CONST_FUNCTION uint64_t -mdbx_key_from_int64(const int64_t i64) { - return __inline_mdbx_key_from_int64(i64); -} - -LIBMDBX_API MDBX_NOTHROW_CONST_FUNCTION uint32_t -mdbx_key_from_int32(const int32_t i32) { - return __inline_mdbx_key_from_int32(i32); -} - -#endif /* LIBMDBX_NO_EXPORTS_LEGACY_API */ - 
-/*------------------------------------------------------------------------------ - * Locking API */ - -int mdbx_txn_lock(MDBX_env *env, bool dont_wait) { - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(env->me_flags & MDBX_RDONLY)) - return MDBX_EACCESS; - if (unlikely(env->me_txn0->mt_owner || - (env->me_txn0->mt_flags & MDBX_TXN_FINISHED) == 0)) - return MDBX_BUSY; - - return osal_txn_lock(env, dont_wait); -} - -int mdbx_txn_unlock(MDBX_env *env) { - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(env->me_flags & MDBX_RDONLY)) - return MDBX_EACCESS; - if (unlikely(env->me_txn0->mt_owner != osal_thread_self())) - return MDBX_THREAD_MISMATCH; - if (unlikely((env->me_txn0->mt_flags & MDBX_TXN_FINISHED) == 0)) - return MDBX_BUSY; - - osal_txn_unlock(env); - return MDBX_SUCCESS; -} - -/******************************************************************************* - * Checking API */ - -typedef struct MDBX_chk_internal { - MDBX_chk_context_t *usr; - const struct MDBX_chk_callbacks *cb; - uint64_t monotime_timeout; - - size_t *problem_counter; - uint8_t flags; - bool got_break; - bool write_locked; - uint8_t scope_depth; - - MDBX_chk_subdb_t subdb_gc, subdb_main; - int16_t *pagemap; - MDBX_chk_subdb_t *last_lookup; - const void *last_nested; - MDBX_chk_scope_t scope_stack[12]; - MDBX_chk_subdb_t *subdb[MDBX_MAX_DBI + CORE_DBS]; - - MDBX_envinfo envinfo; - meta_troika_t troika; - MDBX_val v2a_buf; -} MDBX_chk_internal_t; - -__cold static int chk_check_break(MDBX_chk_scope_t *const scope) { - MDBX_chk_internal_t *const chk = scope->internal; - return (chk->got_break || (chk->cb->check_break && - (chk->got_break = chk->cb->check_break(chk->usr)))) - ? 
MDBX_RESULT_TRUE - : MDBX_RESULT_FALSE; -} - -__cold static void chk_line_end(MDBX_chk_line_t *line) { - if (likely(line)) { - MDBX_chk_internal_t *chk = line->ctx->internal; - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); - if (likely(chk->cb->print_done)) - chk->cb->print_done(line); - } -} - -__cold __must_check_result static MDBX_chk_line_t * -chk_line_begin(MDBX_chk_scope_t *const scope, enum MDBX_chk_severity severity) { - MDBX_chk_internal_t *const chk = scope->internal; - if (severity < MDBX_chk_warning) - mdbx_env_chk_encount_problem(chk->usr); - MDBX_chk_line_t *line = nullptr; - if (likely(chk->cb->print_begin)) { - line = chk->cb->print_begin(chk->usr, severity); - if (likely(line)) { - assert(line->ctx == nullptr || (line->ctx == chk->usr && line->empty)); - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); - line->ctx = chk->usr; - } - } - return line; -} - -__cold static MDBX_chk_line_t *chk_line_feed(MDBX_chk_line_t *line) { - if (likely(line)) { - MDBX_chk_internal_t *chk = line->ctx->internal; - enum MDBX_chk_severity severity = line->severity; - chk_line_end(line); - line = chk_line_begin(chk->usr->scope, severity); - } - return line; -} - -__cold static MDBX_chk_line_t *chk_flush(MDBX_chk_line_t *line) { - if (likely(line)) { - MDBX_chk_internal_t *chk = line->ctx->internal; - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); - if (likely(chk->cb->print_flush)) { - chk->cb->print_flush(line); - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); - line->out = line->begin; - } - } - return line; -} - -__cold static size_t chk_print_wanna(MDBX_chk_line_t *line, size_t need) { - if (likely(line && need)) { - size_t have = line->end - line->out; - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); - if (need > have) { - line = chk_flush(line); - have = 
line->end - line->out; - } - return (need < have) ? need : have; - } - return 0; -} - -__cold static MDBX_chk_line_t *chk_puts(MDBX_chk_line_t *line, - const char *str) { - if (likely(line && str && *str)) { - MDBX_chk_internal_t *chk = line->ctx->internal; - size_t left = strlen(str); - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); - if (chk->cb->print_chars) { - chk->cb->print_chars(line, str, left); - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); - } else - do { - size_t chunk = chk_print_wanna(line, left); - assert(chunk <= left); - if (unlikely(!chunk)) - break; - memcpy(line->out, str, chunk); - line->out += chunk; - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); - str += chunk; - left -= chunk; - } while (left); - line->empty = false; - } - return line; -} - -__cold static MDBX_chk_line_t *chk_print_va(MDBX_chk_line_t *line, - const char *fmt, va_list args) { - if (likely(line)) { - MDBX_chk_internal_t *chk = line->ctx->internal; - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); - if (chk->cb->print_format) { - chk->cb->print_format(line, fmt, args); - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); - } else { - va_list ones; - va_copy(ones, args); - const int needed = vsnprintf(nullptr, 0, fmt, ones); - va_end(ones); - if (likely(needed > 0)) { - const size_t have = chk_print_wanna(line, needed); - if (likely(have > 0)) { - int written = vsnprintf(line->out, have, fmt, args); - if (likely(written > 0)) - line->out += written; - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); - } - } - } - line->empty = false; - } - return line; -} - -__cold static MDBX_chk_line_t *MDBX_PRINTF_ARGS(2, 3) - chk_print(MDBX_chk_line_t *line, const char *fmt, ...) 
{ - if (likely(line)) { - // MDBX_chk_internal_t *chk = line->ctx->internal; - va_list args; - va_start(args, fmt); - line = chk_print_va(line, fmt, args); - va_end(args); - line->empty = false; - } - return line; -} - -__cold static MDBX_chk_line_t *chk_print_size(MDBX_chk_line_t *line, - const char *prefix, - const uint64_t value, - const char *suffix) { - static const char sf[] = - "KMGTPEZY"; /* LY: Kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta! */ - if (likely(line)) { - MDBX_chk_internal_t *chk = line->ctx->internal; - prefix = prefix ? prefix : ""; - suffix = suffix ? suffix : ""; - if (chk->cb->print_size) - chk->cb->print_size(line, prefix, value, suffix); - else - for (unsigned i = 0;; ++i) { - const unsigned scale = 10 + i * 10; - const uint64_t rounded = value + (UINT64_C(5) << (scale - 10)); - const uint64_t integer = rounded >> scale; - const uint64_t fractional = - (rounded - (integer << scale)) * 100u >> scale; - if ((rounded >> scale) <= 1000) - return chk_print(line, "%s%" PRIu64 " (%u.%02u %ciB)%s", prefix, - value, (unsigned)integer, (unsigned)fractional, - sf[i], suffix); - } - line->empty = false; - } - return line; -} - -__cold static int chk_error_rc(MDBX_chk_scope_t *const scope, int err, - const char *subj) { - MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_error); - if (line) - chk_line_end(chk_flush(chk_print(line, "%s() failed, error %s (%d)", subj, - mdbx_strerror(err), err))); - else - debug_log(MDBX_LOG_ERROR, "mdbx_env_chk", 0, "%s() failed, error %s (%d)", - subj, mdbx_strerror(err), err); - return err; -} - -__cold static void MDBX_PRINTF_ARGS(5, 6) - chk_object_issue(MDBX_chk_scope_t *const scope, const char *object, - uint64_t entry_number, const char *caption, - const char *extra_fmt, ...) 
{ - MDBX_chk_internal_t *const chk = scope->internal; - MDBX_chk_issue_t *issue = chk->usr->scope->issues; - while (issue) { - if (issue->caption == caption) { - issue->count += 1; - break; - } else - issue = issue->next; - } - const bool fresh = issue == nullptr; - if (fresh) { - issue = osal_malloc(sizeof(*issue)); - if (likely(issue)) { - issue->caption = caption; - issue->count = 1; - issue->next = chk->usr->scope->issues; - chk->usr->scope->issues = issue; - } else - chk_error_rc(scope, ENOMEM, "adding issue"); - } - - va_list args; - va_start(args, extra_fmt); - if (chk->cb->issue) { - mdbx_env_chk_encount_problem(chk->usr); - chk->cb->issue(chk->usr, object, entry_number, caption, extra_fmt, args); - } else { - MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_error); - if (entry_number != UINT64_MAX) - chk_print(line, "%s #%" PRIu64 ": %s", object, entry_number, caption); - else - chk_print(line, "%s: %s", object, caption); - if (extra_fmt) - chk_puts(chk_print_va(chk_puts(line, " ("), extra_fmt, args), ")"); - chk_line_end(fresh ? chk_flush(line) : line); - } - va_end(args); -} - -__cold static void MDBX_PRINTF_ARGS(2, 3) - chk_scope_issue(MDBX_chk_scope_t *const scope, const char *fmt, ...) { - MDBX_chk_internal_t *const chk = scope->internal; - va_list args; - va_start(args, fmt); - if (likely(chk->cb->issue)) { - mdbx_env_chk_encount_problem(chk->usr); - chk->cb->issue(chk->usr, nullptr, 0, nullptr, fmt, args); - } else - chk_line_end( - chk_print_va(chk_line_begin(scope, MDBX_chk_error), fmt, args)); - va_end(args); -} - -__cold static int chk_scope_end(MDBX_chk_internal_t *chk, int err) { - assert(chk->scope_depth > 0); - MDBX_chk_scope_t *const inner = chk->scope_stack + chk->scope_depth; - MDBX_chk_scope_t *const outer = chk->scope_depth ? 
inner - 1 : nullptr; - if (!outer || outer->stage != inner->stage) { - if (err == MDBX_SUCCESS && *chk->problem_counter) - err = MDBX_PROBLEM; - else if (*chk->problem_counter == 0 && MDBX_IS_ERROR(err)) - *chk->problem_counter = 1; - if (chk->problem_counter != &chk->usr->result.total_problems) { - chk->usr->result.total_problems += *chk->problem_counter; - chk->problem_counter = &chk->usr->result.total_problems; - } - if (chk->cb->stage_end) - err = chk->cb->stage_end(chk->usr, inner->stage, err); - } - if (chk->cb->scope_conclude) - err = chk->cb->scope_conclude(chk->usr, outer, inner, err); - chk->usr->scope = outer; - chk->usr->scope_nesting = chk->scope_depth -= 1; - if (outer) - outer->subtotal_issues += inner->subtotal_issues; - if (chk->cb->scope_pop) - chk->cb->scope_pop(chk->usr, outer, inner); - - while (inner->issues) { - MDBX_chk_issue_t *next = inner->issues->next; - osal_free(inner->issues); - inner->issues = next; - } - memset(inner, -1, sizeof(*inner)); - return err; -} - -__cold static int chk_scope_begin_args(MDBX_chk_internal_t *chk, - int verbosity_adjustment, - enum MDBX_chk_stage stage, - const void *object, size_t *problems, - const char *fmt, va_list args) { - if (unlikely(chk->scope_depth + 1u >= ARRAY_LENGTH(chk->scope_stack))) - return MDBX_BACKLOG_DEPLETED; - - MDBX_chk_scope_t *const outer = chk->scope_stack + chk->scope_depth; - const int verbosity = - outer->verbosity + - (verbosity_adjustment - 1) * (1 << MDBX_chk_severity_prio_shift); - MDBX_chk_scope_t *const inner = outer + 1; - memset(inner, 0, sizeof(*inner)); - inner->internal = outer->internal; - inner->stage = stage ? stage : (stage = outer->stage); - inner->object = object; - inner->verbosity = (verbosity < MDBX_chk_warning) - ? 
MDBX_chk_warning - : (enum MDBX_chk_severity)verbosity; - if (problems) - chk->problem_counter = problems; - else if (!chk->problem_counter || outer->stage != stage) - chk->problem_counter = &chk->usr->result.total_problems; - - if (chk->cb->scope_push) { - const int err = chk->cb->scope_push(chk->usr, outer, inner, fmt, args); - if (unlikely(err != MDBX_SUCCESS)) - return err; - } - chk->usr->scope = inner; - chk->usr->scope_nesting = chk->scope_depth += 1; - - if (stage != outer->stage && chk->cb->stage_begin) { - int err = chk->cb->stage_begin(chk->usr, stage); - if (unlikely(err != MDBX_SUCCESS)) { - err = chk_scope_end(chk, err); - assert(err != MDBX_SUCCESS); - return err ? err : MDBX_RESULT_TRUE; - } - } - return MDBX_SUCCESS; -} - -__cold static int MDBX_PRINTF_ARGS(6, 7) - chk_scope_begin(MDBX_chk_internal_t *chk, int verbosity_adjustment, - enum MDBX_chk_stage stage, const void *object, - size_t *problems, const char *fmt, ...) { - va_list args; - va_start(args, fmt); - int rc = chk_scope_begin_args(chk, verbosity_adjustment, stage, object, - problems, fmt, args); - va_end(args); - return rc; -} - -__cold static int chk_scope_restore(MDBX_chk_scope_t *const target, int err) { - MDBX_chk_internal_t *const chk = target->internal; - assert(target <= chk->usr->scope); - while (chk->usr->scope > target) - err = chk_scope_end(chk, err); - return err; -} - -__cold void chk_scope_pop(MDBX_chk_scope_t *const inner) { - if (inner && inner > inner->internal->scope_stack) - chk_scope_restore(inner - 1, MDBX_SUCCESS); -} - -__cold static MDBX_chk_scope_t *MDBX_PRINTF_ARGS(3, 4) - chk_scope_push(MDBX_chk_scope_t *const scope, int verbosity_adjustment, - const char *fmt, ...) { - chk_scope_restore(scope, MDBX_SUCCESS); - va_list args; - va_start(args, fmt); - int err = chk_scope_begin_args(scope->internal, verbosity_adjustment, - scope->stage, nullptr, nullptr, fmt, args); - va_end(args); - return err ? 
nullptr : scope + 1; -} - -__cold static const char *chk_v2a(MDBX_chk_internal_t *chk, - const MDBX_val *val) { - if (val == MDBX_CHK_MAIN) - return "@MAIN"; - if (val == MDBX_CHK_GC) - return "@GC"; - if (val == MDBX_CHK_META) - return "@META"; - - const unsigned char *const data = val->iov_base; - const size_t len = val->iov_len; - if (data == MDBX_CHK_MAIN) - return "@MAIN"; - if (data == MDBX_CHK_GC) - return "@GC"; - if (data == MDBX_CHK_META) - return "@META"; - - if (!len) - return ""; - if (!data) - return ""; - if (len > 65536) { - const size_t enough = 42; - if (chk->v2a_buf.iov_len < enough) { - void *ptr = osal_realloc(chk->v2a_buf.iov_base, enough); - if (unlikely(!ptr)) - return ""; - chk->v2a_buf.iov_base = ptr; - chk->v2a_buf.iov_len = enough; - } - snprintf(chk->v2a_buf.iov_base, chk->v2a_buf.iov_len, - "", len); - return chk->v2a_buf.iov_base; - } - - bool printable = true; - bool quoting = false; - size_t xchars = 0; - for (size_t i = 0; i < len && printable; ++i) { - quoting = quoting || !(data[i] == '_' || isalnum(data[i])); - printable = - isprint(data[i]) || (data[i] < ' ' && ++xchars < 4 && len > xchars * 4); - } - - size_t need = len + 1; - if (quoting || !printable) - need += len + /* quotes */ 2 + 2 * /* max xchars */ 4; - if (need > chk->v2a_buf.iov_len) { - void *ptr = osal_realloc(chk->v2a_buf.iov_base, need); - if (unlikely(!ptr)) - return ""; - chk->v2a_buf.iov_base = ptr; - chk->v2a_buf.iov_len = need; - } - - static const char hex[] = "0123456789abcdef"; - char *w = chk->v2a_buf.iov_base; - if (!quoting) { - memcpy(w, data, len); - w += len; - } else if (printable) { - *w++ = '\''; - for (size_t i = 0; i < len; ++i) { - if (data[i] < ' ') { - assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 4); - w[0] = '\\'; - w[1] = 'x'; - w[2] = hex[data[i] >> 4]; - w[3] = hex[data[i] & 15]; - w += 4; - } else if (strchr("\"'`\\", data[i])) { - assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 2); - w[0] = '\\'; 
- w[1] = data[i]; - w += 2; - } else { - assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 1); - *w++ = data[i]; - } - } - *w++ = '\''; - } else { - *w++ = '\\'; - *w++ = 'x'; - for (size_t i = 0; i < len; ++i) { - assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 2); - w[0] = hex[data[i] >> 4]; - w[1] = hex[data[i] & 15]; - w += 2; - } - } - assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w); - *w = 0; - return chk->v2a_buf.iov_base; -} - -__cold static void chk_dispose(MDBX_chk_internal_t *chk) { - assert(chk->subdb[FREE_DBI] == &chk->subdb_gc); - assert(chk->subdb[MAIN_DBI] == &chk->subdb_main); - for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb); ++i) { - MDBX_chk_subdb_t *const sdb = chk->subdb[i]; - if (sdb) { - chk->subdb[i] = nullptr; - if (chk->cb->subdb_dispose && sdb->cookie) { - chk->cb->subdb_dispose(chk->usr, sdb); - sdb->cookie = nullptr; - } - if (sdb != &chk->subdb_gc && sdb != &chk->subdb_main) { - osal_free(sdb); - } - } - } - osal_free(chk->v2a_buf.iov_base); - osal_free(chk->pagemap); - chk->usr->internal = nullptr; - chk->usr->scope = nullptr; - chk->pagemap = nullptr; - memset(chk, 0xDD, sizeof(*chk)); - osal_free(chk); -} - -static size_t div_8s(size_t numerator, size_t divider) { - assert(numerator <= (SIZE_MAX >> 8)); - return (numerator << 8) / divider; -} - -static size_t mul_8s(size_t quotient, size_t multiplier) { - size_t hi = multiplier * (quotient >> 8); - size_t lo = multiplier * (quotient & 255) + 128; - return hi + (lo >> 8); -} - -static void histogram_reduce(struct MDBX_chk_histogram *p) { - const size_t size = ARRAY_LENGTH(p->ranges), last = size - 1; - // ищем пару для слияния с минимальной ошибкой - size_t min_err = SIZE_MAX, min_i = last - 1; - for (size_t i = 0; i < last; ++i) { - const size_t b1 = p->ranges[i].begin, e1 = p->ranges[i].end, - s1 = p->ranges[i].amount; - const size_t b2 = p->ranges[i + 1].begin, e2 = p->ranges[i + 1].end, - s2 = p->ranges[i + 1].amount; - const 
size_t l1 = e1 - b1, l2 = e2 - b2, lx = e2 - b1, sx = s1 + s2; - assert(s1 > 0 && b1 > 0 && b1 < e1); - assert(s2 > 0 && b2 > 0 && b2 < e2); - assert(e1 <= b2); - // за ошибку принимаем площадь изменений на гистограмме при слиянии - const size_t h1 = div_8s(s1, l1), h2 = div_8s(s2, l2), hx = div_8s(sx, lx); - const size_t d1 = mul_8s((h1 > hx) ? h1 - hx : hx - h1, l1); - const size_t d2 = mul_8s((h2 > hx) ? h2 - hx : hx - h2, l2); - const size_t dx = mul_8s(hx, b2 - e1); - const size_t err = d1 + d2 + dx; - if (min_err >= err) { - min_i = i; - min_err = err; - } - } - // объединяем - p->ranges[min_i].end = p->ranges[min_i + 1].end; - p->ranges[min_i].amount += p->ranges[min_i + 1].amount; - p->ranges[min_i].count += p->ranges[min_i + 1].count; - if (min_i < last) - // перемещаем хвост - memmove(p->ranges + min_i, p->ranges + min_i + 1, - (last - min_i) * sizeof(p->ranges[0])); - // обнуляем последний элемент и продолжаем - p->ranges[last].count = 0; -} - -static void histogram_acc(const size_t n, struct MDBX_chk_histogram *p) { - STATIC_ASSERT(ARRAY_LENGTH(p->ranges) > 2); - p->amount += n; - p->count += 1; - if (likely(n < 2)) { - p->ones += n; - p->pad += 1; - } else - for (;;) { - const size_t size = ARRAY_LENGTH(p->ranges), last = size - 1; - size_t i = 0; - while (i < size && p->ranges[i].count && n >= p->ranges[i].begin) { - if (n < p->ranges[i].end) { - // значение попадает в существующий интервал - p->ranges[i].amount += n; - p->ranges[i].count += 1; - return; - } - ++i; - } - if (p->ranges[last].count == 0) { - // использованы еще не все слоты, добавляем интервал - assert(i < size); - if (p->ranges[i].count) { - assert(i < last); - // раздвигаем -#ifdef __COVERITY__ - if (i < last) /* avoid Coverity false-positive issue */ -#endif /* __COVERITY__ */ - memmove(p->ranges + i + 1, p->ranges + i, - (last - i) * sizeof(p->ranges[0])); - } - p->ranges[i].begin = n; - p->ranges[i].end = n + 1; - p->ranges[i].amount = n; - p->ranges[i].count = 1; - return; - } - 
histogram_reduce(p); - } -} - -__cold static MDBX_chk_line_t * -histogram_dist(MDBX_chk_line_t *line, - const struct MDBX_chk_histogram *histogram, const char *prefix, - const char *first, bool amount) { - line = chk_print(line, "%s:", prefix); - const char *comma = ""; - const size_t first_val = amount ? histogram->ones : histogram->pad; - if (first_val) { - chk_print(line, " %s=%" PRIuSIZE, first, first_val); - comma = ","; - } - for (size_t n = 0; n < ARRAY_LENGTH(histogram->ranges); ++n) - if (histogram->ranges[n].count) { - chk_print(line, "%s %" PRIuSIZE, comma, histogram->ranges[n].begin); - if (histogram->ranges[n].begin != histogram->ranges[n].end - 1) - chk_print(line, "-%" PRIuSIZE, histogram->ranges[n].end - 1); - line = chk_print(line, "=%" PRIuSIZE, - amount ? histogram->ranges[n].amount - : histogram->ranges[n].count); - comma = ","; - } - return line; -} - -__cold static MDBX_chk_line_t * -histogram_print(MDBX_chk_scope_t *scope, MDBX_chk_line_t *line, - const struct MDBX_chk_histogram *histogram, const char *prefix, - const char *first, bool amount) { - if (histogram->count) { - line = chk_print(line, "%s %" PRIuSIZE, prefix, - amount ? 
histogram->amount : histogram->count); - if (scope->verbosity > MDBX_chk_info) - line = chk_puts( - histogram_dist(line, histogram, " (distribution", first, amount), - ")"); - } - return line; -} - -//----------------------------------------------------------------------------- - -__cold static int chk_get_sdb(MDBX_chk_scope_t *const scope, - const MDBX_walk_sdb_t *in, - MDBX_chk_subdb_t **out) { - MDBX_chk_internal_t *const chk = scope->internal; - if (chk->last_lookup && - chk->last_lookup->name.iov_base == in->name.iov_base) { - *out = chk->last_lookup; - return MDBX_SUCCESS; - } - - for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb); ++i) { - MDBX_chk_subdb_t *sdb = chk->subdb[i]; - if (!sdb) { - sdb = osal_calloc(1, sizeof(MDBX_chk_subdb_t)); - if (unlikely(!sdb)) { - *out = nullptr; - return chk_error_rc(scope, MDBX_ENOMEM, "alloc_subDB"); - } - chk->subdb[i] = sdb; - sdb->flags = in->internal->md_flags; - sdb->id = -1; - sdb->name = in->name; - } - if (sdb->name.iov_base == in->name.iov_base) { - if (sdb->id < 0) { - sdb->id = (int)i; - sdb->cookie = - chk->cb->subdb_filter - ? 
chk->cb->subdb_filter(chk->usr, &sdb->name, sdb->flags) - : (void *)(intptr_t)-1; - } - *out = (chk->last_lookup = sdb); - return MDBX_SUCCESS; - } - } - chk_scope_issue(scope, "too many subDBs > %u", - (unsigned)ARRAY_LENGTH(chk->subdb) - CORE_DBS - /* meta */ 1); - *out = nullptr; - return MDBX_PROBLEM; -} - -//------------------------------------------------------------------------------ - -__cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope, - const unsigned num) { - MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_verbose); - MDBX_chk_internal_t *const chk = scope->internal; - if (line) { - MDBX_env *const env = chk->usr->env; - const bool have_bootid = (chk->envinfo.mi_bootid.current.x | - chk->envinfo.mi_bootid.current.y) != 0; - const bool bootid_match = - have_bootid && memcmp(&chk->envinfo.mi_bootid.meta[num], - &chk->envinfo.mi_bootid.current, - sizeof(chk->envinfo.mi_bootid.current)) == 0; - - const char *status = "stay"; - if (num == chk->troika.recent) - status = "head"; - else if (num == TROIKA_TAIL(&chk->troika)) - status = "tail"; - line = chk_print(line, "meta-%u: %s, ", num, status); - - switch (chk->envinfo.mi_meta_sign[num]) { - case MDBX_DATASIGN_NONE: - line = chk_puts(line, "no-sync/legacy"); - break; - case MDBX_DATASIGN_WEAK: - line = chk_print(line, "weak-%s", - have_bootid - ? (bootid_match ? "intact (same boot-id)" : "dead") - : "unknown (no boot-id)"); - break; - default: - line = chk_puts(line, "steady"); - break; - } - const txnid_t meta_txnid = chk->envinfo.mi_meta_txnid[num]; - line = chk_print(line, " txn#%" PRIaTXN ", ", meta_txnid); - if (chk->envinfo.mi_bootid.meta[num].x | chk->envinfo.mi_bootid.meta[num].y) - line = chk_print(line, "boot-id %" PRIx64 "-%" PRIx64 " (%s)", - chk->envinfo.mi_bootid.meta[num].x, - chk->envinfo.mi_bootid.meta[num].y, - bootid_match ? 
"live" : "not match"); - else - line = chk_puts(line, "no boot-id"); - - if (env->me_stuck_meta >= 0) { - if (num == (unsigned)env->me_stuck_meta) - line = chk_print(line, ", %s", "forced for checking"); - } else if (meta_txnid > chk->envinfo.mi_recent_txnid && - (env->me_flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == - MDBX_EXCLUSIVE) - line = chk_print(line, - ", rolled-back %" PRIu64 " commit(s) (%" PRIu64 - " >>> %" PRIu64 ")", - meta_txnid - chk->envinfo.mi_recent_txnid, meta_txnid, - chk->envinfo.mi_recent_txnid); - chk_line_end(line); - } -} - -__cold static int -chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, - const int deep, const MDBX_walk_sdb_t *sdb_info, - const size_t page_size, const MDBX_page_type_t pagetype, - const MDBX_error_t page_err, const size_t nentries, - const size_t payload_bytes, const size_t header_bytes, - const size_t unused_bytes) { - MDBX_chk_scope_t *const scope = ctx; - MDBX_chk_internal_t *const chk = scope->internal; - MDBX_chk_context_t *const usr = chk->usr; - MDBX_env *const env = usr->env; - - MDBX_chk_subdb_t *sdb; - int err = chk_get_sdb(scope, sdb_info, &sdb); - if (unlikely(err)) - return err; - - if (deep > 42) { - chk_scope_issue(scope, "too deeply %u", deep); - return MDBX_CORRUPTED /* avoid infinite loop/recursion */; - } - histogram_acc(deep, &sdb->histogram.deep); - usr->result.processed_pages += npages; - const size_t page_bytes = payload_bytes + header_bytes + unused_bytes; - - int height = deep + 1; - if (sdb->id >= CORE_DBS) - height -= usr->txn->mt_dbs[MAIN_DBI].md_depth; - const struct MDBX_db *nested = sdb_info->nested; - if (nested) { - if (sdb->flags & MDBX_DUPSORT) - height -= sdb_info->internal->md_depth; - else { - chk_object_issue(scope, "nested tree", pgno, "unexpected", - "subDb %s flags 0x%x, deep %i", chk_v2a(chk, &sdb->name), - sdb->flags, deep); - nested = nullptr; - } - } else - chk->last_nested = nullptr; - - const char *pagetype_caption; - bool branch = false; - switch 
(pagetype) { - default: - chk_object_issue(scope, "page", pgno, "unknown page-type", - "type %u, deep %i", (unsigned)pagetype, deep); - pagetype_caption = "unknown"; - sdb->pages.other += npages; - break; - case MDBX_page_broken: - assert(page_err != MDBX_SUCCESS); - pagetype_caption = "broken"; - sdb->pages.other += npages; - break; - case MDBX_subpage_broken: - assert(page_err != MDBX_SUCCESS); - pagetype_caption = "broken-subpage"; - sdb->pages.other += npages; - break; - case MDBX_page_large: - pagetype_caption = "large"; - histogram_acc(npages, &sdb->histogram.large_pages); - if (sdb->flags & MDBX_DUPSORT) - chk_object_issue(scope, "page", pgno, "unexpected", - "type %u, subDb %s flags 0x%x, deep %i", - (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags, - deep); - break; - case MDBX_page_branch: - branch = true; - if (!nested) { - pagetype_caption = "branch"; - sdb->pages.branch += 1; - } else { - pagetype_caption = "nested-branch"; - sdb->pages.nested_branch += 1; - } - break; - case MDBX_page_dupfixed_leaf: - if (!nested) - chk_object_issue(scope, "page", pgno, "unexpected", - "type %u, subDb %s flags 0x%x, deep %i", - (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags, - deep); - /* fall through */ - __fallthrough; - case MDBX_page_leaf: - if (!nested) { - pagetype_caption = "leaf"; - sdb->pages.leaf += 1; - if (height != sdb_info->internal->md_depth) - chk_object_issue(scope, "page", pgno, "wrong tree height", - "actual %i != %i subDb %s", height, - sdb_info->internal->md_depth, - chk_v2a(chk, &sdb->name)); - } else { - pagetype_caption = - (pagetype == MDBX_page_leaf) ? 
"nested-leaf" : "nested-leaf-dupfixed"; - sdb->pages.nested_leaf += 1; - if (chk->last_nested != nested) { - histogram_acc(height, &sdb->histogram.nested_tree); - chk->last_nested = nested; - } - if (height != nested->md_depth) - chk_object_issue(scope, "page", pgno, "wrong nested-tree height", - "actual %i != %i dupsort-node %s", height, - nested->md_depth, chk_v2a(chk, &sdb->name)); - } - break; - case MDBX_subpage_dupfixed_leaf: - case MDBX_subpage_leaf: - pagetype_caption = (pagetype == MDBX_subpage_leaf) ? "subleaf-dupsort" - : "subleaf-dupfixed"; - sdb->pages.nested_subleaf += 1; - if ((sdb->flags & MDBX_DUPSORT) == 0 || nested) - chk_object_issue(scope, "page", pgno, "unexpected", - "type %u, subDb %s flags 0x%x, deep %i", - (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags, - deep); - break; - } - - if (npages) { - if (sdb->cookie) { - MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra); - if (npages == 1) - chk_print(line, "%s-page %" PRIuSIZE, pagetype_caption, pgno); - else - chk_print(line, "%s-span %" PRIuSIZE "[%u]", pagetype_caption, pgno, - npages); - chk_line_end( - chk_print(line, - " of %s: header %" PRIiPTR ", %s %" PRIiPTR - ", payload %" PRIiPTR ", unused %" PRIiPTR ", deep %i", - chk_v2a(chk, &sdb->name), header_bytes, - (pagetype == MDBX_page_branch) ? "keys" : "entries", - nentries, payload_bytes, unused_bytes, deep)); - } - - bool already_used = false; - for (unsigned n = 0; n < npages; ++n) { - const size_t spanpgno = pgno + n; - if (spanpgno >= usr->result.alloc_pages) { - chk_object_issue(scope, "page", spanpgno, "wrong page-no", - "%s-page: %" PRIuSIZE " > %" PRIuSIZE ", deep %i", - pagetype_caption, spanpgno, usr->result.alloc_pages, - deep); - sdb->pages.all += 1; - } else if (chk->pagemap[spanpgno]) { - const MDBX_chk_subdb_t *const rival = - chk->subdb[chk->pagemap[spanpgno] - 1]; - chk_object_issue(scope, "page", spanpgno, - (branch && rival == sdb) ? 
"loop" : "already used", - "%s-page: by %s, deep %i", pagetype_caption, - chk_v2a(chk, &rival->name), deep); - already_used = true; - } else { - chk->pagemap[spanpgno] = (int16_t)sdb->id + 1; - sdb->pages.all += 1; - } - } - - if (already_used) - return branch ? MDBX_RESULT_TRUE /* avoid infinite loop/recursion */ - : MDBX_SUCCESS; - } - - if (MDBX_IS_ERROR(page_err)) { - chk_object_issue(scope, "page", pgno, "invalid/corrupted", "%s-page", - pagetype_caption); - } else { - if (unused_bytes > page_size) - chk_object_issue(scope, "page", pgno, "illegal unused-bytes", - "%s-page: %u < %" PRIuSIZE " < %u", pagetype_caption, 0, - unused_bytes, env->me_psize); - - if (header_bytes < (int)sizeof(long) || - (size_t)header_bytes >= env->me_psize - sizeof(long)) { - chk_object_issue(scope, "page", pgno, "illegal header-length", - "%s-page: %" PRIuSIZE " < %" PRIuSIZE " < %" PRIuSIZE, - pagetype_caption, sizeof(long), header_bytes, - env->me_psize - sizeof(long)); - } - if (nentries < 1 || (pagetype == MDBX_page_branch && nentries < 2)) { - chk_object_issue(scope, "page", pgno, nentries ? 
"half-empty" : "empty", - "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE - " entries, deep %i", - pagetype_caption, payload_bytes, nentries, deep); - sdb->pages.empty += 1; - } - - if (npages) { - if (page_bytes != page_size) { - chk_object_issue(scope, "page", pgno, "misused", - "%s-page: %" PRIuPTR " != %" PRIuPTR " (%" PRIuPTR - "h + %" PRIuPTR "p + %" PRIuPTR "u), deep %i", - pagetype_caption, page_size, page_bytes, header_bytes, - payload_bytes, unused_bytes, deep); - if (page_size > page_bytes) - sdb->lost_bytes += page_size - page_bytes; - } else { - sdb->payload_bytes += payload_bytes + header_bytes; - usr->result.total_payload_bytes += payload_bytes + header_bytes; - } - } - } - return chk_check_break(scope); -} - -__cold static int chk_tree(MDBX_chk_scope_t *const scope) { - MDBX_chk_internal_t *const chk = scope->internal; - MDBX_chk_context_t *const usr = chk->usr; - MDBX_env *const env = usr->env; - MDBX_txn *const txn = usr->txn; - -#if defined(_WIN32) || defined(_WIN64) - SetLastError(ERROR_SUCCESS); -#else - errno = 0; -#endif /* Windows */ - chk->pagemap = osal_calloc(usr->result.alloc_pages, sizeof(*chk->pagemap)); - if (!chk->pagemap) { - int err = osal_get_errno(); - return chk_error_rc(scope, err ? 
err : MDBX_ENOMEM, "calloc"); - } - - if (scope->verbosity > MDBX_chk_info) - chk_scope_push(scope, 0, "Walking pages..."); - /* always skip key ordering checking - * to avoid MDBX_CORRUPTED in case custom comparators were used */ - usr->result.processed_pages = NUM_METAS; - int err = mdbx_env_pgwalk(txn, chk_pgvisitor, scope, true); - if (MDBX_IS_ERROR(err) && err != MDBX_EINTR) - chk_error_rc(scope, err, "mdbx_env_pgwalk"); - - for (size_t n = NUM_METAS; n < usr->result.alloc_pages; ++n) - if (!chk->pagemap[n]) - usr->result.unused_pages += 1; - - MDBX_chk_subdb_t total; - memset(&total, 0, sizeof(total)); - total.pages.all = NUM_METAS; - for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb) && chk->subdb[i]; ++i) { - MDBX_chk_subdb_t *const sdb = chk->subdb[i]; - total.payload_bytes += sdb->payload_bytes; - total.lost_bytes += sdb->lost_bytes; - total.pages.all += sdb->pages.all; - total.pages.empty += sdb->pages.empty; - total.pages.other += sdb->pages.other; - total.pages.branch += sdb->pages.branch; - total.pages.leaf += sdb->pages.leaf; - total.pages.nested_branch += sdb->pages.nested_branch; - total.pages.nested_leaf += sdb->pages.nested_leaf; - total.pages.nested_subleaf += sdb->pages.nested_subleaf; - } - assert(total.pages.all == usr->result.processed_pages); - - const size_t total_page_bytes = pgno2bytes(env, total.pages.all); - if (usr->scope->subtotal_issues || usr->scope->verbosity >= MDBX_chk_verbose) - chk_line_end(chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution), - "walked %zu pages, left/unused %zu" - ", %" PRIuSIZE " problem(s)", - usr->result.processed_pages, - usr->result.unused_pages, - usr->scope->subtotal_issues)); - - err = chk_scope_restore(scope, err); - if (scope->verbosity > MDBX_chk_info) { - for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb) && chk->subdb[i]; ++i) { - MDBX_chk_subdb_t *const sdb = chk->subdb[i]; - MDBX_chk_scope_t *inner = - chk_scope_push(scope, 0, "tree %s:", chk_v2a(chk, &sdb->name)); - if (sdb->pages.all == 0) - 
chk_line_end( - chk_print(chk_line_begin(inner, MDBX_chk_resolution), "empty")); - else { - MDBX_chk_line_t *line = chk_line_begin(inner, MDBX_chk_info); - if (line) { - line = chk_print(line, "page usage: subtotal %" PRIuSIZE, - sdb->pages.all); - const size_t branch_pages = - sdb->pages.branch + sdb->pages.nested_branch; - const size_t leaf_pages = sdb->pages.leaf + sdb->pages.nested_leaf + - sdb->pages.nested_subleaf; - if (sdb->pages.other) - line = chk_print(line, ", other %" PRIuSIZE, sdb->pages.other); - if (sdb->pages.other == 0 || - (branch_pages | leaf_pages | sdb->histogram.large_pages.count) != - 0) { - line = chk_print(line, ", branch %" PRIuSIZE ", leaf %" PRIuSIZE, - branch_pages, leaf_pages); - if (sdb->histogram.large_pages.count || - (sdb->flags & MDBX_DUPSORT) == 0) { - line = chk_print(line, ", large %" PRIuSIZE, - sdb->histogram.large_pages.count); - if (sdb->histogram.large_pages.amount | - sdb->histogram.large_pages.count) - line = histogram_print(inner, line, &sdb->histogram.large_pages, - " amount", "single", true); - } - } - line = histogram_dist(chk_line_feed(line), &sdb->histogram.deep, - "tree deep density", "1", false); - if (sdb != &chk->subdb_gc && sdb->histogram.nested_tree.count) { - line = chk_print(chk_line_feed(line), "nested tree(s) %" PRIuSIZE, - sdb->histogram.nested_tree.count); - line = histogram_dist(line, &sdb->histogram.nested_tree, " density", - "1", false); - line = chk_print(chk_line_feed(line), - "nested tree(s) pages %" PRIuSIZE - ": branch %" PRIuSIZE ", leaf %" PRIuSIZE - ", subleaf %" PRIuSIZE, - sdb->pages.nested_branch + sdb->pages.nested_leaf, - sdb->pages.nested_branch, sdb->pages.nested_leaf, - sdb->pages.nested_subleaf); - } - - const size_t bytes = pgno2bytes(env, sdb->pages.all); - line = chk_print( - chk_line_feed(line), - "page filling: subtotal %" PRIuSIZE - " bytes (%.1f%%), payload %" PRIuSIZE - " (%.1f%%), unused %" PRIuSIZE " (%.1f%%)", - bytes, bytes * 100.0 / total_page_bytes, sdb->payload_bytes, 
- sdb->payload_bytes * 100.0 / bytes, bytes - sdb->payload_bytes, - (bytes - sdb->payload_bytes) * 100.0 / bytes); - if (sdb->pages.empty) - line = chk_print(line, ", %" PRIuSIZE " empty pages", - sdb->pages.empty); - if (sdb->lost_bytes) - line = - chk_print(line, ", %" PRIuSIZE " bytes lost", sdb->lost_bytes); - chk_line_end(line); - } - } - chk_scope_restore(scope, 0); - } - } - - MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution); - line = chk_print(line, - "summary: total %" PRIuSIZE " bytes, payload %" PRIuSIZE - " (%.1f%%), unused %" PRIuSIZE " (%.1f%%)," - " average fill %.1f%%", - total_page_bytes, usr->result.total_payload_bytes, - usr->result.total_payload_bytes * 100.0 / total_page_bytes, - total_page_bytes - usr->result.total_payload_bytes, - (total_page_bytes - usr->result.total_payload_bytes) * - 100.0 / total_page_bytes, - usr->result.total_payload_bytes * 100.0 / total_page_bytes); - if (total.pages.empty) - line = chk_print(line, ", %" PRIuSIZE " empty pages", total.pages.empty); - if (total.lost_bytes) - line = chk_print(line, ", %" PRIuSIZE " bytes lost", total.lost_bytes); - chk_line_end(line); - return err; -} - -typedef int(chk_kv_visitor)(MDBX_chk_scope_t *const scope, - MDBX_chk_subdb_t *sdb, const size_t record_number, - const MDBX_val *key, const MDBX_val *data); - -__cold static int chk_handle_kv(MDBX_chk_scope_t *const scope, - MDBX_chk_subdb_t *sdb, - const size_t record_number, const MDBX_val *key, - const MDBX_val *data) { - MDBX_chk_internal_t *const chk = scope->internal; - int err = MDBX_SUCCESS; - assert(sdb->cookie); - if (chk->cb->subdb_handle_kv) - err = chk->cb->subdb_handle_kv(chk->usr, sdb, record_number, key, data); - return err ? 
err : chk_check_break(scope); -} - -__cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, - MDBX_chk_subdb_t *sdb, chk_kv_visitor *handler) { - MDBX_chk_internal_t *const chk = scope->internal; - MDBX_chk_context_t *const usr = chk->usr; - MDBX_env *const env = usr->env; - MDBX_txn *const txn = usr->txn; - MDBX_cursor *cursor = nullptr; - size_t record_count = 0, dups = 0, sub_databases = 0; - int err; - - if ((MDBX_TXN_FINISHED | MDBX_TXN_ERROR) & txn->mt_flags) { - chk_line_end( - chk_flush(chk_print(chk_line_begin(scope, MDBX_chk_error), - "abort processing %s due to a previous error", - chk_v2a(chk, &sdb->name)))); - err = MDBX_BAD_TXN; - goto bailout; - } - - if (0 > (int)dbi) { - err = dbi_open( - txn, &sdb->name, MDBX_DB_ACCEDE, &dbi, - (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr, - (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr); - if (unlikely(err)) { - tASSERT(txn, dbi >= txn->mt_env->me_numdbs || - (txn->mt_env->me_db_flags[dbi] & DB_VALID) == 0); - chk_error_rc(scope, err, "mdbx_dbi_open"); - goto bailout; - } - tASSERT(txn, dbi < txn->mt_env->me_numdbs && - (txn->mt_env->me_db_flags[dbi] & DB_VALID) != 0); - } - - const MDBX_db *const db = txn->mt_dbs + dbi; - if (handler) { - const char *key_mode = nullptr; - switch (sdb->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) { - case 0: - key_mode = "usual"; - break; - case MDBX_REVERSEKEY: - key_mode = "reserve"; - break; - case MDBX_INTEGERKEY: - key_mode = "ordinal"; - break; - case MDBX_REVERSEKEY | MDBX_INTEGERKEY: - key_mode = "msgpack"; - break; - default: - key_mode = "inconsistent"; - chk_scope_issue(scope, "wrong key-mode (0x%x)", - sdb->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)); - } - - const char *value_mode = nullptr; - switch (sdb->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | - MDBX_INTEGERDUP)) { - case 0: - value_mode = "single"; - break; - case MDBX_DUPSORT: - value_mode = "multi"; - break; - case MDBX_DUPSORT | 
MDBX_REVERSEDUP: - value_mode = "multi-reverse"; - break; - case MDBX_DUPSORT | MDBX_DUPFIXED: - value_mode = "multi-samelength"; - break; - case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP: - value_mode = "multi-reverse-samelength"; - break; - case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP: - value_mode = "multi-ordinal"; - break; - case MDBX_DUPSORT | MDBX_INTEGERDUP | MDBX_REVERSEDUP: - value_mode = "multi-msgpack"; - break; - case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: - value_mode = "reserved"; - break; - default: - value_mode = "inconsistent"; - chk_scope_issue(scope, "wrong value-mode (0x%x)", - sdb->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | - MDBX_DUPFIXED | MDBX_INTEGERDUP)); - } - - MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info); - line = chk_print(line, "key-value kind: %s-key => %s-value", key_mode, - value_mode); - line = chk_print(line, ", flags:"); - if (!sdb->flags) - line = chk_print(line, " none"); - else { - const uint8_t f[] = {MDBX_DUPSORT, - MDBX_INTEGERKEY, - MDBX_REVERSEKEY, - MDBX_DUPFIXED, - MDBX_REVERSEDUP, - MDBX_INTEGERDUP, - 0}; - const char *const t[] = {"dupsort", "integerkey", "reversekey", - "dupfixed", "reversedup", "integerdup"}; - for (size_t i = 0; f[i]; i++) - if (sdb->flags & f[i]) - line = chk_print(line, " %s", t[i]); - } - chk_line_end(chk_print(line, " (0x%02X)", sdb->flags)); - - line = chk_print(chk_line_begin(scope, MDBX_chk_verbose), - "entries %" PRIu64 ", sequence %" PRIu64, db->md_entries, - db->md_seq); - if (db->md_mod_txnid) - line = chk_print(line, ", last modification txn#%" PRIaTXN, - db->md_mod_txnid); - if (db->md_root != P_INVALID) - line = chk_print(line, ", root #%" PRIaPGNO, db->md_root); - chk_line_end(line); - chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_verbose), - "b-tree depth %u, pages: branch %" PRIaPGNO - ", leaf %" PRIaPGNO ", large %" PRIaPGNO, - db->md_depth, db->md_branch_pages, db->md_leaf_pages, - db->md_overflow_pages)); - - if 
((chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) { - const size_t branch_pages = sdb->pages.branch + sdb->pages.nested_branch; - const size_t leaf_pages = sdb->pages.leaf + sdb->pages.nested_leaf; - const size_t subtotal_pages = - db->md_branch_pages + db->md_leaf_pages + db->md_overflow_pages; - if (subtotal_pages != sdb->pages.all) - chk_scope_issue( - scope, "%s pages mismatch (%" PRIuSIZE " != walked %" PRIuSIZE ")", - "subtotal", subtotal_pages, sdb->pages.all); - if (db->md_branch_pages != branch_pages) - chk_scope_issue( - scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", - "branch", db->md_branch_pages, branch_pages); - if (db->md_leaf_pages != leaf_pages) - chk_scope_issue( - scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", - "all-leaf", db->md_leaf_pages, leaf_pages); - if (db->md_overflow_pages != sdb->histogram.large_pages.amount) - chk_scope_issue( - scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", - "large/overlow", db->md_overflow_pages, - sdb->histogram.large_pages.amount); - } - } - - err = mdbx_cursor_open(txn, dbi, &cursor); - if (unlikely(err)) { - chk_error_rc(scope, err, "mdbx_cursor_open"); - goto bailout; - } - if (chk->flags & MDBX_CHK_IGNORE_ORDER) { - cursor->mc_checking |= CC_SKIPORD | CC_PAGECHECK; - if (cursor->mc_xcursor) - cursor->mc_xcursor->mx_cursor.mc_checking |= CC_SKIPORD | CC_PAGECHECK; - } - - const size_t maxkeysize = mdbx_env_get_maxkeysize_ex(env, sdb->flags); - MDBX_val prev_key = {nullptr, 0}, prev_data = {nullptr, 0}; - MDBX_val key, data; - err = mdbx_cursor_get(cursor, &key, &data, MDBX_FIRST); - while (err == MDBX_SUCCESS) { - err = chk_check_break(scope); - if (unlikely(err)) - goto bailout; - - bool bad_key = false; - if (key.iov_len > maxkeysize) { - chk_object_issue(scope, "entry", record_count, - "key length exceeds max-key-size", - "%" PRIuPTR " > %" PRIuPTR, key.iov_len, maxkeysize); - bad_key = true; - } else if ((sdb->flags & MDBX_INTEGERKEY) && 
key.iov_len != 8 && - key.iov_len != 4) { - chk_object_issue(scope, "entry", record_count, "wrong key length", - "%" PRIuPTR " != 4or8", key.iov_len); - bad_key = true; - } - - bool bad_data = false; - if ((sdb->flags & MDBX_INTEGERDUP) && data.iov_len != 8 && - data.iov_len != 4) { - chk_object_issue(scope, "entry", record_count, "wrong data length", - "%" PRIuPTR " != 4or8", data.iov_len); - bad_data = true; - } - - if (prev_key.iov_base) { - if (prev_data.iov_base && !bad_data && (sdb->flags & MDBX_DUPFIXED) && - prev_data.iov_len != data.iov_len) { - chk_object_issue(scope, "entry", record_count, "different data length", - "%" PRIuPTR " != %" PRIuPTR, prev_data.iov_len, - data.iov_len); - bad_data = true; - } - - if (!bad_key) { - int cmp = mdbx_cmp(txn, dbi, &key, &prev_key); - if (cmp == 0) { - ++dups; - if ((sdb->flags & MDBX_DUPSORT) == 0) { - chk_object_issue(scope, "entry", record_count, "duplicated entries", - nullptr); - if (prev_data.iov_base && data.iov_len == prev_data.iov_len && - memcmp(data.iov_base, prev_data.iov_base, data.iov_len) == 0) - chk_object_issue(scope, "entry", record_count, - "complete duplicate", nullptr); - } else if (!bad_data && prev_data.iov_base) { - cmp = mdbx_dcmp(txn, dbi, &data, &prev_data); - if (cmp == 0) - chk_object_issue(scope, "entry", record_count, - "complete duplicate", nullptr); - else if (cmp < 0 && !(chk->flags & MDBX_CHK_IGNORE_ORDER)) - chk_object_issue(scope, "entry", record_count, - "wrong order of multi-values", nullptr); - } - } else if (cmp < 0 && !(chk->flags & MDBX_CHK_IGNORE_ORDER)) - chk_object_issue(scope, "entry", record_count, - "wrong order of entries", nullptr); - } - } - - if (!bad_key) { - if (!prev_key.iov_base && (sdb->flags & MDBX_INTEGERKEY)) - chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), - "fixed key-size %" PRIuSIZE, key.iov_len)); - prev_key = key; - } - if (!bad_data) { - if (!prev_data.iov_base && - (sdb->flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED))) - 
chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), - "fixed data-size %" PRIuSIZE, data.iov_len)); - prev_data = data; - } - - record_count++; - histogram_acc(key.iov_len, &sdb->histogram.key_len); - histogram_acc(data.iov_len, &sdb->histogram.val_len); - - const MDBX_node *const node = - page_node(cursor->mc_pg[cursor->mc_top], cursor->mc_ki[cursor->mc_top]); - if (node_flags(node) == F_SUBDATA) { - if (dbi != MAIN_DBI || (sdb->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | - MDBX_REVERSEDUP | MDBX_INTEGERDUP))) - chk_object_issue(scope, "entry", record_count, - "unexpected sub-database", "node-flags 0x%x", - node_flags(node)); - else if (data.iov_len != sizeof(MDBX_db)) - chk_object_issue(scope, "entry", record_count, - "wrong sub-database node size", - "node-size %" PRIuSIZE " != %" PRIuSIZE, data.iov_len, - sizeof(MDBX_db)); - else if (scope->stage == MDBX_chk_traversal_maindb) - /* подсчитываем subDB при первом проходе */ - sub_databases += 1; - else { - /* обработка subDB при втором проходе */ - MDBX_db aligned_db; - memcpy(&aligned_db, data.iov_base, sizeof(aligned_db)); - MDBX_walk_sdb_t sdb_info = {key, nullptr, nullptr}; - sdb_info.internal = &aligned_db; - MDBX_chk_subdb_t *subdb; - err = chk_get_sdb(scope, &sdb_info, &subdb); - if (unlikely(err)) - goto bailout; - if (subdb->cookie) { - err = chk_scope_begin(chk, 0, MDBX_chk_traversal_subdbs, subdb, - &usr->result.problems_kv, - "Processing subDB %s...", - chk_v2a(chk, &subdb->name)); - if (likely(!err)) { - err = chk_db(usr->scope, (MDBX_dbi)-1, subdb, chk_handle_kv); - if (err != MDBX_EINTR && err != MDBX_RESULT_TRUE) - usr->result.subdb_processed += 1; - } - err = chk_scope_restore(scope, err); - if (unlikely(err)) - goto bailout; - } else - chk_line_end(chk_flush( - chk_print(chk_line_begin(scope, MDBX_chk_processing), - "Skip processing %s...", chk_v2a(chk, &subdb->name)))); - } - } else if (handler) { - err = handler(scope, sdb, record_count, &key, &data); - if (unlikely(err)) - goto bailout; - 
} - - err = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT); - } - - err = (err != MDBX_NOTFOUND) ? chk_error_rc(scope, err, "mdbx_cursor_get") - : MDBX_SUCCESS; - if (err == MDBX_SUCCESS && record_count != db->md_entries) - chk_scope_issue(scope, - "different number of entries %" PRIuSIZE " != %" PRIu64, - record_count, db->md_entries); -bailout: - if (cursor) { - if (handler) { - if (sdb->histogram.key_len.count) { - MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info); - line = histogram_dist(line, &sdb->histogram.key_len, - "key length density", "0/1", false); - chk_line_feed(line); - line = histogram_dist(line, &sdb->histogram.val_len, - "value length density", "0/1", false); - chk_line_end(line); - } - if (scope->stage == MDBX_chk_traversal_maindb) - usr->result.subdb_total = sub_databases; - if (chk->cb->subdb_conclude) - err = chk->cb->subdb_conclude(usr, sdb, cursor, err); - MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution); - line = chk_print(line, "summary: %" PRIuSIZE " records,", record_count); - if (dups || (sdb->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | - MDBX_REVERSEDUP | MDBX_INTEGERDUP))) - line = chk_print(line, " %" PRIuSIZE " dups,", dups); - if (sub_databases || dbi == MAIN_DBI) - line = chk_print(line, " %" PRIuSIZE " sub-databases,", sub_databases); - line = chk_print(line, - " %" PRIuSIZE " key's bytes," - " %" PRIuSIZE " data's bytes," - " %" PRIuSIZE " problem(s)", - sdb->histogram.key_len.amount, - sdb->histogram.val_len.amount, scope->subtotal_issues); - chk_line_end(chk_flush(line)); - } - - mdbx_cursor_close(cursor); - if (!txn->mt_cursors[dbi] && (txn->mt_dbi_state[dbi] & DBI_FRESH)) - mdbx_dbi_close(env, dbi); - } - return err; -} - -__cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, - MDBX_chk_subdb_t *sdb, - const size_t record_number, const MDBX_val *key, - const MDBX_val *data) { - MDBX_chk_internal_t *const chk = scope->internal; - MDBX_chk_context_t *const usr = chk->usr; - assert(sdb == 
&chk->subdb_gc); - (void)sdb; - const char *bad = ""; - pgno_t *iptr = data->iov_base; - - if (key->iov_len != sizeof(txnid_t)) - chk_object_issue(scope, "entry", record_number, "wrong txn-id size", - "key-size %" PRIuSIZE, key->iov_len); - else { - txnid_t txnid; - memcpy(&txnid, key->iov_base, sizeof(txnid)); - if (txnid < 1 || txnid > usr->txn->mt_txnid) - chk_object_issue(scope, "entry", record_number, "wrong txn-id", - "%" PRIaTXN, txnid); - else { - if (data->iov_len < sizeof(pgno_t) || data->iov_len % sizeof(pgno_t)) - chk_object_issue(scope, "entry", txnid, "wrong idl size", "%" PRIuPTR, - data->iov_len); - size_t number = (data->iov_len >= sizeof(pgno_t)) ? *iptr++ : 0; - if (number > MDBX_PGL_LIMIT) - chk_object_issue(scope, "entry", txnid, "wrong idl length", "%" PRIuPTR, - number); - else if ((number + 1) * sizeof(pgno_t) > data->iov_len) { - chk_object_issue(scope, "entry", txnid, "trimmed idl", - "%" PRIuSIZE " > %" PRIuSIZE " (corruption)", - (number + 1) * sizeof(pgno_t), data->iov_len); - number = data->iov_len / sizeof(pgno_t) - 1; - } else if (data->iov_len - (number + 1) * sizeof(pgno_t) >= - /* LY: allow gap up to one page. it is ok - * and better than shink-and-retry inside update_gc() */ - usr->env->me_psize) - chk_object_issue(scope, "entry", txnid, "extra idl space", - "%" PRIuSIZE " < %" PRIuSIZE " (minor, not a trouble)", - (number + 1) * sizeof(pgno_t), data->iov_len); - - usr->result.gc_pages += number; - if (chk->envinfo.mi_latter_reader_txnid > txnid) - usr->result.reclaimable_pages += number; - - size_t prev = MDBX_PNL_ASCENDING ? 
NUM_METAS - 1 : usr->txn->mt_next_pgno; - size_t span = 1; - for (size_t i = 0; i < number; ++i) { - const size_t pgno = iptr[i]; - if (pgno < NUM_METAS) - chk_object_issue(scope, "entry", txnid, "wrong idl entry", - "pgno %" PRIuSIZE " < meta-pages %u", pgno, - NUM_METAS); - else if (pgno >= usr->result.backed_pages) - chk_object_issue(scope, "entry", txnid, "wrong idl entry", - "pgno %" PRIuSIZE " > backed-pages %" PRIuSIZE, pgno, - usr->result.backed_pages); - else if (pgno >= usr->result.alloc_pages) - chk_object_issue(scope, "entry", txnid, "wrong idl entry", - "pgno %" PRIuSIZE " > alloc-pages %" PRIuSIZE, pgno, - usr->result.alloc_pages - 1); - else { - if (MDBX_PNL_DISORDERED(prev, pgno)) { - bad = " [bad sequence]"; - chk_object_issue( - scope, "entry", txnid, "bad sequence", - "%" PRIuSIZE " %c [%" PRIuSIZE "].%" PRIuSIZE, prev, - (prev == pgno) ? '=' : (MDBX_PNL_ASCENDING ? '>' : '<'), i, - pgno); - } - if (chk->pagemap) { - const intptr_t id = chk->pagemap[pgno]; - if (id == 0) - chk->pagemap[pgno] = -1 /* mark the pgno listed in GC */; - else if (id > 0) { - assert(id - 1 <= (intptr_t)ARRAY_LENGTH(chk->subdb)); - chk_object_issue(scope, "page", pgno, "already used", "by %s", - chk_v2a(chk, &chk->subdb[id - 1]->name)); - } else - chk_object_issue(scope, "page", pgno, "already listed in GC", - nullptr); - } - } - prev = pgno; - while (i + span < number && - iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) - : pgno_sub(pgno, span))) - ++span; - } - if (sdb->cookie) { - chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_details), - "transaction %" PRIaTXN ", %" PRIuSIZE - " pages, maxspan %" PRIuSIZE "%s", - txnid, number, span, bad)); - for (size_t i = 0; i < number; i += span) { - const size_t pgno = iptr[i]; - for (span = 1; - i + span < number && - iptr[i + span] == (MDBX_PNL_ASCENDING ? 
pgno_add(pgno, span) - : pgno_sub(pgno, span)); - ++span) - ; - histogram_acc(span, &sdb->histogram.nested_tree); - MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra); - if (line) { - if (span > 1) - line = - chk_print(line, "%9" PRIuSIZE "[%" PRIuSIZE "]", pgno, span); - else - line = chk_print(line, "%9" PRIuSIZE, pgno); - chk_line_end(line); - int err = chk_check_break(scope); - if (err) - return err; - } - } - } - } - } - return chk_check_break(scope); -} - -__cold static int env_chk(MDBX_chk_scope_t *const scope) { - MDBX_chk_internal_t *const chk = scope->internal; - MDBX_chk_context_t *const usr = chk->usr; - MDBX_env *const env = usr->env; - MDBX_txn *const txn = usr->txn; - int err = - env_info(env, txn, &chk->envinfo, sizeof(chk->envinfo), &chk->troika); - if (unlikely(err)) - return chk_error_rc(scope, err, "env_info"); - - MDBX_chk_line_t *line = - chk_puts(chk_line_begin(scope, MDBX_chk_info), "current boot-id "); - if (chk->envinfo.mi_bootid.current.x | chk->envinfo.mi_bootid.current.y) - line = chk_print(line, "%016" PRIx64 "-%016" PRIx64, - chk->envinfo.mi_bootid.current.x, - chk->envinfo.mi_bootid.current.y); - else - line = chk_puts(line, "unavailable"); - chk_line_end(line); - - err = osal_filesize(env->me_lazy_fd, &env->me_dxb_mmap.filesize); - if (unlikely(err)) - return chk_error_rc(scope, err, "osal_filesize"); - - //-------------------------------------------------------------------------- - - err = chk_scope_begin(chk, 1, MDBX_chk_meta, nullptr, - &usr->result.problems_meta, "Peek the meta-pages..."); - if (likely(!err)) { - MDBX_chk_scope_t *const inner = usr->scope; - const uint64_t dxbfile_pages = - env->me_dxb_mmap.filesize >> env->me_psize2log; - usr->result.alloc_pages = txn->mt_next_pgno; - usr->result.backed_pages = bytes2pgno(env, env->me_dxb_mmap.current); - if (unlikely(usr->result.backed_pages > dxbfile_pages)) - chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64, - usr->result.backed_pages, 
dxbfile_pages); - if (unlikely(dxbfile_pages < NUM_METAS)) - chk_scope_issue(inner, "file-pages %" PRIu64 " < %u", dxbfile_pages, - NUM_METAS); - if (unlikely(usr->result.backed_pages < NUM_METAS)) - chk_scope_issue(inner, "backed-pages %zu < %u", usr->result.backed_pages, - NUM_METAS); - if (unlikely(usr->result.backed_pages < NUM_METAS)) { - chk_scope_issue(inner, "backed-pages %zu < num-metas %u", - usr->result.backed_pages, NUM_METAS); - return MDBX_CORRUPTED; - } - if (unlikely(dxbfile_pages < NUM_METAS)) { - chk_scope_issue(inner, "backed-pages %zu < num-metas %u", - usr->result.backed_pages, NUM_METAS); - return MDBX_CORRUPTED; - } - if (unlikely(usr->result.backed_pages > (size_t)MAX_PAGENO + 1)) { - chk_scope_issue(inner, "backed-pages %zu > max-pages %zu", - usr->result.backed_pages, (size_t)MAX_PAGENO + 1); - usr->result.backed_pages = MAX_PAGENO + 1; - } - - if ((env->me_flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) { - if (unlikely(usr->result.backed_pages > dxbfile_pages)) { - chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64, - usr->result.backed_pages, dxbfile_pages); - usr->result.backed_pages = (size_t)dxbfile_pages; - } - if (unlikely(usr->result.alloc_pages > usr->result.backed_pages)) { - chk_scope_issue(scope, "alloc-pages %zu > backed-pages %zu", - usr->result.alloc_pages, usr->result.backed_pages); - usr->result.alloc_pages = usr->result.backed_pages; - } - } else { - /* DB may be shrunk by writer down to the allocated (but unused) pages. 
*/ - if (unlikely(usr->result.alloc_pages > usr->result.backed_pages)) { - chk_scope_issue(inner, "alloc-pages %zu > backed-pages %zu", - usr->result.alloc_pages, usr->result.backed_pages); - usr->result.alloc_pages = usr->result.backed_pages; - } - if (unlikely(usr->result.alloc_pages > dxbfile_pages)) { - chk_scope_issue(inner, "alloc-pages %zu > file-pages %" PRIu64, - usr->result.alloc_pages, dxbfile_pages); - usr->result.alloc_pages = (size_t)dxbfile_pages; - } - if (unlikely(usr->result.backed_pages > dxbfile_pages)) - usr->result.backed_pages = (size_t)dxbfile_pages; - } - - line = chk_line_feed(chk_print( - chk_line_begin(inner, MDBX_chk_info), - "pagesize %u (%u system), max keysize %u..%u" - ", max readers %u", - env->me_psize, env->me_os_psize, - mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT), - mdbx_env_get_maxkeysize_ex(env, MDBX_DB_DEFAULTS), env->me_maxreaders)); - line = chk_line_feed( - chk_print_size(line, "mapsize ", env->me_dxb_mmap.current, nullptr)); - if (txn->mt_geo.lower == txn->mt_geo.upper) - line = chk_print_size( - line, "fixed datafile: ", chk->envinfo.mi_geo.current, nullptr); - else { - line = chk_print_size( - line, "dynamic datafile: ", chk->envinfo.mi_geo.lower, nullptr); - line = chk_print_size(line, " .. 
", chk->envinfo.mi_geo.upper, ", "); - line = chk_print_size(line, "+", chk->envinfo.mi_geo.grow, ", "); - - line = chk_line_feed( - chk_print_size(line, "-", chk->envinfo.mi_geo.shrink, nullptr)); - line = chk_print_size( - line, "current datafile: ", chk->envinfo.mi_geo.current, nullptr); - } - tASSERT(txn, txn->mt_geo.now == chk->envinfo.mi_geo.current / - chk->envinfo.mi_dxb_pagesize); - chk_line_end(chk_print(line, ", %u pages", txn->mt_geo.now)); -#if defined(_WIN32) || defined(_WIN64) || MDBX_DEBUG - if (txn->mt_geo.shrink_pv && txn->mt_geo.now != txn->mt_geo.upper && - scope->verbosity >= MDBX_chk_verbose) { - line = chk_line_begin(inner, MDBX_chk_notice); - chk_line_feed(chk_print( - line, " > WARNING: Due Windows system limitations a file couldn't")); - chk_line_feed(chk_print( - line, " > be truncated while the database is opened. So, the size")); - chk_line_feed(chk_print( - line, " > database file of may by large than the database itself,")); - chk_line_end(chk_print( - line, " > until it will be closed or reopened in read-write mode.")); - } -#endif /* Windows || Debug */ - chk_verbose_meta(inner, 0); - chk_verbose_meta(inner, 1); - chk_verbose_meta(inner, 2); - - if (env->me_stuck_meta >= 0) { - chk_line_end(chk_print(chk_line_begin(inner, MDBX_chk_processing), - "skip checking meta-pages since the %u" - " is selected for verification", - env->me_stuck_meta)); - line = chk_line_feed( - chk_print(chk_line_begin(inner, MDBX_chk_resolution), - "transactions: recent %" PRIu64 ", " - "selected for verification %" PRIu64 ", lag %" PRIi64, - chk->envinfo.mi_recent_txnid, - chk->envinfo.mi_meta_txnid[env->me_stuck_meta], - chk->envinfo.mi_recent_txnid - - chk->envinfo.mi_meta_txnid[env->me_stuck_meta])); - chk_line_end(line); - } else { - chk_line_end(chk_puts(chk_line_begin(inner, MDBX_chk_verbose), - "performs check for meta-pages clashes")); - const unsigned meta_clash_mask = meta_eq_mask(&chk->troika); - if (meta_clash_mask & 1) - chk_scope_issue(inner, 
"meta-%d and meta-%d are clashed", 0, 1); - if (meta_clash_mask & 2) - chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 1, 2); - if (meta_clash_mask & 4) - chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 2, 0); - - const unsigned prefer_steady_metanum = chk->troika.prefer_steady; - const uint64_t prefer_steady_txnid = - chk->troika.txnid[prefer_steady_metanum]; - const unsigned recent_metanum = chk->troika.recent; - const uint64_t recent_txnid = chk->troika.txnid[recent_metanum]; - if (env->me_flags & MDBX_EXCLUSIVE) { - chk_line_end( - chk_puts(chk_line_begin(inner, MDBX_chk_verbose), - "performs full check recent-txn-id with meta-pages")); - eASSERT(env, recent_txnid == chk->envinfo.mi_recent_txnid); - if (prefer_steady_txnid != recent_txnid) { - if ((chk->flags & MDBX_CHK_READWRITE) != 0 && - (env->me_flags & MDBX_RDONLY) == 0 && - recent_txnid > prefer_steady_txnid && - (chk->envinfo.mi_bootid.current.x | - chk->envinfo.mi_bootid.current.y) != 0 && - chk->envinfo.mi_bootid.current.x == - chk->envinfo.mi_bootid.meta[recent_metanum].x && - chk->envinfo.mi_bootid.current.y == - chk->envinfo.mi_bootid.meta[recent_metanum].y) { - chk_line_end( - chk_print(chk_line_begin(inner, MDBX_chk_verbose), - "recent meta-%u is weak, but boot-id match current" - " (will synced upon successful check)", - recent_metanum)); - } else - chk_scope_issue( - inner, - "steady meta-%d txn-id mismatch recent-txn-id (%" PRIi64 - " != %" PRIi64 ")", - prefer_steady_metanum, prefer_steady_txnid, recent_txnid); - } - } else if (chk->write_locked) { - chk_line_end( - chk_puts(chk_line_begin(inner, MDBX_chk_verbose), - "performs lite check recent-txn-id with meta-pages (not a " - "monopolistic mode)")); - if (recent_txnid != chk->envinfo.mi_recent_txnid) { - chk_scope_issue(inner, - "weak meta-%d txn-id mismatch recent-txn-id (%" PRIi64 - " != %" PRIi64 ")", - recent_metanum, recent_txnid, - chk->envinfo.mi_recent_txnid); - } - } else { - chk_line_end(chk_puts( - 
chk_line_begin(inner, MDBX_chk_verbose), - "skip check recent-txn-id with meta-pages (monopolistic or " - "read-write mode only)")); - } - - chk_line_end(chk_print( - chk_line_begin(inner, MDBX_chk_resolution), - "transactions: recent %" PRIu64 ", latter reader %" PRIu64 - ", lag %" PRIi64, - chk->envinfo.mi_recent_txnid, chk->envinfo.mi_latter_reader_txnid, - chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid)); - } - } - err = chk_scope_restore(scope, err); - - //-------------------------------------------------------------------------- - - if (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) - chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), - "Skipping %s traversal...", "b-tree")); - else { - err = chk_scope_begin( - chk, -1, MDBX_chk_traversal_tree, nullptr, &usr->result.tree_problems, - "Traversal %s by txn#%" PRIaTXN "...", "b-tree", txn->mt_txnid); - if (likely(!err)) - err = chk_tree(usr->scope); - if (usr->result.tree_problems && usr->result.gc_tree_problems == 0) - usr->result.gc_tree_problems = usr->result.tree_problems; - if (usr->result.tree_problems && usr->result.kv_tree_problems == 0) - usr->result.kv_tree_problems = usr->result.tree_problems; - chk_scope_restore(scope, err); - } - - if (usr->result.gc_tree_problems > 0) - chk_line_end(chk_print( - chk_line_begin(scope, MDBX_chk_processing), - "Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))", - chk_v2a(chk, MDBX_CHK_GC), "b-tree", - usr->result.problems_gc = usr->result.gc_tree_problems)); - else { - err = chk_scope_begin(chk, -1, MDBX_chk_traversal_freedb, &chk->subdb_gc, - &usr->result.problems_gc, - "Traversal %s by txn#%" PRIaTXN "...", "GC/freeDB", - txn->mt_txnid); - if (likely(!err)) - err = chk_db(usr->scope, FREE_DBI, &chk->subdb_gc, chk_handle_gc); - line = chk_line_begin(scope, MDBX_chk_info); - if (line) { - histogram_print(scope, line, &chk->subdb_gc.histogram.nested_tree, - "span(s)", "single", false); - chk_line_end(line); - } - if 
(usr->result.problems_gc == 0 && - (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) { - const size_t used_pages = usr->result.alloc_pages - usr->result.gc_pages; - if (usr->result.processed_pages != used_pages) - chk_scope_issue(usr->scope, - "used pages mismatch (%" PRIuSIZE - "(walked) != %" PRIuSIZE "(allocated - GC))", - usr->result.processed_pages, used_pages); - if (usr->result.unused_pages != usr->result.gc_pages) - chk_scope_issue(usr->scope, - "GC pages mismatch (%" PRIuSIZE - "(expected) != %" PRIuSIZE "(GC))", - usr->result.unused_pages, usr->result.gc_pages); - } - } - chk_scope_restore(scope, err); - - //-------------------------------------------------------------------------- - - err = chk_scope_begin(chk, 1, MDBX_chk_space, nullptr, nullptr, - "Page allocation:"); - const double percent_boundary_reciprocal = 100.0 / txn->mt_geo.upper; - const double percent_backed_reciprocal = 100.0 / usr->result.backed_pages; - const size_t detained = usr->result.gc_pages - usr->result.reclaimable_pages; - const size_t available2boundary = txn->mt_geo.upper - - usr->result.alloc_pages + - usr->result.reclaimable_pages; - const size_t available2backed = usr->result.backed_pages - - usr->result.alloc_pages + - usr->result.reclaimable_pages; - const size_t remained2boundary = txn->mt_geo.upper - usr->result.alloc_pages; - const size_t remained2backed = - usr->result.backed_pages - usr->result.alloc_pages; - - const size_t used = (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) - ? 
usr->result.alloc_pages - usr->result.gc_pages - : usr->result.processed_pages; - - line = chk_line_begin(usr->scope, MDBX_chk_info); - line = chk_print(line, - "backed by file: %" PRIuSIZE " pages (%.1f%%)" - ", %" PRIuSIZE " left to boundary (%.1f%%)", - usr->result.backed_pages, - usr->result.backed_pages * percent_boundary_reciprocal, - txn->mt_geo.upper - usr->result.backed_pages, - (txn->mt_geo.upper - usr->result.backed_pages) * - percent_boundary_reciprocal); - line = chk_line_feed(line); - - line = chk_print( - line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", - "used", used, used * percent_backed_reciprocal, - used * percent_boundary_reciprocal); - line = chk_line_feed(line); - - line = chk_print( - line, - "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE - " to boundary (%.1f%% of boundary)", - "remained", remained2backed, remained2backed * percent_backed_reciprocal, - remained2boundary, remained2boundary * percent_boundary_reciprocal); - line = chk_line_feed(line); - - line = chk_print( - line, - "reclaimable: %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)" - ", GC %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)", - usr->result.reclaimable_pages, - usr->result.reclaimable_pages * percent_backed_reciprocal, - usr->result.reclaimable_pages * percent_boundary_reciprocal, - usr->result.gc_pages, usr->result.gc_pages * percent_backed_reciprocal, - usr->result.gc_pages * percent_boundary_reciprocal); - line = chk_line_feed(line); - - line = chk_print( - line, - "detained by reader(s): %" PRIuSIZE - " (%.1f%% of backed, %.1f%% of boundary)" - ", %u reader(s), lag %" PRIi64, - detained, detained * percent_backed_reciprocal, - detained * percent_boundary_reciprocal, chk->envinfo.mi_numreaders, - chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid); - line = chk_line_feed(line); - - line = chk_print( - line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", - "allocated", 
usr->result.alloc_pages, - usr->result.alloc_pages * percent_backed_reciprocal, - usr->result.alloc_pages * percent_boundary_reciprocal); - line = chk_line_feed(line); - - line = chk_print(line, - "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE - " to boundary (%.1f%% of boundary)", - "available", available2backed, - available2backed * percent_backed_reciprocal, - available2boundary, - available2boundary * percent_boundary_reciprocal); - chk_line_end(line); - - line = chk_line_begin(usr->scope, MDBX_chk_resolution); - line = chk_print(line, "%s %" PRIaPGNO " pages", - (txn->mt_geo.upper == txn->mt_geo.now) ? "total" : "upto", - txn->mt_geo.upper); - line = chk_print(line, ", backed %" PRIuSIZE " (%.1f%%)", - usr->result.backed_pages, - usr->result.backed_pages * percent_boundary_reciprocal); - line = chk_print(line, ", allocated %" PRIuSIZE " (%.1f%%)", - usr->result.alloc_pages, - usr->result.alloc_pages * percent_boundary_reciprocal); - line = - chk_print(line, ", available %" PRIuSIZE " (%.1f%%)", available2boundary, - available2boundary * percent_boundary_reciprocal); - chk_line_end(line); - chk_scope_restore(scope, err); - - //-------------------------------------------------------------------------- - - if (chk->flags & MDBX_CHK_SKIP_KV_TRAVERSAL) - chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), - "Skipping %s traversal...", "key-value")); - else if ((usr->result.problems_kv = usr->result.kv_tree_problems) > 0) - chk_line_end(chk_print( - chk_line_begin(scope, MDBX_chk_processing), - "Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))", - chk_v2a(chk, MDBX_CHK_MAIN), "key-value", - usr->result.problems_kv = usr->result.kv_tree_problems)); - else { - err = - chk_scope_begin(chk, 0, MDBX_chk_traversal_maindb, &chk->subdb_main, - &usr->result.problems_kv, "Processing %s...", "MainDB"); - if (likely(!err)) - err = chk_db(usr->scope, MAIN_DBI, &chk->subdb_main, chk_handle_kv); - chk_scope_restore(scope, err); - - if 
(usr->result.problems_kv && usr->result.subdb_total) - chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), - "Skip processing %s", "sub-database(s)")); - else if (usr->result.problems_kv == 0 && usr->result.subdb_total == 0) - chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "No %s", - "sub-database(s)")); - else if (usr->result.problems_kv == 0 && usr->result.subdb_total) { - err = chk_scope_begin(chk, 1, MDBX_chk_traversal_subdbs, nullptr, - &usr->result.problems_kv, - "Traversal %s by txn#%" PRIaTXN "...", - "sub-database(s)", txn->mt_txnid); - if (!err) - err = chk_db(usr->scope, MAIN_DBI, &chk->subdb_main, nullptr); - if (usr->scope->subtotal_issues) - chk_line_end( - chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution), - "processed %" PRIuSIZE " of %" PRIuSIZE " subDb(s)" - ", %" PRIuSIZE " problems(s)", - usr->result.subdb_processed, usr->result.subdb_total, - usr->scope->subtotal_issues)); - } - chk_scope_restore(scope, err); - } - - return chk_scope_end(chk, chk_scope_begin(chk, 0, MDBX_chk_conclude, nullptr, - nullptr, nullptr)); -} - -__cold int mdbx_env_chk_encount_problem(MDBX_chk_context_t *ctx) { - if (likely(ctx && ctx->internal && ctx->internal->usr == ctx && - ctx->internal->problem_counter && ctx->scope)) { - *ctx->internal->problem_counter += 1; - ctx->scope->subtotal_issues += 1; - return MDBX_SUCCESS; - } - return MDBX_EINVAL; -} - -__cold int mdbx_env_chk(MDBX_env *env, const struct MDBX_chk_callbacks *cb, - MDBX_chk_context_t *ctx, - const enum MDBX_chk_flags_t flags, - enum MDBX_chk_severity verbosity, - unsigned timeout_seconds_16dot16) { - int err, rc = check_env(env, false); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - if (unlikely(!cb || !ctx || ctx->internal)) - return MDBX_EINVAL; - - MDBX_chk_internal_t *const chk = osal_calloc(1, sizeof(MDBX_chk_internal_t)); - if (unlikely(!chk)) - return MDBX_ENOMEM; - - chk->cb = cb; - chk->usr = ctx; - chk->usr->internal = chk; - chk->usr->env = env; - 
chk->flags = flags; - - chk->subdb_gc.id = -1; - chk->subdb_gc.name.iov_base = MDBX_CHK_GC; - chk->subdb[FREE_DBI] = &chk->subdb_gc; - - chk->subdb_main.id = -1; - chk->subdb_main.name.iov_base = MDBX_CHK_MAIN; - chk->subdb[MAIN_DBI] = &chk->subdb_main; - - chk->monotime_timeout = - timeout_seconds_16dot16 - ? osal_16dot16_to_monotime(timeout_seconds_16dot16) + osal_monotime() - : 0; - chk->usr->scope_nesting = 0; - chk->usr->result.subdbs = (const void *)&chk->subdb; - - MDBX_chk_scope_t *const top = chk->scope_stack; - top->verbosity = verbosity; - top->internal = chk; - - // init - rc = chk_scope_end( - chk, chk_scope_begin(chk, 0, MDBX_chk_init, nullptr, nullptr, nullptr)); - - // lock - if (likely(!rc)) - rc = chk_scope_begin( - chk, 0, MDBX_chk_lock, nullptr, nullptr, "Taking %slock...", - (env->me_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) ? "" : "read "); - if (likely(!rc) && (env->me_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0 && - (flags & MDBX_CHK_READWRITE)) { - rc = mdbx_txn_lock(env, false); - if (unlikely(rc)) - chk_error_rc(ctx->scope, rc, "mdbx_txn_lock"); - else - chk->write_locked = true; - } - if (likely(!rc)) { - rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &ctx->txn); - if (unlikely(rc)) - chk_error_rc(ctx->scope, rc, "mdbx_txn_begin"); - } - chk_scope_end(chk, rc); - - // doit - if (likely(!rc)) { - chk->subdb_gc.flags = ctx->txn->mt_dbs[FREE_DBI].md_flags; - chk->subdb_main.flags = ctx->txn->mt_dbs[MAIN_DBI].md_flags; - rc = env_chk(top); - } - - // unlock - if (ctx->txn || chk->write_locked) { - chk_scope_begin(chk, 0, MDBX_chk_unlock, nullptr, nullptr, nullptr); - if (ctx->txn) { - err = mdbx_txn_abort(ctx->txn); - if (err && !rc) - rc = err; - ctx->txn = nullptr; - } - if (chk->write_locked) - mdbx_txn_unlock(env); - rc = chk_scope_end(chk, rc); - } - - // finalize - err = chk_scope_begin(chk, 0, MDBX_chk_finalize, nullptr, nullptr, nullptr); - rc = chk_scope_end(chk, err ? 
err : rc); - chk_dispose(chk); - return rc; -} - -/******************************************************************************/ -/* *INDENT-OFF* */ -/* clang-format off */ - -__dll_export -#ifdef __attribute_used__ - __attribute_used__ -#elif defined(__GNUC__) || __has_attribute(__used__) - __attribute__((__used__)) -#endif -#ifdef __attribute_externally_visible__ - __attribute_externally_visible__ -#elif (defined(__GNUC__) && !defined(__clang__)) || \ - __has_attribute(__externally_visible__) - __attribute__((__externally_visible__)) -#endif - const struct MDBX_build_info mdbx_build = { -#ifdef MDBX_BUILD_TIMESTAMP - MDBX_BUILD_TIMESTAMP -#else - "\"" __DATE__ " " __TIME__ "\"" -#endif /* MDBX_BUILD_TIMESTAMP */ - - , -#ifdef MDBX_BUILD_TARGET - MDBX_BUILD_TARGET -#else - #if defined(__ANDROID_API__) - "Android" MDBX_STRINGIFY(__ANDROID_API__) - #elif defined(__linux__) || defined(__gnu_linux__) - "Linux" - #elif defined(EMSCRIPTEN) || defined(__EMSCRIPTEN__) - "webassembly" - #elif defined(__CYGWIN__) - "CYGWIN" - #elif defined(_WIN64) || defined(_WIN32) || defined(__TOS_WIN__) \ - || defined(__WINDOWS__) - "Windows" - #elif defined(__APPLE__) - #if (defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) \ - || (defined(TARGET_IPHONE_SIMULATOR) && TARGET_IPHONE_SIMULATOR) - "iOS" - #else - "MacOS" - #endif - #elif defined(__FreeBSD__) - "FreeBSD" - #elif defined(__DragonFly__) - "DragonFlyBSD" - #elif defined(__NetBSD__) - "NetBSD" - #elif defined(__OpenBSD__) - "OpenBSD" - #elif defined(__bsdi__) - "UnixBSDI" - #elif defined(__MACH__) - "MACH" - #elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) - "HPUX" - #elif defined(_AIX) - "AIX" - #elif defined(__sun) && defined(__SVR4) - "Solaris" - #elif defined(__BSD__) || defined(BSD) - "UnixBSD" - #elif defined(__unix__) || defined(UNIX) || defined(__unix) \ - || defined(__UNIX) || defined(__UNIX__) - "UNIX" - #elif defined(_POSIX_VERSION) - "POSIX" MDBX_STRINGIFY(_POSIX_VERSION) - #else - "UnknownOS" - 
#endif /* Target OS */ - - "-" - - #if defined(__amd64__) - "AMD64" - #elif defined(__ia32__) - "IA32" - #elif defined(__e2k__) || defined(__elbrus__) - "Elbrus" - #elif defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) - "Alpha" - #elif defined(__aarch64__) || defined(_M_ARM64) - "ARM64" - #elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) \ - || defined(__TARGET_ARCH_THUMB) || defined(_ARM) || defined(_M_ARM) \ - || defined(_M_ARMT) || defined(__arm) - "ARM" - #elif defined(__mips64) || defined(__mips64__) || (defined(__mips) && (__mips >= 64)) - "MIPS64" - #elif defined(__mips__) || defined(__mips) || defined(_R4000) || defined(__MIPS__) - "MIPS" - #elif defined(__hppa64__) || defined(__HPPA64__) || defined(__hppa64) - "PARISC64" - #elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) - "PARISC" - #elif defined(__ia64__) || defined(__ia64) || defined(_IA64) \ - || defined(__IA64__) || defined(_M_IA64) || defined(__itanium__) - "Itanium" - #elif defined(__powerpc64__) || defined(__ppc64__) || defined(__ppc64) \ - || defined(__powerpc64) || defined(_ARCH_PPC64) - "PowerPC64" - #elif defined(__powerpc__) || defined(__ppc__) || defined(__powerpc) \ - || defined(__ppc) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__POWERPC__) - "PowerPC" - #elif defined(__sparc64__) || defined(__sparc64) - "SPARC64" - #elif defined(__sparc__) || defined(__sparc) - "SPARC" - #elif defined(__s390__) || defined(__s390) || defined(__zarch__) || defined(__zarch) - "S390" - #else - "UnknownARCH" - #endif -#endif /* MDBX_BUILD_TARGET */ - -#ifdef MDBX_BUILD_TYPE -# if defined(_MSC_VER) -# pragma message("Configuration-depended MDBX_BUILD_TYPE: " MDBX_BUILD_TYPE) -# endif - "-" MDBX_BUILD_TYPE -#endif /* MDBX_BUILD_TYPE */ - , - "MDBX_DEBUG=" MDBX_STRINGIFY(MDBX_DEBUG) -#ifdef ENABLE_GPROF - " ENABLE_GPROF" -#endif /* ENABLE_GPROF */ - " MDBX_WORDBITS=" MDBX_STRINGIFY(MDBX_WORDBITS) - " BYTE_ORDER=" -#if __BYTE_ORDER__ == 
__ORDER_LITTLE_ENDIAN__ - "LITTLE_ENDIAN" -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - "BIG_ENDIAN" -#else - #error "FIXME: Unsupported byte order" -#endif /* __BYTE_ORDER__ */ - " MDBX_ENABLE_BIGFOOT=" MDBX_STRINGIFY(MDBX_ENABLE_BIGFOOT) - " MDBX_ENV_CHECKPID=" MDBX_ENV_CHECKPID_CONFIG - " MDBX_TXN_CHECKOWNER=" MDBX_TXN_CHECKOWNER_CONFIG - " MDBX_64BIT_ATOMIC=" MDBX_64BIT_ATOMIC_CONFIG - " MDBX_64BIT_CAS=" MDBX_64BIT_CAS_CONFIG - " MDBX_TRUST_RTC=" MDBX_TRUST_RTC_CONFIG - " MDBX_AVOID_MSYNC=" MDBX_STRINGIFY(MDBX_AVOID_MSYNC) - " MDBX_ENABLE_REFUND=" MDBX_STRINGIFY(MDBX_ENABLE_REFUND) - " MDBX_ENABLE_MADVISE=" MDBX_STRINGIFY(MDBX_ENABLE_MADVISE) - " MDBX_ENABLE_MINCORE=" MDBX_STRINGIFY(MDBX_ENABLE_MINCORE) - " MDBX_ENABLE_PGOP_STAT=" MDBX_STRINGIFY(MDBX_ENABLE_PGOP_STAT) - " MDBX_ENABLE_PROFGC=" MDBX_STRINGIFY(MDBX_ENABLE_PROFGC) -#if MDBX_DISABLE_VALIDATION - " MDBX_DISABLE_VALIDATION=YES" -#endif /* MDBX_DISABLE_VALIDATION */ -#ifdef __SANITIZE_ADDRESS__ - " SANITIZE_ADDRESS=YES" -#endif /* __SANITIZE_ADDRESS__ */ -#ifdef ENABLE_MEMCHECK - " ENABLE_MEMCHECK=YES" -#endif /* ENABLE_MEMCHECK */ -#if MDBX_FORCE_ASSERTIONS - " MDBX_FORCE_ASSERTIONS=YES" -#endif /* MDBX_FORCE_ASSERTIONS */ -#ifdef _GNU_SOURCE - " _GNU_SOURCE=YES" -#else - " _GNU_SOURCE=NO" -#endif /* _GNU_SOURCE */ -#ifdef __APPLE__ - " MDBX_OSX_SPEED_INSTEADOF_DURABILITY=" MDBX_STRINGIFY(MDBX_OSX_SPEED_INSTEADOF_DURABILITY) -#endif /* MacOS */ -#if defined(_WIN32) || defined(_WIN64) - " MDBX_WITHOUT_MSVC_CRT=" MDBX_STRINGIFY(MDBX_WITHOUT_MSVC_CRT) - " MDBX_BUILD_SHARED_LIBRARY=" MDBX_STRINGIFY(MDBX_BUILD_SHARED_LIBRARY) -#if !MDBX_BUILD_SHARED_LIBRARY - " MDBX_MANUAL_MODULE_HANDLER=" MDBX_STRINGIFY(MDBX_MANUAL_MODULE_HANDLER) -#endif - " WINVER=" MDBX_STRINGIFY(WINVER) -#else /* Windows */ - " MDBX_LOCKING=" MDBX_LOCKING_CONFIG - " MDBX_USE_OFDLOCKS=" MDBX_USE_OFDLOCKS_CONFIG -#endif /* !Windows */ - " MDBX_CACHELINE_SIZE=" MDBX_STRINGIFY(MDBX_CACHELINE_SIZE) - " MDBX_CPU_WRITEBACK_INCOHERENT=" 
MDBX_STRINGIFY(MDBX_CPU_WRITEBACK_INCOHERENT) - " MDBX_MMAP_INCOHERENT_CPU_CACHE=" MDBX_STRINGIFY(MDBX_MMAP_INCOHERENT_CPU_CACHE) - " MDBX_MMAP_INCOHERENT_FILE_WRITE=" MDBX_STRINGIFY(MDBX_MMAP_INCOHERENT_FILE_WRITE) - " MDBX_UNALIGNED_OK=" MDBX_STRINGIFY(MDBX_UNALIGNED_OK) - " MDBX_PNL_ASCENDING=" MDBX_STRINGIFY(MDBX_PNL_ASCENDING) - , -#ifdef MDBX_BUILD_COMPILER - MDBX_BUILD_COMPILER -#else - #ifdef __INTEL_COMPILER - "Intel C/C++ " MDBX_STRINGIFY(__INTEL_COMPILER) - #elif defined(__apple_build_version__) - "Apple clang " MDBX_STRINGIFY(__apple_build_version__) - #elif defined(__ibmxl__) - "IBM clang C " MDBX_STRINGIFY(__ibmxl_version__) "." MDBX_STRINGIFY(__ibmxl_release__) - "." MDBX_STRINGIFY(__ibmxl_modification__) "." MDBX_STRINGIFY(__ibmxl_ptf_fix_level__) - #elif defined(__clang__) - "clang " MDBX_STRINGIFY(__clang_version__) - #elif defined(__MINGW64__) - "MINGW-64 " MDBX_STRINGIFY(__MINGW64_MAJOR_VERSION) "." MDBX_STRINGIFY(__MINGW64_MINOR_VERSION) - #elif defined(__MINGW32__) - "MINGW-32 " MDBX_STRINGIFY(__MINGW32_MAJOR_VERSION) "." MDBX_STRINGIFY(__MINGW32_MINOR_VERSION) - #elif defined(__MINGW__) - "MINGW " MDBX_STRINGIFY(__MINGW_MAJOR_VERSION) "." MDBX_STRINGIFY(__MINGW_MINOR_VERSION) - #elif defined(__IBMC__) - "IBM C " MDBX_STRINGIFY(__IBMC__) - #elif defined(__GNUC__) - "GNU C/C++ " - #ifdef __VERSION__ - __VERSION__ - #else - MDBX_STRINGIFY(__GNUC__) "." MDBX_STRINGIFY(__GNUC_MINOR__) "." 
MDBX_STRINGIFY(__GNUC_PATCHLEVEL__) - #endif - #elif defined(_MSC_VER) - "MSVC " MDBX_STRINGIFY(_MSC_FULL_VER) "-" MDBX_STRINGIFY(_MSC_BUILD) - #else - "Unknown compiler" - #endif -#endif /* MDBX_BUILD_COMPILER */ - , -#ifdef MDBX_BUILD_FLAGS_CONFIG - MDBX_BUILD_FLAGS_CONFIG -#endif /* MDBX_BUILD_FLAGS_CONFIG */ -#ifdef MDBX_BUILD_FLAGS - MDBX_BUILD_FLAGS -#endif /* MDBX_BUILD_FLAGS */ -#if !(defined(MDBX_BUILD_FLAGS_CONFIG) || defined(MDBX_BUILD_FLAGS)) - "undefined (please use correct build script)" -#ifdef _MSC_VER -#pragma message("warning: Build flags undefined. Please use correct build script") -#else -#warning "Build flags undefined. Please use correct build script" -#endif // _MSC_VER -#endif -}; - -#ifdef __SANITIZE_ADDRESS__ -#if !defined(_MSC_VER) || __has_attribute(weak) -LIBMDBX_API __attribute__((__weak__)) -#endif -const char *__asan_default_options(void) { - return "symbolize=1:allow_addr2line=1:" -#if MDBX_DEBUG - "debug=1:" - "verbosity=2:" -#endif /* MDBX_DEBUG */ - "log_threads=1:" - "report_globals=1:" - "replace_str=1:replace_intrin=1:" - "malloc_context_size=9:" -#if !defined(__APPLE__) - "detect_leaks=1:" -#endif - "check_printf=1:" - "detect_deadlocks=1:" -#ifndef LTO_ENABLED - "check_initialization_order=1:" -#endif - "detect_stack_use_after_return=1:" - "intercept_tls_get_addr=1:" - "decorate_proc_maps=1:" - "abort_on_error=1"; -} -#endif /* __SANITIZE_ADDRESS__ */ - -/* *INDENT-ON* */ -/* clang-format on */ diff --git a/src/cursor.c b/src/cursor.c new file mode 100644 index 00000000..524ac1b5 --- /dev/null +++ b/src/cursor.c @@ -0,0 +1,2451 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \note Please refer to the COPYRIGHT file for explanations license change, +/// credits and acknowledgments. 
+/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +__cold int cursor_check(const MDBX_cursor *mc) { + if (!mc->txn->tw.dirtylist) { + cASSERT(mc, (mc->txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); + } else { + cASSERT(mc, (mc->txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + cASSERT(mc, mc->txn->tw.dirtyroom + mc->txn->tw.dirtylist->length == + (mc->txn->parent ? mc->txn->parent->tw.dirtyroom + : mc->txn->env->options.dp_limit)); + } + + cASSERT(mc, (mc->checking & z_updating) ? mc->top + 1 <= mc->tree->height + : mc->top + 1 == mc->tree->height); + if (unlikely((mc->checking & z_updating) ? mc->top + 1 > mc->tree->height + : mc->top + 1 != mc->tree->height)) + return MDBX_CURSOR_FULL; + + if (is_pointed(mc) && (mc->checking & z_updating) == 0) { + const page_t *mp = mc->pg[mc->top]; + const size_t nkeys = page_numkeys(mp); + if (!is_hollow(mc)) { + cASSERT(mc, mc->ki[mc->top] < nkeys); + if (mc->ki[mc->top] >= nkeys) + return MDBX_CURSOR_FULL; + } + if (inner_pointed(mc)) { + cASSERT(mc, is_filled(mc)); + if (!is_filled(mc)) + return MDBX_CURSOR_FULL; + } + } + + for (intptr_t n = 0; n <= mc->top; ++n) { + page_t *mp = mc->pg[n]; + const size_t nkeys = page_numkeys(mp); + const bool expect_branch = (n < mc->tree->height - 1) ? true : false; + const bool expect_nested_leaf = + (n + 1 == mc->tree->height - 1) ? true : false; + const bool branch = is_branch(mp) ? 
true : false; + cASSERT(mc, branch == expect_branch); + if (unlikely(branch != expect_branch)) + return MDBX_CURSOR_FULL; + if ((mc->checking & z_updating) == 0) { + cASSERT(mc, nkeys > mc->ki[n] || (!branch && nkeys == mc->ki[n] && + (mc->flags & z_hollow) != 0)); + if (unlikely(nkeys <= mc->ki[n] && !(!branch && nkeys == mc->ki[n] && + (mc->flags & z_hollow) != 0))) + return MDBX_CURSOR_FULL; + } else { + cASSERT(mc, nkeys + 1 >= mc->ki[n]); + if (unlikely(nkeys + 1 < mc->ki[n])) + return MDBX_CURSOR_FULL; + } + + int err = page_check(mc, mp); + if (unlikely(err != MDBX_SUCCESS)) + return err; + + for (size_t i = 0; i < nkeys; ++i) { + if (branch) { + node_t *node = page_node(mp, i); + cASSERT(mc, node_flags(node) == 0); + if (unlikely(node_flags(node) != 0)) + return MDBX_CURSOR_FULL; + pgno_t pgno = node_pgno(node); + page_t *np; + err = page_get(mc, pgno, &np, mp->txnid); + cASSERT(mc, err == MDBX_SUCCESS); + if (unlikely(err != MDBX_SUCCESS)) + return err; + const bool nested_leaf = is_leaf(np) ? true : false; + cASSERT(mc, nested_leaf == expect_nested_leaf); + if (unlikely(nested_leaf != expect_nested_leaf)) + return MDBX_CURSOR_FULL; + err = page_check(mc, np); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + } + } + return MDBX_SUCCESS; +} + +__cold int cursor_check_updating(MDBX_cursor *mc) { + const uint8_t checking = mc->checking; + mc->checking |= z_updating; + const int rc = cursor_check(mc); + mc->checking = checking; + return rc; +} + +bool cursor_is_tracked(const MDBX_cursor *mc) { + for (MDBX_cursor *scan = mc->txn->cursors[cursor_dbi(mc)]; scan; + scan = scan->next) + if (mc == ((mc->flags & z_inner) ? 
&scan->subcur->cursor : scan)) + return true; + return false; +} + +/*----------------------------------------------------------------------------*/ + +static int touch_dbi(MDBX_cursor *mc) { + cASSERT(mc, (mc->flags & z_inner) == 0); + cASSERT(mc, (*cursor_dbi_state(mc) & DBI_DIRTY) == 0); + *cursor_dbi_state(mc) |= DBI_DIRTY; + mc->txn->flags |= MDBX_TXN_DIRTY; + + if (!cursor_is_core(mc)) { + /* Touch DB record of named DB */ + cursor_couple_t cx; + int rc = dbi_check(mc->txn, MAIN_DBI); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + rc = cursor_init(&cx.outer, mc->txn, MAIN_DBI); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + mc->txn->dbi_state[MAIN_DBI] |= DBI_DIRTY; + rc = tree_search(&cx.outer, &container_of(mc->clc, kvx_t, clc)->name, + Z_MODIFY); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + return MDBX_SUCCESS; +} + +__hot int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, + const MDBX_val *data) { + cASSERT(mc, (mc->txn->flags & MDBX_TXN_RDONLY) == 0); + cASSERT(mc, is_pointed(mc) || mc->tree->height == 0); + cASSERT(mc, cursor_is_tracked(mc)); + + cASSERT(mc, F_ISSET(dbi_state(mc->txn, FREE_DBI), DBI_LINDO | DBI_VALID)); + cASSERT(mc, F_ISSET(dbi_state(mc->txn, MAIN_DBI), DBI_LINDO | DBI_VALID)); + if ((mc->flags & z_inner) == 0) { + MDBX_txn *const txn = mc->txn; + dpl_lru_turn(txn); + + if (unlikely((*cursor_dbi_state(mc) & DBI_DIRTY) == 0)) { + int err = touch_dbi(mc); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + + /* Estimate how much space this operation will take: */ + /* 1) Max b-tree height, reasonable enough with including dups' sub-tree */ + size_t need = CURSOR_STACK_SIZE + 3; + /* 2) GC/FreeDB for any payload */ + if (!cursor_is_gc(mc)) { + need += txn->dbs[FREE_DBI].height + (size_t)3; + /* 3) Named DBs also dirty the main DB */ + if (cursor_is_main(mc)) + need += txn->dbs[MAIN_DBI].height + (size_t)3; + } +#if xMDBX_DEBUG_SPILLING != 2 + /* production mode */ + /* 4) Double the page chain estimation + 
* for extensively splitting, rebalance and merging */ + need += need; + /* 5) Factor the key+data which to be put in */ + need += bytes2pgno(txn->env, node_size(key, data)) + (size_t)1; +#else + /* debug mode */ + (void)key; + (void)data; + txn->env->debug_dirtied_est = ++need; + txn->env->debug_dirtied_act = 0; +#endif /* xMDBX_DEBUG_SPILLING == 2 */ + + int err = txn_spill(txn, mc, need); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + + if (likely(mc->top >= 0) && !is_modifable(mc->txn, mc->pg[mc->top])) { + const int8_t top = mc->top; + mc->top = 0; + do { + int err = page_touch(mc); + if (unlikely(err != MDBX_SUCCESS)) + return err; + mc->top += 1; + } while (mc->top <= top); + mc->top = top; + } + return MDBX_SUCCESS; +} + +/*----------------------------------------------------------------------------*/ + +int cursor_shadow(MDBX_cursor *parent_cursor, MDBX_txn *nested_txn, + const size_t dbi) { + + tASSERT(nested_txn, dbi > FREE_DBI && dbi < nested_txn->n_dbi); + const size_t size = parent_cursor->subcur + ? sizeof(MDBX_cursor) + sizeof(subcur_t) + : sizeof(MDBX_cursor); + for (MDBX_cursor *bk; parent_cursor; parent_cursor = bk->next) { + cASSERT(parent_cursor, parent_cursor != parent_cursor->next); + bk = parent_cursor; + if (parent_cursor->signature != cur_signature_live) + continue; + bk = osal_malloc(size); + if (unlikely(!bk)) + return MDBX_ENOMEM; +#if MDBX_DEBUG + memset(bk, 0xCD, size); + VALGRIND_MAKE_MEM_UNDEFINED(bk, size); +#endif /* MDBX_DEBUG */ + *bk = *parent_cursor; + parent_cursor->backup = bk; + /* Kill pointers into src to reduce abuse: The + * user may not use mc until dst ends. But we need a valid + * txn pointer here for cursor fixups to keep working. 
*/ + parent_cursor->txn = nested_txn; + parent_cursor->tree = &nested_txn->dbs[dbi]; + parent_cursor->dbi_state = &nested_txn->dbi_state[dbi]; + subcur_t *mx = parent_cursor->subcur; + if (mx != nullptr) { + *(subcur_t *)(bk + 1) = *mx; + mx->cursor.txn = nested_txn; + mx->cursor.dbi_state = parent_cursor->dbi_state; + } + parent_cursor->next = nested_txn->cursors[dbi]; + nested_txn->cursors[dbi] = parent_cursor; + } + return MDBX_SUCCESS; +} + +void cursor_eot(MDBX_cursor *mc, const bool merge) { + const unsigned stage = mc->signature; + MDBX_cursor *const bk = mc->backup; + ENSURE(mc->txn->env, stage == cur_signature_live || + (stage == cur_signature_wait4eot && bk)); + if (bk) { + subcur_t *mx = mc->subcur; + cASSERT(mc, mc->txn->parent != nullptr); + /* Zap: Using uninitialized memory '*mc->backup'. */ + MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(6001); + ENSURE(mc->txn->env, bk->signature == cur_signature_live); + cASSERT(mc, mx == bk->subcur); + if (merge) { + /* Update pointers to parent txn */ + mc->next = bk->next; + mc->backup = bk->backup; + mc->txn = bk->txn; + mc->tree = bk->tree; + mc->dbi_state = bk->dbi_state; + if (mx) { + mx->cursor.txn = mc->txn; + mx->cursor.dbi_state = mc->dbi_state; + } + } else { + /* Restore from backup, i.e. 
rollback/abort nested txn */ + *mc = *bk; + if (mx) + *mx = *(subcur_t *)(bk + 1); + } + if (stage == cur_signature_wait4eot /* Cursor was closed by user */) + mc->signature = stage /* Promote closed state to parent txn */; + bk->signature = 0; + osal_free(bk); + } else { + ENSURE(mc->txn->env, stage == cur_signature_live); + mc->signature = cur_signature_ready4dispose /* Cursor may be reused */; + mc->next = mc; + } +} + +/*----------------------------------------------------------------------------*/ + +static __always_inline int couple_init(cursor_couple_t *couple, + const MDBX_txn *const txn, + tree_t *const tree, kvx_t *const kvx, + uint8_t *const dbi_state) { + + VALGRIND_MAKE_MEM_UNDEFINED(couple, sizeof(cursor_couple_t)); + tASSERT(txn, F_ISSET(*dbi_state, DBI_VALID | DBI_LINDO)); + + couple->outer.signature = cur_signature_live; + couple->outer.next = &couple->outer; + couple->outer.backup = nullptr; + couple->outer.txn = (MDBX_txn *)txn; + couple->outer.tree = tree; + couple->outer.clc = &kvx->clc; + couple->outer.dbi_state = dbi_state; + couple->outer.top_and_flags = z_fresh_mark; + STATIC_ASSERT((int)z_branch == P_BRANCH && (int)z_leaf == P_LEAF && + (int)z_largepage == P_LARGE && (int)z_dupfix == P_DUPFIX); + couple->outer.checking = + (AUDIT_ENABLED() || (txn->env->flags & MDBX_VALIDATION)) + ? 
z_pagecheck | z_leaf + : z_leaf; + couple->outer.subcur = nullptr; + + if (tree->flags & MDBX_DUPSORT) { + couple->inner.cursor.signature = cur_signature_live; + subcur_t *const mx = couple->outer.subcur = &couple->inner; + mx->cursor.subcur = nullptr; + mx->cursor.next = &mx->cursor; + mx->cursor.txn = (MDBX_txn *)txn; + mx->cursor.tree = &mx->nested_tree; + mx->cursor.clc = ptr_disp(couple->outer.clc, sizeof(clc_t)); + tASSERT(txn, &mx->cursor.clc->k == &kvx->clc.v); + mx->cursor.dbi_state = dbi_state; + mx->cursor.top_and_flags = z_fresh_mark | z_inner; + STATIC_ASSERT(MDBX_DUPFIXED * 2 == P_DUPFIX); + mx->cursor.checking = + couple->outer.checking + ((tree->flags & MDBX_DUPFIXED) << 1); + } + + if (unlikely(*dbi_state & DBI_STALE)) + return sdb_fetch(couple->outer.txn, cursor_dbi(&couple->outer)); + + if (unlikely(kvx->clc.k.lmax == 0)) + return sdb_setup(txn->env, kvx, tree); + + return MDBX_SUCCESS; +} + +__cold int cursor_init4walk(cursor_couple_t *couple, const MDBX_txn *const txn, + tree_t *const tree, kvx_t *const kvx) { + return couple_init(couple, txn, tree, kvx, txn->dbi_state); +} + +int cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, size_t dbi) { + STATIC_ASSERT(offsetof(cursor_couple_t, outer) == 0); + int rc = dbi_check(txn, dbi); + if (likely(rc == MDBX_SUCCESS)) + rc = couple_init(container_of(mc, cursor_couple_t, outer), txn, + &txn->dbs[dbi], &txn->env->kvs[dbi], &txn->dbi_state[dbi]); + return rc; +} + +__cold static int unexpected_dupsort(MDBX_cursor *mc) { + ERROR("unexpected dupsort-page/node for non-dupsort db/cursor (dbi %zu)", + cursor_dbi(mc)); + mc->txn->flags |= MDBX_TXN_ERROR; + be_poor(mc); + return MDBX_CORRUPTED; +} + +int cursor_dupsort_setup(MDBX_cursor *mc, const node_t *node, + const page_t *mp) { + cASSERT(mc, is_pointed(mc)); + subcur_t *mx = mc->subcur; + if (!MDBX_DISABLE_VALIDATION && unlikely(mx == nullptr)) + return unexpected_dupsort(mc); + + const uint8_t flags = node_flags(node); + switch (flags) { + default: + 
ERROR("invalid node flags %u", flags); + goto bailout; + case N_DUPDATA | N_SUBDATA: + if (!MDBX_DISABLE_VALIDATION && unlikely(node_ds(node) != sizeof(tree_t))) { + ERROR("invalid nested-db record size (%zu, expect %zu)", node_ds(node), + sizeof(tree_t)); + goto bailout; + } + memcpy(&mx->nested_tree, node_data(node), sizeof(tree_t)); + const txnid_t pp_txnid = mp->txnid; + if (!MDBX_DISABLE_VALIDATION && + unlikely(mx->nested_tree.mod_txnid > pp_txnid)) { + ERROR("nested-db.mod_txnid (%" PRIaTXN ") > page-txnid (%" PRIaTXN ")", + mx->nested_tree.mod_txnid, pp_txnid); + goto bailout; + } + mx->cursor.top_and_flags = z_fresh_mark | z_inner; + break; + case N_DUPDATA: + if (!MDBX_DISABLE_VALIDATION && unlikely(node_ds(node) <= PAGEHDRSZ)) { + ERROR("invalid nested-page size %zu", node_ds(node)); + goto bailout; + } + page_t *sp = node_data(node); + mx->nested_tree.height = 1; + mx->nested_tree.branch_pages = 0; + mx->nested_tree.leaf_pages = 1; + mx->nested_tree.large_pages = 0; + mx->nested_tree.items = page_numkeys(sp); + mx->nested_tree.root = 0; + mx->nested_tree.mod_txnid = mp->txnid; + mx->cursor.top_and_flags = z_inner; + mx->cursor.pg[0] = sp; + mx->cursor.ki[0] = 0; + mx->nested_tree.flags = flags_db2sub(mc->tree->flags); + mx->nested_tree.dupfix_size = + (mc->tree->flags & MDBX_DUPFIXED) ? 
sp->dupfix_ksize : 0; + break; + } + + if (unlikely(mx->nested_tree.dupfix_size != mc->tree->dupfix_size)) { + if (!MDBX_DISABLE_VALIDATION && unlikely(mc->tree->dupfix_size != 0)) { + ERROR("cursor mismatched nested-db dupfix_size %u", + mc->tree->dupfix_size); + goto bailout; + } + if (!MDBX_DISABLE_VALIDATION && + unlikely((mc->tree->flags & MDBX_DUPFIXED) == 0)) { + ERROR("mismatched nested-db flags %u", mc->tree->flags); + goto bailout; + } + if (!MDBX_DISABLE_VALIDATION && + unlikely(mx->nested_tree.dupfix_size < mc->clc->v.lmin || + mx->nested_tree.dupfix_size > mc->clc->v.lmax)) { + ERROR("mismatched nested-db.dupfix_size (%u) <> min/max value-length " + "(%zu/%zu)", + mx->nested_tree.dupfix_size, mc->clc->v.lmin, mc->clc->v.lmax); + goto bailout; + } + mc->tree->dupfix_size = mx->nested_tree.dupfix_size; + mc->clc->v.lmin = mc->clc->v.lmax = mx->nested_tree.dupfix_size; + } + + DEBUG("Sub-db dbi -%zu root page %" PRIaPGNO, cursor_dbi(&mx->cursor), + mx->nested_tree.root); + return MDBX_SUCCESS; + +bailout: + mx->cursor.top_and_flags = z_poor_mark | z_inner; + return MDBX_CORRUPTED; +} + +/*----------------------------------------------------------------------------*/ + +MDBX_cursor *cursor_cpstk(const MDBX_cursor *csrc, MDBX_cursor *cdst) { + cASSERT(cdst, cdst->txn == csrc->txn); + cASSERT(cdst, cdst->tree == csrc->tree); + cASSERT(cdst, cdst->clc == csrc->clc); + cASSERT(cdst, cdst->dbi_state == csrc->dbi_state); + cdst->top_and_flags = csrc->top_and_flags; + + for (intptr_t i = 0; i <= csrc->top; i++) { + cdst->pg[i] = csrc->pg[i]; + cdst->ki[i] = csrc->ki[i]; + } + return cdst; +} + +static __always_inline int sibling(MDBX_cursor *mc, bool right) { + if (mc->top < 1) { + /* root has no siblings */ + return MDBX_NOTFOUND; + } + + cursor_pop(mc); + DEBUG("parent page is page %" PRIaPGNO ", index %u", mc->pg[mc->top]->pgno, + mc->ki[mc->top]); + + int err; + if (right ? 
(mc->ki[mc->top] + (size_t)1 >= page_numkeys(mc->pg[mc->top])) + : (mc->ki[mc->top] == 0)) { + DEBUG("no more keys aside, moving to next %s sibling", + right ? "right" : "left"); + err = right ? cursor_sibling_right(mc) : cursor_sibling_left(mc); + if (err != MDBX_SUCCESS) { + if (likely(err == MDBX_NOTFOUND)) + /* undo cursor_pop before returning */ + mc->top += 1; + return err; + } + } else { + mc->ki[mc->top] += right ? 1 : -1; + DEBUG("just moving to %s index key %u", right ? "right" : "left", + mc->ki[mc->top]); + } + cASSERT(mc, is_branch(mc->pg[mc->top])); + + page_t *mp = mc->pg[mc->top]; + const node_t *node = page_node(mp, mc->ki[mc->top]); + err = page_get(mc, node_pgno(node), &mp, mp->txnid); + if (likely(err == MDBX_SUCCESS)) { + err = cursor_push(mc, mp, right ? 0 : (indx_t)page_numkeys(mp) - 1); + if (likely(err == MDBX_SUCCESS)) + return err; + } + + be_poor(mc); + return err; +} + +__hot int cursor_sibling_left(MDBX_cursor *mc) { + int err = sibling(mc, false); + if (likely(err != MDBX_NOTFOUND)) + return err; + + cASSERT(mc, mc->top >= 0); + size_t nkeys = page_numkeys(mc->pg[mc->top]); + cASSERT(mc, nkeys > 0); + mc->ki[mc->top] = 0; + return MDBX_NOTFOUND; +} + +__hot int cursor_sibling_right(MDBX_cursor *mc) { + int err = sibling(mc, true); + if (likely(err != MDBX_NOTFOUND)) + return err; + + cASSERT(mc, mc->top >= 0); + size_t nkeys = page_numkeys(mc->pg[mc->top]); + cASSERT(mc, nkeys > 0); + mc->ki[mc->top] = (indx_t)nkeys - 1; + mc->flags = z_eof_soft | z_eof_hard | (mc->flags & z_clear_mask); + inner_gone(mc); + return MDBX_NOTFOUND; +} + +/*----------------------------------------------------------------------------*/ + +/* Функция-шаблон: Приземляет курсор на данные в текущей позиции. + * В том числе, загружает данные во вложенный курсор при его наличии. 
*/ +static __always_inline int cursor_bring(const bool inner, const bool tend2first, + MDBX_cursor *__restrict mc, + MDBX_val *__restrict key, + MDBX_val *__restrict data, bool eof) { + if (inner) { + cASSERT(mc, !data && !mc->subcur && (mc->flags & z_inner) != 0); + } else { + cASSERT(mc, (mc->flags & z_inner) == 0); + } + + const page_t *mp = mc->pg[mc->top]; + if (!MDBX_DISABLE_VALIDATION && unlikely(!check_leaf_type(mc, mp))) { + ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", + mp->pgno, mp->flags); + return MDBX_CORRUPTED; + } + + const size_t nkeys = page_numkeys(mp); + cASSERT(mc, nkeys > 0); + const size_t ki = mc->ki[mc->top]; + cASSERT(mc, nkeys > ki); + cASSERT(mc, !eof || ki == nkeys - 1); + + if (inner && is_dupfix_leaf(mp)) { + be_filled(mc); + if (eof) + mc->flags |= z_eof_soft; + if (likely(key)) + *key = page_dupfix_key(mp, ki, mc->tree->dupfix_size); + return MDBX_SUCCESS; + } + + const node_t *__restrict node = page_node(mp, ki); + if (!inner && (node_flags(node) & N_DUPDATA)) { + int err = cursor_dupsort_setup(mc, node, mp); + if (unlikely(err != MDBX_SUCCESS)) + return err; + MDBX_ANALYSIS_ASSUME(mc->subcur != nullptr); + if (node_flags(node) & N_SUBDATA) { + err = tend2first ? 
inner_first(&mc->subcur->cursor, data) + : inner_last(&mc->subcur->cursor, data); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } else { + if (!tend2first) { + mc->subcur->cursor.ki[0] = (indx_t)mc->subcur->nested_tree.items - 1; + mc->subcur->cursor.flags |= z_eof_soft; + } + if (data) { + const page_t *inner_mp = mc->subcur->cursor.pg[0]; + cASSERT(mc, is_subpage(inner_mp) && is_leaf(inner_mp)); + const size_t inner_ki = mc->subcur->cursor.ki[0]; + if (is_dupfix_leaf(inner_mp)) + *data = page_dupfix_key(inner_mp, inner_ki, mc->tree->dupfix_size); + else + *data = get_key(page_node(inner_mp, inner_ki)); + } + } + be_filled(mc); + } else { + if (!inner) + inner_gone(mc); + if (data) { + int err = node_read(mc, node, data, mp); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + be_filled(mc); + if (eof) + mc->flags |= z_eof_soft; + } + + get_key_optional(node, key); + return MDBX_SUCCESS; +} + +/* Функция-шаблон: Устанавливает курсор в начало или конец. */ +static __always_inline int cursor_brim(const bool inner, const bool tend2first, + MDBX_cursor *__restrict mc, + MDBX_val *__restrict key, + MDBX_val *__restrict data) { + if (mc->top != 0) { + int err = tree_search(mc, nullptr, tend2first ? Z_FIRST : Z_LAST); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + const size_t nkeys = page_numkeys(mc->pg[mc->top]); + cASSERT(mc, nkeys > 0); + mc->ki[mc->top] = tend2first ? 
0 : nkeys - 1; + return cursor_bring(inner, tend2first, mc, key, data, !tend2first); +} + +__hot int inner_first(MDBX_cursor *mc, MDBX_val *data) { + return cursor_brim(true, true, mc, data, nullptr); +} + +__hot int inner_last(MDBX_cursor *mc, MDBX_val *data) { + return cursor_brim(true, false, mc, data, nullptr); +} + +__hot int outer_first(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data) { + return cursor_brim(false, true, mc, key, data); +} + +__hot int outer_last(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data) { + return cursor_brim(false, false, mc, key, data); +} + +/*----------------------------------------------------------------------------*/ + +/* Функция-шаблон: Передвигает курсор на одну позицию. + * При необходимости управляет вложенным курсором. */ +static __always_inline int cursor_step(const bool inner, const bool forward, + MDBX_cursor *__restrict mc, + MDBX_val *__restrict key, + MDBX_val *__restrict data, + MDBX_cursor_op op) { + if (forward) { + if (inner) + cASSERT(mc, op == MDBX_NEXT); + else + cASSERT(mc, + op == MDBX_NEXT || op == MDBX_NEXT_DUP || op == MDBX_NEXT_NODUP); + } else { + if (inner) + cASSERT(mc, op == MDBX_PREV); + else + cASSERT(mc, + op == MDBX_PREV || op == MDBX_PREV_DUP || op == MDBX_PREV_NODUP); + } + if (inner) { + cASSERT(mc, !data && !mc->subcur && (mc->flags & z_inner) != 0); + } else { + cASSERT(mc, (mc->flags & z_inner) == 0); + } + + if (unlikely(is_poor(mc))) { + int state = mc->flags; + if (state & z_fresh) { + if (forward) + return inner ? inner_first(mc, key) : outer_first(mc, key, data); + else + return inner ? inner_last(mc, key) : outer_last(mc, key, data); + } + mc->flags = inner ? z_inner | z_poor_mark : z_poor_mark; + return (state & z_after_delete) ? 
MDBX_NOTFOUND : MDBX_ENODATA; + } + + const page_t *mp = mc->pg[mc->top]; + const intptr_t nkeys = page_numkeys(mp); + cASSERT(mc, nkeys > 0); + + intptr_t ki = mc->ki[mc->top]; + const uint8_t state = + mc->flags & (z_after_delete | z_hollow | z_eof_hard | z_eof_soft); + if (likely(state == 0)) { + cASSERT(mc, ki < nkeys); + if (!inner && op != (forward ? MDBX_NEXT_NODUP : MDBX_PREV_NODUP)) { + int err = MDBX_NOTFOUND; + if (inner_pointed(mc)) { + err = forward ? inner_next(&mc->subcur->cursor, data) + : inner_prev(&mc->subcur->cursor, data); + if (likely(err == MDBX_SUCCESS)) { + get_key_optional(page_node(mp, ki), key); + return MDBX_SUCCESS; + } + if (unlikely(err != MDBX_NOTFOUND && err != MDBX_ENODATA)) { + cASSERT(mc, !inner_pointed(mc)); + return err; + } + cASSERT(mc, !forward || (mc->subcur->cursor.flags & z_eof_soft)); + } + if (op == (forward ? MDBX_NEXT_DUP : MDBX_PREV_DUP)) + return err; + } + if (!inner) + inner_gone(mc); + } else { + if (mc->flags & z_hollow) { + cASSERT(mc, !inner_pointed(mc)); + return MDBX_ENODATA; + } + + if (!inner && op == (forward ? MDBX_NEXT_DUP : MDBX_PREV_DUP)) + return MDBX_NOTFOUND; + + if (forward) { + if (state & z_after_delete) { + if (ki < nkeys) + goto bring; + } else { + cASSERT(mc, state & (z_eof_soft | z_eof_hard)); + return MDBX_NOTFOUND; + } + } else if (state & z_eof_hard) { + mc->ki[mc->top] = (indx_t)nkeys - 1; + goto bring; + } + } + + DEBUG("turn-%s: top page was %" PRIaPGNO " in cursor %p, ki %zi of %zi", + forward ? 
"next" : "prev", mp->pgno, __Wpedantic_format_voidptr(mc), ki, + nkeys); + if (forward) { + if (likely(++ki < nkeys)) + mc->ki[mc->top] = (indx_t)ki; + else { + DEBUG("%s", "=====> move to next sibling page"); + int err = cursor_sibling_right(mc); + if (unlikely(err != MDBX_SUCCESS)) + return err; + mp = mc->pg[mc->top]; + DEBUG("next page is %" PRIaPGNO ", key index %u", mp->pgno, + mc->ki[mc->top]); + } + } else { + if (likely(--ki >= 0)) + mc->ki[mc->top] = (indx_t)ki; + else { + DEBUG("%s", "=====> move to prev sibling page"); + int err = cursor_sibling_left(mc); + if (unlikely(err != MDBX_SUCCESS)) + return err; + mp = mc->pg[mc->top]; + DEBUG("prev page is %" PRIaPGNO ", key index %u", mp->pgno, + mc->ki[mc->top]); + } + } + DEBUG("==> cursor points to page %" PRIaPGNO " with %zu keys, key index %u", + mp->pgno, page_numkeys(mp), mc->ki[mc->top]); + +bring: + return cursor_bring(inner, forward, mc, key, data, false); +} + +__hot int inner_next(MDBX_cursor *mc, MDBX_val *data) { + return cursor_step(true, true, mc, data, nullptr, MDBX_NEXT); +} + +__hot int inner_prev(MDBX_cursor *mc, MDBX_val *data) { + return cursor_step(true, false, mc, data, nullptr, MDBX_PREV); +} + +__hot int outer_next(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, + MDBX_cursor_op op) { + return cursor_step(false, true, mc, key, data, op); +} + +__hot int outer_prev(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, + MDBX_cursor_op op) { + return cursor_step(false, false, mc, key, data, op); +} + +/*----------------------------------------------------------------------------*/ + +__hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, + unsigned flags) { + int err; + DKBUF_DEBUG; + MDBX_env *const env = mc->txn->env; + DEBUG("==> put db %d key [%s], size %" PRIuPTR ", data [%s] size %" PRIuPTR, + cursor_dbi_dbg(mc), DKEY_DEBUG(key), key->iov_len, + DVAL_DEBUG((flags & MDBX_RESERVE) ? 
nullptr : data), data->iov_len); + + if ((flags & MDBX_CURRENT) != 0 && (mc->flags & z_inner) == 0) { + if (unlikely(flags & (MDBX_APPEND | MDBX_NOOVERWRITE))) + return MDBX_EINVAL; + /* Запрошено обновление текущей записи, на которой сейчас стоит курсор. + * Проверяем что переданный ключ совпадает со значением в текущей позиции + * курсора. Здесь проще вызвать cursor_ops(), так как для обслуживания + * таблиц с MDBX_DUPSORT также требуется текущий размер данных. */ + MDBX_val current_key, current_data; + err = cursor_ops(mc, &current_key, &current_data, MDBX_GET_CURRENT); + if (unlikely(err != MDBX_SUCCESS)) + return err; + if (mc->clc->k.cmp(key, &current_key) != 0) + return MDBX_EKEYMISMATCH; + + if (unlikely((flags & MDBX_MULTIPLE))) + goto drop_current; + + if (mc->subcur) { + node_t *node = page_node(mc->pg[mc->top], mc->ki[mc->top]); + if (node_flags(node) & N_DUPDATA) { + cASSERT(mc, inner_pointed(mc)); + /* Если за ключом более одного значения, либо если размер данных + * отличается, то вместо обновления требуется удаление и + * последующая вставка. */ + if (mc->subcur->nested_tree.items > 1 || + current_data.iov_len != data->iov_len) { + drop_current: + err = cursor_del(mc, flags & MDBX_ALLDUPS); + if (unlikely(err != MDBX_SUCCESS)) + return err; + flags -= MDBX_CURRENT; + goto skip_check_samedata; + } + } else if (unlikely(node_size(key, data) > env->leaf_nodemax)) { + /* Уже есть пара key-value хранящаяся в обычном узле. Новые данные + * слишком большие для размещения в обычном узле вместе с ключом, но + * могут быть размещены в вложенном дереве. Удаляем узел со старыми + * данными, чтобы при помещении новых создать вложенное дерево. 
*/ + err = cursor_del(mc, 0); + if (unlikely(err != MDBX_SUCCESS)) + return err; + flags -= MDBX_CURRENT; + goto skip_check_samedata; + } + } + if (!(flags & MDBX_RESERVE) && + unlikely(cmp_lenfast(&current_data, data) == 0)) + return MDBX_SUCCESS /* the same data, nothing to update */; + skip_check_samedata:; + } + + int rc = MDBX_SUCCESS; + if (mc->tree->height == 0) { + /* new database, cursor has nothing to point to */ + cASSERT(mc, is_poor(mc)); + rc = MDBX_NO_ROOT; + } else if ((flags & MDBX_CURRENT) == 0) { + bool exact = false; + MDBX_val last_key, old_data; + if ((flags & MDBX_APPEND) && mc->tree->items > 0) { + old_data.iov_base = nullptr; + old_data.iov_len = 0; + rc = (mc->flags & z_inner) ? inner_last(mc, &last_key) + : outer_last(mc, &last_key, &old_data); + if (likely(rc == MDBX_SUCCESS)) { + const int cmp = mc->clc->k.cmp(key, &last_key); + if (likely(cmp > 0)) { + mc->ki[mc->top]++; /* step forward for appending */ + rc = MDBX_NOTFOUND; + } else if (unlikely(cmp != 0)) { + /* new-key < last-key */ + return MDBX_EKEYMISMATCH; + } else { + rc = MDBX_SUCCESS; + exact = true; + } + } + } else { + csr_t csr = + /* olddata may not be updated in case DUPFIX-page of dupfix-subDB */ + cursor_seek(mc, (MDBX_val *)key, &old_data, MDBX_SET); + rc = csr.err; + exact = csr.exact; + } + if (likely(rc == MDBX_SUCCESS)) { + if (exact) { + if (unlikely(flags & MDBX_NOOVERWRITE)) { + DEBUG("duplicate key [%s]", DKEY_DEBUG(key)); + *data = old_data; + return MDBX_KEYEXIST; + } + if (unlikely(mc->flags & z_inner)) { + /* nested subtree of DUPSORT-database with the same key, + * nothing to update */ + eASSERT(env, + data->iov_len == 0 && (old_data.iov_len == 0 || + /* olddata may not be updated in case + DUPFIX-page of dupfix-subDB */ + (mc->tree->flags & MDBX_DUPFIXED))); + return MDBX_SUCCESS; + } + if (unlikely(flags & MDBX_ALLDUPS) && inner_pointed(mc)) { + err = cursor_del(mc, MDBX_ALLDUPS); + if (unlikely(err != MDBX_SUCCESS)) + return err; + flags -= MDBX_ALLDUPS; + 
cASSERT(mc, mc->top + 1 == mc->tree->height); + rc = (mc->top >= 0) ? MDBX_NOTFOUND : MDBX_NO_ROOT; + exact = false; + } else if (!(flags & (MDBX_RESERVE | MDBX_MULTIPLE))) { + /* checking for early exit without dirtying pages */ + if (unlikely(eq_fast(data, &old_data))) { + cASSERT(mc, mc->clc->v.cmp(data, &old_data) == 0); + if (mc->subcur) { + if (flags & MDBX_NODUPDATA) + return MDBX_KEYEXIST; + if (flags & MDBX_APPENDDUP) + return MDBX_EKEYMISMATCH; + } + /* the same data, nothing to update */ + return MDBX_SUCCESS; + } + cASSERT(mc, mc->clc->v.cmp(data, &old_data) != 0); + } + } + } else if (unlikely(rc != MDBX_NOTFOUND)) + return rc; + } + + mc->flags &= ~z_after_delete; + MDBX_val xdata, *ref_data = data; + size_t *batch_dupfix_done = nullptr, batch_dupfix_given = 0; + if (unlikely(flags & MDBX_MULTIPLE)) { + batch_dupfix_given = data[1].iov_len; + batch_dupfix_done = &data[1].iov_len; + *batch_dupfix_done = 0; + } + + /* Cursor is positioned, check for room in the dirty list */ + err = cursor_touch(mc, key, ref_data); + if (unlikely(err)) + return err; + + if (unlikely(rc == MDBX_NO_ROOT)) { + /* new database, write a root leaf page */ + DEBUG("%s", "allocating new root leaf page"); + pgr_t npr = page_new(mc, P_LEAF); + if (unlikely(npr.err != MDBX_SUCCESS)) + return npr.err; + npr.err = cursor_push(mc, npr.page, 0); + if (unlikely(npr.err != MDBX_SUCCESS)) + return npr.err; + mc->tree->root = npr.page->pgno; + mc->tree->height++; + if (mc->tree->flags & MDBX_INTEGERKEY) { + assert(key->iov_len >= mc->clc->k.lmin && + key->iov_len <= mc->clc->k.lmax); + mc->clc->k.lmin = mc->clc->k.lmax = key->iov_len; + } + if (mc->tree->flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) { + assert(data->iov_len >= mc->clc->v.lmin && + data->iov_len <= mc->clc->v.lmax); + assert(mc->subcur != nullptr); + mc->tree->dupfix_size = /* mc->subcur->nested_tree.dupfix_size = */ + (unsigned)(mc->clc->v.lmin = mc->clc->v.lmax = data->iov_len); + cASSERT(mc, mc->clc->v.lmin == 
mc->subcur->cursor.clc->k.lmin); + cASSERT(mc, mc->clc->v.lmax == mc->subcur->cursor.clc->k.lmax); + if (mc->flags & z_inner) + npr.page->flags |= P_DUPFIX; + } + } + + MDBX_val old_singledup, old_data; + tree_t nested_dupdb; + page_t *sub_root = nullptr; + bool insert_key, insert_data; + uint16_t fp_flags = P_LEAF; + page_t *fp = env->page_auxbuf; + fp->txnid = mc->txn->front_txnid; + insert_key = insert_data = (rc != MDBX_SUCCESS); + old_singledup.iov_base = nullptr; + old_singledup.iov_len = 0; + if (insert_key) { + /* The key does not exist */ + DEBUG("inserting key at index %i", mc->ki[mc->top]); + if ((mc->tree->flags & MDBX_DUPSORT) && + node_size(key, data) > env->leaf_nodemax) { + /* Too big for a node, insert in sub-DB. Set up an empty + * "old sub-page" for convert_to_subtree to expand to a full page. */ + fp->dupfix_ksize = + (mc->tree->flags & MDBX_DUPFIXED) ? (uint16_t)data->iov_len : 0; + fp->lower = fp->upper = 0; + old_data.iov_len = PAGEHDRSZ; + goto convert_to_subtree; + } + } else { + /* there's only a key anyway, so this is a no-op */ + if (is_dupfix_leaf(mc->pg[mc->top])) { + size_t ksize = mc->tree->dupfix_size; + if (unlikely(key->iov_len != ksize)) + return MDBX_BAD_VALSIZE; + void *ptr = page_dupfix_ptr(mc->pg[mc->top], mc->ki[mc->top], ksize); + memcpy(ptr, key->iov_base, ksize); + fix_parent: + /* if overwriting slot 0 of leaf, need to + * update branch key if there is a parent page */ + if (mc->top && !mc->ki[mc->top]) { + size_t dtop = 1; + mc->top--; + /* slot 0 is always an empty key, find real slot */ + while (mc->top && !mc->ki[mc->top]) { + mc->top--; + dtop++; + } + err = MDBX_SUCCESS; + if (mc->ki[mc->top]) + err = tree_propagate_key(mc, key); + cASSERT(mc, mc->top + dtop < UINT16_MAX); + mc->top += (uint8_t)dtop; + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + + if (AUDIT_ENABLED()) { + err = cursor_check(mc); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + return MDBX_SUCCESS; + } + + more: + if 
(AUDIT_ENABLED()) { + err = cursor_check(mc); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + node_t *const node = page_node(mc->pg[mc->top], mc->ki[mc->top]); + + /* Large/Overflow page overwrites need special handling */ + if (unlikely(node_flags(node) & N_BIGDATA)) { + const size_t dpages = (node_size(key, data) > env->leaf_nodemax) + ? largechunk_npages(env, data->iov_len) + : 0; + + const pgno_t pgno = node_largedata_pgno(node); + pgr_t lp = page_get_large(mc, pgno, mc->pg[mc->top]->txnid); + if (unlikely(lp.err != MDBX_SUCCESS)) + return lp.err; + cASSERT(mc, page_type(lp.page) == P_LARGE); + + /* Is the ov page from this txn (or a parent) and big enough? */ + const size_t ovpages = lp.page->pages; + const size_t extra_threshold = + (mc->tree == &mc->txn->dbs[FREE_DBI]) + ? 1 + : /* LY: add configurable threshold to keep reserve space */ 0; + if (!is_frozen(mc->txn, lp.page) && ovpages >= dpages && + ovpages <= dpages + extra_threshold) { + /* yes, overwrite it. */ + if (!is_modifable(mc->txn, lp.page)) { + if (is_spilled(mc->txn, lp.page)) { + lp = /* TODO: avoid search and get txn & spill-index from + page_result */ + page_unspill(mc->txn, lp.page); + if (unlikely(lp.err)) + return lp.err; + } else { + if (unlikely(!mc->txn->parent)) { + ERROR("Unexpected not frozen/modifiable/spilled but shadowed %s " + "page %" PRIaPGNO " mod-txnid %" PRIaTXN "," + " without parent transaction, current txn %" PRIaTXN + " front %" PRIaTXN, + "large/overflow", pgno, lp.page->txnid, mc->txn->txnid, + mc->txn->front_txnid); + return MDBX_PROBLEM; + } + + /* It is writable only in a parent txn */ + page_t *np = page_shadow_alloc(mc->txn, ovpages); + if (unlikely(!np)) + return MDBX_ENOMEM; + + memcpy(np, lp.page, PAGEHDRSZ); /* Copy header of page */ + err = page_dirty(mc->txn, lp.page = np, ovpages); + if (unlikely(err != MDBX_SUCCESS)) + return err; + +#if MDBX_ENABLE_PGOP_STAT + mc->txn->env->lck->pgops.clone.weak += ovpages; +#endif /* MDBX_ENABLE_PGOP_STAT */ + 
cASSERT(mc, dpl_check(mc->txn)); + } + } + node_set_ds(node, data->iov_len); + if (flags & MDBX_RESERVE) + data->iov_base = page_data(lp.page); + else + memcpy(page_data(lp.page), data->iov_base, data->iov_len); + + if (AUDIT_ENABLED()) { + err = cursor_check(mc); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + return MDBX_SUCCESS; + } + + if ((err = page_retire(mc, lp.page)) != MDBX_SUCCESS) + return err; + } else { + old_data.iov_len = node_ds(node); + old_data.iov_base = node_data(node); + cASSERT(mc, ptr_disp(old_data.iov_base, old_data.iov_len) <= + ptr_disp(mc->pg[mc->top], env->ps)); + + /* DB has dups? */ + if (mc->tree->flags & MDBX_DUPSORT) { + /* Prepare (sub-)page/sub-DB to accept the new item, if needed. + * fp: old sub-page or a header faking it. + * mp: new (sub-)page. + * xdata: node data with new sub-page or sub-DB. */ + size_t growth = 0; /* growth in page size.*/ + page_t *mp = fp = xdata.iov_base = env->page_auxbuf; + mp->pgno = mc->pg[mc->top]->pgno; + + /* Was a single item before, must convert now */ + if (!(node_flags(node) & N_DUPDATA)) { + /* does data match? 
*/ + if (flags & MDBX_APPENDDUP) { + const int cmp = mc->clc->v.cmp(data, &old_data); + cASSERT(mc, cmp != 0 || eq_fast(data, &old_data)); + if (unlikely(cmp <= 0)) + return MDBX_EKEYMISMATCH; + } else if (eq_fast(data, &old_data)) { + cASSERT(mc, mc->clc->v.cmp(data, &old_data) == 0); + if (flags & MDBX_NODUPDATA) + return MDBX_KEYEXIST; + /* data is match exactly byte-to-byte, nothing to update */ + rc = MDBX_SUCCESS; + if (unlikely(batch_dupfix_done)) + goto batch_dupfix_continue; + return rc; + } + + /* Just overwrite the current item */ + if (flags & MDBX_CURRENT) { + cASSERT(mc, node_size(key, data) <= env->leaf_nodemax); + goto current; + } + + /* Back up original data item */ + memcpy(old_singledup.iov_base = fp + 1, old_data.iov_base, + old_singledup.iov_len = old_data.iov_len); + + /* Make sub-page header for the dup items, with dummy body */ + fp->flags = P_LEAF | P_SUBP; + fp->lower = 0; + xdata.iov_len = PAGEHDRSZ + old_data.iov_len + data->iov_len; + if (mc->tree->flags & MDBX_DUPFIXED) { + fp->flags |= P_DUPFIX; + fp->dupfix_ksize = (uint16_t)data->iov_len; + /* Будем создавать DUPFIX-страницу, как минимум с двумя элементами. + * При коротких значениях и наличии свободного места можно сделать + * некоторое резервирование места, чтобы при последующих добавлениях + * не сразу расширять созданную под-страницу. + * Резервирование в целом сомнительно (см ниже), но может сработать + * в плюс (а если в минус то несущественный) при коротких ключах. 
*/ + xdata.iov_len += page_subleaf2_reserve( + env, page_room(mc->pg[mc->top]) + old_data.iov_len, + xdata.iov_len, data->iov_len); + cASSERT(mc, (xdata.iov_len & 1) == 0); + } else { + xdata.iov_len += 2 * (sizeof(indx_t) + NODESIZE) + + (old_data.iov_len & 1) + (data->iov_len & 1); + } + cASSERT(mc, (xdata.iov_len & 1) == 0); + fp->upper = (uint16_t)(xdata.iov_len - PAGEHDRSZ); + old_data.iov_len = xdata.iov_len; /* pretend olddata is fp */ + } else if (node_flags(node) & N_SUBDATA) { + /* Data is on sub-DB, just store it */ + flags |= N_DUPDATA | N_SUBDATA; + goto dupsort_put; + } else { + /* Data is on sub-page */ + fp = old_data.iov_base; + switch (flags) { + default: + growth = is_dupfix_leaf(fp) + ? fp->dupfix_ksize + : (node_size(data, nullptr) + sizeof(indx_t)); + if (page_room(fp) >= growth) { + /* На текущей под-странице есть место для добавления элемента. + * Оптимальнее продолжить использовать эту страницу, ибо + * добавление вложенного дерева увеличит WAF на одну страницу. */ + goto continue_subpage; + } + /* На текущей под-странице нет места для еще одного элемента. + * Можно либо увеличить эту под-страницу, либо вынести куст + * значений во вложенное дерево. + * + * Продолжать использовать текущую под-страницу возможно + * только пока и если размер после добавления элемента будет + * меньше leaf_nodemax. Соответственно, при превышении + * просто сразу переходим на вложенное дерево. */ + xdata.iov_len = old_data.iov_len + (growth += growth & 1); + if (xdata.iov_len > env->subpage_limit) + goto convert_to_subtree; + + /* Можно либо увеличить под-страницу, в том числе с некоторым + * запасом, либо перейти на вложенное поддерево. 
+ * + * Резервирование места на под-странице представляется сомнительным: + * - Резервирование увеличит рыхлость страниц, в том числе + * вероятность разделения основной/гнездовой страницы; + * - Сложно предсказать полезный размер резервирования, + * особенно для не-MDBX_DUPFIXED; + * - Наличие резерва позволяет съекономить только на перемещении + * части элементов основной/гнездовой страницы при последующих + * добавлениях в нее элементов. Причем после первого изменения + * размера под-страницы, её тело будет примыкать + * к неиспользуемому месту на основной/гнездовой странице, + * поэтому последующие последовательные добавления потребуют + * только передвижения в entries[]. + * + * Соответственно, более важным/определяющим представляется + * своевременный переход к вложеному дереву, но тут достаточно + * сложный конфликт интересов: + * - При склонности к переходу к вложенным деревьям, суммарно + * в БД будет большее кол-во более рыхлых страниц. Это увеличит + * WAF, а также RAF при последовательных чтениях большой БД. + * Однако, при коротких ключах и большом кол-ве + * дубликатов/мультизначений, плотность ключей в листовых + * страницах основного дерева будет выше. Соответственно, будет + * пропорционально меньше branch-страниц. Поэтому будет выше + * вероятность оседания/не-вымывания страниц основного дерева из + * LRU-кэша, а также попадания в write-back кэш при записи. + * - Наоботот, при склонности к использованию под-страниц, будут + * наблюдаться обратные эффекты. Плюс некоторые накладные расходы + * на лишнее копирование данных под-страниц в сценариях + * нескольких обонвлений дубликатов одного куста в одной + * транзакции. 
+ * + * Суммарно наиболее рациональным представляется такая тактика: + * - Вводим три порога subpage_limit, subpage_room_threshold + * и subpage_reserve_prereq, которые могут быть + * заданы/скорректированы пользователем в ‰ от leaf_nodemax; + * - Используем под-страницу пока её размер меньше subpage_limit + * и на основной/гнездовой странице не-менее + * subpage_room_threshold свободного места; + * - Резервируем место только для 1-3 коротких dupfix-элементов, + * расширяя размер под-страницы на размер кэш-линии ЦПУ, но + * только если на странице не менее subpage_reserve_prereq + * свободного места. + * - По-умолчанию устанавливаем: + * subpage_limit = leaf_nodemax (1000‰); + * subpage_room_threshold = 0; + * subpage_reserve_prereq = leaf_nodemax (1000‰). + */ + if (is_dupfix_leaf(fp)) + growth += page_subleaf2_reserve( + env, page_room(mc->pg[mc->top]) + old_data.iov_len, + xdata.iov_len, data->iov_len); + else { + /* TODO: Если добавить возможность для пользователя задавать + * min/max размеров ключей/данных, то здесь разумно реализовать + * тактику резервирования подобную dupfixed. 
*/ + } + break; + + case MDBX_CURRENT | MDBX_NODUPDATA: + case MDBX_CURRENT: + continue_subpage: + fp->txnid = mc->txn->front_txnid; + fp->pgno = mp->pgno; + mc->subcur->cursor.pg[0] = fp; + flags |= N_DUPDATA; + goto dupsort_put; + } + xdata.iov_len = old_data.iov_len + growth; + cASSERT(mc, (xdata.iov_len & 1) == 0); + } + + fp_flags = fp->flags; + if (xdata.iov_len > env->subpage_limit || + node_size_len(node_ks(node), xdata.iov_len) > env->leaf_nodemax || + (env->subpage_room_threshold && + page_room(mc->pg[mc->top]) + + node_size_len(node_ks(node), old_data.iov_len) < + env->subpage_room_threshold + + node_size_len(node_ks(node), xdata.iov_len))) { + /* Too big for a sub-page, convert to sub-DB */ + convert_to_subtree: + fp_flags &= ~P_SUBP; + nested_dupdb.dupfix_size = 0; + nested_dupdb.flags = flags_db2sub(mc->tree->flags); + if (mc->tree->flags & MDBX_DUPFIXED) { + fp_flags |= P_DUPFIX; + nested_dupdb.dupfix_size = fp->dupfix_ksize; + } + nested_dupdb.height = 1; + nested_dupdb.branch_pages = 0; + nested_dupdb.leaf_pages = 1; + nested_dupdb.large_pages = 0; + nested_dupdb.items = page_numkeys(fp); + xdata.iov_len = sizeof(nested_dupdb); + xdata.iov_base = &nested_dupdb; + const pgr_t par = gc_alloc_single(mc); + mp = par.page; + if (unlikely(par.err != MDBX_SUCCESS)) + return par.err; + mc->tree->leaf_pages += 1; + cASSERT(mc, env->ps > old_data.iov_len); + growth = env->ps - (unsigned)old_data.iov_len; + cASSERT(mc, (growth & 1) == 0); + flags |= N_DUPDATA | N_SUBDATA; + nested_dupdb.root = mp->pgno; + nested_dupdb.sequence = 0; + nested_dupdb.mod_txnid = mc->txn->txnid; + sub_root = mp; + } + if (mp != fp) { + mp->flags = fp_flags; + mp->txnid = mc->txn->front_txnid; + mp->dupfix_ksize = fp->dupfix_ksize; + mp->lower = fp->lower; + cASSERT(mc, fp->upper + growth < UINT16_MAX); + mp->upper = fp->upper + (indx_t)growth; + if (unlikely(fp_flags & P_DUPFIX)) { + memcpy(page_data(mp), page_data(fp), + page_numkeys(fp) * fp->dupfix_ksize); + cASSERT(mc, 
(((mp->dupfix_ksize & page_numkeys(mp)) ^ mp->upper) & + 1) == 0); + } else { + cASSERT(mc, (mp->upper & 1) == 0); + memcpy(ptr_disp(mp, mp->upper + PAGEHDRSZ), + ptr_disp(fp, fp->upper + PAGEHDRSZ), + old_data.iov_len - fp->upper - PAGEHDRSZ); + memcpy(mp->entries, fp->entries, + page_numkeys(fp) * sizeof(mp->entries[0])); + for (size_t i = 0; i < page_numkeys(fp); i++) { + cASSERT(mc, mp->entries[i] + growth <= UINT16_MAX); + mp->entries[i] += (indx_t)growth; + } + } + } + + if (!insert_key) + node_del(mc, 0); + ref_data = &xdata; + flags |= N_DUPDATA; + goto insert_node; + } + + /* MDBX passes N_SUBDATA in 'flags' to write a DB record */ + if (unlikely((node_flags(node) ^ flags) & N_SUBDATA)) + return MDBX_INCOMPATIBLE; + + current: + if (data->iov_len == old_data.iov_len) { + cASSERT(mc, EVEN_CEIL(key->iov_len) == EVEN_CEIL(node_ks(node))); + /* same size, just replace it. Note that we could + * also reuse this node if the new data is smaller, + * but instead we opt to shrink the node in that case. */ + if (flags & MDBX_RESERVE) + data->iov_base = old_data.iov_base; + else if (!(mc->flags & z_inner)) + memcpy(old_data.iov_base, data->iov_base, data->iov_len); + else { + cASSERT(mc, page_numkeys(mc->pg[mc->top]) == 1); + cASSERT(mc, page_type_compat(mc->pg[mc->top]) == P_LEAF); + cASSERT(mc, node_ds(node) == 0); + cASSERT(mc, node_flags(node) == 0); + cASSERT(mc, key->iov_len < UINT16_MAX); + node_set_ks(node, key->iov_len); + memcpy(node_key(node), key->iov_base, key->iov_len); + cASSERT(mc, ptr_disp(node_key(node), node_ds(node)) < + ptr_disp(mc->pg[mc->top], env->ps)); + goto fix_parent; + } + + if (AUDIT_ENABLED()) { + err = cursor_check(mc); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + return MDBX_SUCCESS; + } + } + node_del(mc, 0); + } + + ref_data = data; + +insert_node:; + const unsigned naf = flags & NODE_ADD_FLAGS; + size_t nsize = is_dupfix_leaf(mc->pg[mc->top]) + ? 
key->iov_len + : leaf_size(env, key, ref_data); + if (page_room(mc->pg[mc->top]) < nsize) { + rc = page_split(mc, key, ref_data, P_INVALID, + insert_key ? naf : naf | MDBX_SPLIT_REPLACE); + if (rc == MDBX_SUCCESS && AUDIT_ENABLED()) + rc = insert_key ? cursor_check(mc) : cursor_check_updating(mc); + } else { + /* There is room already in this leaf page. */ + if (is_dupfix_leaf(mc->pg[mc->top])) { + cASSERT(mc, !(naf & (N_BIGDATA | N_SUBDATA | N_DUPDATA)) && + ref_data->iov_len == 0); + rc = node_add_dupfix(mc, mc->ki[mc->top], key); + } else + rc = node_add_leaf(mc, mc->ki[mc->top], key, ref_data, naf); + if (likely(rc == 0)) { + /* Adjust other cursors pointing to mp */ + page_t *const mp = mc->pg[mc->top]; + const size_t dbi = cursor_dbi(mc); + for (MDBX_cursor *m2 = mc->txn->cursors[dbi]; m2; m2 = m2->next) { + MDBX_cursor *m3 = (mc->flags & z_inner) ? &m2->subcur->cursor : m2; + if (!is_related(mc, m3) || m3->pg[mc->top] != mp) + continue; + if (m3->ki[mc->top] >= mc->ki[mc->top]) + m3->ki[mc->top] += insert_key; + if (inner_pointed(m3)) + cursor_inner_refresh(m3, mp, m3->ki[mc->top]); + } + } + } + + if (likely(rc == MDBX_SUCCESS)) { + /* Now store the actual data in the child DB. Note that we're + * storing the user data in the keys field, so there are strict + * size limits on dupdata. The actual data fields of the child + * DB are all zero size. 
*/ + if (flags & N_DUPDATA) { + MDBX_val empty; + dupsort_put: + empty.iov_len = 0; + empty.iov_base = nullptr; + node_t *node = page_node(mc->pg[mc->top], mc->ki[mc->top]); +#define SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE 1 + STATIC_ASSERT( + (MDBX_NODUPDATA >> SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE) == + MDBX_NOOVERWRITE); + unsigned inner_flags = + MDBX_CURRENT | ((flags & MDBX_NODUPDATA) >> + SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE); + if ((flags & MDBX_CURRENT) == 0) { + inner_flags -= MDBX_CURRENT; + rc = cursor_dupsort_setup(mc, node, mc->pg[mc->top]); + if (unlikely(rc != MDBX_SUCCESS)) + goto dupsort_error; + } + subcur_t *const mx = mc->subcur; + if (sub_root) { + cASSERT(mc, mx->nested_tree.height == 1 && + mx->nested_tree.root == sub_root->pgno); + mx->cursor.flags = z_inner; + mx->cursor.top = 0; + mx->cursor.pg[0] = sub_root; + mx->cursor.ki[0] = 0; + } + if (old_singledup.iov_base) { + /* converted, write the original data first */ + if (is_dupfix_leaf(mx->cursor.pg[0])) + rc = node_add_dupfix(&mx->cursor, 0, &old_singledup); + else + rc = node_add_leaf(&mx->cursor, 0, &old_singledup, &empty, 0); + if (unlikely(rc != MDBX_SUCCESS)) + goto dupsort_error; + mx->cursor.tree->items = 1; + } + if (!(node_flags(node) & N_SUBDATA) || sub_root) { + page_t *const mp = mc->pg[mc->top]; + const intptr_t nkeys = page_numkeys(mp); + const size_t dbi = cursor_dbi(mc); + + for (MDBX_cursor *m2 = mc->txn->cursors[dbi]; m2; m2 = m2->next) { + if (!is_related(mc, m2) || m2->pg[mc->top] != mp) + continue; + if (/* пропускаем незаполненные курсоры, иначе получится что у такого + курсора будет инициализирован вложенный, + что антилогично и бесполезно. 
*/ + is_filled(m2) && m2->ki[mc->top] == mc->ki[mc->top]) { + cASSERT(m2, m2->subcur->cursor.clc == mx->cursor.clc); + m2->subcur->nested_tree = mx->nested_tree; + m2->subcur->cursor.pg[0] = mx->cursor.pg[0]; + if (old_singledup.iov_base) { + m2->subcur->cursor.top_and_flags = z_inner; + m2->subcur->cursor.ki[0] = 0; + } + DEBUG("Sub-dbi -%zu root page %" PRIaPGNO, + cursor_dbi(&m2->subcur->cursor), + m2->subcur->nested_tree.root); + } else if (!insert_key && m2->ki[mc->top] < nkeys) + cursor_inner_refresh(m2, mp, m2->ki[mc->top]); + } + } + cASSERT(mc, mc->subcur->nested_tree.items < PTRDIFF_MAX); + const size_t probe = (size_t)mc->subcur->nested_tree.items; +#define SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND 1 + STATIC_ASSERT((MDBX_APPENDDUP >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND) == + MDBX_APPEND); + inner_flags |= + (flags & MDBX_APPENDDUP) >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND; + rc = cursor_put(&mc->subcur->cursor, data, &empty, inner_flags); + if (flags & N_SUBDATA) { + void *db = node_data(node); + mc->subcur->nested_tree.mod_txnid = mc->txn->txnid; + memcpy(db, &mc->subcur->nested_tree, sizeof(tree_t)); + } + insert_data = (probe != (size_t)mc->subcur->nested_tree.items); + } + /* Increment count unless we just replaced an existing item. */ + if (insert_data) + mc->tree->items++; + if (insert_key) { + if (unlikely(rc != MDBX_SUCCESS)) + goto dupsort_error; + /* If we succeeded and the key didn't exist before, + * make sure the cursor is marked valid. 
*/ + be_filled(mc); + } + if (likely(rc == MDBX_SUCCESS)) { + cASSERT(mc, is_filled(mc)); + if (unlikely(batch_dupfix_done)) { + batch_dupfix_continue: + /* let caller know how many succeeded, if any */ + if ((*batch_dupfix_done += 1) < batch_dupfix_given) { + data[0].iov_base = ptr_disp(data[0].iov_base, data[0].iov_len); + insert_key = insert_data = false; + old_singledup.iov_base = nullptr; + goto more; + } + } + if (AUDIT_ENABLED()) + rc = cursor_check(mc); + } + return rc; + + dupsort_error: + if (unlikely(rc == MDBX_KEYEXIST)) { + /* should not happen, we deleted that item */ + ERROR("Unexpected %i error while put to nested dupsort's hive", rc); + rc = MDBX_PROBLEM; + } + } + mc->txn->flags |= MDBX_TXN_ERROR; + return rc; +} + +__hot int cursor_put_checklen(MDBX_cursor *mc, const MDBX_val *key, + MDBX_val *data, unsigned flags) { + cASSERT(mc, (mc->flags & z_inner) == 0); + if (unlikely(key->iov_len > mc->clc->k.lmax || + key->iov_len < mc->clc->k.lmin)) { + cASSERT(mc, !"Invalid key-size"); + return MDBX_BAD_VALSIZE; + } + if (unlikely(data->iov_len > mc->clc->v.lmax || + data->iov_len < mc->clc->v.lmin)) { + cASSERT(mc, !"Invalid data-size"); + return MDBX_BAD_VALSIZE; + } + + uint64_t aligned_keybytes, aligned_databytes; + MDBX_val aligned_key, aligned_data; + if (mc->tree->flags & MDBX_INTEGERKEY) { + if (key->iov_len == 8) { + if (unlikely(7 & (uintptr_t)key->iov_base)) { + /* copy instead of return error to avoid break compatibility */ + aligned_key.iov_base = bcopy_8(&aligned_keybytes, key->iov_base); + aligned_key.iov_len = key->iov_len; + key = &aligned_key; + } + } else if (key->iov_len == 4) { + if (unlikely(3 & (uintptr_t)key->iov_base)) { + /* copy instead of return error to avoid break compatibility */ + aligned_key.iov_base = bcopy_4(&aligned_keybytes, key->iov_base); + aligned_key.iov_len = key->iov_len; + key = &aligned_key; + } + } else { + cASSERT(mc, !"key-size is invalid for MDBX_INTEGERKEY"); + return MDBX_BAD_VALSIZE; + } + } + if 
(mc->tree->flags & MDBX_INTEGERDUP) { + if (data->iov_len == 8) { + if (unlikely(7 & (uintptr_t)data->iov_base)) { + if (unlikely(flags & MDBX_MULTIPLE)) + return MDBX_BAD_VALSIZE; + /* copy instead of return error to avoid break compatibility */ + aligned_data.iov_base = bcopy_8(&aligned_databytes, data->iov_base); + aligned_data.iov_len = data->iov_len; + data = &aligned_data; + } + } else if (data->iov_len == 4) { + if (unlikely(3 & (uintptr_t)data->iov_base)) { + if (unlikely(flags & MDBX_MULTIPLE)) + return MDBX_BAD_VALSIZE; + /* copy instead of return error to avoid break compatibility */ + aligned_data.iov_base = bcopy_4(&aligned_databytes, data->iov_base); + aligned_data.iov_len = data->iov_len; + data = &aligned_data; + } + } else { + cASSERT(mc, !"data-size is invalid for MDBX_INTEGERKEY"); + return MDBX_BAD_VALSIZE; + } + } + return cursor_put(mc, key, data, flags); +} + +__hot int cursor_del(MDBX_cursor *mc, unsigned flags) { + if (unlikely(!is_filled(mc))) + return MDBX_ENODATA; + + int rc = cursor_touch(mc, nullptr, nullptr); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + page_t *mp = mc->pg[mc->top]; + cASSERT(mc, is_modifable(mc->txn, mp)); + if (!MDBX_DISABLE_VALIDATION && unlikely(!check_leaf_type(mc, mp))) { + ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", + mp->pgno, mp->flags); + return MDBX_CORRUPTED; + } + if (is_dupfix_leaf(mp)) + goto del_key; + + node_t *node = page_node(mp, mc->ki[mc->top]); + if (node_flags(node) & N_DUPDATA) { + if (flags & (MDBX_ALLDUPS | /* for compatibility */ MDBX_NODUPDATA)) { + /* will subtract the final entry later */ + mc->tree->items -= mc->subcur->nested_tree.items - 1; + } else { + if (!(node_flags(node) & N_SUBDATA)) { + page_t *sp = node_data(node); + cASSERT(mc, is_subpage(sp)); + sp->txnid = mp->txnid; + mc->subcur->cursor.pg[0] = sp; + } + rc = cursor_del(&mc->subcur->cursor, 0); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + /* If sub-DB still has entries, we're done */ 
+ if (mc->subcur->nested_tree.items) { + if (node_flags(node) & N_SUBDATA) { + /* update subDB info */ + mc->subcur->nested_tree.mod_txnid = mc->txn->txnid; + memcpy(node_data(node), &mc->subcur->nested_tree, sizeof(tree_t)); + } else { + /* shrink sub-page */ + node = node_shrink(mp, mc->ki[mc->top], node); + mc->subcur->cursor.pg[0] = node_data(node); + /* fix other sub-DB cursors pointed at sub-pages on this page */ + for (MDBX_cursor *m2 = mc->txn->cursors[cursor_dbi(mc)]; m2; + m2 = m2->next) { + if (!is_related(mc, m2) || m2->pg[mc->top] != mp) + continue; + const node_t *inner = node; + if (unlikely(m2->ki[mc->top] >= page_numkeys(mp))) { + m2->flags = z_poor_mark; + m2->subcur->nested_tree.root = 0; + m2->subcur->cursor.top_and_flags = z_inner | z_poor_mark; + continue; + } + if (m2->ki[mc->top] != mc->ki[mc->top]) { + inner = page_node(mp, m2->ki[mc->top]); + if (node_flags(inner) & N_SUBDATA) + continue; + } + m2->subcur->cursor.pg[0] = node_data(inner); + } + } + mc->tree->items -= 1; + cASSERT(mc, mc->tree->items > 0 && mc->tree->height > 0 && + mc->tree->root != P_INVALID); + return rc; + } + /* otherwise fall thru and delete the sub-DB */ + } + + if ((node_flags(node) & N_SUBDATA) && mc->subcur->cursor.tree->height) { + /* add all the child DB's pages to the free list */ + rc = tree_drop(&mc->subcur->cursor, false); + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + } + inner_gone(mc); + } else { + cASSERT(mc, !inner_pointed(mc)); + /* MDBX passes N_SUBDATA in 'flags' to delete a DB record */ + if (unlikely((node_flags(node) ^ flags) & N_SUBDATA)) + return MDBX_INCOMPATIBLE; + } + + /* add large/overflow pages to free list */ + if (node_flags(node) & N_BIGDATA) { + pgr_t lp = page_get_large(mc, node_largedata_pgno(node), mp->txnid); + if (unlikely((rc = lp.err) || (rc = page_retire(mc, lp.page)))) + goto fail; + } + +del_key: + mc->tree->items -= 1; + const MDBX_dbi dbi = cursor_dbi(mc); + indx_t ki = mc->ki[mc->top]; + mp = mc->pg[mc->top]; + 
cASSERT(mc, is_leaf(mp)); + node_del(mc, mc->tree->dupfix_size); + + /* Adjust other cursors pointing to mp */ + for (MDBX_cursor *m2 = mc->txn->cursors[dbi]; m2; m2 = m2->next) { + MDBX_cursor *m3 = (mc->flags & z_inner) ? &m2->subcur->cursor : m2; + if (!is_related(mc, m3) || m3->pg[mc->top] != mp) + continue; + if (m3->ki[mc->top] == ki) { + m3->flags |= z_after_delete; + inner_gone(m3); + } else { + m3->ki[mc->top] -= m3->ki[mc->top] > ki; + if (inner_pointed(m3)) + cursor_inner_refresh(m3, m3->pg[mc->top], m3->ki[mc->top]); + } + } + + rc = tree_rebalance(mc); + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + + mc->flags |= z_after_delete; + inner_gone(mc); + if (unlikely(mc->top < 0)) { + /* DB is totally empty now, just bail out. + * Other cursors adjustments were already done + * by rebalance and aren't needed here. */ + cASSERT(mc, mc->tree->items == 0 && + (mc->tree->root == P_INVALID || + (is_inner(mc) && !mc->tree->root)) && + mc->flags < 0); + return MDBX_SUCCESS; + } + + ki = mc->ki[mc->top]; + mp = mc->pg[mc->top]; + cASSERT(mc, is_leaf(mc->pg[mc->top])); + size_t nkeys = page_numkeys(mp); + cASSERT(mc, + (mc->tree->items > 0 && nkeys > 0) || + ((mc->flags & z_inner) && mc->tree->items == 0 && nkeys == 0)); + + /* Adjust this and other cursors pointing to mp */ + const intptr_t top = /* может быть сброшен в -1 */ mc->top; + for (MDBX_cursor *m2 = mc->txn->cursors[dbi]; m2; m2 = m2->next) { + MDBX_cursor *m3 = (mc->flags & z_inner) ? &m2->subcur->cursor : m2; + if (top > m3->top || m3->pg[top] != mp) + continue; + /* if m3 points past last node in page, find next sibling */ + if (m3->ki[top] >= nkeys) { + rc = cursor_sibling_right(m3); + if (rc == MDBX_NOTFOUND) { + rc = MDBX_SUCCESS; + continue; + } + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + } + if (/* пропускаем незаполненные курсоры, иначе получится что у такого + курсора будет инициализирован вложенный, + что антилогично и бесполезно. 
*/ + is_filled(m3) && m3->subcur && + (m3->ki[top] >= ki || + /* уже переместились вправо */ m3->pg[top] != mp)) { + node = page_node(m3->pg[m3->top], m3->ki[m3->top]); + /* Если это dupsort-узел, то должен быть валидный вложенный курсор. */ + if (node_flags(node) & N_DUPDATA) { + /* Тут три варианта событий: + * 1) Вложенный курсор уже инициализирован, у узла есть флаг N_SUBDATA, + * соответственно дубликаты вынесены в отдельное дерево с корнем + * в отдельной странице = ничего корректировать не требуется. + * 2) Вложенный курсор уже инициализирован, у узла нет флага N_SUBDATA, + * соответственно дубликаты размещены на вложенной sub-странице. + * 3) Курсор стоял на удалённом элементе, который имел одно значение, + * а после удаления переместился на следующий элемент с дубликатами. + * В этом случае вложенный курсор не инициализирован и тепеь его + * нужно установить на первый дубликат. */ + if (is_pointed(&m3->subcur->cursor)) { + if ((node_flags(node) & N_SUBDATA) == 0) { + cASSERT(m3, m3->subcur->cursor.top == 0 && + m3->subcur->nested_tree.height == 1); + m3->subcur->cursor.pg[0] = node_data(node); + } + } else { + rc = cursor_dupsort_setup(m3, node, m3->pg[m3->top]); + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + if (node_flags(node) & N_SUBDATA) { + rc = inner_first(&m3->subcur->cursor, nullptr); + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + } + } + } else + inner_gone(m3); + } + } + + cASSERT(mc, rc == MDBX_SUCCESS); + if (AUDIT_ENABLED()) + rc = cursor_check(mc); + return rc; + +fail: + mc->txn->flags |= MDBX_TXN_ERROR; + return rc; +} + +/*----------------------------------------------------------------------------*/ + +__hot csr_t cursor_seek(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, + MDBX_cursor_op op) { + DKBUF_DEBUG; + + csr_t ret; + ret.exact = false; + if (unlikely(key->iov_len < mc->clc->k.lmin || + key->iov_len > mc->clc->k.lmax)) { + cASSERT(mc, !"Invalid key-size"); + ret.err = MDBX_BAD_VALSIZE; + return ret; + } + + MDBX_val 
aligned_key = *key; + uint64_t aligned_key_buf; + if (mc->tree->flags & MDBX_INTEGERKEY) { + if (aligned_key.iov_len == 8) { + if (unlikely(7 & (uintptr_t)aligned_key.iov_base)) + /* copy instead of return error to avoid break compatibility */ + aligned_key.iov_base = bcopy_8(&aligned_key_buf, aligned_key.iov_base); + } else if (aligned_key.iov_len == 4) { + if (unlikely(3 & (uintptr_t)aligned_key.iov_base)) + /* copy instead of return error to avoid break compatibility */ + aligned_key.iov_base = bcopy_4(&aligned_key_buf, aligned_key.iov_base); + } else { + cASSERT(mc, !"key-size is invalid for MDBX_INTEGERKEY"); + ret.err = MDBX_BAD_VALSIZE; + return ret; + } + } + + page_t *mp; + node_t *node = nullptr; + /* See if we're already on the right page */ + if (is_pointed(mc)) { + mp = mc->pg[mc->top]; + cASSERT(mc, is_leaf(mp)); + const size_t nkeys = page_numkeys(mp); + if (unlikely(nkeys == 0)) { + /* при создании первой листовой страницы */ + cASSERT(mc, mc->top == 0 && mc->tree->height == 1 && + mc->tree->branch_pages == 0 && + mc->tree->leaf_pages == 1 && mc->ki[0] == 0); + /* Логически верно, но нет смысла, ибо это мимолетная/временная + * ситуация до добавления элемента выше по стеку вызовов: + mc->flags |= z_eof_soft | z_hollow; */ + ret.err = MDBX_NOTFOUND; + return ret; + } + + MDBX_val nodekey; + if (is_dupfix_leaf(mp)) + nodekey = page_dupfix_key(mp, 0, mc->tree->dupfix_size); + else { + node = page_node(mp, 0); + nodekey = get_key(node); + inner_gone(mc); + } + int cmp = mc->clc->k.cmp(&aligned_key, &nodekey); + if (unlikely(cmp == 0)) { + /* Probably happens rarely, but first node on the page + * was the one we wanted. */ + mc->ki[mc->top] = 0; + ret.exact = true; + goto got_node; + } + + if (cmp > 0) { + /* Искомый ключ больше первого на этой странице, + * целевая позиция на этой странице либо правее (ближе к концу). 
*/ + if (likely(nkeys > 1)) { + if (is_dupfix_leaf(mp)) { + nodekey.iov_base = page_dupfix_ptr(mp, nkeys - 1, nodekey.iov_len); + } else { + node = page_node(mp, nkeys - 1); + nodekey = get_key(node); + } + cmp = mc->clc->k.cmp(&aligned_key, &nodekey); + if (cmp == 0) { + /* last node was the one we wanted */ + mc->ki[mc->top] = (indx_t)(nkeys - 1); + ret.exact = true; + goto got_node; + } + if (cmp < 0) { + /* Искомый ключ между первым и последним на этой страницы, + * поэтому пропускаем поиск по дереву и продолжаем только на текущей + * странице. */ + /* Сравниваем с текущей позицией, ибо частным сценарием является такое + * совпадение, но не делаем проверку если текущая позиция является + * первой/последний и соответственно такое сравнение было выше. */ + if (mc->ki[mc->top] > 0 && mc->ki[mc->top] < nkeys - 1) { + if (is_dupfix_leaf(mp)) { + nodekey.iov_base = + page_dupfix_ptr(mp, mc->ki[mc->top], nodekey.iov_len); + } else { + node = page_node(mp, mc->ki[mc->top]); + nodekey = get_key(node); + } + cmp = mc->clc->k.cmp(&aligned_key, &nodekey); + if (cmp == 0) { + /* current node was the one we wanted */ + ret.exact = true; + goto got_node; + } + } + goto search_node; + } + } + + /* Если в стеке курсора есть страницы справа, то продолжим искать там. */ + cASSERT(mc, mc->tree->height > mc->top); + for (intptr_t i = 0; i < mc->top; i++) + if ((size_t)mc->ki[i] + 1 < page_numkeys(mc->pg[i])) + goto continue_other_pages; + + /* Ключ больше последнего. */ + mc->ki[mc->top] = (indx_t)nkeys; + if (op < MDBX_SET_RANGE) { + target_not_found: + cASSERT(mc, op == MDBX_SET || op == MDBX_SET_KEY || + op == MDBX_GET_BOTH || op == MDBX_GET_BOTH_RANGE); + /* Операция предполагает поиск конкретного ключа, который не найден. + * Поэтому переводим курсор в неустановленное состояние, но без сброса + * top, что позволяет работать fastpath при последующем поиске по дереву + * страниц. 
*/ + mc->flags = z_hollow | (mc->flags & z_clear_mask); + inner_gone(mc); + ret.err = MDBX_NOTFOUND; + return ret; + } + cASSERT(mc, op == MDBX_SET_RANGE); + mc->flags = z_eof_soft | z_eof_hard | (mc->flags & z_clear_mask); + ret.err = MDBX_NOTFOUND; + return ret; + } + + if (mc->top == 0) { + /* There are no other pages */ + mc->ki[mc->top] = 0; + if (op >= MDBX_SET_RANGE) + goto got_node; + else + goto target_not_found; + } + } + cASSERT(mc, !inner_pointed(mc)); + +continue_other_pages: + ret.err = tree_search(mc, &aligned_key, 0); + if (unlikely(ret.err != MDBX_SUCCESS)) + return ret; + + cASSERT(mc, is_pointed(mc) && !inner_pointed(mc)); + mp = mc->pg[mc->top]; + MDBX_ANALYSIS_ASSUME(mp != nullptr); + cASSERT(mc, is_leaf(mp)); + +search_node: + cASSERT(mc, is_pointed(mc) && !inner_pointed(mc)); + struct node_search_result nsr = node_search(mc, &aligned_key); + node = nsr.node; + ret.exact = nsr.exact; + if (!ret.exact) { + if (op < MDBX_SET_RANGE) + goto target_not_found; + + if (node == nullptr) { + DEBUG("%s", "===> inexact leaf not found, goto sibling"); + ret.err = cursor_sibling_right(mc); + if (unlikely(ret.err != MDBX_SUCCESS)) + return ret; /* no entries matched */ + mp = mc->pg[mc->top]; + cASSERT(mc, is_leaf(mp)); + if (!is_dupfix_leaf(mp)) + node = page_node(mp, 0); + } + } + +got_node: + cASSERT(mc, is_pointed(mc) && !inner_pointed(mc)); + cASSERT(mc, mc->ki[mc->top] < page_numkeys(mc->pg[mc->top])); + if (!MDBX_DISABLE_VALIDATION && unlikely(!check_leaf_type(mc, mp))) { + ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", + mp->pgno, mp->flags); + ret.err = MDBX_CORRUPTED; + return ret; + } + + if (is_dupfix_leaf(mp)) { + if (op >= MDBX_SET_KEY) + *key = page_dupfix_key(mp, mc->ki[mc->top], mc->tree->dupfix_size); + be_filled(mc); + ret.err = MDBX_SUCCESS; + return ret; + } + + if (node_flags(node) & N_DUPDATA) { + ret.err = cursor_dupsort_setup(mc, node, mp); + if (unlikely(ret.err != MDBX_SUCCESS)) + return ret; + if (op >= 
MDBX_SET) { + MDBX_ANALYSIS_ASSUME(mc->subcur != nullptr); + if (node_flags(node) & N_SUBDATA) { + ret.err = inner_first(&mc->subcur->cursor, data); + if (unlikely(ret.err != MDBX_SUCCESS)) + return ret; + } else if (data) { + const page_t *inner_mp = mc->subcur->cursor.pg[0]; + cASSERT(mc, is_subpage(inner_mp) && is_leaf(inner_mp)); + const size_t inner_ki = mc->subcur->cursor.ki[0]; + if (is_dupfix_leaf(inner_mp)) + *data = page_dupfix_key(inner_mp, inner_ki, mc->tree->dupfix_size); + else + *data = get_key(page_node(inner_mp, inner_ki)); + } + } else { + MDBX_ANALYSIS_ASSUME(mc->subcur != nullptr); + ret = cursor_seek(&mc->subcur->cursor, data, nullptr, MDBX_SET_RANGE); + if (unlikely(ret.err != MDBX_SUCCESS)) { + if (ret.err == MDBX_NOTFOUND && op < MDBX_SET_RANGE) + goto target_not_found; + return ret; + } + if (op == MDBX_GET_BOTH && !ret.exact) + goto target_not_found; + } + } else if (likely(data)) { + if (op <= MDBX_GET_BOTH_RANGE) { + if (unlikely(data->iov_len < mc->clc->v.lmin || + data->iov_len > mc->clc->v.lmax)) { + cASSERT(mc, !"Invalid data-size"); + ret.err = MDBX_BAD_VALSIZE; + return ret; + } + MDBX_val aligned_data = *data; + uint64_t aligned_databytes; + if (mc->tree->flags & MDBX_INTEGERDUP) { + if (aligned_data.iov_len == 8) { + if (unlikely(7 & (uintptr_t)aligned_data.iov_base)) + /* copy instead of return error to avoid break compatibility */ + aligned_data.iov_base = + bcopy_8(&aligned_databytes, aligned_data.iov_base); + } else if (aligned_data.iov_len == 4) { + if (unlikely(3 & (uintptr_t)aligned_data.iov_base)) + /* copy instead of return error to avoid break compatibility */ + aligned_data.iov_base = + bcopy_4(&aligned_databytes, aligned_data.iov_base); + } else { + cASSERT(mc, !"data-size is invalid for MDBX_INTEGERDUP"); + ret.err = MDBX_BAD_VALSIZE; + return ret; + } + } + MDBX_val actual_data; + ret.err = node_read(mc, node, &actual_data, mc->pg[mc->top]); + if (unlikely(ret.err != MDBX_SUCCESS)) + return ret; + const int cmp = 
mc->clc->v.cmp(&aligned_data, &actual_data); + if (cmp) { + if (op != MDBX_GET_BOTH_RANGE) { + cASSERT(mc, op == MDBX_GET_BOTH); + goto target_not_found; + } + if (cmp > 0) { + ret.err = MDBX_NOTFOUND; + return ret; + } + } + *data = actual_data; + } else { + ret.err = node_read(mc, node, data, mc->pg[mc->top]); + if (unlikely(ret.err != MDBX_SUCCESS)) + return ret; + } + } + + /* The key already matches in all other cases */ + if (op >= MDBX_SET_KEY) + get_key_optional(node, key); + + DEBUG("==> cursor placed on key [%s], data [%s]", DKEY_DEBUG(key), + DVAL_DEBUG(data)); + ret.err = MDBX_SUCCESS; + be_filled(mc); + return ret; +} + +__hot int cursor_ops(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, + const MDBX_cursor_op op) { + if (op != MDBX_GET_CURRENT) + DEBUG(">> cursor %p(0x%x), ops %u, key %p, value %p", + __Wpedantic_format_voidptr(mc), mc->flags, op, + __Wpedantic_format_voidptr(key), __Wpedantic_format_voidptr(data)); + int rc; + + switch (op) { + case MDBX_GET_CURRENT: + cASSERT(mc, (mc->flags & z_inner) == 0); + if (unlikely(!is_filled(mc))) { + if (is_hollow(mc)) + return MDBX_ENODATA; + if (mc->ki[mc->top] >= page_numkeys(mc->pg[mc->top])) + return MDBX_NOTFOUND; + } + if (mc->flags & z_after_delete) + return outer_next(mc, key, data, MDBX_NEXT_NODUP); + else if (inner_pointed(mc) && (mc->subcur->cursor.flags & z_after_delete)) + return outer_next(mc, key, data, MDBX_NEXT_DUP); + else { + const page_t *mp = mc->pg[mc->top]; + const node_t *node = page_node(mp, mc->ki[mc->top]); + get_key_optional(node, key); + if (!data) + return MDBX_SUCCESS; + if (node_flags(node) & N_DUPDATA) { + if (!MDBX_DISABLE_VALIDATION && unlikely(!mc->subcur)) + return unexpected_dupsort(mc); + mc = &mc->subcur->cursor; + if (unlikely(!is_filled(mc))) { + if (is_hollow(mc)) + return MDBX_ENODATA; + if (mc->ki[mc->top] >= page_numkeys(mc->pg[mc->top])) + return MDBX_NOTFOUND; + } + mp = mc->pg[mc->top]; + if (is_dupfix_leaf(mp)) + *data = page_dupfix_key(mp, 
mc->ki[mc->top], mc->tree->dupfix_size); + else + *data = get_key(page_node(mp, mc->ki[mc->top])); + return MDBX_SUCCESS; + } else { + cASSERT(mc, !inner_pointed(mc)); + return node_read(mc, node, data, mc->pg[mc->top]); + } + } + + case MDBX_GET_BOTH: + case MDBX_GET_BOTH_RANGE: + if (unlikely(data == nullptr)) + return MDBX_EINVAL; + if (unlikely(mc->subcur == nullptr)) + return MDBX_INCOMPATIBLE; + /* fall through */ + __fallthrough; + case MDBX_SET: + case MDBX_SET_KEY: + case MDBX_SET_RANGE: + if (unlikely(key == nullptr)) + return MDBX_EINVAL; + rc = cursor_seek(mc, key, data, op).err; + if (rc == MDBX_SUCCESS) + cASSERT(mc, is_filled(mc)); + else if (rc == MDBX_NOTFOUND && mc->tree->items) { + cASSERT(mc, is_pointed(mc)); + cASSERT(mc, op == MDBX_SET_RANGE || op == MDBX_GET_BOTH_RANGE || + is_hollow(mc)); + cASSERT(mc, op == MDBX_GET_BOTH_RANGE || inner_hollow(mc)); + } else + cASSERT(mc, is_poor(mc) && !is_filled(mc)); + return rc; + + case MDBX_GET_MULTIPLE: + if (unlikely(!data)) + return MDBX_EINVAL; + if (unlikely((mc->tree->flags & MDBX_DUPFIXED) == 0)) + return MDBX_INCOMPATIBLE; + if (unlikely(!is_pointed(mc))) { + if (unlikely(!key)) + return MDBX_EINVAL; + if (unlikely((mc->flags & z_fresh) == 0)) + return MDBX_ENODATA; + rc = cursor_seek(mc, key, data, MDBX_SET).err; + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + if (unlikely(is_eof(mc) || !inner_filled(mc))) + return MDBX_ENODATA; + goto fetch_multiple; + + case MDBX_NEXT_MULTIPLE: + if (unlikely(!data)) + return MDBX_EINVAL; + if (unlikely(mc->subcur == nullptr)) + return MDBX_INCOMPATIBLE; + rc = outer_next(mc, key, data, MDBX_NEXT_DUP); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + else { + fetch_multiple: + cASSERT(mc, is_filled(mc) && !inner_filled(mc)); + MDBX_cursor *mx = &mc->subcur->cursor; + data->iov_len = page_numkeys(mx->pg[mx->top]) * mx->tree->dupfix_size; + data->iov_base = page_data(mx->pg[mx->top]); + mx->ki[mx->top] = (indx_t)page_numkeys(mx->pg[mx->top]) - 1; + 
return MDBX_SUCCESS; + } + + case MDBX_PREV_MULTIPLE: + if (unlikely(!data)) + return MDBX_EINVAL; + if (unlikely(mc->subcur == nullptr)) + return MDBX_INCOMPATIBLE; + if (unlikely(!is_pointed(mc))) { + if (unlikely((mc->flags & z_fresh) == 0)) + return MDBX_ENODATA; + rc = outer_last(mc, key, data); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + mc->subcur->cursor.ki[mc->subcur->cursor.top] = 0; + goto fetch_multiple; + } + if (unlikely(!is_filled(mc) || !inner_filled(mc))) + return MDBX_ENODATA; + rc = cursor_sibling_left(&mc->subcur->cursor); + if (likely(rc == MDBX_SUCCESS)) + goto fetch_multiple; + return rc; + + case MDBX_NEXT_DUP: + case MDBX_NEXT: + case MDBX_NEXT_NODUP: + rc = outer_next(mc, key, data, op); + mc->flags &= ~z_eof_hard; + ((cursor_couple_t *)mc)->inner.cursor.flags &= ~z_eof_hard; + return rc; + + case MDBX_PREV_DUP: + case MDBX_PREV: + case MDBX_PREV_NODUP: + return outer_prev(mc, key, data, op); + + case MDBX_FIRST: + return outer_first(mc, key, data); + case MDBX_LAST: + return outer_last(mc, key, data); + + case MDBX_LAST_DUP: + case MDBX_FIRST_DUP: + if (unlikely(data == nullptr)) + return MDBX_EINVAL; + if (unlikely(!is_filled(mc))) + return MDBX_ENODATA; + else { + node_t *node = page_node(mc->pg[mc->top], mc->ki[mc->top]); + get_key_optional(node, key); + if ((node_flags(node) & N_DUPDATA) == 0) + return node_read(mc, node, data, mc->pg[mc->top]); + else if (MDBX_DISABLE_VALIDATION || likely(mc->subcur)) + return ((op == MDBX_FIRST_DUP) ? 
inner_first + : inner_last)(&mc->subcur->cursor, data); + else + return unexpected_dupsort(mc); + } + break; + + case MDBX_SET_UPPERBOUND: + case MDBX_SET_LOWERBOUND: + if (unlikely(key == nullptr || data == nullptr)) + return MDBX_EINVAL; + else { + MDBX_val save_data = *data; + csr_t csr = cursor_seek(mc, key, data, MDBX_SET_RANGE); + rc = csr.err; + if (rc == MDBX_SUCCESS && csr.exact && mc->subcur) { + csr.exact = false; + if (!save_data.iov_base) { + /* Avoiding search nested dupfix hive if no data provided. + * This is changes the semantic of MDBX_SET_LOWERBOUND but avoid + * returning MDBX_BAD_VALSIZE. */ + } else if (is_pointed(&mc->subcur->cursor)) { + *data = save_data; + csr = cursor_seek(&mc->subcur->cursor, data, nullptr, MDBX_SET_RANGE); + rc = csr.err; + if (rc == MDBX_NOTFOUND) { + cASSERT(mc, !csr.exact); + rc = outer_next(mc, key, data, MDBX_NEXT_NODUP); + } + } else { + int cmp = mc->clc->v.cmp(&save_data, data); + csr.exact = (cmp == 0); + if (cmp > 0) + rc = outer_next(mc, key, data, MDBX_NEXT_NODUP); + } + } + if (rc == MDBX_SUCCESS && !csr.exact) + rc = MDBX_RESULT_TRUE; + if (unlikely(op == MDBX_SET_UPPERBOUND)) { + /* minor fixups for MDBX_SET_UPPERBOUND */ + if (rc == MDBX_RESULT_TRUE) + /* already at great-than by MDBX_SET_LOWERBOUND */ + rc = MDBX_SUCCESS; + else if (rc == MDBX_SUCCESS) + /* exactly match, going next */ + rc = outer_next(mc, key, data, MDBX_NEXT); + } + } + return rc; + + /* Doubtless API to positioning of the cursor at a specified key. 
*/ + case MDBX_TO_KEY_LESSER_THAN: + case MDBX_TO_KEY_LESSER_OR_EQUAL: + case MDBX_TO_KEY_EQUAL: + case MDBX_TO_KEY_GREATER_OR_EQUAL: + case MDBX_TO_KEY_GREATER_THAN: + if (unlikely(key == nullptr)) + return MDBX_EINVAL; + else { + csr_t csr = cursor_seek(mc, key, data, MDBX_SET_RANGE); + rc = csr.err; + if (csr.exact) { + cASSERT(mc, csr.err == MDBX_SUCCESS); + if (op == MDBX_TO_KEY_LESSER_THAN) + rc = outer_prev(mc, key, data, MDBX_PREV_NODUP); + else if (op == MDBX_TO_KEY_GREATER_THAN) + rc = outer_next(mc, key, data, MDBX_NEXT_NODUP); + } else if (op < MDBX_TO_KEY_EQUAL && + (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) + rc = outer_prev(mc, key, data, MDBX_PREV_NODUP); + else if (op == MDBX_TO_KEY_EQUAL && rc == MDBX_SUCCESS) + rc = MDBX_NOTFOUND; + } + return rc; + + /* Doubtless API to positioning of the cursor at a specified key-value pair + * for multi-value hives. */ + case MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN: + case MDBX_TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL: + case MDBX_TO_EXACT_KEY_VALUE_EQUAL: + case MDBX_TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL: + case MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN: + if (unlikely(key == nullptr || data == nullptr)) + return MDBX_EINVAL; + else { + MDBX_val save_data = *data; + csr_t csr = cursor_seek(mc, key, data, MDBX_SET_KEY); + rc = csr.err; + if (rc == MDBX_SUCCESS) { + cASSERT(mc, csr.exact); + if (inner_pointed(mc)) { + MDBX_cursor *const mx = &mc->subcur->cursor; + csr = cursor_seek(mx, &save_data, nullptr, MDBX_SET_RANGE); + rc = csr.err; + if (csr.exact) { + cASSERT(mc, csr.err == MDBX_SUCCESS); + if (op == MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN) + rc = inner_prev(mx, data); + else if (op == MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN) + rc = inner_next(mx, data); + } else if (op < MDBX_TO_EXACT_KEY_VALUE_EQUAL && + (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) + rc = inner_prev(mx, data); + else if (op == MDBX_TO_EXACT_KEY_VALUE_EQUAL && rc == MDBX_SUCCESS) + rc = MDBX_NOTFOUND; + } else { + int cmp = mc->clc->v.cmp(data, &save_data); 
+ switch (op) { + default: + __unreachable(); + case MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN: + rc = (cmp < 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; + break; + case MDBX_TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL: + rc = (cmp <= 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; + break; + case MDBX_TO_EXACT_KEY_VALUE_EQUAL: + rc = (cmp == 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; + break; + case MDBX_TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL: + rc = (cmp >= 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; + break; + case MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN: + rc = (cmp > 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; + break; + } + } + } + } + return rc; + + case MDBX_TO_PAIR_LESSER_THAN: + case MDBX_TO_PAIR_LESSER_OR_EQUAL: + case MDBX_TO_PAIR_EQUAL: + case MDBX_TO_PAIR_GREATER_OR_EQUAL: + case MDBX_TO_PAIR_GREATER_THAN: + if (unlikely(key == nullptr || data == nullptr)) + return MDBX_EINVAL; + else { + MDBX_val save_data = *data; + csr_t csr = cursor_seek(mc, key, data, MDBX_SET_RANGE); + rc = csr.err; + if (csr.exact) { + cASSERT(mc, csr.err == MDBX_SUCCESS); + if (inner_pointed(mc)) { + MDBX_cursor *const mx = &mc->subcur->cursor; + csr = cursor_seek(mx, &save_data, nullptr, MDBX_SET_RANGE); + rc = csr.err; + if (csr.exact) { + cASSERT(mc, csr.err == MDBX_SUCCESS); + if (op == MDBX_TO_PAIR_LESSER_THAN) + rc = outer_prev(mc, key, data, MDBX_PREV); + else if (op == MDBX_TO_PAIR_GREATER_THAN) + rc = outer_next(mc, key, data, MDBX_NEXT); + } else if (op < MDBX_TO_PAIR_EQUAL && + (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) + rc = outer_prev(mc, key, data, MDBX_PREV); + else if (op == MDBX_TO_PAIR_EQUAL && rc == MDBX_SUCCESS) + rc = MDBX_NOTFOUND; + else if (op > MDBX_TO_PAIR_EQUAL && rc == MDBX_NOTFOUND) + rc = outer_next(mc, key, data, MDBX_NEXT); + } else { + int cmp = mc->clc->v.cmp(data, &save_data); + switch (op) { + default: + __unreachable(); + case MDBX_TO_PAIR_LESSER_THAN: + if (cmp >= 0) + rc = outer_prev(mc, key, data, MDBX_PREV); + break; + case MDBX_TO_PAIR_LESSER_OR_EQUAL: + if (cmp > 0) + rc = outer_prev(mc, key, data, 
MDBX_PREV); + break; + case MDBX_TO_PAIR_EQUAL: + rc = (cmp == 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; + break; + case MDBX_TO_PAIR_GREATER_OR_EQUAL: + if (cmp < 0) + rc = outer_next(mc, key, data, MDBX_NEXT); + break; + case MDBX_TO_PAIR_GREATER_THAN: + if (cmp <= 0) + rc = outer_next(mc, key, data, MDBX_NEXT); + break; + } + } + } else if (op < MDBX_TO_PAIR_EQUAL && + (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) + rc = outer_prev(mc, key, data, MDBX_PREV_NODUP); + else if (op == MDBX_TO_PAIR_EQUAL && rc == MDBX_SUCCESS) + rc = MDBX_NOTFOUND; + } + return rc; + + default: + DEBUG("unhandled/unimplemented cursor operation %u", op); + return MDBX_EINVAL; + } +} diff --git a/src/cursor.h b/src/cursor.h new file mode 100644 index 00000000..05174726 --- /dev/null +++ b/src/cursor.h @@ -0,0 +1,398 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +/* Состояние курсора. + * + * плохой/poor: + * - неустановленный курсор с незаполненым стеком; + * - следует пропускать во всех циклах отслеживания/корректировки + * позиций курсоров; + * - допускаются только операции предполагающие установку абсолютной позиции; + * - в остальных случаях возвращается ENODATA. + * + * У таких курсоров top = -1 и flags < 0, что позволяет дешево проверять и + * пропускать такие курсоры в циклах отслеживания/корректировки по условию + * probe_cursor->top < this_cursor->top. + * + * пустой/hollow: + * - частично инициализированный курсор, но без доступной пользователю позиции, + * поэтому нельзя выполнить какую-либо операцию без абсолютного (не + * относительного) позиционирования; + * - ki[top] может быть некорректным, в том числе >= page_numkeys(pg[top]). + * + * У таких курсоров top >= 0, но flags < 0 (есть флажок z_hollow). 
+ * + * установленный/pointed: + * - полностью инициализированный курсор с конкретной позицией с данными; + * - можно прочитать текущую строку, удалить её, либо выполнить + * относительное перемещение; + * - может иметь флажки z_after_delete, z_eof_hard и z_eof_soft; + * - наличие z_eof_hard означает что курсор перемещен за пределы данных, + * поэтому нельзя прочитать текущие данные, либо удалить их. + * + * У таких курсоров top >= 0 и flags >= 0 (нет флажка z_hollow). + * + * наполненный данными/filled: + * - это установленный/pointed курсор без флагов z_eof_hard; + * - за курсором есть данные, возможны CRUD операции в текущей позиции. + * + * У таких курсоров top >= 0 и (unsigned)flags < z_eof_hard. + * + * Изменения состояния. + * + * - Сбрасывается состояние курсора посредством top_and_flags |= z_poor_mark, + * что равносильно top = -1 вместе с flags |= z_poor_mark; + * - При позиционировании курсора сначала устанавливается top, а flags + * только в самом конце при отсутствии ошибок. + * - Повторное позиционирование first/last может начинаться + * с установки/обнуления только top без сброса flags, что позволяет работать + * быстрому пути внутри tree_search_finalize(). + * + * - Заморочки с концом данных: + * - mdbx_cursor_get(NEXT) выполняет две операции (перемещение и чтение), + * поэтому перемещение на последнюю строку всегда успешно, + * а ошибка возвращается только при последующем next(). + * Однако, из-за этой двойственности семантика ситуации возврата ошибки + * из mdbx_cursor_get(NEXT) допускает разночтение/неопределенность, ибо + * не понятно к чему относится ошибка: + * - Если к чтению данных, то курсор перемещен и стоит после последней + * строки. Соответственно, чтение в текущей позиции запрещено, + * а при выполнении prev() курсор вернется на последнюю строку; + * - Если же ошибка относится к перемещению, то курсор не перемещен и + * остается на последней строке. 
Соответственно, чтение в текущей + * позиции допустимо, а при выполнении prev() курсор встанет + * на пред-последнюю строку. + * - Пикантность в том, что пользователи (так или иначе) полагаются + * на оба варианта поведения, при этом конечно ожидают что после + * ошибки MDBX_NEXT функция mdbx_cursor_eof() будет возвращать true. + * - далее добавляется схожая ситуация с MDBX_GET_RANGE, MDBX_LOWERBOUND, + * MDBX_GET_BOTH_RANGE и MDBX_UPPERBOUND. Тут при неуспехе поиска курсор + * может/должен стоять после последней строки. + * - далее добавляется MDBX_LAST. Тут курсор должен стоять на последней + * строке и допускать чтение в текузщей позиции, + * но mdbx_cursor_eof() должен возвращать true. + * + * Решение = делаем два флажка z_eof_soft и z_eof_hard: + * - Когда установлен только z_eof_soft, + * функция mdbx_cursor_eof() возвращает true, но допускается + * чтение данных в текущей позиции, а prev() передвигает курсор + * на пред-последнюю строку. + * - Когда установлен z_eof_hard, чтение данных в текущей позиции + * не допускается, и mdbx_cursor_eof() также возвращает true, + * а prev() устанавливает курсора на последюю строку. */ +enum cursor_state { + /* Это вложенный курсор для вложенного дерева/страницы и является + inner-элементом struct cursor_couple. */ + z_inner = 0x01, + + /* Происходит подготовка к обновлению GC, + поэтому можно брать страницы из GC даже для FREE_DBI. */ + z_gcu_preparation = 0x02, + + /* Курсор только-что создан, поэтому допускается авто-установка + в начало/конец, вместо возврата ошибки. */ + z_fresh = 0x04, + + /* Предыдущей операцией было удаление, поэтому курсор уже физически указывает + на следующий элемент и соответствующая операция перемещения должна + игнорироваться. */ + z_after_delete = 0x08, + + /* */ + z_disable_tree_search_fastpath = 0x10, + + /* Курсор логически в конце данных, но физически на последней строке, + * ki[top] == page_numkeys(pg[top]) - 1 и читать данные в текущей позиции. 
*/ + z_eof_soft = 0x20, + + /* Курсор логически за концом данных, поэтому следующий переход "назад" + должен игнорироваться и/или приводить к установке на последнюю строку. + В текущем же состоянии нельзя делать CRUD операции. */ + z_eof_hard = 0x40, + + /* За курсором нет данных, логически его позиция не определена, + нельзя делать CRUD операции в текущей позиции. + Относительное перемещение запрещено. */ + z_hollow = -128 /* 0x80 */, + + /* Маски для сброса/установки состояния. */ + z_clear_mask = z_inner | z_gcu_preparation, + z_poor_mark = z_eof_hard | z_hollow | z_disable_tree_search_fastpath, + z_fresh_mark = z_poor_mark | z_fresh +}; + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +is_inner(const MDBX_cursor *mc) { + return (mc->flags & z_inner) != 0; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +is_poor(const MDBX_cursor *mc) { + const bool r = mc->top < 0; + cASSERT(mc, r == (mc->top_and_flags < 0)); + if (r && mc->subcur) + cASSERT(mc, mc->subcur->cursor.flags < 0 && mc->subcur->cursor.top < 0); + return r; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +is_pointed(const MDBX_cursor *mc) { + const bool r = mc->top >= 0; + cASSERT(mc, r == (mc->top_and_flags >= 0)); + if (!r && mc->subcur) + cASSERT(mc, is_poor(&mc->subcur->cursor)); + return r; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +is_hollow(const MDBX_cursor *mc) { + const bool r = mc->flags < 0; + if (!r) { + cASSERT(mc, mc->top >= 0); + cASSERT(mc, (mc->flags & z_eof_hard) || + mc->ki[mc->top] < page_numkeys(mc->pg[mc->top])); + } else if (mc->subcur) + cASSERT(mc, is_poor(&mc->subcur->cursor)); + return r; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +is_eof(const MDBX_cursor *mc) { + const bool r = z_eof_soft <= (uint8_t)mc->flags; + return r; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +is_filled(const MDBX_cursor *mc) { + const bool r = z_eof_hard > 
(uint8_t)mc->flags; + return r; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +inner_filled(const MDBX_cursor *mc) { + return mc->subcur && is_filled(&mc->subcur->cursor); +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +inner_pointed(const MDBX_cursor *mc) { + return mc->subcur && is_pointed(&mc->subcur->cursor); +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +inner_hollow(const MDBX_cursor *mc) { + return !mc->subcur || is_hollow(&mc->subcur->cursor); +} + +MDBX_MAYBE_UNUSED static inline void inner_gone(MDBX_cursor *mc) { + if (mc->subcur) { + TRACE("reset inner cursor %p", + __Wpedantic_format_voidptr(&mc->subcur->cursor)); + mc->subcur->nested_tree.root = 0; + mc->subcur->cursor.top_and_flags = z_inner | z_poor_mark; + } +} + +MDBX_MAYBE_UNUSED static inline void be_poor(MDBX_cursor *mc) { + const bool inner = is_inner(mc); + if (inner) { + mc->tree->root = 0; + mc->top_and_flags = z_inner | z_poor_mark; + } else { + mc->top_and_flags |= z_poor_mark; + inner_gone(mc); + } + cASSERT(mc, is_poor(mc) && !is_pointed(mc) && !is_filled(mc)); + cASSERT(mc, inner == is_inner(mc)); +} + +MDBX_MAYBE_UNUSED static inline void be_filled(MDBX_cursor *mc) { + cASSERT(mc, mc->top >= 0); + cASSERT(mc, mc->ki[mc->top] < page_numkeys(mc->pg[mc->top])); + const bool inner = is_inner(mc); + mc->flags &= z_clear_mask; + cASSERT(mc, is_filled(mc)); + cASSERT(mc, inner == is_inner(mc)); +} + +MDBX_MAYBE_UNUSED static inline bool is_related(const MDBX_cursor *base, + const MDBX_cursor *scan) { + cASSERT(base, base->top >= 0); + return base->top <= scan->top && base != scan; +} + +/* Флаги контроля/проверки курсора. 
*/ +enum cursor_checking { + z_branch = 0x01 /* same as P_BRANCH for check_leaf_type() */, + z_leaf = 0x02 /* same as P_LEAF for check_leaf_type() */, + z_largepage = 0x04 /* same as P_LARGE for check_leaf_type() */, + z_updating = 0x08 /* update/rebalance pending */, + z_ignord = 0x10 /* don't check keys ordering */, + z_dupfix = 0x20 /* same as P_DUPFIX for check_leaf_type() */, + z_retiring = 0x40 /* refs to child pages may be invalid */, + z_pagecheck = 0x80 /* perform page checking, see MDBX_VALIDATION */ +}; + +MDBX_INTERNAL int __must_check_result cursor_check(const MDBX_cursor *mc); + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline size_t +cursor_dbi(const MDBX_cursor *mc) { + cASSERT(mc, mc->txn && mc->txn->signature == txn_signature); + size_t dbi = mc->dbi_state - mc->txn->dbi_state; + cASSERT(mc, dbi < mc->txn->env->n_dbi); + return dbi; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +cursor_dbi_changed(const MDBX_cursor *mc) { + return dbi_changed(mc->txn, cursor_dbi(mc)); +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t * +cursor_dbi_state(const MDBX_cursor *mc) { + return mc->dbi_state; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +cursor_is_gc(const MDBX_cursor *mc) { + return mc->dbi_state == mc->txn->dbi_state + FREE_DBI; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +cursor_is_main(const MDBX_cursor *mc) { + return mc->dbi_state == mc->txn->dbi_state + MAIN_DBI; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +cursor_is_core(const MDBX_cursor *mc) { + return mc->dbi_state < mc->txn->dbi_state + CORE_DBS; +} + +MDBX_MAYBE_UNUSED static inline int cursor_dbi_dbg(const MDBX_cursor *mc) { + /* Debugging output value of a cursor's DBI: Negative for a sub-cursor. */ + const int dbi = cursor_dbi(mc); + return (mc->flags & z_inner) ? 
-dbi : dbi; +} + +MDBX_MAYBE_UNUSED static inline int __must_check_result +cursor_push(MDBX_cursor *mc, page_t *mp, indx_t ki) { + TRACE("pushing page %" PRIaPGNO " on db %d cursor %p", mp->pgno, + cursor_dbi_dbg(mc), __Wpedantic_format_voidptr(mc)); + if (unlikely(mc->top >= CURSOR_STACK_SIZE - 1)) { + be_poor(mc); + mc->txn->flags |= MDBX_TXN_ERROR; + return MDBX_CURSOR_FULL; + } + mc->top += 1; + mc->pg[mc->top] = mp; + mc->ki[mc->top] = ki; + return MDBX_SUCCESS; +} + +MDBX_MAYBE_UNUSED static inline void cursor_pop(MDBX_cursor *mc) { + TRACE("popped page %" PRIaPGNO " off db %d cursor %p", mc->pg[mc->top]->pgno, + cursor_dbi_dbg(mc), __Wpedantic_format_voidptr(mc)); + cASSERT(mc, mc->top >= 0); + mc->top -= 1; +} + +MDBX_NOTHROW_PURE_FUNCTION static inline bool +check_leaf_type(const MDBX_cursor *mc, const page_t *mp) { + return (((page_type(mp) ^ mc->checking) & + (z_branch | z_leaf | z_largepage | z_dupfix)) == 0); +} + +MDBX_INTERNAL void cursor_eot(MDBX_cursor *mc, const bool merge); +MDBX_INTERNAL int cursor_shadow(MDBX_cursor *parent_cursor, + MDBX_txn *nested_txn, const size_t dbi); + +MDBX_INTERNAL MDBX_cursor *cursor_cpstk(const MDBX_cursor *csrc, + MDBX_cursor *cdst); + +MDBX_INTERNAL int __must_check_result cursor_ops(MDBX_cursor *mc, MDBX_val *key, + MDBX_val *data, + const MDBX_cursor_op op); + +MDBX_INTERNAL int __must_check_result cursor_put_checklen(MDBX_cursor *mc, + const MDBX_val *key, + MDBX_val *data, + unsigned flags); + +MDBX_INTERNAL int __must_check_result cursor_put(MDBX_cursor *mc, + const MDBX_val *key, + MDBX_val *data, + unsigned flags); + +MDBX_INTERNAL int __must_check_result cursor_check_updating(MDBX_cursor *mc); + +MDBX_INTERNAL int __must_check_result cursor_del(MDBX_cursor *mc, + unsigned flags); + +MDBX_INTERNAL int __must_check_result cursor_sibling_left(MDBX_cursor *mc); +MDBX_INTERNAL int __must_check_result cursor_sibling_right(MDBX_cursor *mc); + +typedef struct cursor_set_result { + int err; + bool exact; +} csr_t; + 
+MDBX_INTERNAL csr_t cursor_seek(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, + MDBX_cursor_op op); + +MDBX_INTERNAL int __must_check_result inner_first(MDBX_cursor *__restrict mc, + MDBX_val *__restrict data); +MDBX_INTERNAL int __must_check_result inner_last(MDBX_cursor *__restrict mc, + MDBX_val *__restrict data); +MDBX_INTERNAL int __must_check_result outer_first(MDBX_cursor *__restrict mc, + MDBX_val *__restrict key, + MDBX_val *__restrict data); +MDBX_INTERNAL int __must_check_result outer_last(MDBX_cursor *__restrict mc, + MDBX_val *__restrict key, + MDBX_val *__restrict data); + +MDBX_INTERNAL int __must_check_result inner_next(MDBX_cursor *__restrict mc, + MDBX_val *__restrict data); +MDBX_INTERNAL int __must_check_result inner_prev(MDBX_cursor *__restrict mc, + MDBX_val *__restrict data); +MDBX_INTERNAL int __must_check_result outer_next(MDBX_cursor *__restrict mc, + MDBX_val *__restrict key, + MDBX_val *__restrict data, + MDBX_cursor_op op); +MDBX_INTERNAL int __must_check_result outer_prev(MDBX_cursor *__restrict mc, + MDBX_val *__restrict key, + MDBX_val *__restrict data, + MDBX_cursor_op op); + +MDBX_INTERNAL int cursor_init4walk(cursor_couple_t *couple, + const MDBX_txn *const txn, + tree_t *const tree, kvx_t *const kvx); + +MDBX_INTERNAL int __must_check_result cursor_init(MDBX_cursor *mc, + const MDBX_txn *txn, + size_t dbi); + +MDBX_INTERNAL int __must_check_result cursor_dupsort_setup(MDBX_cursor *mc, + const node_t *node, + const page_t *mp); + +MDBX_INTERNAL int __must_check_result cursor_touch(MDBX_cursor *const mc, + const MDBX_val *key, + const MDBX_val *data); + +/*----------------------------------------------------------------------------*/ + +/* Update sub-page pointer, if any, in mc->subcur. + * Needed when the node which contains the sub-page may have moved. + * Called with mp = mc->pg[mc->top], ki = mc->ki[mc->top]. 
*/ +MDBX_MAYBE_UNUSED static inline void +cursor_inner_refresh(const MDBX_cursor *mc, const page_t *mp, unsigned ki) { + cASSERT(mc, is_leaf(mp)); + const node_t *node = page_node(mp, ki); + if ((node_flags(node) & (N_DUPDATA | N_SUBDATA)) == N_DUPDATA) + mc->subcur->cursor.pg[0] = node_data(node); +} + +MDBX_MAYBE_UNUSED MDBX_INTERNAL bool cursor_is_tracked(const MDBX_cursor *mc); diff --git a/src/dbi.c b/src/dbi.c new file mode 100644 index 00000000..b8becf4d --- /dev/null +++ b/src/dbi.c @@ -0,0 +1,954 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +size_t dbi_bitmap_ctz_fallback(const MDBX_txn *txn, intptr_t bmi) { + tASSERT(txn, bmi > 0); + bmi &= -bmi; + if (sizeof(txn->dbi_sparse[0]) > 4) { + static const uint8_t debruijn_ctz64[64] = { + 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, + 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, + 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, + 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12}; + return debruijn_ctz64[(UINT64_C(0x022FDD63CC95386D) * (uint64_t)bmi) >> 58]; + } else { + static const uint8_t debruijn_ctz32[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; + return debruijn_ctz32[(UINT32_C(0x077CB531) * (uint32_t)bmi) >> 27]; + } +} + +struct dbi_snap_result dbi_snap(const MDBX_env *env, const size_t dbi) { + eASSERT(env, dbi < env->n_dbi); + struct dbi_snap_result r; + uint32_t snap = atomic_load32(&env->dbi_seqs[dbi], mo_AcquireRelease); + do { + r.sequence = snap; + r.flags = env->dbs_flags[dbi]; + snap = atomic_load32(&env->dbi_seqs[dbi], mo_AcquireRelease); + } while (unlikely(snap != r.sequence)); + return r; +} + +__noinline int dbi_import(MDBX_txn *txn, const size_t dbi) { + const MDBX_env *const env = txn->env; + if (dbi >= env->n_dbi || !env->dbs_flags[dbi]) + return 
MDBX_BAD_DBI; + +#if MDBX_ENABLE_DBI_SPARSE + const size_t bitmap_chunk = CHAR_BIT * sizeof(txn->dbi_sparse[0]); + const size_t bitmap_indx = dbi / bitmap_chunk; + const size_t bitmap_mask = (size_t)1 << dbi % bitmap_chunk; + if (dbi >= txn->n_dbi) { + for (size_t i = (txn->n_dbi + bitmap_chunk - 1) / bitmap_chunk; + bitmap_indx >= i; ++i) + txn->dbi_sparse[i] = 0; + eASSERT(env, (txn->dbi_sparse[bitmap_indx] & bitmap_mask) == 0); + MDBX_txn *scan = txn; + do { + eASSERT(env, scan->dbi_sparse == txn->dbi_sparse); + eASSERT(env, scan->n_dbi < dbi + 1); + scan->n_dbi = (unsigned)dbi + 1; + scan->dbi_state[dbi] = 0; + scan = scan->parent; + } while (scan /* && scan->dbi_sparse == txn->dbi_sparse */); + txn->dbi_sparse[bitmap_indx] |= bitmap_mask; + goto lindo; + } + if ((txn->dbi_sparse[bitmap_indx] & bitmap_mask) == 0) { + MDBX_txn *scan = txn; + do { + eASSERT(env, scan->dbi_sparse == txn->dbi_sparse); + eASSERT(env, scan->n_dbi == txn->n_dbi); + scan->dbi_state[dbi] = 0; + scan = scan->parent; + } while (scan /* && scan->dbi_sparse == txn->dbi_sparse */); + txn->dbi_sparse[bitmap_indx] |= bitmap_mask; + goto lindo; + } +#else + if (dbi >= txn->n_dbi) { + size_t i = txn->n_dbi; + do + txn->dbi_state[i] = 0; + while (dbi >= ++i); + txn->n_dbi = i; + goto lindo; + } +#endif /* MDBX_ENABLE_DBI_SPARSE */ + + if (!txn->dbi_state[dbi]) { + lindo: + /* dbi-слот еще не инициализирован в транзакции, а хендл не использовался */ + txn->cursors[dbi] = nullptr; + MDBX_txn *const parent = txn->parent; + if (parent) { + /* вложенная пишущая транзакция */ + int rc = dbi_check(parent, dbi); + /* копируем состояние subDB очищая new-флаги. 
*/ + eASSERT(env, txn->dbi_seqs == parent->dbi_seqs); + txn->dbi_state[dbi] = + parent->dbi_state[dbi] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); + if (likely(rc == MDBX_SUCCESS)) { + txn->dbs[dbi] = parent->dbs[dbi]; + if (parent->cursors[dbi]) { + rc = cursor_shadow(parent->cursors[dbi], txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) { + /* не получилось забекапить курсоры */ + txn->dbi_state[dbi] = DBI_OLDEN | DBI_LINDO | DBI_STALE; + txn->flags |= MDBX_TXN_ERROR; + } + } + } + return rc; + } + txn->dbi_seqs[dbi] = 0; + txn->dbi_state[dbi] = DBI_LINDO; + } else { + eASSERT(env, txn->dbi_seqs[dbi] != env->dbi_seqs[dbi].weak); + if (unlikely((txn->dbi_state[dbi] & (DBI_VALID | DBI_OLDEN)) || + txn->cursors[dbi])) { + /* хендл уже использовался в транзакции, но был закрыт или переоткрыт, + * либо при явном пере-открытии хендла есть висячие курсоры */ + eASSERT(env, (txn->dbi_state[dbi] & DBI_STALE) == 0); + txn->dbi_seqs[dbi] = env->dbi_seqs[dbi].weak; + txn->dbi_state[dbi] = DBI_OLDEN | DBI_LINDO; + return txn->cursors[dbi] ? 
MDBX_DANGLING_DBI : MDBX_BAD_DBI; + } + } + + /* хендл не использовался в транзакции, либо явно пере-отрывается при + * отсутствии висячих курсоров */ + eASSERT(env, (txn->dbi_state[dbi] & DBI_LINDO) && !txn->cursors[dbi]); + + /* читаем актуальные флаги и sequence */ + struct dbi_snap_result snap = dbi_snap(env, dbi); + txn->dbi_seqs[dbi] = snap.sequence; + if (snap.flags & DB_VALID) { + txn->dbs[dbi].flags = snap.flags & DB_PERSISTENT_FLAGS; + txn->dbi_state[dbi] = DBI_LINDO | DBI_VALID | DBI_STALE; + return MDBX_SUCCESS; + } + return MDBX_BAD_DBI; +} + +static int defer_and_release(MDBX_env *const env, + defer_free_item_t *const chain) { + size_t length = 0; + defer_free_item_t *obsolete_chain = nullptr; +#if MDBX_ENABLE_DBI_LOCKFREE + const uint64_t now = osal_monotime(); + defer_free_item_t **scan = &env->defer_free; + if (env->defer_free) { + const uint64_t threshold_1second = osal_16dot16_to_monotime(1 * 65536); + do { + defer_free_item_t *item = *scan; + if (now - item->timestamp < threshold_1second) { + scan = &item->next; + length += 1; + } else { + *scan = item->next; + item->next = obsolete_chain; + obsolete_chain = item; + } + } while (*scan); + } + + eASSERT(env, *scan == nullptr); + if (chain) { + defer_free_item_t *item = chain; + do { + item->timestamp = now; + item = item->next; + } while (item); + *scan = chain; + } +#else /* MDBX_ENABLE_DBI_LOCKFREE */ + obsolete_chain = chain; +#endif /* MDBX_ENABLE_DBI_LOCKFREE */ + + ENSURE(env, osal_fastmutex_release(&env->dbi_lock) == MDBX_SUCCESS); + if (length > 42) { +#if defined(_WIN32) || defined(_WIN64) + SwitchToThread(); +#else + sched_yield(); +#endif /* Windows */ + } + while (obsolete_chain) { + defer_free_item_t *item = obsolete_chain; + obsolete_chain = obsolete_chain->next; + osal_free(item); + } + return chain ? MDBX_SUCCESS : MDBX_BAD_DBI; +} + +/* Export or close DBI handles opened in this txn. 
*/ +int dbi_update(MDBX_txn *txn, int keep) { + MDBX_env *const env = txn->env; + tASSERT(txn, !txn->parent && txn == env->basal_txn); + bool locked = false; + defer_free_item_t *defer_chain = nullptr; + TXN_FOREACH_DBI_USER(txn, dbi) { + if (likely((txn->dbi_state[dbi] & DBI_CREAT) == 0)) + continue; + if (!locked) { + int err = osal_fastmutex_acquire(&env->dbi_lock); + if (unlikely(err != MDBX_SUCCESS)) + return err; + locked = true; + if (dbi >= env->n_dbi) + /* хендл был закрыт из другого потока пока захватывали блокировку */ + continue; + } + tASSERT(txn, dbi < env->n_dbi); + if (keep) { + env->dbs_flags[dbi] = txn->dbs[dbi].flags | DB_VALID; + } else { + uint32_t seq = dbi_seq_next(env, dbi); + defer_free_item_t *item = env->kvs[dbi].name.iov_base; + if (item) { + env->dbs_flags[dbi] = 0; + env->kvs[dbi].name.iov_len = 0; + env->kvs[dbi].name.iov_base = nullptr; + atomic_store32(&env->dbi_seqs[dbi], seq, mo_AcquireRelease); + osal_flush_incoherent_cpu_writeback(); + item->next = defer_chain; + defer_chain = item; + } else { + eASSERT(env, env->kvs[dbi].name.iov_len == 0); + eASSERT(env, env->dbs_flags[dbi] == 0); + } + } + } + + if (locked) { + size_t i = env->n_dbi; + while ((env->dbs_flags[i - 1] & DB_VALID) == 0) { + --i; + eASSERT(env, i >= CORE_DBS); + eASSERT(env, !env->dbs_flags[i] && !env->kvs[i].name.iov_len && + !env->kvs[i].name.iov_base); + } + env->n_dbi = (unsigned)i; + defer_and_release(env, defer_chain); + } + return MDBX_SUCCESS; +} + +int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, + MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { + const MDBX_env *const env = txn->env; + eASSERT(env, dbi < txn->n_dbi && dbi < env->n_dbi); + eASSERT(env, dbi_state(txn, dbi) & DBI_LINDO); + eASSERT(env, env->dbs_flags[dbi] != DB_POISON); + if ((env->dbs_flags[dbi] & DB_VALID) == 0) { + eASSERT(env, !env->kvs[dbi].clc.k.cmp && !env->kvs[dbi].clc.v.cmp && + !env->kvs[dbi].name.iov_len && + !env->kvs[dbi].name.iov_base && + 
!env->kvs[dbi].clc.k.lmax && !env->kvs[dbi].clc.k.lmin && + !env->kvs[dbi].clc.v.lmax && !env->kvs[dbi].clc.v.lmin); + } else { + eASSERT(env, !(txn->dbi_state[dbi] & DBI_VALID) || + (txn->dbs[dbi].flags | DB_VALID) == env->dbs_flags[dbi]); + eASSERT(env, env->kvs[dbi].name.iov_base || dbi < CORE_DBS); + } + + /* If the dbi has already been used, four cases are considered valid: + * 1) user_flags equal MDBX_DB_ACCEDE + * = assume the user opens an existing subDb, + * while the checking code will not allow other comparators to be set. + * 2) user_flags are zero, and both comparators are empty/null or equal to the current ones + * = assume the user opens an existing subDb + * the old way, with zero default flags. + * 3) user_flags match, and the comparators are not given or are the same + * = assume the user opens the subDb specifying all parameters; + * 4) user_flags differ, but the subDb is empty and the MDBX_CREATE flag is given + * = assume the user re-creates the subDb; + */ + if ((user_flags & ~MDBX_CREATE) != + (unsigned)(env->dbs_flags[dbi] & DB_PERSISTENT_FLAGS)) { + /* flags are differs, check other conditions */ + if ((!user_flags && (!keycmp || keycmp == env->kvs[dbi].clc.k.cmp) && + (!datacmp || datacmp == env->kvs[dbi].clc.v.cmp)) || + user_flags == MDBX_DB_ACCEDE) { + user_flags = env->dbs_flags[dbi] & DB_PERSISTENT_FLAGS; + } else if ((user_flags & MDBX_CREATE) == 0) + return /* FIXME: return extended info */ MDBX_INCOMPATIBLE; + else { + eASSERT(env, env->dbs_flags[dbi] & DB_VALID); + if (txn->dbi_state[dbi] & DBI_STALE) { + /* The cached tree info is stale: load it and bail out only when the + * fetch fails; on success fall through to the emptiness check below. */ + int err = sdb_fetch(txn, dbi); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + eASSERT(env, + (txn->dbi_state[dbi] & (DBI_LINDO | DBI_VALID | DBI_STALE)) == + (DBI_LINDO | DBI_VALID)); + if (unlikely(txn->dbs[dbi].leaf_pages)) + return /* FIXME: return extended info */ MDBX_INCOMPATIBLE; + + /* Re-create the subDB if it is empty */ + if (unlikely(txn->cursors[dbi])) + return 
MDBX_DANGLING_DBI; + env->dbs_flags[dbi] = DB_POISON; + /* Bump this handle's own sequence (not MAIN_DBI's), so that readers + * which re-check dbi_seqs[dbi] notice the slot is being rebuilt. */ + atomic_store32(&env->dbi_seqs[dbi], dbi_seq_next(env, dbi), + mo_AcquireRelease); + + const uint32_t seq = dbi_seq_next(env, dbi); + const uint16_t db_flags = user_flags & DB_PERSISTENT_FLAGS; + eASSERT(env, txn->dbs[dbi].height == 0 && txn->dbs[dbi].items == 0 && + txn->dbs[dbi].root == P_INVALID); + env->kvs[dbi].clc.k.cmp = keycmp ? keycmp : builtin_keycmp(user_flags); + env->kvs[dbi].clc.v.cmp = datacmp ? datacmp : builtin_datacmp(user_flags); + txn->dbs[dbi].flags = db_flags; + txn->dbs[dbi].dupfix_size = 0; + if (unlikely(sdb_setup(env, &env->kvs[dbi], &txn->dbs[dbi]))) { + txn->dbi_state[dbi] = DBI_LINDO; + txn->flags |= MDBX_TXN_ERROR; + return MDBX_PROBLEM; + } + + env->dbs_flags[dbi] = db_flags | DB_VALID; + atomic_store32(&env->dbi_seqs[dbi], seq, mo_AcquireRelease); + txn->dbi_seqs[dbi] = seq; + txn->dbi_state[dbi] = DBI_LINDO | DBI_VALID | DBI_CREAT | DBI_DIRTY; + txn->flags |= MDBX_TXN_DIRTY; + } + } + + if (!keycmp) + keycmp = (env->dbs_flags[dbi] & DB_VALID) ? env->kvs[dbi].clc.k.cmp + : builtin_keycmp(user_flags); + if (env->kvs[dbi].clc.k.cmp != keycmp) { + if (env->dbs_flags[dbi] & DB_VALID) + return MDBX_EINVAL; + env->kvs[dbi].clc.k.cmp = keycmp; + } + + if (!datacmp) + datacmp = (env->dbs_flags[dbi] & DB_VALID) ? env->kvs[dbi].clc.v.cmp + : builtin_datacmp(user_flags); + if (env->kvs[dbi].clc.v.cmp != datacmp) { + if (env->dbs_flags[dbi] & DB_VALID) + return MDBX_EINVAL; + env->kvs[dbi].clc.v.cmp = datacmp; + } + + return MDBX_SUCCESS; +} + +static inline size_t dbi_namelen(const MDBX_val name) { + return (name.iov_len > sizeof(defer_free_item_t)) ? 
name.iov_len + : sizeof(defer_free_item_t); +} + +static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, + MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp, + MDBX_val name) { + MDBX_env *const env = txn->env; + + /* Cannot mix named table(s) with DUPSORT flags */ + tASSERT(txn, + (txn->dbi_state[MAIN_DBI] & (DBI_LINDO | DBI_VALID | DBI_STALE)) == + (DBI_LINDO | DBI_VALID)); + if (unlikely(txn->dbs[MAIN_DBI].flags & MDBX_DUPSORT)) { + if (unlikely((user_flags & MDBX_CREATE) == 0)) + return MDBX_NOTFOUND; + if (unlikely(txn->dbs[MAIN_DBI].leaf_pages)) + /* В MainDB есть записи, либо она уже использовалась. */ + return MDBX_INCOMPATIBLE; + + /* Пересоздаём MainDB когда там пусто. */ + tASSERT(txn, txn->dbs[MAIN_DBI].height == 0 && + txn->dbs[MAIN_DBI].items == 0 && + txn->dbs[MAIN_DBI].root == P_INVALID); + if (unlikely(txn->cursors[MAIN_DBI])) + return MDBX_DANGLING_DBI; + env->dbs_flags[MAIN_DBI] = DB_POISON; + atomic_store32(&env->dbi_seqs[MAIN_DBI], dbi_seq_next(env, MAIN_DBI), + mo_AcquireRelease); + + const uint32_t seq = dbi_seq_next(env, MAIN_DBI); + const uint16_t main_flags = + txn->dbs[MAIN_DBI].flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY); + env->kvs[MAIN_DBI].clc.k.cmp = builtin_keycmp(main_flags); + env->kvs[MAIN_DBI].clc.v.cmp = builtin_datacmp(main_flags); + txn->dbs[MAIN_DBI].flags = main_flags; + txn->dbs[MAIN_DBI].dupfix_size = 0; + int err = sdb_setup(env, &env->kvs[MAIN_DBI], &txn->dbs[MAIN_DBI]); + if (unlikely(err != MDBX_SUCCESS)) { + txn->dbi_state[MAIN_DBI] = DBI_LINDO; + txn->flags |= MDBX_TXN_ERROR; + env->flags |= ENV_FATAL_ERROR; + return err; + } + env->dbs_flags[MAIN_DBI] = main_flags | DB_VALID; + txn->dbi_seqs[MAIN_DBI] = + atomic_store32(&env->dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease); + txn->dbi_state[MAIN_DBI] |= DBI_DIRTY; + txn->flags |= MDBX_TXN_DIRTY; + } + + tASSERT(txn, env->kvs[MAIN_DBI].clc.k.cmp); + + /* Is the DB already open? 
*/ + size_t slot = env->n_dbi; + for (size_t scan = CORE_DBS; scan < env->n_dbi; ++scan) { + if ((env->dbs_flags[scan] & DB_VALID) == 0) { + /* Remember this free slot */ + slot = (slot < scan) ? slot : scan; + continue; + } + if (!env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[scan].name)) { + slot = scan; + int err = dbi_check(txn, slot); + if (err == MDBX_BAD_DBI && + txn->dbi_state[slot] == (DBI_OLDEN | DBI_LINDO)) { + /* хендл использовался, стал невалидным, + * но теперь явно пере-открывается в этой транзакци */ + eASSERT(env, !txn->cursors[slot]); + txn->dbi_state[slot] = DBI_LINDO; + err = dbi_check(txn, slot); + } + if (err == MDBX_SUCCESS) { + err = dbi_bind(txn, slot, user_flags, keycmp, datacmp); + if (likely(err == MDBX_SUCCESS)) { + goto done; + } + } + return err; + } + } + + /* Fail, if no free slot and max hit */ + if (unlikely(slot >= env->max_dbi)) + return MDBX_DBS_FULL; + + if (env->n_dbi == slot) + eASSERT(env, !env->dbs_flags[slot] && !env->kvs[slot].name.iov_len && + !env->kvs[slot].name.iov_base); + + env->dbs_flags[slot] = DB_POISON; + atomic_store32(&env->dbi_seqs[slot], dbi_seq_next(env, slot), + mo_AcquireRelease); + memset(&env->kvs[slot], 0, sizeof(env->kvs[slot])); + if (env->n_dbi == slot) + env->n_dbi = (unsigned)slot + 1; + eASSERT(env, slot < env->n_dbi); + + int err = dbi_check(txn, slot); + eASSERT(env, err == MDBX_BAD_DBI); + if (err != MDBX_BAD_DBI) + return MDBX_PROBLEM; + + /* Find the DB info */ + MDBX_val body; + cursor_couple_t cx; + int rc = cursor_init(&cx.outer, txn, MAIN_DBI); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + rc = cursor_seek(&cx.outer, &name, &body, MDBX_SET).err; + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc != MDBX_NOTFOUND || !(user_flags & MDBX_CREATE)) + return rc; + } else { + /* make sure this is actually a table */ + node_t *node = + page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); + if (unlikely((node_flags(node) & (N_DUPDATA | N_SUBDATA)) != N_SUBDATA)) + return 
MDBX_INCOMPATIBLE; + if (!MDBX_DISABLE_VALIDATION && unlikely(body.iov_len != sizeof(tree_t))) { + ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid subDb node size", body.iov_len); + return MDBX_CORRUPTED; + } + memcpy(&txn->dbs[slot], body.iov_base, sizeof(tree_t)); + } + + /* Done here so we cannot fail after creating a new DB */ + void *clone = nullptr; + if (name.iov_len) { + clone = osal_malloc(dbi_namelen(name)); + if (unlikely(!clone)) + return MDBX_ENOMEM; + name.iov_base = memcpy(clone, name.iov_base, name.iov_len); + } else + name.iov_base = ""; + + uint8_t dbi_state = DBI_LINDO | DBI_VALID | DBI_FRESH; + if (unlikely(rc)) { + /* MDBX_NOTFOUND and MDBX_CREATE: Create new DB */ + tASSERT(txn, rc == MDBX_NOTFOUND); + body.iov_base = memset(&txn->dbs[slot], 0, body.iov_len = sizeof(tree_t)); + txn->dbs[slot].root = P_INVALID; + txn->dbs[slot].mod_txnid = txn->txnid; + txn->dbs[slot].flags = user_flags & DB_PERSISTENT_FLAGS; + cx.outer.next = txn->cursors[MAIN_DBI]; + txn->cursors[MAIN_DBI] = &cx.outer; + rc = cursor_put_checklen(&cx.outer, &name, &body, + N_SUBDATA | MDBX_NOOVERWRITE); + txn->cursors[MAIN_DBI] = cx.outer.next; + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + dbi_state |= DBI_DIRTY | DBI_CREAT; + txn->flags |= MDBX_TXN_DIRTY; + tASSERT(txn, (txn->dbi_state[MAIN_DBI] & DBI_DIRTY) != 0); + } + + /* Got info, register DBI in this txn */ + const uint32_t seq = dbi_seq_next(env, slot); + eASSERT(env, + env->dbs_flags[slot] == DB_POISON && !txn->cursors[slot] && + (txn->dbi_state[slot] & (DBI_LINDO | DBI_VALID)) == DBI_LINDO); + txn->dbi_state[slot] = dbi_state; + memcpy(&txn->dbs[slot], body.iov_base, sizeof(txn->dbs[slot])); + env->dbs_flags[slot] = txn->dbs[slot].flags; + rc = dbi_bind(txn, slot, user_flags, keycmp, datacmp); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + env->kvs[slot].name = name; + env->dbs_flags[slot] = txn->dbs[slot].flags | DB_VALID; + txn->dbi_seqs[slot] = + 
atomic_store32(&env->dbi_seqs[slot], seq, mo_AcquireRelease); + +done: + *dbi = (MDBX_dbi)slot; + tASSERT(txn, slot < txn->n_dbi && (env->dbs_flags[slot] & DB_VALID) != 0); + eASSERT(env, dbi_check(txn, slot) == MDBX_SUCCESS); + return MDBX_SUCCESS; + +bailout: + eASSERT(env, !txn->cursors[slot] && !env->kvs[slot].name.iov_len && + !env->kvs[slot].name.iov_base); + txn->dbi_state[slot] &= DBI_LINDO | DBI_OLDEN; + env->dbs_flags[slot] = 0; + osal_free(clone); + if (slot + 1 == env->n_dbi) + txn->n_dbi = env->n_dbi = (unsigned)slot; + return rc; +} + +int dbi_open(MDBX_txn *txn, const MDBX_val *const name, unsigned user_flags, + MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { + if (unlikely(!dbi)) + return MDBX_EINVAL; + *dbi = 0; + + if (user_flags != MDBX_ACCEDE && + unlikely(!check_sdb_flags(user_flags & ~MDBX_CREATE))) + return MDBX_EINVAL; + + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if ((user_flags & MDBX_CREATE) && unlikely(txn->flags & MDBX_TXN_RDONLY)) + return MDBX_EACCESS; + + /* main table? */ + if (unlikely(name == MDBX_CHK_MAIN || name->iov_base == MDBX_CHK_MAIN)) { + rc = dbi_bind(txn, MAIN_DBI, user_flags, keycmp, datacmp); + if (likely(rc == MDBX_SUCCESS)) + *dbi = MAIN_DBI; + return rc; + } + if (unlikely(name == MDBX_CHK_GC || name->iov_base == MDBX_CHK_GC)) { + rc = dbi_bind(txn, FREE_DBI, user_flags, keycmp, datacmp); + if (likely(rc == MDBX_SUCCESS)) + *dbi = FREE_DBI; + return rc; + } + if (unlikely(name == MDBX_CHK_META || name->iov_base == MDBX_CHK_META)) + return MDBX_EINVAL; + if (unlikely(name->iov_len > + txn->env->leaf_nodemax - NODESIZE - sizeof(tree_t))) + return MDBX_EINVAL; + +#if MDBX_ENABLE_DBI_LOCKFREE + /* Is the DB already open? 
*/ + const MDBX_env *const env = txn->env; + size_t free_slot = env->n_dbi; + for (size_t i = CORE_DBS; i < env->n_dbi; ++i) { + retry: + if ((env->dbs_flags[i] & DB_VALID) == 0) { + free_slot = i; + continue; + } + + const uint32_t snap_seq = + atomic_load32(&env->dbi_seqs[i], mo_AcquireRelease); + const uint16_t snap_flags = env->dbs_flags[i]; + const MDBX_val snap_name = env->kvs[i].name; + if (user_flags != MDBX_ACCEDE && + (((user_flags ^ snap_flags) & DB_PERSISTENT_FLAGS) || + (keycmp && keycmp != env->kvs[i].clc.k.cmp) || + (datacmp && datacmp != env->kvs[i].clc.v.cmp))) + continue; + const uint32_t main_seq = + atomic_load32(&env->dbi_seqs[MAIN_DBI], mo_AcquireRelease); + MDBX_cmp_func *const snap_cmp = env->kvs[MAIN_DBI].clc.k.cmp; + if (unlikely(!(snap_flags & DB_VALID) || !snap_name.iov_base || + !snap_name.iov_len || !snap_cmp)) + continue; + + const bool name_match = snap_cmp(&snap_name, name) == 0; + osal_flush_incoherent_cpu_writeback(); + if (unlikely( + snap_seq != atomic_load32(&env->dbi_seqs[i], mo_AcquireRelease) || + main_seq != + atomic_load32(&env->dbi_seqs[MAIN_DBI], mo_AcquireRelease) || + snap_flags != env->dbs_flags[i] || + snap_name.iov_base != env->kvs[i].name.iov_base || + snap_name.iov_len != env->kvs[i].name.iov_len)) + goto retry; + if (name_match) { + rc = dbi_check(txn, i); + if (rc == MDBX_BAD_DBI && txn->dbi_state[i] == (DBI_OLDEN | DBI_LINDO)) { + /* хендл использовался, стал невалидным, + * но теперь явно пере-открывается в этой транзакци */ + eASSERT(env, !txn->cursors[i]); + txn->dbi_state[i] = DBI_LINDO; + rc = dbi_check(txn, i); + } + if (likely(rc == MDBX_SUCCESS)) { + rc = dbi_bind(txn, i, user_flags, keycmp, datacmp); + if (likely(rc == MDBX_SUCCESS)) + *dbi = (MDBX_dbi)i; + } + return rc; + } + } + + /* Fail, if no free slot and max hit */ + if (unlikely(free_slot >= env->max_dbi)) + return MDBX_DBS_FULL; +#endif /* MDBX_ENABLE_DBI_LOCKFREE */ + + rc = osal_fastmutex_acquire(&txn->env->dbi_lock); + if (likely(rc == 
MDBX_SUCCESS)) { + rc = dbi_open_locked(txn, user_flags, dbi, keycmp, datacmp, *name); + ENSURE(txn->env, + osal_fastmutex_release(&txn->env->dbi_lock) == MDBX_SUCCESS); + } + return rc; +} + +static int dbi_open_cstr(MDBX_txn *txn, const char *name_cstr, + MDBX_db_flags_t flags, MDBX_dbi *dbi, + MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { + MDBX_val thunk, *name; + if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || + name_cstr == MDBX_CHK_META) + name = (void *)name_cstr; + else { + thunk.iov_len = strlen(name_cstr); + thunk.iov_base = (void *)name_cstr; + name = &thunk; + } + return dbi_open(txn, name, flags, dbi, keycmp, datacmp); +} + +struct dbi_rename_result { + defer_free_item_t *defer; + int err; +}; + +__cold static struct dbi_rename_result +dbi_rename_locked(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val new_name) { + struct dbi_rename_result pair; + pair.defer = nullptr; + pair.err = dbi_check(txn, dbi); + if (unlikely(pair.err != MDBX_SUCCESS)) + return pair; + + MDBX_env *const env = txn->env; + MDBX_val old_name = env->kvs[dbi].name; + if (env->kvs[MAIN_DBI].clc.k.cmp(&new_name, &old_name) == 0 && + MDBX_DEBUG == 0) + return pair; + + cursor_couple_t cx; + pair.err = cursor_init(&cx.outer, txn, MAIN_DBI); + if (unlikely(pair.err != MDBX_SUCCESS)) + return pair; + pair.err = cursor_seek(&cx.outer, &new_name, nullptr, MDBX_SET).err; + if (unlikely(pair.err != MDBX_NOTFOUND)) { + pair.err = (pair.err == MDBX_SUCCESS) ? 
MDBX_KEYEXIST : pair.err; + return pair; + } + + pair.defer = osal_malloc(dbi_namelen(new_name)); + if (unlikely(!pair.defer)) { + pair.err = MDBX_ENOMEM; + return pair; + } + new_name.iov_base = memcpy(pair.defer, new_name.iov_base, new_name.iov_len); + + cx.outer.next = txn->cursors[MAIN_DBI]; + txn->cursors[MAIN_DBI] = &cx.outer; + + MDBX_val data = {&txn->dbs[dbi], sizeof(tree_t)}; + pair.err = cursor_put_checklen(&cx.outer, &new_name, &data, + N_SUBDATA | MDBX_NOOVERWRITE); + if (likely(pair.err == MDBX_SUCCESS)) { + pair.err = cursor_seek(&cx.outer, &old_name, nullptr, MDBX_SET).err; + if (likely(pair.err == MDBX_SUCCESS)) + pair.err = cursor_del(&cx.outer, N_SUBDATA); + if (likely(pair.err == MDBX_SUCCESS)) { + pair.defer = env->kvs[dbi].name.iov_base; + env->kvs[dbi].name = new_name; + } else + txn->flags |= MDBX_TXN_ERROR; + } + + txn->cursors[MAIN_DBI] = cx.outer.next; + return pair; +} + +static defer_free_item_t *dbi_close_locked(MDBX_env *env, MDBX_dbi dbi) { + eASSERT(env, dbi >= CORE_DBS); + if (unlikely(dbi >= env->n_dbi)) + return nullptr; + + const uint32_t seq = dbi_seq_next(env, dbi); + defer_free_item_t *defer_item = env->kvs[dbi].name.iov_base; + if (likely(defer_item)) { + env->dbs_flags[dbi] = 0; + env->kvs[dbi].name.iov_len = 0; + env->kvs[dbi].name.iov_base = nullptr; + atomic_store32(&env->dbi_seqs[dbi], seq, mo_AcquireRelease); + osal_flush_incoherent_cpu_writeback(); + defer_item->next = nullptr; + + if (env->n_dbi == dbi + 1) { + size_t i = env->n_dbi; + do { + --i; + eASSERT(env, i >= CORE_DBS); + eASSERT(env, !env->dbs_flags[i] && !env->kvs[i].name.iov_len && + !env->kvs[i].name.iov_base); + } while (i > CORE_DBS && !env->kvs[i - 1].name.iov_base); + env->n_dbi = (unsigned)i; + } + } + + return defer_item; +} + +/*----------------------------------------------------------------------------*/ +/* API */ + +int mdbx_dbi_open(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, + MDBX_dbi *dbi) { + return dbi_open_cstr(txn, name, 
flags, dbi, nullptr, nullptr); +} + +int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, + MDBX_dbi *dbi) { + return dbi_open(txn, name, flags, dbi, nullptr, nullptr); +} + +int mdbx_dbi_open_ex(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, + MDBX_dbi *dbi, MDBX_cmp_func *keycmp, + MDBX_cmp_func *datacmp) { + return dbi_open_cstr(txn, name, flags, dbi, keycmp, datacmp); +} + +int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, + MDBX_db_flags_t flags, MDBX_dbi *dbi, + MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { + return dbi_open(txn, name, flags, dbi, keycmp, datacmp); +} + +__cold int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { + int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (txn->dbs[dbi].height) { + cx.outer.next = txn->cursors[dbi]; + txn->cursors[dbi] = &cx.outer; + rc = tree_drop(&cx.outer, + dbi == MAIN_DBI || (cx.outer.tree->flags & MDBX_DUPSORT)); + txn->cursors[dbi] = cx.outer.next; + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + + /* Invalidate the dropped DB's cursors */ + for (MDBX_cursor *mc = txn->cursors[dbi]; mc; mc = mc->next) + be_poor(mc); + + if (!del || dbi < CORE_DBS) { + /* reset the DB record, mark it dirty */ + txn->dbi_state[dbi] |= DBI_DIRTY; + txn->dbs[dbi].height = 0; + txn->dbs[dbi].branch_pages = 0; + txn->dbs[dbi].leaf_pages = 0; + txn->dbs[dbi].large_pages = 0; + txn->dbs[dbi].items = 0; + txn->dbs[dbi].root = P_INVALID; + txn->dbs[dbi].sequence = 0; + /* txn->dbs[dbi].mod_txnid = txn->txnid; */ + txn->flags |= MDBX_TXN_DIRTY; + return MDBX_SUCCESS; + } + + MDBX_env *const env = txn->env; + MDBX_val name = env->kvs[dbi].name; + rc = cursor_init(&cx.outer, txn, MAIN_DBI); + if (likely(rc == MDBX_SUCCESS)) { + rc = cursor_seek(&cx.outer, &name, nullptr, MDBX_SET).err; + if (likely(rc == MDBX_SUCCESS)) { + 
cx.outer.next = txn->cursors[MAIN_DBI]; + txn->cursors[MAIN_DBI] = &cx.outer; + rc = cursor_del(&cx.outer, N_SUBDATA); + txn->cursors[MAIN_DBI] = cx.outer.next; + if (likely(rc == MDBX_SUCCESS)) { + tASSERT(txn, txn->dbi_state[MAIN_DBI] & DBI_DIRTY); + tASSERT(txn, txn->flags & MDBX_TXN_DIRTY); + txn->dbi_state[dbi] = DBI_LINDO | DBI_OLDEN; + rc = osal_fastmutex_acquire(&env->dbi_lock); + if (likely(rc == MDBX_SUCCESS)) + return defer_and_release(env, dbi_close_locked(env, dbi)); + } + } + } + txn->flags |= MDBX_TXN_ERROR; + return rc; +} + +__cold int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name_cstr) { + MDBX_val thunk, *name; + if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || + name_cstr == MDBX_CHK_META) + name = (void *)name_cstr; + else { + thunk.iov_len = strlen(name_cstr); + thunk.iov_base = (void *)name_cstr; + name = &thunk; + } + return mdbx_dbi_rename2(txn, dbi, name); +} + +int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(dbi < CORE_DBS)) + return (dbi == MAIN_DBI) ? 
MDBX_SUCCESS : MDBX_BAD_DBI; + + if (unlikely(dbi >= env->max_dbi)) + return MDBX_BAD_DBI; + + /* Both guards above have passed, so CORE_DBS <= dbi < env->max_dbi here. */ + rc = osal_fastmutex_acquire(&env->dbi_lock); + if (likely(rc == MDBX_SUCCESS)) + rc = defer_and_release(env, dbi_close_locked(env, dbi)); + return rc; +} + +/* Stores into *flags the DB_PERSISTENT_FLAGS bits of txn->dbs[dbi].flags and + * into *state the DBI_FRESH/DBI_CREAT/DBI_DIRTY/DBI_STALE bits of + * txn->dbi_state[dbi]. + * Returns the check_txn() or dbi_check() error when either of them fails, + * MDBX_EINVAL when `flags` or `state` is null, otherwise MDBX_SUCCESS. */ +int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, + unsigned *state) { + int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!flags || !state)) + return MDBX_EINVAL; + + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + *flags = txn->dbs[dbi].flags & DB_PERSISTENT_FLAGS; + *state = + txn->dbi_state[dbi] & (DBI_FRESH | DBI_CREAT | DBI_DIRTY | DBI_STALE); + + return MDBX_SUCCESS; +} + +/* Renames the table behind `dbi` to *new_name once check_txn_rw() has accepted + * `txn`. The MDBX_CHK_MAIN/MDBX_CHK_GC/MDBX_CHK_META markers (given either as + * the pointer itself or as its iov_base) and handles below CORE_DBS are + * rejected with MDBX_EINVAL. The rename itself is done by dbi_rename_locked() + * after env->dbi_lock has been acquired; the buffer it returns in pair.defer + * is handed to defer_and_release(), and pair.err becomes the result. + * NOTE(review): the value returned by defer_and_release() is discarded here, + * whereas mdbx_dbi_close() propagates it - confirm this is intended. */ +__cold int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, + const MDBX_val *new_name) { + int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(new_name == MDBX_CHK_MAIN || + new_name->iov_base == MDBX_CHK_MAIN || new_name == MDBX_CHK_GC || + new_name->iov_base == MDBX_CHK_GC || new_name == MDBX_CHK_META || + new_name->iov_base == MDBX_CHK_META)) + return MDBX_EINVAL; + + if (unlikely(dbi < CORE_DBS)) + return MDBX_EINVAL; + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = osal_fastmutex_acquire(&txn->env->dbi_lock); + if (likely(rc == MDBX_SUCCESS)) { + struct dbi_rename_result pair = dbi_rename_locked(txn, dbi, *new_name); + if (pair.defer) + pair.defer->next = nullptr; + defer_and_release(txn->env, pair.defer); + rc = pair.err; + } + return rc; +} + +static void stat_get(const tree_t *db, MDBX_stat *st, size_t bytes) { + st->ms_depth = db->height; + st->ms_branch_pages = db->branch_pages; + st->ms_leaf_pages = db->leaf_pages; + st->ms_overflow_pages = db->large_pages; + st->ms_entries = db->items; + if (likely(bytes >= + offsetof(MDBX_stat,
ms_mod_txnid) + sizeof(st->ms_mod_txnid))) + st->ms_mod_txnid = db->mod_txnid; +} + +__cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, + size_t bytes) { + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!dest)) + return MDBX_EINVAL; + + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid); + if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) + return MDBX_EINVAL; + + if (unlikely(txn->flags & MDBX_TXN_BLOCKED)) + return MDBX_BAD_TXN; + + if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) { + rc = sdb_fetch((MDBX_txn *)txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + + dest->ms_psize = txn->env->ps; + stat_get(&txn->dbs[dbi], dest, bytes); + return MDBX_SUCCESS; +} diff --git a/src/dbi.h b/src/dbi.h new file mode 100644 index 00000000..29c1bf93 --- /dev/null +++ b/src/dbi.h @@ -0,0 +1,133 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL size_t +dbi_bitmap_ctz_fallback(const MDBX_txn *txn, intptr_t bmi); + +#if MDBX_ENABLE_DBI_SPARSE + +static inline size_t dbi_bitmap_ctz(const MDBX_txn *txn, intptr_t bmi) { + tASSERT(txn, bmi > 0); + STATIC_ASSERT(sizeof(bmi) >= sizeof(txn->dbi_sparse[0])); +#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctzl) + if (sizeof(txn->dbi_sparse[0]) <= sizeof(int)) + return __builtin_ctz((int)bmi); + if (sizeof(txn->dbi_sparse[0]) == sizeof(long)) + return __builtin_ctzl((long)bmi); +#if (defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ == 8) || \ + __has_builtin(__builtin_ctzll) + return __builtin_ctzll(bmi); +#endif /* have(long long) && long long == uint64_t */ +#endif /* GNU C */ + +#if defined(_MSC_VER) + unsigned long index; + if 
(sizeof(txn->dbi_sparse[0]) > 4) { +#if defined(_M_AMD64) || defined(_M_ARM64) || defined(_M_X64) + _BitScanForward64(&index, bmi); + return index; +#else + /* Without _BitScanForward64: the low half is served by the common scan + * below, so the high half is consulted only when the low half is empty, + * and the bit index found there is rebased by 32. */ + if ((uint32_t)bmi == 0) { + _BitScanForward(&index, (uint32_t)((uint64_t)bmi >> 32)); + return index + 32; + } +#endif + } + _BitScanForward(&index, (uint32_t)bmi); + return index; +#endif /* MSVC */ + + return dbi_bitmap_ctz_fallback(txn, bmi); +} + +/* LY: The macro is deliberately built from a single loop so that the `break` + * statement stays usable in the loop body. + * TXN and FROM are expanded more than once, so pass expressions without + * side effects. */ +#define TXN_FOREACH_DBI_FROM(TXN, I, FROM) \ + for (size_t bitmap_chunk = CHAR_BIT * sizeof((TXN)->dbi_sparse[0]), \ + bitmap_item = (TXN)->dbi_sparse[0] >> (FROM), I = (FROM); \ + I < (TXN)->n_dbi; ++I) \ + if (bitmap_item == 0) { \ + I = (I - 1) | (bitmap_chunk - 1); \ + bitmap_item = (TXN)->dbi_sparse[(1 + I) / bitmap_chunk]; \ + if (!bitmap_item) \ + I += bitmap_chunk; \ + continue; \ + } else if ((bitmap_item & 1) == 0) { \ + size_t bitmap_skip = dbi_bitmap_ctz(TXN, bitmap_item); \ + bitmap_item >>= bitmap_skip; \ + I += bitmap_skip - 1; \ + continue; \ + } else if (bitmap_item >>= 1, (TXN)->dbi_state[I]) + +#else + +#define TXN_FOREACH_DBI_FROM(TXN, I, SKIP) \ + for (size_t I = (SKIP); I < (TXN)->n_dbi; ++I) \ + if ((TXN)->dbi_state[I]) + +#endif /* MDBX_ENABLE_DBI_SPARSE */ + +#define TXN_FOREACH_DBI_ALL(TXN, I) TXN_FOREACH_DBI_FROM(TXN, I, 0) +#define TXN_FOREACH_DBI_USER(TXN, I) TXN_FOREACH_DBI_FROM(TXN, I, CORE_DBS) + +MDBX_INTERNAL int dbi_import(MDBX_txn *txn, const size_t dbi); + +struct dbi_snap_result { + uint32_t sequence; + unsigned flags; +}; +MDBX_INTERNAL struct dbi_snap_result dbi_snap(const MDBX_env *env, + const size_t dbi); + +MDBX_INTERNAL int dbi_update(MDBX_txn *txn, int keep); + +static inline uint8_t dbi_state(const MDBX_txn *txn, const size_t dbi) { + STATIC_ASSERT( + (int)DBI_DIRTY == MDBX_DBI_DIRTY && (int)DBI_STALE == MDBX_DBI_STALE && + (int)DBI_FRESH == MDBX_DBI_FRESH && (int)DBI_CREAT == MDBX_DBI_CREAT); + +#if MDBX_ENABLE_DBI_SPARSE +
const size_t bitmap_chunk = CHAR_BIT * sizeof(txn->dbi_sparse[0]); + const size_t bitmap_indx = dbi / bitmap_chunk; + const size_t bitmap_mask = (size_t)1 << dbi % bitmap_chunk; + return likely(dbi < txn->n_dbi && + (txn->dbi_sparse[bitmap_indx] & bitmap_mask) != 0) + ? txn->dbi_state[dbi] + : 0; +#else + return likely(dbi < txn->n_dbi) ? txn->dbi_state[dbi] : 0; +#endif /* MDBX_ENABLE_DBI_SPARSE */ +} +/* Reports whether the current value of env->dbi_seqs[dbi] differs from the + * one recorded in txn->dbi_seqs[dbi]. The handle state is expected to have + * DBI_LINDO set (asserted). */ +static inline bool dbi_changed(const MDBX_txn *txn, const size_t dbi) { + const MDBX_env *const env = txn->env; + eASSERT(env, dbi_state(txn, dbi) & DBI_LINDO); + const uint32_t snap_seq = + atomic_load32(&env->dbi_seqs[dbi], mo_AcquireRelease); + return snap_seq != txn->dbi_seqs[dbi]; +} +/* Validates handle `dbi` for `txn`. Fast path: when the state has DBI_LINDO + * set and dbi_changed() reports no change, the answer comes straight from + * the DBI_VALID bit (MDBX_SUCCESS or MDBX_BAD_DBI). Every other case is + * delegated to dbi_import(), which takes a non-const txn, hence the cast. */ +static inline int dbi_check(const MDBX_txn *txn, const size_t dbi) { + const uint8_t state = dbi_state(txn, dbi); + if (likely((state & DBI_LINDO) != 0 && !dbi_changed(txn, dbi))) + return (state & DBI_VALID) ? MDBX_SUCCESS : MDBX_BAD_DBI; + + /* Slow path: lazy deferred initialization and import */ + return dbi_import((MDBX_txn *)txn, dbi); +} + +static inline uint32_t dbi_seq_next(const MDBX_env *const env, size_t dbi) { + uint32_t v = atomic_load32(&env->dbi_seqs[dbi], mo_AcquireRelease) + 1; + return v ?
v : 1; +} + +MDBX_INTERNAL int dbi_open(MDBX_txn *txn, const MDBX_val *const name, + unsigned user_flags, MDBX_dbi *dbi, + MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); + +MDBX_INTERNAL int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, + MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); diff --git a/src/dpl.c b/src/dpl.c new file mode 100644 index 00000000..5e9f4485 --- /dev/null +++ b/src/dpl.c @@ -0,0 +1,520 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +static inline size_t dpl_size2bytes(ptrdiff_t size) { + assert(size > CURSOR_STACK_SIZE && (size_t)size <= PAGELIST_LIMIT); +#if MDBX_DPL_PREALLOC_FOR_RADIXSORT + size += size; +#endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */ + STATIC_ASSERT(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(dpl_t) + + (PAGELIST_LIMIT * (MDBX_DPL_PREALLOC_FOR_RADIXSORT + 1)) * + sizeof(dp_t) + + MDBX_PNL_GRANULATE * sizeof(void *) * 2 < + SIZE_MAX / 4 * 3); + size_t bytes = ceil_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(dpl_t) + + size * sizeof(dp_t), + MDBX_PNL_GRANULATE * sizeof(void *) * 2) - + MDBX_ASSUME_MALLOC_OVERHEAD; + return bytes; +} + +static inline size_t dpl_bytes2size(const ptrdiff_t bytes) { + size_t size = (bytes - sizeof(dpl_t)) / sizeof(dp_t); + assert(size > CURSOR_STACK_SIZE && + size <= PAGELIST_LIMIT + MDBX_PNL_GRANULATE); +#if MDBX_DPL_PREALLOC_FOR_RADIXSORT + size >>= 1; +#endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */ + return size; +} + +void dpl_free(MDBX_txn *txn) { + if (likely(txn->tw.dirtylist)) { + osal_free(txn->tw.dirtylist); + txn->tw.dirtylist = nullptr; + } +} + +dpl_t *dpl_reserve(MDBX_txn *txn, size_t size) { + tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + + size_t bytes = + dpl_size2bytes((size < PAGELIST_LIMIT) ? 
size : PAGELIST_LIMIT); + dpl_t *const dl = osal_realloc(txn->tw.dirtylist, bytes); + if (likely(dl)) { +#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) + bytes = malloc_usable_size(dl); +#endif /* malloc_usable_size */ + dl->detent = dpl_bytes2size(bytes); + tASSERT(txn, txn->tw.dirtylist == nullptr || dl->length <= dl->detent); + txn->tw.dirtylist = dl; + } + return dl; +} + +int dpl_alloc(MDBX_txn *txn) { + tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + + const size_t wanna = (txn->env->options.dp_initial < txn->geo.upper) + ? txn->env->options.dp_initial + : txn->geo.upper; +#if MDBX_FORCE_ASSERTIONS || MDBX_DEBUG + if (txn->tw.dirtylist) + /* обнуляем чтобы не сработал ассерт внутри dpl_reserve() */ + txn->tw.dirtylist->sorted = txn->tw.dirtylist->length = 0; +#endif /* asertions enabled */ + if (unlikely(!txn->tw.dirtylist || txn->tw.dirtylist->detent < wanna || + txn->tw.dirtylist->detent > wanna + wanna) && + unlikely(!dpl_reserve(txn, wanna))) + return MDBX_ENOMEM; + + dpl_clear(txn->tw.dirtylist); + return MDBX_SUCCESS; +} + +#define MDBX_DPL_EXTRACT_KEY(ptr) ((ptr)->pgno) +RADIXSORT_IMPL(dp, dp_t, MDBX_DPL_EXTRACT_KEY, MDBX_DPL_PREALLOC_FOR_RADIXSORT, + 1) + +#define DP_SORT_CMP(first, last) ((first).pgno < (last).pgno) +SORT_IMPL(dp_sort, false, dp_t, DP_SORT_CMP) + +__hot __noinline dpl_t *dpl_sort_slowpath(const MDBX_txn *txn) { + tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + + dpl_t *dl = txn->tw.dirtylist; + assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); + const size_t unsorted = dl->length - dl->sorted; + if (likely(unsorted < MDBX_RADIXSORT_THRESHOLD) || + unlikely(!dp_radixsort(dl->items + 1, dl->length))) { + if (dl->sorted > unsorted / 4 + 4 && + (MDBX_DPL_PREALLOC_FOR_RADIXSORT || + dl->length + unsorted < dl->detent + 
dpl_gap_mergesort)) { + dp_t *const sorted_begin = dl->items + 1; + dp_t *const sorted_end = sorted_begin + dl->sorted; + dp_t *const end = dl->items + (MDBX_DPL_PREALLOC_FOR_RADIXSORT + ? dl->length + dl->length + 1 + : dl->detent + dpl_reserve_gap); + dp_t *const tmp = end - unsorted; + assert(dl->items + dl->length + 1 < tmp); + /* copy unsorted to the end of allocated space and sort it */ + memcpy(tmp, sorted_end, unsorted * sizeof(dp_t)); + dp_sort(tmp, tmp + unsorted); + /* merge two parts from end to begin */ + dp_t *__restrict w = dl->items + dl->length; + dp_t *__restrict l = dl->items + dl->sorted; + dp_t *__restrict r = end - 1; + do { + const bool cmp = expect_with_probability(l->pgno > r->pgno, 0, .5); +#if defined(__LCC__) || __CLANG_PREREQ(13, 0) || !MDBX_HAVE_CMOV + *w = cmp ? *l-- : *r--; +#else + *w = cmp ? *l : *r; + l -= cmp; + r += (ptrdiff_t)cmp - 1; +#endif + } while (likely(--w > l)); + assert(r == tmp - 1); + assert(dl->items[0].pgno == 0 && + dl->items[dl->length + 1].pgno == P_INVALID); + if (ASSERT_ENABLED()) + for (size_t i = 0; i <= dl->length; ++i) + assert(dl->items[i].pgno < dl->items[i + 1].pgno); + } else { + dp_sort(dl->items + 1, dl->items + dl->length + 1); + assert(dl->items[0].pgno == 0 && + dl->items[dl->length + 1].pgno == P_INVALID); + } + } else { + assert(dl->items[0].pgno == 0 && + dl->items[dl->length + 1].pgno == P_INVALID); + } + dl->sorted = dl->length; + return dl; +} + +/* Returns the index of the first dirty-page whose pgno + * member is greater than or equal to id. 
*/ +#define DP_SEARCH_CMP(dp, id) ((dp).pgno < (id)) +SEARCH_IMPL(dp_bsearch, dp_t, pgno_t, DP_SEARCH_CMP) + +__hot __noinline MDBX_INTERNAL size_t dpl_search(const MDBX_txn *txn, + pgno_t pgno) { + tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + + dpl_t *dl = txn->tw.dirtylist; + assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); + if (AUDIT_ENABLED()) { + for (const dp_t *ptr = dl->items + dl->sorted; --ptr > dl->items;) { + assert(ptr[0].pgno < ptr[1].pgno); + assert(ptr[0].pgno >= NUM_METAS); + } + } + + switch (dl->length - dl->sorted) { + default: + /* sort a whole */ + dpl_sort_slowpath(txn); + break; + case 0: + /* whole sorted cases */ + break; + +#define LINEAR_SEARCH_CASE(N) \ + case N: \ + if (dl->items[dl->length - N + 1].pgno == pgno) \ + return dl->length - N + 1; \ + __fallthrough + + /* use linear scan until the threshold */ + LINEAR_SEARCH_CASE(7); /* fall through */ + LINEAR_SEARCH_CASE(6); /* fall through */ + LINEAR_SEARCH_CASE(5); /* fall through */ + LINEAR_SEARCH_CASE(4); /* fall through */ + LINEAR_SEARCH_CASE(3); /* fall through */ + LINEAR_SEARCH_CASE(2); /* fall through */ + case 1: + if (dl->items[dl->length].pgno == pgno) + return dl->length; + /* continue bsearch on the sorted part */ + break; + } + return dp_bsearch(dl->items + 1, dl->sorted, pgno) - dl->items; +} + +const page_t *debug_dpl_find(const MDBX_txn *txn, const pgno_t pgno) { + tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); + const dpl_t *dl = txn->tw.dirtylist; + if (dl) { + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + assert(dl->items[0].pgno == 0 && + dl->items[dl->length + 1].pgno == P_INVALID); + for (size_t i = dl->length; i > dl->sorted; --i) + if (dl->items[i].pgno == pgno) + return dl->items[i].ptr; + + if (dl->sorted) { + const size_t i = dp_bsearch(dl->items + 1, dl->sorted, pgno) - dl->items; + if (dl->items[i].pgno == pgno) + 
return dl->items[i].ptr; + } + } else { + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); + } + return nullptr; +} + +void dpl_remove_ex(const MDBX_txn *txn, size_t i, size_t npages) { + tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + + dpl_t *dl = txn->tw.dirtylist; + assert((intptr_t)i > 0 && i <= dl->length); + assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); + dl->pages_including_loose -= npages; + dl->sorted -= dl->sorted >= i; + dl->length -= 1; + memmove(dl->items + i, dl->items + i + 1, + (dl->length - i + 2) * sizeof(dl->items[0])); + assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); +} + +int __must_check_result dpl_append(MDBX_txn *txn, pgno_t pgno, page_t *page, + size_t npages) { + tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + const dp_t dp = {page, pgno, (pgno_t)npages}; + if ((txn->flags & MDBX_WRITEMAP) == 0) { + size_t *const ptr = ptr_disp(page, -(ptrdiff_t)sizeof(size_t)); + *ptr = txn->tw.dirtylru; + } + + dpl_t *dl = txn->tw.dirtylist; + tASSERT(txn, dl->length <= PAGELIST_LIMIT + MDBX_PNL_GRANULATE); + tASSERT(txn, dl->items[0].pgno == 0 && + dl->items[dl->length + 1].pgno == P_INVALID); + if (AUDIT_ENABLED()) { + for (size_t i = dl->length; i > 0; --i) { + assert(dl->items[i].pgno != dp.pgno); + if (unlikely(dl->items[i].pgno == dp.pgno)) { + ERROR("Page %u already exist in the DPL at %zu", dp.pgno, i); + return MDBX_PROBLEM; + } + } + } + + if (unlikely(dl->length == dl->detent)) { + if (unlikely(dl->detent >= PAGELIST_LIMIT)) { + ERROR("DPL is full (PAGELIST_LIMIT %zu)", PAGELIST_LIMIT); + return MDBX_TXN_FULL; + } + const size_t size = (dl->detent < MDBX_PNL_INITIAL * 42) + ? 
dl->detent + dl->detent + : dl->detent + dl->detent / 2; + dl = dpl_reserve(txn, size); + if (unlikely(!dl)) + return MDBX_ENOMEM; + tASSERT(txn, dl->length < dl->detent); + } + + /* Сортировка нужна для быстрого поиска, используем несколько тактик: + * 1) Сохраняем упорядоченность при естественной вставке в нужном порядке. + * 2) Добавляем в не-сортированный хвост, который сортируем и сливаем + * с отсортированной головой по необходимости, а пока хвост короткий + * ищем в нём сканированием, избегая большой пересортировки. + * 3) Если не-сортированный хвост короткий, а добавляемый элемент близок + * к концу отсортированной головы, то выгоднее сразу вставить элемент + * в нужное место. + * + * Алгоритмически: + * - добавлять в не-сортированный хвост следует только если вставка сильно + * дорогая, т.е. если целевая позиция элемента сильно далека от конца; + * - для быстрой проверки достаточно сравнить добавляемый элемент с отстоящим + * от конца на максимально-приемлемое расстояние; + * - если список короче, либо элемент в этой позиции меньше вставляемого, + * то следует перемещать элементы и вставлять в отсортированную голову; + * - если не-сортированный хвост длиннее, либо элемент в этой позиции больше, + * то следует добавлять в не-сортированный хвост. */ + + dl->pages_including_loose += npages; + dp_t *i = dl->items + dl->length; + + const ptrdiff_t pivot = (ptrdiff_t)dl->length - dpl_insertion_threshold; +#if MDBX_HAVE_CMOV + const pgno_t pivot_pgno = + dl->items[(dl->length < dpl_insertion_threshold) + ? 
0 + : dl->length - dpl_insertion_threshold] + .pgno; +#endif /* MDBX_HAVE_CMOV */ + + /* copy the stub beyond the end */ + i[2] = i[1]; + dl->length += 1; + + if (likely(pivot <= (ptrdiff_t)dl->sorted) && +#if MDBX_HAVE_CMOV + pivot_pgno < dp.pgno) { +#else + (pivot <= 0 || dl->items[pivot].pgno < dp.pgno)) { +#endif /* MDBX_HAVE_CMOV */ + dl->sorted += 1; + + /* сдвигаем несортированный хвост */ + while (i >= dl->items + dl->sorted) { +#if !defined(__GNUC__) /* пытаемся избежать вызова memmove() */ + i[1] = *i; +#elif MDBX_WORDBITS == 64 && \ + (defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)) + STATIC_ASSERT(sizeof(dp) == sizeof(__uint128_t)); + ((__uint128_t *)i)[1] = *(volatile __uint128_t *)i; +#else + i[1].ptr = i->ptr; + i[1].pgno = i->pgno; + i[1].npages = i->npages; +#endif + --i; + } + /* ищем нужную позицию сдвигая отсортированные элементы */ + while (i->pgno > pgno) { + tASSERT(txn, i > dl->items); + i[1] = *i; + --i; + } + tASSERT(txn, i->pgno < dp.pgno); + } + + i[1] = dp; + assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); + assert(dl->sorted <= dl->length); + return MDBX_SUCCESS; +} + +__cold bool dpl_check(MDBX_txn *txn) { + tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); + const dpl_t *const dl = txn->tw.dirtylist; + if (!dl) { + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); + return true; + } + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + + assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); + tASSERT(txn, txn->tw.dirtyroom + dl->length == + (txn->parent ? 
txn->parent->tw.dirtyroom + : txn->env->options.dp_limit)); + + if (!AUDIT_ENABLED()) + return true; + + size_t loose = 0, pages = 0; + for (size_t i = dl->length; i > 0; --i) { + const page_t *const dp = dl->items[i].ptr; + if (!dp) + continue; + + tASSERT(txn, dp->pgno == dl->items[i].pgno); + if (unlikely(dp->pgno != dl->items[i].pgno)) + return false; + + if ((txn->flags & MDBX_WRITEMAP) == 0) { + const uint32_t age = dpl_age(txn, i); + tASSERT(txn, age < UINT32_MAX / 3); + if (unlikely(age > UINT32_MAX / 3)) + return false; + } + + tASSERT(txn, dp->flags == P_LOOSE || is_modifable(txn, dp)); + if (dp->flags == P_LOOSE) { + loose += 1; + } else if (unlikely(!is_modifable(txn, dp))) + return false; + + const unsigned num = dpl_npages(dl, i); + pages += num; + tASSERT(txn, txn->geo.first_unallocated >= dp->pgno + num); + if (unlikely(txn->geo.first_unallocated < dp->pgno + num)) + return false; + + if (i < dl->sorted) { + tASSERT(txn, dl->items[i + 1].pgno >= dp->pgno + num); + if (unlikely(dl->items[i + 1].pgno < dp->pgno + num)) + return false; + } + + const size_t rpa = + pnl_search(txn->tw.relist, dp->pgno, txn->geo.first_unallocated); + tASSERT(txn, rpa > MDBX_PNL_GETSIZE(txn->tw.relist) || + txn->tw.relist[rpa] != dp->pgno); + if (rpa <= MDBX_PNL_GETSIZE(txn->tw.relist) && + unlikely(txn->tw.relist[rpa] == dp->pgno)) + return false; + if (num > 1) { + const size_t rpb = pnl_search(txn->tw.relist, dp->pgno + num - 1, + txn->geo.first_unallocated); + tASSERT(txn, rpa == rpb); + if (unlikely(rpa != rpb)) + return false; + } + } + + tASSERT(txn, loose == txn->tw.loose_count); + if (unlikely(loose != txn->tw.loose_count)) + return false; + + tASSERT(txn, pages == dl->pages_including_loose); + if (unlikely(pages != dl->pages_including_loose)) + return false; + + for (size_t i = 1; i <= MDBX_PNL_GETSIZE(txn->tw.retired_pages); ++i) { + const page_t *const dp = debug_dpl_find(txn, txn->tw.retired_pages[i]); + tASSERT(txn, !dp); + if (unlikely(dp)) + return false; + 
} + + return true; +} + +/*----------------------------------------------------------------------------*/ + +/* Halves the LRU clock tw.dirtylru and the size_t stamp stored immediately before every dirty page, first for txn and then for each ancestor reached through txn->parent. Asserts that txn is neither read-only nor in MDBX_WRITEMAP mode. */__noinline void dpl_lru_reduce(MDBX_txn *txn) { + NOTICE("lru-reduce %u -> %u", txn->tw.dirtylru, txn->tw.dirtylru >> 1); + tASSERT(txn, (txn->flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0); + do { + txn->tw.dirtylru >>= 1; + dpl_t *dl = txn->tw.dirtylist; + for (size_t i = 1; i <= dl->length; ++i) { + /* the per-page stamp occupies the size_t slot right before the page */size_t *const ptr = + ptr_disp(dl->items[i].ptr, -(ptrdiff_t)sizeof(size_t)); + *ptr >>= 1; + } + txn = txn->parent; + } while (txn); +} + +/* Removes from the dirty list of txn every entry whose page number occurs in pl. Each entry of pl is shifted right by spilled (0 or 1) before it is compared, and with spilled set its low bit is asserted to be clear. For every removed entry pages_including_loose is decreased and, unless MDBX_AVOID_MSYNC is combined with MDBX_WRITEMAP, the page shadow is released. Afterwards the list is compacted and the freed slots are added to tw.dirtyroom. Does nothing when pl or the dirty list is empty. */void dpl_sift(MDBX_txn *const txn, pnl_t pl, const bool spilled) { + tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + if (MDBX_PNL_GETSIZE(pl) && txn->tw.dirtylist->length) { + tASSERT(txn, pnl_check_allocated(pl, (size_t)txn->geo.first_unallocated + << spilled)); + dpl_t *dl = dpl_sort(txn); + + /* Scan pl in ascending page-number order, whichever direction it is stored in */ + const intptr_t step = MDBX_PNL_ASCENDING ? 1 : -1; + const intptr_t begin = MDBX_PNL_ASCENDING ? 1 : MDBX_PNL_GETSIZE(pl); + const intptr_t end = MDBX_PNL_ASCENDING ? MDBX_PNL_GETSIZE(pl) + 1 : 0; + tASSERT(txn, pl[begin] <= pl[end - step]); + + size_t w, r = dpl_search(txn, pl[begin] >> spilled); + tASSERT(txn, dl->sorted == dl->length); + for (intptr_t i = begin; r <= dl->length;) { /* scan loop: advance whichever side holds the smaller pgno until the first match */ + assert(i != end); + tASSERT(txn, !spilled || (pl[i] & 1) == 0); + pgno_t pl_pgno = pl[i] >> spilled; + pgno_t dp_pgno = dl->items[r].pgno; + if (likely(dp_pgno != pl_pgno)) { + const bool cmp = dp_pgno < pl_pgno; + r += cmp; + i += cmp ? 
0 : step; + if (likely(i != end)) + continue; + return; + } + + /* update loop: r is the read cursor, w the write cursor for the items that are kept */ + unsigned npages; + w = r; + remove_dl: + npages = dpl_npages(dl, r); + dl->pages_including_loose -= npages; + if (!MDBX_AVOID_MSYNC || !(txn->flags & MDBX_WRITEMAP)) + page_shadow_release(txn->env, dl->items[r].ptr, npages); + ++r; + next_i: + i += step; + if (unlikely(i == end)) { + /* pl is exhausted: keep the whole remainder of the dirty list */while (r <= dl->length) + dl->items[w++] = dl->items[r++]; + } else { + while (r <= dl->length) { + assert(i != end); + tASSERT(txn, !spilled || (pl[i] & 1) == 0); + pl_pgno = pl[i] >> spilled; + dp_pgno = dl->items[r].pgno; + if (dp_pgno < pl_pgno) + dl->items[w++] = dl->items[r++]; + else if (dp_pgno > pl_pgno) + goto next_i; + else + goto remove_dl; + } + } + /* w is one past the last kept item, so r - w entries were dropped */dl->sorted = dpl_setlen(dl, w - 1); + txn->tw.dirtyroom += r - w; + tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == + (txn->parent ? txn->parent->tw.dirtyroom + : txn->env->options.dp_limit)); + return; + } + } +} + +/* Releases the shadow of every page on the dirty list of txn and then empties the list with dpl_clear(). Asserts that txn is neither read-only nor in MDBX_WRITEMAP mode. */void dpl_release_shadows(MDBX_txn *txn) { + tASSERT(txn, (txn->flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0); + MDBX_env *env = txn->env; + dpl_t *const dl = txn->tw.dirtylist; + + for (size_t i = 1; i <= dl->length; i++) + page_shadow_release(env, dl->items[i].ptr, dpl_npages(dl, i)); + + dpl_clear(dl); +} diff --git a/src/dpl.h b/src/dpl.h new file mode 100644 index 00000000..9d2f59c6 --- /dev/null +++ b/src/dpl.h @@ -0,0 +1,145 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +/* Sets the list length to len and rewrites the terminator slot items[len + 1] so that it refers to a static stub page flagged P_BAD, with pgno P_INVALID and npages 1. Returns len. */static inline size_t dpl_setlen(dpl_t *dl, size_t len) { + static const page_t dpl_stub_pageE = {INVALID_TXNID, + 0, + P_BAD, + {0}, + /* pgno */ ~(pgno_t)0}; + assert(dpl_stub_pageE.flags == P_BAD && dpl_stub_pageE.pgno == P_INVALID); + dl->length = len; + dl->items[len + 1].ptr = (page_t *)&dpl_stub_pageE; + dl->items[len + 1].pgno = P_INVALID; + dl->items[len + 1].npages = 1; + return len; +} + +static inline void 
/* Resets the list to empty: length, sorted and pages_including_loose become zero, the terminator is rewritten by dpl_setlen(), and the leading sentinel items[0] is pointed at a static stub page flagged P_BAD with pgno 0. */dpl_clear(dpl_t *dl) { + static const page_t dpl_stub_pageB = {INVALID_TXNID, + 0, + P_BAD, + {0}, + /* pgno */ 0}; + assert(dpl_stub_pageB.flags == P_BAD && dpl_stub_pageB.pgno == 0); + dl->sorted = dpl_setlen(dl, 0); + dl->pages_including_loose = 0; + dl->items[0].ptr = (page_t *)&dpl_stub_pageB; + dl->items[0].pgno = 0; + dl->items[0].npages = 1; + assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); +} + +MDBX_INTERNAL int __must_check_result dpl_alloc(MDBX_txn *txn); + +MDBX_INTERNAL void dpl_free(MDBX_txn *txn); + +MDBX_INTERNAL dpl_t *dpl_reserve(MDBX_txn *txn, size_t size); + +MDBX_INTERNAL __noinline dpl_t *dpl_sort_slowpath(const MDBX_txn *txn); + +/* Returns the dirty list of txn in sorted state: the list itself when dl->sorted already equals dl->length, otherwise the result of dpl_sort_slowpath(). Asserts a writable txn and intact sentinels at items[0] and items[length + 1]. */static inline dpl_t *dpl_sort(const MDBX_txn *txn) { + tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + + dpl_t *dl = txn->tw.dirtylist; + tASSERT(txn, dl->length <= PAGELIST_LIMIT); + tASSERT(txn, dl->sorted <= dl->length); + tASSERT(txn, dl->items[0].pgno == 0 && + dl->items[dl->length + 1].pgno == P_INVALID); + return likely(dl->sorted == dl->length) ? dl : dpl_sort_slowpath(txn); +} + +MDBX_INTERNAL __noinline size_t dpl_search(const MDBX_txn *txn, pgno_t pgno); + +MDBX_MAYBE_UNUSED MDBX_INTERNAL const page_t * +debug_dpl_find(const MDBX_txn *txn, const pgno_t pgno); + +/* Returns the page count recorded for item i of the list. In assert-enabled builds the value is cross-checked against the page itself: the pages field for a large page, otherwise 1. */MDBX_NOTHROW_PURE_FUNCTION static inline unsigned dpl_npages(const dpl_t *dl, + size_t i) { + assert(0 <= (intptr_t)i && i <= dl->length); + unsigned n = dl->items[i].npages; + assert(n == (is_largepage(dl->items[i].ptr) ? 
dl->items[i].ptr->pages : 1)); + return n; +} + +/* Returns the page number just past the end of item i, i.e. its pgno plus its page count. */MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t dpl_endpgno(const dpl_t *dl, + size_t i) { + return dpl_npages(dl, i) + dl->items[i].pgno; +} + +/* Tells whether the page range starting at pgno and spanning npages pages overlaps any item of the dirty list of txn. The list is asserted to be fully sorted; only the item located by dpl_search() and its predecessor are inspected. With asserts enabled the answer is verified by a full scan of the list. */static inline bool dpl_intersect(const MDBX_txn *txn, pgno_t pgno, + size_t npages) { + tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + + dpl_t *dl = txn->tw.dirtylist; + tASSERT(txn, dl->sorted == dl->length); + tASSERT(txn, dl->items[0].pgno == 0 && + dl->items[dl->length + 1].pgno == P_INVALID); + size_t const n = dpl_search(txn, pgno); + tASSERT(txn, n >= 1 && n <= dl->length + 1); + tASSERT(txn, pgno <= dl->items[n].pgno); + tASSERT(txn, pgno > dl->items[n - 1].pgno); + const bool rc = + /* intersection with the found item */ pgno + npages > dl->items[n].pgno || + /* intersection with the previous item */ dpl_endpgno(dl, n - 1) > pgno; + if (ASSERT_ENABLED()) { + /* brute-force cross-check of the result above */bool check = false; + for (size_t i = 1; i <= dl->length; ++i) { + const page_t *const dp = dl->items[i].ptr; + if (!(dp->pgno /* begin */ >= /* end */ pgno + npages || + dpl_endpgno(dl, i) /* end */ <= /* begin */ pgno)) + check |= true; + } + tASSERT(txn, check == rc); + } + return rc; +} + +/* Returns the index of the dirty-list item whose pgno equals the given one, or 0 when dpl_search() lands on an item with a different pgno. */MDBX_NOTHROW_PURE_FUNCTION static inline size_t dpl_exist(const MDBX_txn *txn, + pgno_t pgno) { + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + dpl_t *dl = txn->tw.dirtylist; + size_t i = dpl_search(txn, pgno); + tASSERT(txn, (int)i > 0); + return (dl->items[i].pgno == pgno) ? 
i : 0; +} + +MDBX_INTERNAL void dpl_remove_ex(const MDBX_txn *txn, size_t i, size_t npages); + +static inline void dpl_remove(const MDBX_txn *txn, size_t i) { + dpl_remove_ex(txn, i, dpl_npages(txn->tw.dirtylist, i)); +} + +MDBX_INTERNAL int __must_check_result dpl_append(MDBX_txn *txn, pgno_t pgno, + page_t *page, size_t npages); + +MDBX_MAYBE_UNUSED MDBX_INTERNAL bool dpl_check(MDBX_txn *txn); + +MDBX_NOTHROW_PURE_FUNCTION static inline uint32_t dpl_age(const MDBX_txn *txn, + size_t i) { + tASSERT(txn, (txn->flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0); + const dpl_t *dl = txn->tw.dirtylist; + assert((intptr_t)i > 0 && i <= dl->length); + size_t *const ptr = ptr_disp(dl->items[i].ptr, -(ptrdiff_t)sizeof(size_t)); + return txn->tw.dirtylru - (uint32_t)*ptr; +} + +MDBX_INTERNAL void dpl_lru_reduce(MDBX_txn *txn); + +static inline uint32_t dpl_lru_turn(MDBX_txn *txn) { + txn->tw.dirtylru += 1; + if (unlikely(txn->tw.dirtylru > UINT32_MAX / 3) && + (txn->flags & MDBX_WRITEMAP) == 0) + dpl_lru_reduce(txn); + return txn->tw.dirtylru; +} + +MDBX_INTERNAL void dpl_sift(MDBX_txn *const txn, pnl_t pl, const bool spilled); + +MDBX_INTERNAL void dpl_release_shadows(MDBX_txn *txn); diff --git a/src/dxb.c b/src/dxb.c new file mode 100644 index 00000000..cacf5975 --- /dev/null +++ b/src/dxb.c @@ -0,0 +1,1553 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +__cold int dxb_read_header(MDBX_env *env, meta_t *dest, const int lck_exclusive, + const mdbx_mode_t mode_bits) { + memset(dest, 0, sizeof(meta_t)); + int rc = osal_filesize(env->lazy_fd, &env->dxb_mmap.filesize); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + unaligned_poke_u64(4, dest->sign, DATASIGN_WEAK); + rc = MDBX_CORRUPTED; + + /* Read twice all meta pages so we can find the latest one. */ + unsigned loop_limit = NUM_METAS * 2; + /* We don't know the page size on first time. So, just guess it. 
*/ + unsigned guess_pagesize = 0; + for (unsigned loop_count = 0; loop_count < loop_limit; ++loop_count) { + const unsigned meta_number = loop_count % NUM_METAS; + const unsigned offset = + (guess_pagesize ? guess_pagesize + : (loop_count > NUM_METAS) ? env->ps + : globals.sys_pagesize) * + meta_number; + + char buffer[MDBX_MIN_PAGESIZE]; + unsigned retryleft = 42; + while (1) { + TRACE("reading meta[%d]: offset %u, bytes %u, retry-left %u", meta_number, + offset, MDBX_MIN_PAGESIZE, retryleft); + int err = osal_pread(env->lazy_fd, buffer, MDBX_MIN_PAGESIZE, offset); + if (err == MDBX_ENODATA && offset == 0 && loop_count == 0 && + env->dxb_mmap.filesize == 0 && + mode_bits /* non-zero for DB creation */ != 0) { + NOTICE("read meta: empty file (%d, %s)", err, mdbx_strerror(err)); + return err; + } +#if defined(_WIN32) || defined(_WIN64) + if (err == ERROR_LOCK_VIOLATION) { + SleepEx(0, true); + err = osal_pread(env->lazy_fd, buffer, MDBX_MIN_PAGESIZE, offset); + if (err == ERROR_LOCK_VIOLATION && --retryleft) { + WARNING("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, + mdbx_strerror(err)); + continue; + } + } +#endif /* Windows */ + if (err != MDBX_SUCCESS) { + ERROR("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, + mdbx_strerror(err)); + return err; + } + + char again[MDBX_MIN_PAGESIZE]; + err = osal_pread(env->lazy_fd, again, MDBX_MIN_PAGESIZE, offset); +#if defined(_WIN32) || defined(_WIN64) + if (err == ERROR_LOCK_VIOLATION) { + SleepEx(0, true); + err = osal_pread(env->lazy_fd, again, MDBX_MIN_PAGESIZE, offset); + if (err == ERROR_LOCK_VIOLATION && --retryleft) { + WARNING("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, + mdbx_strerror(err)); + continue; + } + } +#endif /* Windows */ + if (err != MDBX_SUCCESS) { + ERROR("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, + mdbx_strerror(err)); + return err; + } + + if (memcmp(buffer, again, MDBX_MIN_PAGESIZE) == 0 || --retryleft == 0) + break; + + VERBOSE("meta[%u] 
was updated, re-read it", meta_number); + } + + if (!retryleft) { + ERROR("meta[%u] is too volatile, skip it", meta_number); + continue; + } + + page_t *const page = (page_t *)buffer; + meta_t *const meta = page_meta(page); + rc = meta_validate(env, meta, page, meta_number, &guess_pagesize); + if (rc != MDBX_SUCCESS) + continue; + + bool latch; + if (env->stuck_meta >= 0) + latch = (meta_number == (unsigned)env->stuck_meta); + else if (meta_bootid_match(meta)) + latch = meta_choice_recent( + meta->unsafe_txnid, SIGN_IS_STEADY(meta->unsafe_sign), + dest->unsafe_txnid, SIGN_IS_STEADY(dest->unsafe_sign)); + else + latch = meta_choice_steady( + meta->unsafe_txnid, SIGN_IS_STEADY(meta->unsafe_sign), + dest->unsafe_txnid, SIGN_IS_STEADY(dest->unsafe_sign)); + if (latch) { + *dest = *meta; + if (!lck_exclusive && !meta_is_steady(dest)) + loop_limit += 1; /* LY: should re-read to hush race with update */ + VERBOSE("latch meta[%u]", meta_number); + } + } + + if (dest->pagesize == 0 || + (env->stuck_meta < 0 && + !(meta_is_steady(dest) || + meta_weak_acceptable(env, dest, lck_exclusive)))) { + ERROR("%s", "no usable meta-pages, database is corrupted"); + if (rc == MDBX_SUCCESS) { + /* TODO: try to restore the database by fully checking b-tree structure + * for the each meta page, if the corresponding option was given */ + return MDBX_CORRUPTED; + } + return rc; + } + + return MDBX_SUCCESS; +} + +__cold int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, + const pgno_t size_pgno, pgno_t limit_pgno, + const enum resize_mode mode) { + /* Acquire guard to avoid collision between read and write txns + * around geo_in_bytes and dxb_mmap */ +#if defined(_WIN32) || defined(_WIN64) + imports.srwl_AcquireExclusive(&env->remap_guard); + int rc = MDBX_SUCCESS; + mdbx_handle_array_t *suspended = nullptr; + mdbx_handle_array_t array_onstack; +#else + int rc = osal_fastmutex_acquire(&env->remap_guard); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; +#endif + + const size_t 
prev_size = env->dxb_mmap.current; + const size_t prev_limit = env->dxb_mmap.limit; + const pgno_t prev_limit_pgno = bytes2pgno(env, prev_limit); + eASSERT(env, limit_pgno >= size_pgno); + eASSERT(env, size_pgno >= used_pgno); + if (mode < explicit_resize && size_pgno <= prev_limit_pgno) { + /* The actual mapsize may be less since the geo.upper may be changed + * by other process. Avoid remapping until it is necessary. */ + limit_pgno = prev_limit_pgno; + } + const size_t limit_bytes = pgno_align2os_bytes(env, limit_pgno); + const size_t size_bytes = pgno_align2os_bytes(env, size_pgno); +#if MDBX_ENABLE_MADVISE || defined(ENABLE_MEMCHECK) + const void *const prev_map = env->dxb_mmap.base; +#endif /* MDBX_ENABLE_MADVISE || ENABLE_MEMCHECK */ + + VERBOSE("resize/%d datafile/mapping: " + "present %" PRIuPTR " -> %" PRIuPTR ", " + "limit %" PRIuPTR " -> %" PRIuPTR, + mode, prev_size, size_bytes, prev_limit, limit_bytes); + + eASSERT(env, limit_bytes >= size_bytes); + eASSERT(env, bytes2pgno(env, size_bytes) >= size_pgno); + eASSERT(env, bytes2pgno(env, limit_bytes) >= limit_pgno); + + unsigned mresize_flags = + env->flags & (MDBX_RDONLY | MDBX_WRITEMAP | MDBX_UTTERLY_NOSYNC); + if (mode >= impilict_shrink) + mresize_flags |= txn_shrink_allowed; + + /* nothing to do when the mapping and the file already have the requested sizes */if (limit_bytes == env->dxb_mmap.limit && + size_bytes == env->dxb_mmap.current && + size_bytes == env->dxb_mmap.filesize) + goto bailout; + + /* With MDBX_NOSTICKYTHREADS any thread may work with transactions and + * we have no information about which ones. Therefore it is impossible to + * perform remap actions that require suspending threads working with the DB. */ + if ((env->flags & MDBX_NOSTICKYTHREADS) == 0) { +#if defined(_WIN32) || defined(_WIN64) + if ((size_bytes < env->dxb_mmap.current && mode > implicit_grow) || + limit_bytes != env->dxb_mmap.limit) { + /* 1) Windows allows only extending a read-write section, but not a + * corresponding mapped view. 
Therefore in other cases we must suspend + * the local threads for safe remap. + * 2) At least on Windows 10 1803 the entire mapped section is unavailable + * for short time during NtExtendSection() or VirtualAlloc() execution. + * 3) Under Wine runtime environment on Linux a section extending is not + * supported. + * + * THEREFORE LOCAL THREADS SUSPENDING IS ALWAYS REQUIRED! */ + array_onstack.limit = ARRAY_LENGTH(array_onstack.handles); + array_onstack.count = 0; + suspended = &array_onstack; + rc = osal_suspend_threads_before_remap(env, &suspended); + if (rc != MDBX_SUCCESS) { + ERROR("failed suspend-for-remap: errcode %d", rc); + goto bailout; + } + mresize_flags |= (mode < explicit_resize) + ? MDBX_MRESIZE_MAY_UNMAP + : MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE; + } +#else /* Windows */ + lck_t *const lck = env->lck_mmap.lck; + if (mode == explicit_resize && limit_bytes != env->dxb_mmap.limit) { + mresize_flags |= MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE; + if (lck) { + int err = lck_rdt_lock(env) /* lock readers table until remap done */; + if (unlikely(MDBX_IS_ERROR(err))) { + /* the readers table is NOT locked here: drop the remap flags so that the unlock at bailout is skipped */mresize_flags &= ~(MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE); rc = err; + goto bailout; + } + + /* looking for readers from this process */ + const size_t snap_nreaders = + atomic_load32(&lck->rdt_length, mo_AcquireRelease); + eASSERT(env, mode == explicit_resize); + for (size_t i = 0; i < snap_nreaders; ++i) { + if (lck->rdt[i].pid.weak == env->pid && + lck->rdt[i].tid.weak != osal_thread_self()) { + /* the base address of the mapping can't be changed since + * the other reader thread from this process exists. */ + lck_rdt_unlock(env); + mresize_flags &= ~(MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE); + break; + } + } + } + } +#endif /* ! Windows */ + } + + const pgno_t aligned_munlock_pgno = + (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) + ? 
0 + : bytes2pgno(env, size_bytes); + if (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) { + mincore_clean_cache(env); + if ((env->flags & MDBX_WRITEMAP) && env->lck->unsynced_pages.weak) { +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.msync.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, used_pgno), + MDBX_SYNC_NONE); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + } + munlock_after(env, aligned_munlock_pgno, size_bytes); + +#if MDBX_ENABLE_MADVISE + if (size_bytes < prev_size && mode > implicit_grow) { + NOTICE("resize-MADV_%s %u..%u", + (env->flags & MDBX_WRITEMAP) ? "REMOVE" : "DONTNEED", size_pgno, + bytes2pgno(env, prev_size)); + const uint32_t munlocks_before = + atomic_load32(&env->lck->mlcnt[1], mo_Relaxed); + /* MDBX_RESULT_TRUE marks that no advice has been applied yet */rc = MDBX_RESULT_TRUE; +#if defined(MADV_REMOVE) + if (env->flags & MDBX_WRITEMAP) + rc = madvise(ptr_disp(env->dxb_mmap.base, size_bytes), + prev_size - size_bytes, MADV_REMOVE) + ? ignore_enosys(errno) + : MDBX_SUCCESS; +#endif /* MADV_REMOVE */ +#if defined(MADV_DONTNEED) + if (rc == MDBX_RESULT_TRUE) + rc = madvise(ptr_disp(env->dxb_mmap.base, size_bytes), + prev_size - size_bytes, MADV_DONTNEED) + ? ignore_enosys(errno) + : MDBX_SUCCESS; +#elif defined(POSIX_MADV_DONTNEED) + if (rc == MDBX_RESULT_TRUE) + rc = ignore_enosys(posix_madvise(ptr_disp(env->dxb_mmap.base, size_bytes), + prev_size - size_bytes, + POSIX_MADV_DONTNEED)); +#elif defined(POSIX_FADV_DONTNEED) + if (rc == MDBX_RESULT_TRUE) + rc = ignore_enosys(posix_fadvise(env->lazy_fd, size_bytes, + prev_size - size_bytes, + POSIX_FADV_DONTNEED)); +#endif /* MADV_DONTNEED */ + if (unlikely(MDBX_IS_ERROR(rc))) { + const uint32_t mlocks_after = + atomic_load32(&env->lck->mlcnt[0], mo_Relaxed); + if (rc == MDBX_EINVAL) { + const int severity = + (mlocks_after - munlocks_before) ? 
MDBX_LOG_NOTICE : MDBX_LOG_WARN; + if (LOG_ENABLED(severity)) + debug_log(severity, __func__, __LINE__, + "%s-madvise: ignore EINVAL (%d) since some pages maybe " + "locked (%u/%u mlcnt-processes)", + "resize", rc, mlocks_after, munlocks_before); + } else { + ERROR("%s-madvise(%s, %zu, +%zu), %u/%u mlcnt-processes, err %d", + "mresize", "DONTNEED", size_bytes, prev_size - size_bytes, + mlocks_after, munlocks_before, rc); + goto bailout; + } + } else + env->lck->discarded_tail.weak = size_pgno; + } +#endif /* MDBX_ENABLE_MADVISE */ + + rc = osal_mresize(mresize_flags, &env->dxb_mmap, size_bytes, limit_bytes); + eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current); + +#if MDBX_ENABLE_MADVISE + if (rc == MDBX_SUCCESS) { + eASSERT(env, limit_bytes == env->dxb_mmap.limit); + eASSERT(env, size_bytes <= env->dxb_mmap.filesize); + if (mode == explicit_resize) + eASSERT(env, size_bytes == env->dxb_mmap.current); + else + eASSERT(env, size_bytes <= env->dxb_mmap.current); + env->lck->discarded_tail.weak = size_pgno; + /* anything but MDBX_RESULT_TRUE, an error code included, means no readahead; the same test is used in dxb_setup() */const bool readahead = + !(env->flags & MDBX_NORDAHEAD) && + mdbx_is_readahead_reasonable(size_bytes, -(intptr_t)prev_size) == MDBX_RESULT_TRUE; + const bool force = limit_bytes != prev_limit || + env->dxb_mmap.base != prev_map +#if defined(_WIN32) || defined(_WIN64) + || prev_size > size_bytes +#endif /* Windows */ + ; + rc = dxb_set_readahead(env, size_pgno, readahead, force); + } +#endif /* MDBX_ENABLE_MADVISE */ + +/* common exit: publish the new geometry on success, log on failure, then release the locks taken above */bailout: + if (rc == MDBX_SUCCESS) { + eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current); + eASSERT(env, limit_bytes == env->dxb_mmap.limit); + eASSERT(env, size_bytes <= env->dxb_mmap.filesize); + if (mode == explicit_resize) + eASSERT(env, size_bytes == env->dxb_mmap.current); + else + eASSERT(env, size_bytes <= env->dxb_mmap.current); + /* update env-geo to avoid influences */ + env->geo_in_bytes.now = env->dxb_mmap.current; + env->geo_in_bytes.upper = env->dxb_mmap.limit; + env_options_adjust_defaults(env); +#ifdef ENABLE_MEMCHECK + if (prev_limit 
!= env->dxb_mmap.limit || prev_map != env->dxb_mmap.base) { + VALGRIND_DISCARD(env->valgrind_handle); + env->valgrind_handle = 0; + if (env->dxb_mmap.limit) + env->valgrind_handle = VALGRIND_CREATE_BLOCK( + env->dxb_mmap.base, env->dxb_mmap.limit, "mdbx"); + } +#endif /* ENABLE_MEMCHECK */ + } else { + if (rc != MDBX_UNABLE_EXTEND_MAPSIZE && rc != MDBX_EPERM) { + ERROR("failed resize datafile/mapping: " + "present %" PRIuPTR " -> %" PRIuPTR ", " + "limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d", + prev_size, size_bytes, prev_limit, limit_bytes, rc); + } else { + WARNING("unable resize datafile/mapping: " + "present %" PRIuPTR " -> %" PRIuPTR ", " + "limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d", + prev_size, size_bytes, prev_limit, limit_bytes, rc); + eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current); + } + /* without a mapping base the environment is unusable */if (!env->dxb_mmap.base) { + env->flags |= ENV_FATAL_ERROR; + if (env->txn) + env->txn->flags |= MDBX_TXN_ERROR; + rc = MDBX_PANIC; + } + } + +#if defined(_WIN32) || defined(_WIN64) + int err = MDBX_SUCCESS; + imports.srwl_ReleaseExclusive(&env->remap_guard); + if (suspended) { + err = osal_resume_threads_after_remap(suspended); + if (suspended != &array_onstack) + osal_free(suspended); + } +#else + /* the readers table is still locked only while the MAY_UNMAP / MAY_MOVE flags remain set */if (env->lck_mmap.lck && + (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) != 0) + lck_rdt_unlock(env); + int err = osal_fastmutex_release(&env->remap_guard); +#endif /* Windows */ + if (err != MDBX_SUCCESS) { + FATAL("failed resume-after-remap: errcode %d", err); + return MDBX_PANIC; + } + return rc; +} +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) +void dxb_sanitize_tail(MDBX_env *env, MDBX_txn *txn) { +#if !defined(__SANITIZE_ADDRESS__) + if (!RUNNING_ON_VALGRIND) + return; +#endif + if (txn) { /* transaction start */ + if (env->poison_edge < txn->geo.first_unallocated) + env->poison_edge = txn->geo.first_unallocated; + VALGRIND_MAKE_MEM_DEFINED(env->dxb_mmap.base, + pgno2bytes(env, txn->geo.first_unallocated)); + 
MDBX_ASAN_UNPOISON_MEMORY_REGION( + env->dxb_mmap.base, pgno2bytes(env, txn->geo.first_unallocated)); + /* don't touch more, it should be already poisoned */ + } else { /* transaction end */ + bool should_unlock = false; + /* last becomes the first page number that may be poisoned */pgno_t last = MAX_PAGENO + 1; + if (env->pid != osal_getpid()) { + /* resurrect after fork */ + return; + } else if (env->txn && env_txn0_owned(env)) { + /* inside write-txn */ + last = meta_recent(env, &env->basal_txn->tw.troika) + .ptr_v->geometry.first_unallocated; + } else if (env->flags & MDBX_RDONLY) { + /* read-only mode, no write-txn, no wlock mutex */ + last = NUM_METAS; + } else if (lck_txn_lock(env, true) == MDBX_SUCCESS) { + /* no write-txn */ + last = NUM_METAS; + should_unlock = true; + } else { + /* write txn is running, therefore shouldn't poison any memory range */ + return; + } + + /* NOTE(review): presumably raises last to cover pages still used by readers of this process -- confirm against mvcc_largest_this() */last = mvcc_largest_this(env, last); + const pgno_t edge = env->poison_edge; + if (edge > last) { + /* pull the edge back and make the pages from last up to the old edge inaccessible */eASSERT(env, last >= NUM_METAS); + env->poison_edge = last; + VALGRIND_MAKE_MEM_NOACCESS( + ptr_disp(env->dxb_mmap.base, pgno2bytes(env, last)), + pgno2bytes(env, edge - last)); + MDBX_ASAN_POISON_MEMORY_REGION( + ptr_disp(env->dxb_mmap.base, pgno2bytes(env, last)), + pgno2bytes(env, edge - last)); + } + if (should_unlock) + lck_txn_unlock(env); + } +} +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ + +#if MDBX_ENABLE_MADVISE +/* Turns readahead on or off for the mapped data file; readahead is harmful when the DB is larger than RAM. The state is kept in env->lck->readahead_anchor: bit 0 is the enable flag and the remaining bits hold the edge page number. When force_whole is set, the enable flag flips or the anchor is still zero, the advice covers the range from offset 0 up to the larger of the previous and the new edge; otherwise only the pages between the two edges. The range is clipped to the mapping limit and nothing is done when it is empty. Returns MDBX_SUCCESS or the error reported by the advise call. */ +__cold int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, + const bool enable, const bool force_whole) { + eASSERT(env, edge >= NUM_METAS && edge <= MAX_PAGENO + 1); + eASSERT(env, (enable & 1) == (enable != 0)); + const bool toggle = force_whole || + ((enable ^ env->lck->readahead_anchor) & 1) || + !env->lck->readahead_anchor; + const pgno_t prev_edge = env->lck->readahead_anchor >> 1; + const size_t limit = env->dxb_mmap.limit; + size_t offset = + toggle ? 0 + : pgno_align2os_bytes(env, (prev_edge < edge) ? 
prev_edge : edge); + offset = (offset < limit) ? offset : limit; + + size_t length = + pgno_align2os_bytes(env, (prev_edge < edge) ? edge : prev_edge); + length = (length < limit) ? length : limit; + length -= offset; + + eASSERT(env, 0 <= (intptr_t)length); + if (length == 0) + return MDBX_SUCCESS; + + NOTICE("readahead %s %u..%u", enable ? "ON" : "OFF", bytes2pgno(env, offset), + bytes2pgno(env, offset + length)); + +#if defined(F_RDAHEAD) + if (toggle && unlikely(fcntl(env->lazy_fd, F_RDAHEAD, enable) == -1)) + return errno; +#endif /* F_RDAHEAD */ + + int err; + void *const ptr = ptr_disp(env->dxb_mmap.base, offset); + if (enable) { + /* first restore the default access pattern with whichever advise call the platform provides */ +#if defined(MADV_NORMAL) + err = + madvise(ptr, length, MADV_NORMAL) ? ignore_enosys(errno) : MDBX_SUCCESS; + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#elif defined(POSIX_MADV_NORMAL) + err = ignore_enosys(posix_madvise(ptr, length, POSIX_MADV_NORMAL)); + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#elif defined(POSIX_FADV_NORMAL) && defined(POSIX_FADV_WILLNEED) + err = ignore_enosys( + posix_fadvise(env->lazy_fd, offset, length, POSIX_FADV_NORMAL)); + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#elif defined(_WIN32) || defined(_WIN64) + /* no madvise on Windows */ +#else +#warning "FIXME" +#endif + if (toggle) { + /* NOTE: Seems there is a bug in the Mach/Darwin/OSX kernel, + * because MADV_WILLNEED with offset != 0 may cause SIGBUS + * on following access to the hinted region. + * 19.6.0 Darwin Kernel Version 19.6.0: Tue Jan 12 22:13:05 PST 2021; + * root:xnu-6153.141.16~1/RELEASE_X86_64 x86_64 */ +#if defined(F_RDADVISE) + struct radvisory hint; + hint.ra_offset = offset; + hint.ra_count = + unlikely(length > INT_MAX && sizeof(length) > sizeof(hint.ra_count)) + ? INT_MAX + : (int)length; + (void)/* Ignore ENOTTY for DB on the ram-disk and so on */ fcntl( + env->lazy_fd, F_RDADVISE, &hint); +#elif defined(MADV_WILLNEED) + err = madvise(ptr, length, MADV_WILLNEED) ? 
ignore_enosys(errno) + : MDBX_SUCCESS; + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#elif defined(POSIX_MADV_WILLNEED) + err = ignore_enosys(posix_madvise(ptr, length, POSIX_MADV_WILLNEED)); + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#elif defined(_WIN32) || defined(_WIN64) + if (imports.PrefetchVirtualMemory) { + WIN32_MEMORY_RANGE_ENTRY hint; + hint.VirtualAddress = ptr; + hint.NumberOfBytes = length; + (void)imports.PrefetchVirtualMemory(GetCurrentProcess(), 1, &hint, 0); + } +#elif defined(POSIX_FADV_WILLNEED) + err = ignore_enosys( + posix_fadvise(env->lazy_fd, offset, length, POSIX_FADV_WILLNEED)); + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#else +#warning "FIXME" +#endif + } + } else { + mincore_clean_cache(env); +#if defined(MADV_RANDOM) + err = + madvise(ptr, length, MADV_RANDOM) ? ignore_enosys(errno) : MDBX_SUCCESS; + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#elif defined(POSIX_MADV_RANDOM) + err = ignore_enosys(posix_madvise(ptr, length, POSIX_MADV_RANDOM)); + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#elif defined(POSIX_FADV_RANDOM) + err = ignore_enosys( + posix_fadvise(env->lazy_fd, offset, length, POSIX_FADV_RANDOM)); + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#elif defined(_WIN32) || defined(_WIN64) + /* no madvise on Windows */ +#else +#warning "FIXME" +#endif /* MADV_RANDOM */ + } + + /* remember the new state: enable flag in bit 0, edge page number in the bits above it */env->lck->readahead_anchor = (enable & 1) + (edge << 1); + err = MDBX_SUCCESS; + return err; +} +#endif /* MDBX_ENABLE_MADVISE */ + +__cold int dxb_setup(MDBX_env *env, const int lck_rc, + const mdbx_mode_t mode_bits) { + meta_t header; + eASSERT(env, !(env->flags & ENV_ACTIVE)); + int rc = MDBX_RESULT_FALSE; + int err = dxb_read_header(env, &header, lck_rc, mode_bits); + if (unlikely(err != MDBX_SUCCESS)) { + if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE || err != MDBX_ENODATA || + (env->flags & MDBX_RDONLY) != 0 || + /* recovery mode */ env->stuck_meta >= 0) + return err; + + DEBUG("%s", "create new database"); + 
rc = /* new database */ MDBX_RESULT_TRUE; + + if (!env->geo_in_bytes.now) { + /* set defaults if not configured */ + err = mdbx_env_set_geometry(env, 0, -1, DEFAULT_MAPSIZE, -1, -1, -1); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + + err = env_page_auxbuffer(env); + if (unlikely(err != MDBX_SUCCESS)) + return err; + + header = *meta_init_triplet(env, env->page_auxbuf); + err = osal_pwrite(env->lazy_fd, env->page_auxbuf, + env->ps * (size_t)NUM_METAS, 0); + if (unlikely(err != MDBX_SUCCESS)) + return err; + + err = osal_ftruncate(env->lazy_fd, env->dxb_mmap.filesize = + env->dxb_mmap.current = + env->geo_in_bytes.now); + if (unlikely(err != MDBX_SUCCESS)) + return err; + +#ifndef NDEBUG /* just for checking */ + err = dxb_read_header(env, &header, lck_rc, mode_bits); + if (unlikely(err != MDBX_SUCCESS)) + return err; +#endif + } + + VERBOSE("header: root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO + "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO " +%u -%u, txn_id %" PRIaTXN + ", %s", + header.trees.main.root, header.trees.gc.root, header.geometry.lower, + header.geometry.first_unallocated, header.geometry.now, + header.geometry.upper, pv2pages(header.geometry.grow_pv), + pv2pages(header.geometry.shrink_pv), + unaligned_peek_u64(4, header.txnid_a), durable_caption(&header)); + + if (unlikely(header.trees.gc.flags != MDBX_INTEGERKEY)) { + ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB", + header.trees.gc.flags); + return MDBX_INCOMPATIBLE; + } + env->dbs_flags[FREE_DBI] = DB_VALID | MDBX_INTEGERKEY; + env->kvs[FREE_DBI].clc.k.cmp = cmp_int_align4; /* aligned MDBX_INTEGERKEY */ + env->kvs[FREE_DBI].clc.k.lmax = env->kvs[FREE_DBI].clc.k.lmin = 8; + env->kvs[FREE_DBI].clc.v.cmp = cmp_lenfast; + env->kvs[FREE_DBI].clc.v.lmin = 4; + env->kvs[FREE_DBI].clc.v.lmax = + mdbx_env_get_maxvalsize_ex(env, MDBX_INTEGERKEY); + + if (env->ps != header.pagesize) + env_setup_pagesize(env, header.pagesize); + const size_t used_bytes = pgno2bytes(env, 
header.geometry.first_unallocated); + const size_t used_aligned2os_bytes = + ceil_powerof2(used_bytes, globals.sys_pagesize); + if ((env->flags & MDBX_RDONLY) /* readonly */ + || lck_rc != MDBX_RESULT_TRUE /* not exclusive */ + || /* recovery mode */ env->stuck_meta >= 0) { + /* use present params from db */ + const size_t pagesize = header.pagesize; + err = mdbx_env_set_geometry( + env, header.geometry.lower * pagesize, header.geometry.now * pagesize, + header.geometry.upper * pagesize, + pv2pages(header.geometry.grow_pv) * pagesize, + pv2pages(header.geometry.shrink_pv) * pagesize, header.pagesize); + if (unlikely(err != MDBX_SUCCESS)) { + ERROR("%s: err %d", "could not apply geometry from db", err); + return (err == MDBX_EINVAL) ? MDBX_INCOMPATIBLE : err; + } + } else if (env->geo_in_bytes.now) { + /* silently growth to last used page */ + if (env->geo_in_bytes.now < used_aligned2os_bytes) + env->geo_in_bytes.now = used_aligned2os_bytes; + if (env->geo_in_bytes.upper < used_aligned2os_bytes) + env->geo_in_bytes.upper = used_aligned2os_bytes; + + /* apply preconfigured params, but only if substantial changes: + * - upper or lower limit changes + * - shrink threshold or growth step + * But ignore change just a 'now/current' size. 
*/ + if (bytes_align2os_bytes(env, env->geo_in_bytes.upper) != + pgno2bytes(env, header.geometry.upper) || + bytes_align2os_bytes(env, env->geo_in_bytes.lower) != + pgno2bytes(env, header.geometry.lower) || + bytes_align2os_bytes(env, env->geo_in_bytes.shrink) != + pgno2bytes(env, pv2pages(header.geometry.shrink_pv)) || + bytes_align2os_bytes(env, env->geo_in_bytes.grow) != + pgno2bytes(env, pv2pages(header.geometry.grow_pv))) { + + if (env->geo_in_bytes.shrink && env->geo_in_bytes.now > used_bytes) + /* pre-shrink if enabled */ + env->geo_in_bytes.now = used_bytes + env->geo_in_bytes.shrink - + used_bytes % env->geo_in_bytes.shrink; + + err = mdbx_env_set_geometry( + env, env->geo_in_bytes.lower, env->geo_in_bytes.now, + env->geo_in_bytes.upper, env->geo_in_bytes.grow, + env->geo_in_bytes.shrink, header.pagesize); + if (unlikely(err != MDBX_SUCCESS)) { + ERROR("%s: err %d", "could not apply preconfigured db-geometry", err); + return (err == MDBX_EINVAL) ? MDBX_INCOMPATIBLE : err; + } + + /* update meta fields */ + header.geometry.now = bytes2pgno(env, env->geo_in_bytes.now); + header.geometry.lower = bytes2pgno(env, env->geo_in_bytes.lower); + header.geometry.upper = bytes2pgno(env, env->geo_in_bytes.upper); + header.geometry.grow_pv = + pages2pv(bytes2pgno(env, env->geo_in_bytes.grow)); + header.geometry.shrink_pv = + pages2pv(bytes2pgno(env, env->geo_in_bytes.shrink)); + + VERBOSE("amended: root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO + "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO + " +%u -%u, txn_id %" PRIaTXN ", %s", + header.trees.main.root, header.trees.gc.root, + header.geometry.lower, header.geometry.first_unallocated, + header.geometry.now, header.geometry.upper, + pv2pages(header.geometry.grow_pv), + pv2pages(header.geometry.shrink_pv), + unaligned_peek_u64(4, header.txnid_a), durable_caption(&header)); + } else { + /* fetch back 'now/current' size, since it was ignored during comparison + * and may differ. 
*/ + env->geo_in_bytes.now = pgno_align2os_bytes(env, header.geometry.now); + } + ENSURE(env, header.geometry.now >= header.geometry.first_unallocated); + } else { + /* geo-params are not pre-configured by user, + * get current values from the meta. */ + env->geo_in_bytes.now = pgno2bytes(env, header.geometry.now); + env->geo_in_bytes.lower = pgno2bytes(env, header.geometry.lower); + env->geo_in_bytes.upper = pgno2bytes(env, header.geometry.upper); + env->geo_in_bytes.grow = pgno2bytes(env, pv2pages(header.geometry.grow_pv)); + env->geo_in_bytes.shrink = + pgno2bytes(env, pv2pages(header.geometry.shrink_pv)); + } + + ENSURE(env, pgno_align2os_bytes(env, header.geometry.now) == + env->geo_in_bytes.now); + ENSURE(env, env->geo_in_bytes.now >= used_bytes); + const uint64_t filesize_before = env->dxb_mmap.filesize; + if (unlikely(filesize_before != env->geo_in_bytes.now)) { + if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE) { + VERBOSE("filesize mismatch (expect %" PRIuPTR "b/%" PRIaPGNO + "p, have %" PRIu64 "b/%" PRIaPGNO "p), " + "assume other process working", + env->geo_in_bytes.now, bytes2pgno(env, env->geo_in_bytes.now), + filesize_before, bytes2pgno(env, (size_t)filesize_before)); + } else { + WARNING("filesize mismatch (expect %" PRIuSIZE "b/%" PRIaPGNO + "p, have %" PRIu64 "b/%" PRIaPGNO "p)", + env->geo_in_bytes.now, bytes2pgno(env, env->geo_in_bytes.now), + filesize_before, bytes2pgno(env, (size_t)filesize_before)); + if (filesize_before < used_bytes) { + ERROR("last-page beyond end-of-file (last %" PRIaPGNO + ", have %" PRIaPGNO ")", + header.geometry.first_unallocated, + bytes2pgno(env, (size_t)filesize_before)); + return MDBX_CORRUPTED; + } + + if (env->flags & MDBX_RDONLY) { + if (filesize_before & (globals.sys_pagesize - 1)) { + ERROR("%s", "filesize should be rounded-up to system page"); + return MDBX_WANNA_RECOVERY; + } + WARNING("%s", "ignore filesize mismatch in readonly-mode"); + } else { + VERBOSE("will resize datafile to %" PRIuSIZE " bytes, 
%" PRIaPGNO + " pages", + env->geo_in_bytes.now, bytes2pgno(env, env->geo_in_bytes.now)); + } + } + } + + VERBOSE("current boot-id %" PRIx64 "-%" PRIx64 " (%savailable)", + globals.bootid.x, globals.bootid.y, + (globals.bootid.x | globals.bootid.y) ? "" : "not-"); + +#if MDBX_ENABLE_MADVISE + /* calculate readahead hint before mmap with zero redundant pages */ + const bool readahead = + !(env->flags & MDBX_NORDAHEAD) && + mdbx_is_readahead_reasonable(used_bytes, 0) == MDBX_RESULT_TRUE; +#endif /* MDBX_ENABLE_MADVISE */ + + err = osal_mmap(env->flags, &env->dxb_mmap, env->geo_in_bytes.now, + env->geo_in_bytes.upper, + (lck_rc && env->stuck_meta < 0) ? MMAP_OPTION_TRUNCATE : 0); + if (unlikely(err != MDBX_SUCCESS)) + return err; + +#if MDBX_ENABLE_MADVISE +#if defined(MADV_DONTDUMP) + err = madvise(env->dxb_mmap.base, env->dxb_mmap.limit, MADV_DONTDUMP) + ? ignore_enosys(errno) + : MDBX_SUCCESS; + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#endif /* MADV_DONTDUMP */ +#if defined(MADV_DODUMP) + if (globals.runtime_flags & MDBX_DBG_DUMP) { + const size_t meta_length_aligned2os = pgno_align2os_bytes(env, NUM_METAS); + err = madvise(env->dxb_mmap.base, meta_length_aligned2os, MADV_DODUMP) + ? 
ignore_enosys(errno) + : MDBX_SUCCESS; + if (unlikely(MDBX_IS_ERROR(err))) + return err; + } +#endif /* MADV_DODUMP */ +#endif /* MDBX_ENABLE_MADVISE */ + +#ifdef ENABLE_MEMCHECK + env->valgrind_handle = + VALGRIND_CREATE_BLOCK(env->dxb_mmap.base, env->dxb_mmap.limit, "mdbx"); +#endif /* ENABLE_MEMCHECK */ + + eASSERT(env, used_bytes >= pgno2bytes(env, NUM_METAS) && + used_bytes <= env->dxb_mmap.limit); +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) + if (env->dxb_mmap.filesize > used_bytes && + env->dxb_mmap.filesize < env->dxb_mmap.limit) { + VALGRIND_MAKE_MEM_NOACCESS(ptr_disp(env->dxb_mmap.base, used_bytes), + env->dxb_mmap.filesize - used_bytes); + MDBX_ASAN_POISON_MEMORY_REGION(ptr_disp(env->dxb_mmap.base, used_bytes), + env->dxb_mmap.filesize - used_bytes); + } + env->poison_edge = + bytes2pgno(env, (env->dxb_mmap.filesize < env->dxb_mmap.limit) + ? env->dxb_mmap.filesize + : env->dxb_mmap.limit); +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ + + troika_t troika = meta_tap(env); +#if MDBX_DEBUG + meta_troika_dump(env, &troika); +#endif + //-------------------------------- validate/rollback head & steady meta-pages + if (unlikely(env->stuck_meta >= 0)) { + /* recovery mode */ + meta_t clone; + meta_t const *const target = METAPAGE(env, env->stuck_meta); + err = meta_validate_copy(env, target, &clone); + if (unlikely(err != MDBX_SUCCESS)) { + ERROR("target meta[%u] is corrupted", + bytes2pgno(env, ptr_dist(data_page(target), env->dxb_mmap.base))); + meta_troika_dump(env, &troika); + return MDBX_CORRUPTED; + } + } else /* not recovery mode */ + while (1) { + const unsigned meta_clash_mask = meta_eq_mask(&troika); + if (unlikely(meta_clash_mask)) { + ERROR("meta-pages are clashed: mask 0x%d", meta_clash_mask); + meta_troika_dump(env, &troika); + return MDBX_CORRUPTED; + } + + if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE) { + /* non-exclusive mode, + * meta-pages should be validated by a first process opened the DB */ + if 
(troika.recent == troika.prefer_steady) + break; + + if (!env->lck_mmap.lck) { + /* LY: without-lck (read-only) mode, so it is impossible that other + * process made weak checkpoint. */ + ERROR("%s", "without-lck, unable recovery/rollback"); + meta_troika_dump(env, &troika); + return MDBX_WANNA_RECOVERY; + } + + /* LY: assume just have a collision with other running process, + * or someone make a weak checkpoint */ + VERBOSE("%s", "assume collision or online weak checkpoint"); + break; + } + eASSERT(env, lck_rc == MDBX_RESULT_TRUE); + /* exclusive mode */ + + const meta_ptr_t recent = meta_recent(env, &troika); + const meta_ptr_t prefer_steady = meta_prefer_steady(env, &troika); + meta_t clone; + if (prefer_steady.is_steady) { + err = meta_validate_copy(env, prefer_steady.ptr_c, &clone); + if (unlikely(err != MDBX_SUCCESS)) { + ERROR("meta[%u] with %s txnid %" PRIaTXN " is corrupted, %s needed", + bytes2pgno(env, + ptr_dist(prefer_steady.ptr_c, env->dxb_mmap.base)), + "steady", prefer_steady.txnid, "manual recovery"); + meta_troika_dump(env, &troika); + return MDBX_CORRUPTED; + } + if (prefer_steady.ptr_c == recent.ptr_c) + break; + } + + const pgno_t pgno = + bytes2pgno(env, ptr_dist(recent.ptr_c, env->dxb_mmap.base)); + const bool last_valid = + meta_validate_copy(env, recent.ptr_c, &clone) == MDBX_SUCCESS; + eASSERT(env, + !prefer_steady.is_steady || recent.txnid != prefer_steady.txnid); + if (unlikely(!last_valid)) { + if (unlikely(!prefer_steady.is_steady)) { + ERROR("%s for open or automatic rollback, %s", + "there are no suitable meta-pages", + "manual recovery is required"); + meta_troika_dump(env, &troika); + return MDBX_CORRUPTED; + } + WARNING("meta[%u] with last txnid %" PRIaTXN + " is corrupted, rollback needed", + pgno, recent.txnid); + meta_troika_dump(env, &troika); + goto purge_meta_head; + } + + if (meta_bootid_match(recent.ptr_c)) { + if (env->flags & MDBX_RDONLY) { + ERROR("%s, but boot-id(%016" PRIx64 "-%016" PRIx64 ") is MATCH: " + "rollback 
NOT needed, steady-sync NEEDED%s", + "opening after an unclean shutdown", globals.bootid.x, + globals.bootid.y, ", but unable in read-only mode"); + meta_troika_dump(env, &troika); + return MDBX_WANNA_RECOVERY; + } + WARNING("%s, but boot-id(%016" PRIx64 "-%016" PRIx64 ") is MATCH: " + "rollback NOT needed, steady-sync NEEDED%s", + "opening after an unclean shutdown", globals.bootid.x, + globals.bootid.y, ""); + header = clone; + env->lck->unsynced_pages.weak = header.geometry.first_unallocated; + if (!env->lck->eoos_timestamp.weak) + env->lck->eoos_timestamp.weak = osal_monotime(); + break; + } + if (unlikely(!prefer_steady.is_steady)) { + ERROR("%s, but %s for automatic rollback: %s", + "opening after an unclean shutdown", + "there are no suitable meta-pages", + "manual recovery is required"); + meta_troika_dump(env, &troika); + return MDBX_CORRUPTED; + } + if (env->flags & MDBX_RDONLY) { + ERROR("%s and rollback needed: (from head %" PRIaTXN + " to steady %" PRIaTXN ")%s", + "opening after an unclean shutdown", recent.txnid, + prefer_steady.txnid, ", but unable in read-only mode"); + meta_troika_dump(env, &troika); + return MDBX_WANNA_RECOVERY; + } + + purge_meta_head: + NOTICE("%s and doing automatic rollback: " + "purge%s meta[%u] with%s txnid %" PRIaTXN, + "opening after an unclean shutdown", last_valid ? "" : " invalid", + pgno, last_valid ? " weak" : "", recent.txnid); + meta_troika_dump(env, &troika); + ENSURE(env, prefer_steady.is_steady); + err = meta_override(env, pgno, 0, + last_valid ? 
recent.ptr_c : prefer_steady.ptr_c); + if (err) { + ERROR("rollback: overwrite meta[%u] with txnid %" PRIaTXN ", error %d", + pgno, recent.txnid, err); + return err; + } + troika = meta_tap(env); + ENSURE(env, 0 == meta_txnid(recent.ptr_v)); + ENSURE(env, 0 == meta_eq_mask(&troika)); + } + + if (lck_rc == /* lck exclusive */ MDBX_RESULT_TRUE) { + //-------------------------------------------------- shrink DB & update geo + /* re-check size after mmap */ + if ((env->dxb_mmap.current & (globals.sys_pagesize - 1)) != 0 || + env->dxb_mmap.current < used_bytes) { + ERROR("unacceptable/unexpected datafile size %" PRIuPTR, + env->dxb_mmap.current); + return MDBX_PROBLEM; + } + if (env->dxb_mmap.current != env->geo_in_bytes.now) { + header.geometry.now = bytes2pgno(env, env->dxb_mmap.current); + NOTICE("need update meta-geo to filesize %" PRIuPTR " bytes, %" PRIaPGNO + " pages", + env->dxb_mmap.current, header.geometry.now); + } + + const meta_ptr_t recent = meta_recent(env, &troika); + if (/* не учитываем различия в geo.first_unallocated */ + header.geometry.grow_pv != recent.ptr_c->geometry.grow_pv || + header.geometry.shrink_pv != recent.ptr_c->geometry.shrink_pv || + header.geometry.lower != recent.ptr_c->geometry.lower || + header.geometry.upper != recent.ptr_c->geometry.upper || + header.geometry.now != recent.ptr_c->geometry.now) { + if ((env->flags & MDBX_RDONLY) != 0 || + /* recovery mode */ env->stuck_meta >= 0) { + WARNING("skipped update meta.geo in %s mode: from l%" PRIaPGNO + "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u, to l%" PRIaPGNO + "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u", + (env->stuck_meta < 0) ? 
"read-only" : "recovery", + recent.ptr_c->geometry.lower, recent.ptr_c->geometry.now, + recent.ptr_c->geometry.upper, + pv2pages(recent.ptr_c->geometry.shrink_pv), + pv2pages(recent.ptr_c->geometry.grow_pv), header.geometry.lower, + header.geometry.now, header.geometry.upper, + pv2pages(header.geometry.shrink_pv), + pv2pages(header.geometry.grow_pv)); + } else { + const txnid_t next_txnid = safe64_txnid_next(recent.txnid); + if (unlikely(next_txnid > MAX_TXNID)) { + ERROR("txnid overflow, raise %d", MDBX_TXN_FULL); + return MDBX_TXN_FULL; + } + NOTICE("updating meta.geo: " + "from l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO + "/s%u-g%u (txn#%" PRIaTXN "), " + "to l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO + "/s%u-g%u (txn#%" PRIaTXN ")", + recent.ptr_c->geometry.lower, recent.ptr_c->geometry.now, + recent.ptr_c->geometry.upper, + pv2pages(recent.ptr_c->geometry.shrink_pv), + pv2pages(recent.ptr_c->geometry.grow_pv), recent.txnid, + header.geometry.lower, header.geometry.now, + header.geometry.upper, pv2pages(header.geometry.shrink_pv), + pv2pages(header.geometry.grow_pv), next_txnid); + + ENSURE(env, header.unsafe_txnid == recent.txnid); + meta_set_txnid(env, &header, next_txnid); + err = dxb_sync_locked(env, env->flags | txn_shrink_allowed, &header, + &troika); + if (err) { + ERROR("error %d, while updating meta.geo: " + "from l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO + "/s%u-g%u (txn#%" PRIaTXN "), " + "to l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO + "/s%u-g%u (txn#%" PRIaTXN ")", + err, recent.ptr_c->geometry.lower, recent.ptr_c->geometry.now, + recent.ptr_c->geometry.upper, + pv2pages(recent.ptr_c->geometry.shrink_pv), + pv2pages(recent.ptr_c->geometry.grow_pv), recent.txnid, + header.geometry.lower, header.geometry.now, + header.geometry.upper, pv2pages(header.geometry.shrink_pv), + pv2pages(header.geometry.grow_pv), header.unsafe_txnid); + return err; + } + } + } + + atomic_store32(&env->lck->discarded_tail, + bytes2pgno(env, used_aligned2os_bytes), 
mo_Relaxed); + + if ((env->flags & MDBX_RDONLY) == 0 && env->stuck_meta < 0 && + (globals.runtime_flags & MDBX_DBG_DONT_UPGRADE) == 0) { + for (int n = 0; n < NUM_METAS; ++n) { + meta_t *const meta = METAPAGE(env, n); + if (unlikely(unaligned_peek_u64(4, &meta->magic_and_version) != + MDBX_DATA_MAGIC)) { + const txnid_t txnid = constmeta_txnid(meta); + NOTICE("%s %s" + "meta[%u], txnid %" PRIaTXN, + "updating db-format signature for", + meta_is_steady(meta) ? "stead-" : "weak-", n, txnid); + err = meta_override(env, n, txnid, meta); + if (unlikely(err != MDBX_SUCCESS) && + /* Just ignore the MDBX_PROBLEM error, since here it is + * returned only in case of the attempt to upgrade an obsolete + * meta-page that is invalid for current state of a DB, + * e.g. after shrinking DB file */ + err != MDBX_PROBLEM) { + ERROR("%s meta[%u], txnid %" PRIaTXN ", error %d", + "updating db-format signature for", n, txnid, err); + return err; + } + troika = meta_tap(env); + } + } + } + } /* lck exclusive, lck_rc == MDBX_RESULT_TRUE */ + + //---------------------------------------------------- setup madvise/readahead +#if MDBX_ENABLE_MADVISE + if (used_aligned2os_bytes < env->dxb_mmap.current) { +#if defined(MADV_REMOVE) + if (lck_rc && (env->flags & MDBX_WRITEMAP) != 0 && + /* not recovery mode */ env->stuck_meta < 0) { + NOTICE("open-MADV_%s %u..%u", "REMOVE (deallocate file space)", + env->lck->discarded_tail.weak, + bytes2pgno(env, env->dxb_mmap.current)); + err = madvise(ptr_disp(env->dxb_mmap.base, used_aligned2os_bytes), + env->dxb_mmap.current - used_aligned2os_bytes, MADV_REMOVE) + ? 
ignore_enosys(errno) + : MDBX_SUCCESS; + if (unlikely(MDBX_IS_ERROR(err))) + return err; + } +#endif /* MADV_REMOVE */ +#if defined(MADV_DONTNEED) + NOTICE("open-MADV_%s %u..%u", "DONTNEED", env->lck->discarded_tail.weak, + bytes2pgno(env, env->dxb_mmap.current)); + err = madvise(ptr_disp(env->dxb_mmap.base, used_aligned2os_bytes), + env->dxb_mmap.current - used_aligned2os_bytes, MADV_DONTNEED) + ? ignore_enosys(errno) + : MDBX_SUCCESS; + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#elif defined(POSIX_MADV_DONTNEED) + err = ignore_enosys(posix_madvise( + ptr_disp(env->dxb_mmap.base, used_aligned2os_bytes), + env->dxb_mmap.current - used_aligned2os_bytes, POSIX_MADV_DONTNEED)); + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#elif defined(POSIX_FADV_DONTNEED) + err = ignore_enosys(posix_fadvise( + env->lazy_fd, used_aligned2os_bytes, + env->dxb_mmap.current - used_aligned2os_bytes, POSIX_FADV_DONTNEED)); + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#endif /* MADV_DONTNEED */ + } + + err = dxb_set_readahead(env, bytes2pgno(env, used_bytes), readahead, true); + if (unlikely(err != MDBX_SUCCESS)) + return err; +#endif /* MDBX_ENABLE_MADVISE */ + + return rc; +} + +int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, + troika_t *const troika) { + eASSERT(env, ((env->flags ^ flags) & MDBX_WRITEMAP) == 0); + eASSERT(env, pending->trees.gc.flags == MDBX_INTEGERKEY); + eASSERT(env, check_sdb_flags(pending->trees.main.flags)); + const meta_t *const meta0 = METAPAGE(env, 0); + const meta_t *const meta1 = METAPAGE(env, 1); + const meta_t *const meta2 = METAPAGE(env, 2); + const meta_ptr_t head = meta_recent(env, troika); + int rc; + + eASSERT(env, + pending < METAPAGE(env, 0) || pending > METAPAGE(env, NUM_METAS)); + eASSERT(env, (env->flags & (MDBX_RDONLY | ENV_FATAL_ERROR)) == 0); + eASSERT(env, pending->geometry.first_unallocated <= pending->geometry.now); + + if (flags & MDBX_SAFE_NOSYNC) { + /* Check auto-sync conditions */ + const 
pgno_t autosync_threshold = + atomic_load32(&env->lck->autosync_threshold, mo_Relaxed); + const uint64_t autosync_period = + atomic_load64(&env->lck->autosync_period, mo_Relaxed); + uint64_t eoos_timestamp; + if ((autosync_threshold && + atomic_load64(&env->lck->unsynced_pages, mo_Relaxed) >= + autosync_threshold) || + (autosync_period && + (eoos_timestamp = + atomic_load64(&env->lck->eoos_timestamp, mo_Relaxed)) && + osal_monotime() - eoos_timestamp >= autosync_period)) + flags &= MDBX_WRITEMAP | txn_shrink_allowed; /* force steady */ + } + + pgno_t shrink = 0; + if (flags & txn_shrink_allowed) { + const size_t prev_discarded_pgno = + atomic_load32(&env->lck->discarded_tail, mo_Relaxed); + if (prev_discarded_pgno < pending->geometry.first_unallocated) + env->lck->discarded_tail.weak = pending->geometry.first_unallocated; + else if (prev_discarded_pgno >= + pending->geometry.first_unallocated + env->madv_threshold) { + /* LY: check conditions to discard unused pages */ + const pgno_t largest_pgno = mvcc_snapshot_largest( + env, (head.ptr_c->geometry.first_unallocated > + pending->geometry.first_unallocated) + ? 
head.ptr_c->geometry.first_unallocated + : pending->geometry.first_unallocated); + eASSERT(env, largest_pgno >= NUM_METAS); + +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) + const pgno_t edge = env->poison_edge; + if (edge > largest_pgno) { + env->poison_edge = largest_pgno; + VALGRIND_MAKE_MEM_NOACCESS( + ptr_disp(env->dxb_mmap.base, pgno2bytes(env, largest_pgno)), + pgno2bytes(env, edge - largest_pgno)); + MDBX_ASAN_POISON_MEMORY_REGION( + ptr_disp(env->dxb_mmap.base, pgno2bytes(env, largest_pgno)), + pgno2bytes(env, edge - largest_pgno)); + } +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ + +#if MDBX_ENABLE_MADVISE && \ + (defined(MADV_DONTNEED) || defined(POSIX_MADV_DONTNEED)) + const size_t discard_edge_pgno = pgno_align2os_pgno(env, largest_pgno); + if (prev_discarded_pgno >= discard_edge_pgno + env->madv_threshold) { + const size_t prev_discarded_bytes = + pgno_align2os_bytes(env, prev_discarded_pgno); + const size_t discard_edge_bytes = pgno2bytes(env, discard_edge_pgno); + /* из-за выравнивания prev_discarded_bytes и discard_edge_bytes + * могут быть равны */ + if (prev_discarded_bytes > discard_edge_bytes) { + NOTICE("shrink-MADV_%s %zu..%zu", "DONTNEED", discard_edge_pgno, + prev_discarded_pgno); + munlock_after(env, discard_edge_pgno, + bytes_align2os_bytes(env, env->dxb_mmap.current)); + const uint32_t munlocks_before = + atomic_load32(&env->lck->mlcnt[1], mo_Relaxed); +#if defined(MADV_DONTNEED) + int advise = MADV_DONTNEED; +#if defined(MADV_FREE) && \ + 0 /* MADV_FREE works for only anonymous vma at the moment */ + if ((env->flags & MDBX_WRITEMAP) && + global.linux_kernel_version > 0x04050000) + advise = MADV_FREE; +#endif /* MADV_FREE */ + int err = madvise(ptr_disp(env->dxb_mmap.base, discard_edge_bytes), + prev_discarded_bytes - discard_edge_bytes, advise) + ? 
ignore_enosys(errno) + : MDBX_SUCCESS; +#else + int err = ignore_enosys(posix_madvise( + ptr_disp(env->dxb_mmap.base, discard_edge_bytes), + prev_discarded_bytes - discard_edge_bytes, POSIX_MADV_DONTNEED)); +#endif + if (unlikely(MDBX_IS_ERROR(err))) { + const uint32_t mlocks_after = + atomic_load32(&env->lck->mlcnt[0], mo_Relaxed); + if (err == MDBX_EINVAL) { + const int severity = (mlocks_after - munlocks_before) + ? MDBX_LOG_NOTICE + : MDBX_LOG_WARN; + if (LOG_ENABLED(severity)) + debug_log( + severity, __func__, __LINE__, + "%s-madvise: ignore EINVAL (%d) since some pages maybe " + "locked (%u/%u mlcnt-processes)", + "shrink", err, mlocks_after, munlocks_before); + } else { + ERROR("%s-madvise(%s, %zu, +%zu), %u/%u mlcnt-processes, err %d", + "shrink", "DONTNEED", discard_edge_bytes, + prev_discarded_bytes - discard_edge_bytes, mlocks_after, + munlocks_before, err); + return err; + } + } else + env->lck->discarded_tail.weak = discard_edge_pgno; + } + } +#endif /* MDBX_ENABLE_MADVISE && (MADV_DONTNEED || POSIX_MADV_DONTNEED) */ + + /* LY: check conditions to shrink datafile */ + const pgno_t backlog_gap = 3 + pending->trees.gc.height * 3; + pgno_t shrink_step = 0; + if (pending->geometry.shrink_pv && + pending->geometry.now - pending->geometry.first_unallocated > + (shrink_step = pv2pages(pending->geometry.shrink_pv)) + + backlog_gap) { + if (pending->geometry.now > largest_pgno && + pending->geometry.now - largest_pgno > shrink_step + backlog_gap) { + const pgno_t aligner = + pending->geometry.grow_pv + ? /* grow_step */ pv2pages(pending->geometry.grow_pv) + : shrink_step; + const pgno_t with_backlog_gap = largest_pgno + backlog_gap; + const pgno_t aligned = + pgno_align2os_pgno(env, (size_t)with_backlog_gap + aligner - + with_backlog_gap % aligner); + const pgno_t bottom = (aligned > pending->geometry.lower) + ? 
aligned + : pending->geometry.lower; + if (pending->geometry.now > bottom) { + if (TROIKA_HAVE_STEADY(troika)) + /* force steady, but only if steady-checkpoint is present */ + flags &= MDBX_WRITEMAP | txn_shrink_allowed; + shrink = pending->geometry.now - bottom; + pending->geometry.now = bottom; + if (unlikely(head.txnid == pending->unsafe_txnid)) { + const txnid_t txnid = safe64_txnid_next(pending->unsafe_txnid); + NOTICE("force-forward pending-txn %" PRIaTXN " -> %" PRIaTXN, + pending->unsafe_txnid, txnid); + ENSURE(env, !env->basal_txn || !env->txn); + if (unlikely(txnid > MAX_TXNID)) { + rc = MDBX_TXN_FULL; + ERROR("txnid overflow, raise %d", rc); + goto fail; + } + meta_set_txnid(env, pending, txnid); + eASSERT(env, coherency_check_meta(env, pending, true)); + } + } + } + } + } + } + + /* LY: step#1 - sync previously written/updated data-pages */ + rc = MDBX_RESULT_FALSE /* carry steady */; + if (atomic_load64(&env->lck->unsynced_pages, mo_Relaxed)) { + eASSERT(env, ((flags ^ env->flags) & MDBX_WRITEMAP) == 0); + enum osal_syncmode_bits mode_bits = MDBX_SYNC_NONE; + unsigned sync_op = 0; + if ((flags & MDBX_SAFE_NOSYNC) == 0) { + sync_op = 1; + mode_bits = MDBX_SYNC_DATA; + if (pending->geometry.first_unallocated > + meta_prefer_steady(env, troika).ptr_c->geometry.now) + mode_bits |= MDBX_SYNC_SIZE; + if (flags & MDBX_NOMETASYNC) + mode_bits |= MDBX_SYNC_IODQ; + } else if (unlikely(env->incore)) + goto skip_incore_sync; + if (flags & MDBX_WRITEMAP) { +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.msync.weak += sync_op; +#else + (void)sync_op; +#endif /* MDBX_ENABLE_PGOP_STAT */ + rc = osal_msync( + &env->dxb_mmap, 0, + pgno_align2os_bytes(env, pending->geometry.first_unallocated), + mode_bits); + } else { +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.fsync.weak += sync_op; +#else + (void)sync_op; +#endif /* MDBX_ENABLE_PGOP_STAT */ + rc = osal_fsync(env->lazy_fd, mode_bits); + } + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + rc = (flags & MDBX_SAFE_NOSYNC) ? 
MDBX_RESULT_TRUE /* carry non-steady */ + : MDBX_RESULT_FALSE /* carry steady */; + } + eASSERT(env, coherency_check_meta(env, pending, true)); + + /* Steady or Weak */ + if (rc == MDBX_RESULT_FALSE /* carry steady */) { + meta_sign_as_steady(pending); + atomic_store64(&env->lck->eoos_timestamp, 0, mo_Relaxed); + atomic_store64(&env->lck->unsynced_pages, 0, mo_Relaxed); + } else { + assert(rc == MDBX_RESULT_TRUE /* carry non-steady */); + skip_incore_sync: + eASSERT(env, env->lck->unsynced_pages.weak > 0); + /* Может быть нулевым если unsynced_pages > 0 в результате спиллинга. + * eASSERT(env, env->lck->eoos_timestamp.weak != 0); */ + unaligned_poke_u64(4, pending->sign, DATASIGN_WEAK); + } + + const bool legal4overwrite = + head.txnid == pending->unsafe_txnid && + !memcmp(&head.ptr_c->trees, &pending->trees, sizeof(pending->trees)) && + !memcmp(&head.ptr_c->canary, &pending->canary, sizeof(pending->canary)) && + !memcmp(&head.ptr_c->geometry, &pending->geometry, + sizeof(pending->geometry)); + meta_t *target = nullptr; + if (head.txnid == pending->unsafe_txnid) { + ENSURE(env, legal4overwrite); + if (!head.is_steady && meta_is_steady(pending)) + target = (meta_t *)head.ptr_c; + else { + WARNING("%s", "skip update meta"); + return MDBX_SUCCESS; + } + } else { + const unsigned troika_tail = troika->tail_and_flags & 3; + ENSURE(env, troika_tail < NUM_METAS && troika_tail != troika->recent && + troika_tail != troika->prefer_steady); + target = (meta_t *)meta_tail(env, troika).ptr_c; + } + + /* LY: step#2 - update meta-page. 
*/ + DEBUG("writing meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO + ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO + " +%u -%u, txn_id %" PRIaTXN ", %s", + data_page(target)->pgno, pending->trees.main.root, + pending->trees.gc.root, pending->geometry.lower, + pending->geometry.first_unallocated, pending->geometry.now, + pending->geometry.upper, pv2pages(pending->geometry.grow_pv), + pv2pages(pending->geometry.shrink_pv), pending->unsafe_txnid, + durable_caption(pending)); + + DEBUG("meta0: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO "/%" PRIaPGNO, + (meta0 == head.ptr_c) ? "head" + : (meta0 == target) ? "tail" + : "stay", + durable_caption(meta0), constmeta_txnid(meta0), meta0->trees.main.root, + meta0->trees.gc.root); + DEBUG("meta1: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO "/%" PRIaPGNO, + (meta1 == head.ptr_c) ? "head" + : (meta1 == target) ? "tail" + : "stay", + durable_caption(meta1), constmeta_txnid(meta1), meta1->trees.main.root, + meta1->trees.gc.root); + DEBUG("meta2: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO "/%" PRIaPGNO, + (meta2 == head.ptr_c) ? "head" + : (meta2 == target) ? "tail" + : "stay", + durable_caption(meta2), constmeta_txnid(meta2), meta2->trees.main.root, + meta2->trees.gc.root); + + eASSERT(env, pending->unsafe_txnid != constmeta_txnid(meta0) || + (meta_is_steady(pending) && !meta_is_steady(meta0))); + eASSERT(env, pending->unsafe_txnid != constmeta_txnid(meta1) || + (meta_is_steady(pending) && !meta_is_steady(meta1))); + eASSERT(env, pending->unsafe_txnid != constmeta_txnid(meta2) || + (meta_is_steady(pending) && !meta_is_steady(meta2))); + + eASSERT(env, ((env->flags ^ flags) & MDBX_WRITEMAP) == 0); + ENSURE(env, target == head.ptr_c || + constmeta_txnid(target) < pending->unsafe_txnid); + if (flags & MDBX_WRITEMAP) { + jitter4testing(true); + if (likely(target != head.ptr_c)) { + /* LY: 'invalidate' the meta. 
*/ + meta_update_begin(env, target, pending->unsafe_txnid); + unaligned_poke_u64(4, target->sign, DATASIGN_WEAK); +#ifndef NDEBUG + /* debug: provoke failure to catch a violators, but don't touch pagesize + * to allow readers catch actual pagesize. */ + void *provoke_begin = &target->trees.gc.root; + void *provoke_end = &target->sign; + memset(provoke_begin, 0xCC, ptr_dist(provoke_end, provoke_begin)); + jitter4testing(false); +#endif + + /* LY: update info */ + target->geometry = pending->geometry; + target->trees.gc = pending->trees.gc; + target->trees.main = pending->trees.main; + eASSERT(env, target->trees.gc.flags == MDBX_INTEGERKEY); + eASSERT(env, check_sdb_flags(target->trees.main.flags)); + target->canary = pending->canary; + memcpy(target->pages_retired, pending->pages_retired, 8); + jitter4testing(true); + + /* LY: 'commit' the meta */ + meta_update_end(env, target, unaligned_peek_u64(4, pending->txnid_b)); + jitter4testing(true); + eASSERT(env, coherency_check_meta(env, target, true)); + } else { + /* dangerous case (target == head), only sign could + * me updated, check assertions once again */ + eASSERT(env, + legal4overwrite && !head.is_steady && meta_is_steady(pending)); + } + memcpy(target->sign, pending->sign, 8); + osal_flush_incoherent_cpu_writeback(); + jitter4testing(true); + if (!env->incore) { + if (!MDBX_AVOID_MSYNC) { + /* sync meta-pages */ +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.msync.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS), + (flags & MDBX_NOMETASYNC) + ? 
MDBX_SYNC_NONE + : MDBX_SYNC_DATA | MDBX_SYNC_IODQ); + } else { +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.wops.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + const page_t *page = data_page(target); + rc = osal_pwrite(env->fd4meta, page, env->ps, + ptr_dist(page, env->dxb_mmap.base)); + if (likely(rc == MDBX_SUCCESS)) { + osal_flush_incoherent_mmap(target, sizeof(meta_t), + globals.sys_pagesize); + if ((flags & MDBX_NOMETASYNC) == 0 && env->fd4meta == env->lazy_fd) { +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.fsync.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + rc = osal_fsync(env->lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); + } + } + } + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + } + } else { +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.wops.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + const meta_t undo_meta = *target; + eASSERT(env, pending->trees.gc.flags == MDBX_INTEGERKEY); + eASSERT(env, check_sdb_flags(pending->trees.main.flags)); + rc = osal_pwrite(env->fd4meta, pending, sizeof(meta_t), + ptr_dist(target, env->dxb_mmap.base)); + if (unlikely(rc != MDBX_SUCCESS)) { + undo: + DEBUG("%s", "write failed, disk error?"); + /* On a failure, the pagecache still contains the new data. + * Try write some old data back, to prevent it from being used. 
*/ + osal_pwrite(env->fd4meta, &undo_meta, sizeof(meta_t), + ptr_dist(target, env->dxb_mmap.base)); + goto fail; + } + osal_flush_incoherent_mmap(target, sizeof(meta_t), globals.sys_pagesize); + /* sync meta-pages */ + if ((flags & MDBX_NOMETASYNC) == 0 && env->fd4meta == env->lazy_fd && + !env->incore) { +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.fsync.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + rc = osal_fsync(env->lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); + if (rc != MDBX_SUCCESS) + goto undo; + } + } + + uint64_t timestamp = 0; + while ("workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269") { + rc = coherency_check_written( + env, pending->unsafe_txnid, target, + bytes2pgno(env, ptr_dist(target, env->dxb_mmap.base)), ×tamp); + if (likely(rc == MDBX_SUCCESS)) + break; + if (unlikely(rc != MDBX_RESULT_TRUE)) + goto fail; + } + + const uint32_t sync_txnid_dist = + ((flags & MDBX_NOMETASYNC) == 0) ? 0 + : ((flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) + ? MDBX_NOMETASYNC_LAZY_FD + : MDBX_NOMETASYNC_LAZY_WRITEMAP; + env->lck->meta_sync_txnid.weak = + pending->txnid_a[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__].weak - + sync_txnid_dist; + + *troika = meta_tap(env); + for (MDBX_txn *txn = env->basal_txn; txn; txn = txn->nested) + if (troika != &txn->tw.troika) + txn->tw.troika = *troika; + + /* LY: shrink datafile if needed */ + if (unlikely(shrink)) { + VERBOSE("shrink to %" PRIaPGNO " pages (-%" PRIaPGNO ")", + pending->geometry.now, shrink); + rc = dxb_resize(env, pending->geometry.first_unallocated, + pending->geometry.now, pending->geometry.upper, + impilict_shrink); + if (rc != MDBX_SUCCESS && rc != MDBX_EPERM) + goto fail; + eASSERT(env, coherency_check_meta(env, target, true)); + } + + lck_t *const lck = env->lck_mmap.lck; + if (likely(lck)) + /* toggle oldest refresh */ + atomic_store32(&lck->rdt_refresh_flag, false, mo_Relaxed); + + return MDBX_SUCCESS; + +fail: + env->flags |= ENV_FATAL_ERROR; + return rc; +} diff --git 
a/src/env-opts.c b/src/env-opts.c
new file mode 100644
index 00000000..c1e6324d
--- /dev/null
+++ b/src/env-opts.c
@@ -0,0 +1,419 @@
+/// \copyright SPDX-License-Identifier: Apache-2.0
+/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024
+
+#include "internals.h"
+/* Default for options.rp_augment_limit, computed from the current datafile size, env->maxgc_large1page and options.gc_time_limit. */
+__cold static unsigned default_rp_augment_limit(const MDBX_env *env) {
+  const size_t timeframe = /* 16 seconds */ 16 << 16;
+  const size_t remain_1sec = /* part of the timeframe left after gc_time_limit, never below zero */
+      (env->options.gc_time_limit < timeframe)
+          ? timeframe - (size_t)env->options.gc_time_limit
+          : 0;
+  const size_t minimum = (env->maxgc_large1page * 2 > MDBX_PNL_INITIAL) /* larger of the two values */
+                             ? env->maxgc_large1page * 2
+                             : MDBX_PNL_INITIAL;
+  const size_t one_third = env->geo_in_bytes.now / 3 >> env->ps2ln; /* presumably a third of the current size in pages (ps2ln looks like log2 of the page size) - TODO confirm */
+  const size_t augment_limit = /* grows above `minimum` in proportion to remain_1sec */
+      (one_third > minimum)
+          ? minimum + (one_third - minimum) / timeframe * remain_1sec
+          : minimum;
+  eASSERT(env, augment_limit < PAGELIST_LIMIT);
+  return pnl_bytes2size(pnl_size2bytes(augment_limit));
+}
+/* Default for options.prefault_write: true only when MDBX_MMAP_INCOHERENT_FILE_WRITE is zero, the env is not in-core, and MDBX_WRITEMAP is set without MDBX_RDONLY. */
+static bool default_prefault_write(const MDBX_env *env) {
+  return !MDBX_MMAP_INCOHERENT_FILE_WRITE && !env->incore &&
+         (env->flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == MDBX_WRITEMAP;
+}
+/* Default for options.prefer_waf_insteadof_balance: always false; `env` is unused. */
+static bool default_prefer_waf_insteadof_balance(const MDBX_env *env) {
+  (void)env;
+  return false;
+}
+/* Fills env->options with the built-in initial values. */
+void env_options_init(MDBX_env *env) {
+  env->options.rp_augment_limit = MDBX_PNL_INITIAL;
+  env->options.dp_reserve_limit = MDBX_PNL_INITIAL;
+  env->options.dp_initial = MDBX_PNL_INITIAL;
+  env->options.spill_max_denominator = 8;
+  env->options.spill_min_denominator = 8;
+  env->options.spill_parent4child_denominator = 0;
+  env->options.dp_loose_limit = 64;
+  env->options.merge_threshold_16dot16_percent = 65536 / 4 /* 25% */;
+  if (default_prefer_waf_insteadof_balance(env))
+    env->options.prefer_waf_insteadof_balance = true;
+
+#if !(defined(_WIN32) || defined(_WIN64))
+  env->options.writethrough_threshold = /* on Linux under WSL1 the threshold is MAX_PAGENO, otherwise the compile-time default */
+#if defined(__linux__) || defined(__gnu_linux__)
+      globals.running_on_WSL1 ? MAX_PAGENO :
+#endif /* Linux */
+                              MDBX_WRITETHROUGH_THRESHOLD_DEFAULT;
+#endif /* Windows */
+}
+/* Recomputes the options not flagged in options.flags.non_auto and derives env->madv_threshold from the current datafile size. */
+void env_options_adjust_defaults(MDBX_env *env) {
+  if (!env->options.flags.non_auto.rp_augment_limit)
+    env->options.rp_augment_limit = default_rp_augment_limit(env);
+  if (!env->options.flags.non_auto.prefault_write)
+    env->options.prefault_write = default_prefault_write(env);
+
+  const size_t basis = env->geo_in_bytes.now;
+  /* TODO: use options? */
+  const unsigned factor = 9;
+  size_t threshold = (basis < ((size_t)65536 << factor)) /* basis >> factor, clamped to [65536, MEGABYTE * 4] */
+                         ? 65536 /* minimal threshold */
+                     : (basis > (MEGABYTE * 4 << factor))
+                         ? MEGABYTE * 4 /* maximal threshold */
+                         : basis >> factor;
+  threshold = /* capped by the shrink step when one is configured (non-zero) */
+      (threshold < env->geo_in_bytes.shrink || !env->geo_in_bytes.shrink)
+          ? threshold
+          : env->geo_in_bytes.shrink;
+
+  env->madv_threshold = bytes2pgno(env, bytes_align2os_bytes(env, threshold));
+}
+
+//------------------------------------------------------------------------------
+
+__cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option,
+                               uint64_t value) {
+  int err = check_env(env, false);
+  if (unlikely(err != MDBX_SUCCESS))
+    return err;
+
+  const bool lock_needed =
+      ((env->flags & ENV_ACTIVE) && env->basal_txn && !env_txn0_owned(env));
+  bool should_unlock = false;
+  switch (option) {
+  case MDBX_opt_sync_bytes:
+    if (value == /* default */ UINT64_MAX)
+      value = MAX_WRITE;
+    if (unlikely(env->flags & MDBX_RDONLY))
+      return MDBX_EACCESS;
+    if (unlikely(!(env->flags & ENV_ACTIVE)))
+      return MDBX_EPERM;
+    if (unlikely(value > SIZE_MAX - 65536))
+      return MDBX_EINVAL;
+    value = bytes2pgno(env, (size_t)value + env->ps - 1);
+    if ((uint32_t)value !=
+            atomic_load32(&env->lck->autosync_threshold, mo_AcquireRelease) &&
+        atomic_store32(&env->lck->autosync_threshold, (uint32_t)value,
+                       mo_Relaxed)
+        /* Дергаем sync(force=off) только если задано новое не-нулевое значение
+         * и мы вне транзакции */
+        && lock_needed) {
+      err = env_sync(env, false, false);
+      if (err == /*
нечего сбрасывать на диск */ MDBX_RESULT_TRUE) + err = MDBX_SUCCESS; + } + break; + + case MDBX_opt_sync_period: + if (value == /* default */ UINT64_MAX) + value = 2780315 /* 42.42424 секунды */; + if (unlikely(env->flags & MDBX_RDONLY)) + return MDBX_EACCESS; + if (unlikely(!(env->flags & ENV_ACTIVE))) + return MDBX_EPERM; + if (unlikely(value > UINT32_MAX)) + return MDBX_EINVAL; + value = osal_16dot16_to_monotime((uint32_t)value); + if (value != atomic_load64(&env->lck->autosync_period, mo_AcquireRelease) && + atomic_store64(&env->lck->autosync_period, value, mo_Relaxed) + /* Дергаем sync(force=off) только если задано новое не-нулевое значение + * и мы вне транзакции */ + && lock_needed) { + err = env_sync(env, false, false); + if (err == /* нечего сбрасывать на диск */ MDBX_RESULT_TRUE) + err = MDBX_SUCCESS; + } + break; + + case MDBX_opt_max_db: + if (value == /* default */ UINT64_MAX) + value = 42; + if (unlikely(value > MDBX_MAX_DBI)) + return MDBX_EINVAL; + if (unlikely(env->dxb_mmap.base)) + return MDBX_EPERM; + env->max_dbi = (unsigned)value + CORE_DBS; + break; + + case MDBX_opt_max_readers: + if (value == /* default */ UINT64_MAX) + value = MDBX_READERS_LIMIT; + if (unlikely(value < 1 || value > MDBX_READERS_LIMIT)) + return MDBX_EINVAL; + if (unlikely(env->dxb_mmap.base)) + return MDBX_EPERM; + env->max_readers = (unsigned)value; + break; + + case MDBX_opt_dp_reserve_limit: + if (value == /* default */ UINT64_MAX) + value = INT_MAX; + if (unlikely(value > INT_MAX)) + return MDBX_EINVAL; + if (env->options.dp_reserve_limit != (unsigned)value) { + if (lock_needed) { + err = lck_txn_lock(env, false); + if (unlikely(err != MDBX_SUCCESS)) + return err; + should_unlock = true; + } + env->options.dp_reserve_limit = (unsigned)value; + while (env->shadow_reserve_len > env->options.dp_reserve_limit) { + eASSERT(env, env->shadow_reserve != nullptr); + page_t *dp = env->shadow_reserve; + MDBX_ASAN_UNPOISON_MEMORY_REGION(dp, env->ps); + 
VALGRIND_MAKE_MEM_DEFINED(&page_next(dp), sizeof(page_t *)); + env->shadow_reserve = page_next(dp); + void *const ptr = ptr_disp(dp, -(ptrdiff_t)sizeof(size_t)); + osal_free(ptr); + env->shadow_reserve_len -= 1; + } + } + break; + + case MDBX_opt_rp_augment_limit: + if (value == /* default */ UINT64_MAX) { + env->options.flags.non_auto.rp_augment_limit = 0; + env->options.rp_augment_limit = default_rp_augment_limit(env); + } else if (unlikely(value > PAGELIST_LIMIT)) + return MDBX_EINVAL; + else { + env->options.flags.non_auto.rp_augment_limit = 1; + env->options.rp_augment_limit = (unsigned)value; + } + break; + + case MDBX_opt_gc_time_limit: + if (value == /* default */ UINT64_MAX) + value = 0; + if (unlikely(value > UINT32_MAX)) + return MDBX_EINVAL; + if (unlikely(env->flags & MDBX_RDONLY)) + return MDBX_EACCESS; + value = osal_16dot16_to_monotime((uint32_t)value); + if (value != env->options.gc_time_limit) { + if (env->txn && lock_needed) + return MDBX_EPERM; + env->options.gc_time_limit = value; + if (!env->options.flags.non_auto.rp_augment_limit) + env->options.rp_augment_limit = default_rp_augment_limit(env); + } + break; + + case MDBX_opt_txn_dp_limit: + case MDBX_opt_txn_dp_initial: + if (value == /* default */ UINT64_MAX) + value = PAGELIST_LIMIT; + if (unlikely(value > PAGELIST_LIMIT || value < CURSOR_STACK_SIZE * 4)) + return MDBX_EINVAL; + if (unlikely(env->flags & MDBX_RDONLY)) + return MDBX_EACCESS; + if (lock_needed) { + err = lck_txn_lock(env, false); + if (unlikely(err != MDBX_SUCCESS)) + return err; + should_unlock = true; + } + if (env->txn) + err = MDBX_EPERM /* unable change during transaction */; + else { + const pgno_t value32 = (pgno_t)value; + if (option == MDBX_opt_txn_dp_initial && + env->options.dp_initial != value32) { + env->options.dp_initial = value32; + if (env->options.dp_limit < value32) { + env->options.dp_limit = value32; + env->options.flags.non_auto.dp_limit = 1; + } + } + if (option == MDBX_opt_txn_dp_limit && 
env->options.dp_limit != value32) { + env->options.dp_limit = value32; + env->options.flags.non_auto.dp_limit = 1; + if (env->options.dp_initial > value32) + env->options.dp_initial = value32; + } + } + break; + + case MDBX_opt_spill_max_denominator: + if (value == /* default */ UINT64_MAX) + value = 8; + if (unlikely(value > 255)) + return MDBX_EINVAL; + env->options.spill_max_denominator = (uint8_t)value; + break; + case MDBX_opt_spill_min_denominator: + if (value == /* default */ UINT64_MAX) + value = 8; + if (unlikely(value > 255)) + return MDBX_EINVAL; + env->options.spill_min_denominator = (uint8_t)value; + break; + case MDBX_opt_spill_parent4child_denominator: + if (value == /* default */ UINT64_MAX) + value = 0; + if (unlikely(value > 255)) + return MDBX_EINVAL; + env->options.spill_parent4child_denominator = (uint8_t)value; + break; + + case MDBX_opt_loose_limit: + if (value == /* default */ UINT64_MAX) + value = 64; + if (unlikely(value > 255)) + return MDBX_EINVAL; + env->options.dp_loose_limit = (uint8_t)value; + break; + + case MDBX_opt_merge_threshold_16dot16_percent: + if (value == /* default */ UINT64_MAX) + value = 65536 / 4 /* 25% */; + if (unlikely(value < 8192 || value > 32768)) + return MDBX_EINVAL; + env->options.merge_threshold_16dot16_percent = (unsigned)value; + recalculate_merge_thresholds(env); + break; + + case MDBX_opt_writethrough_threshold: +#if defined(_WIN32) || defined(_WIN64) + /* позволяем "установить" значение по-умолчанию и совпадающее + * с поведением соответствующим текущей установке MDBX_NOMETASYNC */ + if (value == /* default */ UINT64_MAX && + value != ((env->flags & MDBX_NOMETASYNC) ? 
0 : UINT_MAX)) + err = MDBX_EINVAL; +#else + if (value == /* default */ UINT64_MAX) + value = MDBX_WRITETHROUGH_THRESHOLD_DEFAULT; + if (value != (unsigned)value) + err = MDBX_EINVAL; + else + env->options.writethrough_threshold = (unsigned)value; +#endif + break; + + case MDBX_opt_prefault_write_enable: + if (value == /* default */ UINT64_MAX) { + env->options.prefault_write = default_prefault_write(env); + env->options.flags.non_auto.prefault_write = false; + } else if (value > 1) + err = MDBX_EINVAL; + else { + env->options.prefault_write = value != 0; + env->options.flags.non_auto.prefault_write = true; + } + break; + + case MDBX_opt_prefer_waf_insteadof_balance: + if (value == /* default */ UINT64_MAX) + env->options.prefer_waf_insteadof_balance = + default_prefer_waf_insteadof_balance(env); + else if (value > 1) + err = MDBX_EINVAL; + else + env->options.prefer_waf_insteadof_balance = value != 0; + break; + + default: + return MDBX_EINVAL; + } + + if (should_unlock) + lck_txn_unlock(env); + return err; +} + +__cold int mdbx_env_get_option(const MDBX_env *env, const MDBX_option_t option, + uint64_t *pvalue) { + int err = check_env(env, false); + if (unlikely(err != MDBX_SUCCESS)) + return err; + if (unlikely(!pvalue)) + return MDBX_EINVAL; + + switch (option) { + case MDBX_opt_sync_bytes: + if (unlikely(!(env->flags & ENV_ACTIVE))) + return MDBX_EPERM; + *pvalue = pgno2bytes( + env, atomic_load32(&env->lck->autosync_threshold, mo_Relaxed)); + break; + + case MDBX_opt_sync_period: + if (unlikely(!(env->flags & ENV_ACTIVE))) + return MDBX_EPERM; + *pvalue = osal_monotime_to_16dot16( + atomic_load64(&env->lck->autosync_period, mo_Relaxed)); + break; + + case MDBX_opt_max_db: + *pvalue = env->max_dbi - CORE_DBS; + break; + + case MDBX_opt_max_readers: + *pvalue = env->max_readers; + break; + + case MDBX_opt_dp_reserve_limit: + *pvalue = env->options.dp_reserve_limit; + break; + + case MDBX_opt_rp_augment_limit: + *pvalue = env->options.rp_augment_limit; + break; + 
+ case MDBX_opt_gc_time_limit: + *pvalue = osal_monotime_to_16dot16(env->options.gc_time_limit); + break; + + case MDBX_opt_txn_dp_limit: + *pvalue = env->options.dp_limit; + break; + case MDBX_opt_txn_dp_initial: + *pvalue = env->options.dp_initial; + break; + + case MDBX_opt_spill_max_denominator: + *pvalue = env->options.spill_max_denominator; + break; + case MDBX_opt_spill_min_denominator: + *pvalue = env->options.spill_min_denominator; + break; + case MDBX_opt_spill_parent4child_denominator: + *pvalue = env->options.spill_parent4child_denominator; + break; + + case MDBX_opt_loose_limit: + *pvalue = env->options.dp_loose_limit; + break; + + case MDBX_opt_merge_threshold_16dot16_percent: + *pvalue = env->options.merge_threshold_16dot16_percent; + break; + + case MDBX_opt_writethrough_threshold: +#if defined(_WIN32) || defined(_WIN64) + *pvalue = (env->flags & MDBX_NOMETASYNC) ? 0 : INT_MAX; +#else + *pvalue = env->options.writethrough_threshold; +#endif + break; + + case MDBX_opt_prefault_write_enable: + *pvalue = env->options.prefault_write; + break; + + case MDBX_opt_prefer_waf_insteadof_balance: + *pvalue = env->options.prefer_waf_insteadof_balance; + break; + + default: + return MDBX_EINVAL; + } + + return MDBX_SUCCESS; +} diff --git a/src/env.c b/src/env.c new file mode 100644 index 00000000..10fcfc29 --- /dev/null +++ b/src/env.c @@ -0,0 +1,679 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +bool env_txn0_owned(const MDBX_env *env) { + return (env->flags & MDBX_NOSTICKYTHREADS) + ? (env->basal_txn->owner != 0) + : (env->basal_txn->owner == osal_thread_self()); +} + +int env_page_auxbuffer(MDBX_env *env) { + return env->page_auxbuf ? 
MDBX_SUCCESS + : osal_memalign_alloc(globals.sys_pagesize, + env->ps * (size_t)NUM_METAS, + &env->page_auxbuf); +} + +__cold unsigned env_setup_pagesize(MDBX_env *env, const size_t pagesize) { + STATIC_ASSERT(PTRDIFF_MAX > MAX_MAPSIZE); + STATIC_ASSERT(MDBX_MIN_PAGESIZE > sizeof(page_t) + sizeof(meta_t)); + ENSURE(env, is_powerof2(pagesize)); + ENSURE(env, pagesize >= MDBX_MIN_PAGESIZE); + ENSURE(env, pagesize <= MDBX_MAX_PAGESIZE); + env->ps = (unsigned)pagesize; + if (env->page_auxbuf) { + osal_memalign_free(env->page_auxbuf); + env->page_auxbuf = nullptr; + } + + STATIC_ASSERT(MAX_GC1OVPAGE(MDBX_MIN_PAGESIZE) > 4); + STATIC_ASSERT(MAX_GC1OVPAGE(MDBX_MAX_PAGESIZE) < PAGELIST_LIMIT); + const intptr_t maxgc_ov1page = (pagesize - PAGEHDRSZ) / sizeof(pgno_t) - 1; + ENSURE(env, + maxgc_ov1page > 42 && maxgc_ov1page < (intptr_t)PAGELIST_LIMIT / 4); + env->maxgc_large1page = (unsigned)maxgc_ov1page; + env->maxgc_per_branch = + (unsigned)((pagesize - PAGEHDRSZ) / + (sizeof(indx_t) + sizeof(node_t) + sizeof(txnid_t))); + + STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MIN_PAGESIZE) > + sizeof(tree_t) + NODESIZE + 42); + STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MAX_PAGESIZE) < UINT16_MAX); + STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MIN_PAGESIZE) >= + BRANCH_NODE_MAX(MDBX_MIN_PAGESIZE)); + STATIC_ASSERT(BRANCH_NODE_MAX(MDBX_MAX_PAGESIZE) > NODESIZE + 42); + STATIC_ASSERT(BRANCH_NODE_MAX(MDBX_MAX_PAGESIZE) < UINT16_MAX); + const intptr_t branch_nodemax = BRANCH_NODE_MAX(pagesize); + const intptr_t leaf_nodemax = LEAF_NODE_MAX(pagesize); + ENSURE(env, branch_nodemax > (intptr_t)(NODESIZE + 42) && + branch_nodemax % 2 == 0 && + leaf_nodemax > (intptr_t)(sizeof(tree_t) + NODESIZE + 42) && + leaf_nodemax >= branch_nodemax && + leaf_nodemax < (int)UINT16_MAX && leaf_nodemax % 2 == 0); + env->leaf_nodemax = (uint16_t)leaf_nodemax; + env->branch_nodemax = (uint16_t)branch_nodemax; + env->ps2ln = (uint8_t)log2n_powerof2(pagesize); + eASSERT(env, pgno2bytes(env, 1) == pagesize); + eASSERT(env, bytes2pgno(env, 
pagesize + pagesize) == 2); + recalculate_merge_thresholds(env); + + /* TODO: recalculate me_subpage_xyz values from MDBX_opt_subpage_xyz. */ + env->subpage_limit = env->leaf_nodemax - NODESIZE; + env->subpage_room_threshold = 0; + env->subpage_reserve_prereq = env->leaf_nodemax; + env->subpage_reserve_limit = env->subpage_limit / 42; + eASSERT(env, env->subpage_reserve_prereq > + env->subpage_room_threshold + env->subpage_reserve_limit); + eASSERT(env, env->leaf_nodemax >= env->subpage_limit + NODESIZE); + + const pgno_t max_pgno = bytes2pgno(env, MAX_MAPSIZE); + if (!env->options.flags.non_auto.dp_limit) { + /* auto-setup dp_limit by "The42" ;-) */ + intptr_t total_ram_pages, avail_ram_pages; + int err = mdbx_get_sysraminfo(nullptr, &total_ram_pages, &avail_ram_pages); + if (unlikely(err != MDBX_SUCCESS)) + ERROR("mdbx_get_sysraminfo(), rc %d", err); + else { + size_t reasonable_dpl_limit = + (size_t)(total_ram_pages + avail_ram_pages) / 42; + if (pagesize > globals.sys_pagesize) + reasonable_dpl_limit /= pagesize / globals.sys_pagesize; + else if (pagesize < globals.sys_pagesize) + reasonable_dpl_limit *= globals.sys_pagesize / pagesize; + reasonable_dpl_limit = (reasonable_dpl_limit < PAGELIST_LIMIT) + ? reasonable_dpl_limit + : PAGELIST_LIMIT; + reasonable_dpl_limit = (reasonable_dpl_limit > CURSOR_STACK_SIZE * 4) + ? 
reasonable_dpl_limit + : CURSOR_STACK_SIZE * 4; + env->options.dp_limit = (unsigned)reasonable_dpl_limit; + } + } + if (env->options.dp_limit > max_pgno - NUM_METAS) + env->options.dp_limit = max_pgno - NUM_METAS; + if (env->options.dp_initial > env->options.dp_limit) + env->options.dp_initial = env->options.dp_limit; + return env->ps; +} + +__cold int env_sync(MDBX_env *env, bool force, bool nonblock) { + if (unlikely(env->flags & MDBX_RDONLY)) + return MDBX_EACCESS; + + const bool txn0_owned = env_txn0_owned(env); + bool should_unlock = false; + int rc = MDBX_RESULT_TRUE /* means "nothing to sync" */; + +retry:; + unsigned flags = env->flags & ~(MDBX_NOMETASYNC | txn_shrink_allowed); + if (unlikely((flags & (ENV_FATAL_ERROR | ENV_ACTIVE)) != ENV_ACTIVE)) { + rc = (flags & ENV_FATAL_ERROR) ? MDBX_PANIC : MDBX_EPERM; + goto bailout; + } + + const troika_t troika = + (txn0_owned | should_unlock) ? env->basal_txn->tw.troika : meta_tap(env); + const meta_ptr_t head = meta_recent(env, &troika); + const uint64_t unsynced_pages = + atomic_load64(&env->lck->unsynced_pages, mo_Relaxed); + if (unsynced_pages == 0) { + const uint32_t synched_meta_txnid_u32 = + atomic_load32(&env->lck->meta_sync_txnid, mo_Relaxed); + if (synched_meta_txnid_u32 == (uint32_t)head.txnid && head.is_steady) + goto bailout; + } + + if (should_unlock && (env->flags & MDBX_WRITEMAP) && + unlikely(head.ptr_c->geometry.first_unallocated > + bytes2pgno(env, env->dxb_mmap.current))) { + + if (unlikely(env->stuck_meta >= 0) && + troika.recent != (uint8_t)env->stuck_meta) { + NOTICE("skip %s since wagering meta-page (%u) is mispatch the recent " + "meta-page (%u)", + "sync datafile", env->stuck_meta, troika.recent); + rc = MDBX_RESULT_TRUE; + } else { + rc = dxb_resize(env, head.ptr_c->geometry.first_unallocated, + head.ptr_c->geometry.now, head.ptr_c->geometry.upper, + implicit_grow); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + } + + const size_t autosync_threshold = + 
atomic_load32(&env->lck->autosync_threshold, mo_Relaxed); + const uint64_t autosync_period = + atomic_load64(&env->lck->autosync_period, mo_Relaxed); + uint64_t eoos_timestamp; + if (force || (autosync_threshold && unsynced_pages >= autosync_threshold) || + (autosync_period && + (eoos_timestamp = + atomic_load64(&env->lck->eoos_timestamp, mo_Relaxed)) && + osal_monotime() - eoos_timestamp >= autosync_period)) + flags &= MDBX_WRITEMAP /* clear flags for full steady sync */; + + if (!txn0_owned) { + if (!should_unlock) { +#if MDBX_ENABLE_PGOP_STAT + unsigned wops = 0; +#endif /* MDBX_ENABLE_PGOP_STAT */ + + int err; + /* pre-sync to avoid latency for writer */ + if (unsynced_pages > /* FIXME: define threshold */ 42 && + (flags & MDBX_SAFE_NOSYNC) == 0) { + eASSERT(env, ((flags ^ env->flags) & MDBX_WRITEMAP) == 0); + if (flags & MDBX_WRITEMAP) { + /* Acquire guard to avoid collision with remap */ +#if defined(_WIN32) || defined(_WIN64) + imports.srwl_AcquireShared(&env->remap_guard); +#else + err = osal_fastmutex_acquire(&env->remap_guard); + if (unlikely(err != MDBX_SUCCESS)) + return err; +#endif + const size_t usedbytes = + pgno_align2os_bytes(env, head.ptr_c->geometry.first_unallocated); + err = osal_msync(&env->dxb_mmap, 0, usedbytes, MDBX_SYNC_DATA); +#if defined(_WIN32) || defined(_WIN64) + imports.srwl_ReleaseShared(&env->remap_guard); +#else + int unlock_err = osal_fastmutex_release(&env->remap_guard); + if (unlikely(unlock_err != MDBX_SUCCESS) && err == MDBX_SUCCESS) + err = unlock_err; +#endif + } else + err = osal_fsync(env->lazy_fd, MDBX_SYNC_DATA); + + if (unlikely(err != MDBX_SUCCESS)) + return err; + +#if MDBX_ENABLE_PGOP_STAT + wops = 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + /* pre-sync done */ + rc = MDBX_SUCCESS /* means "some data was synced" */; + } + + err = lck_txn_lock(env, nonblock); + if (unlikely(err != MDBX_SUCCESS)) + return err; + + should_unlock = true; +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.wops.weak += wops; +#endif /* 
MDBX_ENABLE_PGOP_STAT */ + env->basal_txn->tw.troika = meta_tap(env); + eASSERT(env, !env->txn && !env->basal_txn->nested); + goto retry; + } + eASSERT(env, head.txnid == recent_committed_txnid(env)); + env->basal_txn->txnid = head.txnid; + txn_snapshot_oldest(env->basal_txn); + flags |= txn_shrink_allowed; + } + + eASSERT(env, txn0_owned || should_unlock); + eASSERT(env, !txn0_owned || (flags & txn_shrink_allowed) == 0); + + if (!head.is_steady && unlikely(env->stuck_meta >= 0) && + troika.recent != (uint8_t)env->stuck_meta) { + NOTICE("skip %s since wagering meta-page (%u) is mispatch the recent " + "meta-page (%u)", + "sync datafile", env->stuck_meta, troika.recent); + rc = MDBX_RESULT_TRUE; + goto bailout; + } + if (!head.is_steady || ((flags & MDBX_SAFE_NOSYNC) == 0 && unsynced_pages)) { + DEBUG("meta-head %" PRIaPGNO ", %s, sync_pending %" PRIu64, + data_page(head.ptr_c)->pgno, durable_caption(head.ptr_c), + unsynced_pages); + meta_t meta = *head.ptr_c; + rc = dxb_sync_locked(env, flags, &meta, &env->basal_txn->tw.troika); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + + /* LY: sync meta-pages if MDBX_NOMETASYNC enabled + * and someone was not synced above. */ + if (atomic_load32(&env->lck->meta_sync_txnid, mo_Relaxed) != + (uint32_t)head.txnid) + rc = meta_sync(env, head); + +bailout: + if (should_unlock) + lck_txn_unlock(env); + return rc; +} + +__cold int env_open(MDBX_env *env, mdbx_mode_t mode) { + /* Использование O_DSYNC или FILE_FLAG_WRITE_THROUGH: + * + * 0) Если размер страниц БД меньше системной страницы ОЗУ, то ядру ОС + * придется чаще обновлять страницы в unified page cache. + * + * Однако, O_DSYNC не предполагает отключение unified page cache, + * поэтому подобные затруднения будем считать проблемой ОС и/или + * ожидаемым пенальти из-за использования мелких страниц БД. + * + * 1) В режиме MDBX_SYNC_DURABLE - O_DSYNC для записи как данных, + * так и мета-страниц. 
Однако, на Linux отказ от O_DSYNC с последующим + * fdatasync() может быть выгоднее при использовании HDD, так как + * позволяет io-scheduler переупорядочить запись с учетом актуального + * расположения файла БД на носителе. + * + * 2) В режиме MDBX_NOMETASYNC - O_DSYNC можно использовать для данных, + * но в этом может не быть смысла, так как fdatasync() всё равно + * требуется для гарантии фиксации мета после предыдущей транзакции. + * + * В итоге на нормальных системах (не Windows) есть два варианта: + * - при возможности O_DIRECT и/или io_ring для данных, скорее всего, + * есть смысл вызвать fdatasync() перед записью данных, а затем + * использовать O_DSYNC; + * - не использовать O_DSYNC и вызывать fdatasync() после записи данных. + * + * На Windows же следует минимизировать использование FlushFileBuffers() + * из-за проблем с производительностью. Поэтому на Windows в режиме + * MDBX_NOMETASYNC: + * - мета обновляется через дескриптор без FILE_FLAG_WRITE_THROUGH; + * - перед началом записи данных вызывается FlushFileBuffers(), если + * meta_sync_txnid не совпадает с последней записанной мета; + * - данные записываются через дескриптор с FILE_FLAG_WRITE_THROUGH. + * + * 3) В режиме MDBX_SAFE_NOSYNC - O_DSYNC нет смысла использовать, пока не + * будет реализована возможность полностью асинхронной "догоняющей" + * записи в выделенном процессе-сервере с io-ring очередями внутри. + * + * ----- + * + * Использование O_DIRECT или FILE_FLAG_NO_BUFFERING: + * + * Назначение этих флагов в отключении файлового дескриптора от + * unified page cache, т.е. от отображенных в память данных в случае + * libmdbx. + * + * Поэтому, использование direct i/o в libmdbx без MDBX_WRITEMAP лишено + * смысла и контр-продуктивно, ибо так мы провоцируем ядро ОС на + * не-когерентность отображения в память с содержимым файла на носителе, + * либо требуем дополнительных проверок и действий направленных на + * фактическое отключение O_DIRECT для отображенных в память данных. 
+ * + * В режиме MDBX_WRITEMAP когерентность отображенных данных обеспечивается + * физически. Поэтому использование direct i/o может иметь смысл, если у + * ядра ОС есть какие-то проблемы с msync(), в том числе с + * производительностью: + * - использование io_ring или gather-write может быть дешевле, чем + * просмотр PTE ядром и запись измененных/грязных; + * - но проблема в том, что записываемые из user mode страницы либо не + * будут помечены чистыми (и соответственно будут записаны ядром + * еще раз), либо ядру необходимо искать и чистить PTE при получении + * запроса на запись. + * + * Поэтому O_DIRECT или FILE_FLAG_NO_BUFFERING используется: + * - только в режиме MDBX_SYNC_DURABLE с MDBX_WRITEMAP; + * - когда ps >= me_os_psize; + * - опция сборки MDBX_AVOID_MSYNC != 0, которая по-умолчанию включена + * только на Windows (см ниже). + * + * ----- + * + * Использование FILE_FLAG_OVERLAPPED на Windows: + * + * У Windows очень плохо с I/O (за исключением прямых постраничных + * scatter/gather, которые работают в обход проблемного unified page + * cache и поэтому почти бесполезны в libmdbx). + * + * При этом всё еще хуже при использовании FlushFileBuffers(), что также + * требуется после FlushViewOfFile() в режиме MDBX_WRITEMAP. Поэтому + * на Windows вместо FlushViewOfFile() и FlushFileBuffers() следует + * использовать запись через дескриптор с FILE_FLAG_WRITE_THROUGH. + * + * В свою очередь, запись с FILE_FLAG_WRITE_THROUGH дешевле/быстрее + * при использовании FILE_FLAG_OVERLAPPED. В результате, на Windows + * в durable-режимах запись данных всегда в overlapped-режиме, + * при этом для записи мета требуется отдельный не-overlapped дескриптор. + */ + + env->pid = osal_getpid(); + int rc = osal_openfile((env->flags & MDBX_RDONLY) ? 
MDBX_OPEN_DXB_READ + : MDBX_OPEN_DXB_LAZY, + env, env->pathname.dxb, &env->lazy_fd, mode); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + +#if MDBX_LOCKING == MDBX_LOCKING_SYSV + env->me_sysv_ipc.key = ftok(env->pathname.dxb, 42); + if (unlikely(env->me_sysv_ipc.key == -1)) + return errno; +#endif /* MDBX_LOCKING */ + + /* Set the position in files outside of the data to avoid corruption + * due to erroneous use of file descriptors in the application code. */ + const uint64_t safe_parking_lot_offset = UINT64_C(0x7fffFFFF80000000); + osal_fseek(env->lazy_fd, safe_parking_lot_offset); + + env->fd4meta = env->lazy_fd; +#if defined(_WIN32) || defined(_WIN64) + eASSERT(env, env->ioring.overlapped_fd == 0); + bool ior_direct = false; + if (!(env->flags & + (MDBX_RDONLY | MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_EXCLUSIVE))) { + if (MDBX_AVOID_MSYNC && (env->flags & MDBX_WRITEMAP)) { + /* Запрошен режим MDBX_SYNC_DURABLE | MDBX_WRITEMAP при активной опции + * MDBX_AVOID_MSYNC. + * + * 1) В этой комбинации наиболее выгодно использовать WriteFileGather(), + * но для этого необходимо открыть файл с флагом FILE_FLAG_NO_BUFFERING и + * после обеспечивать выравнивание адресов и размера данных на границу + * системной страницы, что в свою очередь возможно если размер страницы БД + * не меньше размера системной страницы ОЗУ. Поэтому для открытия файла в + * нужном режиме требуется знать размер страницы БД. + * + * 2) Кроме этого, в Windows запись в заблокированный регион файла + * возможно только через тот-же дескриптор. Поэтому изначальный захват + * блокировок посредством lck_seize(), захват/освобождение блокировок + * во время пишущих транзакций и запись данных должны выполнятся через + * один дескриптор. + * + * Таким образом, требуется прочитать волатильный заголовок БД, чтобы + * узнать размер страницы, чтобы открыть дескриптор файла в режиме нужном + * для записи данных, чтобы использовать именно этот дескриптор для + * изначального захвата блокировок. 
*/ + meta_t header; + uint64_t dxb_filesize; + int err = dxb_read_header(env, &header, MDBX_SUCCESS, true); + if ((err == MDBX_SUCCESS && header.pagesize >= globals.sys_pagesize) || + (err == MDBX_ENODATA && mode && env->ps >= globals.sys_pagesize && + osal_filesize(env->lazy_fd, &dxb_filesize) == MDBX_SUCCESS && + dxb_filesize == 0)) + /* Может быть коллизия, если два процесса пытаются одновременно создать + * БД с разным размером страницы, который у одного меньше системной + * страницы, а у другого НЕ меньше. Эта допустимая, но очень странная + * ситуация. Поэтому считаем её ошибочной и не пытаемся разрешить. */ + ior_direct = true; + } + + rc = osal_openfile(ior_direct ? MDBX_OPEN_DXB_OVERLAPPED_DIRECT + : MDBX_OPEN_DXB_OVERLAPPED, + env, env->pathname.dxb, &env->ioring.overlapped_fd, 0); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + env->dxb_lock_event = CreateEventW(nullptr, true, false, nullptr); + if (unlikely(!env->dxb_lock_event)) + return (int)GetLastError(); + osal_fseek(env->ioring.overlapped_fd, safe_parking_lot_offset); + } +#else + if (mode == 0) { + /* pickup mode for lck-file */ + struct stat st; + if (unlikely(fstat(env->lazy_fd, &st))) + return errno; + mode = st.st_mode; + } + mode = (/* inherit read permissions for group and others */ mode & + (S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) | + /* always add read/write for owner */ S_IRUSR | S_IWUSR | + ((mode & S_IRGRP) ? /* +write if readable by group */ S_IWGRP : 0) | + ((mode & S_IROTH) ? 
/* +write if readable by others */ S_IWOTH : 0); +#endif /* !Windows */ + const int lck_rc = lck_setup(env, mode); + if (unlikely(MDBX_IS_ERROR(lck_rc))) + return lck_rc; + if (env->lck_mmap.fd != INVALID_HANDLE_VALUE) + osal_fseek(env->lck_mmap.fd, safe_parking_lot_offset); + + eASSERT(env, env->dsync_fd == INVALID_HANDLE_VALUE); + if (!(env->flags & (MDBX_RDONLY | MDBX_SAFE_NOSYNC | DEPRECATED_MAPASYNC +#if defined(_WIN32) || defined(_WIN64) + | MDBX_EXCLUSIVE +#endif /* !Windows */ + ))) { + rc = osal_openfile(MDBX_OPEN_DXB_DSYNC, env, env->pathname.dxb, + &env->dsync_fd, 0); + if (unlikely(MDBX_IS_ERROR(rc))) + return rc; + if (env->dsync_fd != INVALID_HANDLE_VALUE) { + if ((env->flags & MDBX_NOMETASYNC) == 0) + env->fd4meta = env->dsync_fd; + osal_fseek(env->dsync_fd, safe_parking_lot_offset); + } + } + + const MDBX_env_flags_t lazy_flags = + MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC | MDBX_NOMETASYNC; + const MDBX_env_flags_t mode_flags = lazy_flags | MDBX_LIFORECLAIM | + MDBX_NORDAHEAD | MDBX_RDONLY | + MDBX_WRITEMAP; + + lck_t *const lck = env->lck_mmap.lck; + if (lck && lck_rc != MDBX_RESULT_TRUE && (env->flags & MDBX_RDONLY) == 0) { + MDBX_env_flags_t snap_flags; + while ((snap_flags = atomic_load32(&lck->envmode, mo_AcquireRelease)) == + MDBX_RDONLY) { + if (atomic_cas32(&lck->envmode, MDBX_RDONLY, + (snap_flags = (env->flags & mode_flags)))) { + /* The case: + * - let's assume that for some reason the DB file is smaller + * than it should be according to the geometry, + * but not smaller than the last page used; + * - the first process that opens the database (lck_rc == RESULT_TRUE) + * does this in readonly mode and therefore cannot bring + * the file size back to normal; + * - some next process (lck_rc != RESULT_TRUE) opens the DB in + * read-write mode and now is here. + * + * FIXME: Should we re-check and set the size of DB-file right here? 
*/ + break; + } + atomic_yield(); + } + + if (env->flags & MDBX_ACCEDE) { + /* Pickup current mode-flags (MDBX_LIFORECLAIM, MDBX_NORDAHEAD, etc). */ + const MDBX_env_flags_t diff = + (snap_flags ^ env->flags) & + ((snap_flags & lazy_flags) ? mode_flags + : mode_flags & ~MDBX_WRITEMAP); + env->flags ^= diff; + NOTICE("accede mode-flags: 0x%X, 0x%X -> 0x%X", diff, env->flags ^ diff, + env->flags); + } + + /* Ранее упущенный не очевидный момент: При работе БД в режимах + * не-синхронной/отложенной фиксации на диске, все процессы-писатели должны + * иметь одинаковый режим MDBX_WRITEMAP. + * + * В противном случае, сброс на диск следует выполнять дважды: сначала + * msync(), затем fdatasync(). При этом msync() не обязан отрабатывать + * в процессах без MDBX_WRITEMAP, так как файл в память отображен только + * для чтения. Поэтому, в общем случае, различия по MDBX_WRITEMAP не + * позволяют выполнить фиксацию данных на диск, после их изменения в другом + * процессе. + * + * В режиме MDBX_UTTERLY_NOSYNC позволять совместную работу с MDBX_WRITEMAP + * также не следует, поскольку никакой процесс (в том числе последний) не + * может гарантированно сбросить данные на диск, а следовательно не должен + * помечать какую-либо транзакцию как steady. + * + * В результате, требуется либо запретить совместную работу процессам с + * разным MDBX_WRITEMAP в режиме отложенной записи, либо отслеживать такое + * смешивание и блокировать steady-пометки - что контрпродуктивно. */ + const MDBX_env_flags_t rigorous_flags = + (snap_flags & lazy_flags) + ? 
MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC | MDBX_WRITEMAP + : MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC; + const MDBX_env_flags_t rigorous_diff = + (snap_flags ^ env->flags) & rigorous_flags; + if (rigorous_diff) { + ERROR("current mode/flags 0x%X incompatible with requested 0x%X, " + "rigorous diff 0x%X", + env->flags, snap_flags, rigorous_diff); + return MDBX_INCOMPATIBLE; + } + } + + mincore_clean_cache(env); + const int dxb_rc = dxb_setup(env, lck_rc, mode); + if (MDBX_IS_ERROR(dxb_rc)) + return dxb_rc; + + rc = osal_check_fs_incore(env->lazy_fd); + env->incore = false; + if (rc == MDBX_RESULT_TRUE) { + env->incore = true; + NOTICE("%s", "in-core database"); + rc = MDBX_SUCCESS; + } else if (unlikely(rc != MDBX_SUCCESS)) { + ERROR("check_fs_incore(), err %d", rc); + return rc; + } + + if (unlikely(/* recovery mode */ env->stuck_meta >= 0) && + (lck_rc != /* exclusive */ MDBX_RESULT_TRUE || + (env->flags & MDBX_EXCLUSIVE) == 0)) { + ERROR("%s", "recovery requires exclusive mode"); + return MDBX_BUSY; + } + + DEBUG("opened dbenv %p", (void *)env); + env->flags |= ENV_ACTIVE; + if (!lck || lck_rc == MDBX_RESULT_TRUE) { + env->lck->envmode.weak = env->flags & mode_flags; + env->lck->meta_sync_txnid.weak = (uint32_t)recent_committed_txnid(env); + env->lck->readers_check_timestamp.weak = osal_monotime(); + } + if (lck) { + if (lck_rc == MDBX_RESULT_TRUE) { + rc = lck_downgrade(env); + DEBUG("lck-downgrade-%s: rc %i", + (env->flags & MDBX_EXCLUSIVE) ? "partial" : "full", rc); + if (rc != MDBX_SUCCESS) + return rc; + } else { + rc = mvcc_cleanup_dead(env, false, nullptr); + if (MDBX_IS_ERROR(rc)) + return rc; + } + } + + rc = (env->flags & MDBX_RDONLY) + ? 
MDBX_SUCCESS + : osal_ioring_create(&env->ioring +#if defined(_WIN32) || defined(_WIN64) + , + ior_direct, env->ioring.overlapped_fd +#endif /* Windows */ + ); + return rc; +} + +__cold int env_close(MDBX_env *env, bool resurrect_after_fork) { + const unsigned flags = env->flags; + env->flags &= ~ENV_INTERNAL_FLAGS; + if (flags & ENV_TXKEY) { + thread_key_delete(env->me_txkey); + env->me_txkey = 0; + } + + if (env->lck) + munlock_all(env); + + rthc_lock(); + int rc = rthc_remove(env); + rthc_unlock(); + +#if MDBX_ENABLE_DBI_LOCKFREE + for (defer_free_item_t *next, *ptr = env->defer_free; ptr; ptr = next) { + next = ptr->next; + osal_free(ptr); + } + env->defer_free = nullptr; +#endif /* MDBX_ENABLE_DBI_LOCKFREE */ + + if (!(env->flags & MDBX_RDONLY)) + osal_ioring_destroy(&env->ioring); + + env->lck = nullptr; + if (env->lck_mmap.lck) + osal_munmap(&env->lck_mmap); + + if (env->dxb_mmap.base) { + osal_munmap(&env->dxb_mmap); +#ifdef ENABLE_MEMCHECK + VALGRIND_DISCARD(env->valgrind_handle); + env->valgrind_handle = -1; +#endif /* ENABLE_MEMCHECK */ + } + +#if defined(_WIN32) || defined(_WIN64) + eASSERT(env, !env->ioring.overlapped_fd || + env->ioring.overlapped_fd == INVALID_HANDLE_VALUE); + if (env->dxb_lock_event != INVALID_HANDLE_VALUE) { + CloseHandle(env->dxb_lock_event); + env->dxb_lock_event = INVALID_HANDLE_VALUE; + } + eASSERT(env, !resurrect_after_fork); + if (env->pathname_char) { + osal_free(env->pathname_char); + env->pathname_char = nullptr; + } +#endif /* Windows */ + + if (env->dsync_fd != INVALID_HANDLE_VALUE) { + (void)osal_closefile(env->dsync_fd); + env->dsync_fd = INVALID_HANDLE_VALUE; + } + + if (env->lazy_fd != INVALID_HANDLE_VALUE) { + (void)osal_closefile(env->lazy_fd); + env->lazy_fd = INVALID_HANDLE_VALUE; + } + + if (env->lck_mmap.fd != INVALID_HANDLE_VALUE) { + (void)osal_closefile(env->lck_mmap.fd); + env->lck_mmap.fd = INVALID_HANDLE_VALUE; + } + + if (!resurrect_after_fork) { + if (env->kvs) { + for (size_t i = CORE_DBS; i < 
env->n_dbi; ++i) + if (env->kvs[i].name.iov_len) + osal_free(env->kvs[i].name.iov_base); + osal_free(env->kvs); + env->n_dbi = CORE_DBS; + env->kvs = nullptr; + } + if (env->page_auxbuf) { + osal_memalign_free(env->page_auxbuf); + env->page_auxbuf = nullptr; + } + if (env->dbi_seqs) { + osal_free(env->dbi_seqs); + env->dbi_seqs = nullptr; + } + if (env->dbs_flags) { + osal_free(env->dbs_flags); + env->dbs_flags = nullptr; + } + if (env->pathname.buffer) { + osal_free(env->pathname.buffer); + env->pathname.buffer = nullptr; + } + if (env->basal_txn) { + dpl_free(env->basal_txn); + txl_free(env->basal_txn->tw.gc.reclaimed); + pnl_free(env->basal_txn->tw.retired_pages); + pnl_free(env->basal_txn->tw.spilled.list); + pnl_free(env->basal_txn->tw.relist); + osal_free(env->basal_txn); + env->basal_txn = nullptr; + } + } + env->stuck_meta = -1; + return rc; +} diff --git a/src/essentials.h b/src/essentials.h new file mode 100644 index 00000000..9ac71df5 --- /dev/null +++ b/src/essentials.h @@ -0,0 +1,136 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#define LIBMDBX_INTERNALS +#define MDBX_DEPRECATED + +#ifdef MDBX_CONFIG_H +#include MDBX_CONFIG_H +#endif + +#include "preface.h" + +#ifdef xMDBX_ALLOY +/* Amalgamated build */ +#define MDBX_INTERNAL static +#else +/* Non-amalgamated build */ +#define MDBX_INTERNAL +#endif /* xMDBX_ALLOY */ + +#include "../mdbx.h" + +/*----------------------------------------------------------------------------*/ +/* Basic constants and types */ + +typedef struct iov_ctx iov_ctx_t; +#include "osal.h" + +#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul || defined(_WIN64) +#define MDBX_WORDBITS 64 +#else +#define MDBX_WORDBITS 32 +#endif /* MDBX_WORDBITS */ + +#include "options.h" + +#include "atomics-types.h" + +#include "layout-dxb.h" +#include "layout-lck.h" + +#define MIN_MAPSIZE (MDBX_MIN_PAGESIZE * MIN_PAGENO) +#if defined(_WIN32) || 
defined(_WIN64) +#define MAX_MAPSIZE32 UINT32_C(0x38000000) +#else +#define MAX_MAPSIZE32 UINT32_C(0x7f000000) +#endif +#define MAX_MAPSIZE64 ((MAX_PAGENO + 1) * (uint64_t)MDBX_MAX_PAGESIZE) + +#if MDBX_WORDBITS >= 64 +#define MAX_MAPSIZE MAX_MAPSIZE64 +#define PAGELIST_LIMIT ((size_t)MAX_PAGENO) +#else +#define MAX_MAPSIZE MAX_MAPSIZE32 +#define PAGELIST_LIMIT (MAX_MAPSIZE32 / MDBX_MIN_PAGESIZE) +#endif /* MDBX_WORDBITS */ + +#define MDBX_GOLD_RATIO_DBL 1.6180339887498948482 +#define MEGABYTE ((size_t)1 << 20) + +/*----------------------------------------------------------------------------*/ + +union logger_union { + void *ptr; + MDBX_debug_func *fmt; + MDBX_debug_func_nofmt *nofmt; +}; + +struct libmdbx_globals { + bin128_t bootid; + unsigned sys_pagesize, sys_allocation_granularity; + uint8_t sys_pagesize_ln2; + uint8_t runtime_flags; + uint8_t loglevel; +#if defined(_WIN32) || defined(_WIN64) + bool running_under_Wine; +#elif defined(__linux__) || defined(__gnu_linux__) + bool running_on_WSL1 /* Windows Subsystem 1 for Linux */; + uint32_t linux_kernel_version; +#endif /* Linux */ + union logger_union logger; + osal_fastmutex_t debug_lock; + size_t logger_buffer_size; + char *logger_buffer; +}; + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +extern struct libmdbx_globals globals; +#if defined(_WIN32) || defined(_WIN64) +extern struct libmdbx_imports imports; +#endif /* Windows */ + +#include "logging_and_debug.h" + +#include "utils.h" + +#include "pnl.h" + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#define mdbx_sourcery_anchor XCONCAT(mdbx_sourcery_, MDBX_BUILD_SOURCERY) +#if defined(xMDBX_TOOLS) +extern LIBMDBX_API const char *const mdbx_sourcery_anchor; +#endif + +#define MDBX_IS_ERROR(rc) \ + ((rc) != MDBX_RESULT_TRUE && (rc) != MDBX_RESULT_FALSE) + +/*----------------------------------------------------------------------------*/ + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static inline pgno_t +int64pgno(int64_t i64) { + if 
(likely(i64 >= (int64_t)MIN_PAGENO && i64 <= (int64_t)MAX_PAGENO + 1)) + return (pgno_t)i64; + return (i64 < (int64_t)MIN_PAGENO) ? MIN_PAGENO : MAX_PAGENO; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static inline pgno_t +pgno_add(size_t base, size_t augend) { + assert(base <= MAX_PAGENO + 1 && augend < MAX_PAGENO); + return int64pgno((int64_t)base + (int64_t)augend); +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static inline pgno_t +pgno_sub(size_t base, size_t subtrahend) { + assert(base >= MIN_PAGENO && base <= MAX_PAGENO + 1 && + subtrahend < MAX_PAGENO); + return int64pgno((int64_t)base - (int64_t)subtrahend); +} diff --git a/src/gc-get.c b/src/gc-get.c new file mode 100644 index 00000000..595c18a5 --- /dev/null +++ b/src/gc-get.c @@ -0,0 +1,1460 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +#if MDBX_ENABLE_MINCORE +/*------------------------------------------------------------------------------ + * Проверка размещения/расположения отображенных страниц БД в ОЗУ (mem-in-core), + * с кешированием этой информации. 
*/ + +static inline bool bit_tas(uint64_t *field, char bit) { + const uint64_t m = UINT64_C(1) << bit; + const bool r = (*field & m) != 0; + *field |= m; + return r; +} + +static bool mincore_fetch(MDBX_env *const env, const size_t unit_begin) { + lck_t *const lck = env->lck; + for (size_t i = 1; i < ARRAY_LENGTH(lck->mincore_cache.begin); ++i) { + const ptrdiff_t dist = unit_begin - lck->mincore_cache.begin[i]; + if (likely(dist >= 0 && dist < 64)) { + const pgno_t tmp_begin = lck->mincore_cache.begin[i]; + const uint64_t tmp_mask = lck->mincore_cache.mask[i]; + do { + lck->mincore_cache.begin[i] = lck->mincore_cache.begin[i - 1]; + lck->mincore_cache.mask[i] = lck->mincore_cache.mask[i - 1]; + } while (--i); + lck->mincore_cache.begin[0] = tmp_begin; + lck->mincore_cache.mask[0] = tmp_mask; + return bit_tas(lck->mincore_cache.mask, (char)dist); + } + } + + size_t pages = 64; + unsigned unit_log = globals.sys_pagesize_ln2; + unsigned shift = 0; + if (env->ps > globals.sys_pagesize) { + unit_log = env->ps2ln; + shift = env->ps2ln - globals.sys_pagesize_ln2; + pages <<= shift; + } + + const size_t offset = unit_begin << unit_log; + size_t length = pages << globals.sys_pagesize_ln2; + if (offset + length > env->dxb_mmap.current) { + length = env->dxb_mmap.current - offset; + pages = length >> globals.sys_pagesize_ln2; + } + +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.mincore.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + uint8_t *const vector = alloca(pages); + if (unlikely(mincore(ptr_disp(env->dxb_mmap.base, offset), length, + (void *)vector))) { + NOTICE("mincore(+%zu, %zu), err %d", offset, length, errno); + return false; + } + + for (size_t i = 1; i < ARRAY_LENGTH(lck->mincore_cache.begin); ++i) { + lck->mincore_cache.begin[i] = lck->mincore_cache.begin[i - 1]; + lck->mincore_cache.mask[i] = lck->mincore_cache.mask[i - 1]; + } + lck->mincore_cache.begin[0] = unit_begin; + + uint64_t mask = 0; +#ifdef MINCORE_INCORE + STATIC_ASSERT(MINCORE_INCORE == 1); 
+#endif + for (size_t i = 0; i < pages; ++i) { + uint64_t bit = (vector[i] & 1) == 0; + bit <<= i >> shift; + mask |= bit; + } + + lck->mincore_cache.mask[0] = ~mask; + return bit_tas(lck->mincore_cache.mask, 0); +} +#endif /* MDBX_ENABLE_MINCORE */ + +MDBX_MAYBE_UNUSED static inline bool mincore_probe(MDBX_env *const env, + const pgno_t pgno) { +#if MDBX_ENABLE_MINCORE + const size_t offset_aligned = + floor_powerof2(pgno2bytes(env, pgno), globals.sys_pagesize); + const unsigned unit_log2 = (env->ps2ln > globals.sys_pagesize_ln2) + ? env->ps2ln + : globals.sys_pagesize_ln2; + const size_t unit_begin = offset_aligned >> unit_log2; + eASSERT(env, (unit_begin << unit_log2) == offset_aligned); + const ptrdiff_t dist = unit_begin - env->lck->mincore_cache.begin[0]; + if (likely(dist >= 0 && dist < 64)) + return bit_tas(env->lck->mincore_cache.mask, (char)dist); + return mincore_fetch(env, unit_begin); +#else + (void)env; + (void)pgno; + return false; +#endif /* MDBX_ENABLE_MINCORE */ +} + +/*----------------------------------------------------------------------------*/ + +MDBX_MAYBE_UNUSED __hot static pgno_t * +scan4seq_fallback(pgno_t *range, const size_t len, const size_t seq) { + assert(seq > 0 && len > seq); +#if MDBX_PNL_ASCENDING + assert(range[-1] == len); + const pgno_t *const detent = range + len - seq; + const ptrdiff_t offset = (ptrdiff_t)seq; + const pgno_t target = (pgno_t)offset; + if (likely(len > seq + 3)) { + do { + const pgno_t diff0 = range[offset + 0] - range[0]; + const pgno_t diff1 = range[offset + 1] - range[1]; + const pgno_t diff2 = range[offset + 2] - range[2]; + const pgno_t diff3 = range[offset + 3] - range[3]; + if (diff0 == target) + return range + 0; + if (diff1 == target) + return range + 1; + if (diff2 == target) + return range + 2; + if (diff3 == target) + return range + 3; + range += 4; + } while (range + 3 < detent); + if (range == detent) + return nullptr; + } + do + if (range[offset] - *range == target) + return range; + while 
(++range < detent); +#else + assert(range[-(ptrdiff_t)len] == len); + const pgno_t *const detent = range - len + seq; + const ptrdiff_t offset = -(ptrdiff_t)seq; + const pgno_t target = (pgno_t)offset; + if (likely(len > seq + 3)) { + do { + const pgno_t diff0 = range[-0] - range[offset - 0]; + const pgno_t diff1 = range[-1] - range[offset - 1]; + const pgno_t diff2 = range[-2] - range[offset - 2]; + const pgno_t diff3 = range[-3] - range[offset - 3]; + /* Смысл вычислений до ветвлений в том, чтобы позволить компилятору + * загружать и вычислять все значения параллельно. */ + if (diff0 == target) + return range - 0; + if (diff1 == target) + return range - 1; + if (diff2 == target) + return range - 2; + if (diff3 == target) + return range - 3; + range -= 4; + } while (range > detent + 3); + if (range == detent) + return nullptr; + } + do + if (*range - range[offset] == target) + return range; + while (--range > detent); +#endif /* pnl_t sort-order */ + return nullptr; +} + +MDBX_MAYBE_UNUSED static const pgno_t *scan4range_checker(const pnl_t pnl, + const size_t seq) { + size_t begin = MDBX_PNL_ASCENDING ? 
1 : MDBX_PNL_GETSIZE(pnl); +#if MDBX_PNL_ASCENDING + while (seq <= MDBX_PNL_GETSIZE(pnl) - begin) { + if (pnl[begin + seq] - pnl[begin] == seq) + return pnl + begin; + ++begin; + } +#else + while (begin > seq) { + if (pnl[begin - seq] - pnl[begin] == seq) + return pnl + begin; + --begin; + } +#endif /* pnl_t sort-order */ + return nullptr; +} + +#if defined(_MSC_VER) && !defined(__builtin_clz) && \ + !__has_builtin(__builtin_clz) +MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clz(uint32_t value) { + unsigned long index; + _BitScanReverse(&index, value); + return 31 - index; +} +#endif /* _MSC_VER */ + +#if defined(_MSC_VER) && !defined(__builtin_clzl) && \ + !__has_builtin(__builtin_clzl) +MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clzl(size_t value) { + unsigned long index; +#ifdef _WIN64 + assert(sizeof(value) == 8); + _BitScanReverse64(&index, value); + return 63 - index; +#else + assert(sizeof(value) == 4); + _BitScanReverse(&index, value); + return 31 - index; +#endif +} +#endif /* _MSC_VER */ + +#if !MDBX_PNL_ASCENDING + +#if !defined(MDBX_ATTRIBUTE_TARGET) && \ + (__has_attribute(__target__) || __GNUC_PREREQ(5, 0)) +#define MDBX_ATTRIBUTE_TARGET(target) __attribute__((__target__(target))) +#endif /* MDBX_ATTRIBUTE_TARGET */ + +#ifndef MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND +/* Workaround for GCC's bug with `-m32 -march=i686 -Ofast` + * gcc/i686-buildroot-linux-gnu/12.2.0/include/xmmintrin.h:814:1: + * error: inlining failed in call to 'always_inline' '_mm_movemask_ps': + * target specific option mismatch */ +#if !defined(__FAST_MATH__) || !__FAST_MATH__ || !defined(__GNUC__) || \ + defined(__e2k__) || defined(__clang__) || defined(__amd64__) || \ + defined(__SSE2__) +#define MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND 0 +#else +#define MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND 1 +#endif +#endif /* MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND */ + +#if defined(__SSE2__) && defined(__SSE__) +#define MDBX_ATTRIBUTE_TARGET_SSE2 /* nope */ +#elif 
(defined(_M_IX86_FP) && _M_IX86_FP >= 2) || defined(__amd64__) +#define __SSE2__ +#define MDBX_ATTRIBUTE_TARGET_SSE2 /* nope */ +#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \ + !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND +#define MDBX_ATTRIBUTE_TARGET_SSE2 MDBX_ATTRIBUTE_TARGET("sse,sse2") +#endif /* __SSE2__ */ + +#if defined(__AVX2__) +#define MDBX_ATTRIBUTE_TARGET_AVX2 /* nope */ +#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \ + !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND +#define MDBX_ATTRIBUTE_TARGET_AVX2 MDBX_ATTRIBUTE_TARGET("sse,sse2,avx,avx2") +#endif /* __AVX2__ */ + +#if defined(MDBX_ATTRIBUTE_TARGET_AVX2) +#if defined(__AVX512BW__) +#define MDBX_ATTRIBUTE_TARGET_AVX512BW /* nope */ +#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \ + !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND && \ + (__GNUC_PREREQ(6, 0) || __CLANG_PREREQ(5, 0)) +#define MDBX_ATTRIBUTE_TARGET_AVX512BW \ + MDBX_ATTRIBUTE_TARGET("sse,sse2,avx,avx2,avx512bw") +#endif /* __AVX512BW__ */ +#endif /* MDBX_ATTRIBUTE_TARGET_AVX2 for MDBX_ATTRIBUTE_TARGET_AVX512BW */ + +#ifdef MDBX_ATTRIBUTE_TARGET_SSE2 +MDBX_ATTRIBUTE_TARGET_SSE2 static __always_inline unsigned +diffcmp2mask_sse2(const pgno_t *const ptr, const ptrdiff_t offset, + const __m128i pattern) { + const __m128i f = _mm_loadu_si128((const __m128i *)ptr); + const __m128i l = _mm_loadu_si128((const __m128i *)(ptr + offset)); + const __m128i cmp = _mm_cmpeq_epi32(_mm_sub_epi32(f, l), pattern); + return _mm_movemask_ps(*(const __m128 *)&cmp); +} + +MDBX_MAYBE_UNUSED __hot MDBX_ATTRIBUTE_TARGET_SSE2 static pgno_t * +scan4seq_sse2(pgno_t *range, const size_t len, const size_t seq) { + assert(seq > 0 && len > seq); +#if MDBX_PNL_ASCENDING +#error "FIXME: Not implemented" +#endif /* MDBX_PNL_ASCENDING */ + assert(range[-(ptrdiff_t)len] == len); + pgno_t *const detent = range - len + seq; + const ptrdiff_t offset = -(ptrdiff_t)seq; + const pgno_t target = (pgno_t)offset; + const __m128i pattern = 
_mm_set1_epi32(target); + uint8_t mask; + if (likely(len > seq + 3)) { + do { + mask = (uint8_t)diffcmp2mask_sse2(range - 3, offset, pattern); + if (mask) { +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) + found: +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ + return range + 28 - __builtin_clz(mask); + } + range -= 4; + } while (range > detent + 3); + if (range == detent) + return nullptr; + } + + /* Далее происходит чтение от 4 до 12 лишних байт, которые могут быть не + * только за пределами региона выделенного под PNL, но и пересекать границу + * страницы памяти. Что может приводить как к ошибкам ASAN, так и к падению. + * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. */ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) + const unsigned on_page_safe_mask = 0xff0 /* enough for '-15' bytes offset */; + if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && + !RUNNING_ON_VALGRIND) { + const unsigned extra = (unsigned)(detent + 4 - range); + assert(extra > 0 && extra < 4); + mask = 0xF << extra; + mask &= diffcmp2mask_sse2(range - 3, offset, pattern); + if (mask) + goto found; + return nullptr; + } +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ + do + if (*range - range[offset] == target) + return range; + while (--range != detent); + return nullptr; +} +#endif /* MDBX_ATTRIBUTE_TARGET_SSE2 */ + +#ifdef MDBX_ATTRIBUTE_TARGET_AVX2 +MDBX_ATTRIBUTE_TARGET_AVX2 static __always_inline unsigned +diffcmp2mask_avx2(const pgno_t *const ptr, const ptrdiff_t offset, + const __m256i pattern) { + const __m256i f = _mm256_loadu_si256((const __m256i *)ptr); + const __m256i l = _mm256_loadu_si256((const __m256i *)(ptr + offset)); + const __m256i cmp = _mm256_cmpeq_epi32(_mm256_sub_epi32(f, l), pattern); + return _mm256_movemask_ps(*(const __m256 *)&cmp); +} + +MDBX_ATTRIBUTE_TARGET_AVX2 static __always_inline unsigned +diffcmp2mask_sse2avx(const pgno_t *const ptr, const ptrdiff_t offset, + const 
__m128i pattern) { + const __m128i f = _mm_loadu_si128((const __m128i *)ptr); + const __m128i l = _mm_loadu_si128((const __m128i *)(ptr + offset)); + const __m128i cmp = _mm_cmpeq_epi32(_mm_sub_epi32(f, l), pattern); + return _mm_movemask_ps(*(const __m128 *)&cmp); +} + +MDBX_MAYBE_UNUSED __hot MDBX_ATTRIBUTE_TARGET_AVX2 static pgno_t * +scan4seq_avx2(pgno_t *range, const size_t len, const size_t seq) { + assert(seq > 0 && len > seq); +#if MDBX_PNL_ASCENDING +#error "FIXME: Not implemented" +#endif /* MDBX_PNL_ASCENDING */ + assert(range[-(ptrdiff_t)len] == len); + pgno_t *const detent = range - len + seq; + const ptrdiff_t offset = -(ptrdiff_t)seq; + const pgno_t target = (pgno_t)offset; + const __m256i pattern = _mm256_set1_epi32(target); + uint8_t mask; + if (likely(len > seq + 7)) { + do { + mask = (uint8_t)diffcmp2mask_avx2(range - 7, offset, pattern); + if (mask) { +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) + found: +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ + return range + 24 - __builtin_clz(mask); + } + range -= 8; + } while (range > detent + 7); + if (range == detent) + return nullptr; + } + + /* Далее происходит чтение от 4 до 28 лишних байт, которые могут быть не + * только за пределами региона выделенного под PNL, но и пересекать границу + * страницы памяти. Что может приводить как к ошибкам ASAN, так и к падению. + * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. 
*/ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) + const unsigned on_page_safe_mask = 0xfe0 /* enough for '-31' bytes offset */; + if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && + !RUNNING_ON_VALGRIND) { + const unsigned extra = (unsigned)(detent + 8 - range); + assert(extra > 0 && extra < 8); + mask = 0xFF << extra; + mask &= diffcmp2mask_avx2(range - 7, offset, pattern); + if (mask) + goto found; + return nullptr; + } +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ + if (range - 3 > detent) { + mask = diffcmp2mask_sse2avx(range - 3, offset, *(const __m128i *)&pattern); + if (mask) + return range + 28 - __builtin_clz(mask); + range -= 4; + } + while (range > detent) { + if (*range - range[offset] == target) + return range; + --range; + } + return nullptr; +} +#endif /* MDBX_ATTRIBUTE_TARGET_AVX2 */ + +#ifdef MDBX_ATTRIBUTE_TARGET_AVX512BW +MDBX_ATTRIBUTE_TARGET_AVX512BW static __always_inline unsigned +diffcmp2mask_avx512bw(const pgno_t *const ptr, const ptrdiff_t offset, + const __m512i pattern) { + const __m512i f = _mm512_loadu_si512((const __m512i *)ptr); + const __m512i l = _mm512_loadu_si512((const __m512i *)(ptr + offset)); + return _mm512_cmpeq_epi32_mask(_mm512_sub_epi32(f, l), pattern); +} + +MDBX_MAYBE_UNUSED __hot MDBX_ATTRIBUTE_TARGET_AVX512BW static pgno_t * +scan4seq_avx512bw(pgno_t *range, const size_t len, const size_t seq) { + assert(seq > 0 && len > seq); +#if MDBX_PNL_ASCENDING +#error "FIXME: Not implemented" +#endif /* MDBX_PNL_ASCENDING */ + assert(range[-(ptrdiff_t)len] == len); + pgno_t *const detent = range - len + seq; + const ptrdiff_t offset = -(ptrdiff_t)seq; + const pgno_t target = (pgno_t)offset; + const __m512i pattern = _mm512_set1_epi32(target); + unsigned mask; + if (likely(len > seq + 15)) { + do { + mask = diffcmp2mask_avx512bw(range - 15, offset, pattern); + if (mask) { +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) + found: +#endif /* !ENABLE_MEMCHECK && 
!__SANITIZE_ADDRESS__ */ + return range + 16 - __builtin_clz(mask); + } + range -= 16; + } while (range > detent + 15); + if (range == detent) + return nullptr; + } + + /* Далее происходит чтение от 4 до 60 лишних байт, которые могут быть не + * только за пределами региона выделенного под PNL, но и пересекать границу + * страницы памяти. Что может приводить как к ошибкам ASAN, так и к падению. + * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. */ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) + const unsigned on_page_safe_mask = 0xfc0 /* enough for '-63' bytes offset */; + if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && + !RUNNING_ON_VALGRIND) { + const unsigned extra = (unsigned)(detent + 16 - range); + assert(extra > 0 && extra < 16); + mask = 0xFFFF << extra; + mask &= diffcmp2mask_avx512bw(range - 15, offset, pattern); + if (mask) + goto found; + return nullptr; + } +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ + if (range - 7 > detent) { + mask = diffcmp2mask_avx2(range - 7, offset, *(const __m256i *)&pattern); + if (mask) + return range + 24 - __builtin_clz(mask); + range -= 8; + } + if (range - 3 > detent) { + mask = diffcmp2mask_sse2avx(range - 3, offset, *(const __m128i *)&pattern); + if (mask) + return range + 28 - __builtin_clz(mask); + range -= 4; + } + while (range > detent) { + if (*range - range[offset] == target) + return range; + --range; + } + return nullptr; +} +#endif /* MDBX_ATTRIBUTE_TARGET_AVX512BW */ + +#if (defined(__ARM_NEON) || defined(__ARM_NEON__)) && \ + (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +static __always_inline size_t diffcmp2mask_neon(const pgno_t *const ptr, + const ptrdiff_t offset, + const uint32x4_t pattern) { + const uint32x4_t f = vld1q_u32(ptr); + const uint32x4_t l = vld1q_u32(ptr + offset); + const uint16x4_t cmp = vmovn_u32(vceqq_u32(vsubq_u32(f, l), pattern)); + if (sizeof(size_t) > 7) + return vget_lane_u64(vreinterpret_u64_u16(cmp), 0); + else + 
return vget_lane_u32(vreinterpret_u32_u8(vmovn_u16(vcombine_u16(cmp, cmp))), + 0); +} + +__hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len, + const size_t seq) { + assert(seq > 0 && len > seq); +#if MDBX_PNL_ASCENDING +#error "FIXME: Not implemented" +#endif /* MDBX_PNL_ASCENDING */ + assert(range[-(ptrdiff_t)len] == len); + pgno_t *const detent = range - len + seq; + const ptrdiff_t offset = -(ptrdiff_t)seq; + const pgno_t target = (pgno_t)offset; + const uint32x4_t pattern = vmovq_n_u32(target); + size_t mask; + if (likely(len > seq + 3)) { + do { + mask = diffcmp2mask_neon(range - 3, offset, pattern); + if (mask) { +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) + found: +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ + return ptr_disp(range, -(__builtin_clzl(mask) >> sizeof(size_t) / 4)); + } + range -= 4; + } while (range > detent + 3); + if (range == detent) + return nullptr; + } + + /* Далее происходит чтение от 4 до 12 лишних байт, которые могут быть не + * только за пределами региона выделенного под PNL, но и пересекать границу + * страницы памяти. Что может приводить как к ошибкам ASAN, так и к падению. + * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. 
*/ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) + const unsigned on_page_safe_mask = 0xff0 /* enough for '-15' bytes offset */; + if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && + !RUNNING_ON_VALGRIND) { + const unsigned extra = (unsigned)(detent + 4 - range); + assert(extra > 0 && extra < 4); + mask = (~(size_t)0) << (extra * sizeof(size_t) * 2); + mask &= diffcmp2mask_neon(range - 3, offset, pattern); + if (mask) + goto found; + return nullptr; + } +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ + do + if (*range - range[offset] == target) + return range; + while (--range != detent); + return nullptr; +} +#endif /* __ARM_NEON || __ARM_NEON__ */ + +#if defined(__AVX512BW__) && defined(MDBX_ATTRIBUTE_TARGET_AVX512BW) +#define scan4seq_default scan4seq_avx512bw +#define scan4seq_impl scan4seq_default +#elif defined(__AVX2__) && defined(MDBX_ATTRIBUTE_TARGET_AVX2) +#define scan4seq_default scan4seq_avx2 +#elif defined(__SSE2__) && defined(MDBX_ATTRIBUTE_TARGET_SSE2) +#define scan4seq_default scan4seq_sse2 +#elif (defined(__ARM_NEON) || defined(__ARM_NEON__)) && \ + (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +#define scan4seq_default scan4seq_neon +/* Choosing of another variants should be added here. */ +#endif /* scan4seq_default */ + +#endif /* MDBX_PNL_ASCENDING */ + +#ifndef scan4seq_default +#define scan4seq_default scan4seq_fallback +#endif /* scan4seq_default */ + +#ifdef scan4seq_impl +/* The scan4seq_impl() is the best or no alternatives */ +#elif !MDBX_HAVE_BUILTIN_CPU_SUPPORTS +/* The scan4seq_default() will be used since no cpu-features detection support + * from compiler. Please don't ask to implement cpuid-based detection and don't + * make such PRs. */ +#define scan4seq_impl scan4seq_default +#else +/* Selecting the most appropriate implementation at runtime, + * depending on the available CPU features. 
*/ +static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len, + const size_t seq); +static pgno_t *(*scan4seq_impl)(pgno_t *range, const size_t len, + const size_t seq) = scan4seq_resolver; + +static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len, + const size_t seq) { + pgno_t *(*choice)(pgno_t *range, const size_t len, const size_t seq) = + nullptr; +#if __has_builtin(__builtin_cpu_init) || defined(__BUILTIN_CPU_INIT__) || \ + __GNUC_PREREQ(4, 8) + __builtin_cpu_init(); +#endif /* __builtin_cpu_init() */ +#ifdef MDBX_ATTRIBUTE_TARGET_SSE2 + if (__builtin_cpu_supports("sse2")) + choice = scan4seq_sse2; +#endif /* MDBX_ATTRIBUTE_TARGET_SSE2 */ +#ifdef MDBX_ATTRIBUTE_TARGET_AVX2 + if (__builtin_cpu_supports("avx2")) + choice = scan4seq_avx2; +#endif /* MDBX_ATTRIBUTE_TARGET_AVX2 */ +#ifdef MDBX_ATTRIBUTE_TARGET_AVX512BW + if (__builtin_cpu_supports("avx512bw")) + choice = scan4seq_avx512bw; +#endif /* MDBX_ATTRIBUTE_TARGET_AVX512BW */ + /* Choosing of another variants should be added here. */ + scan4seq_impl = choice ? choice : scan4seq_default; + return scan4seq_impl(range, len, seq); +} +#endif /* scan4seq_impl */ + +/*----------------------------------------------------------------------------*/ + +#define ALLOC_COALESCE 4 /* внутреннее состояние */ +#define ALLOC_SHOULD_SCAN 8 /* внутреннее состояние */ +#define ALLOC_LIFO 16 /* внутреннее состояние */ + +static inline bool is_gc_usable(MDBX_txn *txn, const MDBX_cursor *mc, + const uint8_t flags) { + /* If txn is updating the GC, then the retired-list cannot play catch-up with + * itself by growing while trying to save it. 
*/ + if (mc->tree == &txn->dbs[FREE_DBI] && !(flags & ALLOC_RESERVE) && + !(mc->flags & z_gcu_preparation)) + return false; + + /* avoid search inside empty tree and while tree is updating, + https://libmdbx.dqdkfa.ru/dead-github/issues/31 */ + if (unlikely(txn->dbs[FREE_DBI].items == 0)) { + txn->flags |= txn_gc_drained; + return false; + } + + return true; +} + +__hot static bool is_already_reclaimed(const MDBX_txn *txn, txnid_t id) { + const size_t len = MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed); + for (size_t i = 1; i <= len; ++i) + if (txn->tw.gc.reclaimed[i] == id) + return true; + return false; +} + +__hot static pgno_t relist_get_single(MDBX_txn *txn) { + const size_t len = MDBX_PNL_GETSIZE(txn->tw.relist); + assert(len > 0); + pgno_t *target = MDBX_PNL_EDGE(txn->tw.relist); + const ptrdiff_t dir = MDBX_PNL_ASCENDING ? 1 : -1; + + /* Есть ТРИ потенциально выигрышные, но противо-направленные тактики: + * + * 1. Стараться использовать страницы с наименьшими номерами. Так обмен с + * диском будет более кучным, а у страниц ближе к концу БД будет больше шансов + * попасть под авто-компактификацию. Частично эта тактика уже реализована, но + * для её эффективности требуется явно приоритезировать выделение страниц: + * - поддерживать для relist, для ближних и для дальних страниц; + * - использовать страницы из дальнего списка, если первый пуст, + * а второй слишком большой, либо при пустой GC. + * + * 2. Стараться выделять страницы последовательно. Так записываемые на диск + * регионы будут линейными, что принципиально ускоряет запись на HDD. + * Одновременно, в среднем это не повлияет на чтение, точнее говоря, если + * порядок чтения не совпадает с порядком изменения (иначе говоря, если + * чтение не коррклирует с обновлениями и/или вставками) то не повлияет, иначе + * может ускорить. Однако, последовательности в среднем достаточно редки. 
+ * Поэтому для эффективности требуется аккумулировать и поддерживать в ОЗУ + * огромные списки страниц, а затем сохранять их обратно в БД. Текущий формат + * БД (без битовых карт) для этого крайне неудачен. Поэтому эта тактика не + * имеет шансов быть успешной без смены формата БД (Mithril). + * + * 3. Стараться экономить последовательности страниц. Это позволяет избегать + * лишнего чтения/поиска в GC при более-менее постоянном размещении и/или + * обновлении данных требующих более одной страницы. Проблема в том, что без + * информации от приложения библиотека не может знать насколько + * востребованными будут последовательности в ближайшей перспективе, а + * экономия последовательностей "на всякий случай" не только затратна + * сама по себе, но и работает во вред. + * + * Поэтому: + * - в TODO добавляется разделение relist на «ближние» и «дальние» страницы, + * с последующей реализацией первой тактики; + * - преимущественное использование последовательностей отправляется + * в MithrilDB как составляющая "HDD friendly" feature; + * - реализованная в 3757eb72f7c6b46862f8f17881ac88e8cecc1979 экономия + * последовательностей отключается через MDBX_ENABLE_SAVING_SEQUENCES=0. + * + * В качестве альтернативы для безусловной «экономии» последовательностей, + * в следующих версиях libmdbx, вероятно, будет предложено + * API для взаимодействия с GC: + * - получение размера GC, включая гистограммы размеров последовательностей + * и близости к концу БД; + * - включение формирования "линейного запаса" для последующего использования + * в рамках текущей транзакции; + * - намеренная загрузка GC в память для коагуляции и "выпрямления"; + * - намеренное копирование данных из страниц в конце БД для последующего + * их освобождения, т.е. контролируемая компактификация по запросу. 
*/ + +#ifndef MDBX_ENABLE_SAVING_SEQUENCES +#define MDBX_ENABLE_SAVING_SEQUENCES 0 +#endif + if (MDBX_ENABLE_SAVING_SEQUENCES && unlikely(target[dir] == *target + 1) && + len > 2) { + /* Пытаемся пропускать последовательности при наличии одиночных элементов. + * TODO: необходимо кэшировать пропускаемые последовательности + * чтобы не сканировать список сначала при каждом выделении. */ + pgno_t *scan = target + dir + dir; + size_t left = len; + do { + if (likely(scan[-dir] != *scan - 1 && *scan + 1 != scan[dir])) { +#if MDBX_PNL_ASCENDING + target = scan; + break; +#else + /* вырезаем элемент с перемещением хвоста */ + const pgno_t pgno = *scan; + MDBX_PNL_SETSIZE(txn->tw.relist, len - 1); + while (++scan <= target) + scan[-1] = *scan; + return pgno; +#endif + } + scan += dir; + } while (--left > 2); + } + + const pgno_t pgno = *target; +#if MDBX_PNL_ASCENDING + /* вырезаем элемент с перемещением хвоста */ + MDBX_PNL_SETSIZE(txn->tw.relist, len - 1); + for (const pgno_t *const end = txn->tw.relist + len - 1; target <= end; + ++target) + *target = target[1]; +#else + /* перемещать хвост не нужно, просто усекам список */ + MDBX_PNL_SETSIZE(txn->tw.relist, len - 1); +#endif + return pgno; +} + +__hot static pgno_t relist_get_sequence(MDBX_txn *txn, const size_t num, + uint8_t flags) { + const size_t len = MDBX_PNL_GETSIZE(txn->tw.relist); + pgno_t *edge = MDBX_PNL_EDGE(txn->tw.relist); + assert(len >= num && num > 1); + const size_t seq = num - 1; +#if !MDBX_PNL_ASCENDING + if (edge[-(ptrdiff_t)seq] - *edge == seq) { + if (unlikely(flags & ALLOC_RESERVE)) + return P_INVALID; + assert(edge == scan4range_checker(txn->tw.relist, seq)); + /* перемещать хвост не нужно, просто усекам список */ + MDBX_PNL_SETSIZE(txn->tw.relist, len - num); + return *edge; + } +#endif + pgno_t *target = scan4seq_impl(edge, len, seq); + assert(target == scan4range_checker(txn->tw.relist, seq)); + if (target) { + if (unlikely(flags & ALLOC_RESERVE)) + return P_INVALID; + const pgno_t pgno = 
*target; + /* cut the found sequence out, moving the tail */ + MDBX_PNL_SETSIZE(txn->tw.relist, len - num); +#if MDBX_PNL_ASCENDING + for (const pgno_t *const end = txn->tw.relist + len - num; target <= end; + ++target) + *target = target[num]; +#else + for (const pgno_t *const end = txn->tw.relist + len; ++target <= end;) + target[-(ptrdiff_t)num] = *target; +#endif + return pgno; + } + return 0; +} + +static inline pgr_t page_alloc_finalize(MDBX_env *const env, + MDBX_txn *const txn, + const MDBX_cursor *const mc, + const pgno_t pgno, const size_t num) { +#if MDBX_ENABLE_PROFGC + size_t majflt_before; + const uint64_t cputime_before = osal_cputime(&majflt_before); + gc_prof_stat_t *const prof = (mc->mc_dbi == FREE_DBI) + ? &env->lck->pgops.gc_prof.self + : &env->lck->pgops.gc_prof.work; +#else + (void)mc; +#endif /* MDBX_ENABLE_PROFGC */ + ENSURE(env, pgno >= NUM_METAS); + + pgr_t ret; + bool need_clean = (env->flags & MDBX_PAGEPERTURB) != 0; + if (env->flags & MDBX_WRITEMAP) { + ret.page = pgno2page(env, pgno); + MDBX_ASAN_UNPOISON_MEMORY_REGION(ret.page, pgno2bytes(env, num)); + VALGRIND_MAKE_MEM_UNDEFINED(ret.page, pgno2bytes(env, num)); + + /* The content of the allocated page is not needed, but if the page is absent + * from RAM (which is quite likely), then any access to it leads + * to a page-fault: + * - an interrupt on the missing page; + * - a context switch into kernel mode with the process put to sleep; + * - reading the page from disk; + * - updating the PTE and waking the process up; + * - a context switch once a CPU becomes available. + * + * We try to minimize the overhead by writing the page, which with a + * unified page cache makes the page appear in RAM without reading it + * from disk. The write to disk should then be deferred by a sane kernel, + * since the page is mapped into memory in read-write mode and the CPU + * writes to it right afterwards.
*/ + + /* If the page is already in the process memory, the redundant write may be + * quite expensive. Besides the system call and the data copying, on especially + * "gifted" OSes this may involve the file system, allocation of + * a temporary page, feeding of asynchronous execution queues, + * a PTE update followed by a page-fault and reading the data from + * the dirty I/O queue. Because of this the penalty for a redundant write may be + * comparable to the unnecessary read being avoided. */ + if (env->prefault_write_activated) { + void *const pattern = + ptr_disp(env->page_auxbuf, need_clean ? env->ps : env->ps * 2); + size_t file_offset = pgno2bytes(env, pgno); + if (likely(num == 1)) { + if (!mincore_probe(env, pgno)) { + osal_pwrite(env->lazy_fd, pattern, env->ps, file_offset); +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.prefault.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + need_clean = false; + } + } else { + struct iovec iov[MDBX_AUXILARY_IOV_MAX]; + size_t n = 0, cleared = 0; + for (size_t i = 0; i < num; ++i) { + if (!mincore_probe(env, pgno + (pgno_t)i)) { + ++cleared; + iov[n].iov_len = env->ps; + iov[n].iov_base = pattern; + if (unlikely(++n == MDBX_AUXILARY_IOV_MAX)) { + osal_pwritev(env->lazy_fd, iov, MDBX_AUXILARY_IOV_MAX, + file_offset); +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.prefault.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + file_offset += pgno2bytes(env, MDBX_AUXILARY_IOV_MAX); + n = 0; + } + } + } + if (likely(n > 0)) { + osal_pwritev(env->lazy_fd, iov, n, file_offset); +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.prefault.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + } + if (cleared == num) + need_clean = false; + } + } + } else { + ret.page = page_shadow_alloc(txn, num); + if (unlikely(!ret.page)) { + ret.err = MDBX_ENOMEM; + goto bailout; + } + } + + if (unlikely(need_clean)) + memset(ret.page, -1, pgno2bytes(env, num)); + + VALGRIND_MAKE_MEM_UNDEFINED(ret.page, pgno2bytes(env, num)); + ret.page->pgno = pgno; +
ret.page->dupfix_ksize = 0; + ret.page->flags = 0; + if ((ASSERT_ENABLED() || AUDIT_ENABLED()) && num > 1) { + ret.page->pages = (pgno_t)num; + ret.page->flags = P_LARGE; + } + + ret.err = page_dirty(txn, ret.page, (pgno_t)num); +bailout: + tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - + MDBX_ENABLE_REFUND)); +#if MDBX_ENABLE_PROFGC + size_t majflt_after; + prof->xtime_cpu += osal_cputime(&majflt_after) - cputime_before; + prof->majflt += (uint32_t)(majflt_after - majflt_before); +#endif /* MDBX_ENABLE_PROFGC */ + return ret; +} + +pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, + uint8_t flags) { + pgr_t ret; + MDBX_txn *const txn = mc->txn; + MDBX_env *const env = txn->env; +#if MDBX_ENABLE_PROFGC + gc_prof_stat_t *const prof = (mc->mc_dbi == FREE_DBI) + ? &env->lck->pgops.gc_prof.self + : &env->lck->pgops.gc_prof.work; + prof->spe_counter += 1; +#endif /* MDBX_ENABLE_PROFGC */ + + eASSERT(env, num > 0 || (flags & ALLOC_RESERVE)); + eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - + MDBX_ENABLE_REFUND)); + + size_t newnext; + const uint64_t monotime_begin = + (MDBX_ENABLE_PROFGC || (num > 1 && env->options.gc_time_limit)) + ? 
osal_monotime() + : 0; + struct monotime_cache now_cache; + now_cache.expire_countdown = + 1 /* starting from 1 gets rid both of extra system calls when + the time limit is set to zero or is already exhausted, and of time + accounting when rp_augment_limit is not reached */ + ; + now_cache.value = monotime_begin; + pgno_t pgno = 0; + if (num > 1) { +#if MDBX_ENABLE_PROFGC + prof->xpages += 1; +#endif /* MDBX_ENABLE_PROFGC */ + if (MDBX_PNL_GETSIZE(txn->tw.relist) >= num) { + eASSERT(env, + MDBX_PNL_LAST(txn->tw.relist) < txn->geo.first_unallocated && + MDBX_PNL_FIRST(txn->tw.relist) < txn->geo.first_unallocated); + pgno = relist_get_sequence(txn, num, flags); + if (likely(pgno)) + goto done; + } + } else { + eASSERT(env, num == 0 || MDBX_PNL_GETSIZE(txn->tw.relist) == 0); + eASSERT(env, !(flags & ALLOC_RESERVE) || num == 0); + } + + //--------------------------------------------------------------------------- + + if (unlikely(!is_gc_usable(txn, mc, flags))) { + eASSERT(env, (txn->flags & txn_gc_drained) || num > 1); + goto no_gc; + } + + eASSERT(env, + (flags & (ALLOC_COALESCE | ALLOC_LIFO | ALLOC_SHOULD_SCAN)) == 0); + flags += (env->flags & MDBX_LIFORECLAIM) ? ALLOC_LIFO : 0; + + if (/* Do not coalesce records while preparing the reserve for the GC update. + * Otherwise an attempt to enlarge the reserve may lead to the need for an even + * larger reserve because the list of reclaimed pages grows.
*/ + (flags & ALLOC_RESERVE) == 0) { + if (txn->dbs[FREE_DBI].branch_pages && + MDBX_PNL_GETSIZE(txn->tw.relist) < env->maxgc_large1page / 2) + flags += ALLOC_COALESCE; + } + + MDBX_cursor *const gc = ptr_disp(env->basal_txn, sizeof(MDBX_txn)); + eASSERT(env, mc != gc && gc->next == gc); + gc->txn = txn; + gc->dbi_state = txn->dbi_state; + gc->top_and_flags = z_fresh_mark; + + env->prefault_write_activated = env->options.prefault_write; + if (env->prefault_write_activated) { + /* Probing via mincore() substantially reduces the cost, but in + * the simplest cases (a trivial benchmark) the overall performance + * becomes half as high. And on platforms without mincore() and with a problematic + * virtual memory subsystem the situation may be many times worse. + * So we avoid the cost in situations where prefault-write is most likely not + * needed. */ + const bool readahead_enabled = env->lck->readahead_anchor & 1; + const pgno_t readahead_edge = env->lck->readahead_anchor >> 1; + if (/* Don't bother if the GC is almost empty and the DB is small */ + (txn->dbs[FREE_DBI].branch_pages == 0 && txn->geo.now < 1234) || + /* Don't bother if the page is within the zone of enabled readahead */ + (readahead_enabled && pgno + num < readahead_edge)) + env->prefault_write_activated = false; + } + +retry_gc_refresh_oldest:; + txnid_t oldest = txn_snapshot_oldest(txn); +retry_gc_have_oldest: + if (unlikely(oldest >= txn->txnid)) { + ERROR("unexpected/invalid oldest-readed txnid %" PRIaTXN + " for current-txnid %" PRIaTXN, + oldest, txn->txnid); + ret.err = MDBX_PROBLEM; + goto fail; + } + const txnid_t detent = oldest + 1; + + txnid_t id = 0; + MDBX_cursor_op op = MDBX_FIRST; + if (flags & ALLOC_LIFO) { + if (!txn->tw.gc.reclaimed) { + txn->tw.gc.reclaimed = txl_alloc(); + if (unlikely(!txn->tw.gc.reclaimed)) { + ret.err = MDBX_ENOMEM; + goto fail; + } + } + /* Begin lookup backward from oldest reader */ + id = detent - 1; + op = MDBX_SET_RANGE; + } else if (txn->tw.gc.last_reclaimed) {
+ /* Continue lookup forward from last-reclaimed */ + id = txn->tw.gc.last_reclaimed + 1; + if (id >= detent) + goto depleted_gc; + op = MDBX_SET_RANGE; + } + +next_gc:; + MDBX_val key; + key.iov_base = &id; + key.iov_len = sizeof(id); + +#if MDBX_ENABLE_PROFGC + prof->rsteps += 1; +#endif /* MDBX_ENABLE_PROFGC */ + + /* Seek first/next GC record */ + ret.err = cursor_ops(gc, &key, nullptr, op); + if (unlikely(ret.err != MDBX_SUCCESS)) { + if (unlikely(ret.err != MDBX_NOTFOUND)) + goto fail; + if ((flags & ALLOC_LIFO) && op == MDBX_SET_RANGE) { + op = MDBX_PREV; + goto next_gc; + } + goto depleted_gc; + } + if (unlikely(key.iov_len != sizeof(txnid_t))) { + ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid GC key-length"); + ret.err = MDBX_CORRUPTED; + goto fail; + } + id = unaligned_peek_u64(4, key.iov_base); + if (flags & ALLOC_LIFO) { + op = MDBX_PREV; + if (id >= detent || is_already_reclaimed(txn, id)) + goto next_gc; + } else { + op = MDBX_NEXT; + if (unlikely(id >= detent)) + goto depleted_gc; + } + txn->flags &= ~txn_gc_drained; + + /* Reading next GC record */ + MDBX_val data; + page_t *const mp = gc->pg[gc->top]; + if (unlikely((ret.err = node_read(gc, page_node(mp, gc->ki[gc->top]), &data, + mp)) != MDBX_SUCCESS)) + goto fail; + + pgno_t *gc_pnl = (pgno_t *)data.iov_base; + if (unlikely(data.iov_len % sizeof(pgno_t) || + data.iov_len < MDBX_PNL_SIZEOF(gc_pnl) || + !pnl_check(gc_pnl, txn->geo.first_unallocated))) { + ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid GC value-length"); + ret.err = MDBX_CORRUPTED; + goto fail; + } + + const size_t gc_len = MDBX_PNL_GETSIZE(gc_pnl); + TRACE("gc-read: id #%" PRIaTXN " len %zu, re-list will %zu ", id, gc_len, + gc_len + MDBX_PNL_GETSIZE(txn->tw.relist)); + + if (unlikely(gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) >= + env->maxgc_large1page)) { + /* Don't try to coalesce too much. 
*/ + if (flags & ALLOC_SHOULD_SCAN) { + eASSERT(env, flags & ALLOC_COALESCE); + eASSERT(env, !(flags & ALLOC_RESERVE)); + eASSERT(env, num > 0); +#if MDBX_ENABLE_PROFGC + env->lck->pgops.gc_prof.coalescences += 1; +#endif /* MDBX_ENABLE_PROFGC */ + TRACE("clear %s %s", "ALLOC_COALESCE", "since got threshold"); + if (MDBX_PNL_GETSIZE(txn->tw.relist) >= num) { + eASSERT(env, + MDBX_PNL_LAST(txn->tw.relist) < txn->geo.first_unallocated && + MDBX_PNL_FIRST(txn->tw.relist) < + txn->geo.first_unallocated); + if (likely(num == 1)) { + pgno = relist_get_single(txn); + goto done; + } + pgno = relist_get_sequence(txn, num, flags); + if (likely(pgno)) + goto done; + } + flags -= ALLOC_COALESCE | ALLOC_SHOULD_SCAN; + } + if (unlikely(/* list is too long already */ MDBX_PNL_GETSIZE( + txn->tw.relist) >= env->options.rp_augment_limit) && + ((/* not a slot-request from gc-update */ num && + /* have enough unallocated space */ txn->geo.upper >= + txn->geo.first_unallocated + num && + monotime_since_cached(monotime_begin, &now_cache) + + txn->tw.gc.time_acc >= + env->options.gc_time_limit) || + gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) >= PAGELIST_LIMIT)) { + /* Stop reclaiming to avoid large/overflow the page list. This is a rare + * case while search for a continuously multi-page region in a + * large database, see https://libmdbx.dqdkfa.ru/dead-github/issues/123 */ + NOTICE("stop reclaiming %s: %zu (current) + %zu " + "(chunk) -> %zu, rp_augment_limit %u", + likely(gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) < PAGELIST_LIMIT) + ? 
"since rp_augment_limit was reached" + : "to avoid PNL overflow", + MDBX_PNL_GETSIZE(txn->tw.relist), gc_len, + gc_len + MDBX_PNL_GETSIZE(txn->tw.relist), + env->options.rp_augment_limit); + goto depleted_gc; + } + } + + /* Remember ID of readed GC record */ + txn->tw.gc.last_reclaimed = id; + if (flags & ALLOC_LIFO) { + ret.err = txl_append(&txn->tw.gc.reclaimed, id); + if (unlikely(ret.err != MDBX_SUCCESS)) + goto fail; + } + + /* Append PNL from GC record to tw.relist */ + ret.err = pnl_need(&txn->tw.relist, gc_len); + if (unlikely(ret.err != MDBX_SUCCESS)) + goto fail; + + if (LOG_ENABLED(MDBX_LOG_EXTRA)) { + DEBUG_EXTRA("readed GC-pnl txn %" PRIaTXN " root %" PRIaPGNO + " len %zu, PNL", + id, txn->dbs[FREE_DBI].root, gc_len); + for (size_t i = gc_len; i; i--) + DEBUG_EXTRA_PRINT(" %" PRIaPGNO, gc_pnl[i]); + DEBUG_EXTRA_PRINT(", first_unallocated %u\n", txn->geo.first_unallocated); + } + + /* Merge in descending sorted order */ + pnl_merge(txn->tw.relist, gc_pnl); + flags |= ALLOC_SHOULD_SCAN; + if (AUDIT_ENABLED()) { + if (unlikely(!pnl_check(txn->tw.relist, txn->geo.first_unallocated))) { + ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid txn retired-list"); + ret.err = MDBX_CORRUPTED; + goto fail; + } + } else { + eASSERT(env, + pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated)); + } + eASSERT(env, dpl_check(txn)); + + eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.relist) == 0 || + MDBX_PNL_MOST(txn->tw.relist) < txn->geo.first_unallocated); + if (MDBX_ENABLE_REFUND && MDBX_PNL_GETSIZE(txn->tw.relist) && + unlikely(MDBX_PNL_MOST(txn->tw.relist) == + txn->geo.first_unallocated - 1)) { + /* Refund suitable pages into "unallocated" space */ + txn_refund(txn); + } + eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - + MDBX_ENABLE_REFUND)); + + /* Done for a kick-reclaim mode, actually no page needed */ + if (unlikely(num == 0)) { + eASSERT(env, ret.err == MDBX_SUCCESS); + TRACE("%s: last id #%" PRIaTXN ", re-len 
%zu", "early-exit for slot", id, + MDBX_PNL_GETSIZE(txn->tw.relist)); + goto early_exit; + } + + /* TODO: delete reclaimed records */ + + eASSERT(env, op == MDBX_PREV || op == MDBX_NEXT); + if (flags & ALLOC_COALESCE) { + TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "coalesce-continue", id, + MDBX_PNL_GETSIZE(txn->tw.relist)); + goto next_gc; + } + +scan: + eASSERT(env, flags & ALLOC_SHOULD_SCAN); + eASSERT(env, num > 0); + if (MDBX_PNL_GETSIZE(txn->tw.relist) >= num) { + eASSERT(env, + MDBX_PNL_LAST(txn->tw.relist) < txn->geo.first_unallocated && + MDBX_PNL_FIRST(txn->tw.relist) < txn->geo.first_unallocated); + if (likely(num == 1)) { + eASSERT(env, !(flags & ALLOC_RESERVE)); + pgno = relist_get_single(txn); + goto done; + } + pgno = relist_get_sequence(txn, num, flags); + if (likely(pgno)) + goto done; + } + flags -= ALLOC_SHOULD_SCAN; + if (ret.err == MDBX_SUCCESS) { + TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "continue-search", id, + MDBX_PNL_GETSIZE(txn->tw.relist)); + goto next_gc; + } + +depleted_gc: + TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "gc-depleted", id, + MDBX_PNL_GETSIZE(txn->tw.relist)); + ret.err = MDBX_NOTFOUND; + if (flags & ALLOC_SHOULD_SCAN) + goto scan; + txn->flags |= txn_gc_drained; + + //------------------------------------------------------------------------- + + /* There is no suitable pages in the GC and to be able to allocate + * we should CHOICE one of: + * - make a new steady checkpoint if reclaiming was stopped by + * the last steady-sync, or wipe it in the MDBX_UTTERLY_NOSYNC mode; + * - kick lagging reader(s) if reclaiming was stopped by ones of it. + * - extend the database file. */ + + /* Will use new pages from the map if nothing is suitable in the GC. */ + newnext = txn->geo.first_unallocated + num; + + /* Does reclaiming stopped at the last steady point? 
*/ + const meta_ptr_t recent = meta_recent(env, &txn->tw.troika); + const meta_ptr_t prefer_steady = meta_prefer_steady(env, &txn->tw.troika); + if (recent.ptr_c != prefer_steady.ptr_c && prefer_steady.is_steady && + detent == prefer_steady.txnid + 1) { + DEBUG("gc-kick-steady: recent %" PRIaTXN "-%s, steady %" PRIaTXN + "-%s, detent %" PRIaTXN, + recent.txnid, durable_caption(recent.ptr_c), prefer_steady.txnid, + durable_caption(prefer_steady.ptr_c), detent); + const pgno_t autosync_threshold = + atomic_load32(&env->lck->autosync_threshold, mo_Relaxed); + const uint64_t autosync_period = + atomic_load64(&env->lck->autosync_period, mo_Relaxed); + uint64_t eoos_timestamp; + /* wipe the last steady-point if one of: + * - UTTERLY_NOSYNC mode AND auto-sync threshold is NOT specified + * - UTTERLY_NOSYNC mode AND free space at steady-point is exhausted + * otherwise, make a new steady-point if one of: + * - auto-sync threshold is specified and reached; + * - upper limit of database size is reached; + * - database is full (with the current file size) + * AND auto-sync threshold it NOT specified */ + if (F_ISSET(env->flags, MDBX_UTTERLY_NOSYNC) && + ((autosync_threshold | autosync_period) == 0 || + newnext >= prefer_steady.ptr_c->geometry.now)) { + /* wipe steady checkpoint in MDBX_UTTERLY_NOSYNC mode + * without any auto-sync threshold(s). 
*/ +#if MDBX_ENABLE_PROFGC + env->lck->pgops.gc_prof.wipes += 1; +#endif /* MDBX_ENABLE_PROFGC */ + ret.err = meta_wipe_steady(env, detent); + DEBUG("gc-wipe-steady, rc %d", ret.err); + if (unlikely(ret.err != MDBX_SUCCESS)) + goto fail; + eASSERT(env, prefer_steady.ptr_c != + meta_prefer_steady(env, &txn->tw.troika).ptr_c); + goto retry_gc_refresh_oldest; + } + if ((autosync_threshold && + atomic_load64(&env->lck->unsynced_pages, mo_Relaxed) >= + autosync_threshold) || + (autosync_period && + (eoos_timestamp = + atomic_load64(&env->lck->eoos_timestamp, mo_Relaxed)) && + osal_monotime() - eoos_timestamp >= autosync_period) || + newnext >= txn->geo.upper || + ((num == 0 || newnext >= txn->geo.end_pgno) && + (autosync_threshold | autosync_period) == 0)) { + /* make steady checkpoint. */ +#if MDBX_ENABLE_PROFGC + env->lck->pgops.gc_prof.flushes += 1; +#endif /* MDBX_ENABLE_PROFGC */ + meta_t meta = *recent.ptr_c; + ret.err = dxb_sync_locked(env, env->flags & MDBX_WRITEMAP, &meta, + &txn->tw.troika); + DEBUG("gc-make-steady, rc %d", ret.err); + eASSERT(env, ret.err != MDBX_RESULT_TRUE); + if (unlikely(ret.err != MDBX_SUCCESS)) + goto fail; + eASSERT(env, prefer_steady.ptr_c != + meta_prefer_steady(env, &txn->tw.troika).ptr_c); + goto retry_gc_refresh_oldest; + } + } + + if (unlikely(true == + atomic_load32(&env->lck->rdt_refresh_flag, mo_AcquireRelease))) { + oldest = txn_snapshot_oldest(txn); + if (oldest >= detent) + goto retry_gc_have_oldest; + } + + /* Avoid kick lagging reader(s) if is enough unallocated space + * at the end of database file. 
*/ + if (!(flags & ALLOC_RESERVE) && newnext <= txn->geo.end_pgno) { + eASSERT(env, pgno == 0); + goto done; + } + + if (oldest < txn->txnid - xMDBX_TXNID_STEP) { + oldest = mvcc_kick_laggards(env, oldest); + if (oldest >= detent) + goto retry_gc_have_oldest; + } + + //--------------------------------------------------------------------------- + +no_gc: + eASSERT(env, pgno == 0); +#ifndef MDBX_ENABLE_BACKLOG_DEPLETED +#define MDBX_ENABLE_BACKLOG_DEPLETED 0 +#endif /* MDBX_ENABLE_BACKLOG_DEPLETED*/ + if (MDBX_ENABLE_BACKLOG_DEPLETED && + unlikely(!(txn->flags & txn_gc_drained))) { + ret.err = MDBX_BACKLOG_DEPLETED; + goto fail; + } + if (flags & ALLOC_RESERVE) { + ret.err = MDBX_NOTFOUND; + goto fail; + } + + /* Will use new pages from the map if nothing is suitable in the GC. */ + newnext = txn->geo.first_unallocated + num; + if (newnext <= txn->geo.end_pgno) + goto done; + + if (newnext > txn->geo.upper || !txn->geo.grow_pv) { + NOTICE("gc-alloc: next %zu > upper %" PRIaPGNO, newnext, txn->geo.upper); + ret.err = MDBX_MAP_FULL; + goto fail; + } + + eASSERT(env, newnext > txn->geo.end_pgno); + const size_t grow_step = pv2pages(txn->geo.grow_pv); + size_t aligned = pgno_align2os_pgno( + env, (pgno_t)(newnext + grow_step - newnext % grow_step)); + + if (aligned > txn->geo.upper) + aligned = txn->geo.upper; + eASSERT(env, aligned >= newnext); + + VERBOSE("try growth datafile to %zu pages (+%zu)", aligned, + aligned - txn->geo.end_pgno); + ret.err = dxb_resize(env, txn->geo.first_unallocated, (pgno_t)aligned, + txn->geo.upper, implicit_grow); + if (ret.err != MDBX_SUCCESS) { + ERROR("unable growth datafile to %zu pages (+%zu), errcode %d", aligned, + aligned - txn->geo.end_pgno, ret.err); + goto fail; + } + env->txn->geo.end_pgno = (pgno_t)aligned; + eASSERT(env, pgno == 0); + + //--------------------------------------------------------------------------- + +done: + ret.err = MDBX_SUCCESS; + if (likely((flags & ALLOC_RESERVE) == 0)) { + if (pgno) { + eASSERT(env, + 
pgno + num <= txn->geo.first_unallocated && pgno >= NUM_METAS); + eASSERT(env, + pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - + MDBX_ENABLE_REFUND)); + } else { + pgno = txn->geo.first_unallocated; + txn->geo.first_unallocated += (pgno_t)num; + eASSERT(env, txn->geo.first_unallocated <= txn->geo.end_pgno); + eASSERT(env, + pgno >= NUM_METAS && pgno + num <= txn->geo.first_unallocated); + } + + ret = page_alloc_finalize(env, txn, mc, pgno, num); + if (unlikely(ret.err != MDBX_SUCCESS)) { + fail: + eASSERT(env, ret.err != MDBX_SUCCESS); + eASSERT(env, + pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - + MDBX_ENABLE_REFUND)); + int level; + const char *what; + if (flags & ALLOC_RESERVE) { + level = (flags & ALLOC_UNIMPORTANT) ? MDBX_LOG_DEBUG : MDBX_LOG_NOTICE; + what = num ? "reserve-pages" : "fetch-slot"; + } else { + txn->flags |= MDBX_TXN_ERROR; + level = MDBX_LOG_ERROR; + what = "pages"; + } + if (LOG_ENABLED(level)) + debug_log(level, __func__, __LINE__, + "unable alloc %zu %s, alloc-flags 0x%x, err %d, txn-flags " + "0x%x, re-list-len %zu, loose-count %zu, gc: height %u, " + "branch %zu, leaf %zu, large %zu, entries %zu\n", + num, what, flags, ret.err, txn->flags, + MDBX_PNL_GETSIZE(txn->tw.relist), txn->tw.loose_count, + txn->dbs[FREE_DBI].height, + (size_t)txn->dbs[FREE_DBI].branch_pages, + (size_t)txn->dbs[FREE_DBI].leaf_pages, + (size_t)txn->dbs[FREE_DBI].large_pages, + (size_t)txn->dbs[FREE_DBI].items); + ret.page = nullptr; + } + if (num > 1) + txn->tw.gc.time_acc += monotime_since_cached(monotime_begin, &now_cache); + } else { + early_exit: + DEBUG("return nullptr for %zu pages for ALLOC_%s, rc %d", num, + num ? 
"RESERVE" : "SLOT", ret.err); + ret.page = nullptr; + } + +#if MDBX_ENABLE_PROFGC + prof->rtime_monotonic += osal_monotime() - monotime_begin; +#endif /* MDBX_ENABLE_PROFGC */ + return ret; +} + +__hot pgr_t gc_alloc_single(const MDBX_cursor *const mc) { + MDBX_txn *const txn = mc->txn; + tASSERT(txn, mc->txn->flags & MDBX_TXN_DIRTY); + tASSERT(txn, + F_ISSET(*cursor_dbi_state(mc), DBI_LINDO | DBI_VALID | DBI_DIRTY)); + + /* If there are any loose pages, just use them */ + while (likely(txn->tw.loose_pages)) { +#if MDBX_ENABLE_REFUND + if (unlikely(txn->tw.loose_refund_wl > txn->geo.first_unallocated)) { + txn_refund(txn); + if (!txn->tw.loose_pages) + break; + } +#endif /* MDBX_ENABLE_REFUND */ + + page_t *lp = txn->tw.loose_pages; + MDBX_ASAN_UNPOISON_MEMORY_REGION(lp, txn->env->ps); + VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *)); + txn->tw.loose_pages = page_next(lp); + txn->tw.loose_count--; + DEBUG_EXTRA("db %d use loose page %" PRIaPGNO, cursor_dbi_dbg(mc), + lp->pgno); + tASSERT(txn, lp->pgno < txn->geo.first_unallocated); + tASSERT(txn, lp->pgno >= NUM_METAS); + VALGRIND_MAKE_MEM_UNDEFINED(page_data(lp), page_space(txn->env)); + lp->txnid = txn->front_txnid; + pgr_t ret = {lp, MDBX_SUCCESS}; + return ret; + } + + if (likely(MDBX_PNL_GETSIZE(txn->tw.relist) > 0)) + return page_alloc_finalize(txn->env, txn, mc, relist_get_single(txn), 1); + + return gc_alloc_ex(mc, 1, ALLOC_DEFAULT); +} diff --git a/src/gc-put.c b/src/gc-put.c new file mode 100644 index 00000000..81106b7a --- /dev/null +++ b/src/gc-put.c @@ -0,0 +1,1094 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +MDBX_MAYBE_UNUSED static inline const char *dbg_prefix(gcu_t *ctx) { + return ctx->lifo ? 
" lifo" : " fifo"; +} + +static inline size_t backlog_size(MDBX_txn *txn) { + return MDBX_PNL_GETSIZE(txn->tw.relist) + txn->tw.loose_count; +} + +static int clean_stored_retired(MDBX_txn *txn, gcu_t *ctx) { + int err = MDBX_SUCCESS; + if (ctx->retired_stored) { + MDBX_cursor *const gc = ptr_disp(txn, sizeof(MDBX_txn)); + tASSERT(txn, txn == txn->env->basal_txn && gc->next == gc); + gc->txn = txn; + gc->dbi_state = txn->dbi_state; + gc->top_and_flags = z_fresh_mark; + gc->next = txn->cursors[FREE_DBI]; + txn->cursors[FREE_DBI] = gc; + do { + MDBX_val key, val; +#if MDBX_ENABLE_BIGFOOT + key.iov_base = &ctx->bigfoot; +#else + key.iov_base = &txn->txnid; +#endif /* MDBX_ENABLE_BIGFOOT */ + key.iov_len = sizeof(txnid_t); + const csr_t csr = cursor_seek(gc, &key, &val, MDBX_SET); + if (csr.err == MDBX_SUCCESS && csr.exact) { + ctx->retired_stored = 0; + err = cursor_del(gc, 0); + TRACE("== clear-4linear, backlog %zu, err %d", backlog_size(txn), err); + } else + err = (csr.err == MDBX_NOTFOUND) ? MDBX_SUCCESS : csr.err; + } +#if MDBX_ENABLE_BIGFOOT + while (!err && --ctx->bigfoot >= txn->txnid); +#else + while (0); +#endif /* MDBX_ENABLE_BIGFOOT */ + txn->cursors[FREE_DBI] = gc->next; + gc->next = gc; + } + return err; +} + +static int touch_gc(gcu_t *ctx) { + tASSERT(ctx->cursor.txn, is_pointed(&ctx->cursor) || + ctx->cursor.txn->dbs[FREE_DBI].leaf_pages == 0); + MDBX_val key, val; + key.iov_base = val.iov_base = nullptr; + key.iov_len = sizeof(txnid_t); + val.iov_len = MDBX_PNL_SIZEOF(ctx->cursor.txn->tw.retired_pages); + ctx->cursor.flags |= z_gcu_preparation; + int err = cursor_touch(&ctx->cursor, &key, &val); + ctx->cursor.flags -= z_gcu_preparation; + return err; +} + +/* Prepare a backlog of pages to modify GC itself, while reclaiming is + * prohibited. It should be enough to prevent search in gc_alloc_ex() + * during a deleting, when GC tree is unbalanced. 
*/ +static int prepare_backlog(MDBX_txn *txn, gcu_t *ctx) { + const size_t for_cow = txn->dbs[FREE_DBI].height; + const size_t for_rebalance = + for_cow + 1 + + (txn->dbs[FREE_DBI].height + 1ul >= txn->dbs[FREE_DBI].branch_pages); + size_t for_split = ctx->retired_stored == 0; + tASSERT(txn, is_pointed(&ctx->cursor) || txn->dbs[FREE_DBI].leaf_pages == 0); + + const intptr_t retired_left = + MDBX_PNL_SIZEOF(txn->tw.retired_pages) - ctx->retired_stored; + size_t for_relist = 0; + if (MDBX_ENABLE_BIGFOOT && retired_left > 0) { + for_relist = (retired_left + txn->env->maxgc_large1page - 1) / + txn->env->maxgc_large1page; + const size_t per_branch_page = txn->env->maxgc_per_branch; + for (size_t entries = for_relist; entries > 1; for_split += entries) + entries = (entries + per_branch_page - 1) / per_branch_page; + } else if (!MDBX_ENABLE_BIGFOOT && retired_left != 0) { + for_relist = + largechunk_npages(txn->env, MDBX_PNL_SIZEOF(txn->tw.retired_pages)); + } + + const size_t for_tree_before_touch = for_cow + for_rebalance + for_split; + const size_t for_tree_after_touch = for_rebalance + for_split; + const size_t for_all_before_touch = for_relist + for_tree_before_touch; + const size_t for_all_after_touch = for_relist + for_tree_after_touch; + + if (likely(for_relist < 2 && backlog_size(txn) > for_all_before_touch) && + (ctx->cursor.top < 0 || + is_modifable(txn, ctx->cursor.pg[ctx->cursor.top]))) + return MDBX_SUCCESS; + + TRACE(">> retired-stored %zu, left %zi, backlog %zu, need %zu (4list %zu, " + "4split %zu, " + "4cow %zu, 4tree %zu)", + ctx->retired_stored, retired_left, backlog_size(txn), + for_all_before_touch, for_relist, for_split, for_cow, + for_tree_before_touch); + + int err = touch_gc(ctx); + TRACE("== after-touch, backlog %zu, err %d", backlog_size(txn), err); + + if (!MDBX_ENABLE_BIGFOOT && unlikely(for_relist > 1) && + MDBX_PNL_GETSIZE(txn->tw.retired_pages) != ctx->retired_stored && + err == MDBX_SUCCESS) { + if (unlikely(ctx->retired_stored)) { + err 
= clean_stored_retired(txn, ctx); + if (unlikely(err != MDBX_SUCCESS)) + return err; + if (!ctx->retired_stored) + return /* restart by tail-recursion */ prepare_backlog(txn, ctx); + } + err = gc_alloc_ex(&ctx->cursor, for_relist, ALLOC_RESERVE).err; + TRACE("== after-4linear, backlog %zu, err %d", backlog_size(txn), err); + cASSERT(&ctx->cursor, + backlog_size(txn) >= for_relist || err != MDBX_SUCCESS); + } + + while (backlog_size(txn) < for_all_after_touch && err == MDBX_SUCCESS) + err = gc_alloc_ex(&ctx->cursor, 0, ALLOC_RESERVE | ALLOC_UNIMPORTANT).err; + + TRACE("<< backlog %zu, err %d, gc: height %u, branch %zu, leaf %zu, large " + "%zu, entries %zu", + backlog_size(txn), err, txn->dbs[FREE_DBI].height, + (size_t)txn->dbs[FREE_DBI].branch_pages, + (size_t)txn->dbs[FREE_DBI].leaf_pages, + (size_t)txn->dbs[FREE_DBI].large_pages, + (size_t)txn->dbs[FREE_DBI].items); + tASSERT(txn, err != MDBX_NOTFOUND || (txn->flags & txn_gc_drained) != 0); + return (err != MDBX_NOTFOUND) ? err : MDBX_SUCCESS; +} + +static inline void zeroize_reserved(const MDBX_env *env, MDBX_val pnl) { +#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)) + /* To prevent a Valgrind warning from mdbx_dump_val() + * invoked via the DVAL_DEBUG() macro on exit + * from cursor_seek(MDBX_SET_KEY), which is called below inside gc_update() in + * the cleanup loop and in the loop that fills the reserved items. */ + memset(pnl.iov_base, 0xBB, pnl.iov_len); +#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */ + + /* PNL is initially empty, zero out at least the length */ + memset(pnl.iov_base, 0, sizeof(pgno_t)); + if ((env->flags & (MDBX_WRITEMAP | MDBX_NOMEMINIT)) == 0) + /* zero out to avoid leaking values from uninitialized malloc'ed memory + * to the file in non-writemap mode if length of the saving page-list + * was changed during space reservation.
*/ + memset(pnl.iov_base, 0, pnl.iov_len); +} + +static int gcu_loose(MDBX_txn *txn, gcu_t *ctx) { + tASSERT(txn, txn->tw.loose_count > 0); + /* Return loose page numbers to tw.relist, + * though usually none are left at this point. + * The pages themselves remain in dirtylist. */ + if (unlikely(!txn->tw.gc.reclaimed && txn->tw.gc.last_reclaimed < 1)) { + TRACE("%s: try allocate gc-slot for %zu loose-pages", dbg_prefix(ctx), + txn->tw.loose_count); + int err = gc_alloc_ex(&ctx->cursor, 0, ALLOC_RESERVE).err; + if (err == MDBX_SUCCESS) { + TRACE("%s: retry since gc-slot for %zu loose-pages available", + dbg_prefix(ctx), txn->tw.loose_count); + return MDBX_SUCCESS; + } + + /* Put loose page numbers in tw.retired_pages, + * since unable to return ones to tw.relist. */ + err = pnl_need(&txn->tw.retired_pages, txn->tw.loose_count); + if (unlikely(err != MDBX_SUCCESS)) + return err; + for (page_t *lp = txn->tw.loose_pages; lp; lp = page_next(lp)) { + pnl_append_prereserved(txn->tw.retired_pages, lp->pgno); + MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *)); + VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *)); + } + TRACE("%s: append %zu loose-pages to retired-pages", dbg_prefix(ctx), + txn->tw.loose_count); + } else { + /* Room for loose pages + temp PNL with same */ + int err = pnl_need(&txn->tw.relist, 2 * txn->tw.loose_count + 2); + if (unlikely(err != MDBX_SUCCESS)) + return err; + pnl_t loose = txn->tw.relist + MDBX_PNL_ALLOCLEN(txn->tw.relist) - + txn->tw.loose_count - 1; + size_t count = 0; + for (page_t *lp = txn->tw.loose_pages; lp; lp = page_next(lp)) { + tASSERT(txn, lp->flags == P_LOOSE); + loose[++count] = lp->pgno; + MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *)); + VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *)); + } + tASSERT(txn, count == txn->tw.loose_count); + MDBX_PNL_SETSIZE(loose, count); + pnl_sort(loose, txn->geo.first_unallocated); + pnl_merge(txn->tw.relist, loose); + TRACE("%s: append 
%zu loose-pages to reclaimed-pages", dbg_prefix(ctx), + txn->tw.loose_count); + } + + /* filter-out list of dirty-pages from loose-pages */ + dpl_t *const dl = txn->tw.dirtylist; + if (dl) { + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + tASSERT(txn, dl->sorted <= dl->length); + size_t w = 0, sorted_out = 0; + for (size_t r = w; ++r <= dl->length;) { + page_t *dp = dl->items[r].ptr; + tASSERT(txn, dp->flags == P_LOOSE || is_modifable(txn, dp)); + tASSERT(txn, dpl_endpgno(dl, r) <= txn->geo.first_unallocated); + if ((dp->flags & P_LOOSE) == 0) { + if (++w != r) + dl->items[w] = dl->items[r]; + } else { + tASSERT(txn, dp->flags == P_LOOSE); + sorted_out += dl->sorted >= r; + if (!MDBX_AVOID_MSYNC || !(txn->flags & MDBX_WRITEMAP)) + page_shadow_release(txn->env, dp, 1); + } + } + TRACE("%s: filtered-out loose-pages from %zu -> %zu dirty-pages", + dbg_prefix(ctx), dl->length, w); + tASSERT(txn, txn->tw.loose_count == dl->length - w); + dl->sorted -= sorted_out; + tASSERT(txn, dl->sorted <= w); + dpl_setlen(dl, w); + dl->pages_including_loose -= txn->tw.loose_count; + txn->tw.dirtyroom += txn->tw.loose_count; + tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == + (txn->parent ? 
txn->parent->tw.dirtyroom + : txn->env->options.dp_limit)); + } else { + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); + } + txn->tw.loose_pages = nullptr; + txn->tw.loose_count = 0; +#if MDBX_ENABLE_REFUND + txn->tw.loose_refund_wl = 0; +#endif /* MDBX_ENABLE_REFUND */ + return MDBX_SUCCESS; +} + +static int gcu_retired(MDBX_txn *txn, gcu_t *ctx) { + int err; + if (unlikely(!ctx->retired_stored)) { + /* Make sure last page of GC is touched and on retired-list */ + err = outer_last(&ctx->cursor, nullptr, nullptr); + if (likely(err == MDBX_SUCCESS)) + err = touch_gc(ctx); + if (unlikely(err != MDBX_SUCCESS) && err != MDBX_NOTFOUND) + return err; + } + + MDBX_val key, data; +#if MDBX_ENABLE_BIGFOOT + size_t retired_pages_before; + do { + if (ctx->bigfoot > txn->txnid) { + err = clean_stored_retired(txn, ctx); + if (unlikely(err != MDBX_SUCCESS)) + return err; + tASSERT(txn, ctx->bigfoot <= txn->txnid); + } + + retired_pages_before = MDBX_PNL_GETSIZE(txn->tw.retired_pages); + err = prepare_backlog(txn, ctx); + if (unlikely(err != MDBX_SUCCESS)) + return err; + if (retired_pages_before != MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { + TRACE("%s: retired-list changed (%zu -> %zu), retry", dbg_prefix(ctx), + retired_pages_before, MDBX_PNL_GETSIZE(txn->tw.retired_pages)); + break; + } + + pnl_sort(txn->tw.retired_pages, txn->geo.first_unallocated); + ctx->retired_stored = 0; + ctx->bigfoot = txn->txnid; + do { + if (ctx->retired_stored) { + err = prepare_backlog(txn, ctx); + if (unlikely(err != MDBX_SUCCESS)) + return err; + if (ctx->retired_stored >= MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { + TRACE("%s: retired-list changed (%zu -> %zu), retry", dbg_prefix(ctx), + retired_pages_before, MDBX_PNL_GETSIZE(txn->tw.retired_pages)); + break; + } + } + key.iov_len = sizeof(txnid_t); + key.iov_base = &ctx->bigfoot; + const size_t left = + MDBX_PNL_GETSIZE(txn->tw.retired_pages) - ctx->retired_stored; + const size_t chunk = + (left > 
txn->env->maxgc_large1page && ctx->bigfoot < MAX_TXNID) + ? txn->env->maxgc_large1page + : left; + data.iov_len = (chunk + 1) * sizeof(pgno_t); + err = cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE); + if (unlikely(err != MDBX_SUCCESS)) + return err; + +#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)) + /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() + * вызванное через макрос DVAL_DEBUG() на выходе + * из cursor_seek(MDBX_SET_KEY), которая вызывается как выше в цикле + * очистки, так и ниже в цикле заполнения зарезервированных элементов. + */ + memset(data.iov_base, 0xBB, data.iov_len); +#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */ + + if (retired_pages_before == MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { + const size_t at = (ctx->lifo == MDBX_PNL_ASCENDING) + ? left - chunk + : ctx->retired_stored; + pgno_t *const begin = txn->tw.retired_pages + at; + /* MDBX_PNL_ASCENDING == false && LIFO == false: + * - the larger pgno is at the beginning of retired list + * and should be placed with the larger txnid. + * MDBX_PNL_ASCENDING == true && LIFO == true: + * - the larger pgno is at the ending of retired list + * and should be placed with the smaller txnid. 
*/ + const pgno_t save = *begin; + *begin = (pgno_t)chunk; + memcpy(data.iov_base, begin, data.iov_len); + *begin = save; + TRACE("%s: put-retired/bigfoot @ %" PRIaTXN + " (slice #%u) #%zu [%zu..%zu] of %zu", + dbg_prefix(ctx), ctx->bigfoot, + (unsigned)(ctx->bigfoot - txn->txnid), chunk, at, at + chunk, + retired_pages_before); + } + ctx->retired_stored += chunk; + } while (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages) && + (++ctx->bigfoot, true)); + } while (retired_pages_before != MDBX_PNL_GETSIZE(txn->tw.retired_pages)); +#else + /* Write to last page of GC */ + key.iov_len = sizeof(txnid_t); + key.iov_base = &txn->txnid; + do { + prepare_backlog(txn, ctx); + data.iov_len = MDBX_PNL_SIZEOF(txn->tw.retired_pages); + err = cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE); + if (unlikely(err != MDBX_SUCCESS)) + return err; + +#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)) + /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() + * вызванное через макрос DVAL_DEBUG() на выходе + * из cursor_seek(MDBX_SET_KEY), которая вызывается как выше в цикле + * очистки, так и ниже в цикле заполнения зарезервированных элементов. 
*/ + memset(data.iov_base, 0xBB, data.iov_len); +#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */ + + /* Retry if tw.retired_pages[] grew during the Put() */ + } while (data.iov_len < MDBX_PNL_SIZEOF(txn->tw.retired_pages)); + + ctx->retired_stored = MDBX_PNL_GETSIZE(txn->tw.retired_pages); + pnl_sort(txn->tw.retired_pages, txn->geo.first_unallocated); + tASSERT(txn, data.iov_len == MDBX_PNL_SIZEOF(txn->tw.retired_pages)); + memcpy(data.iov_base, txn->tw.retired_pages, data.iov_len); + + TRACE("%s: put-retired #%zu @ %" PRIaTXN, dbg_prefix(ctx), + ctx->retired_stored, txn->txnid); +#endif /* MDBX_ENABLE_BIGFOOT */ + if (LOG_ENABLED(MDBX_LOG_EXTRA)) { + size_t i = ctx->retired_stored; + DEBUG_EXTRA("txn %" PRIaTXN " root %" PRIaPGNO " num %zu, retired-PNL", + txn->txnid, txn->dbs[FREE_DBI].root, i); + for (; i; i--) + DEBUG_EXTRA_PRINT(" %" PRIaPGNO, txn->tw.retired_pages[i]); + DEBUG_EXTRA_PRINT("%s\n", "."); + } + return MDBX_SUCCESS; +} + +typedef struct gcu_rid_result { + int err; + txnid_t rid; +} rid_t; + +static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, + const size_t left) { + rid_t r; + if (ctx->lifo) { + if (txn->tw.gc.reclaimed == nullptr) { + txn->tw.gc.reclaimed = txl_alloc(); + if (unlikely(!txn->tw.gc.reclaimed)) { + r.err = MDBX_ENOMEM; + goto return_error; + } + } + if (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) < txl_max && + left > (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot) * + txn->env->maxgc_large1page && + !ctx->dense) { + /* Hужен свободный для для сохранения списка страниц. 
*/ + bool need_cleanup = false; + txnid_t snap_oldest = 0; + retry_rid: + do { + r.err = gc_alloc_ex(&ctx->cursor, 0, ALLOC_RESERVE).err; + snap_oldest = txn->env->lck->cached_oldest.weak; + if (likely(r.err == MDBX_SUCCESS)) { + TRACE("%s: took @%" PRIaTXN " from GC", dbg_prefix(ctx), + MDBX_PNL_LAST(txn->tw.gc.reclaimed)); + need_cleanup = true; + } + } while (r.err == MDBX_SUCCESS && + MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) < txl_max && + left > + (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot) * + txn->env->maxgc_large1page); + + if (likely(r.err == MDBX_SUCCESS)) { + TRACE("%s: got enough from GC.", dbg_prefix(ctx)); + goto return_continue; + } else if (unlikely(r.err != MDBX_NOTFOUND)) + /* LY: some troubles... */ + goto return_error; + + if (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)) { + if (need_cleanup) { + txl_sort(txn->tw.gc.reclaimed); + ctx->cleaned_slot = 0; + } + ctx->rid = MDBX_PNL_LAST(txn->tw.gc.reclaimed); + } else { + tASSERT(txn, txn->tw.gc.last_reclaimed == 0); + if (unlikely(txn_snapshot_oldest(txn) != snap_oldest)) + /* should retry gc_alloc_ex() + * if the oldest reader changes since the last attempt */ + goto retry_rid; + /* no reclaimable GC entries, + * therefore no entries with ID < mdbx_find_oldest(txn) */ + txn->tw.gc.last_reclaimed = ctx->rid = snap_oldest; + TRACE("%s: none recycled yet, set rid to @%" PRIaTXN, dbg_prefix(ctx), + ctx->rid); + } + + /* В GC нет годных к переработке записей, + * будем использовать свободные id в обратном порядке. 
*/ + while (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) < txl_max && + left > + (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot) * + txn->env->maxgc_large1page) { + if (unlikely(ctx->rid <= MIN_TXNID)) { + if (unlikely(MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) <= + ctx->reused_slot)) { + NOTICE("** restart: reserve depleted (reused_gc_slot %zu >= " + "gc.reclaimed %zu)", + ctx->reused_slot, MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)); + goto return_restart; + } + break; + } + + tASSERT(txn, ctx->rid >= MIN_TXNID && ctx->rid <= MAX_TXNID); + ctx->rid -= 1; + MDBX_val key = {&ctx->rid, sizeof(ctx->rid)}, data; + r.err = cursor_seek(&ctx->cursor, &key, &data, MDBX_SET_KEY).err; + if (unlikely(r.err == MDBX_SUCCESS)) { + DEBUG("%s: GC's id %" PRIaTXN " is present, going to first", + dbg_prefix(ctx), ctx->rid); + r.err = outer_first(&ctx->cursor, &key, nullptr); + if (unlikely(r.err != MDBX_SUCCESS || + key.iov_len != sizeof(txnid_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid GC-key size", (unsigned)key.iov_len); + r.err = MDBX_CORRUPTED; + goto return_error; + } + const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base); + if (unlikely(gc_first <= MIN_TXNID)) { + DEBUG("%s: no free GC's id(s) less than %" PRIaTXN + " (going dense-mode)", + dbg_prefix(ctx), ctx->rid); + ctx->dense = true; + goto return_restart; + } + ctx->rid = gc_first - 1; + } + + tASSERT(txn, !ctx->dense); + r.err = txl_append(&txn->tw.gc.reclaimed, ctx->rid); + if (unlikely(r.err != MDBX_SUCCESS)) + goto return_error; + + if (ctx->reused_slot) + /* rare case, but it is better to clear and re-create GC entries + * with less fragmentation. */ + need_cleanup = true; + else + ctx->cleaned_slot += + 1 /* mark cleanup is not needed for added slot. 
*/; + + TRACE("%s: append @%" PRIaTXN + " to lifo-reclaimed, cleaned-gc-slot = %zu", + dbg_prefix(ctx), ctx->rid, ctx->cleaned_slot); + } + + if (need_cleanup) { + if (ctx->cleaned_slot) { + TRACE("%s: restart to clear and re-create GC entries", + dbg_prefix(ctx)); + goto return_restart; + } + goto return_continue; + } + } + + const size_t i = MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot; + tASSERT(txn, i > 0 && i <= MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)); + r.rid = txn->tw.gc.reclaimed[i]; + TRACE("%s: take @%" PRIaTXN " from lifo-reclaimed[%zu]", dbg_prefix(ctx), + r.rid, i); + } else { + tASSERT(txn, txn->tw.gc.reclaimed == nullptr); + if (unlikely(ctx->rid == 0)) { + ctx->rid = txn_snapshot_oldest(txn); + MDBX_val key; + r.err = outer_first(&ctx->cursor, &key, nullptr); + if (likely(r.err == MDBX_SUCCESS)) { + if (unlikely(key.iov_len != sizeof(txnid_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid GC-key size", (unsigned)key.iov_len); + r.err = MDBX_CORRUPTED; + goto return_error; + } + const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base); + if (ctx->rid >= gc_first) + ctx->rid = gc_first - 1; + if (unlikely(ctx->rid == 0)) { + ERROR("%s", "** no GC tail-space to store (going dense-mode)"); + ctx->dense = true; + goto return_restart; + } + } else if (r.err != MDBX_NOTFOUND) { + r.rid = 0; + return r; + } + txn->tw.gc.last_reclaimed = ctx->rid; + ctx->cleaned_id = ctx->rid + 1; + } + r.rid = ctx->rid--; + TRACE("%s: take @%" PRIaTXN " from GC", dbg_prefix(ctx), r.rid); + } + ++ctx->reused_slot; + r.err = MDBX_SUCCESS; + return r; + +return_continue: + r.err = MDBX_SUCCESS; + r.rid = 0; + return r; + +return_restart: + r.err = MDBX_RESULT_TRUE; + r.rid = 0; + return r; + +return_error: + tASSERT(txn, r.err != MDBX_SUCCESS); + r.rid = 0; + return r; +} + +/* Cleanups reclaimed GC (aka freeDB) records, saves the retired-list (aka + * freelist) of current transaction to GC, puts back into GC leftover of the + * 
reclaimed pages with chunking. This recursively changes the reclaimed-list, + * loose-list and retired-list. Keep trying until it stabilizes. + * + * NOTE: This code is a consequence of many iterations of adding crutches (aka + * "checks and balances") to partially bypass the fundamental design problems + * inherited from LMDB. So do not try to understand it completely in order to + * avoid your madness. */ +int gc_update(MDBX_txn *txn, gcu_t *ctx) { + TRACE("\n>>> @%" PRIaTXN, txn->txnid); + MDBX_env *const env = txn->env; + ctx->cursor.next = txn->cursors[FREE_DBI]; + txn->cursors[FREE_DBI] = &ctx->cursor; + + pgno_t prev_first_unallocated = 0; + /* txn->tw.relist[] can grow and shrink during this call. + * txn->tw.gc.last_reclaimed and txn->tw.retired_pages[] can only grow. + * But page numbers cannot disappear from txn->tw.retired_pages[]. */ +retry_clean_adj: + ctx->reserve_adj = 0; +retry: + ctx->loop += prev_first_unallocated == txn->geo.first_unallocated; + prev_first_unallocated = txn->geo.first_unallocated; + + if (ctx->loop) + TRACE("%s", " >> restart"); + int rc = MDBX_SUCCESS; + tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - + MDBX_ENABLE_REFUND)); + tASSERT(txn, dpl_check(txn)); + if (unlikely(/* paranoia */ ctx->loop > ((MDBX_DEBUG > 0) ? 
12 : 42))) { + ERROR("too more loops %zu, bailout", ctx->loop); + rc = MDBX_PROBLEM; + goto bailout; + } + + if (unlikely(ctx->dense)) { + rc = clean_stored_retired(txn, ctx); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + + ctx->reserved = 0; + ctx->cleaned_slot = 0; + ctx->reused_slot = 0; + ctx->amount = ctx->fill_idx = ~0u; + ctx->cleaned_id = 0; + ctx->rid = txn->tw.gc.last_reclaimed; + while (true) { + /* Come back here after each Put() in case retired-list changed */ + TRACE("%s", " >> continue"); + + if (ctx->retired_stored != MDBX_PNL_GETSIZE(txn->tw.retired_pages) && + (ctx->loop == 1 || ctx->retired_stored > env->maxgc_large1page || + MDBX_PNL_GETSIZE(txn->tw.retired_pages) > env->maxgc_large1page)) { + rc = prepare_backlog(txn, ctx); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + + tASSERT(txn, + pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - + MDBX_ENABLE_REFUND)); + MDBX_val key, data; + if (ctx->lifo) { + if (ctx->cleaned_slot < + (txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0)) { + ctx->reserved = 0; + ctx->cleaned_slot = 0; + ctx->reused_slot = 0; + ctx->fill_idx = ~0u; + /* LY: cleanup reclaimed records. 
*/ + do { + ctx->cleaned_id = txn->tw.gc.reclaimed[++ctx->cleaned_slot]; + tASSERT(txn, ctx->cleaned_slot > 0 && + ctx->cleaned_id <= env->lck->cached_oldest.weak); + key.iov_base = &ctx->cleaned_id; + key.iov_len = sizeof(ctx->cleaned_id); + rc = cursor_seek(&ctx->cursor, &key, nullptr, MDBX_SET).err; + if (rc == MDBX_NOTFOUND) + continue; + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + if (likely(!ctx->dense)) { + rc = prepare_backlog(txn, ctx); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + tASSERT(txn, ctx->cleaned_id <= env->lck->cached_oldest.weak); + TRACE("%s: cleanup-reclaimed-id [%zu]%" PRIaTXN, dbg_prefix(ctx), + ctx->cleaned_slot, ctx->cleaned_id); + tASSERT(txn, *txn->cursors == &ctx->cursor); + rc = cursor_del(&ctx->cursor, 0); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } while (ctx->cleaned_slot < MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)); + txl_sort(txn->tw.gc.reclaimed); + } + } else { + /* Удаляем оставшиеся вынутые из GC записи. */ + while (ctx->cleaned_id <= txn->tw.gc.last_reclaimed) { + rc = outer_first(&ctx->cursor, &key, nullptr); + if (rc == MDBX_NOTFOUND) + break; + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + if (!MDBX_DISABLE_VALIDATION && + unlikely(key.iov_len != sizeof(txnid_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid GC-key size", (unsigned)key.iov_len); + rc = MDBX_CORRUPTED; + goto bailout; + } + if (ctx->rid != ctx->cleaned_id) { + ctx->rid = ctx->cleaned_id; + ctx->reserved = 0; + ctx->reused_slot = 0; + } + ctx->cleaned_id = unaligned_peek_u64(4, key.iov_base); + if (ctx->cleaned_id > txn->tw.gc.last_reclaimed) + break; + if (likely(!ctx->dense)) { + rc = prepare_backlog(txn, ctx); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + tASSERT(txn, ctx->cleaned_id <= txn->tw.gc.last_reclaimed); + tASSERT(txn, ctx->cleaned_id <= env->lck->cached_oldest.weak); + TRACE("%s: cleanup-reclaimed-id %" PRIaTXN, dbg_prefix(ctx), + ctx->cleaned_id); + tASSERT(txn, 
*txn->cursors == &ctx->cursor); + rc = cursor_del(&ctx->cursor, 0); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + } + + tASSERT(txn, + pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - + MDBX_ENABLE_REFUND)); + tASSERT(txn, dpl_check(txn)); + if (AUDIT_ENABLED()) { + rc = audit_ex(txn, ctx->retired_stored, false); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + + /* return suitable into unallocated space */ + if (txn_refund(txn)) { + tASSERT(txn, + pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - + MDBX_ENABLE_REFUND)); + if (AUDIT_ENABLED()) { + rc = audit_ex(txn, ctx->retired_stored, false); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + } + + if (txn->tw.loose_pages) { + /* put loose pages into the reclaimed- or retired-list */ + rc = gcu_loose(txn, ctx); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + if (unlikely(txn->tw.loose_pages)) + continue; + } + + if (unlikely(ctx->reserved > MDBX_PNL_GETSIZE(txn->tw.relist)) && + (ctx->loop < 5 || ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.relist) > + env->maxgc_large1page / 2)) { + TRACE("%s: reclaimed-list changed %zu -> %zu, retry", dbg_prefix(ctx), + ctx->amount, MDBX_PNL_GETSIZE(txn->tw.relist)); + ctx->reserve_adj += ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.relist); + goto retry; + } + ctx->amount = MDBX_PNL_GETSIZE(txn->tw.relist); + + if (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { + /* store retired-list into GC */ + rc = gcu_retired(txn, ctx); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + continue; + } + + tASSERT(txn, + pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - + MDBX_ENABLE_REFUND)); + tASSERT(txn, txn->tw.loose_count == 0); + + TRACE("%s", " >> reserving"); + if (AUDIT_ENABLED()) { + rc = audit_ex(txn, ctx->retired_stored, false); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + const size_t left = ctx->amount - ctx->reserved - ctx->reserve_adj; + TRACE("%s: amount %zu, reserved 
%zd, reserve_adj %zu, left %zd, " + "lifo-reclaimed-slots %zu, " + "reused-gc-slots %zu", + dbg_prefix(ctx), ctx->amount, ctx->reserved, ctx->reserve_adj, left, + txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0, + ctx->reused_slot); + if (0 >= (intptr_t)left) + break; + + const rid_t rid_result = get_rid_for_reclaimed(txn, ctx, left); + if (unlikely(!rid_result.rid)) { + rc = rid_result.err; + if (likely(rc == MDBX_SUCCESS)) + continue; + if (likely(rc == MDBX_RESULT_TRUE)) + goto retry; + goto bailout; + } + tASSERT(txn, rid_result.err == MDBX_SUCCESS); + const txnid_t reservation_gc_id = rid_result.rid; + + size_t chunk = left; + if (unlikely(left > env->maxgc_large1page)) { + const size_t avail_gc_slots = + txn->tw.gc.reclaimed + ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot + 1 + : (ctx->rid < INT16_MAX) ? (size_t)ctx->rid + : INT16_MAX; + if (likely(avail_gc_slots > 1)) { +#if MDBX_ENABLE_BIGFOOT + chunk = env->maxgc_large1page; + if (avail_gc_slots < INT16_MAX && + unlikely(left > env->maxgc_large1page * avail_gc_slots)) + /* TODO: Можно смотреть последовательности какой длины есть в relist + * и пробовать нарезать куски соответствующего размера. + * Смысл в том, чтобы не дробить последовательности страниц, + * а использовать целиком. */ + chunk = env->maxgc_large1page + + left / (env->maxgc_large1page * avail_gc_slots) * + env->maxgc_large1page; +#else + if (chunk < env->maxgc_large1page * 2) + chunk /= 2; + else { + const size_t prefer_max_scatter = 257; + const size_t threshold = + env->maxgc_large1page * ((avail_gc_slots < prefer_max_scatter) + ? 
avail_gc_slots + : prefer_max_scatter); + if (left < threshold) + chunk = env->maxgc_large1page; + else { + const size_t tail = left - threshold + env->maxgc_large1page + 1; + size_t span = 1; + size_t avail = ((pgno2bytes(env, span) - PAGEHDRSZ) / + sizeof(pgno_t)) /* - 1 + span */; + if (tail > avail) { + for (size_t i = ctx->amount - span; i > 0; --i) { + if ((MDBX_PNL_ASCENDING ? (txn->tw.relist[i] + span) + : (txn->tw.relist[i] - span)) == + txn->tw.relist[i + span]) { /* ternary parenthesized: '==' binds tighter than '?:' */ + span += 1; + avail = + ((pgno2bytes(env, span) - PAGEHDRSZ) / sizeof(pgno_t)) - + 1 + span; + if (avail >= tail) + break; + } + } + } + + chunk = (avail >= tail) ? tail - span + : (avail_gc_slots > 3 && + ctx->reused_slot < prefer_max_scatter - 3) + ? avail - span + : tail; + } + } +#endif /* MDBX_ENABLE_BIGFOOT */ + } + } + tASSERT(txn, chunk > 0); + + TRACE("%s: gc_rid %" PRIaTXN ", reused_gc_slot %zu, reservation-id " + "%" PRIaTXN, + dbg_prefix(ctx), ctx->rid, ctx->reused_slot, reservation_gc_id); + + TRACE("%s: chunk %zu, gc-per-ovpage %u", dbg_prefix(ctx), chunk, + env->maxgc_large1page); + + tASSERT(txn, reservation_gc_id <= env->lck->cached_oldest.weak); + if (unlikely(reservation_gc_id < MIN_TXNID || + reservation_gc_id > + atomic_load64(&env->lck->cached_oldest, mo_Relaxed))) { + ERROR("** internal error (reservation_gc_id %" PRIaTXN ")", + reservation_gc_id); + rc = MDBX_PROBLEM; + goto bailout; + } + + key.iov_len = sizeof(reservation_gc_id); + key.iov_base = (void *)&reservation_gc_id; + data.iov_len = (chunk + 1) * sizeof(pgno_t); + TRACE("%s: reserve %zu [%zu...%zu) @%" PRIaTXN, dbg_prefix(ctx), chunk, + ctx->reserved + 1, ctx->reserved + chunk + 1, reservation_gc_id); + prepare_backlog(txn, ctx); + rc = cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE | MDBX_NOOVERWRITE); + tASSERT(txn, + pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - + MDBX_ENABLE_REFUND)); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + zeroize_reserved(env, data); + ctx->reserved += 
chunk; + TRACE("%s: reserved %zu (+%zu), continue", dbg_prefix(ctx), ctx->reserved, + chunk); + + continue; + } + + tASSERT( + txn, + ctx->cleaned_slot == + (txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0)); + + TRACE("%s", " >> filling"); + /* Fill in the reserved records */ + size_t excess_slots = 0; + ctx->fill_idx = + txn->tw.gc.reclaimed + ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot + : ctx->reused_slot; + rc = MDBX_SUCCESS; + tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - + MDBX_ENABLE_REFUND)); + tASSERT(txn, dpl_check(txn)); + if (ctx->amount) { + MDBX_val key, data; + key.iov_len = data.iov_len = 0; /* avoid MSVC warning */ + key.iov_base = data.iov_base = nullptr; + + size_t left = ctx->amount, excess = 0; + if (txn->tw.gc.reclaimed == nullptr) { + tASSERT(txn, ctx->lifo == 0); + rc = outer_first(&ctx->cursor, &key, &data); + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc == MDBX_NOTFOUND && ctx->reserve_adj) + goto retry_clean_adj; + goto bailout; + } + } else { + tASSERT(txn, ctx->lifo != 0); + } + + while (true) { + txnid_t fill_gc_id; + TRACE("%s: left %zu of %zu", dbg_prefix(ctx), left, + MDBX_PNL_GETSIZE(txn->tw.relist)); + if (txn->tw.gc.reclaimed == nullptr) { + tASSERT(txn, ctx->lifo == 0); + fill_gc_id = unaligned_peek_u64(4, key.iov_base); + if (ctx->fill_idx == 0 || fill_gc_id > txn->tw.gc.last_reclaimed) { + if (!left) + break; + NOTICE("** restart: reserve depleted (fill_idx %zu, fill_id %" PRIaTXN + " > last_reclaimed %" PRIaTXN ", left %zu", + ctx->fill_idx, fill_gc_id, txn->tw.gc.last_reclaimed, left); + ctx->reserve_adj = + (ctx->reserve_adj > left) ? 
ctx->reserve_adj - left : 0; + goto retry; + } + ctx->fill_idx -= 1; + } else { + tASSERT(txn, ctx->lifo != 0); + if (ctx->fill_idx >= MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)) { + if (!left) + break; + NOTICE("** restart: reserve depleted (fill_idx %zu >= " + "gc.reclaimed %zu, left %zu", + ctx->fill_idx, MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed), left); + ctx->reserve_adj = + (ctx->reserve_adj > left) ? ctx->reserve_adj - left : 0; + goto retry; + } + ctx->fill_idx += 1; + fill_gc_id = txn->tw.gc.reclaimed[ctx->fill_idx]; + TRACE("%s: seek-reservation @%" PRIaTXN " at gc.reclaimed[%zu]", + dbg_prefix(ctx), fill_gc_id, ctx->fill_idx); + key.iov_base = &fill_gc_id; + key.iov_len = sizeof(fill_gc_id); + rc = cursor_seek(&ctx->cursor, &key, &data, MDBX_SET_KEY).err; + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + tASSERT(txn, + ctx->cleaned_slot == (txn->tw.gc.reclaimed + ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) + : 0)); + tASSERT(txn, + fill_gc_id > 0 && fill_gc_id <= env->lck->cached_oldest.weak); + key.iov_base = &fill_gc_id; + key.iov_len = sizeof(fill_gc_id); + + tASSERT(txn, data.iov_len >= sizeof(pgno_t) * 2); + size_t chunk = data.iov_len / sizeof(pgno_t) - 1; + if (unlikely(chunk > left)) { + const size_t delta = chunk - left; + excess += delta; + if (!left) { + excess_slots += 1; + goto next; + } + TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, dbg_prefix(ctx), chunk, + left, fill_gc_id); + if ((ctx->loop < 5 && delta > (ctx->loop / 2)) || + delta > env->maxgc_large1page) + data.iov_len = (left + 1) * sizeof(pgno_t); + chunk = left; + } + rc = cursor_put(&ctx->cursor, &key, &data, MDBX_CURRENT | MDBX_RESERVE); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + zeroize_reserved(env, data); + + if (unlikely(txn->tw.loose_count || + ctx->amount != MDBX_PNL_GETSIZE(txn->tw.relist))) { + NOTICE("** restart: reclaimed-list changed (%zu -> %zu, loose +%zu)", + ctx->amount, MDBX_PNL_GETSIZE(txn->tw.relist), + txn->tw.loose_count); + if (ctx->loop < 5 || 
(ctx->loop > 10 && (ctx->loop & 1))) + goto retry_clean_adj; + goto retry; + } + + if (unlikely(txn->tw.gc.reclaimed + ? ctx->cleaned_slot < + MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) + : ctx->cleaned_id < txn->tw.gc.last_reclaimed)) { + NOTICE("%s", "** restart: reclaimed-slots changed"); + goto retry; + } + if (unlikely(ctx->retired_stored != + MDBX_PNL_GETSIZE(txn->tw.retired_pages))) { + tASSERT(txn, + ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)); + NOTICE("** restart: retired-list growth (%zu -> %zu)", + ctx->retired_stored, MDBX_PNL_GETSIZE(txn->tw.retired_pages)); + goto retry; + } + + pgno_t *dst = data.iov_base; + *dst++ = (pgno_t)chunk; + pgno_t *src = MDBX_PNL_BEGIN(txn->tw.relist) + left - chunk; + memcpy(dst, src, chunk * sizeof(pgno_t)); + pgno_t *from = src, *to = src + chunk; + TRACE("%s: fill %zu [ %zu:%" PRIaPGNO "...%zu:%" PRIaPGNO "] @%" PRIaTXN, + dbg_prefix(ctx), chunk, from - txn->tw.relist, from[0], + to - txn->tw.relist, to[-1], fill_gc_id); + + left -= chunk; + if (AUDIT_ENABLED()) { + rc = audit_ex(txn, ctx->retired_stored + ctx->amount - left, true); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + + next: + if (txn->tw.gc.reclaimed == nullptr) { + tASSERT(txn, ctx->lifo == 0); + rc = outer_next(&ctx->cursor, &key, &data, MDBX_NEXT); + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc != MDBX_NOTFOUND) + goto bailout; + rc = MDBX_SUCCESS; + break; + } + } else { + tASSERT(txn, ctx->lifo != 0); + } + } + + if (excess) { + size_t n = excess, adj = excess; + while (n >= env->maxgc_large1page) + adj -= n /= env->maxgc_large1page; + ctx->reserve_adj += adj; + TRACE("%s: extra %zu reserved space, adj +%zu (%zu)", dbg_prefix(ctx), + excess, adj, ctx->reserve_adj); + } + } + + tASSERT(txn, rc == MDBX_SUCCESS); + if (unlikely(txn->tw.loose_count != 0 || + ctx->amount != MDBX_PNL_GETSIZE(txn->tw.relist))) { + NOTICE("** restart: got %zu loose pages (reclaimed-list %zu -> %zu)", + txn->tw.loose_count, ctx->amount, 
MDBX_PNL_GETSIZE(txn->tw.relist)); + goto retry; + } + + if (unlikely(excess_slots)) { + const bool will_retry = ctx->loop < 5 || excess_slots > 1; + NOTICE("** %s: reserve excess (excess-slots %zu, filled-slot %zu, adj %zu, " + "loop %zu)", + will_retry ? "restart" : "ignore", excess_slots, ctx->fill_idx, + ctx->reserve_adj, ctx->loop); + if (will_retry) + goto retry; + } + + tASSERT(txn, txn->tw.gc.reclaimed == nullptr || + ctx->cleaned_slot == MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)); + +bailout: + txn->cursors[FREE_DBI] = ctx->cursor.next; + + MDBX_PNL_SETSIZE(txn->tw.relist, 0); +#if MDBX_ENABLE_PROFGC + env->lck->pgops.gc_prof.wloops += (uint32_t)ctx->loop; +#endif /* MDBX_ENABLE_PROFGC */ + TRACE("<<< %zu loops, rc = %d", ctx->loop, rc); + return rc; +} diff --git a/src/gc.h b/src/gc.h new file mode 100644 index 00000000..f77a3bd5 --- /dev/null +++ b/src/gc.h @@ -0,0 +1,39 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +typedef struct gc_update_context { + size_t loop, reserve_adj; + size_t retired_stored; + size_t amount, reserved, cleaned_slot, reused_slot, fill_idx; + txnid_t cleaned_id, rid; + bool lifo, dense; +#if MDBX_ENABLE_BIGFOOT + txnid_t bigfoot; +#endif /* MDBX_ENABLE_BIGFOOT */ + union { + MDBX_cursor cursor; + cursor_couple_t couple; + }; +} gcu_t; + +static inline int gc_update_init(MDBX_txn *txn, gcu_t *ctx) { + memset(ctx, 0, offsetof(gcu_t, cursor)); + ctx->lifo = (txn->env->flags & MDBX_LIFORECLAIM) != 0; +#if MDBX_ENABLE_BIGFOOT + ctx->bigfoot = txn->txnid; +#endif /* MDBX_ENABLE_BIGFOOT */ + return cursor_init(&ctx->cursor, txn, FREE_DBI); +} + +#define ALLOC_DEFAULT 0 +#define ALLOC_RESERVE 1 +#define ALLOC_UNIMPORTANT 2 +MDBX_INTERNAL pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, + uint8_t flags); + +MDBX_INTERNAL pgr_t gc_alloc_single(const MDBX_cursor *const mc); +MDBX_INTERNAL int gc_update(MDBX_txn 
*txn, gcu_t *ctx); diff --git a/src/global.c b/src/global.c new file mode 100644 index 00000000..05755826 --- /dev/null +++ b/src/global.c @@ -0,0 +1,476 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +static void mdbx_init(void); +static void mdbx_fini(void); + +/*----------------------------------------------------------------------------*/ +/* mdbx constructor/destructor */ + +#if defined(_WIN32) || defined(_WIN64) + +#if MDBX_BUILD_SHARED_LIBRARY +#if MDBX_WITHOUT_MSVC_CRT && defined(NDEBUG) +/* DEBUG/CHECKED builds still require MSVC's CRT for runtime checks. + * + * Define the DLL entry point only for Release builds, i.e. when NDEBUG is + * defined and MDBX_WITHOUT_MSVC_CRT=ON. If the entry point isn't defined, the + * MSVC toolchain automatically uses DllMainCRTStartup() from the CRT library, + * which in turn calls DllMain() from our mdbx.dll */ +#pragma comment(linker, "/ENTRY:DllMain") +#endif /* MDBX_WITHOUT_MSVC_CRT */ +/* Windows module notification handler. It is compiled as DllMain() for the shared library, otherwise as mdbx_module_handler() (static unless MDBX_MANUAL_MODULE_HANDLER is set). It runs library init/fini on process attach/detach and rthc_thread_dtor() on thread detach. */ +BOOL APIENTRY DllMain(HANDLE module, DWORD reason, LPVOID reserved) +#else +#if !MDBX_MANUAL_MODULE_HANDLER +static +#endif /* !MDBX_MANUAL_MODULE_HANDLER */ + void NTAPI + mdbx_module_handler(PVOID module, DWORD reason, PVOID reserved) +#endif /* MDBX_BUILD_SHARED_LIBRARY */ +{ + (void)reserved; + switch (reason) { + case DLL_PROCESS_ATTACH: + windows_import(); + mdbx_init(); + break; + case DLL_PROCESS_DETACH: + mdbx_fini(); + break; + + case DLL_THREAD_ATTACH: /* nothing to do when a thread starts */ + break; + case DLL_THREAD_DETACH: + rthc_thread_dtor(module); + break; + } +#if MDBX_BUILD_SHARED_LIBRARY + return TRUE; /* only the DllMain() variant has a return value */ +#endif +} + +#if !MDBX_BUILD_SHARED_LIBRARY && !MDBX_MANUAL_MODULE_HANDLER +/* *INDENT-OFF* */ +/* clang-format off */ +#if defined(_MSC_VER) +# pragma const_seg(push) +# pragma data_seg(push) + +# ifndef _M_IX86 + /* kick a linker to create the TLS directory if not already done */ +# pragma comment(linker, "/INCLUDE:_tls_used") + /* Force some symbol references. 
*/ +# pragma comment(linker, "/INCLUDE:mdbx_tls_anchor") + /* specific const-segment for WIN64 */ +# pragma const_seg(".CRT$XLB") + const +# else + /* kick a linker to create the TLS directory if not already done */ +# pragma comment(linker, "/INCLUDE:__tls_used") + /* Force some symbol references. */ +# pragma comment(linker, "/INCLUDE:_mdbx_tls_anchor") + /* specific data-segment for WIN32 */ +# pragma data_seg(".CRT$XLB") +# endif + + __declspec(allocate(".CRT$XLB")) PIMAGE_TLS_CALLBACK mdbx_tls_anchor = mdbx_module_handler; +# pragma data_seg(pop) +# pragma const_seg(pop) + +#elif defined(__GNUC__) +# ifndef _M_IX86 + const +# endif + PIMAGE_TLS_CALLBACK mdbx_tls_anchor __attribute__((__section__(".CRT$XLB"), used)) = mdbx_module_handler; +#else +# error FIXME +#endif +/* *INDENT-ON* */ +/* clang-format on */ +#endif /* !MDBX_BUILD_SHARED_LIBRARY && !MDBX_MANUAL_MODULE_HANDLER */ + +#else + +#if defined(__linux__) || defined(__gnu_linux__) +#include + +MDBX_EXCLUDE_FOR_GPROF +__cold static uint8_t probe_for_WSL(const char *tag) { + const char *const WSL = strstr(tag, "WSL"); + if (WSL && WSL[3] >= '2' && WSL[3] <= '9') + return WSL[3] - '0'; + const char *const wsl = strstr(tag, "wsl"); + if (wsl && wsl[3] >= '2' && wsl[3] <= '9') + return wsl[3] - '0'; + if (WSL || wsl || strcasestr(tag, "Microsoft")) + /* Expecting no new kernel within WSL1, either it will explicitly + * marked by an appropriate WSL-version hint. */ + return (globals.linux_kernel_version < /* 4.19.x */ 0x04130000) ? 
1 : 2; + return 0; +} +#endif /* Linux */ + +#ifdef ENABLE_GPROF +extern void _mcleanup(void); +extern void monstartup(unsigned long, unsigned long); +extern void _init(void); +extern void _fini(void); +extern void __gmon_start__(void) __attribute__((__weak__)); +#endif /* ENABLE_GPROF */ + +MDBX_EXCLUDE_FOR_GPROF +__cold static __attribute__((__constructor__)) void +mdbx_global_constructor(void) { +#ifdef ENABLE_GPROF + if (!&__gmon_start__) + monstartup((uintptr_t)&_init, (uintptr_t)&_fini); +#endif /* ENABLE_GPROF */ + +#if defined(__linux__) || defined(__gnu_linux__) + struct utsname buffer; + if (uname(&buffer) == 0) { + int i = 0; + char *p = buffer.release; + while (*p && i < 4) { + if (*p >= '0' && *p <= '9') { + long number = strtol(p, &p, 10); + if (number > 0) { + if (number > 255) + number = 255; + globals.linux_kernel_version += number << (24 - i * 8); + } + ++i; + } else { + ++p; + } + } + /* "Official" way of detecting WSL1 but not WSL2 + * https://github.com/Microsoft/WSL/issues/423#issuecomment-221627364 + * + * WARNING: False negative detection of WSL1 will result in DATA LOSS! + * So, the REQUIREMENTS for this code: + * 1. MUST detect WSL1 without false-negatives. + * 2. DESIRABLE detect WSL2 but without the risk of violating the first. */ + globals.running_on_WSL1 = probe_for_WSL(buffer.version) == 1 || + probe_for_WSL(buffer.sysname) == 1 || + probe_for_WSL(buffer.release) == 1; + } +#endif /* Linux */ + + mdbx_init(); +} + +MDBX_EXCLUDE_FOR_GPROF +__cold static __attribute__((__destructor__)) void +mdbx_global_destructor(void) { + mdbx_fini(); +#ifdef ENABLE_GPROF + if (!&__gmon_start__) + _mcleanup(); +#endif /* ENABLE_GPROF */ +} + +#endif /* ! 
Windows */ + +/******************************************************************************/ + +struct libmdbx_globals globals; +/* One-time library start-up: derives the default runtime flags from MDBX_DEBUG, sets the log level to MDBX_LOG_FATAL, creates globals.debug_lock, then runs osal_ctor() and rthc_ctor(). Debug builds additionally run the troika/pv2pages self-checks. */ +__cold static void mdbx_init(void) { + globals.runtime_flags = ((MDBX_DEBUG) > 0) * MDBX_DBG_ASSERT + + ((MDBX_DEBUG) > 1) * MDBX_DBG_AUDIT; + globals.loglevel = MDBX_LOG_FATAL; + ENSURE(nullptr, osal_fastmutex_init(&globals.debug_lock) == 0); + osal_ctor(); + assert(globals.sys_pagesize > 0 && + (globals.sys_pagesize & (globals.sys_pagesize - 1)) == 0); /* page size must be a power of two */ + rthc_ctor(); +#if MDBX_DEBUG + ENSURE(nullptr, troika_verify_fsm()); + ENSURE(nullptr, pv2pages_verify()); +#endif /* MDBX_DEBUG*/ +} +/* Library shutdown, undoing mdbx_init() in reverse order: rthc_dtor() for the current pid, osal_dtor(), then destruction of globals.debug_lock. */ +MDBX_EXCLUDE_FOR_GPROF +__cold static void mdbx_fini(void) { + const uint32_t current_pid = osal_getpid(); + TRACE(">> pid %" PRIu32, current_pid); + rthc_dtor(current_pid); + osal_dtor(); + TRACE("<< pid %" PRIu32, current_pid); /* no explicit newline here, matching the other TRACE calls */ + ENSURE(nullptr, osal_fastmutex_destroy(&globals.debug_lock) == 0); +} + +/******************************************************************************/ + +/* *INDENT-OFF* */ +/* clang-format off */ +/* Exported build description. The string fields are, in order: build timestamp, target (OS-architecture, optionally followed by the build type), option summary, compiler, and build flags. */ +__dll_export +#ifdef __attribute_used__ + __attribute_used__ +#elif defined(__GNUC__) || __has_attribute(__used__) + __attribute__((__used__)) +#endif +#ifdef __attribute_externally_visible__ + __attribute_externally_visible__ +#elif (defined(__GNUC__) && !defined(__clang__)) || \ + __has_attribute(__externally_visible__) + __attribute__((__externally_visible__)) +#endif + const struct MDBX_build_info mdbx_build = { +#ifdef MDBX_BUILD_TIMESTAMP + MDBX_BUILD_TIMESTAMP +#else + "\"" __DATE__ " " __TIME__ "\"" +#endif /* MDBX_BUILD_TIMESTAMP */ + + , +#ifdef MDBX_BUILD_TARGET + MDBX_BUILD_TARGET +#else + #if defined(__ANDROID_API__) + "Android" MDBX_STRINGIFY(__ANDROID_API__) + #elif defined(__linux__) || defined(__gnu_linux__) + "Linux" + #elif defined(EMSCRIPTEN) || defined(__EMSCRIPTEN__) + "webassembly" + #elif defined(__CYGWIN__) + "CYGWIN" + #elif defined(_WIN64) || defined(_WIN32) || defined(__TOS_WIN__) \ + || 
defined(__WINDOWS__) + "Windows" + #elif defined(__APPLE__) + #if (defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) \ + || (defined(TARGET_IPHONE_SIMULATOR) && TARGET_IPHONE_SIMULATOR) + "iOS" + #else + "MacOS" + #endif + #elif defined(__FreeBSD__) + "FreeBSD" + #elif defined(__DragonFly__) + "DragonFlyBSD" + #elif defined(__NetBSD__) + "NetBSD" + #elif defined(__OpenBSD__) + "OpenBSD" + #elif defined(__bsdi__) + "UnixBSDI" + #elif defined(__MACH__) + "MACH" + #elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) + "HPUX" + #elif defined(_AIX) + "AIX" + #elif defined(__sun) && defined(__SVR4) + "Solaris" + #elif defined(__BSD__) || defined(BSD) + "UnixBSD" + #elif defined(__unix__) || defined(UNIX) || defined(__unix) \ + || defined(__UNIX) || defined(__UNIX__) + "UNIX" + #elif defined(_POSIX_VERSION) + "POSIX" MDBX_STRINGIFY(_POSIX_VERSION) + #else + "UnknownOS" + #endif /* Target OS */ + + "-" + + #if defined(__amd64__) + "AMD64" + #elif defined(__ia32__) + "IA32" + #elif defined(__e2k__) || defined(__elbrus__) + "Elbrus" + #elif defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) + "Alpha" + #elif defined(__aarch64__) || defined(_M_ARM64) + "ARM64" + #elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) \ + || defined(__TARGET_ARCH_THUMB) || defined(_ARM) || defined(_M_ARM) \ + || defined(_M_ARMT) || defined(__arm) + "ARM" + #elif defined(__mips64) || defined(__mips64__) || (defined(__mips) && (__mips >= 64)) + "MIPS64" + #elif defined(__mips__) || defined(__mips) || defined(_R4000) || defined(__MIPS__) + "MIPS" + #elif defined(__hppa64__) || defined(__HPPA64__) || defined(__hppa64) + "PARISC64" + #elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) + "PARISC" + #elif defined(__ia64__) || defined(__ia64) || defined(_IA64) \ + || defined(__IA64__) || defined(_M_IA64) || defined(__itanium__) + "Itanium" + #elif defined(__powerpc64__) || defined(__ppc64__) || defined(__ppc64) \ + || defined(__powerpc64) || 
defined(_ARCH_PPC64) + "PowerPC64" + #elif defined(__powerpc__) || defined(__ppc__) || defined(__powerpc) \ + || defined(__ppc) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__POWERPC__) + "PowerPC" + #elif defined(__sparc64__) || defined(__sparc64) + "SPARC64" + #elif defined(__sparc__) || defined(__sparc) + "SPARC" + #elif defined(__s390__) || defined(__s390) || defined(__zarch__) || defined(__zarch) + "S390" + #else + "UnknownARCH" + #endif +#endif /* MDBX_BUILD_TARGET */ + +#ifdef MDBX_BUILD_TYPE +# if defined(_MSC_VER) +# pragma message("Configuration-depended MDBX_BUILD_TYPE: " MDBX_BUILD_TYPE) +# endif + "-" MDBX_BUILD_TYPE +#endif /* MDBX_BUILD_TYPE */ + , + "MDBX_DEBUG=" MDBX_STRINGIFY(MDBX_DEBUG) +#ifdef ENABLE_GPROF + " ENABLE_GPROF" +#endif /* ENABLE_GPROF */ + " MDBX_WORDBITS=" MDBX_STRINGIFY(MDBX_WORDBITS) + " BYTE_ORDER=" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "LITTLE_ENDIAN" +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + "BIG_ENDIAN" +#else + #error "FIXME: Unsupported byte order" +#endif /* __BYTE_ORDER__ */ + " MDBX_ENABLE_BIGFOOT=" MDBX_STRINGIFY(MDBX_ENABLE_BIGFOOT) + " MDBX_ENV_CHECKPID=" MDBX_ENV_CHECKPID_CONFIG + " MDBX_TXN_CHECKOWNER=" MDBX_TXN_CHECKOWNER_CONFIG + " MDBX_64BIT_ATOMIC=" MDBX_64BIT_ATOMIC_CONFIG + " MDBX_64BIT_CAS=" MDBX_64BIT_CAS_CONFIG + " MDBX_TRUST_RTC=" MDBX_TRUST_RTC_CONFIG + " MDBX_AVOID_MSYNC=" MDBX_STRINGIFY(MDBX_AVOID_MSYNC) + " MDBX_ENABLE_REFUND=" MDBX_STRINGIFY(MDBX_ENABLE_REFUND) + " MDBX_ENABLE_MADVISE=" MDBX_STRINGIFY(MDBX_ENABLE_MADVISE) + " MDBX_ENABLE_MINCORE=" MDBX_STRINGIFY(MDBX_ENABLE_MINCORE) + " MDBX_ENABLE_PGOP_STAT=" MDBX_STRINGIFY(MDBX_ENABLE_PGOP_STAT) + " MDBX_ENABLE_PROFGC=" MDBX_STRINGIFY(MDBX_ENABLE_PROFGC) +#if MDBX_DISABLE_VALIDATION + " MDBX_DISABLE_VALIDATION=YES" +#endif /* MDBX_DISABLE_VALIDATION */ +#ifdef __SANITIZE_ADDRESS__ + " SANITIZE_ADDRESS=YES" +#endif /* __SANITIZE_ADDRESS__ */ +#ifdef ENABLE_MEMCHECK + " ENABLE_MEMCHECK=YES" +#endif /* ENABLE_MEMCHECK */ +#if 
MDBX_FORCE_ASSERTIONS + " MDBX_FORCE_ASSERTIONS=YES" +#endif /* MDBX_FORCE_ASSERTIONS */ +#ifdef _GNU_SOURCE + " _GNU_SOURCE=YES" +#else + " _GNU_SOURCE=NO" +#endif /* _GNU_SOURCE */ +#ifdef __APPLE__ + " MDBX_OSX_SPEED_INSTEADOF_DURABILITY=" MDBX_STRINGIFY(MDBX_OSX_SPEED_INSTEADOF_DURABILITY) +#endif /* MacOS */ +#if defined(_WIN32) || defined(_WIN64) + " MDBX_WITHOUT_MSVC_CRT=" MDBX_STRINGIFY(MDBX_WITHOUT_MSVC_CRT) + " MDBX_BUILD_SHARED_LIBRARY=" MDBX_STRINGIFY(MDBX_BUILD_SHARED_LIBRARY) +#if !MDBX_BUILD_SHARED_LIBRARY + " MDBX_MANUAL_MODULE_HANDLER=" MDBX_STRINGIFY(MDBX_MANUAL_MODULE_HANDLER) +#endif + " WINVER=" MDBX_STRINGIFY(WINVER) +#else /* Windows */ + " MDBX_LOCKING=" MDBX_LOCKING_CONFIG + " MDBX_USE_OFDLOCKS=" MDBX_USE_OFDLOCKS_CONFIG +#endif /* !Windows */ + " MDBX_CACHELINE_SIZE=" MDBX_STRINGIFY(MDBX_CACHELINE_SIZE) + " MDBX_CPU_WRITEBACK_INCOHERENT=" MDBX_STRINGIFY(MDBX_CPU_WRITEBACK_INCOHERENT) + " MDBX_MMAP_INCOHERENT_CPU_CACHE=" MDBX_STRINGIFY(MDBX_MMAP_INCOHERENT_CPU_CACHE) + " MDBX_MMAP_INCOHERENT_FILE_WRITE=" MDBX_STRINGIFY(MDBX_MMAP_INCOHERENT_FILE_WRITE) + " MDBX_UNALIGNED_OK=" MDBX_STRINGIFY(MDBX_UNALIGNED_OK) + " MDBX_PNL_ASCENDING=" MDBX_STRINGIFY(MDBX_PNL_ASCENDING) + , +#ifdef MDBX_BUILD_COMPILER + MDBX_BUILD_COMPILER +#else + #ifdef __INTEL_COMPILER + "Intel C/C++ " MDBX_STRINGIFY(__INTEL_COMPILER) + #elif defined(__apple_build_version__) + "Apple clang " MDBX_STRINGIFY(__apple_build_version__) + #elif defined(__ibmxl__) + "IBM clang C " MDBX_STRINGIFY(__ibmxl_version__) "." MDBX_STRINGIFY(__ibmxl_release__) + "." MDBX_STRINGIFY(__ibmxl_modification__) "." MDBX_STRINGIFY(__ibmxl_ptf_fix_level__) + #elif defined(__clang__) + "clang " MDBX_STRINGIFY(__clang_version__) + #elif defined(__MINGW64__) + "MINGW-64 " MDBX_STRINGIFY(__MINGW64_MAJOR_VERSION) "." MDBX_STRINGIFY(__MINGW64_MINOR_VERSION) + #elif defined(__MINGW32__) + "MINGW-32 " MDBX_STRINGIFY(__MINGW32_MAJOR_VERSION) "." 
MDBX_STRINGIFY(__MINGW32_MINOR_VERSION) + #elif defined(__MINGW__) + "MINGW " MDBX_STRINGIFY(__MINGW_MAJOR_VERSION) "." MDBX_STRINGIFY(__MINGW_MINOR_VERSION) + #elif defined(__IBMC__) + "IBM C " MDBX_STRINGIFY(__IBMC__) + #elif defined(__GNUC__) + "GNU C/C++ " + #ifdef __VERSION__ + __VERSION__ + #else + MDBX_STRINGIFY(__GNUC__) "." MDBX_STRINGIFY(__GNUC_MINOR__) "." MDBX_STRINGIFY(__GNUC_PATCHLEVEL__) + #endif + #elif defined(_MSC_VER) + "MSVC " MDBX_STRINGIFY(_MSC_FULL_VER) "-" MDBX_STRINGIFY(_MSC_BUILD) + #else + "Unknown compiler" + #endif +#endif /* MDBX_BUILD_COMPILER */ + , +#ifdef MDBX_BUILD_FLAGS_CONFIG + MDBX_BUILD_FLAGS_CONFIG +#endif /* MDBX_BUILD_FLAGS_CONFIG */ +#ifdef MDBX_BUILD_FLAGS + MDBX_BUILD_FLAGS +#endif /* MDBX_BUILD_FLAGS */ +#if !(defined(MDBX_BUILD_FLAGS_CONFIG) || defined(MDBX_BUILD_FLAGS)) + "undefined (please use correct build script)" +#ifdef _MSC_VER +#pragma message("warning: Build flags undefined. Please use correct build script") +#else +#warning "Build flags undefined. 
Please use correct build script" +#endif // _MSC_VER +#endif +}; +/* Default AddressSanitizer runtime options for ASan-instrumented builds. The ASan runtime looks this function up by name; the result is a colon-separated option list assembled at compile time from the literals below. */ +#ifdef __SANITIZE_ADDRESS__ +#if !defined(_MSC_VER) || __has_attribute(weak) +LIBMDBX_API __attribute__((__weak__)) +#endif +const char *__asan_default_options(void) { + return "symbolize=1:allow_addr2line=1:" +#if MDBX_DEBUG + "debug=1:" + "verbosity=2:" +#endif /* MDBX_DEBUG */ + "log_threads=1:" + "report_globals=1:" + "replace_str=1:replace_intrin=1:" + "malloc_context_size=9:" +#if !defined(__APPLE__) + "detect_leaks=1:" +#endif + "check_printf=1:" + "detect_deadlocks=1:" +#ifndef LTO_ENABLED + "check_initialization_order=1:" +#endif + "detect_stack_use_after_return=1:" + "intercept_tls_get_addr=1:" + "decorate_proc_maps=1:" + "abort_on_error=1"; +} +#endif /* __SANITIZE_ADDRESS__ */ + +/* *INDENT-ON* */ +/* clang-format on */ diff --git a/src/internals.h b/src/internals.h index 64bf7355..45d83725 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1,1153 +1,21 @@ -/* - * Copyright 2015-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . */ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \note Please refer to the COPYRIGHT file for an explanation of the license change, +/// credits and acknowledgments. 
+/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 #pragma once -#ifdef MDBX_CONFIG_H -#include MDBX_CONFIG_H -#endif - -#define LIBMDBX_INTERNALS -#ifdef xMDBX_TOOLS -#define MDBX_DEPRECATED -#endif /* xMDBX_TOOLS */ - -#ifdef xMDBX_ALLOY -/* Amalgamated build */ -#define MDBX_INTERNAL_FUNC static -#define MDBX_INTERNAL_VAR_PROTO static -#define MDBX_INTERNAL_VAR_INSTA static -#else -/* Non-amalgamated build */ -#define MDBX_INTERNAL_FUNC -#define MDBX_INTERNAL_VAR_PROTO extern -#define MDBX_INTERNAL_VAR_INSTA -#endif /* xMDBX_ALLOY */ /*----------------------------------------------------------------------------*/ -/** Disables using GNU/Linux libc extensions. - * \ingroup build_option - * \note This option couldn't be moved to the options.h since dependent - * control macros/defined should be prepared before include the options.h */ -#ifndef MDBX_DISABLE_GNU_SOURCE -#define MDBX_DISABLE_GNU_SOURCE 0 -#endif -#if MDBX_DISABLE_GNU_SOURCE -#undef _GNU_SOURCE -#elif (defined(__linux__) || defined(__gnu_linux__)) && !defined(_GNU_SOURCE) -#define _GNU_SOURCE -#endif /* MDBX_DISABLE_GNU_SOURCE */ - -/* Should be defined before any includes */ -#if !defined(_FILE_OFFSET_BITS) && !defined(__ANDROID_API__) && \ - !defined(ANDROID) -#define _FILE_OFFSET_BITS 64 -#endif - -#ifdef __APPLE__ -#define _DARWIN_C_SOURCE -#endif - -#ifdef _MSC_VER -#if _MSC_FULL_VER < 190024234 -/* Actually libmdbx was not tested with compilers older than 19.00.24234 (Visual - * Studio 2015 Update 3). But you could remove this #error and try to continue - * at your own risk. In such case please don't rise up an issues related ONLY to - * old compilers. - * - * NOTE: - * Unfortunately, there are several different builds of "Visual Studio" that - * are called "Visual Studio 2015 Update 3". - * - * The 190024234 is used here because it is minimal version of Visual Studio - * that was used for build and testing libmdbx in recent years. 
Soon this - * value will be increased to 19.0.24241.7, since build and testing using - * "Visual Studio 2015" will be performed only at https://ci.appveyor.com. - * - * Please ask Microsoft (but not us) for information about version differences - * and how to and where you can obtain the latest "Visual Studio 2015" build - * with all fixes. - */ -#error \ - "At least \"Microsoft C/C++ Compiler\" version 19.00.24234 (Visual Studio 2015 Update 3) is required." -#endif -#ifndef _CRT_SECURE_NO_WARNINGS -#define _CRT_SECURE_NO_WARNINGS -#endif /* _CRT_SECURE_NO_WARNINGS */ -#if _MSC_VER > 1800 -#pragma warning(disable : 4464) /* relative include path contains '..' */ -#endif -#if _MSC_VER > 1913 -#pragma warning(disable : 5045) /* will insert Spectre mitigation... */ -#endif -#if _MSC_VER > 1914 -#pragma warning( \ - disable : 5105) /* winbase.h(9531): warning C5105: macro expansion \ - producing 'defined' has undefined behavior */ -#endif -#if _MSC_VER < 1920 -/* avoid "error C2219: syntax error: type qualifier must be after '*'" */ -#define __restrict -#endif -#if _MSC_VER > 1930 -#pragma warning(disable : 6235) /* is always a constant */ -#pragma warning(disable : 6237) /* is never evaluated and might \ - have side effects */ -#endif -#pragma warning(disable : 4710) /* 'xyz': function not inlined */ -#pragma warning(disable : 4711) /* function 'xyz' selected for automatic \ - inline expansion */ -#pragma warning(disable : 4201) /* nonstandard extension used: nameless \ - struct/union */ -#pragma warning(disable : 4702) /* unreachable code */ -#pragma warning(disable : 4706) /* assignment within conditional expression */ -#pragma warning(disable : 4127) /* conditional expression is constant */ -#pragma warning(disable : 4324) /* 'xyz': structure was padded due to \ - alignment specifier */ -#pragma warning(disable : 4310) /* cast truncates constant value */ -#pragma warning(disable : 4820) /* bytes padding added after data member for \ - alignment */ -#pragma 
warning(disable : 4548) /* expression before comma has no effect; \ - expected expression with side - effect */ -#pragma warning(disable : 4366) /* the result of the unary '&' operator may be \ - unaligned */ -#pragma warning(disable : 4200) /* nonstandard extension used: zero-sized \ - array in struct/union */ -#pragma warning(disable : 4204) /* nonstandard extension used: non-constant \ - aggregate initializer */ -#pragma warning( \ - disable : 4505) /* unreferenced local function has been removed */ -#endif /* _MSC_VER (warnings) */ - -#if defined(__GNUC__) && __GNUC__ < 9 -#pragma GCC diagnostic ignored "-Wattributes" -#endif /* GCC < 9 */ - -#if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) && \ - !defined(__USE_MINGW_ANSI_STDIO) -#define __USE_MINGW_ANSI_STDIO 1 -#endif /* MinGW */ - -#if (defined(_WIN32) || defined(_WIN64)) && !defined(UNICODE) -#define UNICODE -#endif /* UNICODE */ - -#include "../mdbx.h" -#include "base.h" - -#if defined(__GNUC__) && !__GNUC_PREREQ(4, 2) -/* Actually libmdbx was not tested with compilers older than GCC 4.2. - * But you could ignore this warning at your own risk. - * In such case please don't rise up an issues related ONLY to old compilers. - */ -#warning "libmdbx required GCC >= 4.2" -#endif - -#if defined(__clang__) && !__CLANG_PREREQ(3, 8) -/* Actually libmdbx was not tested with CLANG older than 3.8. - * But you could ignore this warning at your own risk. - * In such case please don't rise up an issues related ONLY to old compilers. - */ -#warning "libmdbx required CLANG >= 3.8" -#endif - -#if defined(__GLIBC__) && !__GLIBC_PREREQ(2, 12) -/* Actually libmdbx was not tested with something older than glibc 2.12. - * But you could ignore this warning at your own risk. - * In such case please don't rise up an issues related ONLY to old systems. - */ -#warning "libmdbx was only tested with GLIBC >= 2.12." 
-#endif - -#ifdef __SANITIZE_THREAD__ -#warning \ - "libmdbx don't compatible with ThreadSanitizer, you will get a lot of false-positive issues." -#endif /* __SANITIZE_THREAD__ */ - -#if __has_warning("-Wnested-anon-types") -#if defined(__clang__) -#pragma clang diagnostic ignored "-Wnested-anon-types" -#elif defined(__GNUC__) -#pragma GCC diagnostic ignored "-Wnested-anon-types" -#else -#pragma warning disable "nested-anon-types" -#endif -#endif /* -Wnested-anon-types */ - -#if __has_warning("-Wconstant-logical-operand") -#if defined(__clang__) -#pragma clang diagnostic ignored "-Wconstant-logical-operand" -#elif defined(__GNUC__) -#pragma GCC diagnostic ignored "-Wconstant-logical-operand" -#else -#pragma warning disable "constant-logical-operand" -#endif -#endif /* -Wconstant-logical-operand */ - -#if defined(__LCC__) && (__LCC__ <= 121) -/* bug #2798 */ -#pragma diag_suppress alignment_reduction_ignored -#elif defined(__ICC) -#pragma warning(disable : 3453 1366) -#elif __has_warning("-Walignment-reduction-ignored") -#if defined(__clang__) -#pragma clang diagnostic ignored "-Walignment-reduction-ignored" -#elif defined(__GNUC__) -#pragma GCC diagnostic ignored "-Walignment-reduction-ignored" -#else -#pragma warning disable "alignment-reduction-ignored" -#endif -#endif /* -Walignment-reduction-ignored */ - -#ifndef MDBX_EXCLUDE_FOR_GPROF -#ifdef ENABLE_GPROF -#define MDBX_EXCLUDE_FOR_GPROF \ - __attribute__((__no_instrument_function__, \ - __no_profile_instrument_function__)) -#else -#define MDBX_EXCLUDE_FOR_GPROF -#endif /* ENABLE_GPROF */ -#endif /* MDBX_EXCLUDE_FOR_GPROF */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "osal.h" - -#define mdbx_sourcery_anchor XCONCAT(mdbx_sourcery_, MDBX_BUILD_SOURCERY) -#if defined(xMDBX_TOOLS) -extern LIBMDBX_API const char *const mdbx_sourcery_anchor; -#endif - -#include "options.h" - -/* Undefine the NDEBUG if debugging is enforced by MDBX_DEBUG */ -#if MDBX_DEBUG -#undef NDEBUG -#endif - -#ifndef __cplusplus 
-/*----------------------------------------------------------------------------*/ -/* Debug and Logging stuff */ - -#define MDBX_RUNTIME_FLAGS_INIT \ - ((MDBX_DEBUG) > 0) * MDBX_DBG_ASSERT + ((MDBX_DEBUG) > 1) * MDBX_DBG_AUDIT - -union logger_union { - void *ptr; - MDBX_debug_func *fmt; - MDBX_debug_func_nofmt *nofmt; -}; - -MDBX_INTERNAL_VAR_PROTO struct mdbx_static { - uint8_t flags; - uint8_t loglevel; - union logger_union logger; - size_t logger_buffer_size; - char *logger_buffer; -} mdbx_static; - -MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny) { -#if MDBX_DEBUG - if (MDBX_DBG_JITTER & mdbx_static.flags) - osal_jitter(tiny); -#else - (void)tiny; -#endif -} - -MDBX_INTERNAL_FUNC void MDBX_PRINTF_ARGS(4, 5) - debug_log(int level, const char *function, int line, const char *fmt, ...) - MDBX_PRINTF_ARGS(4, 5); -MDBX_INTERNAL_FUNC void debug_log_va(int level, const char *function, int line, - const char *fmt, va_list args); - -#if MDBX_DEBUG -#define LOG_ENABLED(msg) unlikely(msg <= mdbx_static.loglevel) -#define AUDIT_ENABLED() unlikely((mdbx_static.flags & MDBX_DBG_AUDIT)) -#else /* MDBX_DEBUG */ -#define LOG_ENABLED(msg) (msg < MDBX_LOG_VERBOSE && msg <= mdbx_static.loglevel) -#define AUDIT_ENABLED() (0) -#endif /* MDBX_DEBUG */ - -#if MDBX_FORCE_ASSERTIONS -#define ASSERT_ENABLED() (1) -#elif MDBX_DEBUG -#define ASSERT_ENABLED() likely((mdbx_static.flags & MDBX_DBG_ASSERT)) -#else -#define ASSERT_ENABLED() (0) -#endif /* assertions */ - -#define DEBUG_EXTRA(fmt, ...) \ - do { \ - if (LOG_ENABLED(MDBX_LOG_EXTRA)) \ - debug_log(MDBX_LOG_EXTRA, __func__, __LINE__, fmt, __VA_ARGS__); \ - } while (0) - -#define DEBUG_EXTRA_PRINT(fmt, ...) \ - do { \ - if (LOG_ENABLED(MDBX_LOG_EXTRA)) \ - debug_log(MDBX_LOG_EXTRA, NULL, 0, fmt, __VA_ARGS__); \ - } while (0) - -#define TRACE(fmt, ...) \ - do { \ - if (LOG_ENABLED(MDBX_LOG_TRACE)) \ - debug_log(MDBX_LOG_TRACE, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ - } while (0) - -#define DEBUG(fmt, ...) 
\ - do { \ - if (LOG_ENABLED(MDBX_LOG_DEBUG)) \ - debug_log(MDBX_LOG_DEBUG, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ - } while (0) - -#define VERBOSE(fmt, ...) \ - do { \ - if (LOG_ENABLED(MDBX_LOG_VERBOSE)) \ - debug_log(MDBX_LOG_VERBOSE, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ - } while (0) - -#define NOTICE(fmt, ...) \ - do { \ - if (LOG_ENABLED(MDBX_LOG_NOTICE)) \ - debug_log(MDBX_LOG_NOTICE, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ - } while (0) - -#define WARNING(fmt, ...) \ - do { \ - if (LOG_ENABLED(MDBX_LOG_WARN)) \ - debug_log(MDBX_LOG_WARN, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ - } while (0) - -#undef ERROR /* wingdi.h \ - Yeah, morons from M$ put such definition to the public header. */ - -#define ERROR(fmt, ...) \ - do { \ - if (LOG_ENABLED(MDBX_LOG_ERROR)) \ - debug_log(MDBX_LOG_ERROR, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ - } while (0) - -#define FATAL(fmt, ...) \ - debug_log(MDBX_LOG_FATAL, __func__, __LINE__, fmt "\n", __VA_ARGS__); - -#if MDBX_DEBUG -#define ASSERT_FAIL(env, msg, func, line) mdbx_assert_fail(env, msg, func, line) -#else /* MDBX_DEBUG */ -MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func, - unsigned line); -#define ASSERT_FAIL(env, msg, func, line) \ - do { \ - (void)(env); \ - assert_fail(msg, func, line); \ - } while (0) -#endif /* MDBX_DEBUG */ - -#define ENSURE_MSG(env, expr, msg) \ - do { \ - if (unlikely(!(expr))) \ - ASSERT_FAIL(env, msg, __func__, __LINE__); \ - } while (0) - -#define ENSURE(env, expr) ENSURE_MSG(env, expr, #expr) - -/* assert(3) variant in environment context */ -#define eASSERT(env, expr) \ - do { \ - if (ASSERT_ENABLED()) \ - ENSURE(env, expr); \ - } while (0) - -/* assert(3) variant in cursor context */ -#define cASSERT(mc, expr) eASSERT((mc)->mc_txn->mt_env, expr) - -/* assert(3) variant in transaction context */ -#define tASSERT(txn, expr) eASSERT((txn)->mt_env, expr) - -#ifndef xMDBX_TOOLS /* Avoid using internal eASSERT() */ -#undef assert 
-#define assert(expr) eASSERT(NULL, expr) -#endif - -#endif /* __cplusplus */ - -/*----------------------------------------------------------------------------*/ -/* Atomics */ - -enum MDBX_memory_order { - mo_Relaxed, - mo_AcquireRelease - /* , mo_SequentialConsistency */ -}; - -typedef union { - volatile uint32_t weak; -#ifdef MDBX_HAVE_C11ATOMICS - volatile _Atomic uint32_t c11a; -#endif /* MDBX_HAVE_C11ATOMICS */ -} MDBX_atomic_uint32_t; - -typedef union { - volatile uint64_t weak; -#if defined(MDBX_HAVE_C11ATOMICS) && (MDBX_64BIT_CAS || MDBX_64BIT_ATOMIC) - volatile _Atomic uint64_t c11a; -#endif -#if !defined(MDBX_HAVE_C11ATOMICS) || !MDBX_64BIT_CAS || !MDBX_64BIT_ATOMIC - __anonymous_struct_extension__ struct { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - MDBX_atomic_uint32_t low, high; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - MDBX_atomic_uint32_t high, low; -#else -#error "FIXME: Unsupported byte order" -#endif /* __BYTE_ORDER__ */ - }; -#endif -} MDBX_atomic_uint64_t; - -#ifdef MDBX_HAVE_C11ATOMICS - -/* Crutches for C11 atomic compiler's bugs */ -#if defined(__e2k__) && defined(__LCC__) && __LCC__ < /* FIXME */ 127 -#define MDBX_c11a_ro(type, ptr) (&(ptr)->weak) -#define MDBX_c11a_rw(type, ptr) (&(ptr)->weak) -#elif defined(__clang__) && __clang__ < 8 -#define MDBX_c11a_ro(type, ptr) ((volatile _Atomic(type) *)&(ptr)->c11a) -#define MDBX_c11a_rw(type, ptr) (&(ptr)->c11a) -#else -#define MDBX_c11a_ro(type, ptr) (&(ptr)->c11a) -#define MDBX_c11a_rw(type, ptr) (&(ptr)->c11a) -#endif /* Crutches for C11 atomic compiler's bugs */ - -#define mo_c11_store(fence) \ - (((fence) == mo_Relaxed) ? memory_order_relaxed \ - : ((fence) == mo_AcquireRelease) ? memory_order_release \ - : memory_order_seq_cst) -#define mo_c11_load(fence) \ - (((fence) == mo_Relaxed) ? memory_order_relaxed \ - : ((fence) == mo_AcquireRelease) ? 
memory_order_acquire \ - : memory_order_seq_cst) - -#endif /* MDBX_HAVE_C11ATOMICS */ - -#ifndef __cplusplus - -#ifdef MDBX_HAVE_C11ATOMICS -#define osal_memory_fence(order, write) \ - atomic_thread_fence((write) ? mo_c11_store(order) : mo_c11_load(order)) -#else /* MDBX_HAVE_C11ATOMICS */ -#define osal_memory_fence(order, write) \ - do { \ - osal_compiler_barrier(); \ - if (write && order > (MDBX_CPU_WRITEBACK_INCOHERENT ? mo_Relaxed \ - : mo_AcquireRelease)) \ - osal_memory_barrier(); \ - } while (0) -#endif /* MDBX_HAVE_C11ATOMICS */ - -#if defined(MDBX_HAVE_C11ATOMICS) && defined(__LCC__) -#define atomic_store32(p, value, order) \ - ({ \ - const uint32_t value_to_store = (value); \ - atomic_store_explicit(MDBX_c11a_rw(uint32_t, p), value_to_store, \ - mo_c11_store(order)); \ - value_to_store; \ - }) -#define atomic_load32(p, order) \ - atomic_load_explicit(MDBX_c11a_ro(uint32_t, p), mo_c11_load(order)) -#define atomic_store64(p, value, order) \ - ({ \ - const uint64_t value_to_store = (value); \ - atomic_store_explicit(MDBX_c11a_rw(uint64_t, p), value_to_store, \ - mo_c11_store(order)); \ - value_to_store; \ - }) -#define atomic_load64(p, order) \ - atomic_load_explicit(MDBX_c11a_ro(uint64_t, p), mo_c11_load(order)) -#endif /* LCC && MDBX_HAVE_C11ATOMICS */ - -#ifndef atomic_store32 -MDBX_MAYBE_UNUSED static __always_inline uint32_t -atomic_store32(MDBX_atomic_uint32_t *p, const uint32_t value, - enum MDBX_memory_order order) { - STATIC_ASSERT(sizeof(MDBX_atomic_uint32_t) == 4); -#ifdef MDBX_HAVE_C11ATOMICS - assert(atomic_is_lock_free(MDBX_c11a_rw(uint32_t, p))); - atomic_store_explicit(MDBX_c11a_rw(uint32_t, p), value, mo_c11_store(order)); -#else /* MDBX_HAVE_C11ATOMICS */ - if (order != mo_Relaxed) - osal_compiler_barrier(); - p->weak = value; - osal_memory_fence(order, true); -#endif /* MDBX_HAVE_C11ATOMICS */ - return value; -} -#endif /* atomic_store32 */ - -#ifndef atomic_load32 -MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_load32( - const 
volatile MDBX_atomic_uint32_t *p, enum MDBX_memory_order order) { - STATIC_ASSERT(sizeof(MDBX_atomic_uint32_t) == 4); -#ifdef MDBX_HAVE_C11ATOMICS - assert(atomic_is_lock_free(MDBX_c11a_ro(uint32_t, p))); - return atomic_load_explicit(MDBX_c11a_ro(uint32_t, p), mo_c11_load(order)); -#else /* MDBX_HAVE_C11ATOMICS */ - osal_memory_fence(order, false); - const uint32_t value = p->weak; - if (order != mo_Relaxed) - osal_compiler_barrier(); - return value; -#endif /* MDBX_HAVE_C11ATOMICS */ -} -#endif /* atomic_load32 */ - -#endif /* !__cplusplus */ - -/*----------------------------------------------------------------------------*/ -/* Basic constants and types */ - -/* A stamp that identifies a file as an MDBX file. - * There's nothing special about this value other than that it is easily - * recognizable, and it will reflect any byte order mismatches. */ -#define MDBX_MAGIC UINT64_C(/* 56-bit prime */ 0x59659DBDEF4C11) - -/* FROZEN: The version number for a database's datafile format. */ -#define MDBX_DATA_VERSION 3 -/* The version number for a database's lockfile format. */ -#define MDBX_LOCK_VERSION 5 - -/* handle for the DB used to track free pages. */ -#define FREE_DBI 0 -/* handle for the default DB. */ -#define MAIN_DBI 1 -/* Number of DBs in metapage (free and main) - also hardcoded elsewhere */ -#define CORE_DBS 2 - -/* Number of meta pages - also hardcoded elsewhere */ -#define NUM_METAS 3 - -/* A page number in the database. - * - * MDBX uses 32 bit for page numbers. This limits database - * size up to 2^44 bytes, in case of 4K pages. */ -typedef uint32_t pgno_t; -typedef MDBX_atomic_uint32_t atomic_pgno_t; -#define PRIaPGNO PRIu32 -#define MAX_PAGENO UINT32_C(0x7FFFffff) -#define MIN_PAGENO NUM_METAS - -#define SAFE64_INVALID_THRESHOLD UINT64_C(0xffffFFFF00000000) - -/* A transaction ID. 
*/ -typedef uint64_t txnid_t; -typedef MDBX_atomic_uint64_t atomic_txnid_t; -#define PRIaTXN PRIi64 -#define MIN_TXNID UINT64_C(1) -#define MAX_TXNID (SAFE64_INVALID_THRESHOLD - 1) -#define INITIAL_TXNID (MIN_TXNID + NUM_METAS - 1) -#define INVALID_TXNID UINT64_MAX -/* LY: for testing non-atomic 64-bit txnid on 32-bit arches. - * #define xMDBX_TXNID_STEP (UINT32_MAX / 3) */ -#ifndef xMDBX_TXNID_STEP -#if MDBX_64BIT_CAS -#define xMDBX_TXNID_STEP 1u -#else -#define xMDBX_TXNID_STEP 2u -#endif -#endif /* xMDBX_TXNID_STEP */ - -/* Used for offsets within a single page. - * Since memory pages are typically 4 or 8KB in size, 12-13 bits, - * this is plenty. */ -typedef uint16_t indx_t; - -#define MEGABYTE ((size_t)1 << 20) - -/*----------------------------------------------------------------------------*/ -/* Core structures for database and shared memory (i.e. format definition) */ -#pragma pack(push, 4) - -/* Information about a single database in the environment. */ -typedef struct MDBX_db { - uint16_t md_flags; /* see mdbx_dbi_open */ - uint16_t md_depth; /* depth of this tree */ - uint32_t md_xsize; /* key-size for MDBX_DUPFIXED (LEAF2 pages) */ - pgno_t md_root; /* the root page of this tree */ - pgno_t md_branch_pages; /* number of internal pages */ - pgno_t md_leaf_pages; /* number of leaf pages */ - pgno_t md_overflow_pages; /* number of overflow pages */ - uint64_t md_seq; /* table sequence counter */ - uint64_t md_entries; /* number of data items */ - uint64_t md_mod_txnid; /* txnid of last committed modification */ -} MDBX_db; - -/* database size-related parameters */ -typedef struct MDBX_geo { - uint16_t grow_pv; /* datafile growth step as a 16-bit packed (exponential - quantized) value */ - uint16_t shrink_pv; /* datafile shrink threshold as a 16-bit packed - (exponential quantized) value */ - pgno_t lower; /* minimal size of datafile in pages */ - pgno_t upper; /* maximal size of datafile in pages */ - pgno_t now; /* current size of datafile in pages */ - 
pgno_t next; /* first unused page in the datafile, - but actually the file may be shorter. */ -} MDBX_geo; - -/* Meta page content. - * A meta page is the start point for accessing a database snapshot. - * Pages 0-1 are meta pages. Transaction N writes meta page (N % 2). */ -typedef struct MDBX_meta { - /* Stamp identifying this as an MDBX file. - * It must be set to MDBX_MAGIC with MDBX_DATA_VERSION. */ - uint32_t mm_magic_and_version[2]; - - /* txnid that committed this page, the first of a two-phase-update pair */ - union { - MDBX_atomic_uint32_t mm_txnid_a[2]; - uint64_t unsafe_txnid; - }; - - uint16_t mm_extra_flags; /* extra DB flags, zero (nothing) for now */ - uint8_t mm_validator_id; /* ID of checksum and page validation method, - * zero (nothing) for now */ - uint8_t mm_extra_pagehdr; /* extra bytes in the page header, - * zero (nothing) for now */ - - MDBX_geo mm_geo; /* database size-related parameters */ - - MDBX_db mm_dbs[CORE_DBS]; /* first is free space, 2nd is main db */ - /* The size of pages used in this DB */ -#define mm_psize mm_dbs[FREE_DBI].md_xsize - MDBX_canary mm_canary; - -#define MDBX_DATASIGN_NONE 0u -#define MDBX_DATASIGN_WEAK 1u -#define SIGN_IS_STEADY(sign) ((sign) > MDBX_DATASIGN_WEAK) -#define META_IS_STEADY(meta) \ - SIGN_IS_STEADY(unaligned_peek_u64_volatile(4, (meta)->mm_sign)) - union { - uint32_t mm_sign[2]; - uint64_t unsafe_sign; - }; - - /* txnid that committed this page, the second of a two-phase-update pair */ - MDBX_atomic_uint32_t mm_txnid_b[2]; - - /* Number of non-meta pages which were put in GC after COW. May be 0 in case - * DB was previously handled by libmdbx without corresponding feature. - * This value in couple with mr_snapshot_pages_retired allows fast estimation - * of "how much reader is restraining GC recycling". */ - uint32_t mm_pages_retired[2]; - - /* The analogue /proc/sys/kernel/random/boot_id or similar to determine - * whether the system was rebooted after the last use of the database files. 
- * If there was no reboot, but there is no need to rollback to the last - * steady sync point. Zeros mean that no relevant information is available - * from the system. */ - bin128_t mm_bootid; - -} MDBX_meta; - -#pragma pack(1) - -/* Common header for all page types. The page type depends on mp_flags. - * - * P_BRANCH and P_LEAF pages have unsorted 'MDBX_node's at the end, with - * sorted mp_ptrs[] entries referring to them. Exception: P_LEAF2 pages - * omit mp_ptrs and pack sorted MDBX_DUPFIXED values after the page header. - * - * P_OVERFLOW records occupy one or more contiguous pages where only the - * first has a page header. They hold the real data of F_BIGDATA nodes. - * - * P_SUBP sub-pages are small leaf "pages" with duplicate data. - * A node with flag F_DUPDATA but not F_SUBDATA contains a sub-page. - * (Duplicate data can also go in sub-databases, which use normal pages.) - * - * P_META pages contain MDBX_meta, the start point of an MDBX snapshot. - * - * Each non-metapage up to MDBX_meta.mm_last_pg is reachable exactly once - * in the snapshot: Either used by a database or listed in a GC record. 
*/ -typedef struct MDBX_page { -#define IS_FROZEN(txn, p) ((p)->mp_txnid < (txn)->mt_txnid) -#define IS_SPILLED(txn, p) ((p)->mp_txnid == (txn)->mt_txnid) -#define IS_SHADOWED(txn, p) ((p)->mp_txnid > (txn)->mt_txnid) -#define IS_VALID(txn, p) ((p)->mp_txnid <= (txn)->mt_front) -#define IS_MODIFIABLE(txn, p) ((p)->mp_txnid == (txn)->mt_front) - uint64_t mp_txnid; /* txnid which created page, maybe zero in legacy DB */ - uint16_t mp_leaf2_ksize; /* key size if this is a LEAF2 page */ -#define P_BRANCH 0x01u /* branch page */ -#define P_LEAF 0x02u /* leaf page */ -#define P_OVERFLOW 0x04u /* overflow page */ -#define P_META 0x08u /* meta page */ -#define P_LEGACY_DIRTY 0x10u /* legacy P_DIRTY flag prior to v0.10 958fd5b9 */ -#define P_BAD P_LEGACY_DIRTY /* explicit flag for invalid/bad page */ -#define P_LEAF2 0x20u /* for MDBX_DUPFIXED records */ -#define P_SUBP 0x40u /* for MDBX_DUPSORT sub-pages */ -#define P_SPILLED 0x2000u /* spilled in parent txn */ -#define P_LOOSE 0x4000u /* page was dirtied then freed, can be reused */ -#define P_FROZEN 0x8000u /* used for retire page with known status */ -#define P_ILL_BITS \ - ((uint16_t) ~(P_BRANCH | P_LEAF | P_LEAF2 | P_OVERFLOW | P_SPILLED)) - uint16_t mp_flags; - union { - uint32_t mp_pages; /* number of overflow pages */ - __anonymous_struct_extension__ struct { - indx_t mp_lower; /* lower bound of free space */ - indx_t mp_upper; /* upper bound of free space */ - }; - }; - pgno_t mp_pgno; /* page number */ - -#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ - (!defined(__cplusplus) && defined(_MSC_VER)) - indx_t mp_ptrs[] /* dynamic size */; -#endif /* C99 */ -} MDBX_page; - -#define PAGETYPE_WHOLE(p) ((uint8_t)(p)->mp_flags) - -/* Drop legacy P_DIRTY flag for sub-pages for compatilibity, - * for assertions only. */ -#define PAGETYPE_COMPAT(p) \ - (unlikely(PAGETYPE_WHOLE(p) & P_SUBP) \ - ? 
PAGETYPE_WHOLE(p) & ~(P_SUBP | P_LEGACY_DIRTY) \ - : PAGETYPE_WHOLE(p)) - -/* Size of the page header, excluding dynamic data at the end */ -#define PAGEHDRSZ offsetof(MDBX_page, mp_ptrs) - -/* Pointer displacement without casting to char* to avoid pointer-aliasing */ -#define ptr_disp(ptr, disp) ((void *)(((intptr_t)(ptr)) + ((intptr_t)(disp)))) - -/* Pointer distance as signed number of bytes */ -#define ptr_dist(more, less) (((intptr_t)(more)) - ((intptr_t)(less))) - -#define mp_next(mp) \ - (*(MDBX_page **)ptr_disp((mp)->mp_ptrs, sizeof(void *) - sizeof(uint32_t))) - -#pragma pack(pop) - -typedef struct profgc_stat { - /* Монотонное время по "настенным часам" - * затраченное на чтение и поиск внутри GC */ - uint64_t rtime_monotonic; - /* Процессорное время в режим пользователя - * на подготовку страниц извлекаемых из GC, включая подкачку с диска. */ - uint64_t xtime_cpu; - /* Количество итераций чтения-поиска внутри GC при выделении страниц */ - uint32_t rsteps; - /* Количество запросов на выделение последовательностей страниц, - * т.е. 
когда запрашивает выделение больше одной страницы */ - uint32_t xpages; - /* Счетчик выполнения по медленному пути (slow path execution count) */ - uint32_t spe_counter; - /* page faults (hard page faults) */ - uint32_t majflt; -} profgc_stat_t; - -/* Statistics of page operations overall of all (running, completed and aborted) - * transactions */ -typedef struct pgop_stat { - MDBX_atomic_uint64_t newly; /* Quantity of a new pages added */ - MDBX_atomic_uint64_t cow; /* Quantity of pages copied for update */ - MDBX_atomic_uint64_t clone; /* Quantity of parent's dirty pages clones - for nested transactions */ - MDBX_atomic_uint64_t split; /* Page splits */ - MDBX_atomic_uint64_t merge; /* Page merges */ - MDBX_atomic_uint64_t spill; /* Quantity of spilled dirty pages */ - MDBX_atomic_uint64_t unspill; /* Quantity of unspilled/reloaded pages */ - MDBX_atomic_uint64_t - wops; /* Number of explicit write operations (not a pages) to a disk */ - MDBX_atomic_uint64_t - msync; /* Number of explicit msync/flush-to-disk operations */ - MDBX_atomic_uint64_t - fsync; /* Number of explicit fsync/flush-to-disk operations */ - - MDBX_atomic_uint64_t prefault; /* Number of prefault write operations */ - MDBX_atomic_uint64_t mincore; /* Number of mincore() calls */ - - MDBX_atomic_uint32_t - incoherence; /* number of https://libmdbx.dqdkfa.ru/dead-github/issues/269 - caught */ - MDBX_atomic_uint32_t reserved; - - /* Статистика для профилирования GC. - * Логически эти данные может быть стоит вынести в другую структуру, - * но разница будет сугубо косметическая. 
*/ - struct { - /* Затраты на поддержку данных пользователя */ - profgc_stat_t work; - /* Затраты на поддержку и обновления самой GC */ - profgc_stat_t self; - /* Итераций обновления GC, - * больше 1 если были повторы/перезапуски */ - uint32_t wloops; - /* Итерации слияния записей GC */ - uint32_t coalescences; - /* Уничтожения steady-точек фиксации в MDBX_UTTERLY_NOSYNC */ - uint32_t wipes; - /* Сбросы данные на диск вне MDBX_UTTERLY_NOSYNC */ - uint32_t flushes; - /* Попытки пнуть тормозящих читателей */ - uint32_t kicks; - } gc_prof; -} pgop_stat_t; - -#if MDBX_LOCKING == MDBX_LOCKING_WIN32FILES -#define MDBX_CLOCK_SIGN UINT32_C(0xF10C) -typedef void osal_ipclock_t; -#elif MDBX_LOCKING == MDBX_LOCKING_SYSV - -#define MDBX_CLOCK_SIGN UINT32_C(0xF18D) -typedef mdbx_pid_t osal_ipclock_t; -#ifndef EOWNERDEAD -#define EOWNERDEAD MDBX_RESULT_TRUE -#endif - -#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ - MDBX_LOCKING == MDBX_LOCKING_POSIX2008 -#define MDBX_CLOCK_SIGN UINT32_C(0x8017) -typedef pthread_mutex_t osal_ipclock_t; -#elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988 -#define MDBX_CLOCK_SIGN UINT32_C(0xFC29) -typedef sem_t osal_ipclock_t; -#else -#error "FIXME" -#endif /* MDBX_LOCKING */ - -#if MDBX_LOCKING > MDBX_LOCKING_SYSV && !defined(__cplusplus) -MDBX_INTERNAL_FUNC int osal_ipclock_stubinit(osal_ipclock_t *ipc); -MDBX_INTERNAL_FUNC int osal_ipclock_destroy(osal_ipclock_t *ipc); -#endif /* MDBX_LOCKING */ - -/* Reader Lock Table - * - * Readers don't acquire any locks for their data access. Instead, they - * simply record their transaction ID in the reader table. The reader - * mutex is needed just to find an empty slot in the reader table. The - * slot's address is saved in thread-specific data so that subsequent - * read transactions started by the same thread need no further locking to - * proceed. - * - * If MDBX_NOSTICKYTHREADS is set, the slot address is not saved in - * thread-specific data. 
No reader table is used if the database is on a - * read-only filesystem. - * - * Since the database uses multi-version concurrency control, readers don't - * actually need any locking. This table is used to keep track of which - * readers are using data from which old transactions, so that we'll know - * when a particular old transaction is no longer in use. Old transactions - * that have discarded any data pages can then have those pages reclaimed - * for use by a later write transaction. - * - * The lock table is constructed such that reader slots are aligned with the - * processor's cache line size. Any slot is only ever used by one thread. - * This alignment guarantees that there will be no contention or cache - * thrashing as threads update their own slot info, and also eliminates - * any need for locking when accessing a slot. - * - * A writer thread will scan every slot in the table to determine the oldest - * outstanding reader transaction. Any freed pages older than this will be - * reclaimed by the writer. The writer doesn't use any locks when scanning - * this table. This means that there's no guarantee that the writer will - * see the most up-to-date reader info, but that's not required for correct - * operation - all we need is to know the upper bound on the oldest reader, - * we don't care at all about the newest reader. So the only consequence of - * reading stale information here is that old pages might hang around a - * while longer before being reclaimed. That's actually good anyway, because - * the longer we delay reclaiming old pages, the more likely it is that a - * string of contiguous pages can be found after coalescing old pages from - * many old transactions together. */ - -/* The actual reader record, with cacheline padding. */ -typedef struct MDBX_reader { - /* Current Transaction ID when this transaction began, or (txnid_t)-1. - * Multiple readers that start at the same time will probably have the - * same ID here. 
Again, it's not important to exclude them from - * anything; all we need to know is which version of the DB they - * started from so we can avoid overwriting any data used in that - * particular version. */ - MDBX_atomic_uint64_t /* txnid_t */ mr_txnid; - - /* The information we store in a single slot of the reader table. - * In addition to a transaction ID, we also record the process and - * thread ID that owns a slot, so that we can detect stale information, - * e.g. threads or processes that went away without cleaning up. - * - * NOTE: We currently don't check for stale records. - * We simply re-init the table when we know that we're the only process - * opening the lock file. */ - - /* The thread ID of the thread owning this txn. */ - MDBX_atomic_uint64_t mr_tid; - - /* The process ID of the process owning this reader txn. */ - MDBX_atomic_uint32_t mr_pid; - - /* The number of pages used in the reader's MVCC snapshot, - * i.e. the value of meta->mm_geo.next and txn->mt_next_pgno */ - atomic_pgno_t mr_snapshot_pages_used; - /* Number of retired pages at the time this reader starts transaction. So, - * at any time the difference mm_pages_retired - mr_snapshot_pages_retired - * will give the number of pages which this reader restraining from reuse. */ - MDBX_atomic_uint64_t mr_snapshot_pages_retired; -} MDBX_reader; - -/* The header for the reader table (a memory-mapped lock file). */ -typedef struct MDBX_lockinfo { - /* Stamp identifying this as an MDBX file. - * It must be set to MDBX_MAGIC with with MDBX_LOCK_VERSION. */ - uint64_t mti_magic_and_version; - - /* Format of this lock file. Must be set to MDBX_LOCK_FORMAT. */ - uint32_t mti_os_and_format; - - /* Flags which environment was opened. */ - MDBX_atomic_uint32_t mti_envmode; - - /* Threshold of un-synced-with-disk pages for auto-sync feature, - * zero means no-threshold, i.e. auto-sync is disabled. 
*/ - atomic_pgno_t mti_autosync_threshold; - - /* Low 32-bit of txnid with which meta-pages was synced, - * i.e. for sync-polling in the MDBX_NOMETASYNC mode. */ -#define MDBX_NOMETASYNC_LAZY_UNK (UINT32_MAX / 3) -#define MDBX_NOMETASYNC_LAZY_FD (MDBX_NOMETASYNC_LAZY_UNK + UINT32_MAX / 8) -#define MDBX_NOMETASYNC_LAZY_WRITEMAP \ - (MDBX_NOMETASYNC_LAZY_UNK - UINT32_MAX / 8) - MDBX_atomic_uint32_t mti_meta_sync_txnid; - - /* Period for timed auto-sync feature, i.e. at the every steady checkpoint - * the mti_unsynced_timeout sets to the current_time + mti_autosync_period. - * The time value is represented in a suitable system-dependent form, for - * example clock_gettime(CLOCK_BOOTTIME) or clock_gettime(CLOCK_MONOTONIC). - * Zero means timed auto-sync is disabled. */ - MDBX_atomic_uint64_t mti_autosync_period; - - /* Marker to distinguish uniqueness of DB/CLK. */ - MDBX_atomic_uint64_t mti_bait_uniqueness; - - /* Paired counter of processes that have mlock()ed part of mmapped DB. - * The (mti_mlcnt[0] - mti_mlcnt[1]) > 0 means at least one process - * lock at least one page, so therefore madvise() could return EINVAL. */ - MDBX_atomic_uint32_t mti_mlcnt[2]; - - MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/ - - /* Statistics of costly ops of all (running, completed and aborted) - * transactions */ - pgop_stat_t mti_pgop_stat; - - MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/ - - /* Write transaction lock. */ -#if MDBX_LOCKING > 0 - osal_ipclock_t mti_wlock; -#endif /* MDBX_LOCKING > 0 */ - - atomic_txnid_t mti_oldest_reader; - - /* Timestamp of entering an out-of-sync state. Value is represented in a - * suitable system-dependent form, for example clock_gettime(CLOCK_BOOTTIME) - * or clock_gettime(CLOCK_MONOTONIC). */ - MDBX_atomic_uint64_t mti_eoos_timestamp; - - /* Number un-synced-with-disk pages for auto-sync feature. 
*/ - MDBX_atomic_uint64_t mti_unsynced_pages; - - /* Timestamp of the last readers check. */ - MDBX_atomic_uint64_t mti_reader_check_timestamp; - - /* Number of page which was discarded last time by madvise(DONTNEED). */ - atomic_pgno_t mti_discarded_tail; - - /* Shared anchor for tracking readahead edge and enabled/disabled status. */ - pgno_t mti_readahead_anchor; - - /* Shared cache for mincore() results */ - struct { - pgno_t begin[4]; - uint64_t mask[4]; - } mti_mincore_cache; - - MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/ - - /* Readeaders registration lock. */ -#if MDBX_LOCKING > 0 - osal_ipclock_t mti_rlock; -#endif /* MDBX_LOCKING > 0 */ - - /* The number of slots that have been used in the reader table. - * This always records the maximum count, it is not decremented - * when readers release their slots. */ - MDBX_atomic_uint32_t mti_numreaders; - MDBX_atomic_uint32_t mti_readers_refresh_flag; - -#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ - (!defined(__cplusplus) && defined(_MSC_VER)) - MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/ - MDBX_reader mti_readers[] /* dynamic size */; -#endif /* C99 */ -} MDBX_lockinfo; - -/* Lockfile format signature: version, features and field layout */ -#define MDBX_LOCK_FORMAT \ - (MDBX_CLOCK_SIGN * 27733 + (unsigned)sizeof(MDBX_reader) * 13 + \ - (unsigned)offsetof(MDBX_reader, mr_snapshot_pages_used) * 251 + \ - (unsigned)offsetof(MDBX_lockinfo, mti_oldest_reader) * 83 + \ - (unsigned)offsetof(MDBX_lockinfo, mti_numreaders) * 37 + \ - (unsigned)offsetof(MDBX_lockinfo, mti_readers) * 29) - -#define MDBX_DATA_MAGIC \ - ((MDBX_MAGIC << 8) + MDBX_PNL_ASCENDING * 64 + MDBX_DATA_VERSION) - -#define MDBX_DATA_MAGIC_LEGACY_COMPAT \ - ((MDBX_MAGIC << 8) + MDBX_PNL_ASCENDING * 64 + 2) - -#define MDBX_DATA_MAGIC_LEGACY_DEVEL ((MDBX_MAGIC << 8) + 255) - -#define MDBX_LOCK_MAGIC ((MDBX_MAGIC << 8) + MDBX_LOCK_VERSION) - -/* The maximum size of 
a database page. - * - * It is 64K, but value-PAGEHDRSZ must fit in MDBX_page.mp_upper. - * - * MDBX will use database pages < OS pages if needed. - * That causes more I/O in write transactions: The OS must - * know (read) the whole page before writing a partial page. - * - * Note that we don't currently support Huge pages. On Linux, - * regular data files cannot use Huge pages, and in general - * Huge pages aren't actually pageable. We rely on the OS - * demand-pager to read our data and page it out when memory - * pressure from other processes is high. So until OSs have - * actual paging support for Huge pages, they're not viable. */ -#define MAX_PAGESIZE MDBX_MAX_PAGESIZE -#define MIN_PAGESIZE MDBX_MIN_PAGESIZE - -#define MIN_MAPSIZE (MIN_PAGESIZE * MIN_PAGENO) -#if defined(_WIN32) || defined(_WIN64) -#define MAX_MAPSIZE32 UINT32_C(0x38000000) -#else -#define MAX_MAPSIZE32 UINT32_C(0x7f000000) -#endif -#define MAX_MAPSIZE64 ((MAX_PAGENO + 1) * (uint64_t)MAX_PAGESIZE) - -#if MDBX_WORDBITS >= 64 -#define MAX_MAPSIZE MAX_MAPSIZE64 -#define MDBX_PGL_LIMIT ((size_t)MAX_PAGENO) -#else -#define MAX_MAPSIZE MAX_MAPSIZE32 -#define MDBX_PGL_LIMIT (MAX_MAPSIZE32 / MIN_PAGESIZE) -#endif /* MDBX_WORDBITS */ - -#define MDBX_READERS_LIMIT 32767 -#define MDBX_RADIXSORT_THRESHOLD 142 -#define MDBX_GOLD_RATIO_DBL 1.6180339887498948482 - -/*----------------------------------------------------------------------------*/ - -/* An PNL is an Page Number List, a sorted array of IDs. - * The first element of the array is a counter for how many actual page-numbers - * are in the list. By default PNLs are sorted in descending order, this allow - * cut off a page with lowest pgno (at the tail) just truncating the list. The - * sort order of PNLs is controlled by the MDBX_PNL_ASCENDING build option. 
*/ -typedef pgno_t *MDBX_PNL; - -#if MDBX_PNL_ASCENDING -#define MDBX_PNL_ORDERED(first, last) ((first) < (last)) -#define MDBX_PNL_DISORDERED(first, last) ((first) >= (last)) -#else -#define MDBX_PNL_ORDERED(first, last) ((first) > (last)) -#define MDBX_PNL_DISORDERED(first, last) ((first) <= (last)) -#endif - -/* List of txnid, only for MDBX_txn.tw.lifo_reclaimed */ -typedef txnid_t *MDBX_TXL; - -/* An Dirty-Page list item is an pgno/pointer pair. */ -typedef struct MDBX_dp { - MDBX_page *ptr; - pgno_t pgno, npages; -} MDBX_dp; - -/* An DPL (dirty-page list) is a sorted array of MDBX_DPs. */ -typedef struct MDBX_dpl { - size_t sorted; - size_t length; - size_t pages_including_loose; /* number of pages, but not an entries. */ - size_t detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */ -#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ - (!defined(__cplusplus) && defined(_MSC_VER)) - MDBX_dp items[] /* dynamic size with holes at zero and after the last */; -#endif -} MDBX_dpl; - -/* PNL sizes */ -#define MDBX_PNL_GRANULATE_LOG2 10 -#define MDBX_PNL_GRANULATE (1 << MDBX_PNL_GRANULATE_LOG2) -#define MDBX_PNL_INITIAL \ - (MDBX_PNL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t)) - -#define MDBX_TXL_GRANULATE 32 -#define MDBX_TXL_INITIAL \ - (MDBX_TXL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t)) -#define MDBX_TXL_MAX \ - ((1u << 26) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t)) - -#define MDBX_PNL_ALLOCLEN(pl) ((pl)[-1]) -#define MDBX_PNL_GETSIZE(pl) ((size_t)((pl)[0])) -#define MDBX_PNL_SETSIZE(pl, size) \ - do { \ - const size_t __size = size; \ - assert(__size < INT_MAX); \ - (pl)[0] = (pgno_t)__size; \ - } while (0) -#define MDBX_PNL_FIRST(pl) ((pl)[1]) -#define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_GETSIZE(pl)]) -#define MDBX_PNL_BEGIN(pl) (&(pl)[1]) -#define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_GETSIZE(pl) + 1]) - -#if MDBX_PNL_ASCENDING -#define MDBX_PNL_EDGE(pl) ((pl) + 1) -#define MDBX_PNL_LEAST(pl) 
MDBX_PNL_FIRST(pl) -#define MDBX_PNL_MOST(pl) MDBX_PNL_LAST(pl) -#else -#define MDBX_PNL_EDGE(pl) ((pl) + MDBX_PNL_GETSIZE(pl)) -#define MDBX_PNL_LEAST(pl) MDBX_PNL_LAST(pl) -#define MDBX_PNL_MOST(pl) MDBX_PNL_FIRST(pl) -#endif - -#define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_GETSIZE(pl) + 1) * sizeof(pgno_t)) -#define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_GETSIZE(pl) == 0) - -/*----------------------------------------------------------------------------*/ -/* Internal structures */ - -/* Auxiliary DB info. - * The information here is mostly static/read-only. There is - * only a single copy of this record in the environment. */ -typedef struct MDBX_dbx { - MDBX_val md_name; /* name of the database */ - MDBX_cmp_func *md_cmp; /* function for comparing keys */ - MDBX_cmp_func *md_dcmp; /* function for comparing data items */ - size_t md_klen_min, md_klen_max; /* min/max key length for the database */ - size_t md_vlen_min, - md_vlen_max; /* min/max value/data length for the database */ -} MDBX_dbx; +#include "essentials.h" + +typedef struct dp dp_t; +typedef struct dpl dpl_t; +typedef struct kvx kvx_t; +typedef struct meta_ptr meta_ptr_t; +typedef struct inner_cursor subcur_t; +typedef struct cursor_couple cursor_couple_t; +typedef struct defer_free_item defer_free_item_t; typedef struct troika { uint8_t fsm, recent, prefer_steady, tail_and_flags; @@ -1159,88 +27,197 @@ typedef struct troika { #define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128u) #define TROIKA_TAIL(troika) ((troika)->tail_and_flags & 3u) txnid_t txnid[NUM_METAS]; -} meta_troika_t; +} troika_t; + +typedef struct page_get_result { + page_t *page; + int err; +} pgr_t; + +typedef struct node_search_result { + node_t *node; + bool exact; +} nsr_t; + +typedef struct bind_reader_slot_result { + int err; + reader_slot_t *rslot; +} bsr_t; + +#include "atomics-ops.h" +#include "proto.h" +#include "txl.h" +#include "unaligned.h" +#if defined(_WIN32) || defined(_WIN64) +#include "windows-import.h" +#endif /* Windows 
*/ + +enum signatures { + env_signature = INT32_C(0x1A899641), + txn_signature = INT32_C(0x13D53A31), + cur_signature_live = INT32_C(0x7E05D5B1), + cur_signature_ready4dispose = INT32_C(0x2817A047), + cur_signature_wait4eot = INT32_C(0x10E297A7) +}; + +/*----------------------------------------------------------------------------*/ + +/* An dirty-page list item is an pgno/pointer pair. */ +struct dp { + page_t *ptr; + pgno_t pgno, npages; +}; + +enum dpl_rules { + dpl_gap_edging = 2, + dpl_gap_mergesort = 16, + dpl_reserve_gap = dpl_gap_mergesort + dpl_gap_edging, + dpl_insertion_threshold = 42 +}; + +/* An DPL (dirty-page list) is a lazy-sorted array of MDBX_DPs. */ +struct dpl { + size_t sorted; + size_t length; + /* number of pages, but not an entries. */ + size_t pages_including_loose; + /* allocated size excluding the dpl_reserve_gap */ + size_t detent; + /* dynamic size with holes at zero and after the last */ + dp_t items[dpl_reserve_gap]; +}; + +/*----------------------------------------------------------------------------*/ +/* Internal structures */ + +/* Comparing/ordering and length constraints */ +typedef struct clc { + MDBX_cmp_func *cmp; /* comparator */ + size_t lmin, lmax; /* min/max length constraints */ +} clc_t; + +/* Вспомогательная информация о subDB. + * + * Совокупность потребностей: + * 1. Для транзакций и основного курсора нужны все поля. + * 2. Для вложенного dupsort-курсора нужен компаратор значений, который изнутри + * курсора будет выглядеть как компаратор ключей. Плюс заглушка компаратора + * значений, которая не должна использоваться в штатных ситуациях, но + * требуется хотя-бы для отслеживания таких обращений. + * 3. Использование компараторов для курсора и вложенного dupsort-курсора + * должно выглядеть одинаково. + * 4. Желательно минимизировать объём данных размещаемых внутри вложенного + * dupsort-курсора. + * 5. Желательно чтобы объем всей структуры был степенью двойки. 
+ * + * Решение: + * - не храним в dupsort-курсоре ничего лишнего, а только tree; + * - в курсоры помещаем только указатель на clc_t, который будет указывать + * на соответствующее clc-поле в общей kvx-таблице привязанной к env; + * - компаратор размещаем в начале clc_t, в kvx_t сначала размещаем clc + * для ключей, потом для значений, а имя БД в конце kvx_t. + * - тогда в курсоре clc[0] будет содержать информацию для ключей, + * а clc[1] для значений, причем компаратор значений для dupsort-курсора + * будет попадать на MDBX_val с именем, что приведет к SIGSEGV при попытке + * использования такого компаратора. + * - размер kvx_t становится равным 8 словам. + * + * Трюки и прочая экономия на списках: + * - не храним dbi внутри курсора, вместо этого вычисляем его как разницу между + * dbi_state курсора и началом таблицы dbi_state в транзакции. Смысл тут в + * экономии кол-ва полей при инициализации курсора. Затрат это не создает, + * так как dbi требуется для последующего доступа к массивам в транзакции, + * т.е. при вычислении dbi разыменовывается тот-же указатель на txn + * и читается та же кэш-линия с указателями. 
*/ +typedef struct clc2 { + clc_t k; /* для ключей */ + clc_t v; /* для значений */ +} clc2_t; + +struct kvx { + clc2_t clc; + MDBX_val name; /* имя subDB */ +}; + +/* Non-shared DBI state flags inside transaction */ +enum dbi_state { + DBI_DIRTY = 0x01 /* DB was written in this txn */, + DBI_STALE = 0x02 /* Named-DB record is older than txnID */, + DBI_FRESH = 0x04 /* Named-DB handle opened in this txn */, + DBI_CREAT = 0x08 /* Named-DB handle created in this txn */, + DBI_VALID = 0x10 /* Handle is valid, see also DB_VALID */, + DBI_OLDEN = 0x40 /* Handle was closed/reopened outside txn */, + DBI_LINDO = 0x80 /* Lazy initialization done for DBI-slot */, +}; + +enum txn_flags { + txn_ro_begin_flags = MDBX_TXN_RDONLY | MDBX_TXN_RDONLY_PREPARE, + txn_rw_begin_flags = MDBX_TXN_NOMETASYNC | MDBX_TXN_NOSYNC | MDBX_TXN_TRY, + txn_shrink_allowed = UINT32_C(0x40000000), + txn_gc_drained = 0x20 /* GC was depleted up to oldest reader */, + txn_state_flags = MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | + MDBX_TXN_SPILLS | MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID | + txn_gc_drained +}; /* A database transaction. * Every operation requires a transaction handle. 
*/ struct MDBX_txn { -#define MDBX_MT_SIGNATURE UINT32_C(0x93D53A31) - uint32_t mt_signature; + int32_t signature; + uint32_t flags; /* Transaction Flags */ + size_t n_dbi; + size_t owner; /* thread ID that owns this transaction */ - /* Transaction Flags */ - /* mdbx_txn_begin() flags */ -#define MDBX_TXN_RO_BEGIN_FLAGS (MDBX_TXN_RDONLY | MDBX_TXN_RDONLY_PREPARE) -#define MDBX_TXN_RW_BEGIN_FLAGS \ - (MDBX_TXN_NOMETASYNC | MDBX_TXN_NOSYNC | MDBX_TXN_TRY) - /* Additional flag for sync_locked() */ -#define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000) - -#define MDBX_TXN_DRAINED_GC 0x20 /* GC was depleted up to oldest reader */ - -#define TXN_FLAGS \ - (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | \ - MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID | MDBX_TXN_DRAINED_GC) - -#if (TXN_FLAGS & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) || \ - ((MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS | TXN_FLAGS) & \ - MDBX_SHRINK_ALLOWED) -#error "Oops, some txn flags overlapped or wrong" -#endif - uint32_t mt_flags; - unsigned mt_numdbs; - size_t mt_owner; /* thread ID that owns this transaction */ - - MDBX_txn *mt_parent; /* parent of a nested txn */ - /* Nested txn under this txn, set together with flag MDBX_TXN_HAS_CHILD */ - MDBX_txn *mt_child; - MDBX_geo mt_geo; - /* next unallocated page */ -#define mt_next_pgno mt_geo.next - /* corresponding to the current size of datafile */ -#define mt_end_pgno mt_geo.now + MDBX_txn *parent; /* parent of a nested txn */ + MDBX_txn *nested; /* nested txn under this txn, + set together with MDBX_TXN_HAS_CHILD */ + geo_t geo; /* The ID of this transaction. IDs are integers incrementing from * INITIAL_TXNID. Only committed write transactions increment the ID. If a * transaction aborts, the ID may be re-used by the next writer. 
*/ - txnid_t mt_txnid; - txnid_t mt_front; + txnid_t txnid, front_txnid; - MDBX_env *mt_env; /* the DB environment */ - /* Array of MDBX_db records for each known DB */ - MDBX_db *mt_dbs; + MDBX_env *env; /* the DB environment */ + tree_t *dbs; /* Array of tree_t records for each known DB */ #if MDBX_ENABLE_DBI_SPARSE - unsigned *__restrict mt_dbi_sparse; + unsigned *__restrict dbi_sparse; #endif /* MDBX_ENABLE_DBI_SPARSE */ - /* Non-shared DBI state flags inside transaction */ -#define DBI_DIRTY 0x01 /* DB was written in this txn */ -#define DBI_STALE 0x02 /* Named-DB record is older than txnID */ -#define DBI_FRESH 0x04 /* Named-DB handle opened in this txn */ -#define DBI_CREAT 0x08 /* Named-DB handle created in this txn */ -#define DBI_VALID 0x10 /* Handle is valid, see also DB_VALID */ -#define DBI_OLDEN 0x40 /* Handle was closed/reopened outside txn */ -#define DBI_LINDO 0x80 /* Lazy initialization done for DBI-slot */ - /* Array of non-shared txn's flags of DBI */ - uint8_t *__restrict mt_dbi_state; + /* Array of non-shared txn's flags of DBI. + * Модификатор __restrict тут полезен и безопасен в текущем понимании, + * так как пересечение возможно только с dbi_state курсоров, + * и происходит по-чтению до последующего изменения/записи. */ + uint8_t *__restrict dbi_state; /* Array of sequence numbers for each DB handle. */ - uint32_t *__restrict mt_dbi_seqs; - MDBX_cursor **mt_cursors; + uint32_t *__restrict dbi_seqs; - MDBX_canary mt_canary; - void *mt_userctx; /* User-settable context */ + /* Массив с головами односвязных списков отслеживания курсоров. */ + MDBX_cursor **cursors; + + /* "Канареечные" маркеры/счетчики */ + MDBX_canary canary; + + /* User-settable context */ + void *userctx; union { struct { - /* For read txns: This thread/txn's reader table slot, or NULL. */ - MDBX_reader *reader; + /* For read txns: This thread/txn's reader table slot, or nullptr. 
*/ + reader_slot_t *reader; } to; struct { - meta_troika_t troika; + troika_t troika; /* In write txns, array of cursors for each DB */ - MDBX_PNL __restrict relist; /* Reclaimed GC pages */ - txnid_t last_reclaimed; /* ID of last used record */ + pnl_t __restrict relist; /* Reclaimed GC pages */ + struct { + /* The list of reclaimed txns from GC */ + txl_t __restrict reclaimed; + txnid_t last_reclaimed; /* ID of last used record */ + uint64_t time_acc; + } gc; #if MDBX_ENABLE_REFUND pgno_t loose_refund_wl /* FIXME: describe */; #endif /* MDBX_ENABLE_REFUND */ @@ -1249,17 +226,15 @@ struct MDBX_txn { /* dirtylist room: Dirty array size - dirty pages visible to this txn. * Includes ancestor txns' dirty pages not hidden by other txns' * dirty/spilled pages. Thus commit(nested txn) has room to merge - * dirtylist into mt_parent after freeing hidden mt_parent pages. */ + * dirtylist into parent after freeing hidden parent pages. */ size_t dirtyroom; /* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */ - MDBX_dpl *__restrict dirtylist; - /* The list of reclaimed txns from GC */ - MDBX_TXL __restrict lifo_reclaimed; + dpl_t *__restrict dirtylist; /* The list of pages that became unused during this transaction. */ - MDBX_PNL __restrict retired_pages; + pnl_t __restrict retired_pages; /* The list of loose pages that became unused and may be reused - * in this transaction, linked through `mp_next`. */ - MDBX_page *__restrict loose_pages; + * in this transaction, linked through `page_next()`. */ + page_t *__restrict loose_pages; /* Number of loose pages (tw.loose_pages) */ size_t loose_count; union { @@ -1268,172 +243,153 @@ struct MDBX_txn { /* The sorted list of dirty pages we temporarily wrote to disk * because the dirty list was full. page numbers in here are * shifted left by 1, deleted slots have the LSB set. 
*/ - MDBX_PNL __restrict list; + pnl_t __restrict list; } spilled; size_t writemap_dirty_npages; size_t writemap_spilled_npages; }; - uint64_t gc_time_acc; } tw; }; }; -#if MDBX_WORDBITS >= 64 -#define CURSOR_STACK 32 -#else -#define CURSOR_STACK 24 -#endif +#define CURSOR_STACK_SIZE (16 + MDBX_WORDBITS / 4) -struct MDBX_xcursor; - -/* Cursors are used for all DB operations. - * A cursor holds a path of (page pointer, key index) from the DB - * root to a position in the DB, plus other state. MDBX_DUPSORT - * cursors include an xcursor to the current data item. Write txns - * track their cursors and keep them up to date when data moves. - * Exception: An xcursor's pointer to a P_SUBP page can be stale. - * (A node with F_DUPDATA but no F_SUBDATA contains a subpage). */ struct MDBX_cursor { -#define MDBX_MC_LIVE UINT32_C(0xFE05D5B1) -#define MDBX_MC_READY4CLOSE UINT32_C(0x2817A047) -#define MDBX_MC_WAIT4EOT UINT32_C(0x90E297A7) - uint32_t mc_signature; - /* The database handle this cursor operates on */ - MDBX_dbi mc_dbi; - /* Next cursor on this DB in this txn */ - MDBX_cursor *mc_next; - /* Backup of the original cursor if this cursor is a shadow */ - MDBX_cursor *mc_backup; - /* Context used for databases with MDBX_DUPSORT, otherwise NULL */ - struct MDBX_xcursor *mc_xcursor; - /* The transaction that owns this cursor */ - MDBX_txn *mc_txn; - /* The database record for this cursor */ - MDBX_db *mc_db; - /* The database auxiliary record for this cursor */ - MDBX_dbx *mc_dbx; - /* The mt_dbi_state[] for this DBI */ - uint8_t *__restrict mc_dbi_state; - uint8_t mc_snum; /* number of pushed pages */ - uint8_t mc_top; /* index of top page, normally mc_snum-1 */ + int32_t signature; + union { + /* Тут некоторые трюки/заморочки с тем чтобы во всех основных сценариях + * проверять состояние курсора одной простой операцией сравнения, + * и при этом ни на каплю не усложнять код итерации стека курсора. 
+ * + * Поэтому решение такое: + * - поля flags и top сделаны знаковыми, а их отрицательные значения + * используются для обозначения не-установленного/не-инициализированного + * состояния курсора; + * - для инвалидации/сброса курсора достаточно записать отрицательное + * значение в объединенное поле top_and_flags; + * - все проверки состояния сводятся к сравнению одного из полей + * flags/snum/snum_and_flags, которые в зависимости от сценария, + * трактуются либо как знаковые, либо как безнаковые. */ + __anonymous_struct_extension__ struct { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + int8_t flags; + /* индекс вершины стека, меньше нуля для не-инициализированного курсора */ + int8_t top; +#else + int8_t top; + int8_t flags; +#endif + }; + int16_t top_and_flags; + }; + /* флаги проверки, в том числе биты для проверки типа листовых страниц. */ + uint8_t checking; - /* Cursor state flags. */ -#define C_INITIALIZED 0x01 /* cursor has been initialized and is valid */ -#define C_EOF 0x02 /* No more data */ -#define C_SUB 0x04 /* Cursor is a sub-cursor */ -#define C_DEL 0x08 /* last op was a cursor_del */ -#define C_UNTRACK 0x10 /* Un-track cursor when closing */ -#define C_GCU \ - 0x20 /* Происходит подготовка к обновлению GC, поэтому \ - * можно брать страницы из GC даже для FREE_DBI */ - uint8_t mc_flags; - - /* Cursor checking flags. 
*/ -#define CC_BRANCH 0x01 /* same as P_BRANCH for CHECK_LEAF_TYPE() */ -#define CC_LEAF 0x02 /* same as P_LEAF for CHECK_LEAF_TYPE() */ -#define CC_OVERFLOW 0x04 /* same as P_OVERFLOW for CHECK_LEAF_TYPE() */ -#define CC_UPDATING 0x08 /* update/rebalance pending */ -#define CC_SKIPORD 0x10 /* don't check keys ordering */ -#define CC_LEAF2 0x20 /* same as P_LEAF2 for CHECK_LEAF_TYPE() */ -#define CC_RETIRING 0x40 /* refs to child pages may be invalid */ -#define CC_PAGECHECK 0x80 /* perform page checking, see MDBX_VALIDATION */ - uint8_t mc_checking; - - MDBX_page *mc_pg[CURSOR_STACK]; /* stack of pushed pages */ - indx_t mc_ki[CURSOR_STACK]; /* stack of page indices */ + /* Указывает на txn->dbi_state[] для DBI этого курсора. + * Модификатор __restrict тут полезен и безопасен в текущем понимании, + * так как пересечение возможно только с dbi_state транзакции, + * и происходит по-чтению до последующего изменения/записи. */ + uint8_t *__restrict dbi_state; + /* Связь списка отслеживания курсоров в транзакции */ + MDBX_txn *txn; + /* Указывает на tree->dbs[] для DBI этого курсора. */ + tree_t *tree; + /* Указывает на env->kvs[] для DBI этого курсора. */ + clc2_t *clc; + subcur_t *__restrict subcur; + page_t *pg[CURSOR_STACK_SIZE]; /* stack of pushed pages */ + indx_t ki[CURSOR_STACK_SIZE]; /* stack of page indices */ + MDBX_cursor *next; + /* Состояние на момент старта вложенной транзакции */ + MDBX_cursor *backup; }; -#define CHECK_LEAF_TYPE(mc, mp) \ - (((PAGETYPE_WHOLE(mp) ^ (mc)->mc_checking) & \ - (CC_BRANCH | CC_LEAF | CC_OVERFLOW | CC_LEAF2)) == 0) +struct inner_cursor { + MDBX_cursor cursor; + tree_t nested_tree; +}; -/* Context for sorted-dup records. - * We could have gone to a fully recursive design, with arbitrarily - * deep nesting of sub-databases. But for now we only handle these - * levels - main DB, optional sub-DB, sorted-duplicate DB. 
*/ -typedef struct MDBX_xcursor { - /* A sub-cursor for traversing the Dup DB */ - MDBX_cursor mx_cursor; - /* The database record for this Dup DB */ - MDBX_db mx_db; - /* The auxiliary DB record for this Dup DB */ - MDBX_dbx mx_dbx; -} MDBX_xcursor; - -typedef struct MDBX_cursor_couple { +struct cursor_couple { MDBX_cursor outer; - void *mc_userctx; /* User-settable context */ - MDBX_xcursor inner; -} MDBX_cursor_couple; + void *userctx; /* User-settable context */ + subcur_t inner; +}; -struct mdbx_defer_free_item { - struct mdbx_defer_free_item *next; +struct defer_free_item { + struct defer_free_item *next; uint64_t timestamp; }; +enum env_flags { + /* Failed to update the meta page. Probably an I/O error. */ + ENV_FATAL_ERROR = INT32_MIN /* 0x80000000 */, + /* Some fields are initialized. */ + ENV_ACTIVE = UINT32_C(0x20000000), + /* me_txkey is set */ + ENV_TXKEY = UINT32_C(0x10000000), + /* Legacy MDBX_MAPASYNC (prior v0.9) */ + DEPRECATED_MAPASYNC = UINT32_C(0x100000), + /* Legacy MDBX_COALESCE (prior v0.12) */ + DEPRECATED_COALESCE = UINT32_C(0x2000000), + ENV_INTERNAL_FLAGS = ENV_FATAL_ERROR | ENV_ACTIVE | ENV_TXKEY, + /* Only a subset of the mdbx_env flags can be changed + * at runtime. Changing other flags requires closing the + * environment and re-opening it with the new flags. */ + ENV_CHANGEABLE_FLAGS = MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | + DEPRECATED_MAPASYNC | MDBX_NOMEMINIT | + DEPRECATED_COALESCE | MDBX_PAGEPERTURB | MDBX_ACCEDE | + MDBX_VALIDATION, + ENV_CHANGELESS_FLAGS = MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | + MDBX_NOSTICKYTHREADS | MDBX_NORDAHEAD | + MDBX_LIFORECLAIM | MDBX_EXCLUSIVE, + ENV_USABLE_FLAGS = ENV_CHANGEABLE_FLAGS | ENV_CHANGELESS_FLAGS +}; + /* The database environment. */ struct MDBX_env { /* ----------------------------------------------------- mostly static part */ -#define MDBX_ME_SIGNATURE UINT32_C(0x9A899641) - MDBX_atomic_uint32_t me_signature; - /* Failed to update the meta page. Probably an I/O error. 
*/ -#define MDBX_FATAL_ERROR UINT32_C(0x80000000) - /* Some fields are initialized. */ -#define MDBX_ENV_ACTIVE UINT32_C(0x20000000) - /* me_txkey is set */ -#define MDBX_ENV_TXKEY UINT32_C(0x10000000) - /* Legacy MDBX_MAPASYNC (prior v0.9) */ -#define MDBX_DEPRECATED_MAPASYNC UINT32_C(0x100000) - /* Legacy MDBX_COALESCE (prior v0.12) */ -#define MDBX_DEPRECATED_COALESCE UINT32_C(0x2000000) -#define ENV_INTERNAL_FLAGS (MDBX_FATAL_ERROR | MDBX_ENV_ACTIVE | MDBX_ENV_TXKEY) - uint32_t me_flags; - unsigned me_psize; /* DB page size, initialized from me_os_psize */ - osal_mmap_t me_dxb_mmap; /* The main data file */ -#define me_map me_dxb_mmap.base -#define me_lazy_fd me_dxb_mmap.fd - mdbx_filehandle_t me_dsync_fd, me_fd4meta; + mdbx_atomic_uint32_t signature; + uint32_t flags; + unsigned ps; /* DB page size, initialized from me_os_psize */ + osal_mmap_t dxb_mmap; /* The main data file */ +#define lazy_fd dxb_mmap.fd + mdbx_filehandle_t dsync_fd, fd4meta; #if defined(_WIN32) || defined(_WIN64) -#define me_overlapped_fd me_ioring.overlapped_fd - HANDLE me_data_lock_event; -#endif /* Windows */ - osal_mmap_t me_lck_mmap; /* The lock file */ -#define me_lfd me_lck_mmap.fd - struct MDBX_lockinfo *me_lck; + HANDLE dxb_lock_event; +#endif /* Windows */ + osal_mmap_t lck_mmap; /* The lock file */ + lck_t *lck; - uint16_t me_leaf_nodemax; /* max size of a leaf-node */ - uint16_t me_branch_nodemax; /* max size of a branch-node */ - uint16_t me_subpage_limit; - uint16_t me_subpage_room_threshold; - uint16_t me_subpage_reserve_prereq; - uint16_t me_subpage_reserve_limit; - atomic_pgno_t me_mlocked_pgno; - uint8_t me_psize2log; /* log2 of DB page size */ - int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */ - uint16_t me_merge_threshold, - me_merge_threshold_gc; /* pages emptier than this are candidates for - merging */ - unsigned me_os_psize; /* OS page size, from osal_syspagesize() */ - unsigned me_maxreaders; /* size of the reader table */ - MDBX_dbi 
me_maxdbs; /* size of the DB table */ - uint32_t me_pid; /* process ID of this env */ - osal_thread_key_t me_txkey; /* thread-key for readers */ - struct { /* path to the DB files */ + uint16_t leaf_nodemax; /* max size of a leaf-node */ + uint16_t branch_nodemax; /* max size of a branch-node */ + uint16_t subpage_limit; + uint16_t subpage_room_threshold; + uint16_t subpage_reserve_prereq; + uint16_t subpage_reserve_limit; + atomic_pgno_t mlocked_pgno; + uint8_t ps2ln; /* log2 of DB page size */ + int8_t stuck_meta; /* recovery-only: target meta page or less that zero */ + uint16_t merge_threshold, merge_threshold_gc; /* pages emptier than this are + candidates for merging */ + unsigned max_readers; /* size of the reader table */ + MDBX_dbi max_dbi; /* size of the DB table */ + uint32_t pid; /* process ID of this env */ + osal_thread_key_t me_txkey; /* thread-key for readers */ + struct { /* path to the DB files */ pathchar_t *lck, *dxb, *specified; void *buffer; - } me_pathname; - void *me_pbuf; /* scratch area for DUPSORT put() */ - MDBX_txn *me_txn0; /* preallocated write transaction */ - MDBX_dbx *me_dbxs; /* array of static DB info */ - uint16_t *__restrict me_db_flags; /* array of flags from MDBX_db.md_flags */ - MDBX_atomic_uint32_t *me_dbi_seqs; /* array of dbi sequence numbers */ - unsigned - me_maxgc_ov1page; /* Number of pgno_t fit in a single overflow page */ - unsigned me_maxgc_per_branch; - uint32_t me_live_reader; /* have liveness lock in reader table */ - void *me_userctx; /* User-settable context */ - MDBX_hsr_func *me_hsr_callback; /* Callback for kicking laggard readers */ - size_t me_madv_threshold; + } pathname; + void *page_auxbuf; /* scratch area for DUPSORT put() */ + MDBX_txn *basal_txn; /* preallocated write transaction */ + kvx_t *kvs; /* array of auxiliary key-value properties */ + uint8_t *__restrict dbs_flags; /* array of flags from tree_t.flags */ + mdbx_atomic_uint32_t *dbi_seqs; /* array of dbi sequence numbers */ + unsigned 
maxgc_large1page; /* Number of pgno_t fit in a single large page */ + unsigned maxgc_per_branch; + uint32_t registered_reader_pid; /* have liveness lock in reader table */ + void *userctx; /* User-settable context */ + MDBX_hsr_func *hsr_callback; /* Callback for kicking laggard readers */ + size_t madv_threshold; struct { unsigned dp_reserve_limit; @@ -1461,9 +417,9 @@ struct MDBX_env { unsigned prefault_write : 1; } non_auto; } flags; - } me_options; + } options; - /* struct me_dbgeo used for accepting db-geo params from user for the new + /* struct geo_in_bytes used for accepting db-geo params from user for the new * database creation, i.e. when mdbx_env_set_geometry() was called before * mdbx_env_open(). */ struct { @@ -1472,7 +428,7 @@ struct MDBX_env { size_t now; /* current size of datafile */ size_t grow; /* step to grow datafile */ size_t shrink; /* threshold to shrink datafile */ - } me_dbgeo; + } geo_in_bytes; #if MDBX_LOCKING == MDBX_LOCKING_SYSV union { @@ -1480,46 +436,23 @@ struct MDBX_env { int semid; } me_sysv_ipc; #endif /* MDBX_LOCKING == MDBX_LOCKING_SYSV */ - bool me_incore; - bool me_prefault_write; + bool incore; + bool prefault_write_activated; #if MDBX_ENABLE_DBI_LOCKFREE - struct mdbx_defer_free_item *me_defer_free; + defer_free_item_t *defer_free; #endif /* MDBX_ENABLE_DBI_LOCKFREE */ - /* --------------------------------------------------- mostly volatile part */ - - MDBX_txn *me_txn; /* current write transaction */ - osal_fastmutex_t me_dbi_lock; - unsigned me_numdbs; /* number of DBs opened */ - - unsigned me_dp_reserve_len; - MDBX_page *__restrict me_dp_reserve; /* list of malloc'ed blocks for re-use */ - - /* PNL of pages that became unused in a write txn */ - MDBX_PNL __restrict me_retired_pages; - osal_ioring_t me_ioring; - -#if defined(_WIN32) || defined(_WIN64) - osal_srwlock_t me_remap_guard; - /* Workaround for LockFileEx and WriteFile multithread bug */ - CRITICAL_SECTION me_windowsbug_lock; - char *me_pathname_char; /* cache 
of multi-byte representation of pathname - to the DB files */ -#else - osal_fastmutex_t me_remap_guard; -#endif - /* -------------------------------------------------------------- debugging */ #if MDBX_DEBUG - MDBX_assert_func *me_assert_func; /* Callback for assertion failures */ + MDBX_assert_func *assert_func; /* Callback for assertion failures */ #endif #ifdef ENABLE_MEMCHECK - int me_valgrind_handle; + int valgrind_handle; #endif #if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) - pgno_t me_poison_edge; + pgno_t poison_edge; #endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ #ifndef xMDBX_DEBUG_SPILLING @@ -1529,100 +462,37 @@ struct MDBX_env { size_t debug_dirtied_est, debug_dirtied_act; #endif /* xMDBX_DEBUG_SPILLING */ + /* --------------------------------------------------- mostly volatile part */ + + MDBX_txn *txn; /* current write transaction */ + osal_fastmutex_t dbi_lock; + unsigned n_dbi; /* number of DBs opened */ + + unsigned shadow_reserve_len; + page_t *__restrict shadow_reserve; /* list of malloc'ed blocks for re-use */ + + osal_ioring_t ioring; + +#if defined(_WIN32) || defined(_WIN64) + osal_srwlock_t remap_guard; + /* Workaround for LockFileEx and WriteFile multithread bug */ + CRITICAL_SECTION windowsbug_lock; + char *pathname_char; /* cache of multi-byte representation of pathname + to the DB files */ +#else + osal_fastmutex_t remap_guard; +#endif + /* ------------------------------------------------- stub for lck-less mode */ - MDBX_atomic_uint64_t - x_lckless_stub[(sizeof(MDBX_lockinfo) + MDBX_CACHELINE_SIZE - 1) / - sizeof(MDBX_atomic_uint64_t)]; + mdbx_atomic_uint64_t + lckless_placeholder[(sizeof(lck_t) + MDBX_CACHELINE_SIZE - 1) / + sizeof(mdbx_atomic_uint64_t)]; }; -#ifndef __cplusplus /*----------------------------------------------------------------------------*/ -/* Cache coherence and mmap invalidation */ -#if MDBX_CPU_WRITEBACK_INCOHERENT -#define osal_flush_incoherent_cpu_writeback() osal_memory_barrier() -#else 
-#define osal_flush_incoherent_cpu_writeback() osal_compiler_barrier() -#endif /* MDBX_CPU_WRITEBACK_INCOHERENT */ - -MDBX_MAYBE_UNUSED static __inline void -osal_flush_incoherent_mmap(const void *addr, size_t nbytes, - const intptr_t pagesize) { -#if MDBX_MMAP_INCOHERENT_FILE_WRITE - char *const begin = (char *)(-pagesize & (intptr_t)addr); - char *const end = - (char *)(-pagesize & (intptr_t)((char *)addr + nbytes + pagesize - 1)); - int err = msync(begin, end - begin, MS_SYNC | MS_INVALIDATE) ? errno : 0; - eASSERT(nullptr, err == 0); - (void)err; -#else - (void)pagesize; -#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ - -#if MDBX_MMAP_INCOHERENT_CPU_CACHE -#ifdef DCACHE - /* MIPS has cache coherency issues. - * Note: for any nbytes >= on-chip cache size, entire is flushed. */ - cacheflush((void *)addr, nbytes, DCACHE); -#else -#error "Oops, cacheflush() not available" -#endif /* DCACHE */ -#endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */ - -#if !MDBX_MMAP_INCOHERENT_FILE_WRITE && !MDBX_MMAP_INCOHERENT_CPU_CACHE - (void)addr; - (void)nbytes; -#endif -} - -/*----------------------------------------------------------------------------*/ -/* Internal prototypes */ - -MDBX_INTERNAL_FUNC int cleanup_dead_readers(MDBX_env *env, int rlocked, - int *dead); -MDBX_INTERNAL_FUNC void global_ctor(void); -MDBX_INTERNAL_FUNC void osal_ctor(void); -MDBX_INTERNAL_FUNC void global_dtor(void); -MDBX_INTERNAL_FUNC void osal_dtor(void); -MDBX_INTERNAL_FUNC void thread_dtor(void *ptr); - -#endif /* !__cplusplus */ - -#define MDBX_IS_ERROR(rc) \ - ((rc) != MDBX_RESULT_TRUE && (rc) != MDBX_RESULT_FALSE) - -/* Internal error codes, not exposed outside libmdbx */ -#define MDBX_NO_ROOT (MDBX_LAST_ADDED_ERRCODE + 10) - -/* Debugging output value of a cursor DBI: Negative in a sub-cursor. */ -#define DDBI(mc) \ - (((mc)->mc_flags & C_SUB) ? -(int)(mc)->mc_dbi : (int)(mc)->mc_dbi) - -/* Key size which fits in a DKBUF (debug key buffer). 
*/ -#define DKBUF_MAX 511 -#define DKBUF char _kbuf[DKBUF_MAX * 4 + 2] -#define DKEY(x) mdbx_dump_val(x, _kbuf, DKBUF_MAX * 2 + 1) -#define DVAL(x) mdbx_dump_val(x, _kbuf + DKBUF_MAX * 2 + 1, DKBUF_MAX * 2 + 1) - -#if MDBX_DEBUG -#define DKBUF_DEBUG DKBUF -#define DKEY_DEBUG(x) DKEY(x) -#define DVAL_DEBUG(x) DVAL(x) -#else -#define DKBUF_DEBUG ((void)(0)) -#define DKEY_DEBUG(x) ("-") -#define DVAL_DEBUG(x) ("-") -#endif - -/* An invalid page number. - * Mainly used to denote an empty tree. */ -#define P_INVALID (~(pgno_t)0) - -/* Test if the flags f are set in a flag word w. */ -#define F_ISSET(w, f) (((w) & (f)) == (f)) - -/* Round n up to an even number. */ -#define EVEN(n) (((n) + 1UL) & -2L) /* sign-extending -2 to match n+1U */ +/* pseudo-error code, not exposed outside libmdbx */ +#define MDBX_NO_ROOT (MDBX_LAST_ADDED_ERRCODE + 33) /* Default size of memory map. * This is certainly too small for any actual applications. Apps should @@ -1635,219 +505,96 @@ MDBX_INTERNAL_FUNC void thread_dtor(void *ptr); * Applications should set the table size using mdbx_env_set_maxreaders(). */ #define DEFAULT_READERS 61 -/* Test if a page is a leaf page */ -#define IS_LEAF(p) (((p)->mp_flags & P_LEAF) != 0) -/* Test if a page is a LEAF2 page */ -#define IS_LEAF2(p) unlikely(((p)->mp_flags & P_LEAF2) != 0) -/* Test if a page is a branch page */ -#define IS_BRANCH(p) (((p)->mp_flags & P_BRANCH) != 0) -/* Test if a page is an overflow page */ -#define IS_OVERFLOW(p) unlikely(((p)->mp_flags & P_OVERFLOW) != 0) -/* Test if a page is a sub page */ -#define IS_SUBP(p) (((p)->mp_flags & P_SUBP) != 0) +enum db_flags { + DB_PERSISTENT_FLAGS = MDBX_REVERSEKEY | MDBX_DUPSORT | MDBX_INTEGERKEY | + MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP, -/* Header for a single key/data pair within a page. - * Used in pages of type P_BRANCH and P_LEAF without P_LEAF2. - * We guarantee 2-byte alignment for 'MDBX_node's. - * - * Leaf node flags describe node contents. 
F_BIGDATA says the node's - * data part is the page number of an overflow page with actual data. - * F_DUPDATA and F_SUBDATA can be combined giving duplicate data in - * a sub-page/sub-database, and named databases (just F_SUBDATA). */ -typedef struct MDBX_node { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - union { - uint32_t mn_dsize; - uint32_t mn_pgno32; - }; - uint8_t mn_flags; /* see mdbx_node flags */ - uint8_t mn_extra; - uint16_t mn_ksize; /* key size */ -#else - uint16_t mn_ksize; /* key size */ - uint8_t mn_extra; - uint8_t mn_flags; /* see mdbx_node flags */ - union { - uint32_t mn_pgno32; - uint32_t mn_dsize; - }; -#endif /* __BYTE_ORDER__ */ + /* mdbx_dbi_open() flags */ + DB_USABLE_FLAGS = DB_PERSISTENT_FLAGS | MDBX_CREATE | MDBX_DB_ACCEDE, - /* mdbx_node Flags */ -#define F_BIGDATA 0x01 /* data put on overflow page */ -#define F_SUBDATA 0x02 /* data is a sub-database */ -#define F_DUPDATA 0x04 /* data has duplicates */ - - /* valid flags for mdbx_node_add() */ -#define NODE_ADD_FLAGS (F_DUPDATA | F_SUBDATA | MDBX_RESERVE | MDBX_APPEND) - -#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ - (!defined(__cplusplus) && defined(_MSC_VER)) - uint8_t mn_data[] /* key and data are appended here */; -#endif /* C99 */ -} MDBX_node; - -#define DB_PERSISTENT_FLAGS \ - (MDBX_REVERSEKEY | MDBX_DUPSORT | MDBX_INTEGERKEY | MDBX_DUPFIXED | \ - MDBX_INTEGERDUP | MDBX_REVERSEDUP) - -/* mdbx_dbi_open() flags */ -#define DB_USABLE_FLAGS (DB_PERSISTENT_FLAGS | MDBX_CREATE | MDBX_DB_ACCEDE) - -#define DB_VALID 0x8000u /* DB handle is valid, for me_db_flags */ -#define DB_POISON 0x7fffu /* update pending */ -#define DB_INTERNAL_FLAGS DB_VALID - -#if DB_INTERNAL_FLAGS & DB_USABLE_FLAGS -#error "Oops, some flags overlapped or wrong" -#endif -#if DB_PERSISTENT_FLAGS & ~DB_USABLE_FLAGS -#error "Oops, some flags overlapped or wrong" -#endif - -/* Max length of iov-vector passed to writev() call, used for auxilary writes */ -#define MDBX_AUXILARY_IOV_MAX 64 
-#if defined(IOV_MAX) && IOV_MAX < MDBX_AUXILARY_IOV_MAX -#undef MDBX_AUXILARY_IOV_MAX -#define MDBX_AUXILARY_IOV_MAX IOV_MAX -#endif /* MDBX_AUXILARY_IOV_MAX */ - -/* - * / - * | -1, a < b - * CMP2INT(a,b) = < 0, a == b - * | 1, a > b - * \ - */ -#define CMP2INT(a, b) (((a) != (b)) ? (((a) < (b)) ? -1 : 1) : 0) - -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t -int64pgno(int64_t i64) { - if (likely(i64 >= (int64_t)MIN_PAGENO && i64 <= (int64_t)MAX_PAGENO + 1)) - return (pgno_t)i64; - return (i64 < (int64_t)MIN_PAGENO) ? MIN_PAGENO : MAX_PAGENO; -} - -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t -pgno_add(size_t base, size_t augend) { - assert(base <= MAX_PAGENO + 1 && augend < MAX_PAGENO); - return int64pgno((int64_t)base + (int64_t)augend); -} - -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t -pgno_sub(size_t base, size_t subtrahend) { - assert(base >= MIN_PAGENO && base <= MAX_PAGENO + 1 && - subtrahend < MAX_PAGENO); - return int64pgno((int64_t)base - (int64_t)subtrahend); -} - -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __always_inline bool -is_powerof2(size_t x) { - return (x & (x - 1)) == 0; -} - -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __always_inline size_t -floor_powerof2(size_t value, size_t granularity) { - assert(is_powerof2(granularity)); - return value & ~(granularity - 1); -} - -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __always_inline size_t -ceil_powerof2(size_t value, size_t granularity) { - return floor_powerof2(value + granularity - 1, granularity); -} - -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static unsigned -log2n_powerof2(size_t value_uintptr) { - assert(value_uintptr > 0 && value_uintptr < INT32_MAX && - is_powerof2(value_uintptr)); - assert((value_uintptr & -(intptr_t)value_uintptr) == value_uintptr); - const uint32_t value_uint32 = (uint32_t)value_uintptr; -#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctz) - 
STATIC_ASSERT(sizeof(value_uint32) <= sizeof(unsigned)); - return __builtin_ctz(value_uint32); -#elif defined(_MSC_VER) - unsigned long index; - STATIC_ASSERT(sizeof(value_uint32) <= sizeof(long)); - _BitScanForward(&index, value_uint32); - return index; -#else - static const uint8_t debruijn_ctz32[32] = { - 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, - 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; - return debruijn_ctz32[(uint32_t)(value_uint32 * 0x077CB531ul) >> 27]; -#endif -} - -/* Only a subset of the mdbx_env flags can be changed - * at runtime. Changing other flags requires closing the - * environment and re-opening it with the new flags. */ -#define ENV_CHANGEABLE_FLAGS \ - (MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_DEPRECATED_MAPASYNC | \ - MDBX_NOMEMINIT | MDBX_DEPRECATED_COALESCE | MDBX_PAGEPERTURB | \ - MDBX_ACCEDE | MDBX_VALIDATION) -#define ENV_CHANGELESS_FLAGS \ - (MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOSTICKYTHREADS | \ - MDBX_NORDAHEAD | MDBX_LIFORECLAIM | MDBX_EXCLUSIVE) -#define ENV_USABLE_FLAGS (ENV_CHANGEABLE_FLAGS | ENV_CHANGELESS_FLAGS) + DB_VALID = 0x80u /* DB handle is valid, for dbs_flags */, + DB_POISON = 0x7fu /* update pending */, + DB_INTERNAL_FLAGS = DB_VALID +}; #if !defined(__cplusplus) || CONSTEXPR_ENUM_FLAGS_OPERATIONS MDBX_MAYBE_UNUSED static void static_checks(void) { + STATIC_ASSERT(MDBX_WORDBITS == sizeof(void *) * CHAR_BIT); + STATIC_ASSERT(UINT64_C(0x80000000) == (uint32_t)ENV_FATAL_ERROR); STATIC_ASSERT_MSG(INT16_MAX - CORE_DBS == MDBX_MAX_DBI, "Oops, MDBX_MAX_DBI or CORE_DBS?"); STATIC_ASSERT_MSG((unsigned)(MDBX_DB_ACCEDE | MDBX_CREATE) == ((DB_USABLE_FLAGS | DB_INTERNAL_FLAGS) & (ENV_USABLE_FLAGS | ENV_INTERNAL_FLAGS)), "Oops, some flags overlapped or wrong"); + STATIC_ASSERT_MSG((DB_INTERNAL_FLAGS & DB_USABLE_FLAGS) == 0, + "Oops, some flags overlapped or wrong"); + STATIC_ASSERT_MSG((DB_PERSISTENT_FLAGS & ~DB_USABLE_FLAGS) == 0, + "Oops, some flags overlapped or wrong"); + 
STATIC_ASSERT(DB_PERSISTENT_FLAGS <= UINT8_MAX); STATIC_ASSERT_MSG((ENV_INTERNAL_FLAGS & ENV_USABLE_FLAGS) == 0, "Oops, some flags overlapped or wrong"); + + STATIC_ASSERT_MSG( + (txn_state_flags & (txn_rw_begin_flags | txn_ro_begin_flags)) == 0, + "Oops, some txn flags overlapped or wrong"); + STATIC_ASSERT_MSG( + ((txn_rw_begin_flags | txn_ro_begin_flags | txn_state_flags) & + txn_shrink_allowed) == 0, + "Oops, some txn flags overlapped or wrong"); + + STATIC_ASSERT(sizeof(reader_slot_t) == 32); +#if MDBX_LOCKING > 0 + STATIC_ASSERT(offsetof(lck_t, wrt_lock) % MDBX_CACHELINE_SIZE == 0); + STATIC_ASSERT(offsetof(lck_t, rdt_lock) % MDBX_CACHELINE_SIZE == 0); +#else + STATIC_ASSERT(offsetof(lck_t, cached_oldest) % MDBX_CACHELINE_SIZE == 0); + STATIC_ASSERT(offsetof(lck_t, rdt_length) % MDBX_CACHELINE_SIZE == 0); +#endif /* MDBX_LOCKING */ + STATIC_ASSERT(offsetof(lck_t, rdt) % MDBX_CACHELINE_SIZE == 0); + +#if FLEXIBLE_ARRAY_MEMBERS + STATIC_ASSERT(NODESIZE == offsetof(node_t, payload)); + STATIC_ASSERT(PAGEHDRSZ == offsetof(page_t, entries)); +#endif /* FLEXIBLE_ARRAY_MEMBERS */ + STATIC_ASSERT(sizeof(clc_t) == 3 * sizeof(void *)); + STATIC_ASSERT(sizeof(kvx_t) == 8 * sizeof(void *)); + +#if MDBX_WORDBITS == 64 +#define KVX_SIZE_LN2 6 +#else +#define KVX_SIZE_LN2 5 +#endif + STATIC_ASSERT(sizeof(kvx_t) == (1u << KVX_SIZE_LN2)); } #endif /* Disabled for MSVC 19.0 (VisualStudio 2015) */ -#ifdef __cplusplus -} -#endif - -#define MDBX_ASAN_POISON_MEMORY_REGION(addr, size) \ - do { \ - TRACE("POISON_MEMORY_REGION(%p, %zu) at %u", (void *)(addr), \ - (size_t)(size), __LINE__); \ - ASAN_POISON_MEMORY_REGION(addr, size); \ - } while (0) - -#define MDBX_ASAN_UNPOISON_MEMORY_REGION(addr, size) \ - do { \ - TRACE("UNPOISON_MEMORY_REGION(%p, %zu) at %u", (void *)(addr), \ - (size_t)(size), __LINE__); \ - ASAN_UNPOISON_MEMORY_REGION(addr, size); \ - } while (0) - /******************************************************************************/ -/** \brief Page types for traverse 
the b-tree. - * \see mdbx_env_pgwalk() \see MDBX_pgvisitor_func */ -enum MDBX_page_type_t { - MDBX_page_broken, - MDBX_page_large, - MDBX_page_branch, - MDBX_page_leaf, - MDBX_page_dupfixed_leaf, - MDBX_subpage_leaf, - MDBX_subpage_dupfixed_leaf, - MDBX_subpage_broken, -}; -typedef enum MDBX_page_type_t MDBX_page_type_t; +#include "node.h" -typedef struct MDBX_walk_sdb { - MDBX_val name; - struct MDBX_db *internal, *nested; -} MDBX_walk_sdb_t; +#include "dbi.h" -/** \brief Callback function for traverse the b-tree. \see mdbx_env_pgwalk() */ -typedef int -MDBX_pgvisitor_func(const size_t pgno, const unsigned number, void *const ctx, - const int deep, const MDBX_walk_sdb_t *subdb, - const size_t page_size, const MDBX_page_type_t page_type, - const MDBX_error_t err, const size_t nentries, - const size_t payload_bytes, const size_t header_bytes, - const size_t unused_bytes); +#include "cogs.h" + +#include "cursor.h" + +#include "dpl.h" + +#include "gc.h" + +#include "lck.h" + +#include "meta.h" + +#include "page-iov.h" + +#include "spill.h" + +#include "page-ops.h" + +#include "tls.h" + +#include "walk.h" + +#include "sort.h" diff --git a/src/layout-dxb.h b/src/layout-dxb.h new file mode 100644 index 00000000..06b0e834 --- /dev/null +++ b/src/layout-dxb.h @@ -0,0 +1,306 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \note Please refer to the COPYRIGHT file for explanations license change, +/// credits and acknowledgments. +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +#pragma pack(push, 4) + +/* A stamp that identifies a file as an MDBX file. + * There's nothing special about this value other than that it is easily + * recognizable, and it will reflect any byte order mismatches. */ +#define MDBX_MAGIC UINT64_C(/* 56-bit prime */ 0x59659DBDEF4C11) + +/* FROZEN: The version number for a database's datafile format. 
*/ +#define MDBX_DATA_VERSION 3 + +#define MDBX_DATA_MAGIC \ + ((MDBX_MAGIC << 8) + MDBX_PNL_ASCENDING * 64 + MDBX_DATA_VERSION) +#define MDBX_DATA_MAGIC_LEGACY_COMPAT \ + ((MDBX_MAGIC << 8) + MDBX_PNL_ASCENDING * 64 + 2) +#define MDBX_DATA_MAGIC_LEGACY_DEVEL ((MDBX_MAGIC << 8) + 255) + +/* handle for the DB used to track free pages. */ +#define FREE_DBI 0 +/* handle for the default DB. */ +#define MAIN_DBI 1 +/* Number of DBs in metapage (free and main) - also hardcoded elsewhere */ +#define CORE_DBS 2 + +/* Number of meta pages - also hardcoded elsewhere */ +#define NUM_METAS 3 + +/* A page number in the database. + * + * MDBX uses 32 bit for page numbers. This limits database + * size up to 2^44 bytes, in case of 4K pages. */ +typedef uint32_t pgno_t; +typedef mdbx_atomic_uint32_t atomic_pgno_t; +#define PRIaPGNO PRIu32 +#define MAX_PAGENO UINT32_C(0x7FFFffff) +#define MIN_PAGENO NUM_METAS + +/* An invalid page number. + * Mainly used to denote an empty tree. */ +#define P_INVALID (~(pgno_t)0) + +/* A transaction ID. */ +typedef uint64_t txnid_t; +typedef mdbx_atomic_uint64_t atomic_txnid_t; +#define PRIaTXN PRIi64 +#define MIN_TXNID UINT64_C(1) +#define MAX_TXNID (SAFE64_INVALID_THRESHOLD - 1) +#define INITIAL_TXNID (MIN_TXNID + NUM_METAS - 1) +#define INVALID_TXNID UINT64_MAX + +/* Used for offsets within a single page. 
*/ +typedef uint16_t indx_t; + +typedef struct tree { + uint16_t flags; /* see mdbx_dbi_open */ + uint16_t height; /* height of this tree */ + uint32_t dupfix_size; /* key-size for MDBX_DUPFIXED (DUPFIX pages) */ + pgno_t root; /* the root page of this tree */ + pgno_t branch_pages; /* number of internal pages */ + pgno_t leaf_pages; /* number of leaf pages */ + pgno_t large_pages; /* number of large pages */ + uint64_t sequence; /* table sequence counter */ + uint64_t items; /* number of data items */ + uint64_t mod_txnid; /* txnid of last committed modification */ +} tree_t; + +/* database size-related parameters */ +typedef struct geo { + uint16_t grow_pv; /* datafile growth step as a 16-bit packed (exponential + quantized) value */ + uint16_t shrink_pv; /* datafile shrink threshold as a 16-bit packed + (exponential quantized) value */ + pgno_t lower; /* minimal size of datafile in pages */ + pgno_t upper; /* maximal size of datafile in pages */ + union { + pgno_t now; /* current size of datafile in pages */ + pgno_t end_pgno; + }; + union { + pgno_t first_unallocated; /* first unused page in the datafile, + but actually the file may be shorter. */ + pgno_t next_pgno; + }; +} geo_t; + +typedef union bin128 { + __anonymous_struct_extension__ struct { + uint64_t x, y; + }; + __anonymous_struct_extension__ struct { + uint32_t a, b, c, d; + }; +} bin128_t; + +/* Meta page content. + * A meta page is the start point for accessing a database snapshot. + * Pages 0-2 are meta pages. */ +typedef struct meta { + /* Stamp identifying this as an MDBX file. + * It must be set to MDBX_MAGIC with MDBX_DATA_VERSION. 
*/ + uint32_t magic_and_version[2]; + + /* txnid that committed this meta, the first of a two-phase-update pair */ + union { + mdbx_atomic_uint32_t txnid_a[2]; + uint64_t unsafe_txnid; + }; + + uint16_t reserve16; /* extra flags, zero (nothing) for now */ + uint8_t validator_id; /* ID of checksum and page validation method, + * zero (nothing) for now */ + int8_t extra_pagehdr; /* extra bytes in the page header, + * zero (nothing) for now */ + + geo_t geometry; /* database size-related parameters */ + + union { + struct { + tree_t gc, main; + } trees; + __anonymous_struct_extension__ struct { + uint16_t gc_flags; + uint16_t gc_height; + uint32_t pagesize; + }; + }; + + MDBX_canary canary; + +#define DATASIGN_NONE 0u +#define DATASIGN_WEAK 1u +#define SIGN_IS_STEADY(sign) ((sign) > DATASIGN_WEAK) + union { + uint32_t sign[2]; + uint64_t unsafe_sign; + }; + + /* txnid that committed this meta, the second of a two-phase-update pair */ + mdbx_atomic_uint32_t txnid_b[2]; + + /* Number of non-meta pages which were put in GC after COW. May be 0 in case + * DB was previously handled by libmdbx without corresponding feature. + * This value in couple with reader.snapshot_pages_retired allows fast + * estimation of "how much reader is restraining GC recycling". */ + uint32_t pages_retired[2]; + + /* The analogue /proc/sys/kernel/random/boot_id or similar to determine + * whether the system was rebooted after the last use of the database files. + * If there was no reboot, but there is no need to rollback to the last + * steady sync point. Zeros mean that no relevant information is available + * from the system. 
*/ + bin128_t bootid; +} meta_t; + +#pragma pack(1) + +typedef enum page_type { + P_BRANCH = 0x01u /* branch page */, + P_LEAF = 0x02u /* leaf page */, + P_LARGE = 0x04u /* large/overflow page */, + P_META = 0x08u /* meta page */, + P_LEGACY_DIRTY = 0x10u /* legacy P_DIRTY flag prior to v0.10 958fd5b9 */, + P_BAD = P_LEGACY_DIRTY /* explicit flag for invalid/bad page */, + P_DUPFIX = 0x20u /* for MDBX_DUPFIXED records */, + P_SUBP = 0x40u /* for MDBX_DUPSORT sub-pages */, + P_SPILLED = 0x2000u /* spilled in parent txn */, + P_LOOSE = 0x4000u /* page was dirtied then freed, can be reused */, + P_FROZEN = 0x8000u /* used for retire page with known status */, + P_ILL_BITS = (uint16_t)~(P_BRANCH | P_LEAF | P_DUPFIX | P_LARGE | P_SPILLED), + + page_broken = 0, + page_large = P_LARGE, + page_branch = P_BRANCH, + page_leaf = P_LEAF, + page_dupfix_leaf = P_DUPFIX, + page_sub_leaf = P_SUBP | P_LEAF, + page_sub_dupfix_leaf = P_SUBP | P_DUPFIX, + page_sub_broken = P_SUBP, +} page_type_t; + +/* Common header for all page types. The page type depends on flags. + * + * P_BRANCH and P_LEAF pages have unsorted 'node_t's at the end, with + * sorted entries[] entries referring to them. Exception: P_DUPFIX pages + * omit entries and pack sorted MDBX_DUPFIXED values after the page header. + * + * P_LARGE records occupy one or more contiguous pages where only the + * first has a page header. They hold the real data of N_BIGDATA nodes. + * + * P_SUBP sub-pages are small leaf "pages" with duplicate data. + * A node with flag N_DUPDATA but not N_SUBDATA contains a sub-page. + * (Duplicate data can also go in sub-databases, which use normal pages.) + * + * P_META pages contain meta_t, the start point of an MDBX snapshot. + * + * Each non-metapage up to meta_t.mm_last_pg is reachable exactly once + * in the snapshot: Either used by a database or listed in a GC record. 
*/ +typedef struct page { + uint64_t txnid; /* txnid which created page, maybe zero in legacy DB */ + uint16_t dupfix_ksize; /* key size if this is a DUPFIX page */ + uint16_t flags; + union { + uint32_t pages; /* number of overflow pages */ + __anonymous_struct_extension__ struct { + indx_t lower; /* lower bound of free space */ + indx_t upper; /* upper bound of free space */ + }; + }; + pgno_t pgno; /* page number */ + +#if FLEXIBLE_ARRAY_MEMBERS + indx_t entries[] /* dynamic size */; +#endif /* FLEXIBLE_ARRAY_MEMBERS */ +} page_t; + +/* Size of the page header, excluding dynamic data at the end */ +#define PAGEHDRSZ 20u + +/* Header for a single key/data pair within a page. + * Used in pages of type P_BRANCH and P_LEAF without P_DUPFIX. + * We guarantee 2-byte alignment for 'node_t's. + * + * Leaf node flags describe node contents. N_BIGDATA says the node's + * data part is the page number of an overflow page with actual data. + * N_DUPDATA and N_SUBDATA can be combined giving duplicate data in + * a sub-page/sub-database, and named databases (just N_SUBDATA). 
*/ +typedef struct node { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + union { + uint32_t dsize; + uint32_t child_pgno; + }; + uint8_t flags; /* see node_flags */ + uint8_t extra; + uint16_t ksize; /* key size */ +#else + uint16_t ksize; /* key size */ + uint8_t extra; + uint8_t flags; /* see node_flags */ + union { + uint32_t child_pgno; + uint32_t dsize; + }; +#endif /* __BYTE_ORDER__ */ + +#if FLEXIBLE_ARRAY_MEMBERS + uint8_t payload[] /* key and data are appended here */; +#endif /* FLEXIBLE_ARRAY_MEMBERS */ +} node_t; + +/* Size of the node header, excluding dynamic data at the end */ +#define NODESIZE 8u + +typedef enum node_flags { + N_BIGDATA = 0x01 /* data put on large page */, + N_SUBDATA = 0x02 /* data is a sub-database */, + N_DUPDATA = 0x04 /* data has duplicates */ +} node_flags_t; + +#pragma pack(pop) + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t +page_type(const page_t *mp) { + return mp->flags; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t +page_type_compat(const page_t *mp) { + /* Drop legacy P_DIRTY flag for sub-pages for compatibility, + * for assertions only. */ + return unlikely(mp->flags & P_SUBP) ?
mp->flags & ~(P_SUBP | P_LEGACY_DIRTY) + : mp->flags; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +is_leaf(const page_t *mp) { + return (mp->flags & P_LEAF) != 0; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +is_dupfix_leaf(const page_t *mp) { + return (mp->flags & P_DUPFIX) != 0; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +is_branch(const page_t *mp) { + return (mp->flags & P_BRANCH) != 0; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +is_largepage(const page_t *mp) { + return (mp->flags & P_LARGE) != 0; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +is_subpage(const page_t *mp) { + return (mp->flags & P_SUBP) != 0; +} diff --git a/src/layout-lck.h b/src/layout-lck.h new file mode 100644 index 00000000..27edec08 --- /dev/null +++ b/src/layout-lck.h @@ -0,0 +1,285 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \note Please refer to the COPYRIGHT file for explanations license change, +/// credits and acknowledgments. +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +/* The version number for a database's lockfile format. 
*/ +#define MDBX_LOCK_VERSION 5 + +#if MDBX_LOCKING == MDBX_LOCKING_WIN32FILES + +#define MDBX_LCK_SIGN UINT32_C(0xF10C) +typedef void osal_ipclock_t; +#elif MDBX_LOCKING == MDBX_LOCKING_SYSV + +#define MDBX_LCK_SIGN UINT32_C(0xF18D) +typedef mdbx_pid_t osal_ipclock_t; + +#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ + MDBX_LOCKING == MDBX_LOCKING_POSIX2008 + +#define MDBX_LCK_SIGN UINT32_C(0x8017) +typedef pthread_mutex_t osal_ipclock_t; + +#elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988 + +#define MDBX_LCK_SIGN UINT32_C(0xFC29) +typedef sem_t osal_ipclock_t; + +#else +#error "FIXME" +#endif /* MDBX_LOCKING */ + +/* Статистика профилирования работы GC */ +typedef struct gc_prof_stat { + /* Монотонное время по "настенным часам" + * затраченное на чтение и поиск внутри GC */ + uint64_t rtime_monotonic; + /* Процессорное время в режим пользователя + * на подготовку страниц извлекаемых из GC, включая подкачку с диска. */ + uint64_t xtime_cpu; + /* Количество итераций чтения-поиска внутри GC при выделении страниц */ + uint32_t rsteps; + /* Количество запросов на выделение последовательностей страниц, + * т.е. когда запрашивает выделение больше одной страницы */ + uint32_t xpages; + /* Счетчик выполнения по медленному пути (slow path execution count) */ + uint32_t spe_counter; + /* page faults (hard page faults) */ + uint32_t majflt; +} gc_prof_stat_t; + +/* Statistics of pages operations for all transactions, + * including incomplete and aborted. 
*/ +typedef struct pgops { + mdbx_atomic_uint64_t newly; /* Quantity of a new pages added */ + mdbx_atomic_uint64_t cow; /* Quantity of pages copied for update */ + mdbx_atomic_uint64_t clone; /* Quantity of parent's dirty pages clones + for nested transactions */ + mdbx_atomic_uint64_t split; /* Page splits */ + mdbx_atomic_uint64_t merge; /* Page merges */ + mdbx_atomic_uint64_t spill; /* Quantity of spilled dirty pages */ + mdbx_atomic_uint64_t unspill; /* Quantity of unspilled/reloaded pages */ + mdbx_atomic_uint64_t + wops; /* Number of explicit write operations (not a pages) to a disk */ + mdbx_atomic_uint64_t + msync; /* Number of explicit msync/flush-to-disk operations */ + mdbx_atomic_uint64_t + fsync; /* Number of explicit fsync/flush-to-disk operations */ + + mdbx_atomic_uint64_t prefault; /* Number of prefault write operations */ + mdbx_atomic_uint64_t mincore; /* Number of mincore() calls */ + + mdbx_atomic_uint32_t + incoherence; /* number of https://libmdbx.dqdkfa.ru/dead-github/issues/269 + caught */ + mdbx_atomic_uint32_t reserved; + + /* Статистика для профилирования GC. + * Логически эти данные, возможно, стоит вынести в другую структуру, + * но разница будет сугубо косметическая. */ + struct { + /* Затраты на поддержку данных пользователя */ + gc_prof_stat_t work; + /* Затраты на поддержку и обновления самой GC */ + gc_prof_stat_t self; + /* Итераций обновления GC, + * больше 1 если были повторы/перезапуски */ + uint32_t wloops; + /* Итерации слияния записей GC */ + uint32_t coalescences; + /* Уничтожения steady-точек фиксации в MDBX_UTTERLY_NOSYNC */ + uint32_t wipes; + /* Сбросы данные на диск вне MDBX_UTTERLY_NOSYNC */ + uint32_t flushes; + /* Попытки пнуть тормозящих читателей */ + uint32_t kicks; + } gc_prof; +} pgop_stat_t; + +/* Reader Lock Table + * + * Readers don't acquire any locks for their data access. Instead, they + * simply record their transaction ID in the reader table. 
The reader + * mutex is needed just to find an empty slot in the reader table. The + * slot's address is saved in thread-specific data so that subsequent + * read transactions started by the same thread need no further locking to + * proceed. + * + * If MDBX_NOSTICKYTHREADS is set, the slot address is not saved in + * thread-specific data. No reader table is used if the database is on a + * read-only filesystem. + * + * Since the database uses multi-version concurrency control, readers don't + * actually need any locking. This table is used to keep track of which + * readers are using data from which old transactions, so that we'll know + * when a particular old transaction is no longer in use. Old transactions + * that have discarded any data pages can then have those pages reclaimed + * for use by a later write transaction. + * + * The lock table is constructed such that reader slots are aligned with the + * processor's cache line size. Any slot is only ever used by one thread. + * This alignment guarantees that there will be no contention or cache + * thrashing as threads update their own slot info, and also eliminates + * any need for locking when accessing a slot. + * + * A writer thread will scan every slot in the table to determine the oldest + * outstanding reader transaction. Any freed pages older than this will be + * reclaimed by the writer. The writer doesn't use any locks when scanning + * this table. This means that there's no guarantee that the writer will + * see the most up-to-date reader info, but that's not required for correct + * operation - all we need is to know the upper bound on the oldest reader, + * we don't care at all about the newest reader. So the only consequence of + * reading stale information here is that old pages might hang around a + * while longer before being reclaimed. 
That's actually good anyway, because + * the longer we delay reclaiming old pages, the more likely it is that a + * string of contiguous pages can be found after coalescing old pages from + * many old transactions together. */ + +/* The actual reader record, with cacheline padding. */ +typedef struct reader_slot { + /* Current Transaction ID when this transaction began, or INVALID_TXNID. + * Multiple readers that start at the same time will probably have the + * same ID here. Again, it's not important to exclude them from + * anything; all we need to know is which version of the DB they + * started from so we can avoid overwriting any data used in that + * particular version. */ + atomic_txnid_t txnid; + + /* The information we store in a single slot of the reader table. + * In addition to a transaction ID, we also record the process and + * thread ID that owns a slot, so that we can detect stale information, + * e.g. threads or processes that went away without cleaning up. + * + * NOTE: We currently don't check for stale records. + * We simply re-init the table when we know that we're the only process + * opening the lock file. */ + + /* The thread ID of the thread owning this txn. */ + mdbx_atomic_uint64_t tid; + + /* The process ID of the process owning this reader txn. */ + mdbx_atomic_uint32_t pid; + + /* The number of pages used in the reader's MVCC snapshot, + * i.e. the value of meta->geometry.first_unallocated and + * txn->geo.first_unallocated */ + atomic_pgno_t snapshot_pages_used; + /* Number of retired pages at the time this reader starts transaction. So, + * at any time the difference meta.pages_retired - + * reader.snapshot_pages_retired will give the number of pages which this + * reader restraining from reuse. */ + mdbx_atomic_uint64_t snapshot_pages_retired; +} reader_slot_t; + +/* The header for the reader table (a memory-mapped lock file). */ +typedef struct shared_lck { + /* Stamp identifying this as an MDBX file. 
+ * It must be set to MDBX_MAGIC with MDBX_LOCK_VERSION. */ + uint64_t magic_and_version; + + /* Format of this lock file. Must be set to MDBX_LOCK_FORMAT. */ + uint32_t os_and_format; + + /* Flags which environment was opened. */ + mdbx_atomic_uint32_t envmode; + + /* Threshold of un-synced-with-disk pages for auto-sync feature, + * zero means no-threshold, i.e. auto-sync is disabled. */ + atomic_pgno_t autosync_threshold; + + /* Low 32-bit of txnid with which meta-pages was synced, + * i.e. for sync-polling in the MDBX_NOMETASYNC mode. */ +#define MDBX_NOMETASYNC_LAZY_UNK (UINT32_MAX / 3) +#define MDBX_NOMETASYNC_LAZY_FD (MDBX_NOMETASYNC_LAZY_UNK + UINT32_MAX / 8) +#define MDBX_NOMETASYNC_LAZY_WRITEMAP \ + (MDBX_NOMETASYNC_LAZY_UNK - UINT32_MAX / 8) + mdbx_atomic_uint32_t meta_sync_txnid; + + /* Period for timed auto-sync feature, i.e. at the every steady checkpoint + * the mti_unsynced_timeout sets to the current_time + autosync_period. + * The time value is represented in a suitable system-dependent form, for + * example clock_gettime(CLOCK_BOOTTIME) or clock_gettime(CLOCK_MONOTONIC). + * Zero means timed auto-sync is disabled. */ + mdbx_atomic_uint64_t autosync_period; + + /* Marker to distinguish uniqueness of DB/LCK. */ + mdbx_atomic_uint64_t bait_uniqueness; + + /* Paired counter of processes that have mlock()ed part of mmapped DB. + * The (mlcnt[0] - mlcnt[1]) > 0 means at least one process + * lock at least one page, so therefore madvise() could return EINVAL. */ + mdbx_atomic_uint32_t mlcnt[2]; + + MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/ + + /* Statistics of costly ops of all (running, completed and aborted) + * transactions */ + pgop_stat_t pgops; + + MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/ + +#if MDBX_LOCKING > 0 + /* Write transaction lock.
*/ + osal_ipclock_t wrt_lock; +#endif /* MDBX_LOCKING > 0 */ + + atomic_txnid_t cached_oldest; + + /* Timestamp of entering an out-of-sync state. Value is represented in a + * suitable system-dependent form, for example clock_gettime(CLOCK_BOOTTIME) + * or clock_gettime(CLOCK_MONOTONIC). */ + mdbx_atomic_uint64_t eoos_timestamp; + + /* Number of un-synced-with-disk pages for auto-sync feature. */ + mdbx_atomic_uint64_t unsynced_pages; + + /* Timestamp of the last readers check. */ + mdbx_atomic_uint64_t readers_check_timestamp; + + /* Number of page which was discarded last time by madvise(DONTNEED). */ + atomic_pgno_t discarded_tail; + + /* Shared anchor for tracking readahead edge and enabled/disabled status. */ + pgno_t readahead_anchor; + + /* Shared cache for mincore() results */ + struct { + pgno_t begin[4]; + uint64_t mask[4]; + } mincore_cache; + + MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/ + +#if MDBX_LOCKING > 0 + /* Readers table lock. */ + osal_ipclock_t rdt_lock; +#endif /* MDBX_LOCKING > 0 */ + + /* The number of slots that have been used in the reader table. + * This always records the maximum count, it is not decremented + * when readers release their slots.
*/ + mdbx_atomic_uint32_t rdt_length; + mdbx_atomic_uint32_t rdt_refresh_flag; + +#if FLEXIBLE_ARRAY_MEMBERS + MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/ + reader_slot_t rdt[] /* dynamic size */; + +/* Lockfile format signature: version, features and field layout */ +#define MDBX_LOCK_FORMAT \ + (MDBX_LCK_SIGN * 27733 + (unsigned)sizeof(reader_slot_t) * 13 + \ + (unsigned)offsetof(reader_slot_t, snapshot_pages_used) * 251 + \ + (unsigned)offsetof(lck_t, cached_oldest) * 83 + \ + (unsigned)offsetof(lck_t, rdt_length) * 37 + \ + (unsigned)offsetof(lck_t, rdt) * 29) +#endif /* FLEXIBLE_ARRAY_MEMBERS */ +} lck_t; + +#define MDBX_LOCK_MAGIC ((MDBX_MAGIC << 8) + MDBX_LOCK_VERSION) + +#define MDBX_READERS_LIMIT 32767 diff --git a/src/lck-posix.c b/src/lck-posix.c index 7108106d..43ddd8ce 100644 --- a/src/lck-posix.c +++ b/src/lck-posix.c @@ -1,18 +1,9 @@ -/* - * Copyright 2015-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 -#if !(defined(_WIN32) || defined(_WIN64)) /* !Windows LCK-implementation */ +#if !(defined(_WIN32) || defined(_WIN64)) +/*----------------------------------------------------------------------------* + * POSIX/non-Windows LCK-implementation */ #include "internals.h" @@ -20,112 +11,21 @@ #include #endif /* MDBX_LOCKING == MDBX_LOCKING_SYSV */ -/*----------------------------------------------------------------------------*/ -/* global constructor/destructor */ - -#if defined(__linux__) || defined(__gnu_linux__) - -#include - -MDBX_INTERNAL_VAR_INSTA uint32_t linux_kernel_version; -MDBX_INTERNAL_VAR_INSTA bool - mdbx_RunningOnWSL1 /* Windows Subsystem 1 for Linux */; - -MDBX_EXCLUDE_FOR_GPROF -__cold static uint8_t probe_for_WSL(const char *tag) { - const char *const WSL = strstr(tag, "WSL"); - if (WSL && WSL[3] >= '2' && WSL[3] <= '9') - return WSL[3] - '0'; - const char *const wsl = strstr(tag, "wsl"); - if (wsl && wsl[3] >= '2' && wsl[3] <= '9') - return wsl[3] - '0'; - if (WSL || wsl || strcasestr(tag, "Microsoft")) - /* Expecting no new kernel within WSL1, either it will explicitly - * marked by an appropriate WSL-version hint. */ - return (linux_kernel_version < /* 4.19.x */ 0x04130000) ? 
1 : 2; - return 0; -} - -#endif /* Linux */ - -#ifdef ENABLE_GPROF -extern void _mcleanup(void); -extern void monstartup(unsigned long, unsigned long); -extern void _init(void); -extern void _fini(void); -extern void __gmon_start__(void) __attribute__((__weak__)); -#endif /* ENABLE_GPROF */ - -MDBX_EXCLUDE_FOR_GPROF -__cold static __attribute__((__constructor__)) void -mdbx_global_constructor(void) { -#ifdef ENABLE_GPROF - if (!&__gmon_start__) - monstartup((uintptr_t)&_init, (uintptr_t)&_fini); -#endif /* ENABLE_GPROF */ - -#if defined(__linux__) || defined(__gnu_linux__) - struct utsname buffer; - if (uname(&buffer) == 0) { - int i = 0; - char *p = buffer.release; - while (*p && i < 4) { - if (*p >= '0' && *p <= '9') { - long number = strtol(p, &p, 10); - if (number > 0) { - if (number > 255) - number = 255; - linux_kernel_version += number << (24 - i * 8); - } - ++i; - } else { - ++p; - } - } - /* "Official" way of detecting WSL1 but not WSL2 - * https://github.com/Microsoft/WSL/issues/423#issuecomment-221627364 - * - * WARNING: False negative detection of WSL1 will result in DATA LOSS! - * So, the REQUIREMENTS for this code: - * 1. MUST detect WSL1 without false-negatives. - * 2. DESIRABLE detect WSL2 but without the risk of violating the first. */ - mdbx_RunningOnWSL1 = probe_for_WSL(buffer.version) == 1 || - probe_for_WSL(buffer.sysname) == 1 || - probe_for_WSL(buffer.release) == 1; - } -#endif /* Linux */ - - global_ctor(); -} - -MDBX_EXCLUDE_FOR_GPROF -__cold static __attribute__((__destructor__)) void -mdbx_global_destructor(void) { - global_dtor(); -#ifdef ENABLE_GPROF - if (!&__gmon_start__) - _mcleanup(); -#endif /* ENABLE_GPROF */ -} - -/*----------------------------------------------------------------------------*/ -/* lck */ - /* Описание реализации блокировок для POSIX & Linux: * * lck-файл отображается в память, в нём организуется таблица читателей и * размещаются совместно используемые posix-мьютексы (futex). 
Посредством - * этих мьютексов (см struct MDBX_lockinfo) реализуются: + * этих мьютексов (см struct lck_t) реализуются: * - Блокировка таблицы читателей для регистрации, - * т.е. функции osal_rdt_lock() и osal_rdt_unlock(). + * т.е. функции lck_rdt_lock() и lck_rdt_unlock(). * - Блокировка БД для пишущих транзакций, - * т.е. функции osal_txn_lock() и osal_txn_unlock(). + * т.е. функции lck_txn_lock() и lck_txn_unlock(). * * Остальной функционал реализуется отдельно посредством файловых блокировок: * - Первоначальный захват БД в режиме exclusive/shared и последующий перевод - * в операционный режим, функции osal_lck_seize() и osal_lck_downgrade(). + * в операционный режим, функции lck_seize() и lck_downgrade(). * - Проверка присутствие процессов-читателей, - * т.е. функции osal_rpid_set(), osal_rpid_clear() и osal_rpid_check(). + * т.е. функции lck_rpid_set(), lck_rpid_clear() и lck_rpid_check(). * * Для блокировки файлов используется fcntl(F_SETLK), так как: * - lockf() оперирует только эксклюзивной блокировкой и требует @@ -169,9 +69,9 @@ mdbx_global_destructor(void) { static int op_setlk, op_setlkw, op_getlk; __cold static void choice_fcntl(void) { assert(!op_setlk && !op_setlkw && !op_getlk); - if ((mdbx_static.flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 + if ((globals.runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 #if defined(__linux__) || defined(__gnu_linux__) - && linux_kernel_version > + && globals.linux_kernel_version > 0x030f0000 /* OFD locks are available since 3.15, but engages here only for 3.16 and later kernels (i.e. LTS) because of reliability reasons */ @@ -201,7 +101,6 @@ static int lck_op(const mdbx_filehandle_t fd, int cmd, const int lck, "The bitness of system `off_t` type is mismatch. 
Please " "fix build and/or NDK configuration."); #endif /* Android */ - jitter4testing(true); assert(offset >= 0 && len > 0); assert((uint64_t)offset < (uint64_t)INT64_MAX && (uint64_t)len < (uint64_t)INT64_MAX && @@ -213,6 +112,8 @@ static int lck_op(const mdbx_filehandle_t fd, int cmd, const int lck, assert((uint64_t)((off_t)((uint64_t)offset + (uint64_t)len)) == ((uint64_t)offset + (uint64_t)len)); + + jitter4testing(true); for (;;) { MDBX_STRUCT_FLOCK lock_op; STATIC_ASSERT_MSG(sizeof(off_t) <= sizeof(lock_op.l_start) && @@ -262,7 +163,7 @@ static int lck_op(const mdbx_filehandle_t fd, int cmd, const int lck, } } -MDBX_INTERNAL_FUNC int osal_lockfile(mdbx_filehandle_t fd, bool wait) { +MDBX_INTERNAL int osal_lockfile(mdbx_filehandle_t fd, bool wait) { #if MDBX_USE_OFDLOCKS if (unlikely(op_setlk == 0)) choice_fcntl(); @@ -270,30 +171,30 @@ MDBX_INTERNAL_FUNC int osal_lockfile(mdbx_filehandle_t fd, bool wait) { return lck_op(fd, wait ? op_setlkw : op_setlk, F_WRLCK, 0, OFF_T_MAX); } -MDBX_INTERNAL_FUNC int osal_rpid_set(MDBX_env *env) { - assert(env->me_lfd != INVALID_HANDLE_VALUE); - assert(env->me_pid > 0); - if (unlikely(osal_getpid() != env->me_pid)) +MDBX_INTERNAL int lck_rpid_set(MDBX_env *env) { + assert(env->lck_mmap.fd != INVALID_HANDLE_VALUE); + assert(env->pid > 0); + if (unlikely(osal_getpid() != env->pid)) return MDBX_PANIC; - return lck_op(env->me_lfd, op_setlk, F_WRLCK, env->me_pid, 1); + return lck_op(env->lck_mmap.fd, op_setlk, F_WRLCK, env->pid, 1); } -MDBX_INTERNAL_FUNC int osal_rpid_clear(MDBX_env *env) { - assert(env->me_lfd != INVALID_HANDLE_VALUE); - assert(env->me_pid > 0); - return lck_op(env->me_lfd, op_setlk, F_UNLCK, env->me_pid, 1); +MDBX_INTERNAL int lck_rpid_clear(MDBX_env *env) { + assert(env->lck_mmap.fd != INVALID_HANDLE_VALUE); + assert(env->pid > 0); + return lck_op(env->lck_mmap.fd, op_setlk, F_UNLCK, env->pid, 1); } -MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid) { - assert(env->me_lfd != 
INVALID_HANDLE_VALUE); +MDBX_INTERNAL int lck_rpid_check(MDBX_env *env, uint32_t pid) { + assert(env->lck_mmap.fd != INVALID_HANDLE_VALUE); assert(pid > 0); - return lck_op(env->me_lfd, op_getlk, F_WRLCK, pid, 1); + return lck_op(env->lck_mmap.fd, op_getlk, F_WRLCK, pid, 1); } /*---------------------------------------------------------------------------*/ #if MDBX_LOCKING > MDBX_LOCKING_SYSV -MDBX_INTERNAL_FUNC int osal_ipclock_stubinit(osal_ipclock_t *ipc) { +MDBX_INTERNAL int lck_ipclock_stubinit(osal_ipclock_t *ipc) { #if MDBX_LOCKING == MDBX_LOCKING_POSIX1988 return sem_init(ipc, false, 1) ? errno : 0; #elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ @@ -304,7 +205,7 @@ MDBX_INTERNAL_FUNC int osal_ipclock_stubinit(osal_ipclock_t *ipc) { #endif } -MDBX_INTERNAL_FUNC int osal_ipclock_destroy(osal_ipclock_t *ipc) { +MDBX_INTERNAL int lck_ipclock_destroy(osal_ipclock_t *ipc) { #if MDBX_LOCKING == MDBX_LOCKING_POSIX1988 return sem_destroy(ipc) ? errno : 0; #elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ @@ -320,7 +221,7 @@ static int check_fstat(MDBX_env *env) { struct stat st; int rc = MDBX_SUCCESS; - if (fstat(env->me_lazy_fd, &st)) { + if (fstat(env->lazy_fd, &st)) { rc = errno; ERROR("fstat(%s), err %d", "DXB", rc); return rc; @@ -345,7 +246,7 @@ static int check_fstat(MDBX_env *env) { //---------------------------------------------------------------------------- - if (fstat(env->me_lfd, &st)) { + if (fstat(env->lck_mmap.fd, &st)) { rc = errno; ERROR("fstat(%s), err %d", "LCK", rc); return rc; @@ -363,8 +264,8 @@ static int check_fstat(MDBX_env *env) { } /* Checking file size for detect the situation when we got the shared lock - * immediately after osal_lck_destroy(). */ - if (st.st_size < (off_t)(sizeof(MDBX_lockinfo) + sizeof(MDBX_reader))) { + * immediately after lck_destroy(). 
*/ + if (st.st_size < (off_t)(sizeof(lck_t) + sizeof(reader_slot_t))) { VERBOSE("lck-file is too short (%u), exclusive-lock needed", (unsigned)st.st_size); rc = MDBX_RESULT_TRUE; @@ -373,18 +274,14 @@ static int check_fstat(MDBX_env *env) { return rc; } -__cold MDBX_INTERNAL_FUNC int osal_lck_seize(MDBX_env *env) { - assert(env->me_lazy_fd != INVALID_HANDLE_VALUE); - if (unlikely(osal_getpid() != env->me_pid)) +__cold MDBX_INTERNAL int lck_seize(MDBX_env *env) { + assert(env->lazy_fd != INVALID_HANDLE_VALUE); + if (unlikely(osal_getpid() != env->pid)) return MDBX_PANIC; -#if MDBX_USE_OFDLOCKS - if (unlikely(op_setlk == 0)) - choice_fcntl(); -#endif /* MDBX_USE_OFDLOCKS */ int rc = MDBX_SUCCESS; #if defined(__linux__) || defined(__gnu_linux__) - if (unlikely(mdbx_RunningOnWSL1)) { + if (unlikely(globals.running_on_WSL1)) { rc = ENOLCK /* No record locks available */; ERROR("%s, err %u", "WSL1 (Windows Subsystem for Linux) is mad and trouble-full, " @@ -394,11 +291,15 @@ __cold MDBX_INTERNAL_FUNC int osal_lck_seize(MDBX_env *env) { } #endif /* Linux */ - if (env->me_lfd == INVALID_HANDLE_VALUE) { +#if MDBX_USE_OFDLOCKS + if (unlikely(op_setlk == 0)) + choice_fcntl(); +#endif /* MDBX_USE_OFDLOCKS */ + + if (env->lck_mmap.fd == INVALID_HANDLE_VALUE) { /* LY: without-lck mode (e.g. exclusive or on read-only filesystem) */ - rc = - lck_op(env->me_lazy_fd, op_setlk, - (env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX); + rc = lck_op(env->lazy_fd, op_setlk, + (env->flags & MDBX_RDONLY) ? 
F_RDLCK : F_WRLCK, 0, OFF_T_MAX); if (rc != MDBX_SUCCESS) { ERROR("%s, err %u", "without-lck", rc); eASSERT(env, MDBX_IS_ERROR(rc)); @@ -412,7 +313,7 @@ __cold MDBX_INTERNAL_FUNC int osal_lck_seize(MDBX_env *env) { retry: if (rc == MDBX_RESULT_TRUE) { - rc = lck_op(env->me_lfd, op_setlk, F_UNLCK, 0, 1); + rc = lck_op(env->lck_mmap.fd, op_setlk, F_UNLCK, 0, 1); if (rc != MDBX_SUCCESS) { ERROR("%s, err %u", "unlock-before-retry", rc); eASSERT(env, MDBX_IS_ERROR(rc)); @@ -421,16 +322,15 @@ retry: } /* Firstly try to get exclusive locking. */ - rc = lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, 1); + rc = lck_op(env->lck_mmap.fd, op_setlk, F_WRLCK, 0, 1); if (rc == MDBX_SUCCESS) { rc = check_fstat(env); if (MDBX_IS_ERROR(rc)) return rc; continue_dxb_exclusive: - rc = - lck_op(env->me_lazy_fd, op_setlk, - (env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX); + rc = lck_op(env->lazy_fd, op_setlk, + (env->flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX); if (rc == MDBX_SUCCESS) return MDBX_RESULT_TRUE /* Done: return with exclusive locking. */; @@ -455,16 +355,16 @@ retry: } /* Here could be one of two: - * - osal_lck_destroy() from the another process was hold the lock + * - lck_destroy() from the another process was hold the lock * during a destruction. - * - either osal_lck_seize() from the another process was got the exclusive + * - either lck_seize() from the another process was got the exclusive * lock and doing initialization. * For distinguish these cases will use size of the lck-file later. */ /* Wait for lck-shared now. */ /* Here may be await during transient processes, for instance until another * competing process doesn't call lck_downgrade(). 
*/ - rc = lck_op(env->me_lfd, op_setlkw, F_RDLCK, 0, 1); + rc = lck_op(env->lck_mmap.fd, op_setlkw, F_RDLCK, 0, 1); if (rc != MDBX_SUCCESS) { ERROR("%s, err %u", "try-shared", rc); eASSERT(env, MDBX_IS_ERROR(rc)); @@ -480,7 +380,7 @@ retry: } /* got shared, retry exclusive */ - rc = lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, 1); + rc = lck_op(env->lck_mmap.fd, op_setlk, F_WRLCK, 0, 1); if (rc == MDBX_SUCCESS) goto continue_dxb_exclusive; @@ -492,9 +392,8 @@ retry: } /* Lock against another process operating in without-lck or exclusive mode. */ - rc = - lck_op(env->me_lazy_fd, op_setlk, - (env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, env->me_pid, 1); + rc = lck_op(env->lazy_fd, op_setlk, + (env->flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, env->pid, 1); if (rc != MDBX_SUCCESS) { ERROR("%s, err %u", "lock-against-without-lck", rc); eASSERT(env, MDBX_IS_ERROR(rc)); @@ -505,20 +404,20 @@ retry: return MDBX_RESULT_FALSE; } -MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env) { - assert(env->me_lfd != INVALID_HANDLE_VALUE); - if (unlikely(osal_getpid() != env->me_pid)) +MDBX_INTERNAL int lck_downgrade(MDBX_env *env) { + assert(env->lck_mmap.fd != INVALID_HANDLE_VALUE); + if (unlikely(osal_getpid() != env->pid)) return MDBX_PANIC; int rc = MDBX_SUCCESS; - if ((env->me_flags & MDBX_EXCLUSIVE) == 0) { - rc = lck_op(env->me_lazy_fd, op_setlk, F_UNLCK, 0, env->me_pid); + if ((env->flags & MDBX_EXCLUSIVE) == 0) { + rc = lck_op(env->lazy_fd, op_setlk, F_UNLCK, 0, env->pid); if (rc == MDBX_SUCCESS) - rc = lck_op(env->me_lazy_fd, op_setlk, F_UNLCK, env->me_pid + 1, - OFF_T_MAX - env->me_pid - 1); + rc = lck_op(env->lazy_fd, op_setlk, F_UNLCK, env->pid + 1, + OFF_T_MAX - env->pid - 1); } if (rc == MDBX_SUCCESS) - rc = lck_op(env->me_lfd, op_setlk, F_RDLCK, 0, 1); + rc = lck_op(env->lck_mmap.fd, op_setlk, F_RDLCK, 0, 1); if (unlikely(rc != 0)) { ERROR("%s, err %u", "lck", rc); assert(MDBX_IS_ERROR(rc)); @@ -526,25 +425,24 @@ MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env 
*env) { return rc; } -MDBX_INTERNAL_FUNC int osal_lck_upgrade(MDBX_env *env, bool dont_wait) { - assert(env->me_lfd != INVALID_HANDLE_VALUE); - if (unlikely(osal_getpid() != env->me_pid)) +MDBX_INTERNAL int lck_upgrade(MDBX_env *env, bool dont_wait) { + assert(env->lck_mmap.fd != INVALID_HANDLE_VALUE); + if (unlikely(osal_getpid() != env->pid)) return MDBX_PANIC; const int cmd = dont_wait ? op_setlk : op_setlkw; - int rc = lck_op(env->me_lfd, cmd, F_WRLCK, 0, 1); - if (rc == MDBX_SUCCESS && (env->me_flags & MDBX_EXCLUSIVE) == 0) { - rc = (env->me_pid > 1) - ? lck_op(env->me_lazy_fd, cmd, F_WRLCK, 0, env->me_pid - 1) - : MDBX_SUCCESS; + int rc = lck_op(env->lck_mmap.fd, cmd, F_WRLCK, 0, 1); + if (rc == MDBX_SUCCESS && (env->flags & MDBX_EXCLUSIVE) == 0) { + rc = (env->pid > 1) ? lck_op(env->lazy_fd, cmd, F_WRLCK, 0, env->pid - 1) + : MDBX_SUCCESS; if (rc == MDBX_SUCCESS) { - rc = lck_op(env->me_lazy_fd, cmd, F_WRLCK, env->me_pid + 1, - OFF_T_MAX - env->me_pid - 1); - if (rc != MDBX_SUCCESS && env->me_pid > 1 && - lck_op(env->me_lazy_fd, op_setlk, F_UNLCK, 0, env->me_pid - 1)) + rc = lck_op(env->lazy_fd, cmd, F_WRLCK, env->pid + 1, + OFF_T_MAX - env->pid - 1); + if (rc != MDBX_SUCCESS && env->pid > 1 && + lck_op(env->lazy_fd, op_setlk, F_UNLCK, 0, env->pid - 1)) rc = MDBX_PANIC; } - if (rc != MDBX_SUCCESS && lck_op(env->me_lfd, op_setlk, F_RDLCK, 0, 1)) + if (rc != MDBX_SUCCESS && lck_op(env->lck_mmap.fd, op_setlk, F_RDLCK, 0, 1)) rc = MDBX_PANIC; } if (unlikely(rc != 0)) { @@ -554,48 +452,48 @@ MDBX_INTERNAL_FUNC int osal_lck_upgrade(MDBX_env *env, bool dont_wait) { return rc; } -__cold MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, - MDBX_env *inprocess_neighbor, - const uint32_t current_pid) { +__cold MDBX_INTERNAL int lck_destroy(MDBX_env *env, + MDBX_env *inprocess_neighbor, + const uint32_t current_pid) { eASSERT(env, osal_getpid() == current_pid); int rc = MDBX_SUCCESS; struct stat lck_info; - MDBX_lockinfo *lck = env->me_lck; - if (lck && lck == 
env->me_lck_mmap.lck && !inprocess_neighbor && + lck_t *lck = env->lck; + if (lck && lck == env->lck_mmap.lck && !inprocess_neighbor && /* try get exclusive access */ - lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, OFF_T_MAX) == 0 && + lck_op(env->lck_mmap.fd, op_setlk, F_WRLCK, 0, OFF_T_MAX) == 0 && /* if LCK was not removed */ - fstat(env->me_lfd, &lck_info) == 0 && lck_info.st_nlink > 0 && - lck_op(env->me_lazy_fd, op_setlk, - (env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, + fstat(env->lck_mmap.fd, &lck_info) == 0 && lck_info.st_nlink > 0 && + lck_op(env->lazy_fd, op_setlk, + (env->flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX) == 0) { VERBOSE("%p got exclusive, drown ipc-locks", (void *)env); - eASSERT(env, current_pid == env->me_pid); + eASSERT(env, current_pid == env->pid); #if MDBX_LOCKING == MDBX_LOCKING_SYSV if (env->me_sysv_ipc.semid != -1) rc = semctl(env->me_sysv_ipc.semid, 2, IPC_RMID) ? errno : 0; #else - rc = osal_ipclock_destroy(&lck->mti_rlock); + rc = lck_ipclock_destroy(&lck->rdt_lock); if (rc == 0) - rc = osal_ipclock_destroy(&lck->mti_wlock); + rc = lck_ipclock_destroy(&lck->wrt_lock); #endif /* MDBX_LOCKING */ eASSERT(env, rc == 0); if (rc == 0) { - const bool synced = lck->mti_unsynced_pages.weak == 0; - osal_munmap(&env->me_lck_mmap); - if (synced && env->me_lfd != INVALID_HANDLE_VALUE) - rc = ftruncate(env->me_lfd, 0) ? errno : 0; + const bool synced = lck->unsynced_pages.weak == 0; + osal_munmap(&env->lck_mmap); + if (synced && env->lck_mmap.fd != INVALID_HANDLE_VALUE) + rc = ftruncate(env->lck_mmap.fd, 0) ? 
errno : 0; } jitter4testing(false); } - if (current_pid != env->me_pid) { + if (current_pid != env->pid) { eASSERT(env, !inprocess_neighbor); NOTICE("drown env %p after-fork pid %d -> %d", - __Wpedantic_format_voidptr(env), env->me_pid, current_pid); + __Wpedantic_format_voidptr(env), env->pid, current_pid); inprocess_neighbor = nullptr; } @@ -607,57 +505,55 @@ __cold MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, * locks should be released here explicitly with properly order. */ /* close dxb and restore lock */ - if (env->me_dsync_fd != INVALID_HANDLE_VALUE) { - if (unlikely(close(env->me_dsync_fd) != 0) && rc == MDBX_SUCCESS) + if (env->dsync_fd != INVALID_HANDLE_VALUE) { + if (unlikely(close(env->dsync_fd) != 0) && rc == MDBX_SUCCESS) rc = errno; - env->me_dsync_fd = INVALID_HANDLE_VALUE; + env->dsync_fd = INVALID_HANDLE_VALUE; } - if (env->me_lazy_fd != INVALID_HANDLE_VALUE) { - if (unlikely(close(env->me_lazy_fd) != 0) && rc == MDBX_SUCCESS) + if (env->lazy_fd != INVALID_HANDLE_VALUE) { + if (unlikely(close(env->lazy_fd) != 0) && rc == MDBX_SUCCESS) rc = errno; - env->me_lazy_fd = INVALID_HANDLE_VALUE; + env->lazy_fd = INVALID_HANDLE_VALUE; if (op_setlk == F_SETLK && inprocess_neighbor && rc == MDBX_SUCCESS) { /* restore file-lock */ - rc = lck_op( - inprocess_neighbor->me_lazy_fd, F_SETLKW, - (inprocess_neighbor->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, - (inprocess_neighbor->me_flags & MDBX_EXCLUSIVE) - ? 0 - : inprocess_neighbor->me_pid, - (inprocess_neighbor->me_flags & MDBX_EXCLUSIVE) ? OFF_T_MAX : 1); + rc = lck_op(inprocess_neighbor->lazy_fd, F_SETLKW, + (inprocess_neighbor->flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, + (inprocess_neighbor->flags & MDBX_EXCLUSIVE) + ? 0 + : inprocess_neighbor->pid, + (inprocess_neighbor->flags & MDBX_EXCLUSIVE) ? 
OFF_T_MAX : 1); } } /* close clk and restore locks */ - if (env->me_lfd != INVALID_HANDLE_VALUE) { - if (unlikely(close(env->me_lfd) != 0) && rc == MDBX_SUCCESS) + if (env->lck_mmap.fd != INVALID_HANDLE_VALUE) { + if (unlikely(close(env->lck_mmap.fd) != 0) && rc == MDBX_SUCCESS) rc = errno; - env->me_lfd = INVALID_HANDLE_VALUE; + env->lck_mmap.fd = INVALID_HANDLE_VALUE; if (op_setlk == F_SETLK && inprocess_neighbor && rc == MDBX_SUCCESS) { /* restore file-locks */ - rc = lck_op(inprocess_neighbor->me_lfd, F_SETLKW, F_RDLCK, 0, 1); - if (rc == MDBX_SUCCESS && inprocess_neighbor->me_live_reader) - rc = osal_rpid_set(inprocess_neighbor); + rc = lck_op(inprocess_neighbor->lck_mmap.fd, F_SETLKW, F_RDLCK, 0, 1); + if (rc == MDBX_SUCCESS && inprocess_neighbor->registered_reader_pid) + rc = lck_rpid_set(inprocess_neighbor); } } if (inprocess_neighbor && rc != MDBX_SUCCESS) - inprocess_neighbor->me_flags |= MDBX_FATAL_ERROR; + inprocess_neighbor->flags |= ENV_FATAL_ERROR; return rc; } /*---------------------------------------------------------------------------*/ -__cold MDBX_INTERNAL_FUNC int osal_lck_init(MDBX_env *env, - MDBX_env *inprocess_neighbor, - int global_uniqueness_flag) { +__cold MDBX_INTERNAL int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, + int global_uniqueness_flag) { #if MDBX_LOCKING == MDBX_LOCKING_SYSV int semid = -1; /* don't initialize semaphores twice */ (void)inprocess_neighbor; if (global_uniqueness_flag == MDBX_RESULT_TRUE) { struct stat st; - if (fstat(env->me_lazy_fd, &st)) + if (fstat(env->lazy_fd, &st)) return errno; sysv_retry_create: semid = semget(env->me_sysv_ipc.key, 2, @@ -711,9 +607,9 @@ __cold MDBX_INTERNAL_FUNC int osal_lck_init(MDBX_env *env, /* don't initialize semaphores twice */ (void)inprocess_neighbor; if (global_uniqueness_flag == MDBX_RESULT_TRUE) { - if (sem_init(&env->me_lck_mmap.lck->mti_rlock, true, 1)) + if (sem_init(&env->lck_mmap.lck->rdt_lock, true, 1)) return errno; - if 
(sem_init(&env->me_lck_mmap.lck->mti_wlock, true, 1)) + if (sem_init(&env->lck_mmap.lck->wrt_lock, true, 1)) return errno; } return MDBX_SUCCESS; @@ -782,10 +678,10 @@ __cold MDBX_INTERNAL_FUNC int osal_lck_init(MDBX_env *env, if (rc && rc != ENOTSUP) goto bailout; - rc = pthread_mutex_init(&env->me_lck_mmap.lck->mti_rlock, &ma); + rc = pthread_mutex_init(&env->lck_mmap.lck->rdt_lock, &ma); if (rc) goto bailout; - rc = pthread_mutex_init(&env->me_lck_mmap.lck->mti_wlock, &ma); + rc = pthread_mutex_init(&env->lck_mmap.lck->wrt_lock, &ma); bailout: pthread_mutexattr_destroy(&ma); @@ -799,23 +695,27 @@ __cold static int osal_ipclock_failed(MDBX_env *env, osal_ipclock_t *ipc, const int err) { int rc = err; #if MDBX_LOCKING == MDBX_LOCKING_POSIX2008 || MDBX_LOCKING == MDBX_LOCKING_SYSV + +#ifndef EOWNERDEAD +#define EOWNERDEAD MDBX_RESULT_TRUE +#endif /* EOWNERDEAD */ + if (err == EOWNERDEAD) { /* We own the mutex. Clean up after dead previous owner. */ - - const bool rlocked = ipc == &env->me_lck->mti_rlock; + const bool rlocked = ipc == &env->lck->rdt_lock; rc = MDBX_SUCCESS; if (!rlocked) { - if (unlikely(env->me_txn)) { + if (unlikely(env->txn)) { /* env is hosed if the dead thread was ours */ - env->me_flags |= MDBX_FATAL_ERROR; - env->me_txn = NULL; + env->flags |= ENV_FATAL_ERROR; + env->txn = nullptr; rc = MDBX_PANIC; } } WARNING("%clock owner died, %s", (rlocked ? 'r' : 'w'), (rc ? "this process' env is hosed" : "recovering")); - int check_rc = cleanup_dead_readers(env, rlocked, NULL); + int check_rc = mvcc_cleanup_dead(env, rlocked, nullptr); check_rc = (check_rc == MDBX_SUCCESS) ? 
MDBX_RESULT_TRUE : check_rc; #if MDBX_LOCKING == MDBX_LOCKING_SYSV @@ -858,12 +758,12 @@ __cold static int osal_ipclock_failed(MDBX_env *env, osal_ipclock_t *ipc, ERROR("mutex (un)lock failed, %s", mdbx_strerror(err)); if (rc != EDEADLK) - env->me_flags |= MDBX_FATAL_ERROR; + env->flags |= ENV_FATAL_ERROR; return rc; } #if defined(__ANDROID_API__) || defined(ANDROID) || defined(BIONIC) -MDBX_INTERNAL_FUNC int osal_check_tid4bionic(void) { +MDBX_INTERNAL int osal_check_tid4bionic(void) { /* avoid 32-bit Bionic bug/hang with 32-pit TID */ if (sizeof(pthread_mutex_t) < sizeof(pid_t) + sizeof(unsigned)) { pid_t tid = gettid(); @@ -900,7 +800,7 @@ static int osal_ipclock_lock(MDBX_env *env, osal_ipclock_t *ipc, } else if (sem_wait(ipc)) rc = errno; #elif MDBX_LOCKING == MDBX_LOCKING_SYSV - struct sembuf op = {.sem_num = (ipc != &env->me_lck->mti_wlock), + struct sembuf op = {.sem_num = (ipc != &env->lck->wrt_lock), .sem_op = -1, .sem_flg = dont_wait ? IPC_NOWAIT | SEM_UNDO : SEM_UNDO}; int rc; @@ -910,7 +810,7 @@ static int osal_ipclock_lock(MDBX_env *env, osal_ipclock_t *ipc, rc = MDBX_BUSY; } else { rc = *ipc ? EOWNERDEAD : MDBX_SUCCESS; - *ipc = env->me_pid; + *ipc = env->pid; } #else #error "FIXME" @@ -929,11 +829,11 @@ int osal_ipclock_unlock(MDBX_env *env, osal_ipclock_t *ipc) { #elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988 err = sem_post(ipc) ? errno : MDBX_SUCCESS; #elif MDBX_LOCKING == MDBX_LOCKING_SYSV - if (unlikely(*ipc != (pid_t)env->me_pid)) + if (unlikely(*ipc != (pid_t)env->pid)) err = EPERM; else { *ipc = 0; - struct sembuf op = {.sem_num = (ipc != &env->me_lck->mti_wlock), + struct sembuf op = {.sem_num = (ipc != &env->lck->wrt_lock), .sem_op = 1, .sem_flg = SEM_UNDO}; err = semop(env->me_sysv_ipc.semid, &op, 1) ? 
errno : MDBX_SUCCESS; @@ -944,66 +844,61 @@ int osal_ipclock_unlock(MDBX_env *env, osal_ipclock_t *ipc) { int rc = err; if (unlikely(rc != MDBX_SUCCESS)) { const uint32_t current_pid = osal_getpid(); - if (current_pid == env->me_pid || LOG_ENABLED(MDBX_LOG_NOTICE)) - debug_log((current_pid == env->me_pid) + if (current_pid == env->pid || LOG_ENABLED(MDBX_LOG_NOTICE)) + debug_log((current_pid == env->pid) ? MDBX_LOG_FATAL : (rc = MDBX_SUCCESS, MDBX_LOG_NOTICE), "ipc-unlock()", __LINE__, "failed: env %p, lck-%s %p, err %d\n", __Wpedantic_format_voidptr(env), - (env->me_lck == env->me_lck_mmap.lck) ? "mmap" : "stub", - __Wpedantic_format_voidptr(env->me_lck), err); + (env->lck == env->lck_mmap.lck) ? "mmap" : "stub", + __Wpedantic_format_voidptr(env->lck), err); } return rc; } -MDBX_INTERNAL_FUNC int osal_rdt_lock(MDBX_env *env) { +MDBX_INTERNAL int lck_rdt_lock(MDBX_env *env) { TRACE("%s", ">>"); jitter4testing(true); - int rc = osal_ipclock_lock(env, &env->me_lck->mti_rlock, false); + int rc = osal_ipclock_lock(env, &env->lck->rdt_lock, false); TRACE("<< rc %d", rc); return rc; } -MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env) { +MDBX_INTERNAL void lck_rdt_unlock(MDBX_env *env) { TRACE("%s", ">>"); - int err = osal_ipclock_unlock(env, &env->me_lck->mti_rlock); + int err = osal_ipclock_unlock(env, &env->lck->rdt_lock); TRACE("<< err %d", err); if (unlikely(err != MDBX_SUCCESS)) mdbx_panic("%s() failed: err %d\n", __func__, err); jitter4testing(true); } -int osal_txn_lock(MDBX_env *env, bool dont_wait) { +int lck_txn_lock(MDBX_env *env, bool dont_wait) { TRACE("%swait %s", dont_wait ? 
"dont-" : "", ">>"); jitter4testing(true); - const int err = osal_ipclock_lock(env, &env->me_lck->mti_wlock, dont_wait); + const int err = osal_ipclock_lock(env, &env->lck->wrt_lock, dont_wait); int rc = err; if (likely(!MDBX_IS_ERROR(err))) { - eASSERT(env, !env->me_txn0->mt_owner || + eASSERT(env, !env->basal_txn->owner || err == /* если другой поток в этом-же процессе завершился не освободив блокировку */ MDBX_RESULT_TRUE); - env->me_txn0->mt_owner = osal_thread_self(); + env->basal_txn->owner = osal_thread_self(); rc = MDBX_SUCCESS; } TRACE("<< err %d, rc %d", err, rc); return rc; } -void osal_txn_unlock(MDBX_env *env) { +void lck_txn_unlock(MDBX_env *env) { TRACE("%s", ">>"); - eASSERT(env, env->me_txn0->mt_owner == osal_thread_self()); - env->me_txn0->mt_owner = 0; - int err = osal_ipclock_unlock(env, &env->me_lck->mti_wlock); + eASSERT(env, env->basal_txn->owner == osal_thread_self()); + env->basal_txn->owner = 0; + int err = osal_ipclock_unlock(env, &env->lck->wrt_lock); TRACE("<< err %d", err); if (unlikely(err != MDBX_SUCCESS)) mdbx_panic("%s() failed: err %d\n", __func__, err); jitter4testing(true); } -#else -#ifdef _MSC_VER -#pragma warning(disable : 4206) /* nonstandard extension used: translation \ - unit is empty */ -#endif /* _MSC_VER (warnings) */ -#endif /* !Windows LCK-implementation */ +#endif /* !Windows LCK-implementation */ diff --git a/src/lck-windows.c b/src/lck-windows.c index 241800aa..62710142 100644 --- a/src/lck-windows.c +++ b/src/lck-windows.c @@ -1,18 +1,7 @@ -/* - * Copyright 2015-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 -#if defined(_WIN32) || defined(_WIN64) /* Windows LCK-implementation */ +#if defined(_WIN32) || defined(_WIN64) /* PREAMBLE FOR WINDOWS: * @@ -22,91 +11,6 @@ #include "internals.h" -static void mdbx_winnt_import(void); - -#if MDBX_BUILD_SHARED_LIBRARY -#if MDBX_WITHOUT_MSVC_CRT && defined(NDEBUG) -/* DEBUG/CHECKED builds still require MSVC's CRT for runtime checks. - * - * Define dll's entry point only for Release build when NDEBUG is defined and - * MDBX_WITHOUT_MSVC_CRT=ON. if the entry point isn't defined then MSVC's will - * automatically use DllMainCRTStartup() from CRT library, which also - * automatically call DllMain() from our mdbx.dll */ -#pragma comment(linker, "/ENTRY:DllMain") -#endif /* MDBX_WITHOUT_MSVC_CRT */ - -BOOL APIENTRY DllMain(HANDLE module, DWORD reason, LPVOID reserved) -#else -#if !MDBX_MANUAL_MODULE_HANDLER -static -#endif /* !MDBX_MANUAL_MODULE_HANDLER */ - void NTAPI - mdbx_module_handler(PVOID module, DWORD reason, PVOID reserved) -#endif /* MDBX_BUILD_SHARED_LIBRARY */ -{ - (void)reserved; - switch (reason) { - case DLL_PROCESS_ATTACH: - mdbx_winnt_import(); - global_ctor(); - break; - case DLL_PROCESS_DETACH: - global_dtor(); - break; - - case DLL_THREAD_ATTACH: - break; - case DLL_THREAD_DETACH: - thread_dtor(module); - break; - } -#if MDBX_BUILD_SHARED_LIBRARY - return TRUE; -#endif -} - -#if !MDBX_BUILD_SHARED_LIBRARY && !MDBX_MANUAL_MODULE_HANDLER -/* *INDENT-OFF* */ -/* clang-format off */ -#if defined(_MSC_VER) -# pragma const_seg(push) -# pragma data_seg(push) - -# ifndef _M_IX86 - /* kick a linker to create the TLS directory if not already done */ -# pragma comment(linker, "/INCLUDE:_tls_used") - /* Force some symbol references. 
*/ -# pragma comment(linker, "/INCLUDE:mdbx_tls_anchor") - /* specific const-segment for WIN64 */ -# pragma const_seg(".CRT$XLB") - const -# else - /* kick a linker to create the TLS directory if not already done */ -# pragma comment(linker, "/INCLUDE:__tls_used") - /* Force some symbol references. */ -# pragma comment(linker, "/INCLUDE:_mdbx_tls_anchor") - /* specific data-segment for WIN32 */ -# pragma data_seg(".CRT$XLB") -# endif - - __declspec(allocate(".CRT$XLB")) PIMAGE_TLS_CALLBACK mdbx_tls_anchor = mdbx_module_handler; -# pragma data_seg(pop) -# pragma const_seg(pop) - -#elif defined(__GNUC__) -# ifndef _M_IX86 - const -# endif - PIMAGE_TLS_CALLBACK mdbx_tls_anchor __attribute__((__section__(".CRT$XLB"), used)) = mdbx_module_handler; -#else -# error FIXME -#endif -/* *INDENT-ON* */ -/* clang-format on */ -#endif /* !MDBX_BUILD_SHARED_LIBRARY && !MDBX_MANUAL_MODULE_HANDLER */ - -/*----------------------------------------------------------------------------*/ - #define LCK_SHARED 0 #define LCK_EXCLUSIVE LOCKFILE_EXCLUSIVE_LOCK #define LCK_WAITFOR 0 @@ -145,17 +49,16 @@ static int flock_with_event(HANDLE fd, HANDLE event, unsigned flags, return (int)rc; } -static __inline int flock(HANDLE fd, unsigned flags, size_t offset, - size_t bytes) { +static inline int flock(HANDLE fd, unsigned flags, size_t offset, + size_t bytes) { return flock_with_event(fd, 0, flags, offset, bytes); } -static __inline int flock_data(const MDBX_env *env, unsigned flags, - size_t offset, size_t bytes) { +static inline int flock_data(const MDBX_env *env, unsigned flags, size_t offset, + size_t bytes) { const HANDLE fd4data = - env->me_overlapped_fd ? env->me_overlapped_fd : env->me_lazy_fd; - return flock_with_event(fd4data, env->me_data_lock_event, flags, offset, - bytes); + env->ioring.overlapped_fd ? 
env->ioring.overlapped_fd : env->lazy_fd; + return flock_with_event(fd4data, env->dxb_lock_event, flags, offset, bytes); } static int funlock(mdbx_filehandle_t fd, size_t offset, size_t bytes) { @@ -175,16 +78,16 @@ static int funlock(mdbx_filehandle_t fd, size_t offset, size_t bytes) { #else #define DXB_MAXLEN UINT32_C(0x7ff00000) #endif -#define DXB_BODY (env->me_psize * (size_t)NUM_METAS), DXB_MAXLEN +#define DXB_BODY (env->ps * (size_t)NUM_METAS), DXB_MAXLEN #define DXB_WHOLE 0, DXB_MAXLEN -int osal_txn_lock(MDBX_env *env, bool dontwait) { +int lck_txn_lock(MDBX_env *env, bool dontwait) { if (dontwait) { - if (!TryEnterCriticalSection(&env->me_windowsbug_lock)) + if (!TryEnterCriticalSection(&env->windowsbug_lock)) return MDBX_BUSY; } else { __try { - EnterCriticalSection(&env->me_windowsbug_lock); + EnterCriticalSection(&env->windowsbug_lock); } __except ((GetExceptionCode() == 0xC0000194 /* STATUS_POSSIBLE_DEADLOCK / EXCEPTION_POSSIBLE_DEADLOCK */) @@ -194,93 +97,93 @@ int osal_txn_lock(MDBX_env *env, bool dontwait) { } } - eASSERT(env, !env->me_txn0->mt_owner); - if (env->me_flags & MDBX_EXCLUSIVE) + eASSERT(env, !env->basal_txn->owner); + if (env->flags & MDBX_EXCLUSIVE) goto done; const HANDLE fd4data = - env->me_overlapped_fd ? env->me_overlapped_fd : env->me_lazy_fd; - int rc = flock_with_event(fd4data, env->me_data_lock_event, + env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd; + int rc = flock_with_event(fd4data, env->dxb_lock_event, dontwait ? 
(LCK_EXCLUSIVE | LCK_DONTWAIT) : (LCK_EXCLUSIVE | LCK_WAITFOR), DXB_BODY); if (rc == ERROR_LOCK_VIOLATION && dontwait) { SleepEx(0, true); - rc = flock_with_event(fd4data, env->me_data_lock_event, + rc = flock_with_event(fd4data, env->dxb_lock_event, LCK_EXCLUSIVE | LCK_DONTWAIT, DXB_BODY); if (rc == ERROR_LOCK_VIOLATION) { SleepEx(0, true); - rc = flock_with_event(fd4data, env->me_data_lock_event, + rc = flock_with_event(fd4data, env->dxb_lock_event, LCK_EXCLUSIVE | LCK_DONTWAIT, DXB_BODY); } } if (rc == MDBX_SUCCESS) { done: - /* Zap: Failing to release lock 'env->me_windowsbug_lock' + /* Zap: Failing to release lock 'env->windowsbug_lock' * in function 'mdbx_txn_lock' */ MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(26115); - env->me_txn0->mt_owner = osal_thread_self(); + env->basal_txn->owner = osal_thread_self(); return MDBX_SUCCESS; } - LeaveCriticalSection(&env->me_windowsbug_lock); + LeaveCriticalSection(&env->windowsbug_lock); return (!dontwait || rc != ERROR_LOCK_VIOLATION) ? rc : MDBX_BUSY; } -void osal_txn_unlock(MDBX_env *env) { - eASSERT(env, env->me_txn0->mt_owner == osal_thread_self()); - if ((env->me_flags & MDBX_EXCLUSIVE) == 0) { +void lck_txn_unlock(MDBX_env *env) { + eASSERT(env, env->basal_txn->owner == osal_thread_self()); + if ((env->flags & MDBX_EXCLUSIVE) == 0) { const HANDLE fd4data = - env->me_overlapped_fd ? env->me_overlapped_fd : env->me_lazy_fd; + env->ioring.overlapped_fd ? 
env->ioring.overlapped_fd : env->lazy_fd; int err = funlock(fd4data, DXB_BODY); if (err != MDBX_SUCCESS) mdbx_panic("%s failed: err %u", __func__, err); } - env->me_txn0->mt_owner = 0; - LeaveCriticalSection(&env->me_windowsbug_lock); + env->basal_txn->owner = 0; + LeaveCriticalSection(&env->windowsbug_lock); } /*----------------------------------------------------------------------------*/ /* global `read` lock for readers registration, - * exclusive locking `mti_numreaders` (second) cacheline */ + * exclusive locking `rdt_length` (second) cacheline */ #define LCK_LO_OFFSET 0 -#define LCK_LO_LEN offsetof(MDBX_lockinfo, mti_numreaders) +#define LCK_LO_LEN offsetof(lck_t, rdt_length) #define LCK_UP_OFFSET LCK_LO_LEN -#define LCK_UP_LEN (sizeof(MDBX_lockinfo) - LCK_UP_OFFSET) +#define LCK_UP_LEN (sizeof(lck_t) - LCK_UP_OFFSET) #define LCK_LOWER LCK_LO_OFFSET, LCK_LO_LEN #define LCK_UPPER LCK_UP_OFFSET, LCK_UP_LEN -MDBX_INTERNAL_FUNC int osal_rdt_lock(MDBX_env *env) { - osal_srwlock_AcquireShared(&env->me_remap_guard); - if (env->me_lfd == INVALID_HANDLE_VALUE) +MDBX_INTERNAL int lck_rdt_lock(MDBX_env *env) { + imports.srwl_AcquireShared(&env->remap_guard); + if (env->lck_mmap.fd == INVALID_HANDLE_VALUE) return MDBX_SUCCESS; /* readonly database in readonly filesystem */ /* transition from S-? (used) to S-E (locked), * e.g. 
exclusive lock upper-part */ - if (env->me_flags & MDBX_EXCLUSIVE) + if (env->flags & MDBX_EXCLUSIVE) return MDBX_SUCCESS; - int rc = flock(env->me_lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER); + int rc = flock(env->lck_mmap.fd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER); if (rc == MDBX_SUCCESS) return MDBX_SUCCESS; - osal_srwlock_ReleaseShared(&env->me_remap_guard); + imports.srwl_ReleaseShared(&env->remap_guard); return rc; } -MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env) { - if (env->me_lfd != INVALID_HANDLE_VALUE && - (env->me_flags & MDBX_EXCLUSIVE) == 0) { +MDBX_INTERNAL void lck_rdt_unlock(MDBX_env *env) { + if (env->lck_mmap.fd != INVALID_HANDLE_VALUE && + (env->flags & MDBX_EXCLUSIVE) == 0) { /* transition from S-E (locked) to S-? (used), e.g. unlock upper-part */ - int err = funlock(env->me_lfd, LCK_UPPER); + int err = funlock(env->lck_mmap.fd, LCK_UPPER); if (err != MDBX_SUCCESS) mdbx_panic("%s failed: err %u", __func__, err); } - osal_srwlock_ReleaseShared(&env->me_remap_guard); + imports.srwl_ReleaseShared(&env->remap_guard); } -MDBX_INTERNAL_FUNC int osal_lockfile(mdbx_filehandle_t fd, bool wait) { +MDBX_INTERNAL int osal_lockfile(mdbx_filehandle_t fd, bool wait) { return flock( fd, wait ? LCK_EXCLUSIVE | LCK_WAITFOR : LCK_EXCLUSIVE | LCK_DONTWAIT, 0, DXB_MAXLEN); @@ -293,7 +196,7 @@ static int suspend_and_append(mdbx_handle_array_t **array, mdbx_handle_array_t *const ptr = osal_realloc((limit > ARRAY_LENGTH((*array)->handles)) ? 
*array - : /* don't free initial array on the stack */ NULL, + : /* don't free initial array on the stack */ nullptr, sizeof(mdbx_handle_array_t) + sizeof(HANDLE) * (limit * (size_t)2 - ARRAY_LENGTH((*array)->handles))); @@ -307,7 +210,7 @@ static int suspend_and_append(mdbx_handle_array_t **array, HANDLE hThread = OpenThread(THREAD_SUSPEND_RESUME | THREAD_QUERY_INFORMATION, FALSE, ThreadId); - if (hThread == NULL) + if (hThread == nullptr) return (int)GetLastError(); if (SuspendThread(hThread) == (DWORD)-1) { @@ -324,28 +227,27 @@ static int suspend_and_append(mdbx_handle_array_t **array, return MDBX_SUCCESS; } -MDBX_INTERNAL_FUNC int +MDBX_INTERNAL int osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) { - eASSERT(env, (env->me_flags & MDBX_NOSTICKYTHREADS) == 0); + eASSERT(env, (env->flags & MDBX_NOSTICKYTHREADS) == 0); const uintptr_t CurrentTid = GetCurrentThreadId(); int rc; - if (env->me_lck_mmap.lck) { + if (env->lck_mmap.lck) { /* Scan LCK for threads of the current process */ - const MDBX_reader *const begin = env->me_lck_mmap.lck->mti_readers; - const MDBX_reader *const end = + const reader_slot_t *const begin = env->lck_mmap.lck->rdt; + const reader_slot_t *const end = begin + - atomic_load32(&env->me_lck_mmap.lck->mti_numreaders, mo_AcquireRelease); - const uintptr_t WriteTxnOwner = env->me_txn0 ? env->me_txn0->mt_owner : 0; - for (const MDBX_reader *reader = begin; reader < end; ++reader) { - if (reader->mr_pid.weak != env->me_pid || !reader->mr_tid.weak) { + atomic_load32(&env->lck_mmap.lck->rdt_length, mo_AcquireRelease); + const uintptr_t WriteTxnOwner = env->basal_txn ? 
env->basal_txn->owner : 0; + for (const reader_slot_t *reader = begin; reader < end; ++reader) { + if (reader->pid.weak != env->pid || !reader->tid.weak) { skip_lck: continue; } - if (reader->mr_tid.weak == CurrentTid || - reader->mr_tid.weak == WriteTxnOwner) + if (reader->tid.weak == CurrentTid || reader->tid.weak == WriteTxnOwner) goto skip_lck; - rc = suspend_and_append(array, (mdbx_tid_t)reader->mr_tid.weak); + rc = suspend_and_append(array, (mdbx_tid_t)reader->tid.weak); if (rc != MDBX_SUCCESS) { bailout_lck: (void)osal_resume_threads_after_remap(*array); @@ -360,7 +262,7 @@ osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) { } else { /* Without LCK (i.e. read-only mode). * Walk through a snapshot of all running threads */ - eASSERT(env, env->me_flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)); + eASSERT(env, env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)); const HANDLE hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0); if (hSnapshot == INVALID_HANDLE_VALUE) return (int)GetLastError(); @@ -377,7 +279,7 @@ osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) { } do { - if (entry.th32OwnerProcessID != env->me_pid || + if (entry.th32OwnerProcessID != env->pid || entry.th32ThreadID == CurrentTid) continue; @@ -396,8 +298,7 @@ osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) { return MDBX_SUCCESS; } -MDBX_INTERNAL_FUNC int -osal_resume_threads_after_remap(mdbx_handle_array_t *array) { +MDBX_INTERNAL int osal_resume_threads_after_remap(mdbx_handle_array_t *array) { int rc = MDBX_SUCCESS; for (unsigned i = 0; i < array->count; ++i) { const HANDLE hThread = array->handles[i]; @@ -426,6 +327,7 @@ osal_resume_threads_after_remap(mdbx_handle_array_t *array) { * Only 6 states of FSM are used, which 2 of ones are transitive. * * States: + * LO HI * ?-? = free, i.e. unlocked * S-? = used, i.e. shared lock * E-? = exclusive-read, i.e. 
operational exclusive @@ -436,39 +338,39 @@ osal_resume_threads_after_remap(mdbx_handle_array_t *array) { * E-S * E-E = exclusive-write, i.e. exclusive due (re)initialization * - * The osal_lck_seize() moves the locking-FSM from the initial free/unlocked + * The lck_seize() moves the locking-FSM from the initial free/unlocked * state to the "exclusive write" (and returns MDBX_RESULT_TRUE) if possible, * or to the "used" (and returns MDBX_RESULT_FALSE). * - * The osal_lck_downgrade() moves the locking-FSM from "exclusive write" + * The lck_downgrade() moves the locking-FSM from "exclusive write" * state to the "used" (i.e. shared) state. * - * The osal_lck_upgrade() moves the locking-FSM from "used" (i.e. shared) + * The lck_upgrade() moves the locking-FSM from "used" (i.e. shared) * state to the "exclusive write" state. */ static void lck_unlock(MDBX_env *env) { int err; - if (env->me_lfd != INVALID_HANDLE_VALUE) { + if (env->lck_mmap.fd != INVALID_HANDLE_VALUE) { /* double `unlock` for robustly remove overlapped shared/exclusive locks */ do - err = funlock(env->me_lfd, LCK_LOWER); + err = funlock(env->lck_mmap.fd, LCK_LOWER); while (err == MDBX_SUCCESS); assert(err == ERROR_NOT_LOCKED || - (mdbx_RunningUnderWine() && err == ERROR_LOCK_VIOLATION)); + (globals.running_under_Wine && err == ERROR_LOCK_VIOLATION)); SetLastError(ERROR_SUCCESS); do - err = funlock(env->me_lfd, LCK_UPPER); + err = funlock(env->lck_mmap.fd, LCK_UPPER); while (err == MDBX_SUCCESS); assert(err == ERROR_NOT_LOCKED || - (mdbx_RunningUnderWine() && err == ERROR_LOCK_VIOLATION)); + (globals.running_under_Wine && err == ERROR_LOCK_VIOLATION)); SetLastError(ERROR_SUCCESS); } const HANDLE fd4data = - env->me_overlapped_fd ? env->me_overlapped_fd : env->me_lazy_fd; + env->ioring.overlapped_fd ? 
env->ioring.overlapped_fd : env->lazy_fd; if (fd4data != INVALID_HANDLE_VALUE) { /* explicitly unlock to avoid latency for other processes (windows kernel * releases such locks via deferred queues) */ @@ -476,14 +378,14 @@ static void lck_unlock(MDBX_env *env) { err = funlock(fd4data, DXB_BODY); while (err == MDBX_SUCCESS); assert(err == ERROR_NOT_LOCKED || - (mdbx_RunningUnderWine() && err == ERROR_LOCK_VIOLATION)); + (globals.running_under_Wine && err == ERROR_LOCK_VIOLATION)); SetLastError(ERROR_SUCCESS); do err = funlock(fd4data, DXB_WHOLE); while (err == MDBX_SUCCESS); assert(err == ERROR_NOT_LOCKED || - (mdbx_RunningUnderWine() && err == ERROR_LOCK_VIOLATION)); + (globals.running_under_Wine && err == ERROR_LOCK_VIOLATION)); SetLastError(ERROR_SUCCESS); } } @@ -539,16 +441,16 @@ static int internal_seize_lck(HANDLE lfd) { return rc; } -MDBX_INTERNAL_FUNC int osal_lck_seize(MDBX_env *env) { +MDBX_INTERNAL int lck_seize(MDBX_env *env) { const HANDLE fd4data = - env->me_overlapped_fd ? env->me_overlapped_fd : env->me_lazy_fd; + env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd; assert(fd4data != INVALID_HANDLE_VALUE); - if (env->me_flags & MDBX_EXCLUSIVE) + if (env->flags & MDBX_EXCLUSIVE) return MDBX_RESULT_TRUE /* nope since files were must be opened non-shareable */ ; - if (env->me_lfd == INVALID_HANDLE_VALUE) { + if (env->lck_mmap.fd == INVALID_HANDLE_VALUE) { /* LY: without-lck mode (e.g. on read-only filesystem) */ jitter4testing(false); int rc = flock_data(env, LCK_SHARED | LCK_DONTWAIT, DXB_WHOLE); @@ -557,9 +459,9 @@ MDBX_INTERNAL_FUNC int osal_lck_seize(MDBX_env *env) { return rc; } - int rc = internal_seize_lck(env->me_lfd); + int rc = internal_seize_lck(env->lck_mmap.fd); jitter4testing(false); - if (rc == MDBX_RESULT_TRUE && (env->me_flags & MDBX_RDONLY) == 0) { + if (rc == MDBX_RESULT_TRUE && (env->flags & MDBX_RDONLY) == 0) { /* Check that another process don't operates in without-lck mode. 
* Doing such check by exclusive locking the body-part of db. Should be * noted: @@ -583,24 +485,24 @@ MDBX_INTERNAL_FUNC int osal_lck_seize(MDBX_env *env) { return rc; } -MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env) { +MDBX_INTERNAL int lck_downgrade(MDBX_env *env) { const HANDLE fd4data = - env->me_overlapped_fd ? env->me_overlapped_fd : env->me_lazy_fd; + env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd; /* Transite from exclusive-write state (E-E) to used (S-?) */ assert(fd4data != INVALID_HANDLE_VALUE); - assert(env->me_lfd != INVALID_HANDLE_VALUE); + assert(env->lck_mmap.fd != INVALID_HANDLE_VALUE); - if (env->me_flags & MDBX_EXCLUSIVE) + if (env->flags & MDBX_EXCLUSIVE) return MDBX_SUCCESS /* nope since files were must be opened non-shareable */ ; /* 1) now at E-E (exclusive-write), transition to ?_E (middle) */ - int rc = funlock(env->me_lfd, LCK_LOWER); + int rc = funlock(env->lck_mmap.fd, LCK_LOWER); if (rc != MDBX_SUCCESS) mdbx_panic("%s(%s) failed: err %u", __func__, "E-E(exclusive-write) >> ?-E(middle)", rc); /* 2) now at ?-E (middle), transition to S-E (locked) */ - rc = flock(env->me_lfd, LCK_SHARED | LCK_DONTWAIT, LCK_LOWER); + rc = flock(env->lck_mmap.fd, LCK_SHARED | LCK_DONTWAIT, LCK_LOWER); if (rc != MDBX_SUCCESS) { /* 3) something went wrong, give up */; ERROR("%s, err %u", "?-E(middle) >> S-E(locked)", rc); @@ -608,7 +510,7 @@ MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env) { } /* 4) got S-E (locked), continue transition to S-? (used) */ - rc = funlock(env->me_lfd, LCK_UPPER); + rc = funlock(env->lck_mmap.fd, LCK_UPPER); if (rc != MDBX_SUCCESS) mdbx_panic("%s(%s) failed: err %u", __func__, "S-E(locked) >> S-?(used)", rc); @@ -616,17 +518,17 @@ MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env) { return MDBX_SUCCESS /* 5) now at S-? 
(used), done */; } -MDBX_INTERNAL_FUNC int osal_lck_upgrade(MDBX_env *env, bool dont_wait) { +MDBX_INTERNAL int lck_upgrade(MDBX_env *env, bool dont_wait) { /* Transite from used state (S-?) to exclusive-write (E-E) */ - assert(env->me_lfd != INVALID_HANDLE_VALUE); + assert(env->lck_mmap.fd != INVALID_HANDLE_VALUE); - if (env->me_flags & MDBX_EXCLUSIVE) + if (env->flags & MDBX_EXCLUSIVE) return MDBX_SUCCESS /* nope since files were must be opened non-shareable */ ; /* 1) now on S-? (used), try S-E (locked) */ jitter4testing(false); - int rc = flock(env->me_lfd, + int rc = flock(env->lck_mmap.fd, dont_wait ? LCK_EXCLUSIVE | LCK_DONTWAIT : LCK_EXCLUSIVE, LCK_UPPER); if (rc != MDBX_SUCCESS) { @@ -636,14 +538,14 @@ MDBX_INTERNAL_FUNC int osal_lck_upgrade(MDBX_env *env, bool dont_wait) { } /* 3) now on S-E (locked), transition to ?-E (middle) */ - rc = funlock(env->me_lfd, LCK_LOWER); + rc = funlock(env->lck_mmap.fd, LCK_LOWER); if (rc != MDBX_SUCCESS) mdbx_panic("%s(%s) failed: err %u", __func__, "S-E(locked) >> ?-E(middle)", rc); /* 4) now on ?-E (middle), try E-E (exclusive-write) */ jitter4testing(false); - rc = flock(env->me_lfd, + rc = flock(env->lck_mmap.fd, dont_wait ? 
LCK_EXCLUSIVE | LCK_DONTWAIT : LCK_EXCLUSIVE, LCK_LOWER); if (rc != MDBX_SUCCESS) { @@ -655,25 +557,24 @@ MDBX_INTERNAL_FUNC int osal_lck_upgrade(MDBX_env *env, bool dont_wait) { return MDBX_SUCCESS /* 6) now at E-E (exclusive-write), done */; } -MDBX_INTERNAL_FUNC int osal_lck_init(MDBX_env *env, - MDBX_env *inprocess_neighbor, - int global_uniqueness_flag) { +MDBX_INTERNAL int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, + int global_uniqueness_flag) { (void)env; (void)inprocess_neighbor; (void)global_uniqueness_flag; - if (mdbx_SetFileIoOverlappedRange && !(env->me_flags & MDBX_RDONLY)) { + if (imports.SetFileIoOverlappedRange && !(env->flags & MDBX_RDONLY)) { HANDLE token = INVALID_HANDLE_VALUE; TOKEN_PRIVILEGES privileges; privileges.PrivilegeCount = 1; privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES, &token) || - !LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, + !LookupPrivilegeValue(nullptr, SE_LOCK_MEMORY_NAME, &privileges.Privileges[0].Luid) || !AdjustTokenPrivileges(token, FALSE, &privileges, sizeof(privileges), nullptr, nullptr) || GetLastError() != ERROR_SUCCESS) - mdbx_SetFileIoOverlappedRange = NULL; + imports.SetFileIoOverlappedRange = nullptr; if (token != INVALID_HANDLE_VALUE) CloseHandle(token); @@ -681,21 +582,21 @@ MDBX_INTERNAL_FUNC int osal_lck_init(MDBX_env *env, return MDBX_SUCCESS; } -MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, - MDBX_env *inprocess_neighbor, - const uint32_t current_pid) { +MDBX_INTERNAL int lck_destroy(MDBX_env *env, MDBX_env *inprocess_neighbor, + const uint32_t current_pid) { (void)current_pid; /* LY: should unmap before releasing the locks to avoid race condition and * STATUS_USER_MAPPED_FILE/ERROR_USER_MAPPED_FILE */ - if (env->me_map) - osal_munmap(&env->me_dxb_mmap); - if (env->me_lck_mmap.lck) { - const bool synced = env->me_lck_mmap.lck->mti_unsynced_pages.weak == 0; - osal_munmap(&env->me_lck_mmap); - if (synced && 
!inprocess_neighbor && env->me_lfd != INVALID_HANDLE_VALUE && - osal_lck_upgrade(env, true) == MDBX_SUCCESS) + if (env->dxb_mmap.base) + osal_munmap(&env->dxb_mmap); + if (env->lck_mmap.lck) { + const bool synced = env->lck_mmap.lck->unsynced_pages.weak == 0; + osal_munmap(&env->lck_mmap); + if (synced && !inprocess_neighbor && + env->lck_mmap.fd != INVALID_HANDLE_VALUE && + lck_upgrade(env, true) == MDBX_SUCCESS) /* this will fail if LCK is used/mmapped by other process(es) */ - osal_ftruncate(env->me_lfd, 0); + osal_ftruncate(env->lck_mmap.fd, 0); } lck_unlock(env); return MDBX_SUCCESS; @@ -704,12 +605,12 @@ MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, /*----------------------------------------------------------------------------*/ /* reader checking (by pid) */ -MDBX_INTERNAL_FUNC int osal_rpid_set(MDBX_env *env) { +MDBX_INTERNAL int lck_rpid_set(MDBX_env *env) { (void)env; return MDBX_SUCCESS; } -MDBX_INTERNAL_FUNC int osal_rpid_clear(MDBX_env *env) { +MDBX_INTERNAL int lck_rpid_clear(MDBX_env *env) { (void)env; return MDBX_SUCCESS; } @@ -720,7 +621,7 @@ MDBX_INTERNAL_FUNC int osal_rpid_clear(MDBX_env *env) { * MDBX_RESULT_TRUE, if pid is live (unable to acquire lock) * MDBX_RESULT_FALSE, if pid is dead (lock acquired) * or otherwise the errcode. 
*/ -MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid) { +MDBX_INTERNAL int lck_rpid_check(MDBX_env *env, uint32_t pid) { (void)env; HANDLE hProcess = OpenProcess(SYNCHRONIZE, FALSE, pid); int rc; @@ -753,169 +654,4 @@ MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid) { } } -//---------------------------------------------------------------------------- -// Stub for slim read-write lock -// Copyright (C) 1995-2002 Brad Wilson - -static void WINAPI stub_srwlock_Init(osal_srwlock_t *srwl) { - srwl->readerCount = srwl->writerCount = 0; -} - -static void WINAPI stub_srwlock_AcquireShared(osal_srwlock_t *srwl) { - while (true) { - assert(srwl->writerCount >= 0 && srwl->readerCount >= 0); - - // If there's a writer already, spin without unnecessarily - // interlocking the CPUs - if (srwl->writerCount != 0) { - SwitchToThread(); - continue; - } - - // Add to the readers list - _InterlockedIncrement(&srwl->readerCount); - - // Check for writers again (we may have been preempted). If - // there are no writers writing or waiting, then we're done. - if (srwl->writerCount == 0) - break; - - // Remove from the readers list, spin, try again - _InterlockedDecrement(&srwl->readerCount); - SwitchToThread(); - } -} - -static void WINAPI stub_srwlock_ReleaseShared(osal_srwlock_t *srwl) { - assert(srwl->readerCount > 0); - _InterlockedDecrement(&srwl->readerCount); -} - -static void WINAPI stub_srwlock_AcquireExclusive(osal_srwlock_t *srwl) { - while (true) { - assert(srwl->writerCount >= 0 && srwl->readerCount >= 0); - - // If there's a writer already, spin without unnecessarily - // interlocking the CPUs - if (srwl->writerCount != 0) { - SwitchToThread(); - continue; - } - - // See if we can become the writer (expensive, because it inter- - // locks the CPUs, so writing should be an infrequent process) - if (_InterlockedExchange(&srwl->writerCount, 1) == 0) - break; - } - - // Now we're the writer, but there may be outstanding readers. 
- // Spin until there aren't any more; new readers will wait now - // that we're the writer. - while (srwl->readerCount != 0) { - assert(srwl->writerCount >= 0 && srwl->readerCount >= 0); - SwitchToThread(); - } -} - -static void WINAPI stub_srwlock_ReleaseExclusive(osal_srwlock_t *srwl) { - assert(srwl->writerCount == 1 && srwl->readerCount >= 0); - srwl->writerCount = 0; -} - -static uint64_t WINAPI stub_GetTickCount64(void) { - LARGE_INTEGER Counter, Frequency; - return (QueryPerformanceFrequency(&Frequency) && - QueryPerformanceCounter(&Counter)) - ? Counter.QuadPart * 1000ul / Frequency.QuadPart - : 0; -} - -/*----------------------------------------------------------------------------*/ - -#ifndef xMDBX_ALLOY -osal_srwlock_t_function osal_srwlock_Init, osal_srwlock_AcquireShared, - osal_srwlock_ReleaseShared, osal_srwlock_AcquireExclusive, - osal_srwlock_ReleaseExclusive; - -MDBX_NtExtendSection mdbx_NtExtendSection; -MDBX_GetFileInformationByHandleEx mdbx_GetFileInformationByHandleEx; -MDBX_GetVolumeInformationByHandleW mdbx_GetVolumeInformationByHandleW; -MDBX_GetFinalPathNameByHandleW mdbx_GetFinalPathNameByHandleW; -MDBX_SetFileInformationByHandle mdbx_SetFileInformationByHandle; -MDBX_NtFsControlFile mdbx_NtFsControlFile; -MDBX_PrefetchVirtualMemory mdbx_PrefetchVirtualMemory; -MDBX_GetTickCount64 mdbx_GetTickCount64; -MDBX_RegGetValueA mdbx_RegGetValueA; -MDBX_SetFileIoOverlappedRange mdbx_SetFileIoOverlappedRange; -#endif /* xMDBX_ALLOY */ - -#if __GNUC_PREREQ(8, 0) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wcast-function-type" -#endif /* GCC/MINGW */ - -static void mdbx_winnt_import(void) { -#define GET_PROC_ADDR(dll, ENTRY) \ - mdbx_##ENTRY = (MDBX_##ENTRY)GetProcAddress(dll, #ENTRY) - - const HINSTANCE hNtdll = GetModuleHandleA("ntdll.dll"); - if (hNtdll) { - if (GetProcAddress(hNtdll, "wine_get_version")) { - assert(mdbx_RunningUnderWine()); - } else { - GET_PROC_ADDR(hNtdll, NtFsControlFile); - GET_PROC_ADDR(hNtdll, 
NtExtendSection); - assert(!mdbx_RunningUnderWine()); - } - } - - const HINSTANCE hKernel32dll = GetModuleHandleA("kernel32.dll"); - if (hKernel32dll) { - GET_PROC_ADDR(hKernel32dll, GetFileInformationByHandleEx); - GET_PROC_ADDR(hKernel32dll, GetTickCount64); - if (!mdbx_GetTickCount64) - mdbx_GetTickCount64 = stub_GetTickCount64; - if (!mdbx_RunningUnderWine()) { - GET_PROC_ADDR(hKernel32dll, SetFileInformationByHandle); - GET_PROC_ADDR(hKernel32dll, GetVolumeInformationByHandleW); - GET_PROC_ADDR(hKernel32dll, GetFinalPathNameByHandleW); - GET_PROC_ADDR(hKernel32dll, PrefetchVirtualMemory); - GET_PROC_ADDR(hKernel32dll, SetFileIoOverlappedRange); - } - } - - const osal_srwlock_t_function init = - (osal_srwlock_t_function)(hKernel32dll - ? GetProcAddress(hKernel32dll, - "InitializeSRWLock") - : nullptr); - if (init != NULL) { - osal_srwlock_Init = init; - osal_srwlock_AcquireShared = (osal_srwlock_t_function)GetProcAddress( - hKernel32dll, "AcquireSRWLockShared"); - osal_srwlock_ReleaseShared = (osal_srwlock_t_function)GetProcAddress( - hKernel32dll, "ReleaseSRWLockShared"); - osal_srwlock_AcquireExclusive = (osal_srwlock_t_function)GetProcAddress( - hKernel32dll, "AcquireSRWLockExclusive"); - osal_srwlock_ReleaseExclusive = (osal_srwlock_t_function)GetProcAddress( - hKernel32dll, "ReleaseSRWLockExclusive"); - } else { - osal_srwlock_Init = stub_srwlock_Init; - osal_srwlock_AcquireShared = stub_srwlock_AcquireShared; - osal_srwlock_ReleaseShared = stub_srwlock_ReleaseShared; - osal_srwlock_AcquireExclusive = stub_srwlock_AcquireExclusive; - osal_srwlock_ReleaseExclusive = stub_srwlock_ReleaseExclusive; - } - - const HINSTANCE hAdvapi32dll = GetModuleHandleA("advapi32.dll"); - if (hAdvapi32dll) { - GET_PROC_ADDR(hAdvapi32dll, RegGetValueA); - } -#undef GET_PROC_ADDR -} - -#if __GNUC_PREREQ(8, 0) -#pragma GCC diagnostic pop -#endif /* GCC/MINGW */ - -#endif /* Windows LCK-implementation */ +#endif /* Windows */ diff --git a/src/lck.c b/src/lck.c new file mode 
100644 index 00000000..c6b7228c --- /dev/null +++ b/src/lck.c @@ -0,0 +1,193 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +__cold static int lck_setup_locked(MDBX_env *env) { + int err = rthc_register(env); + if (unlikely(err != MDBX_SUCCESS)) + return err; + + int lck_seize_rc = lck_seize(env); + if (unlikely(MDBX_IS_ERROR(lck_seize_rc))) + return lck_seize_rc; + + if (env->lck_mmap.fd == INVALID_HANDLE_VALUE) { + env->lck = lckless_stub(env); + env->max_readers = UINT_MAX; + DEBUG("lck-setup:%s%s%s", " lck-less", + (env->flags & MDBX_RDONLY) ? " readonly" : "", + (lck_seize_rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative"); + return lck_seize_rc; + } + + DEBUG("lck-setup:%s%s%s", " with-lck", + (env->flags & MDBX_RDONLY) ? " readonly" : "", + (lck_seize_rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative"); + + MDBX_env *inprocess_neighbor = nullptr; + err = rthc_uniq_check(&env->lck_mmap, &inprocess_neighbor); + if (unlikely(MDBX_IS_ERROR(err))) + return err; + if (inprocess_neighbor) { + if ((globals.runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 || + (inprocess_neighbor->flags & MDBX_EXCLUSIVE) != 0) + return MDBX_BUSY; + if (lck_seize_rc == MDBX_RESULT_TRUE) { + err = lck_downgrade(env); + if (unlikely(err != MDBX_SUCCESS)) + return err; + lck_seize_rc = MDBX_RESULT_FALSE; + } + } + + uint64_t size = 0; + err = osal_filesize(env->lck_mmap.fd, &size); + if (unlikely(err != MDBX_SUCCESS)) + return err; + + if (lck_seize_rc == MDBX_RESULT_TRUE) { + size = + ceil_powerof2(env->max_readers * sizeof(reader_slot_t) + sizeof(lck_t), + globals.sys_pagesize); + jitter4testing(false); + } else { + if (env->flags & MDBX_EXCLUSIVE) + return MDBX_BUSY; + if (size > INT_MAX || (size & (globals.sys_pagesize - 1)) != 0 || + size < globals.sys_pagesize) { + ERROR("lck-file has invalid size %" PRIu64 " bytes", size); + return MDBX_PROBLEM; + } + } + + const size_t 
maxreaders = + ((size_t)size - sizeof(lck_t)) / sizeof(reader_slot_t); + if (maxreaders < 4) { + ERROR("lck-size too small (up to %" PRIuPTR " readers)", maxreaders); + return MDBX_PROBLEM; + } + env->max_readers = (maxreaders <= MDBX_READERS_LIMIT) + ? (unsigned)maxreaders + : (unsigned)MDBX_READERS_LIMIT; + + err = osal_mmap((env->flags & MDBX_EXCLUSIVE) | MDBX_WRITEMAP, &env->lck_mmap, + (size_t)size, (size_t)size, + lck_seize_rc ? MMAP_OPTION_TRUNCATE | MMAP_OPTION_SEMAPHORE + : MMAP_OPTION_SEMAPHORE); + if (unlikely(err != MDBX_SUCCESS)) + return err; + +#if MDBX_ENABLE_MADVISE +#ifdef MADV_DODUMP + err = madvise(env->lck_mmap.lck, size, MADV_DODUMP) ? ignore_enosys(errno) + : MDBX_SUCCESS; + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#endif /* MADV_DODUMP */ + +#ifdef MADV_WILLNEED + err = madvise(env->lck_mmap.lck, size, MADV_WILLNEED) ? ignore_enosys(errno) + : MDBX_SUCCESS; + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#elif defined(POSIX_MADV_WILLNEED) + err = ignore_enosys( + posix_madvise(env->lck_mmap.lck, size, POSIX_MADV_WILLNEED)); + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#endif /* MADV_WILLNEED */ +#endif /* MDBX_ENABLE_MADVISE */ + + lck_t *lck = env->lck_mmap.lck; + if (lck_seize_rc == MDBX_RESULT_TRUE) { + /* If we succeed got exclusive lock, then nobody is using the lock region + * and we should initialize it. 
*/ + memset(lck, 0, (size_t)size); + jitter4testing(false); + lck->magic_and_version = MDBX_LOCK_MAGIC; + lck->os_and_format = MDBX_LOCK_FORMAT; +#if MDBX_ENABLE_PGOP_STAT + lck->pgops.wops.weak = 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + err = osal_msync(&env->lck_mmap, 0, (size_t)size, + MDBX_SYNC_DATA | MDBX_SYNC_SIZE); + if (unlikely(err != MDBX_SUCCESS)) { + ERROR("initial-%s for lck-file failed, err %d", "msync/fsync", err); + eASSERT(env, MDBX_IS_ERROR(err)); + return err; + } + } else { + if (lck->magic_and_version != MDBX_LOCK_MAGIC) { + const bool invalid = (lck->magic_and_version >> 8) != MDBX_MAGIC; + ERROR("lock region has %s", + invalid + ? "invalid magic" + : "incompatible version (only applications with nearly or the " + "same versions of libmdbx can share the same database)"); + return invalid ? MDBX_INVALID : MDBX_VERSION_MISMATCH; + } + if (lck->os_and_format != MDBX_LOCK_FORMAT) { + ERROR("lock region has os/format signature 0x%" PRIx32 + ", expected 0x%" PRIx32, + lck->os_and_format, MDBX_LOCK_FORMAT); + return MDBX_VERSION_MISMATCH; + } + } + + err = lck_init(env, inprocess_neighbor, lck_seize_rc); + if (unlikely(err != MDBX_SUCCESS)) { + eASSERT(env, MDBX_IS_ERROR(err)); + return err; + } + + env->lck = lck; + eASSERT(env, !MDBX_IS_ERROR(lck_seize_rc)); + return lck_seize_rc; +} + +__cold int lck_setup(MDBX_env *env, mdbx_mode_t mode) { + eASSERT(env, env->lazy_fd != INVALID_HANDLE_VALUE); + eASSERT(env, env->lck_mmap.fd == INVALID_HANDLE_VALUE); + + int err = osal_openfile(MDBX_OPEN_LCK, env, env->pathname.lck, + &env->lck_mmap.fd, mode); + if (err != MDBX_SUCCESS) { + switch (err) { + default: + return err; + case MDBX_ENOFILE: + case MDBX_EACCESS: + case MDBX_EPERM: + if (!F_ISSET(env->flags, MDBX_RDONLY | MDBX_EXCLUSIVE)) + return err; + break; + case MDBX_EROFS: + if ((env->flags & MDBX_RDONLY) == 0) + return err; + break; + } + + if (err != MDBX_ENOFILE) { + /* ENSURE the file system is read-only */ + err = 
osal_check_fs_rdonly(env->lazy_fd, env->pathname.lck, err); + if (err != MDBX_SUCCESS && + /* ignore ERROR_NOT_SUPPORTED for exclusive mode */ + !(err == MDBX_ENOSYS && (env->flags & MDBX_EXCLUSIVE))) + return err; + } + + /* LY: without-lck mode (e.g. exclusive or on read-only filesystem) */ + env->lck_mmap.fd = INVALID_HANDLE_VALUE; + } + + rthc_lock(); + err = lck_setup_locked(env); + rthc_unlock(); + return err; +} + +void mincore_clean_cache(const MDBX_env *const env) { + memset(env->lck->mincore_cache.begin, -1, + sizeof(env->lck->mincore_cache.begin)); +} diff --git a/src/lck.h b/src/lck.h new file mode 100644 index 00000000..00ef6189 --- /dev/null +++ b/src/lck.h @@ -0,0 +1,112 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +MDBX_INTERNAL int lck_setup(MDBX_env *env, mdbx_mode_t mode); +#if MDBX_LOCKING > MDBX_LOCKING_SYSV +MDBX_INTERNAL int lck_ipclock_stubinit(osal_ipclock_t *ipc); +MDBX_INTERNAL int lck_ipclock_destroy(osal_ipclock_t *ipc); +#endif /* MDBX_LOCKING > MDBX_LOCKING_SYSV */ + +/// \brief Initialization of synchronization primitives linked with MDBX_env +/// instance both in LCK-file and within the current process. +/// \param +/// global_uniqueness_flag = true - denotes that there are no other processes +/// working with DB and LCK-file. Thus the function MUST initialize +/// shared synchronization objects in memory-mapped LCK-file. +/// global_uniqueness_flag = false - denotes that at least one process is +/// already working with DB and LCK-file, including the case when DB +/// has already been opened in the current process. Thus the function +/// MUST NOT initialize shared synchronization objects in memory-mapped +/// LCK-file that are already in use. +/// \return Error code or zero on success. 
+MDBX_INTERNAL int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, + int global_uniqueness_flag); + +/// \brief Disconnects from shared interprocess objects and destructs +/// synchronization objects linked with MDBX_env instance +/// within the current process. +/// \param +/// inprocess_neighbor = nullptr - if the current process does not have other +/// instances of MDBX_env linked with the DB being closed. +/// Thus the function MUST check for other processes working with DB or +/// LCK-file, and keep or destroy shared synchronization objects in +/// memory-mapped LCK-file depending on the result. +/// inprocess_neighbor = not-nullptr - pointer to another instance of MDBX_env +/// (any one of them, if there are several) working with DB or LCK-file within the +/// current process. Thus the function MUST NOT try to acquire exclusive +/// lock and/or try to destruct shared synchronization objects linked with +/// DB or LCK-file. Moreover, the implementation MUST ensure correct work +/// of other instances of MDBX_env within the current process, e.g. +/// restore POSIX-fcntl locks after the closing of file descriptors. +/// \return Error code (MDBX_PANIC) or zero on success. +MDBX_INTERNAL int lck_destroy(MDBX_env *env, MDBX_env *inprocess_neighbor, + const uint32_t current_pid); + +/// \brief Connects to shared interprocess locking objects and tries to acquire +/// the maximum lock level (shared if exclusive is not available). +/// Depending on implementation or/and platform (Windows) this function may +/// acquire the non-OS super-level lock (e.g. for shared synchronization +/// objects initialization), which will be downgraded to OS-exclusive or +/// shared via explicit calling of lck_downgrade(). +/// \return +/// MDBX_RESULT_TRUE (-1) - if an exclusive lock was acquired and thus +/// the current process is the first and only after the last use of DB.
+/// MDBX_RESULT_FALSE (0) - if a shared lock was acquired and thus +/// DB has already been opened and now is used by other processes. +/// Otherwise (not 0 and not -1) - error code. +MDBX_INTERNAL int lck_seize(MDBX_env *env); + +/// \brief Downgrades the level of initially acquired lock to +/// operational level specified by argument. The reason for such downgrade: +/// - unblocking of other processes that are waiting for access, i.e. +/// if (env->flags & MDBX_EXCLUSIVE) != 0, then other processes +/// should be made aware that access is unavailable rather than +/// wait for it. +/// - freeing locks that interfere with file operations (especially for Windows) +/// (env->flags & MDBX_EXCLUSIVE) == 0 - downgrade to shared lock. +/// (env->flags & MDBX_EXCLUSIVE) != 0 - downgrade to exclusive +/// operational lock. +/// \return Error code or zero on success +MDBX_INTERNAL int lck_downgrade(MDBX_env *env); + +MDBX_MAYBE_UNUSED MDBX_INTERNAL int lck_upgrade(MDBX_env *env, bool dont_wait); + +/// \brief Locks LCK-file or/and table of readers for (de)registering. +/// \return Error code or zero on success +MDBX_INTERNAL int lck_rdt_lock(MDBX_env *env); + +/// \brief Unlocks LCK-file or/and table of readers after (de)registering. +MDBX_INTERNAL void lck_rdt_unlock(MDBX_env *env); + +/// \brief Acquires write-transaction lock. +/// \return Error code or zero on success +MDBX_INTERNAL int lck_txn_lock(MDBX_env *env, bool dont_wait); + +/// \brief Releases write-transaction lock. +MDBX_INTERNAL void lck_txn_unlock(MDBX_env *env); + +/// \brief Sets alive-flag of reader presence (indicative lock) for PID of +/// the current process. The function does no more than needed for +/// the correct working of lck_rpid_check() in other processes. +/// \return Error code or zero on success +MDBX_INTERNAL int lck_rpid_set(MDBX_env *env); + +/// \brief Resets alive-flag of reader presence (indicative lock) +/// for PID of the current process.
The function does no more than needed +/// for the correct working of lck_rpid_check() in other processes. +/// \return Error code or zero on success +MDBX_INTERNAL int lck_rpid_clear(MDBX_env *env); + +/// \brief Checks for reading process status with the given pid with help of +/// alive-flag of presence (indicative lock) or using another way. +/// \return +/// MDBX_RESULT_TRUE (-1) - if the reader process with the given PID is alive +/// and working with DB (indicative lock is present). +/// MDBX_RESULT_FALSE (0) - if the reader process with the given PID is absent +/// or not working with DB (indicative lock is not present). +/// Otherwise (not 0 and not -1) - error code. +MDBX_INTERNAL int lck_rpid_check(MDBX_env *env, uint32_t pid); diff --git a/src/logging_and_debug.c b/src/logging_and_debug.c new file mode 100644 index 00000000..a3f52cf1 --- /dev/null +++ b/src/logging_and_debug.c @@ -0,0 +1,261 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +__cold void debug_log_va(int level, const char *function, int line, + const char *fmt, va_list args) { + ENSURE(nullptr, osal_fastmutex_acquire(&globals.debug_lock) == 0); + if (globals.logger.ptr) { + if (globals.logger_buffer == nullptr) + globals.logger.fmt(level, function, line, fmt, args); + else { + const int len = vsnprintf(globals.logger_buffer, + globals.logger_buffer_size, fmt, args); + if (len > 0) + globals.logger.nofmt(level, function, line, globals.logger_buffer, len); + } + } else { +#if defined(_WIN32) || defined(_WIN64) + if (IsDebuggerPresent()) { + int prefix_len = 0; + char *prefix = nullptr; + if (function && line > 0) + prefix_len = osal_asprintf(&prefix, "%s:%d ", function, line); + else if (function) + prefix_len = osal_asprintf(&prefix, "%s: ", function); + else if (line > 0) + prefix_len = osal_asprintf(&prefix, "%d: ", line); + if (prefix_len > 0 && prefix) { + OutputDebugStringA(prefix); + 
osal_free(prefix); + } + char *msg = nullptr; + int msg_len = osal_vasprintf(&msg, fmt, args); + if (msg_len > 0 && msg) { + OutputDebugStringA(msg); + osal_free(msg); + } + } +#else + if (function && line > 0) + fprintf(stderr, "%s:%d ", function, line); + else if (function) + fprintf(stderr, "%s: ", function); + else if (line > 0) + fprintf(stderr, "%d: ", line); + vfprintf(stderr, fmt, args); + fflush(stderr); +#endif + } + ENSURE(nullptr, osal_fastmutex_release(&globals.debug_lock) == 0); +} + +__cold void debug_log(int level, const char *function, int line, + const char *fmt, ...) { + va_list args; + va_start(args, fmt); + debug_log_va(level, function, line, fmt, args); + va_end(args); +} + +/* Dump a val in ascii or hexadecimal. */ +__cold const char *mdbx_dump_val(const MDBX_val *val, char *const buf, + const size_t bufsize) { + if (!val) + return ""; + if (!val->iov_len) + return ""; + if (!buf || bufsize < 4) + return nullptr; + + if (!val->iov_base) { + int len = snprintf(buf, bufsize, "", val->iov_len); + assert(len > 0 && (size_t)len < bufsize); + (void)len; + return buf; + } + + bool is_ascii = true; + const uint8_t *const data = val->iov_base; + for (size_t i = 0; i < val->iov_len; i++) + if (data[i] < ' ' || data[i] > '~') { + is_ascii = false; + break; + } + + if (is_ascii) { + int len = + snprintf(buf, bufsize, "%.*s", + (val->iov_len > INT_MAX) ? 
INT_MAX : (int)val->iov_len, data); + assert(len > 0 && (size_t)len < bufsize); + (void)len; + } else { + char *const detent = buf + bufsize - 2; + char *ptr = buf; + *ptr++ = '<'; + for (size_t i = 0; i < val->iov_len && ptr < detent; i++) { + const char hex[16] = {'0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; + *ptr++ = hex[data[i] >> 4]; + *ptr++ = hex[data[i] & 15]; + } + if (ptr < detent) + *ptr++ = '>'; + *ptr = '\0'; + } + return buf; +} + +/*------------------------------------------------------------------------------ + LY: debug stuff */ + +__cold const char *pagetype_caption(const uint8_t type, char buf4unknown[16]) { + switch (type) { + case P_BRANCH: + return "branch"; + case P_LEAF: + return "leaf"; + case P_LEAF | P_SUBP: + return "subleaf"; + case P_LEAF | P_DUPFIX: + return "dupfix-leaf"; + case P_LEAF | P_DUPFIX | P_SUBP: + return "dupfix-subleaf"; + case P_LEAF | P_DUPFIX | P_SUBP | P_LEGACY_DIRTY: + return "dupfix-subleaf.legacy-dirty"; + case P_LARGE: + return "large"; + default: + snprintf(buf4unknown, 16, "unknown_0x%x", type); + return buf4unknown; + } +} + +__cold static const char *leafnode_type(node_t *n) { + static const char *const tp[2][2] = {{"", ": DB"}, + {": sub-page", ": sub-DB"}}; + return (node_flags(n) & N_BIGDATA) + ? ": large page" + : tp[!!(node_flags(n) & N_DUPDATA)][!!(node_flags(n) & N_SUBDATA)]; +} + +/* Display all the keys in the page. 
*/ +__cold void page_list(page_t *mp) { + pgno_t pgno = mp->pgno; + const char *type; + node_t *node; + size_t i, nkeys, nsize, total = 0; + MDBX_val key; + DKBUF; + + switch (page_type(mp)) { + case P_BRANCH: + type = "Branch page"; + break; + case P_LEAF: + type = "Leaf page"; + break; + case P_LEAF | P_SUBP: + type = "Leaf sub-page"; + break; + case P_LEAF | P_DUPFIX: + type = "Leaf2 page"; + break; + case P_LEAF | P_DUPFIX | P_SUBP: + type = "Leaf2 sub-page"; + break; + case P_LARGE: + VERBOSE("Overflow page %" PRIaPGNO " pages %u\n", pgno, mp->pages); + return; + case P_META: + VERBOSE("Meta-page %" PRIaPGNO " txnid %" PRIu64 "\n", pgno, + unaligned_peek_u64(4, page_meta(mp)->txnid_a)); + return; + default: + VERBOSE("Bad page %" PRIaPGNO " flags 0x%X\n", pgno, mp->flags); + return; + } + + nkeys = page_numkeys(mp); + VERBOSE("%s %" PRIaPGNO " numkeys %zu\n", type, pgno, nkeys); + + for (i = 0; i < nkeys; i++) { + if (is_dupfix_leaf( + mp)) { /* DUPFIX pages have no entries[] or node headers */ + key = page_dupfix_key(mp, i, nsize = mp->dupfix_ksize); + total += nsize; + VERBOSE("key %zu: nsize %zu, %s\n", i, nsize, DKEY(&key)); + continue; + } + node = page_node(mp, i); + key.iov_len = node_ks(node); + key.iov_base = node->payload; + nsize = NODESIZE + key.iov_len; + if (is_branch(mp)) { + VERBOSE("key %zu: page %" PRIaPGNO ", %s\n", i, node_pgno(node), + DKEY(&key)); + total += nsize; + } else { + if (node_flags(node) & N_BIGDATA) + nsize += sizeof(pgno_t); + else + nsize += node_ds(node); + total += nsize; + nsize += sizeof(indx_t); + VERBOSE("key %zu: nsize %zu, %s%s\n", i, nsize, DKEY(&key), + leafnode_type(node)); + } + total = EVEN_CEIL(total); + } + VERBOSE("Total: header %u + contents %zu + unused %zu\n", + is_dupfix_leaf(mp) ? 
PAGEHDRSZ : PAGEHDRSZ + mp->lower, total, + page_room(mp)); +} + +__cold static int setup_debug(MDBX_log_level_t level, MDBX_debug_flags_t flags, + union logger_union logger, char *buffer, + size_t buffer_size) { + ENSURE(nullptr, osal_fastmutex_acquire(&globals.debug_lock) == 0); + + const int rc = globals.runtime_flags | (globals.loglevel << 16); + if (level != MDBX_LOG_DONTCHANGE) + globals.loglevel = (uint8_t)level; + + if (flags != MDBX_DBG_DONTCHANGE) { + flags &= +#if MDBX_DEBUG + MDBX_DBG_ASSERT | MDBX_DBG_AUDIT | MDBX_DBG_JITTER | +#endif + MDBX_DBG_DUMP | MDBX_DBG_LEGACY_MULTIOPEN | MDBX_DBG_LEGACY_OVERLAP | + MDBX_DBG_DONT_UPGRADE; + globals.runtime_flags = (uint8_t)flags; + } + + assert(MDBX_LOGGER_DONTCHANGE == ((MDBX_debug_func *)(intptr_t)-1)); + if (logger.ptr != (void *)((intptr_t)-1)) { + globals.logger.ptr = logger.ptr; + globals.logger_buffer = buffer; + globals.logger_buffer_size = buffer_size; + } + + ENSURE(nullptr, osal_fastmutex_release(&globals.debug_lock) == 0); + return rc; +} + +__cold int mdbx_setup_debug_nofmt(MDBX_log_level_t level, + MDBX_debug_flags_t flags, + MDBX_debug_func_nofmt *logger, char *buffer, + size_t buffer_size) { + union logger_union thunk; + thunk.nofmt = + (logger && buffer && buffer_size) ? 
logger : MDBX_LOGGER_NOFMT_DONTCHANGE; + return setup_debug(level, flags, thunk, buffer, buffer_size); +} + +__cold int mdbx_setup_debug(MDBX_log_level_t level, MDBX_debug_flags_t flags, + MDBX_debug_func *logger) { + union logger_union thunk; + thunk.fmt = logger; + return setup_debug(level, flags, thunk, nullptr, 0); +} diff --git a/src/logging_and_debug.h b/src/logging_and_debug.h new file mode 100644 index 00000000..bfb45631 --- /dev/null +++ b/src/logging_and_debug.h @@ -0,0 +1,160 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +#ifndef __Wpedantic_format_voidptr +MDBX_MAYBE_UNUSED static inline const void * +__Wpedantic_format_voidptr(const void *ptr) { + return ptr; +} +#define __Wpedantic_format_voidptr(ARG) __Wpedantic_format_voidptr(ARG) +#endif /* __Wpedantic_format_voidptr */ + +MDBX_INTERNAL void MDBX_PRINTF_ARGS(4, 5) + debug_log(int level, const char *function, int line, const char *fmt, ...) + MDBX_PRINTF_ARGS(4, 5); +MDBX_INTERNAL void debug_log_va(int level, const char *function, int line, + const char *fmt, va_list args); + +#if MDBX_DEBUG +#define LOG_ENABLED(LVL) unlikely(LVL <= globals.loglevel) +#define AUDIT_ENABLED() \ + unlikely((globals.runtime_flags & (unsigned)MDBX_DBG_AUDIT)) +#else /* MDBX_DEBUG */ +#define LOG_ENABLED(LVL) (LVL < MDBX_LOG_VERBOSE && LVL <= globals.loglevel) +#define AUDIT_ENABLED() (0) +#endif /* LOG_ENABLED() & AUDIT_ENABLED() */ + +#if MDBX_FORCE_ASSERTIONS +#define ASSERT_ENABLED() (1) +#elif MDBX_DEBUG +#define ASSERT_ENABLED() \ + likely((globals.runtime_flags & (unsigned)MDBX_DBG_ASSERT)) +#else +#define ASSERT_ENABLED() (0) +#endif /* ASSERT_ENABLED() */ + +#define DEBUG_EXTRA(fmt, ...) \ + do { \ + if (LOG_ENABLED(MDBX_LOG_EXTRA)) \ + debug_log(MDBX_LOG_EXTRA, __func__, __LINE__, fmt, __VA_ARGS__); \ + } while (0) + +#define DEBUG_EXTRA_PRINT(fmt, ...) 
\ + do { \ + if (LOG_ENABLED(MDBX_LOG_EXTRA)) \ + debug_log(MDBX_LOG_EXTRA, nullptr, 0, fmt, __VA_ARGS__); \ + } while (0) + +#define TRACE(fmt, ...) \ + do { \ + if (LOG_ENABLED(MDBX_LOG_TRACE)) \ + debug_log(MDBX_LOG_TRACE, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ + } while (0) + +#define DEBUG(fmt, ...) \ + do { \ + if (LOG_ENABLED(MDBX_LOG_DEBUG)) \ + debug_log(MDBX_LOG_DEBUG, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ + } while (0) + +#define VERBOSE(fmt, ...) \ + do { \ + if (LOG_ENABLED(MDBX_LOG_VERBOSE)) \ + debug_log(MDBX_LOG_VERBOSE, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ + } while (0) + +#define NOTICE(fmt, ...) \ + do { \ + if (LOG_ENABLED(MDBX_LOG_NOTICE)) \ + debug_log(MDBX_LOG_NOTICE, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ + } while (0) + +#define WARNING(fmt, ...) \ + do { \ + if (LOG_ENABLED(MDBX_LOG_WARN)) \ + debug_log(MDBX_LOG_WARN, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ + } while (0) + +#undef ERROR /* wingdi.h \ + Yeah, morons from M$ put such definition to the public header. */ + +#define ERROR(fmt, ...) \ + do { \ + if (LOG_ENABLED(MDBX_LOG_ERROR)) \ + debug_log(MDBX_LOG_ERROR, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ + } while (0) + +#define FATAL(fmt, ...) 
\ + debug_log(MDBX_LOG_FATAL, __func__, __LINE__, fmt "\n", __VA_ARGS__); + +#if MDBX_DEBUG +#define ASSERT_FAIL(env, msg, func, line) mdbx_assert_fail(env, msg, func, line) +#else /* MDBX_DEBUG */ +MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func, + unsigned line); +#define ASSERT_FAIL(env, msg, func, line) \ + do { \ + (void)(env); \ + assert_fail(msg, func, line); \ + } while (0) +#endif /* MDBX_DEBUG */ + +#define ENSURE_MSG(env, expr, msg) \ + do { \ + if (unlikely(!(expr))) \ + ASSERT_FAIL(env, msg, __func__, __LINE__); \ + } while (0) + +#define ENSURE(env, expr) ENSURE_MSG(env, expr, #expr) + +/* assert(3) variant in environment context */ +#define eASSERT(env, expr) \ + do { \ + if (ASSERT_ENABLED()) \ + ENSURE(env, expr); \ + } while (0) + +/* assert(3) variant in cursor context */ +#define cASSERT(mc, expr) eASSERT((mc)->txn->env, expr) + +/* assert(3) variant in transaction context */ +#define tASSERT(txn, expr) eASSERT((txn)->env, expr) + +#ifndef xMDBX_TOOLS /* Avoid using internal eASSERT() */ +#undef assert +#define assert(expr) eASSERT(nullptr, expr) +#endif + +MDBX_MAYBE_UNUSED static inline void jitter4testing(bool tiny) { +#if MDBX_DEBUG + if (globals.runtime_flags & (unsigned)MDBX_DBG_JITTER) + osal_jitter(tiny); +#else + (void)tiny; +#endif +} + +MDBX_MAYBE_UNUSED MDBX_INTERNAL void page_list(page_t *mp); + +MDBX_INTERNAL const char *pagetype_caption(const uint8_t type, + char buf4unknown[16]); +/* Key size which fits in a DKBUF (debug key buffer). 
*/ +#define DKBUF_MAX 127 +#define DKBUF char dbg_kbuf[DKBUF_MAX * 4 + 2] +#define DKEY(x) mdbx_dump_val(x, dbg_kbuf, DKBUF_MAX * 2 + 1) +#define DVAL(x) \ + mdbx_dump_val(x, dbg_kbuf + DKBUF_MAX * 2 + 1, DKBUF_MAX * 2 + 1) + +#if MDBX_DEBUG +#define DKBUF_DEBUG DKBUF +#define DKEY_DEBUG(x) DKEY(x) +#define DVAL_DEBUG(x) DVAL(x) +#else +#define DKBUF_DEBUG ((void)(0)) +#define DKEY_DEBUG(x) ("-") +#define DVAL_DEBUG(x) ("-") +#endif diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 8f4e740d..cc5b35ef 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -1,18 +1,14 @@ -// -// Copyright (c) 2020-2024, Leonid Yuriev . -// SPDX-License-Identifier: Apache-2.0 -// -// Non-inline part of the libmdbx C++ API -// +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2020-2024 +/// +/// \brief Non-inline part of the libmdbx C++ API +/// -#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS) -#define _CRT_SECURE_NO_WARNINGS -#endif /* _CRT_SECURE_NO_WARNINGS */ +#include "essentials.h" -#if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) && \ - !defined(__USE_MINGW_ANSI_STDIO) -#define __USE_MINGW_ANSI_STDIO 1 -#endif /* MinGW */ +#if !defined(MDBX_BUILD_CXX) || MDBX_BUILD_CXX != 1 +#error "Build is misconfigured! Expecting MDBX_BUILD_CXX=1 for C++ API." 
+#endif /* MDBX_BUILD_CXX*/ /* Workaround for MSVC' header `extern "C"` vs `std::` redefinition bug */ #if defined(_MSC_VER) && defined(__SANITIZE_ADDRESS__) && \ @@ -22,8 +18,6 @@ #include "../mdbx.h++" -#include "internals.h" - #include #include #include // for isxdigit(), etc @@ -402,6 +396,7 @@ __cold void error::throw_exception() const { CASE_EXCEPTION(incompatible_operation, MDBX_INCOMPATIBLE); CASE_EXCEPTION(internal_page_full, MDBX_PAGE_FULL); CASE_EXCEPTION(internal_problem, MDBX_PROBLEM); + CASE_EXCEPTION(key_exists, MDBX_KEYEXIST); CASE_EXCEPTION(key_mismatch, MDBX_EKEYMISMATCH); CASE_EXCEPTION(max_maps_reached, MDBX_DBS_FULL); CASE_EXCEPTION(max_readers_reached, MDBX_READERS_FULL); @@ -1227,7 +1222,7 @@ env::operate_parameters::make_flags(bool accede, bool use_subdirectory) const { if (options.nested_write_transactions) flags &= ~MDBX_WRITEMAP; if (reclaiming.coalesce) - flags |= MDBX_env_flags_t(MDBX_DEPRECATED_COALESCE); + flags |= MDBX_COALESCE; if (reclaiming.lifo) flags |= MDBX_LIFORECLAIM; switch (durability) { @@ -1272,7 +1267,7 @@ env::durability env::operate_parameters::durability_from_flags( env::reclaiming_options::reclaiming_options(MDBX_env_flags_t flags) noexcept : lifo((flags & MDBX_LIFORECLAIM) ? true : false), - coalesce((flags & MDBX_DEPRECATED_COALESCE) ? true : false) {} + coalesce((flags & MDBX_COALESCE) ? 
true : false) {} env::operate_options::operate_options(MDBX_env_flags_t flags) noexcept : no_sticky_threads(((flags & (MDBX_NOSTICKYTHREADS | MDBX_EXCLUSIVE)) == @@ -1742,21 +1737,20 @@ __cold ::std::ostream &operator<<(::std::ostream &out, const char *suffix; } static const scales[] = { #if MDBX_WORDBITS > 32 - {env_managed::geometry::EiB, "EiB"}, - {env_managed::geometry::EB, "EB"}, - {env_managed::geometry::PiB, "PiB"}, - {env_managed::geometry::PB, "PB"}, - {env_managed::geometry::TiB, "TiB"}, - {env_managed::geometry::TB, "TB"}, + {env_managed::geometry::EiB, "EiB"}, + {env_managed::geometry::EB, "EB"}, + {env_managed::geometry::PiB, "PiB"}, + {env_managed::geometry::PB, "PB"}, + {env_managed::geometry::TiB, "TiB"}, + {env_managed::geometry::TB, "TB"}, #endif - {env_managed::geometry::GiB, "GiB"}, - {env_managed::geometry::GB, "GB"}, - {env_managed::geometry::MiB, "MiB"}, - {env_managed::geometry::MB, "MB"}, - {env_managed::geometry::KiB, "KiB"}, - {env_managed::geometry::kB, "kB"}, - {1, " bytes"} - }; + {env_managed::geometry::GiB, "GiB"}, + {env_managed::geometry::GB, "GB"}, + {env_managed::geometry::MiB, "MiB"}, + {env_managed::geometry::MB, "MB"}, + {env_managed::geometry::KiB, "KiB"}, + {env_managed::geometry::kB, "kB"}, + {1, " bytes"}}; for (const auto i : scales) if (bytes % i.one == 0) diff --git a/src/meta.c b/src/meta.c new file mode 100644 index 00000000..5a4ced84 --- /dev/null +++ b/src/meta.c @@ -0,0 +1,746 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +typedef struct meta_snap { + uint64_t txnid; + size_t is_steady; +} meta_snap_t; + +static inline txnid_t fetch_txnid(const volatile mdbx_atomic_uint32_t *ptr) { +#if (defined(__amd64__) || defined(__e2k__)) && !defined(ENABLE_UBSAN) && \ + MDBX_UNALIGNED_OK >= 8 + return atomic_load64((const volatile mdbx_atomic_uint64_t *)ptr, + mo_AcquireRelease); +#else + const uint32_t l = atomic_load32( + 
&ptr[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], mo_AcquireRelease); + const uint32_t h = atomic_load32( + &ptr[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], mo_AcquireRelease); + return (uint64_t)h << 32 | l; +#endif +} + +static inline meta_snap_t meta_snap(const volatile meta_t *meta) { + txnid_t txnid = fetch_txnid(meta->txnid_a); + jitter4testing(true); + size_t is_steady = meta_is_steady(meta) && txnid >= MIN_TXNID; + jitter4testing(true); + if (unlikely(txnid != fetch_txnid(meta->txnid_b))) + txnid = is_steady = 0; + meta_snap_t r = {txnid, is_steady}; + return r; +} + +txnid_t meta_txnid(const volatile meta_t *meta) { + return meta_snap(meta).txnid; +} + +meta_ptr_t meta_ptr(const MDBX_env *env, unsigned n) { + eASSERT(env, n < NUM_METAS); + meta_ptr_t r; + meta_snap_t snap = meta_snap(r.ptr_v = METAPAGE(env, n)); + r.txnid = snap.txnid; + r.is_steady = snap.is_steady; + return r; +} + +static uint8_t meta_cmp2pack(uint8_t c01, uint8_t c02, uint8_t c12, bool s0, + bool s1, bool s2) { + assert(c01 < 3 && c02 < 3 && c12 < 3); + /* assert(s0 < 2 && s1 < 2 && s2 < 2); */ + const uint8_t recent = meta_cmp2recent(c01, s0, s1) + ? (meta_cmp2recent(c02, s0, s2) ? 0 : 2) + : (meta_cmp2recent(c12, s1, s2) ? 1 : 2); + const uint8_t prefer_steady = meta_cmp2steady(c01, s0, s1) + ? (meta_cmp2steady(c02, s0, s2) ? 0 : 2) + : (meta_cmp2steady(c12, s1, s2) ? 1 : 2); + + uint8_t tail; + if (recent == 0) + tail = meta_cmp2steady(c12, s1, s2) ? 2 : 1; + else if (recent == 1) + tail = meta_cmp2steady(c02, s0, s2) ? 2 : 0; + else + tail = meta_cmp2steady(c01, s0, s1) ? 
1 : 0; + + const bool valid = + c01 != 1 || s0 != s1 || c02 != 1 || s0 != s2 || c12 != 1 || s1 != s2; + const bool strict = (c01 != 1 || s0 != s1) && (c02 != 1 || s0 != s2) && + (c12 != 1 || s1 != s2); + return tail | recent << 2 | prefer_steady << 4 | strict << 6 | valid << 7; +} + +static inline void meta_troika_unpack(troika_t *troika, const uint8_t packed) { + troika->recent = (packed >> 2) & 3; + troika->prefer_steady = (packed >> 4) & 3; + troika->tail_and_flags = packed & 0xC3; +#if MDBX_WORDBITS > 32 /* Workaround for false-positives from Valgrind */ + troika->unused_pad = 0; +#endif +} + +static const uint8_t troika_fsm_map[2 * 2 * 2 * 3 * 3 * 3] = { + 232, 201, 216, 216, 232, 233, 232, 232, 168, 201, 216, 152, 168, 233, 232, + 168, 233, 201, 216, 201, 233, 233, 232, 233, 168, 201, 152, 216, 232, 169, + 232, 168, 168, 193, 152, 152, 168, 169, 232, 168, 169, 193, 152, 194, 233, + 169, 232, 169, 232, 201, 216, 216, 232, 201, 232, 232, 168, 193, 216, 152, + 168, 193, 232, 168, 193, 193, 210, 194, 225, 193, 225, 193, 168, 137, 212, + 214, 232, 233, 168, 168, 168, 137, 212, 150, 168, 233, 168, 168, 169, 137, + 216, 201, 233, 233, 168, 169, 168, 137, 148, 214, 232, 169, 168, 168, 40, + 129, 148, 150, 168, 169, 168, 40, 169, 129, 152, 194, 233, 169, 168, 169, + 168, 137, 214, 214, 232, 201, 168, 168, 168, 129, 214, 150, 168, 193, 168, + 168, 129, 129, 210, 194, 225, 193, 161, 129, 212, 198, 212, 214, 228, 228, + 212, 212, 148, 201, 212, 150, 164, 233, 212, 148, 233, 201, 216, 201, 233, + 233, 216, 233, 148, 198, 148, 214, 228, 164, 212, 148, 148, 194, 148, 150, + 164, 169, 212, 148, 169, 194, 152, 194, 233, 169, 216, 169, 214, 198, 214, + 214, 228, 198, 212, 214, 150, 194, 214, 150, 164, 193, 212, 150, 194, 194, + 210, 194, 225, 193, 210, 194}; + +__cold bool troika_verify_fsm(void) { + bool ok = true; + for (size_t i = 0; i < 2 * 2 * 2 * 3 * 3 * 3; ++i) { + const bool s0 = (i >> 0) & 1; + const bool s1 = (i >> 1) & 1; + const bool s2 = (i >> 2) & 1; + const 
uint8_t c01 = (i / (8 * 1)) % 3; + const uint8_t c02 = (i / (8 * 3)) % 3; + const uint8_t c12 = (i / (8 * 9)) % 3; + + const uint8_t packed = meta_cmp2pack(c01, c02, c12, s0, s1, s2); + troika_t troika; + troika.fsm = (uint8_t)i; + meta_troika_unpack(&troika, packed); + + const uint8_t tail = TROIKA_TAIL(&troika); + const bool strict = TROIKA_STRICT_VALID(&troika); + const bool valid = TROIKA_VALID(&troika); + + const uint8_t recent_chk = meta_cmp2recent(c01, s0, s1) + ? (meta_cmp2recent(c02, s0, s2) ? 0 : 2) + : (meta_cmp2recent(c12, s1, s2) ? 1 : 2); + const uint8_t prefer_steady_chk = + meta_cmp2steady(c01, s0, s1) ? (meta_cmp2steady(c02, s0, s2) ? 0 : 2) + : (meta_cmp2steady(c12, s1, s2) ? 1 : 2); + + uint8_t tail_chk; + if (recent_chk == 0) + tail_chk = meta_cmp2steady(c12, s1, s2) ? 2 : 1; + else if (recent_chk == 1) + tail_chk = meta_cmp2steady(c02, s0, s2) ? 2 : 0; + else + tail_chk = meta_cmp2steady(c01, s0, s1) ? 1 : 0; + + const bool valid_chk = + c01 != 1 || s0 != s1 || c02 != 1 || s0 != s2 || c12 != 1 || s1 != s2; + const bool strict_chk = (c01 != 1 || s0 != s1) && (c02 != 1 || s0 != s2) && + (c12 != 1 || s1 != s2); + assert(troika.recent == recent_chk); + assert(troika.prefer_steady == prefer_steady_chk); + assert(tail == tail_chk); + assert(valid == valid_chk); + assert(strict == strict_chk); + assert(troika_fsm_map[troika.fsm] == packed); + if (troika.recent != recent_chk || + troika.prefer_steady != prefer_steady_chk || tail != tail_chk || + valid != valid_chk || strict != strict_chk || + troika_fsm_map[troika.fsm] != packed) { + ok = false; + } + } + return ok; +} + +__hot troika_t meta_tap(const MDBX_env *env) { + meta_snap_t snap; + troika_t troika; + snap = meta_snap(METAPAGE(env, 0)); + troika.txnid[0] = snap.txnid; + troika.fsm = (uint8_t)snap.is_steady << 0; + snap = meta_snap(METAPAGE(env, 1)); + troika.txnid[1] = snap.txnid; + troika.fsm += (uint8_t)snap.is_steady << 1; + troika.fsm += meta_cmp2int(troika.txnid[0], troika.txnid[1], 8); + 
snap = meta_snap(METAPAGE(env, 2)); + troika.txnid[2] = snap.txnid; + troika.fsm += (uint8_t)snap.is_steady << 2; + troika.fsm += meta_cmp2int(troika.txnid[0], troika.txnid[2], 8 * 3); + troika.fsm += meta_cmp2int(troika.txnid[1], troika.txnid[2], 8 * 3 * 3); + + meta_troika_unpack(&troika, troika_fsm_map[troika.fsm]); + return troika; +} + +txnid_t recent_committed_txnid(const MDBX_env *env) { + const txnid_t m0 = meta_txnid(METAPAGE(env, 0)); + const txnid_t m1 = meta_txnid(METAPAGE(env, 1)); + const txnid_t m2 = meta_txnid(METAPAGE(env, 2)); + return (m0 > m1) ? ((m0 > m2) ? m0 : m2) : ((m1 > m2) ? m1 : m2); +} + +static inline bool meta_eq(const troika_t *troika, size_t a, size_t b) { + assert(a < NUM_METAS && b < NUM_METAS); + return troika->txnid[a] == troika->txnid[b] && + (((troika->fsm >> a) ^ (troika->fsm >> b)) & 1) == 0 && + troika->txnid[a]; +} + +unsigned meta_eq_mask(const troika_t *troika) { + return meta_eq(troika, 0, 1) | meta_eq(troika, 1, 2) << 1 | + meta_eq(troika, 2, 0) << 2; +} + +__hot bool meta_should_retry(const MDBX_env *env, troika_t *troika) { + const troika_t prev = *troika; + *troika = meta_tap(env); + return prev.fsm != troika->fsm || prev.txnid[0] != troika->txnid[0] || + prev.txnid[1] != troika->txnid[1] || prev.txnid[2] != troika->txnid[2]; +} + +const char *durable_caption(const meta_t *const meta) { + if (meta_is_steady(meta)) + return (meta_sign_get(meta) == meta_sign_calculate(meta)) ? "Steady" + : "Tainted"; + return "Weak"; +} + +__cold void meta_troika_dump(const MDBX_env *env, const troika_t *troika) { + const meta_ptr_t recent = meta_recent(env, troika); + const meta_ptr_t prefer_steady = meta_prefer_steady(env, troika); + const meta_ptr_t tail = meta_tail(env, troika); + NOTICE("troika: %" PRIaTXN ".%c:%" PRIaTXN ".%c:%" PRIaTXN ".%c, fsm=0x%02x, " + "head=%d-%" PRIaTXN ".%c, " + "base=%d-%" PRIaTXN ".%c, " + "tail=%d-%" PRIaTXN ".%c, " + "valid %c, strict %c", + troika->txnid[0], (troika->fsm & 1) ? 
's' : 'w', troika->txnid[1], + (troika->fsm & 2) ? 's' : 'w', troika->txnid[2], + (troika->fsm & 4) ? 's' : 'w', troika->fsm, troika->recent, + recent.txnid, recent.is_steady ? 's' : 'w', troika->prefer_steady, + prefer_steady.txnid, prefer_steady.is_steady ? 's' : 'w', + /* tail index is the two low bits only; bits 6..7 hold the strict/valid flags */ + TROIKA_TAIL(troika), tail.txnid, + tail.is_steady ? 's' : 'w', TROIKA_VALID(troika) ? 'Y' : 'N', + TROIKA_STRICT_VALID(troika) ? 'Y' : 'N'); +} + +/*----------------------------------------------------------------------------*/ + +static int meta_unsteady(MDBX_env *env, const txnid_t inclusive_upto, + const pgno_t pgno) { + meta_t *const meta = METAPAGE(env, pgno); + const txnid_t txnid = constmeta_txnid(meta); + if (!meta_is_steady(meta) || txnid > inclusive_upto) + return MDBX_RESULT_FALSE; + + WARNING("wipe txn #%" PRIaTXN ", meta %" PRIaPGNO, txnid, pgno); + const uint64_t wipe = DATASIGN_NONE; + const void *ptr = &wipe; + size_t bytes = sizeof(meta->sign), + offset = ptr_dist(&meta->sign, env->dxb_mmap.base); + if (env->flags & MDBX_WRITEMAP) { + unaligned_poke_u64(4, meta->sign, wipe); + osal_flush_incoherent_cpu_writeback(); + if (!MDBX_AVOID_MSYNC) + return MDBX_RESULT_TRUE; + ptr = data_page(meta); + offset = ptr_dist(ptr, env->dxb_mmap.base); + bytes = env->ps; + } + +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.wops.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + int err = osal_pwrite(env->fd4meta, ptr, bytes, offset); + return likely(err == MDBX_SUCCESS) ? 
MDBX_RESULT_TRUE : err; +} + +/* Wipes the steady signature of every meta-page that is steady and whose + * txnid <= inclusive_upto, syncs the change and re-reads the troika. + * Returns the sync result when something was wiped, otherwise the last + * meta_unsteady() result (MDBX_RESULT_FALSE or an error code). */ +__cold int meta_wipe_steady(MDBX_env *env, txnid_t inclusive_upto) { + /* Remember whether ANY meta was wiped: judging by the last call alone + * would skip the sync when only meta 0 or 1 had been changed. */ + bool wiped = false; + int err = MDBX_RESULT_FALSE; + for (pgno_t n = 0; n < NUM_METAS && likely(!MDBX_IS_ERROR(err)); ++n) { + err = meta_unsteady(env, inclusive_upto, n); + wiped |= (err == MDBX_RESULT_TRUE); + } + + if (likely(!MDBX_IS_ERROR(err)) && wiped) { + err = MDBX_SUCCESS; + if (!MDBX_AVOID_MSYNC && (env->flags & MDBX_WRITEMAP)) { + err = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS), + MDBX_SYNC_DATA | MDBX_SYNC_IODQ); +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.msync.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + } else if (env->fd4meta == env->lazy_fd) { + err = osal_fsync(env->lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.fsync.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + } + } + + osal_flush_incoherent_mmap(env->dxb_mmap.base, pgno2bytes(env, NUM_METAS), + globals.sys_pagesize); + + /* force oldest refresh */ + atomic_store32(&env->lck->rdt_refresh_flag, true, mo_Relaxed); + + env->basal_txn->tw.troika = meta_tap(env); + for (MDBX_txn *scan = env->basal_txn->nested; scan; scan = scan->nested) + scan->tw.troika = env->basal_txn->tw.troika; + return err; +} + +int meta_sync(const MDBX_env *env, const meta_ptr_t head) { + eASSERT(env, atomic_load32(&env->lck->meta_sync_txnid, mo_Relaxed) != + (uint32_t)head.txnid); + /* The function may be called (among other cases) when (env->flags & + * MDBX_NOMETASYNC) == 0 and env->fd4meta == env->dsync_fd, for example if + * the previous transaction was committed with the MDBX_NOMETASYNC flag. 
*/ + + int rc = MDBX_RESULT_TRUE; + if (env->flags & MDBX_WRITEMAP) { + if (!MDBX_AVOID_MSYNC) { + rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS), + MDBX_SYNC_DATA | MDBX_SYNC_IODQ); +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.msync.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + } else { + /* msync is avoided: write the head meta-page explicitly and account it */ +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.wops.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + const page_t *page = data_page(head.ptr_c); + rc = osal_pwrite(env->fd4meta, page, env->ps, + ptr_dist(page, env->dxb_mmap.base)); + + if (likely(rc == MDBX_SUCCESS) && env->fd4meta == env->lazy_fd) { + rc = osal_fsync(env->lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.fsync.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + } + } + } else { + rc = osal_fsync(env->lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.fsync.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + } + + if (likely(rc == MDBX_SUCCESS)) + env->lck->meta_sync_txnid.weak = (uint32_t)head.txnid; + return rc; +} + +__cold static page_t *meta_model(const MDBX_env *env, page_t *model, + size_t num) { + ENSURE(env, is_powerof2(env->ps)); + ENSURE(env, env->ps >= MDBX_MIN_PAGESIZE); + ENSURE(env, env->ps <= MDBX_MAX_PAGESIZE); + ENSURE(env, env->geo_in_bytes.lower >= MIN_MAPSIZE); + ENSURE(env, env->geo_in_bytes.upper <= MAX_MAPSIZE); + ENSURE(env, env->geo_in_bytes.now >= env->geo_in_bytes.lower); + ENSURE(env, env->geo_in_bytes.now <= env->geo_in_bytes.upper); + + memset(model, 0, env->ps); + model->pgno = (pgno_t)num; + model->flags = P_META; + meta_t *const model_meta = page_meta(model); + unaligned_poke_u64(4, model_meta->magic_and_version, MDBX_DATA_MAGIC); + + model_meta->geometry.lower = bytes2pgno(env, env->geo_in_bytes.lower); + model_meta->geometry.upper = bytes2pgno(env, env->geo_in_bytes.upper); + model_meta->geometry.grow_pv = + pages2pv(bytes2pgno(env, env->geo_in_bytes.grow)); + 
model_meta->geometry.shrink_pv = + pages2pv(bytes2pgno(env, env->geo_in_bytes.shrink)); + model_meta->geometry.now = bytes2pgno(env, env->geo_in_bytes.now); + model_meta->geometry.first_unallocated = NUM_METAS; + + ENSURE(env, model_meta->geometry.lower >= MIN_PAGENO); + ENSURE(env, model_meta->geometry.upper <= MAX_PAGENO + 1); + ENSURE(env, model_meta->geometry.now >= model_meta->geometry.lower); + ENSURE(env, model_meta->geometry.now <= model_meta->geometry.upper); + ENSURE(env, model_meta->geometry.first_unallocated >= MIN_PAGENO); + ENSURE(env, + model_meta->geometry.first_unallocated <= model_meta->geometry.now); + ENSURE(env, model_meta->geometry.grow_pv == + pages2pv(pv2pages(model_meta->geometry.grow_pv))); + ENSURE(env, model_meta->geometry.shrink_pv == + pages2pv(pv2pages(model_meta->geometry.shrink_pv))); + + model_meta->pagesize = env->ps; + model_meta->trees.gc.flags = MDBX_INTEGERKEY; + model_meta->trees.gc.root = P_INVALID; + model_meta->trees.main.root = P_INVALID; + meta_set_txnid(env, model_meta, MIN_TXNID + num); + unaligned_poke_u64(4, model_meta->sign, meta_sign_calculate(model_meta)); + eASSERT(env, coherency_check_meta(env, model_meta, true)); + return ptr_disp(model, env->ps); +} + +__cold meta_t *meta_init_triplet(const MDBX_env *env, void *buffer) { + page_t *page0 = (page_t *)buffer; + page_t *page1 = meta_model(env, page0, 0); + page_t *page2 = meta_model(env, page1, 1); + meta_model(env, page2, 2); + return page_meta(page2); +} + +__cold int __must_check_result meta_override(MDBX_env *env, size_t target, + txnid_t txnid, + const meta_t *shape) { + int rc = env_page_auxbuffer(env); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + page_t *const page = env->page_auxbuf; + meta_model(env, page, target); + meta_t *const model = page_meta(page); + meta_set_txnid(env, model, txnid); + if (txnid) + eASSERT(env, coherency_check_meta(env, model, true)); + if (shape) { + if (txnid && unlikely(!coherency_check_meta(env, shape, false))) { + 
ERROR("bailout overriding meta-%zu since model failed " + "FreeDB/MainDB %s-check for txnid #%" PRIaTXN, + target, "pre", constmeta_txnid(shape)); + return MDBX_PROBLEM; + } + if (globals.runtime_flags & MDBX_DBG_DONT_UPGRADE) + memcpy(&model->magic_and_version, &shape->magic_and_version, + sizeof(model->magic_and_version)); + model->reserve16 = shape->reserve16; + model->validator_id = shape->validator_id; + model->extra_pagehdr = shape->extra_pagehdr; + memcpy(&model->geometry, &shape->geometry, sizeof(model->geometry)); + memcpy(&model->trees, &shape->trees, sizeof(model->trees)); + memcpy(&model->canary, &shape->canary, sizeof(model->canary)); + memcpy(&model->pages_retired, &shape->pages_retired, + sizeof(model->pages_retired)); + if (txnid) { + if ((!model->trees.gc.mod_txnid && model->trees.gc.root != P_INVALID) || + (!model->trees.main.mod_txnid && model->trees.main.root != P_INVALID)) + memcpy(&model->magic_and_version, &shape->magic_and_version, + sizeof(model->magic_and_version)); + if (unlikely(!coherency_check_meta(env, model, false))) { + ERROR("bailout overriding meta-%zu since model failed " + "FreeDB/MainDB %s-check for txnid #%" PRIaTXN, + target, "post", txnid); + return MDBX_PROBLEM; + } + } + } + meta_sign_as_steady(model); + rc = meta_validate(env, model, page, (pgno_t)target, nullptr); + if (unlikely(MDBX_IS_ERROR(rc))) + return MDBX_PROBLEM; + + if (shape && memcmp(model, shape, sizeof(meta_t)) == 0) { + NOTICE("skip overriding meta-%zu since no changes " + "for txnid #%" PRIaTXN, + target, txnid); + return MDBX_SUCCESS; + } + + if (env->flags & MDBX_WRITEMAP) { +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.msync.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + rc = osal_msync(&env->dxb_mmap, 0, + pgno_align2os_bytes(env, model->geometry.first_unallocated), + MDBX_SYNC_DATA | MDBX_SYNC_IODQ); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + /* meta_override() called only while current process have exclusive + * lock of a DB file. 
So meta-page could be updated directly without + * clearing consistency flag by mdbx_meta_update_begin() */ + memcpy(pgno2page(env, target), page, env->ps); + osal_flush_incoherent_cpu_writeback(); +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.msync.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, target + 1), + MDBX_SYNC_DATA | MDBX_SYNC_IODQ); + } else { +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.wops.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + rc = osal_pwrite(env->fd4meta, page, env->ps, pgno2bytes(env, target)); + if (rc == MDBX_SUCCESS && env->fd4meta == env->lazy_fd) { +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.fsync.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + rc = osal_fsync(env->lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); + } + osal_flush_incoherent_mmap(env->dxb_mmap.base, pgno2bytes(env, NUM_METAS), + globals.sys_pagesize); + } + eASSERT(env, (!env->txn && !env->basal_txn) || + (env->stuck_meta == (int)target && + (env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == + MDBX_EXCLUSIVE)); + return rc; +} + +__cold int meta_validate(MDBX_env *env, meta_t *const meta, + const page_t *const page, const unsigned meta_number, + unsigned *guess_pagesize) { + const uint64_t magic_and_version = + unaligned_peek_u64(4, &meta->magic_and_version); + if (unlikely(magic_and_version != MDBX_DATA_MAGIC && + magic_and_version != MDBX_DATA_MAGIC_LEGACY_COMPAT && + magic_and_version != MDBX_DATA_MAGIC_LEGACY_DEVEL)) { + ERROR("meta[%u] has invalid magic/version %" PRIx64, meta_number, + magic_and_version); + return ((magic_and_version >> 8) != MDBX_MAGIC) ? 
MDBX_INVALID + : MDBX_VERSION_MISMATCH; + } + + if (unlikely(page->pgno != meta_number)) { + ERROR("meta[%u] has invalid pageno %" PRIaPGNO, meta_number, page->pgno); + return MDBX_INVALID; + } + + if (unlikely(page->flags != P_META)) { + ERROR("page #%u not a meta-page", meta_number); + return MDBX_INVALID; + } + + if (unlikely(!is_powerof2(meta->pagesize) || + meta->pagesize < MDBX_MIN_PAGESIZE || + meta->pagesize > MDBX_MAX_PAGESIZE)) { + WARNING("meta[%u] has invalid pagesize (%u), skip it", meta_number, + meta->pagesize); + return is_powerof2(meta->pagesize) ? MDBX_VERSION_MISMATCH : MDBX_INVALID; + } + + if (guess_pagesize && *guess_pagesize != meta->pagesize) { + *guess_pagesize = meta->pagesize; + VERBOSE("meta[%u] took pagesize %u", meta_number, meta->pagesize); + } + + const txnid_t txnid = unaligned_peek_u64(4, &meta->txnid_a); + if (unlikely(txnid != unaligned_peek_u64(4, &meta->txnid_b))) { + WARNING("meta[%u] not completely updated, skip it", meta_number); + return MDBX_RESULT_TRUE; + } + + /* LY: check signature as a checksum */ + const uint64_t sign = meta_sign_get(meta); + const uint64_t sign_steady = meta_sign_calculate(meta); + if (SIGN_IS_STEADY(sign) && unlikely(sign != sign_steady)) { + WARNING("meta[%u] has invalid steady-checksum (0x%" PRIx64 " != 0x%" PRIx64 + "), skip it", + meta_number, sign, sign_steady); + return MDBX_RESULT_TRUE; + } + + if (unlikely(meta->trees.gc.flags != MDBX_INTEGERKEY)) { + WARNING("meta[%u] has invalid %s flags 0x%x, skip it", meta_number, + "GC/FreeDB", meta->trees.gc.flags); + return MDBX_INCOMPATIBLE; + } + + if (unlikely(!check_sdb_flags(meta->trees.main.flags))) { + WARNING("meta[%u] has invalid %s flags 0x%x, skip it", meta_number, + "MainDB", meta->trees.main.flags); + return MDBX_INCOMPATIBLE; + } + + DEBUG("checking meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO + ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO + " +%u -%u, txn_id %" PRIaTXN ", %s", + page->pgno, 
meta->trees.main.root, meta->trees.gc.root, + meta->geometry.lower, meta->geometry.first_unallocated, + meta->geometry.now, meta->geometry.upper, + pv2pages(meta->geometry.grow_pv), pv2pages(meta->geometry.shrink_pv), + txnid, durable_caption(meta)); + + if (unlikely(txnid < MIN_TXNID || txnid > MAX_TXNID)) { + WARNING("meta[%u] has invalid txnid %" PRIaTXN ", skip it", meta_number, + txnid); + return MDBX_RESULT_TRUE; + } + + if (unlikely(meta->geometry.lower < MIN_PAGENO || + meta->geometry.lower > MAX_PAGENO + 1)) { + WARNING("meta[%u] has invalid min-pages (%" PRIaPGNO "), skip it", + meta_number, meta->geometry.lower); + return MDBX_INVALID; + } + + if (unlikely(meta->geometry.upper < MIN_PAGENO || + meta->geometry.upper > MAX_PAGENO + 1 || + meta->geometry.upper < meta->geometry.lower)) { + WARNING("meta[%u] has invalid max-pages (%" PRIaPGNO "), skip it", + meta_number, meta->geometry.upper); + return MDBX_INVALID; + } + + if (unlikely(meta->geometry.first_unallocated < MIN_PAGENO || + meta->geometry.first_unallocated - 1 > MAX_PAGENO)) { + WARNING("meta[%u] has invalid next-pageno (%" PRIaPGNO "), skip it", + meta_number, meta->geometry.first_unallocated); + return MDBX_CORRUPTED; + } + + const uint64_t used_bytes = + meta->geometry.first_unallocated * (uint64_t)meta->pagesize; + if (unlikely(used_bytes > env->dxb_mmap.filesize)) { + /* Here could be a race with DB-shrinking performed by other process */ + int err = osal_filesize(env->lazy_fd, &env->dxb_mmap.filesize); + if (unlikely(err != MDBX_SUCCESS)) + return err; + if (unlikely(used_bytes > env->dxb_mmap.filesize)) { + WARNING("meta[%u] used-bytes (%" PRIu64 ") beyond filesize (%" PRIu64 + "), skip it", + meta_number, used_bytes, env->dxb_mmap.filesize); + return MDBX_CORRUPTED; + } + } + if (unlikely(meta->geometry.first_unallocated - 1 > MAX_PAGENO || + used_bytes > MAX_MAPSIZE)) { + WARNING("meta[%u] has too large used-space (%" PRIu64 "), skip it", + meta_number, used_bytes); + return 
MDBX_TOO_LARGE; + } + + pgno_t geo_lower = meta->geometry.lower; + uint64_t mapsize_min = geo_lower * (uint64_t)meta->pagesize; + STATIC_ASSERT(MAX_MAPSIZE < PTRDIFF_MAX - MDBX_MAX_PAGESIZE); + STATIC_ASSERT(MIN_MAPSIZE < MAX_MAPSIZE); + STATIC_ASSERT((uint64_t)(MAX_PAGENO + 1) * MDBX_MIN_PAGESIZE % (4ul << 20) == + 0); + if (unlikely(mapsize_min < MIN_MAPSIZE || mapsize_min > MAX_MAPSIZE)) { + if (MAX_MAPSIZE != MAX_MAPSIZE64 && mapsize_min > MAX_MAPSIZE && + mapsize_min <= MAX_MAPSIZE64) { + eASSERT(env, meta->geometry.first_unallocated - 1 <= MAX_PAGENO && + used_bytes <= MAX_MAPSIZE); + WARNING("meta[%u] has too large min-mapsize (%" PRIu64 "), " + "but size of used space still acceptable (%" PRIu64 ")", + meta_number, mapsize_min, used_bytes); + geo_lower = (pgno_t)((mapsize_min = MAX_MAPSIZE) / meta->pagesize); + if (geo_lower > MAX_PAGENO + 1) { + geo_lower = MAX_PAGENO + 1; + mapsize_min = geo_lower * (uint64_t)meta->pagesize; + } + WARNING("meta[%u] consider geo-%s pageno is %" PRIaPGNO + " instead of wrong %" PRIaPGNO + ", will be corrected on next commit(s)", + meta_number, "lower", geo_lower, meta->geometry.lower); + meta->geometry.lower = geo_lower; + } else { + WARNING("meta[%u] has invalid min-mapsize (%" PRIu64 "), skip it", + meta_number, mapsize_min); + return MDBX_VERSION_MISMATCH; + } + } + + pgno_t geo_upper = meta->geometry.upper; + uint64_t mapsize_max = geo_upper * (uint64_t)meta->pagesize; + STATIC_ASSERT(MIN_MAPSIZE < MAX_MAPSIZE); + if (unlikely(mapsize_max > MAX_MAPSIZE || + (MAX_PAGENO + 1) < + ceil_powerof2((size_t)mapsize_max, globals.sys_pagesize) / + (size_t)meta->pagesize)) { + if (mapsize_max > MAX_MAPSIZE64) { + WARNING("meta[%u] has invalid max-mapsize (%" PRIu64 "), skip it", + meta_number, mapsize_max); + return MDBX_VERSION_MISMATCH; + } + /* allow to open large DB from a 32-bit environment */ + eASSERT(env, meta->geometry.first_unallocated - 1 <= MAX_PAGENO && + used_bytes <= MAX_MAPSIZE); + WARNING("meta[%u] has too large 
max-mapsize (%" PRIu64 "), " + "but size of used space still acceptable (%" PRIu64 ")", + meta_number, mapsize_max, used_bytes); + geo_upper = (pgno_t)((mapsize_max = MAX_MAPSIZE) / meta->pagesize); + if (geo_upper > MAX_PAGENO + 1) { + geo_upper = MAX_PAGENO + 1; + mapsize_max = geo_upper * (uint64_t)meta->pagesize; + } + WARNING("meta[%u] consider geo-%s pageno is %" PRIaPGNO + " instead of wrong %" PRIaPGNO + ", will be corrected on next commit(s)", + meta_number, "upper", geo_upper, meta->geometry.upper); + meta->geometry.upper = geo_upper; + } + + /* LY: check and silently put geometry.now into [geo.lower...geo.upper]. + * + * Copy-with-compaction by old version of libmdbx could produce DB-file + * less than meta.geo.lower bound, in case actual filling is low or no data + * at all. This is not a problem as there is no damage or loss of data. + * Therefore it is better not to consider such situation as an error, but + * silently correct it. */ + pgno_t geo_now = meta->geometry.now; + if (geo_now < geo_lower) + geo_now = geo_lower; + if (geo_now > geo_upper && meta->geometry.first_unallocated <= geo_upper) + geo_now = geo_upper; + + if (unlikely(meta->geometry.first_unallocated > geo_now)) { + WARNING("meta[%u] next-pageno (%" PRIaPGNO + ") is beyond end-pgno (%" PRIaPGNO "), skip it", + meta_number, meta->geometry.first_unallocated, geo_now); + return MDBX_CORRUPTED; + } + if (meta->geometry.now != geo_now) { + WARNING("meta[%u] consider geo-%s pageno is %" PRIaPGNO + " instead of wrong %" PRIaPGNO + ", will be corrected on next commit(s)", + meta_number, "now", geo_now, meta->geometry.now); + meta->geometry.now = geo_now; + } + + /* GC */ + if (meta->trees.gc.root == P_INVALID) { + if (unlikely(meta->trees.gc.branch_pages || meta->trees.gc.height || + meta->trees.gc.items || meta->trees.gc.leaf_pages || + meta->trees.gc.large_pages)) { + WARNING("meta[%u] has false-empty %s, skip it", meta_number, "GC"); + return MDBX_CORRUPTED; + } + } else if 
(unlikely(meta->trees.gc.root >= + meta->geometry.first_unallocated)) { + WARNING("meta[%u] has invalid %s-root %" PRIaPGNO ", skip it", meta_number, + "GC", meta->trees.gc.root); + return MDBX_CORRUPTED; + } + + /* MainDB */ + if (meta->trees.main.root == P_INVALID) { + if (unlikely(meta->trees.main.branch_pages || meta->trees.main.height || + meta->trees.main.items || meta->trees.main.leaf_pages || + meta->trees.main.large_pages)) { + WARNING("meta[%u] has false-empty %s", meta_number, "MainDB"); + return MDBX_CORRUPTED; + } + } else if (unlikely(meta->trees.main.root >= + meta->geometry.first_unallocated)) { + WARNING("meta[%u] has invalid %s-root %" PRIaPGNO ", skip it", meta_number, + "MainDB", meta->trees.main.root); + return MDBX_CORRUPTED; + } + + if (unlikely(meta->trees.gc.mod_txnid > txnid)) { + WARNING("meta[%u] has wrong mod_txnid %" PRIaTXN " for %s, skip it", + meta_number, meta->trees.gc.mod_txnid, "GC"); + return MDBX_CORRUPTED; + } + + if (unlikely(meta->trees.main.mod_txnid > txnid)) { + WARNING("meta[%u] has wrong mod_txnid %" PRIaTXN " for %s, skip it", + meta_number, meta->trees.main.mod_txnid, "MainDB"); + return MDBX_CORRUPTED; + } + + return MDBX_SUCCESS; +} + +__cold int meta_validate_copy(MDBX_env *env, const meta_t *meta, meta_t *dest) { + *dest = *meta; + return meta_validate(env, dest, data_page(meta), + bytes2pgno(env, ptr_dist(meta, env->dxb_mmap.base)), + nullptr); +} diff --git a/src/meta.h b/src/meta.h new file mode 100644 index 00000000..51a8d66f --- /dev/null +++ b/src/meta.h @@ -0,0 +1,203 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +static inline uint64_t meta_sign_calculate(const meta_t *meta) { + uint64_t sign = DATASIGN_NONE; +#if 0 /* TODO */ + sign = hippeus_hash64(...); +#else + (void)meta; +#endif + /* LY: newer returns DATASIGN_NONE or DATASIGN_WEAK */ + return (sign > DATASIGN_WEAK) ? 
sign : ~sign; +} + +static inline uint64_t meta_sign_get(const volatile meta_t *meta) { + return unaligned_peek_u64_volatile(4, meta->sign); +} + +static inline void meta_sign_as_steady(meta_t *meta) { + unaligned_poke_u64(4, meta->sign, meta_sign_calculate(meta)); +} + +static inline bool meta_is_steady(const volatile meta_t *meta) { + return SIGN_IS_STEADY(meta_sign_get(meta)); +} + +MDBX_INTERNAL troika_t meta_tap(const MDBX_env *env); +MDBX_INTERNAL unsigned meta_eq_mask(const troika_t *troika); +MDBX_INTERNAL bool meta_should_retry(const MDBX_env *env, troika_t *troika); +MDBX_MAYBE_UNUSED MDBX_INTERNAL bool troika_verify_fsm(void); + +struct meta_ptr { + txnid_t txnid; + union { + const volatile meta_t *ptr_v; + const meta_t *ptr_c; + }; + size_t is_steady; +}; + +MDBX_INTERNAL meta_ptr_t meta_ptr(const MDBX_env *env, unsigned n); +MDBX_INTERNAL txnid_t meta_txnid(const volatile meta_t *meta); +MDBX_INTERNAL txnid_t recent_committed_txnid(const MDBX_env *env); +MDBX_INTERNAL int meta_sync(const MDBX_env *env, const meta_ptr_t head); + +MDBX_INTERNAL const char *durable_caption(const meta_t *const meta); +MDBX_INTERNAL void meta_troika_dump(const MDBX_env *env, + const troika_t *troika); + +#define METAPAGE(env, n) page_meta(pgno2page(env, n)) +#define METAPAGE_END(env) METAPAGE(env, NUM_METAS) + +static inline meta_ptr_t meta_recent(const MDBX_env *env, + const troika_t *troika) { + meta_ptr_t r; + r.txnid = troika->txnid[troika->recent]; + r.ptr_v = METAPAGE(env, troika->recent); + r.is_steady = (troika->fsm >> troika->recent) & 1; + return r; +} + +static inline meta_ptr_t meta_prefer_steady(const MDBX_env *env, + const troika_t *troika) { + meta_ptr_t r; + r.txnid = troika->txnid[troika->prefer_steady]; + r.ptr_v = METAPAGE(env, troika->prefer_steady); + r.is_steady = (troika->fsm >> troika->prefer_steady) & 1; + return r; +} + +static inline meta_ptr_t meta_tail(const MDBX_env *env, + const troika_t *troika) { + const uint8_t tail = troika->tail_and_flags 
& 3; + MDBX_ANALYSIS_ASSUME(tail < NUM_METAS); + meta_ptr_t r; + r.txnid = troika->txnid[tail]; + r.ptr_v = METAPAGE(env, tail); + r.is_steady = (troika->fsm >> tail) & 1; + return r; +} + +static inline bool meta_bootid_match(const meta_t *meta) { + return memcmp(&meta->bootid, &globals.bootid, 16) == 0 && + (globals.bootid.x | globals.bootid.y) != 0; +} + +static inline bool meta_weak_acceptable(const MDBX_env *env, const meta_t *meta, + const int lck_exclusive) { + return lck_exclusive + ? /* exclusive lock */ meta_bootid_match(meta) + : /* db already opened */ env->lck_mmap.lck && + (env->lck_mmap.lck->envmode.weak & MDBX_RDONLY) == 0; +} + +MDBX_NOTHROW_PURE_FUNCTION static inline txnid_t +constmeta_txnid(const meta_t *meta) { + const txnid_t a = unaligned_peek_u64(4, &meta->txnid_a); + const txnid_t b = unaligned_peek_u64(4, &meta->txnid_b); + return likely(a == b) ? a : 0; +} + +static inline void meta_update_begin(const MDBX_env *env, meta_t *meta, + txnid_t txnid) { + eASSERT(env, meta >= METAPAGE(env, 0) && meta < METAPAGE_END(env)); + eASSERT(env, unaligned_peek_u64(4, meta->txnid_a) < txnid && + unaligned_peek_u64(4, meta->txnid_b) < txnid); + (void)env; +#if (defined(__amd64__) || defined(__e2k__)) && !defined(ENABLE_UBSAN) && \ + MDBX_UNALIGNED_OK >= 8 + atomic_store64((mdbx_atomic_uint64_t *)&meta->txnid_b, 0, mo_AcquireRelease); + atomic_store64((mdbx_atomic_uint64_t *)&meta->txnid_a, txnid, + mo_AcquireRelease); +#else + atomic_store32(&meta->txnid_b[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], 0, + mo_AcquireRelease); + atomic_store32(&meta->txnid_b[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], 0, + mo_AcquireRelease); + atomic_store32(&meta->txnid_a[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], + (uint32_t)txnid, mo_AcquireRelease); + atomic_store32(&meta->txnid_a[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], + (uint32_t)(txnid >> 32), mo_AcquireRelease); +#endif +} + +static inline void meta_update_end(const MDBX_env *env, meta_t *meta, + txnid_t 
txnid) { + eASSERT(env, meta >= METAPAGE(env, 0) && meta < METAPAGE_END(env)); + eASSERT(env, unaligned_peek_u64(4, meta->txnid_a) == txnid); + eASSERT(env, unaligned_peek_u64(4, meta->txnid_b) < txnid); + (void)env; + jitter4testing(true); + memcpy(&meta->bootid, &globals.bootid, 16); +#if (defined(__amd64__) || defined(__e2k__)) && !defined(ENABLE_UBSAN) && \ + MDBX_UNALIGNED_OK >= 8 + atomic_store64((mdbx_atomic_uint64_t *)&meta->txnid_b, txnid, + mo_AcquireRelease); +#else + atomic_store32(&meta->txnid_b[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], + (uint32_t)txnid, mo_AcquireRelease); + atomic_store32(&meta->txnid_b[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], + (uint32_t)(txnid >> 32), mo_AcquireRelease); +#endif +} + +static inline void meta_set_txnid(const MDBX_env *env, meta_t *meta, + const txnid_t txnid) { + eASSERT(env, !env->dxb_mmap.base || meta < METAPAGE(env, 0) || + meta >= METAPAGE_END(env)); + (void)env; + /* update inconsistently since this function used ONLY for filling meta-image + * for writing, but not the actual meta-page */ + memcpy(&meta->bootid, &globals.bootid, 16); + unaligned_poke_u64(4, meta->txnid_a, txnid); + unaligned_poke_u64(4, meta->txnid_b, txnid); +} + +static inline uint8_t meta_cmp2int(txnid_t a, txnid_t b, uint8_t s) { + return unlikely(a == b) ? 1 * s : (a > b) ? 
2 * s : 0 * s; +} + +static inline uint8_t meta_cmp2recent(uint8_t ab_cmp2int, bool a_steady, + bool b_steady) { + assert(ab_cmp2int < 3 /* && a_steady< 2 && b_steady < 2 */); + return ab_cmp2int > 1 || (ab_cmp2int == 1 && a_steady > b_steady); +} + +static inline uint8_t meta_cmp2steady(uint8_t ab_cmp2int, bool a_steady, + bool b_steady) { + assert(ab_cmp2int < 3 /* && a_steady< 2 && b_steady < 2 */); + return a_steady > b_steady || (a_steady == b_steady && ab_cmp2int > 1); +} + +static inline bool meta_choice_recent(txnid_t a_txnid, bool a_steady, + txnid_t b_txnid, bool b_steady) { + return meta_cmp2recent(meta_cmp2int(a_txnid, b_txnid, 1), a_steady, b_steady); +} + +static inline bool meta_choice_steady(txnid_t a_txnid, bool a_steady, + txnid_t b_txnid, bool b_steady) { + return meta_cmp2steady(meta_cmp2int(a_txnid, b_txnid, 1), a_steady, b_steady); +} + +MDBX_INTERNAL meta_t *meta_init_triplet(const MDBX_env *env, void *buffer); + +MDBX_INTERNAL int meta_validate(MDBX_env *env, meta_t *const meta, + const page_t *const page, + const unsigned meta_number, + unsigned *guess_pagesize); + +MDBX_INTERNAL int __must_check_result meta_validate_copy(MDBX_env *env, + const meta_t *meta, + meta_t *dest); + +MDBX_INTERNAL int __must_check_result meta_override(MDBX_env *env, + size_t target, + txnid_t txnid, + const meta_t *shape); + +MDBX_INTERNAL int meta_wipe_steady(MDBX_env *env, txnid_t inclusive_upto); diff --git a/src/misc.c b/src/misc.c new file mode 100644 index 00000000..70acce0b --- /dev/null +++ b/src/misc.c @@ -0,0 +1,252 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +__cold int mdbx_is_readahead_reasonable(size_t volume, intptr_t redundancy) { + if (volume <= 1024 * 1024 * 4ul) + return MDBX_RESULT_TRUE; + + intptr_t pagesize, total_ram_pages; + int err = mdbx_get_sysraminfo(&pagesize, &total_ram_pages, nullptr); + if (unlikely(err != MDBX_SUCCESS)) + return err; 
+ + const int log2page = log2n_powerof2(pagesize); + const intptr_t volume_pages = (volume + pagesize - 1) >> log2page; + const intptr_t redundancy_pages = + (redundancy < 0) ? -(intptr_t)((-redundancy + pagesize - 1) >> log2page) + : (intptr_t)(redundancy + pagesize - 1) >> log2page; + if (volume_pages >= total_ram_pages || + volume_pages + redundancy_pages >= total_ram_pages) + return MDBX_RESULT_FALSE; + + intptr_t avail_ram_pages; + err = mdbx_get_sysraminfo(nullptr, nullptr, &avail_ram_pages); + if (unlikely(err != MDBX_SUCCESS)) + return err; + + return (volume_pages + redundancy_pages >= avail_ram_pages) + ? MDBX_RESULT_FALSE + : MDBX_RESULT_TRUE; +} + +int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, + uint64_t increment) { + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) { + rc = sdb_fetch(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + + tree_t *dbs = &txn->dbs[dbi]; + if (likely(result)) + *result = dbs->sequence; + + if (likely(increment > 0)) { + if (unlikely(dbi == FREE_DBI || (txn->flags & MDBX_TXN_RDONLY) != 0)) + return MDBX_EACCESS; + + uint64_t new = dbs->sequence + increment; + if (unlikely(new < increment)) + return MDBX_RESULT_TRUE; + + tASSERT(txn, new > dbs->sequence); + dbs->sequence = new; + txn->flags |= MDBX_TXN_DIRTY; + txn->dbi_state[dbi] |= DBI_DIRTY; + } + + return MDBX_SUCCESS; +} + +int mdbx_cmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, + const MDBX_val *b) { + eASSERT(nullptr, txn->signature == txn_signature); + tASSERT(txn, (dbi_state(txn, dbi) & DBI_VALID) && !dbi_changed(txn, dbi)); + tASSERT(txn, + dbi < txn->env->n_dbi && (txn->env->dbs_flags[dbi] & DB_VALID) != 0); + return txn->env->kvs[dbi].clc.k.cmp(a, b); +} + +int mdbx_dcmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, + const MDBX_val *b) { 
+ eASSERT(nullptr, txn->signature == txn_signature); + tASSERT(txn, (dbi_state(txn, dbi) & DBI_VALID) && !dbi_changed(txn, dbi)); + tASSERT(txn, dbi < txn->env->n_dbi && (txn->env->dbs_flags[dbi] & DB_VALID)); + return txn->env->kvs[dbi].clc.v.cmp(a, b); +} + +__cold MDBX_cmp_func *mdbx_get_keycmp(MDBX_db_flags_t flags) { + return builtin_keycmp(flags); +} + +__cold MDBX_cmp_func *mdbx_get_datacmp(MDBX_db_flags_t flags) { + return builtin_datacmp(flags); +} + +/*----------------------------------------------------------------------------*/ + +__cold const char *mdbx_liberr2str(int errnum) { + /* Table of descriptions for MDBX errors */ + static const char *const tbl[] = { + "MDBX_KEYEXIST: Key/data pair already exists", + "MDBX_NOTFOUND: No matching key/data pair found", + "MDBX_PAGE_NOTFOUND: Requested page not found", + "MDBX_CORRUPTED: Database is corrupted", + "MDBX_PANIC: Environment had fatal error", + "MDBX_VERSION_MISMATCH: DB version mismatch libmdbx", + "MDBX_INVALID: File is not an MDBX file", + "MDBX_MAP_FULL: Environment mapsize limit reached", + "MDBX_DBS_FULL: Too many DBI-handles (maxdbs reached)", + "MDBX_READERS_FULL: Too many readers (maxreaders reached)", + nullptr /* MDBX_TLS_FULL (-30789): unused in MDBX */, + "MDBX_TXN_FULL: Transaction has too many dirty pages," + " i.e transaction is too big", + "MDBX_CURSOR_FULL: Cursor stack limit reachedn - this usually indicates" + " corruption, i.e branch-pages loop", + "MDBX_PAGE_FULL: Internal error - Page has no more space", + "MDBX_UNABLE_EXTEND_MAPSIZE: Database engine was unable to extend" + " mapping, e.g. since address space is unavailable or busy," + " or Operation system not supported such operations", + "MDBX_INCOMPATIBLE: Environment or database is not compatible" + " with the requested operation or the specified flags", + "MDBX_BAD_RSLOT: Invalid reuse of reader locktable slot," + " e.g. 
read-transaction already run for current thread", + "MDBX_BAD_TXN: Transaction is not valid for requested operation," + " e.g. had errored and be must aborted, has a child, or is invalid", + "MDBX_BAD_VALSIZE: Invalid size or alignment of key or data" + " for target database, either invalid subDB name", + "MDBX_BAD_DBI: The specified DBI-handle is invalid" + " or changed by another thread/transaction", + "MDBX_PROBLEM: Unexpected internal error, transaction should be aborted", + "MDBX_BUSY: Another write transaction is running," + " or environment is already used while opening with MDBX_EXCLUSIVE flag", + }; + + if (errnum >= MDBX_KEYEXIST && errnum <= MDBX_BUSY) { + int i = errnum - MDBX_KEYEXIST; + return tbl[i]; + } + + switch (errnum) { + case MDBX_SUCCESS: + return "MDBX_SUCCESS: Successful"; + case MDBX_EMULTIVAL: + return "MDBX_EMULTIVAL: The specified key has" + " more than one associated value"; + case MDBX_EBADSIGN: + return "MDBX_EBADSIGN: Wrong signature of a runtime object(s)," + " e.g. memory corruption or double-free"; + case MDBX_WANNA_RECOVERY: + return "MDBX_WANNA_RECOVERY: Database should be recovered," + " but this could NOT be done automatically for now" + " since it opened in read-only mode"; + case MDBX_EKEYMISMATCH: + return "MDBX_EKEYMISMATCH: The given key value is mismatched to the" + " current cursor position"; + case MDBX_TOO_LARGE: + return "MDBX_TOO_LARGE: Database is too large for current system," + " e.g. could NOT be mapped into RAM"; + case MDBX_THREAD_MISMATCH: + return "MDBX_THREAD_MISMATCH: A thread has attempted to use a not" + " owned object, e.g. 
a transaction that started by another thread"; + case MDBX_TXN_OVERLAPPING: + return "MDBX_TXN_OVERLAPPING: Overlapping read and write transactions for" + " the current thread"; + case MDBX_DUPLICATED_CLK: + return "MDBX_DUPLICATED_CLK: Alternative/Duplicate LCK-file is exists," + " please keep one and remove unused other"; + case MDBX_DANGLING_DBI: + return "MDBX_DANGLING_DBI: Some cursors and/or other resources should be" + " closed before subDb or corresponding DBI-handle could be (re)used"; + default: + return nullptr; + } +} + +__cold const char *mdbx_strerror_r(int errnum, char *buf, size_t buflen) { + const char *msg = mdbx_liberr2str(errnum); + if (!msg && buflen > 0 && buflen < INT_MAX) { +#if defined(_WIN32) || defined(_WIN64) + const DWORD size = FormatMessageA( + FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, nullptr, + errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buf, (DWORD)buflen, + nullptr); + return size ? buf : "FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM) failed"; +#elif defined(_GNU_SOURCE) && defined(__GLIBC__) + /* GNU-specific */ + if (errnum > 0) + msg = strerror_r(errnum, buf, buflen); +#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) + /* XSI-compliant */ + if (errnum > 0 && strerror_r(errnum, buf, buflen) == 0) + msg = buf; +#else + if (errnum > 0) { + msg = strerror(errnum); + if (msg) { + strncpy(buf, msg, buflen); + msg = buf; + } + } +#endif + if (!msg) { + (void)snprintf(buf, buflen, "error %d", errnum); + msg = buf; + } + buf[buflen - 1] = '\0'; + } + return msg; +} + +__cold const char *mdbx_strerror(int errnum) { +#if defined(_WIN32) || defined(_WIN64) + static char buf[1024]; + return mdbx_strerror_r(errnum, buf, sizeof(buf)); +#else + const char *msg = mdbx_liberr2str(errnum); + if (!msg) { + if (errnum > 0) + msg = strerror(errnum); + if (!msg) { + static char buf[32]; + (void)snprintf(buf, sizeof(buf) - 1, "error %d", errnum); + msg = buf; + } + } + return msg; +#endif +} + +#if defined(_WIN32) || 
defined(_WIN64) /* Bit of madness for Windows */ +const char *mdbx_strerror_r_ANSI2OEM(int errnum, char *buf, size_t buflen) { + const char *msg = mdbx_liberr2str(errnum); + if (!msg && buflen > 0 && buflen < INT_MAX) { + const DWORD size = FormatMessageA( + FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, nullptr, + errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buf, (DWORD)buflen, + nullptr); + if (!size) + msg = "FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM) failed"; + else if (!CharToOemBuffA(buf, buf, size)) + msg = "CharToOemBuffA() failed"; + else + msg = buf; + } + return msg; +} + +const char *mdbx_strerror_ANSI2OEM(int errnum) { + static char buf[1024]; + return mdbx_strerror_r_ANSI2OEM(errnum, buf, sizeof(buf)); +} +#endif /* Bit of madness for Windows */ diff --git a/src/mvcc-readers.c b/src/mvcc-readers.c new file mode 100644 index 00000000..025881df --- /dev/null +++ b/src/mvcc-readers.c @@ -0,0 +1,477 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +bsr_t mvcc_bind_slot(MDBX_env *env, const uintptr_t tid) { + eASSERT(env, env->lck_mmap.lck); + eASSERT(env, env->lck->magic_and_version == MDBX_LOCK_MAGIC); + eASSERT(env, env->lck->os_and_format == MDBX_LOCK_FORMAT); + + bsr_t result = {lck_rdt_lock(env), nullptr}; + if (unlikely(MDBX_IS_ERROR(result.err))) + return result; + if (unlikely(env->flags & ENV_FATAL_ERROR)) { + lck_rdt_unlock(env); + result.err = MDBX_PANIC; + return result; + } + if (unlikely(!env->dxb_mmap.base)) { + lck_rdt_unlock(env); + result.err = MDBX_EPERM; + return result; + } + + if (unlikely(env->registered_reader_pid != env->pid)) { + result.err = lck_rpid_set(env); + if (unlikely(result.err != MDBX_SUCCESS)) { + lck_rdt_unlock(env); + return result; + } + env->registered_reader_pid = env->pid; + } + + result.err = MDBX_SUCCESS; + size_t slot, nreaders; + while (1) { + nreaders = env->lck->rdt_length.weak; + for (slot = 0; 
slot < nreaders; slot++) + if (!atomic_load32(&env->lck->rdt[slot].pid, mo_AcquireRelease)) + break; + + if (likely(slot < env->max_readers)) + break; + + result.err = mvcc_cleanup_dead(env, true, nullptr); + if (result.err != MDBX_RESULT_TRUE) { + lck_rdt_unlock(env); + result.err = + (result.err == MDBX_SUCCESS) ? MDBX_READERS_FULL : result.err; + return result; + } + } + + result.rslot = &env->lck->rdt[slot]; + /* Claim the reader slot, carefully since other code + * uses the reader table un-mutexed: First reset the + * slot, next publish it in lck->rdt_length. After + * that, it is safe for mdbx_env_close() to touch it. + * When it will be closed, we can finally claim it. */ + atomic_store32(&result.rslot->pid, 0, mo_AcquireRelease); + safe64_reset(&result.rslot->txnid, true); + if (slot == nreaders) + env->lck->rdt_length.weak = (uint32_t)++nreaders; + result.rslot->tid.weak = (env->flags & MDBX_NOSTICKYTHREADS) ? 0 : tid; + atomic_store32(&result.rslot->pid, env->pid, mo_AcquireRelease); + lck_rdt_unlock(env); + + if (likely(env->flags & ENV_TXKEY)) { + eASSERT(env, env->registered_reader_pid == env->pid); + thread_rthc_set(env->me_txkey, result.rslot); + } + return result; +} + +__hot txnid_t mvcc_shapshot_oldest(MDBX_env *const env, const txnid_t steady) { + const uint32_t nothing_changed = MDBX_STRING_TETRAD("None"); + eASSERT(env, steady <= env->basal_txn->txnid); + + lck_t *const lck = env->lck_mmap.lck; + if (unlikely(lck == nullptr /* exclusive without-lck mode */)) { + eASSERT(env, env->lck == lckless_stub(env)); + env->lck->rdt_refresh_flag.weak = nothing_changed; + return env->lck->cached_oldest.weak = steady; + } + + const txnid_t prev_oldest = + atomic_load64(&lck->cached_oldest, mo_AcquireRelease); + eASSERT(env, steady >= prev_oldest); + + txnid_t new_oldest = prev_oldest; + while (nothing_changed != + atomic_load32(&lck->rdt_refresh_flag, mo_AcquireRelease)) { + lck->rdt_refresh_flag.weak = nothing_changed; + jitter4testing(false); + const 
size_t snap_nreaders = + atomic_load32(&lck->rdt_length, mo_AcquireRelease); + new_oldest = steady; + + for (size_t i = 0; i < snap_nreaders; ++i) { + const uint32_t pid = atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease); + if (!pid) + continue; + jitter4testing(true); + + const txnid_t rtxn = safe64_read(&lck->rdt[i].txnid); + if (unlikely(rtxn < prev_oldest)) { + if (unlikely(nothing_changed == atomic_load32(&lck->rdt_refresh_flag, + mo_AcquireRelease)) && + safe64_reset_compare(&lck->rdt[i].txnid, rtxn)) { + NOTICE("kick stuck reader[%zu of %zu].pid_%u %" PRIaTXN + " < prev-oldest %" PRIaTXN ", steady-txn %" PRIaTXN, + i, snap_nreaders, pid, rtxn, prev_oldest, steady); + } + continue; + } + + if (rtxn < new_oldest) { + new_oldest = rtxn; + if (!MDBX_DEBUG && !MDBX_FORCE_ASSERTIONS && new_oldest == prev_oldest) + break; + } + } + } + + if (new_oldest != prev_oldest) { + VERBOSE("update oldest %" PRIaTXN " -> %" PRIaTXN, prev_oldest, new_oldest); + eASSERT(env, new_oldest >= lck->cached_oldest.weak); + atomic_store64(&lck->cached_oldest, new_oldest, mo_Relaxed); + } + return new_oldest; +} + +pgno_t mvcc_snapshot_largest(const MDBX_env *env, pgno_t last_used_page) { + lck_t *const lck = env->lck_mmap.lck; + if (likely(lck != nullptr /* check for exclusive without-lck mode */)) { + retry:; + const size_t snap_nreaders = + atomic_load32(&lck->rdt_length, mo_AcquireRelease); + for (size_t i = 0; i < snap_nreaders; ++i) { + if (atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease)) { + /* jitter4testing(true); */ + const pgno_t snap_pages = + atomic_load32(&lck->rdt[i].snapshot_pages_used, mo_Relaxed); + const txnid_t snap_txnid = safe64_read(&lck->rdt[i].txnid); + if (unlikely(snap_pages != + atomic_load32(&lck->rdt[i].snapshot_pages_used, + mo_AcquireRelease) || + snap_txnid != safe64_read(&lck->rdt[i].txnid))) + goto retry; + if (last_used_page < snap_pages && snap_txnid <= env->basal_txn->txnid) + last_used_page = snap_pages; + } + } + } + + return last_used_page; 
+} + +/* Find largest mvcc-snapshot still referenced by this process. */ +pgno_t mvcc_largest_this(MDBX_env *env, pgno_t largest) { + lck_t *const lck = env->lck_mmap.lck; + if (likely(lck != nullptr /* exclusive mode */)) { + const size_t snap_nreaders = + atomic_load32(&lck->rdt_length, mo_AcquireRelease); + for (size_t i = 0; i < snap_nreaders; ++i) { + retry: + if (atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease) == env->pid) { + /* jitter4testing(true); */ + const pgno_t snap_pages = + atomic_load32(&lck->rdt[i].snapshot_pages_used, mo_Relaxed); + const txnid_t snap_txnid = safe64_read(&lck->rdt[i].txnid); + if (unlikely(snap_pages != + atomic_load32(&lck->rdt[i].snapshot_pages_used, + mo_AcquireRelease) || + snap_txnid != safe64_read(&lck->rdt[i].txnid))) + goto retry; + if (largest < snap_pages && + atomic_load64(&lck->cached_oldest, mo_AcquireRelease) <= + /* ignore pending updates */ snap_txnid && + snap_txnid <= MAX_TXNID) + largest = snap_pages; + } + } + } + return largest; +} + +static bool pid_insert(uint32_t *list, uint32_t pid) { + /* binary search of pid in list */ + size_t base = 0; + size_t cursor = 1; + int32_t val = 0; + size_t n = /* length */ list[0]; + + while (n > 0) { + size_t pivot = n >> 1; + cursor = base + pivot + 1; + val = pid - list[cursor]; + + if (val < 0) { + n = pivot; + } else if (val > 0) { + base = cursor; + n -= pivot + 1; + } else { + /* found, so it's a duplicate */ + return false; + } + } + + if (val > 0) + ++cursor; + + list[0]++; + for (n = list[0]; n > cursor; n--) + list[n] = list[n - 1]; + list[n] = pid; + return true; +} + +__cold MDBX_INTERNAL int mvcc_cleanup_dead(MDBX_env *env, int rdt_locked, + int *dead) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + eASSERT(env, rdt_locked >= 0); + lck_t *const lck = env->lck_mmap.lck; + if (unlikely(lck == nullptr)) { + /* exclusive mode */ + if (dead) + *dead = 0; + return MDBX_SUCCESS; + } + + const size_t snap_nreaders = + 
atomic_load32(&lck->rdt_length, mo_AcquireRelease); + uint32_t pidsbuf_onstask[142]; + uint32_t *const pids = + (snap_nreaders < ARRAY_LENGTH(pidsbuf_onstask)) + ? pidsbuf_onstask + : osal_malloc((snap_nreaders + 1) * sizeof(uint32_t)); + if (unlikely(!pids)) + return MDBX_ENOMEM; + + pids[0] = 0; + int count = 0; + for (size_t i = 0; i < snap_nreaders; i++) { + const uint32_t pid = atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease); + if (pid == 0) + continue /* skip empty */; + if (pid == env->pid) + continue /* skip self */; + if (!pid_insert(pids, pid)) + continue /* such pid already processed */; + + int err = lck_rpid_check(env, pid); + if (err == MDBX_RESULT_TRUE) + continue /* reader is live */; + + if (err != MDBX_SUCCESS) { + rc = err; + break /* lck_rpid_check() failed */; + } + + /* stale reader found */ + if (!rdt_locked) { + err = lck_rdt_lock(env); + if (MDBX_IS_ERROR(err)) { + rc = err; + break; + } + + rdt_locked = -1; + if (err == MDBX_RESULT_TRUE) { + /* mutex recovered, the mdbx_ipclock_failed() checked all readers */ + rc = MDBX_RESULT_TRUE; + break; + } + + /* a other process may have clean and reused slot, recheck */ + if (lck->rdt[i].pid.weak != pid) + continue; + + err = lck_rpid_check(env, pid); + if (MDBX_IS_ERROR(err)) { + rc = err; + break; + } + + if (err != MDBX_SUCCESS) + continue /* the race with other process, slot reused */; + } + + /* clean it */ + for (size_t ii = i; ii < snap_nreaders; ii++) { + if (lck->rdt[ii].pid.weak == pid) { + DEBUG("clear stale reader pid %" PRIuPTR " txn %" PRIaTXN, (size_t)pid, + lck->rdt[ii].txnid.weak); + atomic_store32(&lck->rdt[ii].pid, 0, mo_Relaxed); + atomic_store32(&lck->rdt_refresh_flag, true, mo_AcquireRelease); + count++; + } + } + } + + if (likely(!MDBX_IS_ERROR(rc))) + atomic_store64(&lck->readers_check_timestamp, osal_monotime(), mo_Relaxed); + + if (rdt_locked < 0) + lck_rdt_unlock(env); + + if (pids != pidsbuf_onstask) + osal_free(pids); + + if (dead) + *dead = count; + return rc; +} + 
+__cold txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler) { /* Tries to move the oldest reader snapshot past `straggler`: re-evaluates the oldest snapshot via mvcc_shapshot_oldest(), cleans up dead readers via mvcc_cleanup_dead() and, if env->hsr_callback is set, asks it to deal with the reader slot still holding `straggler`. Returns the last value obtained from mvcc_shapshot_oldest(). */ + DEBUG("DB size maxed out by reading #%" PRIaTXN, straggler); + osal_memory_fence(mo_AcquireRelease, false); + MDBX_hsr_func *const callback = env->hsr_callback; + txnid_t oldest = 0; + bool notify_eof_of_loop = false; + int retry = 0; + do { + const txnid_t steady = + env->txn->tw.troika.txnid[env->txn->tw.troika.prefer_steady]; + env->lck->rdt_refresh_flag.weak = /* force refresh */ true; + oldest = mvcc_shapshot_oldest(env, steady); + eASSERT(env, oldest < env->basal_txn->txnid); + eASSERT(env, oldest >= straggler); + eASSERT(env, oldest >= env->lck->cached_oldest.weak); + + lck_t *const lck = env->lck_mmap.lck; + if (oldest == steady || oldest > straggler || /* without-LCK mode */ !lck) + break; + + if (MDBX_IS_ERROR(mvcc_cleanup_dead(env, false, nullptr))) + break; + + if (!callback) + break; + /* Scan the whole reader table for a slot with non-zero pid whose txnid equals `straggler`; the loop does not stop early, so the last match is kept. */ + reader_slot_t *stucked = nullptr; + uint64_t hold_retired = 0; + for (size_t i = 0; i < lck->rdt_length.weak; ++i) { + const uint64_t snap_retired = + atomic_load64(&lck->rdt[i].snapshot_pages_retired, mo_Relaxed); + const txnid_t rtxn = safe64_read(&lck->rdt[i].txnid); + if (rtxn == straggler && + atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease)) { + hold_retired = snap_retired; + stucked = &lck->rdt[i]; + } + } + + if (!stucked) + break; + + uint32_t pid = atomic_load32(&stucked->pid, mo_AcquireRelease); + uint64_t tid = atomic_load64(&stucked->tid, mo_AcquireRelease); + if (safe64_read(&stucked->txnid) != straggler || !pid || + stucked->snapshot_pages_retired.weak != hold_retired) + continue; + /* ^ the slot changed since the scan; `continue` inside a do-while jumps to the `++retry < INT_MAX` test, so the whole iteration is repeated */ + const meta_ptr_t head = meta_recent(env, &env->txn->tw.troika); + const txnid_t gap = (head.txnid - straggler) / xMDBX_TXNID_STEP; + const uint64_t head_retired = + unaligned_peek_u64(4, head.ptr_c->pages_retired); + const size_t space = + (head_retired > hold_retired) + ?
pgno2bytes(env, (pgno_t)(head_retired - hold_retired)) + : 0; /* pages_retired of the recent meta minus the value captured from the stuck slot, converted to bytes; 0 if not positive */ + int rc = + callback(env, env->txn, pid, (mdbx_tid_t)((intptr_t)tid), straggler, + (gap < UINT_MAX) ? (unsigned)gap : UINT_MAX, space, retry); + if (rc < 0) + /* hsr returned error and/or agree MDBX_MAP_FULL error */ + break; + + if (rc > 0) { + if (rc == 1) { + /* hsr reported transaction (will be) aborted asynchronous */ + safe64_reset_compare(&stucked->txnid, straggler); + } else { + /* hsr reported reader process was killed and slot should be cleared */ + safe64_reset(&stucked->txnid, true); + atomic_store64(&stucked->tid, 0, mo_Relaxed); + atomic_store32(&stucked->pid, 0, mo_AcquireRelease); + } + } else if (!notify_eof_of_loop) { /* rc == 0 for the first time: remember that the final end-of-loop callback call is due */ +#if MDBX_ENABLE_PROFGC + env->lck->pgops.gc_prof.kicks += 1; +#endif /* MDBX_ENABLE_PROFGC */ + notify_eof_of_loop = true; + } + + } while (++retry < INT_MAX); + + if (notify_eof_of_loop) { + /* notify end of hsr-loop */ + const txnid_t turn = oldest - straggler; + if (turn) + NOTICE("hsr-kick: done turn %" PRIaTXN " -> %" PRIaTXN " +%" PRIaTXN, + straggler, oldest, turn); + callback(env, env->txn, 0, 0, straggler, + (turn < UINT_MAX) ? (unsigned)turn : UINT_MAX, 0, -retry); /* final call: pid = 0, tid = 0, space = 0 and the negated retry count */ + } + return oldest; +} + +/*----------------------------------------------------------------------------*/ + /* Binds a reader slot to the calling thread via mvcc_bind_slot(). Returns the check_env() error; MDBX_EINVAL (with MDBX_EXCLUSIVE) or MDBX_EPERM when env->lck_mmap.lck is null; MDBX_EINVAL when ENV_TXKEY is not set; MDBX_RESULT_TRUE when the thread already has a slot (MDBX_BAD_RSLOT if that slot carries another pid); MDBX_TXN_OVERLAPPING when env->txn is set and this thread owns the basal transaction; otherwise the `err` of mvcc_bind_slot(). */ +__cold int mdbx_thread_register(const MDBX_env *env) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!env->lck_mmap.lck)) + return (env->flags & MDBX_EXCLUSIVE) ?
MDBX_EINVAL : MDBX_EPERM; + + if (unlikely((env->flags & ENV_TXKEY) == 0)) { + eASSERT(env, env->flags & MDBX_NOSTICKYTHREADS); + return MDBX_EINVAL /* MDBX_NOSTICKYTHREADS mode */; + } + + eASSERT(env, (env->flags & (MDBX_NOSTICKYTHREADS | ENV_TXKEY)) == ENV_TXKEY); + reader_slot_t *r = thread_rthc_get(env->me_txkey); + if (unlikely(r != nullptr)) { + eASSERT(env, r->pid.weak == env->pid); + eASSERT(env, r->tid.weak == osal_thread_self()); + if (unlikely(r->pid.weak != env->pid)) + return MDBX_BAD_RSLOT; + return MDBX_RESULT_TRUE /* already registered */; + } + + const uintptr_t tid = osal_thread_self(); + if (env->txn && unlikely(env->basal_txn->owner == tid)) + return MDBX_TXN_OVERLAPPING; + return mvcc_bind_slot((MDBX_env *)env, tid).err; +} + /* Releases the reader slot bound to the calling thread. Returns the check_env() error; MDBX_RESULT_TRUE when there is nothing to release (env->lck_mmap.lck is null, ENV_TXKEY is not set, or the thread has no slot); MDBX_BAD_RSLOT when the slot's pid or tid does not match the caller; MDBX_BUSY when the slot's txnid is below SAFE64_INVALID_THRESHOLD; MDBX_SUCCESS after the slot was released. */ +__cold int mdbx_thread_unregister(const MDBX_env *env) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!env->lck_mmap.lck)) + return MDBX_RESULT_TRUE; + + if (unlikely((env->flags & ENV_TXKEY) == 0)) { + eASSERT(env, env->flags & MDBX_NOSTICKYTHREADS); + return MDBX_RESULT_TRUE /* MDBX_NOSTICKYTHREADS mode */; + } + + eASSERT(env, (env->flags & (MDBX_NOSTICKYTHREADS | ENV_TXKEY)) == ENV_TXKEY); + reader_slot_t *r = thread_rthc_get(env->me_txkey); + if (unlikely(r == nullptr)) + return MDBX_RESULT_TRUE /* not registered */; + + eASSERT(env, r->pid.weak == env->pid); + eASSERT(env, r->tid.weak == osal_thread_self()); + if (unlikely(r->pid.weak != env->pid || r->tid.weak != osal_thread_self())) + return MDBX_BAD_RSLOT; + + eASSERT(env, r->txnid.weak >= SAFE64_INVALID_THRESHOLD); + if (unlikely(r->txnid.weak < SAFE64_INVALID_THRESHOLD)) + return MDBX_BUSY /* transaction is still active */; + /* release the slot: zero its pid, raise rdt_refresh_flag, then drop the thread-local reference */ + atomic_store32(&r->pid, 0, mo_Relaxed); + atomic_store32(&env->lck->rdt_refresh_flag, true, mo_AcquireRelease); + thread_rthc_set(env->me_txkey, nullptr); + return MDBX_SUCCESS; +} diff --git a/src/node.c b/src/node.c new file mode 100644 index 00000000..525318d6 --- /dev/null +++
b/src/node.c
@@ -0,0 +1,395 @@
+/// \copyright SPDX-License-Identifier: Apache-2.0
+/// \note Please refer to the COPYRIGHT file for explanations license change,
+/// credits and acknowledgments.
+/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024
+
+#include "internals.h"
+/* Inserts the fixed-size key `key` at slot `indx` of the page
+ * mc->pg[mc->top], which is asserted to be a P_LEAF | P_DUPFIX page.
+ *
+ * The key length is asserted to equal mc->tree->dupfix_size. Keys at
+ * slots >= indx are moved up by one key size before the new key is copied
+ * in.
+ *
+ * Returns MDBX_SUCCESS, or MDBX_PAGE_FULL when the adjusted lower bound
+ * would exceed the adjusted upper bound; in that case MDBX_TXN_ERROR is
+ * also set in mc->txn->flags and the page is left unmodified. */
+__hot int __must_check_result node_add_dupfix(MDBX_cursor *mc, size_t indx,
+                                              const MDBX_val *key) {
+  page_t *mp = mc->pg[mc->top];
+  MDBX_ANALYSIS_ASSUME(key != nullptr);
+  DKBUF_DEBUG;
+  DEBUG("add to leaf2-%spage %" PRIaPGNO " index %zi, "
+        " key size %" PRIuPTR " [%s]",
+        is_subpage(mp) ? "sub-" : "", mp->pgno, indx, key ? key->iov_len : 0,
+        DKEY_DEBUG(key));
+
+  cASSERT(mc, key);
+  cASSERT(mc, page_type_compat(mp) == (P_LEAF | P_DUPFIX));
+  const size_t ksize = mc->tree->dupfix_size;
+  cASSERT(mc, ksize == key->iov_len);
+  const size_t nkeys = page_numkeys(mp);
+  cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->upper) & 1) == 0);
+
+  /* Just using these for counting */
+  const intptr_t lower = mp->lower + sizeof(indx_t);
+  /* lower grows by sizeof(indx_t) and upper drops by the remainder, so the
+   * gap between them shrinks by exactly ksize */
+  const intptr_t upper = mp->upper - (ksize - sizeof(indx_t));
+  if (unlikely(lower > upper)) {
+    mc->txn->flags |= MDBX_TXN_ERROR;
+    return MDBX_PAGE_FULL;
+  }
+  mp->lower = (indx_t)lower;
+  mp->upper = (indx_t)upper;
+
+  void *const ptr = page_dupfix_ptr(mp, indx, ksize);
+  cASSERT(mc, nkeys >= indx);
+  const size_t diff = nkeys - indx; /* number of keys at or after `indx` */
+  if (likely(diff > 0))
+    /* Move higher keys up one slot. */
+    memmove(ptr_disp(ptr, ksize), ptr, diff * ksize);
+  /* insert new key */
+  memcpy(ptr, key->iov_base, ksize);
+
+  cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->upper) & 1) == 0);
+  return MDBX_SUCCESS;
+}
+/* Inserts a branch node referring to page `pgno` at slot `indx` of the page
+ * mc->pg[mc->top], which is asserted to be a P_BRANCH page.
+ *
+ * `key` may be nullptr, in which case the node is written with a
+ * zero-length key.
+ *
+ * Returns MDBX_SUCCESS, or MDBX_PAGE_FULL (also setting MDBX_TXN_ERROR in
+ * mc->txn->flags) when the node does not fit between the page's lower and
+ * upper bounds. Note that the entry slots are shifted before that check is
+ * made. */
+int __must_check_result node_add_branch(MDBX_cursor *mc, size_t indx,
+                                        const MDBX_val *key, pgno_t pgno) {
+  page_t *mp = mc->pg[mc->top];
+  DKBUF_DEBUG;
+  DEBUG("add to branch-%spage %" PRIaPGNO " index %zi, node-pgno %" PRIaPGNO
+        " key size %" PRIuPTR " [%s]",
+        is_subpage(mp) ? "sub-" : "", mp->pgno, indx, pgno,
+        key ? 
key->iov_len : 0, DKEY_DEBUG(key)); + + cASSERT(mc, page_type(mp) == P_BRANCH); + STATIC_ASSERT(NODESIZE % 2 == 0); + + /* Move higher pointers up one slot. */ + const size_t nkeys = page_numkeys(mp); + cASSERT(mc, nkeys >= indx); + for (size_t i = nkeys; i > indx; --i) + mp->entries[i] = mp->entries[i - 1]; + + /* Adjust free space offsets. */ + const size_t branch_bytes = branch_size(mc->txn->env, key); + const intptr_t lower = mp->lower + sizeof(indx_t); + const intptr_t upper = mp->upper - (branch_bytes - sizeof(indx_t)); + if (unlikely(lower > upper)) { + mc->txn->flags |= MDBX_TXN_ERROR; + return MDBX_PAGE_FULL; + } + mp->lower = (indx_t)lower; + mp->entries[indx] = mp->upper = (indx_t)upper; + + /* Write the node data. */ + node_t *node = page_node(mp, indx); + node_set_pgno(node, pgno); + node_set_flags(node, 0); + UNALIGNED_POKE_8(node, node_t, extra, 0); + node_set_ks(node, 0); + if (likely(key != nullptr)) { + node_set_ks(node, key->iov_len); + memcpy(node_key(node), key->iov_base, key->iov_len); + } + return MDBX_SUCCESS; +} + +__hot int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx, + const MDBX_val *key, MDBX_val *data, + unsigned flags) { + MDBX_ANALYSIS_ASSUME(key != nullptr); + MDBX_ANALYSIS_ASSUME(data != nullptr); + page_t *mp = mc->pg[mc->top]; + DKBUF_DEBUG; + DEBUG("add to leaf-%spage %" PRIaPGNO " index %zi, data size %" PRIuPTR + " key size %" PRIuPTR " [%s]", + is_subpage(mp) ? "sub-" : "", mp->pgno, indx, data ? data->iov_len : 0, + key ? key->iov_len : 0, DKEY_DEBUG(key)); + cASSERT(mc, key != nullptr && data != nullptr); + cASSERT(mc, page_type_compat(mp) == P_LEAF); + page_t *largepage = nullptr; + + size_t node_bytes; + if (unlikely(flags & N_BIGDATA)) { + /* Data already on large/overflow page. 
*/ + STATIC_ASSERT(sizeof(pgno_t) % 2 == 0); + node_bytes = + node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t); + cASSERT(mc, page_room(mp) >= node_bytes); + } else if (unlikely(node_size(key, data) > mc->txn->env->leaf_nodemax)) { + /* Put data on large/overflow page. */ + if (unlikely(mc->tree->flags & MDBX_DUPSORT)) { + ERROR("Unexpected target %s flags 0x%x for large data-item", "dupsort-db", + mc->tree->flags); + return MDBX_PROBLEM; + } + if (unlikely(flags & (N_DUPDATA | N_SUBDATA))) { + ERROR("Unexpected target %s flags 0x%x for large data-item", "node", + flags); + return MDBX_PROBLEM; + } + cASSERT(mc, page_room(mp) >= leaf_size(mc->txn->env, key, data)); + const pgno_t ovpages = largechunk_npages(mc->txn->env, data->iov_len); + const pgr_t npr = page_new_large(mc, ovpages); + if (unlikely(npr.err != MDBX_SUCCESS)) + return npr.err; + largepage = npr.page; + DEBUG("allocated %u large/overflow page(s) %" PRIaPGNO "for %" PRIuPTR + " data bytes", + largepage->pages, largepage->pgno, data->iov_len); + flags |= N_BIGDATA; + node_bytes = + node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t); + cASSERT(mc, node_bytes == leaf_size(mc->txn->env, key, data)); + } else { + cASSERT(mc, page_room(mp) >= leaf_size(mc->txn->env, key, data)); + node_bytes = node_size(key, data) + sizeof(indx_t); + cASSERT(mc, node_bytes == leaf_size(mc->txn->env, key, data)); + } + + /* Move higher pointers up one slot. */ + const size_t nkeys = page_numkeys(mp); + cASSERT(mc, nkeys >= indx); + for (size_t i = nkeys; i > indx; --i) + mp->entries[i] = mp->entries[i - 1]; + + /* Adjust free space offsets. */ + const intptr_t lower = mp->lower + sizeof(indx_t); + const intptr_t upper = mp->upper - (node_bytes - sizeof(indx_t)); + if (unlikely(lower > upper)) { + mc->txn->flags |= MDBX_TXN_ERROR; + return MDBX_PAGE_FULL; + } + mp->lower = (indx_t)lower; + mp->entries[indx] = mp->upper = (indx_t)upper; + + /* Write the node data. 
*/ + node_t *node = page_node(mp, indx); + node_set_ks(node, key->iov_len); + node_set_flags(node, (uint8_t)flags); + UNALIGNED_POKE_8(node, node_t, extra, 0); + node_set_ds(node, data->iov_len); + memcpy(node_key(node), key->iov_base, key->iov_len); + + void *nodedata = node_data(node); + if (likely(largepage == nullptr)) { + if (unlikely(flags & N_BIGDATA)) { + memcpy(nodedata, data->iov_base, sizeof(pgno_t)); + return MDBX_SUCCESS; + } + } else { + poke_pgno(nodedata, largepage->pgno); + nodedata = page_data(largepage); + } + if (unlikely(flags & MDBX_RESERVE)) + data->iov_base = nodedata; + else if (likely(data->iov_len /* to avoid UBSAN traps */)) + memcpy(nodedata, data->iov_base, data->iov_len); + return MDBX_SUCCESS; +} + +__hot void node_del(MDBX_cursor *mc, size_t ksize) { + page_t *mp = mc->pg[mc->top]; + const size_t hole = mc->ki[mc->top]; + const size_t nkeys = page_numkeys(mp); + + DEBUG("delete node %zu on %s page %" PRIaPGNO, hole, + is_leaf(mp) ? "leaf" : "branch", mp->pgno); + cASSERT(mc, hole < nkeys); + + if (is_dupfix_leaf(mp)) { + cASSERT(mc, ksize >= sizeof(indx_t)); + size_t diff = nkeys - 1 - hole; + void *const base = page_dupfix_ptr(mp, hole, ksize); + if (diff) + memmove(base, ptr_disp(base, ksize), diff * ksize); + cASSERT(mc, mp->lower >= sizeof(indx_t)); + mp->lower -= sizeof(indx_t); + cASSERT(mc, (size_t)UINT16_MAX - mp->upper >= ksize - sizeof(indx_t)); + mp->upper += (indx_t)(ksize - sizeof(indx_t)); + cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->upper) & 1) == 0); + return; + } + + node_t *node = page_node(mp, hole); + cASSERT(mc, !is_branch(mp) || hole || node_ks(node) == 0); + size_t hole_size = NODESIZE + node_ks(node); + if (is_leaf(mp)) + hole_size += + (node_flags(node) & N_BIGDATA) ? sizeof(pgno_t) : node_ds(node); + hole_size = EVEN_CEIL(hole_size); + + const indx_t hole_offset = mp->entries[hole]; + size_t r, w; + for (r = w = 0; r < nkeys; r++) + if (r != hole) + mp->entries[w++] = (mp->entries[r] < hole_offset) + ? 
mp->entries[r] + (indx_t)hole_size
+                             : mp->entries[r];
+
+  /* close the gap: the bytes between `upper` and the removed node move up
+   * by hole_size */
+  void *const base = ptr_disp(mp, mp->upper + PAGEHDRSZ);
+  memmove(ptr_disp(base, hole_size), base, hole_offset - mp->upper);
+
+  cASSERT(mc, mp->lower >= sizeof(indx_t));
+  mp->lower -= sizeof(indx_t);
+  cASSERT(mc, (size_t)UINT16_MAX - mp->upper >= hole_size);
+  mp->upper += (indx_t)hole_size;
+
+  if (AUDIT_ENABLED()) {
+    /* run page_check() with z_updating set, then restore the flags */
+    const uint8_t checking = mc->checking;
+    mc->checking |= z_updating;
+    const int page_check_err = page_check(mc, mp);
+    mc->checking = checking;
+    cASSERT(mc, page_check_err == MDBX_SUCCESS);
+  }
+}
+/* Resolves the data of a node whose flags are exactly N_BIGDATA.
+ *
+ * On entry data->iov_len must already equal node_ds(node) (asserted). The
+ * large page is fetched with page_get_large() using the page number kept
+ * in the node and mp->txnid, and data->iov_base is pointed at its payload.
+ *
+ * Returns MDBX_SUCCESS, the error of page_get_large(), or - unless
+ * MDBX_DISABLE_VALIDATION is set - the result of bad_page() when the large
+ * page spans fewer pages than largechunk_npages() requires for the data
+ * length. */
+__noinline int node_read_bigdata(MDBX_cursor *mc, const node_t *node,
+                                 MDBX_val *data, const page_t *mp) {
+  cASSERT(mc, node_flags(node) == N_BIGDATA && data->iov_len == node_ds(node));
+
+  pgr_t lp = page_get_large(mc, node_largedata_pgno(node), mp->txnid);
+  if (unlikely((lp.err != MDBX_SUCCESS))) {
+    DEBUG("read large/overflow page %" PRIaPGNO " failed",
+          node_largedata_pgno(node));
+    return lp.err;
+  }
+
+  cASSERT(mc, page_type(lp.page) == P_LARGE);
+  data->iov_base = page_data(lp.page);
+  if (!MDBX_DISABLE_VALIDATION) {
+    const MDBX_env *env = mc->txn->env;
+    const size_t dsize = data->iov_len;
+    const unsigned npages = largechunk_npages(env, dsize);
+    if (unlikely(lp.page->pages < npages))
+      return bad_page(lp.page,
+                      "too less n-pages %u for bigdata-node (%zu bytes)",
+                      lp.page->pages, dsize);
+  }
+  return MDBX_SUCCESS;
+}
+/* Shrinks the sub-page embedded as the data of `node` (slot `indx` of page
+ * `mp`) by the even-floored amount of its free room.
+ *
+ * The node's data size is reduced by that amount, the bytes between
+ * mp->upper and the sub-page (plus the part of the sub-page that has to
+ * move) are shifted up, and every entry offset of `mp` that is not above
+ * the node's own offset is increased accordingly.
+ *
+ * Returns the new address of the node, or `node` unchanged when there is
+ * nothing to reclaim. */
+node_t *node_shrink(page_t *mp, size_t indx, node_t *node) {
+  assert(node == page_node(mp, indx));
+  page_t *sp = (page_t *)node_data(node);
+  assert(is_subpage(sp) && page_numkeys(sp) > 0);
+  const size_t delta =
+      EVEN_FLOOR(page_room(sp) /* avoid the node uneven-sized */);
+  /* the branch hint now covers the whole comparison, as elsewhere in the
+   * file; the condition itself is unchanged */
+  if (unlikely(delta == 0))
+    return node;
+
+  /* Prepare to shift upward, set len = length(subpage part to shift) */
+  size_t nsize = node_ds(node) - delta, len = nsize;
+  assert(nsize % 1 == 0); /* NOTE(review): `% 1` is always 0, so this checks
+                           * nothing; presumably `% 2` (evenness) was
+                           * intended - confirm before tightening */
+  if (!is_dupfix_leaf(sp)) {
+    len = PAGEHDRSZ;
+    page_t *xp = ptr_disp(sp, delta); /* destination subpage */
+    for (intptr_t i = page_numkeys(sp); --i >= 0;) {
+      assert(sp->entries[i] >= delta);
+      xp->entries[i] = (indx_t)(sp->entries[i] - delta);
+    }
+  }
+  assert(sp->upper >= sp->lower + delta);
+  sp->upper -= (indx_t)delta;
+  sp->pgno = mp->pgno;
+  node_set_ds(node, nsize);
+
+  /* Shift upward */
+  void *const base = ptr_disp(mp, mp->upper + PAGEHDRSZ);
+  memmove(ptr_disp(base, delta), base, ptr_dist(sp, base) + len);
+
+  const size_t pivot = mp->entries[indx];
+  for (intptr_t i = page_numkeys(mp); --i >= 0;) {
+    if (mp->entries[i] <= pivot) {
+      assert((size_t)UINT16_MAX - mp->entries[i] >= delta);
+      mp->entries[i] += (indx_t)delta;
+    }
+  }
+  assert((size_t)UINT16_MAX - mp->upper >= delta);
+  mp->upper += (indx_t)delta;
+
+  return ptr_disp(node, delta);
+}
+/* Binary-searches the page mc->pg[mc->top] for `key` using the comparator
+ * mc->clc->k.cmp.
+ *
+ * The resulting slot index is stored in mc->ki[mc->top]. In the returned
+ * structure `exact` tells whether the comparator reported equality, and
+ * `node` is nullptr when the index lies past the last key; for DUPFIX leaf
+ * pages a non-null `node` is only a placeholder value, not a real node. */
+__hot struct node_search_result node_search(MDBX_cursor *mc,
+                                            const MDBX_val *key) {
+  page_t *mp = mc->pg[mc->top];
+  const intptr_t nkeys = page_numkeys(mp);
+  DKBUF_DEBUG;
+
+  DEBUG("searching %zu keys in %s %spage %" PRIaPGNO, nkeys,
+        is_leaf(mp) ? "leaf" : "branch", is_subpage(mp) ? 
"sub-" : "",
+        mp->pgno);
+
+  struct node_search_result ret;
+  ret.exact = false;
+  STATIC_ASSERT(P_BRANCH == 1);
+  intptr_t low = mp->flags & P_BRANCH; /* 1 on branch pages, 0 otherwise:
+                                        * slot 0 of a branch page is never
+                                        * compared */
+  intptr_t high = nkeys - 1;
+  if (unlikely(high < low)) {
+    /* nothing to compare: report slot 0 and no node */
+    mc->ki[mc->top] = 0;
+    ret.node = nullptr;
+    return ret;
+  }
+
+  intptr_t i;
+  MDBX_cmp_func *cmp = mc->clc->k.cmp;
+  MDBX_val nodekey;
+  if (unlikely(is_dupfix_leaf(mp))) {
+    /* DUPFIX leaf: keys are addressed by slot and fixed key size */
+    cASSERT(mc, mp->dupfix_ksize == mc->tree->dupfix_size);
+    nodekey.iov_len = mp->dupfix_ksize;
+    do {
+      i = (low + high) >> 1;
+      nodekey.iov_base = page_dupfix_ptr(mp, i, nodekey.iov_len);
+      cASSERT(mc, ptr_disp(mp, mc->txn->env->ps) >=
+                      ptr_disp(nodekey.iov_base, nodekey.iov_len));
+      int cr = cmp(key, &nodekey);
+      DEBUG("found leaf index %zu [%s], rc = %i", i, DKEY_DEBUG(&nodekey), cr);
+      if (cr > 0)
+        low = ++i; /* key is greater: continue right of the probed slot */
+      else if (cr < 0)
+        high = i - 1;
+      else {
+        ret.exact = true;
+        break;
+      }
+    } while (likely(low <= high));
+
+    /* store the key index */
+    mc->ki[mc->top] = (indx_t)i;
+    ret.node =
+        (i < nkeys)
+            ? /* fake for DUPFIX */ (node_t *)(intptr_t)-1
+            : /* There is no entry larger or equal to the key. */ nullptr;
+    return ret;
+  }
+
+  if (MDBX_UNALIGNED_OK < 4 && is_branch(mp) && cmp == cmp_int_align2)
+    /* Branch pages have no data, so if using integer keys,
+     * alignment is guaranteed. Use faster cmp_int_align4(). */
+    cmp = cmp_int_align4;
+
+  node_t *node;
+  do {
+    i = (low + high) >> 1;
+    node = page_node(mp, i);
+    nodekey.iov_len = node_ks(node);
+    nodekey.iov_base = node_key(node);
+    cASSERT(mc, ptr_disp(mp, mc->txn->env->ps) >=
+                    ptr_disp(nodekey.iov_base, nodekey.iov_len));
+    int cr = cmp(key, &nodekey);
+    if (is_leaf(mp))
+      DEBUG("found leaf index %zu [%s], rc = %i", i, DKEY_DEBUG(&nodekey), cr);
+    else
+      DEBUG("found branch index %zu [%s -> %" PRIaPGNO "], rc = %i", i,
+            DKEY_DEBUG(&nodekey), node_pgno(node), cr);
+    if (cr > 0)
+      low = ++i; /* key is greater: continue right of the probed slot */
+    else if (cr < 0)
+      high = i - 1;
+    else {
+      ret.exact = true;
+      break;
+    }
+  } while (likely(low <= high));
+
+  /* store the key index */
+  mc->ki[mc->top] = (indx_t)i;
+  ret.node = (i < nkeys)
+                 ? page_node(mp, i)
+                 : /* There is no entry larger or equal to the key. */ nullptr;
+  return ret;
+}
diff --git a/src/node.h b/src/node.h
new file mode 100644
index 00000000..b5437f18
--- /dev/null
+++ b/src/node.h
@@ -0,0 +1,125 @@
+/// \copyright SPDX-License-Identifier: Apache-2.0
+/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024
+
+#pragma once
+
+#include "essentials.h"
+
+/* valid flags for mdbx_node_add() */
+#define NODE_ADD_FLAGS (N_DUPDATA | N_SUBDATA | MDBX_RESERVE | MDBX_APPEND)
+
+/* Get the page number pointed to by a branch node
+ * (read from the node's `child_pgno` field as an unaligned 32-bit value) */
+MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t
+node_pgno(const node_t *const __restrict node) {
+  pgno_t pgno = UNALIGNED_PEEK_32(node, node_t, child_pgno);
+  return pgno;
+}
+
+/* Set the page number in a branch node;
+ * `pgno` is asserted to lie within MIN_PAGENO..MAX_PAGENO */
+static inline void node_set_pgno(node_t *const __restrict node, pgno_t pgno) {
+  assert(pgno >= MIN_PAGENO && pgno <= MAX_PAGENO);
+
+  UNALIGNED_POKE_32(node, node_t, child_pgno, (uint32_t)pgno);
+}
+
+/* Get the size of the data in a leaf node
+ * (read from the node's `dsize` field as an unaligned 32-bit value) */
+MDBX_NOTHROW_PURE_FUNCTION static inline size_t
+node_ds(const node_t *const __restrict node) {
+  return UNALIGNED_PEEK_32(node, node_t, dsize);
+}
+
+/* Set the size of the data for a leaf node */
+static inline void 
node_set_ds(node_t *const __restrict node, size_t size) {
+  assert(size < INT_MAX);
+  /* stored truncated to 32 bits in the node's `dsize` field */
+  UNALIGNED_POKE_32(node, node_t, dsize, (uint32_t)size);
+}
+
+/* The size of a key in a node
+ * (read from the node's `ksize` field as an unaligned 16-bit value) */
+MDBX_NOTHROW_PURE_FUNCTION static inline size_t
+node_ks(const node_t *const __restrict node) {
+  return UNALIGNED_PEEK_16(node, node_t, ksize);
+}
+
+/* Set the size of the key for a node;
+ * `size` is asserted to be below INT16_MAX and is stored as 16 bits */
+static inline void node_set_ks(node_t *const __restrict node, size_t size) {
+  assert(size < INT16_MAX);
+  UNALIGNED_POKE_16(node, node_t, ksize, (uint16_t)size);
+}
+/* Get the flags byte of a node */
+MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t
+node_flags(const node_t *const __restrict node) {
+  return UNALIGNED_PEEK_8(node, node_t, flags);
+}
+/* Set the flags byte of a node */
+static inline void node_set_flags(node_t *const __restrict node,
+                                  uint8_t flags) {
+  UNALIGNED_POKE_8(node, node_t, flags, flags);
+}
+
+/* Address of the key for the node: NODESIZE bytes past the node start */
+MDBX_NOTHROW_PURE_FUNCTION static inline void *
+node_key(const node_t *const __restrict node) {
+  return ptr_disp(node, NODESIZE);
+}
+
+/* Address of the data for a node: immediately after the key bytes */
+MDBX_NOTHROW_PURE_FUNCTION static inline void *
+node_data(const node_t *const __restrict node) {
+  return ptr_disp(node_key(node), node_ks(node));
+}
+
+/* Size of a node in a leaf page with a given key and data.
+ * This is node header plus key plus data size, with the key+data part
+ * rounded by EVEN_CEIL(). */
+MDBX_NOTHROW_CONST_FUNCTION static inline size_t
+node_size_len(const size_t key_len, const size_t value_len) {
+  return NODESIZE + EVEN_CEIL(key_len + value_len);
+}
+MDBX_NOTHROW_PURE_FUNCTION static inline size_t
+node_size(const MDBX_val *key, const MDBX_val *value) {
+  /* a null key or value counts as zero length */
+  return node_size_len(key ? key->iov_len : 0, value ? value->iov_len : 0);
+}
+/* Page number stored in the data area of a node flagged N_BIGDATA */
+MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t
+node_largedata_pgno(const node_t *const __restrict node) {
+  assert(node_flags(node) & N_BIGDATA);
+  return peek_pgno(node_data(node));
+}
+
+MDBX_INTERNAL int __must_check_result node_read_bigdata(MDBX_cursor *mc,
+                                                        const node_t *node,
+                                                        MDBX_val *data,
+                                                        const page_t *mp);
+/* Fills `data` with the value of `node`: the length comes from node_ds()
+ * and the pointer refers to the node's in-page data. When the node flags
+ * are exactly N_BIGDATA the call is forwarded to node_read_bigdata(),
+ * whose result is returned; otherwise the result is MDBX_SUCCESS. */
+static inline int __must_check_result node_read(MDBX_cursor *mc,
+                                                const node_t *node,
+                                                MDBX_val *data,
+                                                const page_t *mp) {
+  data->iov_len = node_ds(node);
+  data->iov_base = node_data(node);
+  if (likely(node_flags(node) != N_BIGDATA))
+    return MDBX_SUCCESS;
+  return node_read_bigdata(mc, node, data, mp);
+}
+
+/*----------------------------------------------------------------------------*/
+
+MDBX_INTERNAL nsr_t node_search(MDBX_cursor *mc, const MDBX_val *key);
+
+MDBX_INTERNAL int __must_check_result node_add_branch(MDBX_cursor *mc,
+                                                      size_t indx,
+                                                      const MDBX_val *key,
+                                                      pgno_t pgno);
+
+MDBX_INTERNAL int __must_check_result node_add_leaf(MDBX_cursor *mc,
+                                                    size_t indx,
+                                                    const MDBX_val *key,
+                                                    MDBX_val *data,
+                                                    unsigned flags);
+
+MDBX_INTERNAL int __must_check_result node_add_dupfix(MDBX_cursor *mc,
+                                                      size_t indx,
+                                                      const MDBX_val *key);
+
+MDBX_INTERNAL void node_del(MDBX_cursor *mc, size_t ksize);
+
+MDBX_INTERNAL node_t *node_shrink(page_t *mp, size_t indx, node_t *node);
diff --git a/src/options.h b/src/options.h index 73d892af..f3be513e 100644 --- a/src/options.h +++ b/src/options.h @@ -1,7 +1,10 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + /******************************************************************************* - ******************************************************************************* ******************************************************************************* * + * BUILD TIME * * #### ##### ##### # #### # # #### * # # # # # # # # ## # # @@ -13,6 +16,10 @@ * */ +#pragma once + +#include "essentials.h" + /** 
\defgroup build_option Build options * The libmdbx build options. @{ */ @@ -192,7 +199,11 @@ /** Avoid dependence from MSVC CRT and use ntdll.dll instead. */ #ifndef MDBX_WITHOUT_MSVC_CRT +#if !defined(MDBX_BUILD_CXX) || !MDBX_BUILD_CXX #define MDBX_WITHOUT_MSVC_CRT 1 +#else +#define MDBX_WITHOUT_MSVC_CRT 0 +#endif #elif !(MDBX_WITHOUT_MSVC_CRT == 0 || MDBX_WITHOUT_MSVC_CRT == 1) #error MDBX_WITHOUT_MSVC_CRT must be defined as 0 or 1 #endif /* MDBX_WITHOUT_MSVC_CRT */ @@ -499,6 +510,13 @@ #endif #endif /* MDBX_CACHELINE_SIZE */ +/* Max length of iov-vector passed to writev() call, used for auxilary writes */ +#define MDBX_AUXILARY_IOV_MAX 64 +#if defined(IOV_MAX) && IOV_MAX < MDBX_AUXILARY_IOV_MAX +#undef MDBX_AUXILARY_IOV_MAX +#define MDBX_AUXILARY_IOV_MAX IOV_MAX +#endif /* MDBX_AUXILARY_IOV_MAX */ + /** @} end of build options */ /******************************************************************************* ******************************************************************************* @@ -513,6 +531,9 @@ #else #define MDBX_DEBUG 1 #endif +#endif +#if MDBX_DEBUG < 0 || MDBX_DEBUG > 2 +#error "The MDBX_DEBUG must be defined to 0, 1 or 2" #endif /* MDBX_DEBUG */ #else @@ -532,7 +553,7 @@ * Also enables \ref MDBX_DBG_AUDIT if `MDBX_DEBUG >= 2`. * * \ingroup build_option */ -#define MDBX_DEBUG 0...7 +#define MDBX_DEBUG 0...2 /** Disables using of GNU libc extensions. */ #define MDBX_DISABLE_GNU_SOURCE 0 or 1 diff --git a/src/osal.c b/src/osal.c index 86f93d44..d8d58392 100644 --- a/src/osal.c +++ b/src/osal.c @@ -1,18 +1,7 @@ -/* https://en.wikipedia.org/wiki/Operating_system_abstraction_layer */ - -/* - * Copyright 2015-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. 
- * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// +/// https://en.wikipedia.org/wiki/Operating_system_abstraction_layer #include "internals.h" @@ -50,8 +39,8 @@ static int ntstatus2errcode(NTSTATUS status) { ov.Internal = status; /* Zap: '_Param_(1)' could be '0' */ MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(6387); - return GetOverlappedResult(NULL, &ov, &dummy, FALSE) ? MDBX_SUCCESS - : (int)GetLastError(); + return GetOverlappedResult(nullptr, &ov, &dummy, FALSE) ? MDBX_SUCCESS + : (int)GetLastError(); } /* We use native NT APIs to setup the memory map, so that we can @@ -157,7 +146,7 @@ __extern_C void __assert2(const char *file, int line, const char *function, __assert2(file, line, function, assertion) #elif defined(__UCLIBC__) -__extern_C void __assert(const char *, const char *, unsigned int, const char *) +__extern_C void __assert(const char *, const char *, unsigned, const char *) #ifdef __THROW __THROW #else @@ -233,8 +222,8 @@ __extern_C void __assert(const char *function, const char *file, int line, __cold void mdbx_assert_fail(const MDBX_env *env, const char *msg, const char *func, unsigned line) { #if MDBX_DEBUG - if (env && env->me_assert_func) - env->me_assert_func(env, msg, func, line); + if (env && env->assert_func) + env->assert_func(env, msg, func, line); #else (void)env; assert_fail(msg, func, line); @@ -244,7 +233,7 @@ MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func, unsigned line) { #endif /* MDBX_DEBUG */ - if (mdbx_static.logger.ptr) + if (globals.logger.ptr) debug_log(MDBX_LOG_FATAL, func, line, "assert: %s\n", msg); else { #if defined(_WIN32) || defined(_WIN64) @@ -287,7 +276,7 @@ __cold void mdbx_panic(const char *fmt, ...) { ? 
"" : message; - if (mdbx_static.logger.ptr) + if (globals.logger.ptr) debug_log(MDBX_LOG_FATAL, "panic", 0, "%s", const_message); while (1) { @@ -312,8 +301,7 @@ __cold void mdbx_panic(const char *fmt, ...) { /*----------------------------------------------------------------------------*/ #ifndef osal_vasprintf -MDBX_INTERNAL_FUNC int osal_vasprintf(char **strp, const char *fmt, - va_list ap) { +MDBX_INTERNAL int osal_vasprintf(char **strp, const char *fmt, va_list ap) { va_list ones; va_copy(ones, ap); const int needed = vsnprintf(nullptr, 0, fmt, ones); @@ -345,7 +333,7 @@ MDBX_INTERNAL_FUNC int osal_vasprintf(char **strp, const char *fmt, #endif /* osal_vasprintf */ #ifndef osal_asprintf -MDBX_INTERNAL_FUNC int osal_asprintf(char **strp, const char *fmt, ...) { +MDBX_INTERNAL int osal_asprintf(char **strp, const char *fmt, ...) { va_list ap; va_start(ap, fmt); const int rc = osal_vasprintf(strp, fmt, ap); @@ -355,12 +343,13 @@ MDBX_INTERNAL_FUNC int osal_asprintf(char **strp, const char *fmt, ...) { #endif /* osal_asprintf */ #ifndef osal_memalign_alloc -MDBX_INTERNAL_FUNC int osal_memalign_alloc(size_t alignment, size_t bytes, - void **result) { +MDBX_INTERNAL int osal_memalign_alloc(size_t alignment, size_t bytes, + void **result) { assert(is_powerof2(alignment) && alignment >= sizeof(void *)); #if defined(_WIN32) || defined(_WIN64) (void)alignment; - *result = VirtualAlloc(NULL, bytes, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + *result = + VirtualAlloc(nullptr, bytes, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); return *result ? 
MDBX_SUCCESS : MDBX_ENOMEM /* ERROR_OUTOFMEMORY */; #elif defined(_ISOC11_SOURCE) *result = aligned_alloc(alignment, ceil_powerof2(bytes, alignment)); @@ -379,7 +368,7 @@ MDBX_INTERNAL_FUNC int osal_memalign_alloc(size_t alignment, size_t bytes, #endif /* osal_memalign_alloc */ #ifndef osal_memalign_free -MDBX_INTERNAL_FUNC void osal_memalign_free(void *ptr) { +MDBX_INTERNAL void osal_memalign_free(void *ptr) { #if defined(_WIN32) || defined(_WIN64) VirtualFree(ptr, 0, MEM_RELEASE); #else @@ -391,7 +380,7 @@ MDBX_INTERNAL_FUNC void osal_memalign_free(void *ptr) { #ifndef osal_strdup char *osal_strdup(const char *str) { if (!str) - return NULL; + return nullptr; size_t bytes = strlen(str) + 1; char *dup = osal_malloc(bytes); if (dup) @@ -402,19 +391,19 @@ char *osal_strdup(const char *str) { /*----------------------------------------------------------------------------*/ -MDBX_INTERNAL_FUNC int osal_condpair_init(osal_condpair_t *condpair) { +MDBX_INTERNAL int osal_condpair_init(osal_condpair_t *condpair) { int rc; memset(condpair, 0, sizeof(osal_condpair_t)); #if defined(_WIN32) || defined(_WIN64) - if ((condpair->mutex = CreateMutexW(NULL, FALSE, NULL)) == NULL) { + if (!(condpair->mutex = CreateMutexW(nullptr, FALSE, nullptr))) { rc = (int)GetLastError(); goto bailout_mutex; } - if ((condpair->event[0] = CreateEventW(NULL, FALSE, FALSE, NULL)) == NULL) { + if (!(condpair->event[0] = CreateEventW(nullptr, FALSE, FALSE, nullptr))) { rc = (int)GetLastError(); goto bailout_event; } - if ((condpair->event[1] = CreateEventW(NULL, FALSE, FALSE, NULL)) != NULL) + if ((condpair->event[1] = CreateEventW(nullptr, FALSE, FALSE, nullptr))) return MDBX_SUCCESS; rc = (int)GetLastError(); @@ -422,13 +411,13 @@ MDBX_INTERNAL_FUNC int osal_condpair_init(osal_condpair_t *condpair) { bailout_event: (void)CloseHandle(condpair->mutex); #else - rc = pthread_mutex_init(&condpair->mutex, NULL); + rc = pthread_mutex_init(&condpair->mutex, nullptr); if (unlikely(rc != 0)) goto 
bailout_mutex; - rc = pthread_cond_init(&condpair->cond[0], NULL); + rc = pthread_cond_init(&condpair->cond[0], nullptr); if (unlikely(rc != 0)) goto bailout_cond; - rc = pthread_cond_init(&condpair->cond[1], NULL); + rc = pthread_cond_init(&condpair->cond[1], nullptr); if (likely(rc == 0)) return MDBX_SUCCESS; @@ -441,7 +430,7 @@ bailout_mutex: return rc; } -MDBX_INTERNAL_FUNC int osal_condpair_destroy(osal_condpair_t *condpair) { +MDBX_INTERNAL int osal_condpair_destroy(osal_condpair_t *condpair) { #if defined(_WIN32) || defined(_WIN64) int rc = CloseHandle(condpair->mutex) ? MDBX_SUCCESS : (int)GetLastError(); rc = CloseHandle(condpair->event[0]) ? rc : (int)GetLastError(); @@ -455,7 +444,7 @@ MDBX_INTERNAL_FUNC int osal_condpair_destroy(osal_condpair_t *condpair) { return rc; } -MDBX_INTERNAL_FUNC int osal_condpair_lock(osal_condpair_t *condpair) { +MDBX_INTERNAL int osal_condpair_lock(osal_condpair_t *condpair) { #if defined(_WIN32) || defined(_WIN64) DWORD code = WaitForSingleObject(condpair->mutex, INFINITE); return waitstatus2errcode(code); @@ -464,7 +453,7 @@ MDBX_INTERNAL_FUNC int osal_condpair_lock(osal_condpair_t *condpair) { #endif } -MDBX_INTERNAL_FUNC int osal_condpair_unlock(osal_condpair_t *condpair) { +MDBX_INTERNAL int osal_condpair_unlock(osal_condpair_t *condpair) { #if defined(_WIN32) || defined(_WIN64) return ReleaseMutex(condpair->mutex) ? MDBX_SUCCESS : (int)GetLastError(); #else @@ -472,8 +461,7 @@ MDBX_INTERNAL_FUNC int osal_condpair_unlock(osal_condpair_t *condpair) { #endif } -MDBX_INTERNAL_FUNC int osal_condpair_signal(osal_condpair_t *condpair, - bool part) { +MDBX_INTERNAL int osal_condpair_signal(osal_condpair_t *condpair, bool part) { #if defined(_WIN32) || defined(_WIN64) return SetEvent(condpair->event[part]) ? 
MDBX_SUCCESS : (int)GetLastError(); #else @@ -481,8 +469,7 @@ MDBX_INTERNAL_FUNC int osal_condpair_signal(osal_condpair_t *condpair, #endif } -MDBX_INTERNAL_FUNC int osal_condpair_wait(osal_condpair_t *condpair, - bool part) { +MDBX_INTERNAL int osal_condpair_wait(osal_condpair_t *condpair, bool part) { #if defined(_WIN32) || defined(_WIN64) DWORD code = SignalObjectAndWait(condpair->mutex, condpair->event[part], INFINITE, FALSE); @@ -499,7 +486,7 @@ MDBX_INTERNAL_FUNC int osal_condpair_wait(osal_condpair_t *condpair, /*----------------------------------------------------------------------------*/ -MDBX_INTERNAL_FUNC int osal_fastmutex_init(osal_fastmutex_t *fastmutex) { +MDBX_INTERNAL int osal_fastmutex_init(osal_fastmutex_t *fastmutex) { #if defined(_WIN32) || defined(_WIN64) InitializeCriticalSection(fastmutex); return MDBX_SUCCESS; @@ -518,7 +505,7 @@ MDBX_INTERNAL_FUNC int osal_fastmutex_init(osal_fastmutex_t *fastmutex) { #endif } -MDBX_INTERNAL_FUNC int osal_fastmutex_destroy(osal_fastmutex_t *fastmutex) { +MDBX_INTERNAL int osal_fastmutex_destroy(osal_fastmutex_t *fastmutex) { #if defined(_WIN32) || defined(_WIN64) DeleteCriticalSection(fastmutex); return MDBX_SUCCESS; @@ -527,7 +514,7 @@ MDBX_INTERNAL_FUNC int osal_fastmutex_destroy(osal_fastmutex_t *fastmutex) { #endif } -MDBX_INTERNAL_FUNC int osal_fastmutex_acquire(osal_fastmutex_t *fastmutex) { +MDBX_INTERNAL int osal_fastmutex_acquire(osal_fastmutex_t *fastmutex) { #if defined(_WIN32) || defined(_WIN64) __try { EnterCriticalSection(fastmutex); @@ -544,7 +531,7 @@ MDBX_INTERNAL_FUNC int osal_fastmutex_acquire(osal_fastmutex_t *fastmutex) { #endif } -MDBX_INTERNAL_FUNC int osal_fastmutex_release(osal_fastmutex_t *fastmutex) { +MDBX_INTERNAL int osal_fastmutex_release(osal_fastmutex_t *fastmutex) { #if defined(_WIN32) || defined(_WIN64) LeaveCriticalSection(fastmutex); return MDBX_SUCCESS; @@ -557,7 +544,7 @@ MDBX_INTERNAL_FUNC int osal_fastmutex_release(osal_fastmutex_t *fastmutex) { #if defined(_WIN32) 
|| defined(_WIN64) -MDBX_INTERNAL_FUNC int osal_mb2w(const char *const src, wchar_t **const pdst) { +MDBX_INTERNAL int osal_mb2w(const char *const src, wchar_t **const pdst) { const size_t dst_wlen = MultiByteToWideChar( CP_THREAD_ACP, MB_ERR_INVALID_CHARS, src, -1, nullptr, 0); wchar_t *dst = *pdst; @@ -630,11 +617,11 @@ static size_t osal_iov_max; #undef OSAL_IOV_MAX #endif /* OSAL_IOV_MAX */ -MDBX_INTERNAL_FUNC int osal_ioring_create(osal_ioring_t *ior +MDBX_INTERNAL int osal_ioring_create(osal_ioring_t *ior #if defined(_WIN32) || defined(_WIN64) - , - bool enable_direct, - mdbx_filehandle_t overlapped_fd + , + bool enable_direct, + mdbx_filehandle_t overlapped_fd #endif /* Windows */ ) { memset(ior, 0, sizeof(osal_ioring_t)); @@ -642,9 +629,8 @@ MDBX_INTERNAL_FUNC int osal_ioring_create(osal_ioring_t *ior #if defined(_WIN32) || defined(_WIN64) ior->overlapped_fd = overlapped_fd; ior->direct = enable_direct && overlapped_fd; - const unsigned pagesize = (unsigned)osal_syspagesize(); - ior->pagesize = pagesize; - ior->pagesize_ln2 = (uint8_t)log2n_powerof2(pagesize); + ior->pagesize = globals.sys_pagesize; + ior->pagesize_ln2 = globals.sys_pagesize_ln2; ior->async_done = ior_get_event(ior); if (!ior->async_done) return GetLastError(); @@ -658,7 +644,7 @@ MDBX_INTERNAL_FUNC int osal_ioring_create(osal_ioring_t *ior return MDBX_SUCCESS; } -static __inline size_t ior_offset(const ior_item_t *item) { +static inline size_t ior_offset(const ior_item_t *item) { #if defined(_WIN32) || defined(_WIN64) return item->ov.Offset | (size_t)((sizeof(size_t) > sizeof(item->ov.Offset)) ? 
(uint64_t)item->ov.OffsetHigh << 32 @@ -668,7 +654,7 @@ static __inline size_t ior_offset(const ior_item_t *item) { #endif /* !Windows */ } -static __inline ior_item_t *ior_next(ior_item_t *item, size_t sgvcnt) { +static inline ior_item_t *ior_next(ior_item_t *item, size_t sgvcnt) { #if defined(ior_sgv_element) assert(sgvcnt > 0); return (ior_item_t *)ptr_disp(item, sizeof(ior_item_t) - @@ -681,11 +667,12 @@ static __inline ior_item_t *ior_next(ior_item_t *item, size_t sgvcnt) { #endif } -MDBX_INTERNAL_FUNC int osal_ioring_add(osal_ioring_t *ior, const size_t offset, - void *data, const size_t bytes) { +MDBX_INTERNAL int osal_ioring_add(osal_ioring_t *ior, const size_t offset, + void *data, const size_t bytes) { assert(bytes && data); - assert(bytes % MIN_PAGESIZE == 0 && bytes <= MAX_WRITE); - assert(offset % MIN_PAGESIZE == 0 && offset + (uint64_t)bytes <= MAX_MAPSIZE); + assert(bytes % MDBX_MIN_PAGESIZE == 0 && bytes <= MAX_WRITE); + assert(offset % MDBX_MIN_PAGESIZE == 0 && + offset + (uint64_t)bytes <= MAX_MAPSIZE); #if defined(_WIN32) || defined(_WIN64) const unsigned segments = (unsigned)(bytes >> ior->pagesize_ln2); @@ -800,9 +787,10 @@ MDBX_INTERNAL_FUNC int osal_ioring_add(osal_ioring_t *ior, const size_t offset, return MDBX_SUCCESS; } -MDBX_INTERNAL_FUNC void osal_ioring_walk( - osal_ioring_t *ior, iov_ctx_t *ctx, - void (*callback)(iov_ctx_t *ctx, size_t offset, void *data, size_t bytes)) { +MDBX_INTERNAL void osal_ioring_walk(osal_ioring_t *ior, iov_ctx_t *ctx, + void (*callback)(iov_ctx_t *ctx, + size_t offset, void *data, + size_t bytes)) { for (ior_item_t *item = ior->pool; item <= ior->last;) { #if defined(_WIN32) || defined(_WIN64) size_t offset = ior_offset(item); @@ -843,7 +831,7 @@ MDBX_INTERNAL_FUNC void osal_ioring_walk( } } -MDBX_INTERNAL_FUNC osal_ioring_write_result_t +MDBX_INTERNAL osal_ioring_write_result_t osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { osal_ioring_write_result_t r = {MDBX_SUCCESS, 0}; @@ -887,7 +875,7 @@ 
osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { ERROR("%s: fd %p, item %p (%zu), pgno %u, bytes %zu, offset %" PRId64 ", err %d", "WriteFileGather", fd, __Wpedantic_format_voidptr(item), - item - ior->pool, ((MDBX_page *)item->single.iov_base)->mp_pgno, + item - ior->pool, ((page_t *)item->single.iov_base)->pgno, bytes, item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), r.err); goto bailout_rc; @@ -909,7 +897,7 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { ERROR("%s: fd %p, item %p (%zu), pgno %u, bytes %zu, offset %" PRId64 ", err %d", "WriteFileEx", fd, __Wpedantic_format_voidptr(item), - item - ior->pool, ((MDBX_page *)item->single.iov_base)->mp_pgno, + item - ior->pool, ((page_t *)item->single.iov_base)->pgno, bytes, item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), r.err); goto bailout_rc; @@ -920,9 +908,8 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { "%s: fd %p, item %p (%zu), pgno %u, bytes %zu, offset %" PRId64 ", err %d", "WriteFileEx", fd, __Wpedantic_format_voidptr(item), - item - ior->pool, ((MDBX_page *)item->single.iov_base)->mp_pgno, - bytes, item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), - r.err); + item - ior->pool, ((page_t *)item->single.iov_base)->pgno, bytes, + item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), r.err); SleepEx(0, true); goto retry; case ERROR_INVALID_USER_BUFFER: @@ -943,9 +930,8 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { ERROR("%s: fd %p, item %p (%zu), pgno %u, bytes %zu, offset %" PRId64 ", err %d", "WriteFile", fd, __Wpedantic_format_voidptr(item), - item - ior->pool, ((MDBX_page *)item->single.iov_base)->mp_pgno, - bytes, item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), - r.err); + item - ior->pool, ((page_t *)item->single.iov_base)->pgno, bytes, + item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), r.err); goto bailout_rc; } else if (unlikely(written != bytes)) { r.err = ERROR_WRITE_FAULT; @@ -1012,8 +998,8 
@@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { ERROR("%s: item %p (%zu), pgno %u, bytes %zu, offset %" PRId64 ", err %d", "GetOverlappedResult", __Wpedantic_format_voidptr(item), - item - ior->pool, - ((MDBX_page *)item->single.iov_base)->mp_pgno, bytes, + item - ior->pool, ((page_t *)item->single.iov_base)->pgno, + bytes, item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), (int)GetLastError()); goto bailout_geterr; @@ -1029,12 +1015,12 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { DWORD written = 0; r.err = (int)item->ov.Internal; if ((r.err & 0x80000000) && - GetOverlappedResult(NULL, &item->ov, &written, true)) + GetOverlappedResult(nullptr, &item->ov, &written, true)) r.err = (int)GetLastError(); ERROR("%s: item %p (%zu), pgno %u, bytes %zu, offset %" PRId64 ", err %d", "Result", __Wpedantic_format_voidptr(item), item - ior->pool, - ((MDBX_page *)item->single.iov_base)->mp_pgno, bytes, + ((page_t *)item->single.iov_base)->pgno, bytes, item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), (int)GetLastError()); goto bailout_rc; @@ -1084,7 +1070,7 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { return r; } -MDBX_INTERNAL_FUNC void osal_ioring_reset(osal_ioring_t *ior) { +MDBX_INTERNAL void osal_ioring_reset(osal_ioring_t *ior) { #if defined(_WIN32) || defined(_WIN64) if (ior->last) { for (ior_item_t *item = ior->pool; item <= ior->last;) { @@ -1126,13 +1112,13 @@ static void ior_cleanup(osal_ioring_t *ior, const size_t since) { #endif /* Windows */ } -MDBX_INTERNAL_FUNC int osal_ioring_resize(osal_ioring_t *ior, size_t items) { +MDBX_INTERNAL int osal_ioring_resize(osal_ioring_t *ior, size_t items) { assert(items > 0 && items < INT_MAX / sizeof(ior_item_t)); #if defined(_WIN32) || defined(_WIN64) if (ior->state & IOR_STATE_LOCKED) return MDBX_SUCCESS; const bool useSetFileIoOverlappedRange = - ior->overlapped_fd && mdbx_SetFileIoOverlappedRange && items > 42; + ior->overlapped_fd && 
imports.SetFileIoOverlappedRange && items > 42; const size_t ceiling = useSetFileIoOverlappedRange ? ((items < 65536 / 2 / sizeof(ior_item_t)) ? 65536 : 65536 * 4) @@ -1174,7 +1160,8 @@ MDBX_INTERNAL_FUNC int osal_ioring_resize(osal_ioring_t *ior, size_t items) { ior->boundary = ptr_disp(ior->pool, ior->allocated); #if defined(_WIN32) || defined(_WIN64) if (useSetFileIoOverlappedRange) { - if (mdbx_SetFileIoOverlappedRange(ior->overlapped_fd, ptr, (ULONG)bytes)) + if (imports.SetFileIoOverlappedRange(ior->overlapped_fd, ptr, + (ULONG)bytes)) ior->state += IOR_STATE_LOCKED; else return GetLastError(); @@ -1184,7 +1171,7 @@ MDBX_INTERNAL_FUNC int osal_ioring_resize(osal_ioring_t *ior, size_t items) { return MDBX_SUCCESS; } -MDBX_INTERNAL_FUNC void osal_ioring_destroy(osal_ioring_t *ior) { +MDBX_INTERNAL void osal_ioring_destroy(osal_ioring_t *ior) { if (ior->allocated) ior_cleanup(ior, 0); #if defined(_WIN32) || defined(_WIN64) @@ -1201,7 +1188,7 @@ MDBX_INTERNAL_FUNC void osal_ioring_destroy(osal_ioring_t *ior) { /*----------------------------------------------------------------------------*/ -MDBX_INTERNAL_FUNC int osal_removefile(const pathchar_t *pathname) { +MDBX_INTERNAL int osal_removefile(const pathchar_t *pathname) { #if defined(_WIN32) || defined(_WIN64) return DeleteFileW(pathname) ? MDBX_SUCCESS : (int)GetLastError(); #else @@ -1213,7 +1200,7 @@ MDBX_INTERNAL_FUNC int osal_removefile(const pathchar_t *pathname) { static bool is_valid_fd(int fd) { return !(isatty(fd) < 0 && errno == EBADF); } #endif /*! Windows */ -MDBX_INTERNAL_FUNC int osal_removedirectory(const pathchar_t *pathname) { +MDBX_INTERNAL int osal_removedirectory(const pathchar_t *pathname) { #if defined(_WIN32) || defined(_WIN64) return RemoveDirectoryW(pathname) ? 
MDBX_SUCCESS : (int)GetLastError(); #else @@ -1221,7 +1208,7 @@ MDBX_INTERNAL_FUNC int osal_removedirectory(const pathchar_t *pathname) { #endif } -MDBX_INTERNAL_FUNC int osal_fileexists(const pathchar_t *pathname) { +MDBX_INTERNAL int osal_fileexists(const pathchar_t *pathname) { #if defined(_WIN32) || defined(_WIN64) if (GetFileAttributesW(pathname) != INVALID_FILE_ATTRIBUTES) return MDBX_RESULT_TRUE; @@ -1237,8 +1224,7 @@ MDBX_INTERNAL_FUNC int osal_fileexists(const pathchar_t *pathname) { #endif } -MDBX_INTERNAL_FUNC pathchar_t *osal_fileext(const pathchar_t *pathname, - size_t len) { +MDBX_INTERNAL pathchar_t *osal_fileext(const pathchar_t *pathname, size_t len) { const pathchar_t *ext = nullptr; for (size_t i = 0; i < len && pathname[i]; i++) if (pathname[i] == '.') @@ -1248,8 +1234,8 @@ MDBX_INTERNAL_FUNC pathchar_t *osal_fileext(const pathchar_t *pathname, return (pathchar_t *)ext; } -MDBX_INTERNAL_FUNC bool osal_pathequal(const pathchar_t *l, const pathchar_t *r, - size_t len) { +MDBX_INTERNAL bool osal_pathequal(const pathchar_t *l, const pathchar_t *r, + size_t len) { #if defined(_WIN32) || defined(_WIN64) for (size_t i = 0; i < len; ++i) { pathchar_t a = l[i]; @@ -1265,11 +1251,10 @@ MDBX_INTERNAL_FUNC bool osal_pathequal(const pathchar_t *l, const pathchar_t *r, #endif } -MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose, - const MDBX_env *env, - const pathchar_t *pathname, - mdbx_filehandle_t *fd, - mdbx_mode_t unix_mode_bits) { +MDBX_INTERNAL int osal_openfile(const enum osal_openfile_purpose purpose, + const MDBX_env *env, const pathchar_t *pathname, + mdbx_filehandle_t *fd, + mdbx_mode_t unix_mode_bits) { *fd = INVALID_HANDLE_VALUE; #if defined(_WIN32) || defined(_WIN64) @@ -1277,9 +1262,8 @@ MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose, DWORD FlagsAndAttributes = FILE_FLAG_POSIX_SEMANTICS | FILE_ATTRIBUTE_NOT_CONTENT_INDEXED; DWORD DesiredAccess = FILE_READ_ATTRIBUTES; - DWORD ShareMode 
= (env->me_flags & MDBX_EXCLUSIVE) - ? 0 - : (FILE_SHARE_READ | FILE_SHARE_WRITE); + DWORD ShareMode = + (env->flags & MDBX_EXCLUSIVE) ? 0 : (FILE_SHARE_READ | FILE_SHARE_WRITE); switch (purpose) { default: @@ -1314,7 +1298,7 @@ MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose, CreationDisposition = CREATE_NEW; ShareMode = 0; DesiredAccess |= GENERIC_WRITE; - if (env->me_psize >= env->me_os_psize) + if (env->ps >= globals.sys_pagesize) FlagsAndAttributes |= FILE_FLAG_NO_BUFFERING; break; case MDBX_OPEN_DELETE: @@ -1325,8 +1309,8 @@ MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose, break; } - *fd = CreateFileW(pathname, DesiredAccess, ShareMode, NULL, - CreationDisposition, FlagsAndAttributes, NULL); + *fd = CreateFileW(pathname, DesiredAccess, ShareMode, nullptr, + CreationDisposition, FlagsAndAttributes, nullptr); if (*fd == INVALID_HANDLE_VALUE) { int err = (int)GetLastError(); if (err == ERROR_ACCESS_DENIED && purpose == MDBX_OPEN_LCK) { @@ -1384,7 +1368,7 @@ MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose, } const bool direct_nocache_for_copy = - env->me_psize >= env->me_os_psize && purpose == MDBX_OPEN_COPY; + env->ps >= globals.sys_pagesize && purpose == MDBX_OPEN_COPY; if (direct_nocache_for_copy) { #if defined(O_DIRECT) flags |= O_DIRECT; @@ -1492,7 +1476,7 @@ MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose, return MDBX_SUCCESS; } -MDBX_INTERNAL_FUNC int osal_closefile(mdbx_filehandle_t fd) { +MDBX_INTERNAL int osal_closefile(mdbx_filehandle_t fd) { #if defined(_WIN32) || defined(_WIN64) return CloseHandle(fd) ? 
MDBX_SUCCESS : (int)GetLastError(); #else @@ -1501,8 +1485,8 @@ MDBX_INTERNAL_FUNC int osal_closefile(mdbx_filehandle_t fd) { #endif } -MDBX_INTERNAL_FUNC int osal_pread(mdbx_filehandle_t fd, void *buf, size_t bytes, - uint64_t offset) { +MDBX_INTERNAL int osal_pread(mdbx_filehandle_t fd, void *buf, size_t bytes, + uint64_t offset) { if (bytes > MAX_WRITE) return MDBX_EINVAL; #if defined(_WIN32) || defined(_WIN64) @@ -1528,8 +1512,8 @@ MDBX_INTERNAL_FUNC int osal_pread(mdbx_filehandle_t fd, void *buf, size_t bytes, return (bytes == (size_t)read) ? MDBX_SUCCESS : MDBX_ENODATA; } -MDBX_INTERNAL_FUNC int osal_pwrite(mdbx_filehandle_t fd, const void *buf, - size_t bytes, uint64_t offset) { +MDBX_INTERNAL int osal_pwrite(mdbx_filehandle_t fd, const void *buf, + size_t bytes, uint64_t offset) { while (true) { #if defined(_WIN32) || defined(_WIN64) OVERLAPPED ov; @@ -1564,8 +1548,8 @@ MDBX_INTERNAL_FUNC int osal_pwrite(mdbx_filehandle_t fd, const void *buf, } } -MDBX_INTERNAL_FUNC int osal_write(mdbx_filehandle_t fd, const void *buf, - size_t bytes) { +MDBX_INTERNAL int osal_write(mdbx_filehandle_t fd, const void *buf, + size_t bytes) { while (true) { #if defined(_WIN32) || defined(_WIN64) DWORD written; @@ -1625,8 +1609,8 @@ int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov, size_t sgvcnt, #endif } -MDBX_INTERNAL_FUNC int osal_fsync(mdbx_filehandle_t fd, - enum osal_syncmode_bits mode_bits) { +MDBX_INTERNAL int osal_fsync(mdbx_filehandle_t fd, + enum osal_syncmode_bits mode_bits) { #if defined(_WIN32) || defined(_WIN64) if ((mode_bits & (MDBX_SYNC_DATA | MDBX_SYNC_IODQ)) && !FlushFileBuffers(fd)) return (int)GetLastError(); @@ -1656,7 +1640,7 @@ MDBX_INTERNAL_FUNC int osal_fsync(mdbx_filehandle_t fd, break /* error */; #if defined(__linux__) || defined(__gnu_linux__) case MDBX_SYNC_SIZE: - assert(linux_kernel_version >= 0x03060000); + assert(globals.linux_kernel_version >= 0x03060000); return MDBX_SUCCESS; #endif /* Linux */ #endif /* _POSIX_SYNCHRONIZED_IO > 0 */ 
@@ -1691,7 +1675,7 @@ int osal_filesize(mdbx_filehandle_t fd, uint64_t *length) { return MDBX_SUCCESS; } -MDBX_INTERNAL_FUNC int osal_is_pipe(mdbx_filehandle_t fd) { +MDBX_INTERNAL int osal_is_pipe(mdbx_filehandle_t fd) { #if defined(_WIN32) || defined(_WIN64) switch (GetFileType(fd)) { case FILE_TYPE_DISK: @@ -1722,20 +1706,20 @@ MDBX_INTERNAL_FUNC int osal_is_pipe(mdbx_filehandle_t fd) { #endif } -MDBX_INTERNAL_FUNC int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length) { +MDBX_INTERNAL int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length) { #if defined(_WIN32) || defined(_WIN64) - if (mdbx_SetFileInformationByHandle) { + if (imports.SetFileInformationByHandle) { FILE_END_OF_FILE_INFO EndOfFileInfo; EndOfFileInfo.EndOfFile.QuadPart = length; - return mdbx_SetFileInformationByHandle(fd, FileEndOfFileInfo, - &EndOfFileInfo, - sizeof(FILE_END_OF_FILE_INFO)) + return imports.SetFileInformationByHandle(fd, FileEndOfFileInfo, + &EndOfFileInfo, + sizeof(FILE_END_OF_FILE_INFO)) ? MDBX_SUCCESS : (int)GetLastError(); } else { LARGE_INTEGER li; li.QuadPart = length; - return (SetFilePointerEx(fd, li, NULL, FILE_BEGIN) && SetEndOfFile(fd)) + return (SetFilePointerEx(fd, li, nullptr, FILE_BEGIN) && SetEndOfFile(fd)) ? MDBX_SUCCESS : (int)GetLastError(); } @@ -1746,12 +1730,12 @@ MDBX_INTERNAL_FUNC int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length) { #endif } -MDBX_INTERNAL_FUNC int osal_fseek(mdbx_filehandle_t fd, uint64_t pos) { +MDBX_INTERNAL int osal_fseek(mdbx_filehandle_t fd, uint64_t pos) { #if defined(_WIN32) || defined(_WIN64) LARGE_INTEGER li; li.QuadPart = pos; - return SetFilePointerEx(fd, li, NULL, FILE_BEGIN) ? MDBX_SUCCESS - : (int)GetLastError(); + return SetFilePointerEx(fd, li, nullptr, FILE_BEGIN) ? 
MDBX_SUCCESS + : (int)GetLastError(); #else STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), "libmdbx requires 64-bit file I/O on 64-bit systems"); @@ -1761,19 +1745,19 @@ MDBX_INTERNAL_FUNC int osal_fseek(mdbx_filehandle_t fd, uint64_t pos) { /*----------------------------------------------------------------------------*/ -MDBX_INTERNAL_FUNC int +MDBX_INTERNAL int osal_thread_create(osal_thread_t *thread, THREAD_RESULT(THREAD_CALL *start_routine)(void *), void *arg) { #if defined(_WIN32) || defined(_WIN64) - *thread = CreateThread(NULL, 0, start_routine, arg, 0, NULL); + *thread = CreateThread(nullptr, 0, start_routine, arg, 0, nullptr); return *thread ? MDBX_SUCCESS : (int)GetLastError(); #else - return pthread_create(thread, NULL, start_routine, arg); + return pthread_create(thread, nullptr, start_routine, arg); #endif } -MDBX_INTERNAL_FUNC int osal_thread_join(osal_thread_t thread) { +MDBX_INTERNAL int osal_thread_join(osal_thread_t thread) { #if defined(_WIN32) || defined(_WIN64) DWORD code = WaitForSingleObject(thread, INFINITE); return waitstatus2errcode(code); @@ -1785,9 +1769,8 @@ MDBX_INTERNAL_FUNC int osal_thread_join(osal_thread_t thread) { /*----------------------------------------------------------------------------*/ -MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset, - size_t length, - enum osal_syncmode_bits mode_bits) { +MDBX_INTERNAL int osal_msync(const osal_mmap_t *map, size_t offset, + size_t length, enum osal_syncmode_bits mode_bits) { if (!MDBX_MMAP_USE_MS_ASYNC && mode_bits == MDBX_SYNC_NONE) return MDBX_SUCCESS; @@ -1807,7 +1790,7 @@ MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset, // so just leave such optimization to the libc discretion. // NOTE: The MDBX_MMAP_USE_MS_ASYNC must be defined to 1 for such cases. 
// - // assert(linux_kernel_version > 0x02061300); + // assert(mdbx.linux_kernel_version > 0x02061300); // if (mode_bits <= MDBX_SYNC_KICK) // return MDBX_SUCCESS; #endif /* Linux */ @@ -1819,17 +1802,16 @@ MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset, return MDBX_SUCCESS; } -MDBX_INTERNAL_FUNC int osal_check_fs_rdonly(mdbx_filehandle_t handle, - const pathchar_t *pathname, - int err) { +MDBX_INTERNAL int osal_check_fs_rdonly(mdbx_filehandle_t handle, + const pathchar_t *pathname, int err) { #if defined(_WIN32) || defined(_WIN64) (void)pathname; (void)err; - if (!mdbx_GetVolumeInformationByHandleW) + if (!imports.GetVolumeInformationByHandleW) return MDBX_ENOSYS; DWORD unused, flags; - if (!mdbx_GetVolumeInformationByHandleW(handle, nullptr, 0, nullptr, &unused, - &flags, nullptr, 0)) + if (!imports.GetVolumeInformationByHandleW(handle, nullptr, 0, nullptr, + &unused, &flags, nullptr, 0)) return (int)GetLastError(); if ((flags & FILE_READ_ONLY_VOLUME) == 0) return MDBX_EACCESS; @@ -1849,7 +1831,7 @@ MDBX_INTERNAL_FUNC int osal_check_fs_rdonly(mdbx_filehandle_t handle, return MDBX_SUCCESS; } -MDBX_INTERNAL_FUNC int osal_check_fs_incore(mdbx_filehandle_t handle) { +MDBX_INTERNAL int osal_check_fs_incore(mdbx_filehandle_t handle) { #if defined(_WIN32) || defined(_WIN64) (void)handle; #else @@ -1895,17 +1877,17 @@ MDBX_INTERNAL_FUNC int osal_check_fs_incore(mdbx_filehandle_t handle) { static int osal_check_fs_local(mdbx_filehandle_t handle, int flags) { #if defined(_WIN32) || defined(_WIN64) - if (mdbx_RunningUnderWine() && !(flags & MDBX_EXCLUSIVE)) + if (globals.running_under_Wine && !(flags & MDBX_EXCLUSIVE)) return ERROR_NOT_CAPABLE /* workaround for Wine */; if (GetFileType(handle) != FILE_TYPE_DISK) return ERROR_FILE_OFFLINE; - if (mdbx_GetFileInformationByHandleEx) { + if (imports.GetFileInformationByHandleEx) { FILE_REMOTE_PROTOCOL_INFO RemoteProtocolInfo; - if (mdbx_GetFileInformationByHandleEx(handle, FileRemoteProtocolInfo, - 
&RemoteProtocolInfo, - sizeof(RemoteProtocolInfo))) { + if (imports.GetFileInformationByHandleEx(handle, FileRemoteProtocolInfo, + &RemoteProtocolInfo, + sizeof(RemoteProtocolInfo))) { if ((RemoteProtocolInfo.Flags & REMOTE_PROTOCOL_INFO_FLAG_OFFLINE) && !(flags & MDBX_RDONLY)) return ERROR_FILE_OFFLINE; @@ -1915,7 +1897,7 @@ static int osal_check_fs_local(mdbx_filehandle_t handle, int flags) { } } - if (mdbx_NtFsControlFile) { + if (imports.NtFsControlFile) { NTSTATUS rc; struct { WOF_EXTERNAL_INFO wof_info; @@ -1926,10 +1908,10 @@ static int osal_check_fs_local(mdbx_filehandle_t handle, int flags) { size_t reserved_for_microsoft_madness[42]; } GetExternalBacking_OutputBuffer; IO_STATUS_BLOCK StatusBlock; - rc = mdbx_NtFsControlFile(handle, NULL, NULL, NULL, &StatusBlock, - FSCTL_GET_EXTERNAL_BACKING, NULL, 0, - &GetExternalBacking_OutputBuffer, - sizeof(GetExternalBacking_OutputBuffer)); + rc = imports.NtFsControlFile(handle, nullptr, nullptr, nullptr, + &StatusBlock, FSCTL_GET_EXTERNAL_BACKING, + nullptr, 0, &GetExternalBacking_OutputBuffer, + sizeof(GetExternalBacking_OutputBuffer)); if (NT_SUCCESS(rc)) { if (!(flags & MDBX_EXCLUSIVE)) return ERROR_REMOTE_STORAGE_MEDIA_ERROR; @@ -1939,16 +1921,17 @@ static int osal_check_fs_local(mdbx_filehandle_t handle, int flags) { return ntstatus2errcode(rc); } - if (mdbx_GetVolumeInformationByHandleW && mdbx_GetFinalPathNameByHandleW) { + if (imports.GetVolumeInformationByHandleW && + imports.GetFinalPathNameByHandleW) { WCHAR *PathBuffer = osal_malloc(sizeof(WCHAR) * INT16_MAX); if (!PathBuffer) return MDBX_ENOMEM; int rc = MDBX_SUCCESS; DWORD VolumeSerialNumber, FileSystemFlags; - if (!mdbx_GetVolumeInformationByHandleW(handle, PathBuffer, INT16_MAX, - &VolumeSerialNumber, NULL, - &FileSystemFlags, NULL, 0)) { + if (!imports.GetVolumeInformationByHandleW(handle, PathBuffer, INT16_MAX, + &VolumeSerialNumber, nullptr, + &FileSystemFlags, nullptr, 0)) { rc = (int)GetLastError(); goto bailout; } @@ -1962,8 +1945,9 @@ static 
int osal_check_fs_local(mdbx_filehandle_t handle, int flags) { } } - if (mdbx_GetFinalPathNameByHandleW(handle, PathBuffer, INT16_MAX, - FILE_NAME_NORMALIZED | VOLUME_NAME_NT)) { + if (imports.GetFinalPathNameByHandleW(handle, PathBuffer, INT16_MAX, + FILE_NAME_NORMALIZED | + VOLUME_NAME_NT)) { if (_wcsnicmp(PathBuffer, L"\\Device\\Mup\\", 12) == 0) { if (!(flags & MDBX_EXCLUSIVE)) { rc = ERROR_REMOTE_STORAGE_MEDIA_ERROR; @@ -1978,9 +1962,9 @@ static int osal_check_fs_local(mdbx_filehandle_t handle, int flags) { goto bailout; } - if (mdbx_GetFinalPathNameByHandleW(handle, PathBuffer, INT16_MAX, - FILE_NAME_NORMALIZED | - VOLUME_NAME_DOS)) { + if (imports.GetFinalPathNameByHandleW(handle, PathBuffer, INT16_MAX, + FILE_NAME_NORMALIZED | + VOLUME_NAME_DOS)) { UINT DriveType = GetDriveTypeW(PathBuffer); if (DriveType == DRIVE_NO_ROOT_DIR && _wcsnicmp(PathBuffer, L"\\\\?\\", 4) == 0 && @@ -2196,15 +2180,15 @@ static int check_mmap_limit(const size_t limit) { return MDBX_SUCCESS; } -MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map, size_t size, - const size_t limit, const unsigned options) { +MDBX_INTERNAL int osal_mmap(const int flags, osal_mmap_t *map, size_t size, + const size_t limit, const unsigned options) { assert(size <= limit); map->limit = 0; map->current = 0; map->base = nullptr; map->filesize = 0; #if defined(_WIN32) || defined(_WIN64) - map->section = NULL; + map->section = nullptr; #endif /* Windows */ int err = osal_check_fs_local(map->fd, flags); @@ -2243,28 +2227,29 @@ MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map, size_t size, #if defined(_WIN32) || defined(_WIN64) LARGE_INTEGER SectionSize; SectionSize.QuadPart = size; - err = NtCreateSection( - &map->section, - /* DesiredAccess */ - (flags & MDBX_WRITEMAP) - ? 
SECTION_QUERY | SECTION_MAP_READ | SECTION_EXTEND_SIZE | - SECTION_MAP_WRITE - : SECTION_QUERY | SECTION_MAP_READ | SECTION_EXTEND_SIZE, - /* ObjectAttributes */ NULL, /* MaximumSize (InitialSize) */ &SectionSize, - /* SectionPageProtection */ - (flags & MDBX_RDONLY) ? PAGE_READONLY : PAGE_READWRITE, - /* AllocationAttributes */ SEC_RESERVE, map->fd); + err = NtCreateSection(&map->section, + /* DesiredAccess */ + (flags & MDBX_WRITEMAP) + ? SECTION_QUERY | SECTION_MAP_READ | + SECTION_EXTEND_SIZE | SECTION_MAP_WRITE + : SECTION_QUERY | SECTION_MAP_READ | + SECTION_EXTEND_SIZE, + /* ObjectAttributes */ nullptr, + /* MaximumSize (InitialSize) */ &SectionSize, + /* SectionPageProtection */ + (flags & MDBX_RDONLY) ? PAGE_READONLY : PAGE_READWRITE, + /* AllocationAttributes */ SEC_RESERVE, map->fd); if (!NT_SUCCESS(err)) return ntstatus2errcode(err); - SIZE_T ViewSize = (flags & MDBX_RDONLY) ? 0 - : mdbx_RunningUnderWine() ? size - : limit; + SIZE_T ViewSize = (flags & MDBX_RDONLY) ? 0 + : globals.running_under_Wine ? size + : limit; err = NtMapViewOfSection( map->section, GetCurrentProcess(), &map->base, /* ZeroBits */ 0, /* CommitSize */ 0, - /* SectionOffset */ NULL, &ViewSize, + /* SectionOffset */ nullptr, &ViewSize, /* InheritDisposition */ ViewUnmap, /* AllocationType */ (flags & MDBX_RDONLY) ? 0 : MEM_RESERVE, /* Win32Protect */ @@ -2306,13 +2291,14 @@ MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map, size_t size, #define MAP_NORESERVE 0 #endif - map->base = mmap( - NULL, limit, (flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ, - MAP_SHARED | MAP_FILE | MAP_NORESERVE | - (F_ISSET(flags, MDBX_UTTERLY_NOSYNC) ? MAP_NOSYNC : 0) | - ((options & MMAP_OPTION_SEMAPHORE) ? MAP_HASSEMAPHORE | MAP_NOSYNC - : MAP_CONCEAL), - map->fd, 0); + map->base = mmap(nullptr, limit, + (flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ, + MAP_SHARED | MAP_FILE | MAP_NORESERVE | + (F_ISSET(flags, MDBX_UTTERLY_NOSYNC) ? 
MAP_NOSYNC : 0) | + ((options & MMAP_OPTION_SEMAPHORE) + ? MAP_HASSEMAPHORE | MAP_NOSYNC + : MAP_CONCEAL), + map->fd, 0); if (unlikely(map->base == MAP_FAILED)) { map->limit = 0; @@ -2340,7 +2326,7 @@ MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map, size_t size, return MDBX_SUCCESS; } -MDBX_INTERNAL_FUNC int osal_munmap(osal_mmap_t *map) { +MDBX_INTERNAL int osal_munmap(osal_mmap_t *map) { VALGRIND_MAKE_MEM_NOACCESS(map->base, map->current); /* Unpoisoning is required for ASAN to avoid false-positive diagnostic * when this memory will re-used by malloc or another mmapping. @@ -2367,8 +2353,8 @@ MDBX_INTERNAL_FUNC int osal_munmap(osal_mmap_t *map) { return MDBX_SUCCESS; } -MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map, - size_t size, size_t limit) { +MDBX_INTERNAL int osal_mresize(const int flags, osal_mmap_t *map, size_t size, + size_t limit) { int rc = osal_filesize(map->fd, &map->filesize); VERBOSE("flags 0x%x, size %zu, limit %zu, filesize %" PRIu64, flags, size, limit, map->filesize); @@ -2390,10 +2376,10 @@ MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map, map->current = size; return MDBX_SUCCESS; } else if (!(flags & MDBX_RDONLY) && - /* workaround for Wine */ mdbx_NtExtendSection) { + /* workaround for Wine */ imports.NtExtendSection) { /* growth rw-section */ SectionSize.QuadPart = size; - status = mdbx_NtExtendSection(map->section, &SectionSize); + status = imports.NtExtendSection(map->section, &SectionSize); if (!NT_SUCCESS(status)) return ntstatus2errcode(status); map->current = size; @@ -2443,14 +2429,14 @@ MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map, if (!NT_SUCCESS(status)) return ntstatus2errcode(status); status = NtClose(map->section); - map->section = NULL; - PVOID ReservedAddress = NULL; + map->section = nullptr; + PVOID ReservedAddress = nullptr; SIZE_T ReservedSize = limit; if (!NT_SUCCESS(status)) { bailout_ntstatus: err = ntstatus2errcode(status); - 
map->base = NULL; + map->base = nullptr; map->current = map->limit = 0; if (ReservedAddress) { ReservedSize = 0; @@ -2469,13 +2455,13 @@ retry_file_and_section: status = NtAllocateVirtualMemory(GetCurrentProcess(), &ReservedAddress, 0, &ReservedSize, MEM_RESERVE, PAGE_NOACCESS); if (!NT_SUCCESS(status)) { - ReservedAddress = NULL; + ReservedAddress = nullptr; if (status != (NTSTATUS) /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018) goto bailout_ntstatus /* no way to recovery */; if (flags & MDBX_MRESIZE_MAY_MOVE) /* the base address could be changed */ - map->base = NULL; + map->base = nullptr; } if ((flags & MDBX_RDONLY) == 0 && map->filesize != size) { @@ -2494,7 +2480,7 @@ retry_file_and_section: ? SECTION_QUERY | SECTION_MAP_READ | SECTION_EXTEND_SIZE | SECTION_MAP_WRITE : SECTION_QUERY | SECTION_MAP_READ | SECTION_EXTEND_SIZE, - /* ObjectAttributes */ NULL, + /* ObjectAttributes */ nullptr, /* MaximumSize (InitialSize) */ &SectionSize, /* SectionPageProtection */ (flags & MDBX_RDONLY) ? PAGE_READONLY : PAGE_READWRITE, @@ -2508,7 +2494,7 @@ retry_file_and_section: ReservedSize = 0; status = NtFreeVirtualMemory(GetCurrentProcess(), &ReservedAddress, &ReservedSize, MEM_RELEASE); - ReservedAddress = NULL; + ReservedAddress = nullptr; if (!NT_SUCCESS(status)) goto bailout_ntstatus; } @@ -2519,7 +2505,7 @@ retry_mapview:; map->section, GetCurrentProcess(), &map->base, /* ZeroBits */ 0, /* CommitSize */ 0, - /* SectionOffset */ NULL, &ViewSize, + /* SectionOffset */ nullptr, &ViewSize, /* InheritDisposition */ ViewUnmap, /* AllocationType */ (flags & MDBX_RDONLY) ? 
0 : MEM_RESERVE, /* Win32Protect */ @@ -2529,11 +2515,11 @@ retry_mapview:; if (status == (NTSTATUS) /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018 && map->base && (flags & MDBX_MRESIZE_MAY_MOVE) != 0) { /* try remap at another base address */ - map->base = NULL; + map->base = nullptr; goto retry_mapview; } NtClose(map->section); - map->section = NULL; + map->section = nullptr; if (map->base && (size != map->current || limit != map->limit)) { /* try remap with previously size and limit, @@ -2562,7 +2548,7 @@ retry_mapview:; map->current = (map->filesize > limit) ? limit : (size_t)map->filesize; } else { if (size > map->filesize || - (size < map->filesize && (flags & MDBX_SHRINK_ALLOWED))) { + (size < map->filesize && (flags & txn_shrink_allowed))) { rc = osal_ftruncate(map->fd, size); VERBOSE("ftruncate %zu, err %d", size, rc); if (rc != MDBX_SUCCESS) @@ -2769,7 +2755,7 @@ retry_mapview:; /*----------------------------------------------------------------------------*/ -__cold MDBX_INTERNAL_FUNC void osal_jitter(bool tiny) { +__cold MDBX_INTERNAL void osal_jitter(bool tiny) { for (;;) { #if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ defined(__x86_64__) @@ -2825,7 +2811,7 @@ __cold static clockid_t choice_monoclock(void) { #define posix_clockid CLOCK_REALTIME #endif -MDBX_INTERNAL_FUNC uint64_t osal_16dot16_to_monotime(uint32_t seconds_16dot16) { +MDBX_INTERNAL uint64_t osal_16dot16_to_monotime(uint32_t seconds_16dot16) { #if defined(_WIN32) || defined(_WIN64) const uint64_t ratio = performance_frequency.QuadPart; #elif defined(__APPLE__) || defined(__MACH__) @@ -2838,7 +2824,7 @@ MDBX_INTERNAL_FUNC uint64_t osal_16dot16_to_monotime(uint32_t seconds_16dot16) { } static uint64_t monotime_limit; -MDBX_INTERNAL_FUNC uint32_t osal_monotime_to_16dot16(uint64_t monotime) { +MDBX_INTERNAL uint32_t osal_monotime_to_16dot16(uint64_t monotime) { if (unlikely(monotime > monotime_limit)) return UINT32_MAX; @@ -2853,7 +2839,7 @@ MDBX_INTERNAL_FUNC uint32_t 
osal_monotime_to_16dot16(uint64_t monotime) { return ret; } -MDBX_INTERNAL_FUNC uint64_t osal_monotime(void) { +MDBX_INTERNAL uint64_t osal_monotime(void) { #if defined(_WIN32) || defined(_WIN64) LARGE_INTEGER counter; if (QueryPerformanceCounter(&counter)) @@ -2868,7 +2854,7 @@ MDBX_INTERNAL_FUNC uint64_t osal_monotime(void) { return 0; } -MDBX_INTERNAL_FUNC uint64_t osal_cputime(size_t *optional_page_faults) { +MDBX_INTERNAL uint64_t osal_cputime(size_t *optional_page_faults) { #if defined(_WIN32) || defined(_WIN64) if (optional_page_faults) { PROCESS_MEMORY_COUNTERS pmc; @@ -2967,11 +2953,11 @@ __cold static uint64_t windows_systemtime_ms() { __cold static uint64_t windows_bootime(void) { unsigned confirmed = 0; uint64_t boottime = 0; - uint64_t up0 = mdbx_GetTickCount64(); + uint64_t up0 = imports.GetTickCount64(); uint64_t st0 = windows_systemtime_ms(); for (uint64_t fuse = st0; up0 && st0 < fuse + 1000 * 1000u / 42;) { YieldProcessor(); - const uint64_t up1 = mdbx_GetTickCount64(); + const uint64_t up1 = imports.GetTickCount64(); const uint64_t st1 = windows_systemtime_ms(); if (st1 > fuse && st1 == st0 && up1 == up0) { uint64_t diff = st1 - up1; @@ -2995,30 +2981,32 @@ __cold static LSTATUS mdbx_RegGetValue(HKEY hKey, LPCSTR lpSubKey, LPCSTR lpValue, PVOID pvData, LPDWORD pcbData) { LSTATUS rc; - if (!mdbx_RegGetValueA) { + if (!imports.RegGetValueA) { /* an old Windows 2000/XP */ HKEY hSubKey; rc = RegOpenKeyA(hKey, lpSubKey, &hSubKey); if (rc == ERROR_SUCCESS) { - rc = RegQueryValueExA(hSubKey, lpValue, NULL, NULL, pvData, pcbData); + rc = + RegQueryValueExA(hSubKey, lpValue, nullptr, nullptr, pvData, pcbData); RegCloseKey(hSubKey); } return rc; } - rc = mdbx_RegGetValueA(hKey, lpSubKey, lpValue, RRF_RT_ANY, NULL, pvData, - pcbData); + rc = imports.RegGetValueA(hKey, lpSubKey, lpValue, RRF_RT_ANY, nullptr, + pvData, pcbData); if (rc != ERROR_FILE_NOT_FOUND) return rc; - rc = mdbx_RegGetValueA(hKey, lpSubKey, lpValue, - RRF_RT_ANY | 0x00010000 /* 
RRF_SUBKEY_WOW6464KEY */, - NULL, pvData, pcbData); + rc = imports.RegGetValueA(hKey, lpSubKey, lpValue, + RRF_RT_ANY | 0x00010000 /* RRF_SUBKEY_WOW6464KEY */, + nullptr, pvData, pcbData); if (rc != ERROR_FILE_NOT_FOUND) return rc; - return mdbx_RegGetValueA(hKey, lpSubKey, lpValue, - RRF_RT_ANY | 0x00020000 /* RRF_SUBKEY_WOW6432KEY */, - NULL, pvData, pcbData); + return imports.RegGetValueA(hKey, lpSubKey, lpValue, + RRF_RT_ANY | + 0x00020000 /* RRF_SUBKEY_WOW6432KEY */, + nullptr, pvData, pcbData); } #endif @@ -3063,7 +3051,7 @@ bootid_parse_uuid(bin128_t *s, const void *p, const size_t n) { return false; } -__cold MDBX_INTERNAL_FUNC bin128_t osal_bootid(void) { +__cold static bin128_t osal_bootid(void) { bin128_t bin = {{0, 0}}; bool got_machineid = false, got_boottime = false, got_bootseq = false; @@ -3234,7 +3222,7 @@ __cold MDBX_INTERNAL_FUNC bin128_t osal_bootid(void) { (int *) #endif mib, - ARRAY_LENGTH(mib), &buf, &len, NULL, 0) == 0) + ARRAY_LENGTH(mib), &buf, &len, nullptr, 0) == 0) got_machineid = bootid_parse_uuid(&bin, buf, len); } #endif /* CTL_HW && HW_UUID */ @@ -3249,7 +3237,7 @@ __cold MDBX_INTERNAL_FUNC bin128_t osal_bootid(void) { (int *) #endif mib, - ARRAY_LENGTH(mib), &buf, &len, NULL, 0) == 0) + ARRAY_LENGTH(mib), &buf, &len, nullptr, 0) == 0) got_machineid = bootid_parse_uuid(&bin, buf, len); } #endif /* CTL_KERN && KERN_HOSTUUID */ @@ -3258,7 +3246,7 @@ __cold MDBX_INTERNAL_FUNC bin128_t osal_bootid(void) { if (!got_machineid) { char buf[42]; size_t len = sizeof(buf); - if (sysctlbyname("machdep.dmi.system-uuid", buf, &len, NULL, 0) == 0) + if (sysctlbyname("machdep.dmi.system-uuid", buf, &len, nullptr, 0) == 0) got_machineid = bootid_parse_uuid(&bin, buf, len); } #endif /* __NetBSD__ */ @@ -3291,7 +3279,7 @@ __cold MDBX_INTERNAL_FUNC bin128_t osal_bootid(void) { (int *) #endif mib, - ARRAY_LENGTH(mib), &boottime, &len, NULL, 0) == 0 && + ARRAY_LENGTH(mib), &boottime, &len, nullptr, 0) == 0 && len == sizeof(boottime) && boottime.tv_sec) { 
bootid_collect(&bin, &boottime, len); got_boottime = true; @@ -3376,10 +3364,10 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, if (avail_pages) *avail_pages = -1; - const intptr_t pagesize = osal_syspagesize(); + const intptr_t pagesize = globals.sys_pagesize; if (page_size) *page_size = pagesize; - if (unlikely(pagesize < MIN_PAGESIZE || !is_powerof2(pagesize))) + if (unlikely(pagesize < MDBX_MIN_PAGESIZE || !is_powerof2(pagesize))) return MDBX_INCOMPATIBLE; MDBX_MAYBE_UNUSED const int log2page = log2n_powerof2(pagesize); @@ -3409,16 +3397,15 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, #elif defined(HW_USERMEM) || defined(HW_PHYSMEM64) || defined(HW_MEMSIZE) || \ defined(HW_PHYSMEM) size_t ram, len = sizeof(ram); - static const int mib[] = { - CTL_HW, + static const int mib[] = {CTL_HW, #if defined(HW_USERMEM) - HW_USERMEM + HW_USERMEM #elif defined(HW_PHYSMEM64) - HW_PHYSMEM64 + HW_PHYSMEM64 #elif defined(HW_MEMSIZE) - HW_MEMSIZE + HW_MEMSIZE #else - HW_PHYSMEM + HW_PHYSMEM #endif }; if (sysctl( @@ -3426,7 +3413,7 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, (int *) #endif mib, - ARRAY_LENGTH(mib), &ram, &len, NULL, 0) != 0) + ARRAY_LENGTH(mib), &ram, &len, nullptr, 0) != 0) return errno; if (len != sizeof(ram)) return MDBX_ENOSYS; @@ -3459,12 +3446,11 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, #elif defined(VM_TOTAL) || defined(VM_METER) struct vmtotal info; size_t len = sizeof(info); - static const int mib[] = { - CTL_VM, + static const int mib[] = {CTL_VM, #if defined(VM_TOTAL) - VM_TOTAL + VM_TOTAL #elif defined(VM_METER) - VM_METER + VM_METER #endif }; if (sysctl( @@ -3472,7 +3458,7 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, (int *) #endif mib, - ARRAY_LENGTH(mib), &info, &len, NULL, 0) != 0) + ARRAY_LENGTH(mib), &info, &len, nullptr, 0) != 0) return errno; if (len != sizeof(info)) return 
MDBX_ENOSYS; @@ -3488,9 +3474,6 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, return MDBX_SUCCESS; } -MDBX_INTERNAL_VAR_INSTA unsigned sys_pagesize, sys_pagesize_ln2, - sys_allocation_granularity; - void osal_ctor(void) { #if MDBX_HAVE_PWRITEV && defined(_SC_IOV_MAX) osal_iov_max = sysconf(_SC_IOV_MAX); @@ -3502,19 +3485,21 @@ void osal_ctor(void) { #if defined(_WIN32) || defined(_WIN64) SYSTEM_INFO si; GetSystemInfo(&si); - sys_pagesize = si.dwPageSize; - sys_allocation_granularity = si.dwAllocationGranularity; + globals.sys_pagesize = si.dwPageSize; + globals.sys_allocation_granularity = si.dwAllocationGranularity; #else - sys_pagesize = sysconf(_SC_PAGE_SIZE); - sys_allocation_granularity = (MDBX_WORDBITS > 32) ? 65536 : 4096; - sys_allocation_granularity = (sys_allocation_granularity > sys_pagesize) - ? sys_allocation_granularity - : sys_pagesize; + globals.sys_pagesize = sysconf(_SC_PAGE_SIZE); + globals.sys_allocation_granularity = (MDBX_WORDBITS > 32) ? 65536 : 4096; + globals.sys_allocation_granularity = + (globals.sys_allocation_granularity > globals.sys_pagesize) + ? 
globals.sys_allocation_granularity + : globals.sys_pagesize; #endif - assert(sys_pagesize > 0 && (sys_pagesize & (sys_pagesize - 1)) == 0); - assert(sys_allocation_granularity >= sys_pagesize && - sys_allocation_granularity % sys_pagesize == 0); - sys_pagesize_ln2 = log2n_powerof2(sys_pagesize); + assert(globals.sys_pagesize > 0 && + (globals.sys_pagesize & (globals.sys_pagesize - 1)) == 0); + assert(globals.sys_allocation_granularity >= globals.sys_pagesize && + globals.sys_allocation_granularity % globals.sys_pagesize == 0); + globals.sys_pagesize_ln2 = log2n_powerof2(globals.sys_pagesize); #if defined(__linux__) || defined(__gnu_linux__) posix_clockid = choice_monoclock(); @@ -3528,6 +3513,21 @@ void osal_ctor(void) { ratio_16dot16_to_monotine = UINT64_C(1000000000) * ti.denom / ti.numer; #endif monotime_limit = osal_16dot16_to_monotime(UINT32_MAX - 1); + + uint32_t proba = UINT32_MAX; + while (true) { + unsigned time_conversion_checkup = + osal_monotime_to_16dot16(osal_16dot16_to_monotime(proba)); + unsigned one_more = (proba < UINT32_MAX) ? proba + 1 : proba; + unsigned one_less = (proba > 0) ? proba - 1 : proba; + ENSURE(nullptr, time_conversion_checkup >= one_less && + time_conversion_checkup <= one_more); + if (proba == 0) + break; + proba >>= 1; + } + + globals.bootid = osal_bootid(); } void osal_dtor(void) {} diff --git a/src/osal.h b/src/osal.h index bb1651fa..15831c99 100644 --- a/src/osal.h +++ b/src/osal.h @@ -1,50 +1,11 @@ -/* https://en.wikipedia.org/wiki/Operating_system_abstraction_layer */ - -/* - * Copyright 2015-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// +/// https://en.wikipedia.org/wiki/Operating_system_abstraction_layer #pragma once -/*----------------------------------------------------------------------------*/ -/* C11 Atomics */ - -#if defined(__cplusplus) && !defined(__STDC_NO_ATOMICS__) && __has_include() -#include -#define MDBX_HAVE_C11ATOMICS -#elif !defined(__cplusplus) && \ - (__STDC_VERSION__ >= 201112L || __has_extension(c_atomic)) && \ - !defined(__STDC_NO_ATOMICS__) && \ - (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || \ - !(defined(__GNUC__) || defined(__clang__))) -#include -#define MDBX_HAVE_C11ATOMICS -#elif defined(__GNUC__) || defined(__clang__) -#elif defined(_MSC_VER) -#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */ -#pragma warning(disable : 4133) /* 'function': incompatible types - from \ - 'size_t' to 'LONGLONG' */ -#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \ - 'std::size_t', possible loss of data */ -#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to \ - 'long', possible loss of data */ -#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange) -#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64) -#elif defined(__APPLE__) -#include -#else -#error FIXME atomic-ops -#endif +#include "essentials.h" /*----------------------------------------------------------------------------*/ /* Memory/Compiler barriers, cache coherence */ @@ -58,7 +19,7 @@ #include #endif -MDBX_MAYBE_UNUSED static __inline void osal_compiler_barrier(void) { +MDBX_MAYBE_UNUSED static inline void osal_compiler_barrier(void) { #if defined(__clang__) || defined(__GNUC__) __asm__ __volatile__("" ::: "memory"); #elif defined(_MSC_VER) @@ -78,7 +39,7 @@ MDBX_MAYBE_UNUSED static __inline void osal_compiler_barrier(void) { #endif } -MDBX_MAYBE_UNUSED static __inline void 
osal_memory_barrier(void) { +MDBX_MAYBE_UNUSED static inline void osal_memory_barrier(void) { #ifdef MDBX_HAVE_C11ATOMICS atomic_thread_fence(memory_order_seq_cst); #elif defined(__ATOMIC_SEQ_CST) @@ -118,7 +79,7 @@ MDBX_MAYBE_UNUSED static __inline void osal_memory_barrier(void) { #define HAVE_SYS_TYPES_H typedef HANDLE osal_thread_t; typedef unsigned osal_thread_key_t; -#define MAP_FAILED NULL +#define MAP_FAILED nullptr #define HIGH_DWORD(v) ((DWORD)((sizeof(v) > 4) ? ((uint64_t)(v) >> 32) : 0)) #define THREAD_CALL WINAPI #define THREAD_RESULT DWORD @@ -210,19 +171,6 @@ typedef pthread_mutex_t osal_fastmutex_t; /*----------------------------------------------------------------------------*/ /* OS abstraction layer stuff */ -MDBX_INTERNAL_VAR_PROTO unsigned sys_pagesize; -MDBX_MAYBE_UNUSED MDBX_INTERNAL_VAR_PROTO unsigned sys_pagesize_ln2, - sys_allocation_granularity; - -/* Get the size of a memory page for the system. - * This is the basic size that the platform's memory manager uses, and is - * fundamental to the use of memory-mapped files. 
*/ -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline size_t -osal_syspagesize(void) { - assert(sys_pagesize > 0 && (sys_pagesize & (sys_pagesize - 1)) == 0); - return sys_pagesize; -} - #if defined(_WIN32) || defined(_WIN64) typedef wchar_t pathchar_t; #define MDBX_PRIsPATH "ls" @@ -234,7 +182,7 @@ typedef char pathchar_t; typedef struct osal_mmap { union { void *base; - struct MDBX_lockinfo *lck; + struct shared_lck *lck; }; mdbx_filehandle_t fd; size_t limit; /* mapping length, but NOT a size of file nor DB */ @@ -245,25 +193,6 @@ typedef struct osal_mmap { #endif } osal_mmap_t; -typedef union bin128 { - __anonymous_struct_extension__ struct { - uint64_t x, y; - }; - __anonymous_struct_extension__ struct { - uint32_t a, b, c, d; - }; -} bin128_t; - -#if defined(_WIN32) || defined(_WIN64) -typedef union osal_srwlock { - __anonymous_struct_extension__ struct { - long volatile readerCount; - long volatile writerCount; - }; - RTL_SRWLOCK native; -} osal_srwlock_t; -#endif /* Windows */ - #ifndef MDBX_HAVE_PWRITEV #if defined(_WIN32) || defined(_WIN64) @@ -346,32 +275,30 @@ typedef struct osal_ioring { char *boundary; } osal_ioring_t; -#ifndef __cplusplus - /* Actually this is not ioring for now, but on the way. 
*/ -MDBX_INTERNAL_FUNC int osal_ioring_create(osal_ioring_t * +MDBX_INTERNAL int osal_ioring_create(osal_ioring_t * #if defined(_WIN32) || defined(_WIN64) - , - bool enable_direct, - mdbx_filehandle_t overlapped_fd + , + bool enable_direct, + mdbx_filehandle_t overlapped_fd #endif /* Windows */ ); -MDBX_INTERNAL_FUNC int osal_ioring_resize(osal_ioring_t *, size_t items); -MDBX_INTERNAL_FUNC void osal_ioring_destroy(osal_ioring_t *); -MDBX_INTERNAL_FUNC void osal_ioring_reset(osal_ioring_t *); -MDBX_INTERNAL_FUNC int osal_ioring_add(osal_ioring_t *ctx, const size_t offset, - void *data, const size_t bytes); +MDBX_INTERNAL int osal_ioring_resize(osal_ioring_t *, size_t items); +MDBX_INTERNAL void osal_ioring_destroy(osal_ioring_t *); +MDBX_INTERNAL void osal_ioring_reset(osal_ioring_t *); +MDBX_INTERNAL int osal_ioring_add(osal_ioring_t *ctx, const size_t offset, + void *data, const size_t bytes); typedef struct osal_ioring_write_result { int err; unsigned wops; } osal_ioring_write_result_t; -MDBX_INTERNAL_FUNC osal_ioring_write_result_t +MDBX_INTERNAL osal_ioring_write_result_t osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd); -typedef struct iov_ctx iov_ctx_t; -MDBX_INTERNAL_FUNC void osal_ioring_walk( - osal_ioring_t *ior, iov_ctx_t *ctx, - void (*callback)(iov_ctx_t *ctx, size_t offset, void *data, size_t bytes)); +MDBX_INTERNAL void osal_ioring_walk(osal_ioring_t *ior, iov_ctx_t *ctx, + void (*callback)(iov_ctx_t *ctx, + size_t offset, void *data, + size_t bytes)); MDBX_MAYBE_UNUSED static inline unsigned osal_ioring_left(const osal_ioring_t *ior) { @@ -408,9 +335,9 @@ osal_ioring_prepare(osal_ioring_t *ior, size_t items, size_t bytes) { #define osal_asprintf asprintf #define osal_vasprintf vasprintf #else -MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC +MDBX_MAYBE_UNUSED MDBX_INTERNAL MDBX_PRINTF_ARGS(2, 3) int osal_asprintf(char **strp, const char *fmt, ...); -MDBX_INTERNAL_FUNC int osal_vasprintf(char **strp, const char *fmt, va_list ap); +MDBX_INTERNAL int 
osal_vasprintf(char **strp, const char *fmt, va_list ap); #endif #if !defined(MADV_DODUMP) && defined(MADV_CORE) @@ -421,8 +348,7 @@ MDBX_INTERNAL_FUNC int osal_vasprintf(char **strp, const char *fmt, va_list ap); #define MADV_DONTDUMP MADV_NOCORE #endif /* MADV_NOCORE -> MADV_DONTDUMP */ -MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC void osal_jitter(bool tiny); -MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny); +MDBX_MAYBE_UNUSED MDBX_INTERNAL void osal_jitter(bool tiny); /* max bytes to write in one call */ #if defined(_WIN64) @@ -472,19 +398,13 @@ MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny); #endif /* OFF_T_MAX */ #endif /* MDBX_F_OFD_SETLK64, MDBX_F_OFD_SETLKW64, MDBX_F_OFD_GETLK64 */ -#endif - -#if defined(__linux__) || defined(__gnu_linux__) -MDBX_INTERNAL_VAR_PROTO uint32_t linux_kernel_version; -MDBX_INTERNAL_VAR_PROTO bool - mdbx_RunningOnWSL1 /* Windows Subsystem 1 for Linux */; -#endif /* Linux */ +#endif /* !Windows */ #ifndef osal_strdup LIBMDBX_API char *osal_strdup(const char *str); #endif -MDBX_MAYBE_UNUSED static __inline int osal_get_errno(void) { +MDBX_MAYBE_UNUSED static inline int osal_get_errno(void) { #if defined(_WIN32) || defined(_WIN64) DWORD rc = GetLastError(); #else @@ -494,40 +414,39 @@ MDBX_MAYBE_UNUSED static __inline int osal_get_errno(void) { } #ifndef osal_memalign_alloc -MDBX_INTERNAL_FUNC int osal_memalign_alloc(size_t alignment, size_t bytes, - void **result); +MDBX_INTERNAL int osal_memalign_alloc(size_t alignment, size_t bytes, + void **result); #endif #ifndef osal_memalign_free -MDBX_INTERNAL_FUNC void osal_memalign_free(void *ptr); +MDBX_INTERNAL void osal_memalign_free(void *ptr); #endif -MDBX_INTERNAL_FUNC int osal_condpair_init(osal_condpair_t *condpair); -MDBX_INTERNAL_FUNC int osal_condpair_lock(osal_condpair_t *condpair); -MDBX_INTERNAL_FUNC int osal_condpair_unlock(osal_condpair_t *condpair); -MDBX_INTERNAL_FUNC int osal_condpair_signal(osal_condpair_t *condpair, - bool part); 
-MDBX_INTERNAL_FUNC int osal_condpair_wait(osal_condpair_t *condpair, bool part); -MDBX_INTERNAL_FUNC int osal_condpair_destroy(osal_condpair_t *condpair); +MDBX_INTERNAL int osal_condpair_init(osal_condpair_t *condpair); +MDBX_INTERNAL int osal_condpair_lock(osal_condpair_t *condpair); +MDBX_INTERNAL int osal_condpair_unlock(osal_condpair_t *condpair); +MDBX_INTERNAL int osal_condpair_signal(osal_condpair_t *condpair, bool part); +MDBX_INTERNAL int osal_condpair_wait(osal_condpair_t *condpair, bool part); +MDBX_INTERNAL int osal_condpair_destroy(osal_condpair_t *condpair); -MDBX_INTERNAL_FUNC int osal_fastmutex_init(osal_fastmutex_t *fastmutex); -MDBX_INTERNAL_FUNC int osal_fastmutex_acquire(osal_fastmutex_t *fastmutex); -MDBX_INTERNAL_FUNC int osal_fastmutex_release(osal_fastmutex_t *fastmutex); -MDBX_INTERNAL_FUNC int osal_fastmutex_destroy(osal_fastmutex_t *fastmutex); +MDBX_INTERNAL int osal_fastmutex_init(osal_fastmutex_t *fastmutex); +MDBX_INTERNAL int osal_fastmutex_acquire(osal_fastmutex_t *fastmutex); +MDBX_INTERNAL int osal_fastmutex_release(osal_fastmutex_t *fastmutex); +MDBX_INTERNAL int osal_fastmutex_destroy(osal_fastmutex_t *fastmutex); -MDBX_INTERNAL_FUNC int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov, - size_t sgvcnt, uint64_t offset); -MDBX_INTERNAL_FUNC int osal_pread(mdbx_filehandle_t fd, void *buf, size_t count, - uint64_t offset); -MDBX_INTERNAL_FUNC int osal_pwrite(mdbx_filehandle_t fd, const void *buf, - size_t count, uint64_t offset); -MDBX_INTERNAL_FUNC int osal_write(mdbx_filehandle_t fd, const void *buf, - size_t count); +MDBX_INTERNAL int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov, + size_t sgvcnt, uint64_t offset); +MDBX_INTERNAL int osal_pread(mdbx_filehandle_t fd, void *buf, size_t count, + uint64_t offset); +MDBX_INTERNAL int osal_pwrite(mdbx_filehandle_t fd, const void *buf, + size_t count, uint64_t offset); +MDBX_INTERNAL int osal_write(mdbx_filehandle_t fd, const void *buf, + size_t count); 
-MDBX_INTERNAL_FUNC int +MDBX_INTERNAL int osal_thread_create(osal_thread_t *thread, THREAD_RESULT(THREAD_CALL *start_routine)(void *), void *arg); -MDBX_INTERNAL_FUNC int osal_thread_join(osal_thread_t thread); +MDBX_INTERNAL int osal_thread_join(osal_thread_t thread); enum osal_syncmode_bits { MDBX_SYNC_NONE = 0, @@ -537,11 +456,11 @@ enum osal_syncmode_bits { MDBX_SYNC_IODQ = 8 }; -MDBX_INTERNAL_FUNC int osal_fsync(mdbx_filehandle_t fd, - const enum osal_syncmode_bits mode_bits); -MDBX_INTERNAL_FUNC int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length); -MDBX_INTERNAL_FUNC int osal_fseek(mdbx_filehandle_t fd, uint64_t pos); -MDBX_INTERNAL_FUNC int osal_filesize(mdbx_filehandle_t fd, uint64_t *length); +MDBX_INTERNAL int osal_fsync(mdbx_filehandle_t fd, + const enum osal_syncmode_bits mode_bits); +MDBX_INTERNAL int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length); +MDBX_INTERNAL int osal_fseek(mdbx_filehandle_t fd, uint64_t pos); +MDBX_INTERNAL int osal_filesize(mdbx_filehandle_t fd, uint64_t *length); enum osal_openfile_purpose { MDBX_OPEN_DXB_READ, @@ -556,7 +475,7 @@ enum osal_openfile_purpose { MDBX_OPEN_DELETE }; -MDBX_MAYBE_UNUSED static __inline bool osal_isdirsep(pathchar_t c) { +MDBX_MAYBE_UNUSED static inline bool osal_isdirsep(pathchar_t c) { return #if defined(_WIN32) || defined(_WIN64) c == '\\' || @@ -564,50 +483,45 @@ MDBX_MAYBE_UNUSED static __inline bool osal_isdirsep(pathchar_t c) { c == '/'; } -MDBX_INTERNAL_FUNC bool osal_pathequal(const pathchar_t *l, const pathchar_t *r, - size_t len); -MDBX_INTERNAL_FUNC pathchar_t *osal_fileext(const pathchar_t *pathname, - size_t len); -MDBX_INTERNAL_FUNC int osal_fileexists(const pathchar_t *pathname); -MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose, - const MDBX_env *env, - const pathchar_t *pathname, - mdbx_filehandle_t *fd, - mdbx_mode_t unix_mode_bits); -MDBX_INTERNAL_FUNC int osal_closefile(mdbx_filehandle_t fd); -MDBX_INTERNAL_FUNC int osal_removefile(const 
pathchar_t *pathname); -MDBX_INTERNAL_FUNC int osal_removedirectory(const pathchar_t *pathname); -MDBX_INTERNAL_FUNC int osal_is_pipe(mdbx_filehandle_t fd); -MDBX_INTERNAL_FUNC int osal_lockfile(mdbx_filehandle_t fd, bool wait); +MDBX_INTERNAL bool osal_pathequal(const pathchar_t *l, const pathchar_t *r, + size_t len); +MDBX_INTERNAL pathchar_t *osal_fileext(const pathchar_t *pathname, size_t len); +MDBX_INTERNAL int osal_fileexists(const pathchar_t *pathname); +MDBX_INTERNAL int osal_openfile(const enum osal_openfile_purpose purpose, + const MDBX_env *env, const pathchar_t *pathname, + mdbx_filehandle_t *fd, + mdbx_mode_t unix_mode_bits); +MDBX_INTERNAL int osal_closefile(mdbx_filehandle_t fd); +MDBX_INTERNAL int osal_removefile(const pathchar_t *pathname); +MDBX_INTERNAL int osal_removedirectory(const pathchar_t *pathname); +MDBX_INTERNAL int osal_is_pipe(mdbx_filehandle_t fd); +MDBX_INTERNAL int osal_lockfile(mdbx_filehandle_t fd, bool wait); #define MMAP_OPTION_TRUNCATE 1 #define MMAP_OPTION_SEMAPHORE 2 -MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map, size_t size, - const size_t limit, const unsigned options); -MDBX_INTERNAL_FUNC int osal_munmap(osal_mmap_t *map); +MDBX_INTERNAL int osal_mmap(const int flags, osal_mmap_t *map, size_t size, + const size_t limit, const unsigned options); +MDBX_INTERNAL int osal_munmap(osal_mmap_t *map); #define MDBX_MRESIZE_MAY_MOVE 0x00000100 #define MDBX_MRESIZE_MAY_UNMAP 0x00000200 -MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map, - size_t size, size_t limit); +MDBX_INTERNAL int osal_mresize(const int flags, osal_mmap_t *map, size_t size, + size_t limit); #if defined(_WIN32) || defined(_WIN64) typedef struct { unsigned limit, count; HANDLE handles[31]; } mdbx_handle_array_t; -MDBX_INTERNAL_FUNC int +MDBX_INTERNAL int osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array); -MDBX_INTERNAL_FUNC int -osal_resume_threads_after_remap(mdbx_handle_array_t *array); 
+MDBX_INTERNAL int osal_resume_threads_after_remap(mdbx_handle_array_t *array); #endif /* Windows */ -MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset, - size_t length, - enum osal_syncmode_bits mode_bits); -MDBX_INTERNAL_FUNC int osal_check_fs_rdonly(mdbx_filehandle_t handle, - const pathchar_t *pathname, - int err); -MDBX_INTERNAL_FUNC int osal_check_fs_incore(mdbx_filehandle_t handle); +MDBX_INTERNAL int osal_msync(const osal_mmap_t *map, size_t offset, + size_t length, enum osal_syncmode_bits mode_bits); +MDBX_INTERNAL int osal_check_fs_rdonly(mdbx_filehandle_t handle, + const pathchar_t *pathname, int err); +MDBX_INTERNAL int osal_check_fs_incore(mdbx_filehandle_t handle); -MDBX_MAYBE_UNUSED static __inline uint32_t osal_getpid(void) { +MDBX_MAYBE_UNUSED static inline uint32_t osal_getpid(void) { STATIC_ASSERT(sizeof(mdbx_pid_t) <= sizeof(uint32_t)); #if defined(_WIN32) || defined(_WIN64) return GetCurrentProcessId(); @@ -617,7 +531,7 @@ MDBX_MAYBE_UNUSED static __inline uint32_t osal_getpid(void) { #endif } -MDBX_MAYBE_UNUSED static __inline uintptr_t osal_thread_self(void) { +MDBX_MAYBE_UNUSED static inline uintptr_t osal_thread_self(void) { mdbx_tid_t thunk; STATIC_ASSERT(sizeof(uintptr_t) >= sizeof(thunk)); #if defined(_WIN32) || defined(_WIN64) @@ -630,22 +544,22 @@ MDBX_MAYBE_UNUSED static __inline uintptr_t osal_thread_self(void) { #if !defined(_WIN32) && !defined(_WIN64) #if defined(__ANDROID_API__) || defined(ANDROID) || defined(BIONIC) -MDBX_INTERNAL_FUNC int osal_check_tid4bionic(void); +MDBX_INTERNAL int osal_check_tid4bionic(void); #else -static __inline int osal_check_tid4bionic(void) { return 0; } +static inline int osal_check_tid4bionic(void) { return 0; } #endif /* __ANDROID_API__ || ANDROID) || BIONIC */ -MDBX_MAYBE_UNUSED static __inline int +MDBX_MAYBE_UNUSED static inline int osal_pthread_mutex_lock(pthread_mutex_t *mutex) { int err = osal_check_tid4bionic(); return unlikely(err) ? 
err : pthread_mutex_lock(mutex); } #endif /* !Windows */ -MDBX_INTERNAL_FUNC uint64_t osal_monotime(void); -MDBX_INTERNAL_FUNC uint64_t osal_cputime(size_t *optional_page_faults); -MDBX_INTERNAL_FUNC uint64_t osal_16dot16_to_monotime(uint32_t seconds_16dot16); -MDBX_INTERNAL_FUNC uint32_t osal_monotime_to_16dot16(uint64_t monotime); +MDBX_INTERNAL uint64_t osal_monotime(void); +MDBX_INTERNAL uint64_t osal_cputime(size_t *optional_page_faults); +MDBX_INTERNAL uint64_t osal_16dot16_to_monotime(uint32_t seconds_16dot16); +MDBX_INTERNAL uint32_t osal_monotime_to_16dot16(uint64_t monotime); MDBX_MAYBE_UNUSED static inline uint32_t osal_monotime_to_16dot16_noUnderflow(uint64_t monotime) { @@ -653,249 +567,18 @@ osal_monotime_to_16dot16_noUnderflow(uint64_t monotime) { return seconds_16dot16 ? seconds_16dot16 : /* fix underflow */ (monotime > 0); } -MDBX_INTERNAL_FUNC bin128_t osal_bootid(void); /*----------------------------------------------------------------------------*/ -/* lck stuff */ -/// \brief Initialization of synchronization primitives linked with MDBX_env -/// instance both in LCK-file and within the current process. -/// \param -/// global_uniqueness_flag = true - denotes that there are no other processes -/// working with DB and LCK-file. Thus the function MUST initialize -/// shared synchronization objects in memory-mapped LCK-file. -/// global_uniqueness_flag = false - denotes that at least one process is -/// already working with DB and LCK-file, including the case when DB -/// has already been opened in the current process. Thus the function -/// MUST NOT initialize shared synchronization objects in memory-mapped -/// LCK-file that are already in use. -/// \return Error code or zero on success. 
-MDBX_INTERNAL_FUNC int osal_lck_init(MDBX_env *env, - MDBX_env *inprocess_neighbor, - int global_uniqueness_flag); - -/// \brief Disconnects from shared interprocess objects and destructs -/// synchronization objects linked with MDBX_env instance -/// within the current process. -/// \param -/// inprocess_neighbor = NULL - if the current process does not have other -/// instances of MDBX_env linked with the DB being closed. -/// Thus the function MUST check for other processes working with DB or -/// LCK-file, and keep or destroy shared synchronization objects in -/// memory-mapped LCK-file depending on the result. -/// inprocess_neighbor = not-NULL - pointer to another instance of MDBX_env -/// (anyone of there is several) working with DB or LCK-file within the -/// current process. Thus the function MUST NOT try to acquire exclusive -/// lock and/or try to destruct shared synchronization objects linked with -/// DB or LCK-file. Moreover, the implementation MUST ensure correct work -/// of other instances of MDBX_env within the current process, e.g. -/// restore POSIX-fcntl locks after the closing of file descriptors. -/// \return Error code (MDBX_PANIC) or zero on success. -MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, - MDBX_env *inprocess_neighbor, - const uint32_t current_pid); - -/// \brief Connects to shared interprocess locking objects and tries to acquire -/// the maximum lock level (shared if exclusive is not available) -/// Depending on implementation or/and platform (Windows) this function may -/// acquire the non-OS super-level lock (e.g. for shared synchronization -/// objects initialization), which will be downgraded to OS-exclusive or -/// shared via explicit calling of osal_lck_downgrade(). -/// \return -/// MDBX_RESULT_TRUE (-1) - if an exclusive lock was acquired and thus -/// the current process is the first and only after the last use of DB. 
-/// MDBX_RESULT_FALSE (0) - if a shared lock was acquired and thus -/// DB has already been opened and now is used by other processes. -/// Otherwise (not 0 and not -1) - error code. -MDBX_INTERNAL_FUNC int osal_lck_seize(MDBX_env *env); - -/// \brief Downgrades the level of initially acquired lock to -/// operational level specified by argument. The reason for such downgrade: -/// - unblocking of other processes that are waiting for access, i.e. -/// if (env->me_flags & MDBX_EXCLUSIVE) != 0, then other processes -/// should be made aware that access is unavailable rather than -/// wait for it. -/// - freeing locks that interfere file operation (especially for Windows) -/// (env->me_flags & MDBX_EXCLUSIVE) == 0 - downgrade to shared lock. -/// (env->me_flags & MDBX_EXCLUSIVE) != 0 - downgrade to exclusive -/// operational lock. -/// \return Error code or zero on success -MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env); -MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC int osal_lck_upgrade(MDBX_env *env, - bool dont_wait); - -/// \brief Locks LCK-file or/and table of readers for (de)registering. -/// \return Error code or zero on success -MDBX_INTERNAL_FUNC int osal_rdt_lock(MDBX_env *env); - -/// \brief Unlocks LCK-file or/and table of readers after (de)registering. -MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env); - -/// \brief Acquires write-transaction lock. -/// \return Error code or zero on success -MDBX_INTERNAL_FUNC int osal_txn_lock(MDBX_env *env, bool dont_wait); - -/// \brief Releases write-transaction lock.. -MDBX_INTERNAL_FUNC void osal_txn_unlock(MDBX_env *env); - -/// \brief Sets alive-flag of reader presence (indicative lock) for PID of -/// the current process. The function does no more than needed for -/// the correct working of osal_rpid_check() in other processes. 
-/// \return Error code or zero on success -MDBX_INTERNAL_FUNC int osal_rpid_set(MDBX_env *env); - -/// \brief Resets alive-flag of reader presence (indicative lock) -/// for PID of the current process. The function does no more than needed -/// for the correct working of osal_rpid_check() in other processes. -/// \return Error code or zero on success -MDBX_INTERNAL_FUNC int osal_rpid_clear(MDBX_env *env); - -/// \brief Checks for reading process status with the given pid with help of -/// alive-flag of presence (indicative lock) or using another way. -/// \return -/// MDBX_RESULT_TRUE (-1) - if the reader process with the given PID is alive -/// and working with DB (indicative lock is present). -/// MDBX_RESULT_FALSE (0) - if the reader process with the given PID is absent -/// or not working with DB (indicative lock is not present). -/// Otherwise (not 0 and not -1) - error code. -MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid); +MDBX_INTERNAL void osal_ctor(void); +MDBX_INTERNAL void osal_dtor(void); #if defined(_WIN32) || defined(_WIN64) - -MDBX_INTERNAL_FUNC int osal_mb2w(const char *const src, wchar_t **const pdst); - -typedef void(WINAPI *osal_srwlock_t_function)(osal_srwlock_t *); -MDBX_INTERNAL_VAR_PROTO osal_srwlock_t_function osal_srwlock_Init, - osal_srwlock_AcquireShared, osal_srwlock_ReleaseShared, - osal_srwlock_AcquireExclusive, osal_srwlock_ReleaseExclusive; - -#if _WIN32_WINNT < 0x0600 /* prior to Windows Vista */ -typedef enum _FILE_INFO_BY_HANDLE_CLASS { - FileBasicInfo, - FileStandardInfo, - FileNameInfo, - FileRenameInfo, - FileDispositionInfo, - FileAllocationInfo, - FileEndOfFileInfo, - FileStreamInfo, - FileCompressionInfo, - FileAttributeTagInfo, - FileIdBothDirectoryInfo, - FileIdBothDirectoryRestartInfo, - FileIoPriorityHintInfo, - FileRemoteProtocolInfo, - MaximumFileInfoByHandleClass -} FILE_INFO_BY_HANDLE_CLASS, - *PFILE_INFO_BY_HANDLE_CLASS; - -typedef struct _FILE_END_OF_FILE_INFO { - LARGE_INTEGER EndOfFile; -} 
FILE_END_OF_FILE_INFO, *PFILE_END_OF_FILE_INFO; - -#define REMOTE_PROTOCOL_INFO_FLAG_LOOPBACK 0x00000001 -#define REMOTE_PROTOCOL_INFO_FLAG_OFFLINE 0x00000002 - -typedef struct _FILE_REMOTE_PROTOCOL_INFO { - USHORT StructureVersion; - USHORT StructureSize; - DWORD Protocol; - USHORT ProtocolMajorVersion; - USHORT ProtocolMinorVersion; - USHORT ProtocolRevision; - USHORT Reserved; - DWORD Flags; - struct { - DWORD Reserved[8]; - } GenericReserved; - struct { - DWORD Reserved[16]; - } ProtocolSpecificReserved; -} FILE_REMOTE_PROTOCOL_INFO, *PFILE_REMOTE_PROTOCOL_INFO; - -#endif /* _WIN32_WINNT < 0x0600 (prior to Windows Vista) */ - -typedef BOOL(WINAPI *MDBX_GetFileInformationByHandleEx)( - _In_ HANDLE hFile, _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass, - _Out_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize); -MDBX_INTERNAL_VAR_PROTO MDBX_GetFileInformationByHandleEx - mdbx_GetFileInformationByHandleEx; - -typedef BOOL(WINAPI *MDBX_GetVolumeInformationByHandleW)( - _In_ HANDLE hFile, _Out_opt_ LPWSTR lpVolumeNameBuffer, - _In_ DWORD nVolumeNameSize, _Out_opt_ LPDWORD lpVolumeSerialNumber, - _Out_opt_ LPDWORD lpMaximumComponentLength, - _Out_opt_ LPDWORD lpFileSystemFlags, - _Out_opt_ LPWSTR lpFileSystemNameBuffer, _In_ DWORD nFileSystemNameSize); -MDBX_INTERNAL_VAR_PROTO MDBX_GetVolumeInformationByHandleW - mdbx_GetVolumeInformationByHandleW; - -typedef DWORD(WINAPI *MDBX_GetFinalPathNameByHandleW)(_In_ HANDLE hFile, - _Out_ LPWSTR lpszFilePath, - _In_ DWORD cchFilePath, - _In_ DWORD dwFlags); -MDBX_INTERNAL_VAR_PROTO MDBX_GetFinalPathNameByHandleW - mdbx_GetFinalPathNameByHandleW; - -typedef BOOL(WINAPI *MDBX_SetFileInformationByHandle)( - _In_ HANDLE hFile, _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass, - _Out_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize); -MDBX_INTERNAL_VAR_PROTO MDBX_SetFileInformationByHandle - mdbx_SetFileInformationByHandle; - -typedef NTSTATUS(NTAPI *MDBX_NtFsControlFile)( - IN HANDLE FileHandle, IN OUT HANDLE Event, - 
IN OUT PVOID /* PIO_APC_ROUTINE */ ApcRoutine, IN OUT PVOID ApcContext, - OUT PIO_STATUS_BLOCK IoStatusBlock, IN ULONG FsControlCode, - IN OUT PVOID InputBuffer, IN ULONG InputBufferLength, - OUT OPTIONAL PVOID OutputBuffer, IN ULONG OutputBufferLength); -MDBX_INTERNAL_VAR_PROTO MDBX_NtFsControlFile mdbx_NtFsControlFile; - -typedef uint64_t(WINAPI *MDBX_GetTickCount64)(void); -MDBX_INTERNAL_VAR_PROTO MDBX_GetTickCount64 mdbx_GetTickCount64; - -#if !defined(_WIN32_WINNT_WIN8) || _WIN32_WINNT < _WIN32_WINNT_WIN8 -typedef struct _WIN32_MEMORY_RANGE_ENTRY { - PVOID VirtualAddress; - SIZE_T NumberOfBytes; -} WIN32_MEMORY_RANGE_ENTRY, *PWIN32_MEMORY_RANGE_ENTRY; -#endif /* Windows 8.x */ - -typedef BOOL(WINAPI *MDBX_PrefetchVirtualMemory)( - HANDLE hProcess, ULONG_PTR NumberOfEntries, - PWIN32_MEMORY_RANGE_ENTRY VirtualAddresses, ULONG Flags); -MDBX_INTERNAL_VAR_PROTO MDBX_PrefetchVirtualMemory mdbx_PrefetchVirtualMemory; - -typedef enum _SECTION_INHERIT { ViewShare = 1, ViewUnmap = 2 } SECTION_INHERIT; - -typedef NTSTATUS(NTAPI *MDBX_NtExtendSection)(IN HANDLE SectionHandle, - IN PLARGE_INTEGER NewSectionSize); -MDBX_INTERNAL_VAR_PROTO MDBX_NtExtendSection mdbx_NtExtendSection; - -static __inline bool mdbx_RunningUnderWine(void) { - return !mdbx_NtExtendSection; -} - -typedef LSTATUS(WINAPI *MDBX_RegGetValueA)(HKEY hkey, LPCSTR lpSubKey, - LPCSTR lpValue, DWORD dwFlags, - LPDWORD pdwType, PVOID pvData, - LPDWORD pcbData); -MDBX_INTERNAL_VAR_PROTO MDBX_RegGetValueA mdbx_RegGetValueA; - -NTSYSAPI ULONG RtlRandomEx(PULONG Seed); - -typedef BOOL(WINAPI *MDBX_SetFileIoOverlappedRange)(HANDLE FileHandle, - PUCHAR OverlappedRangeStart, - ULONG Length); -MDBX_INTERNAL_VAR_PROTO MDBX_SetFileIoOverlappedRange - mdbx_SetFileIoOverlappedRange; - +MDBX_INTERNAL int osal_mb2w(const char *const src, wchar_t **const pdst); #endif /* Windows */ -#endif /* !__cplusplus */ - /*----------------------------------------------------------------------------*/ -MDBX_MAYBE_UNUSED 
MDBX_NOTHROW_PURE_FUNCTION static __always_inline uint64_t +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint64_t osal_bswap64(uint64_t v) { #if __GNUC_PREREQ(4, 4) || __CLANG_PREREQ(4, 0) || \ __has_builtin(__builtin_bswap64) @@ -916,7 +599,7 @@ osal_bswap64(uint64_t v) { #endif } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static __always_inline uint32_t +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint32_t osal_bswap32(uint32_t v) { #if __GNUC_PREREQ(4, 4) || __CLANG_PREREQ(4, 0) || \ __has_builtin(__builtin_bswap32) @@ -932,33 +615,3 @@ osal_bswap32(uint32_t v) { ((v >> 8) & UINT32_C(0x0000ff00)); #endif } - -/*----------------------------------------------------------------------------*/ - -#if defined(_MSC_VER) && _MSC_VER >= 1900 -/* LY: MSVC 2015/2017/2019 has buggy/inconsistent PRIuPTR/PRIxPTR macros - * for internal format-args checker. */ -#undef PRIuPTR -#undef PRIiPTR -#undef PRIdPTR -#undef PRIxPTR -#define PRIuPTR "Iu" -#define PRIiPTR "Ii" -#define PRIdPTR "Id" -#define PRIxPTR "Ix" -#define PRIuSIZE "zu" -#define PRIiSIZE "zi" -#define PRIdSIZE "zd" -#define PRIxSIZE "zx" -#endif /* fix PRI*PTR for _MSC_VER */ - -#ifndef PRIuSIZE -#define PRIuSIZE PRIuPTR -#define PRIiSIZE PRIiPTR -#define PRIdSIZE PRIdPTR -#define PRIxSIZE PRIxPTR -#endif /* PRI*SIZE macros for MSVC */ - -#ifdef _MSC_VER -#pragma warning(pop) -#endif diff --git a/src/page-get.c b/src/page-get.c new file mode 100644 index 00000000..13828e1e --- /dev/null +++ b/src/page-get.c @@ -0,0 +1,579 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +__cold int MDBX_PRINTF_ARGS(2, 3) + bad_page(const page_t *mp, const char *fmt, ...) 
{ + if (LOG_ENABLED(MDBX_LOG_ERROR)) { + static const page_t *prev; + if (prev != mp) { + char buf4unknown[16]; + prev = mp; + debug_log(MDBX_LOG_ERROR, "badpage", 0, + "corrupted %s-page #%u, mod-txnid %" PRIaTXN "\n", + pagetype_caption(page_type(mp), buf4unknown), mp->pgno, + mp->txnid); + } + + va_list args; + va_start(args, fmt); + debug_log_va(MDBX_LOG_ERROR, "badpage", 0, fmt, args); + va_end(args); + } + return MDBX_CORRUPTED; +} + +__cold void MDBX_PRINTF_ARGS(2, 3) + poor_page(const page_t *mp, const char *fmt, ...) { + if (LOG_ENABLED(MDBX_LOG_NOTICE)) { + static const page_t *prev; + if (prev != mp) { + char buf4unknown[16]; + prev = mp; + debug_log(MDBX_LOG_NOTICE, "poorpage", 0, + "suboptimal %s-page #%u, mod-txnid %" PRIaTXN "\n", + pagetype_caption(page_type(mp), buf4unknown), mp->pgno, + mp->txnid); + } + + va_list args; + va_start(args, fmt); + debug_log_va(MDBX_LOG_NOTICE, "poorpage", 0, fmt, args); + va_end(args); + } +} + +MDBX_CONST_FUNCTION static clc_t value_clc(const MDBX_cursor *mc) { + if (likely((mc->flags & z_inner) == 0)) + return mc->clc->v; + else { + clc_t stub = {.cmp = cmp_equal_or_wrong, .lmin = 0, .lmax = 0}; + return stub; + } +} + +__cold int page_check(const MDBX_cursor *const mc, const page_t *const mp) { + DKBUF; + int rc = MDBX_SUCCESS; + if (unlikely(mp->pgno < MIN_PAGENO || mp->pgno > MAX_PAGENO)) + rc = bad_page(mp, "invalid pgno (%u)\n", mp->pgno); + + MDBX_env *const env = mc->txn->env; + const ptrdiff_t offset = ptr_dist(mp, env->dxb_mmap.base); + unsigned flags_mask = P_ILL_BITS; + unsigned flags_expected = 0; + if (offset < 0 || + offset > (ptrdiff_t)(pgno2bytes(env, mc->txn->geo.first_unallocated) - + ((mp->flags & P_SUBP) ? PAGEHDRSZ + 1 : env->ps))) { + /* should be dirty page without MDBX_WRITEMAP, or a subpage of. 
*/ + flags_mask -= P_SUBP; + if ((env->flags & MDBX_WRITEMAP) != 0 || + (!is_shadowed(mc->txn, mp) && !(mp->flags & P_SUBP))) + rc = bad_page(mp, "invalid page-address %p, offset %zi\n", + __Wpedantic_format_voidptr(mp), offset); + } else if (offset & (env->ps - 1)) + flags_expected = P_SUBP; + + if (unlikely((mp->flags & flags_mask) != flags_expected)) + rc = bad_page(mp, "unknown/extra page-flags (have 0x%x, expect 0x%x)\n", + mp->flags & flags_mask, flags_expected); + + cASSERT(mc, (mc->checking & z_dupfix) == 0 || (mc->flags & z_inner) != 0); + const uint8_t type = page_type(mp); + switch (type) { + default: + return bad_page(mp, "invalid type (%u)\n", type); + case P_LARGE: + if (unlikely(mc->flags & z_inner)) + rc = bad_page(mp, "unexpected %s-page for %s (db-flags 0x%x)\n", "large", + "nested dupsort tree", mc->tree->flags); + const pgno_t npages = mp->pages; + if (unlikely(npages < 1 || npages >= MAX_PAGENO / 2)) + rc = bad_page(mp, "invalid n-pages (%u) for large-page\n", npages); + if (unlikely(mp->pgno + npages > mc->txn->geo.first_unallocated)) + rc = bad_page( + mp, "end of large-page beyond (%u) allocated space (%u next-pgno)\n", + mp->pgno + npages, mc->txn->geo.first_unallocated); + return rc; //-------------------------- end of large/overflow page handling + case P_LEAF | P_SUBP: + if (unlikely(mc->tree->height != 1)) + rc = bad_page(mp, "unexpected %s-page for %s (db-flags 0x%x)\n", + "leaf-sub", "nested dupsort db", mc->tree->flags); + /* fall through */ + __fallthrough; + case P_LEAF: + if (unlikely((mc->checking & z_dupfix) != 0)) + rc = bad_page(mp, + "unexpected leaf-page for dupfix subtree (db-lags 0x%x)\n", + mc->tree->flags); + break; + case P_LEAF | P_DUPFIX | P_SUBP: + if (unlikely(mc->tree->height != 1)) + rc = bad_page(mp, "unexpected %s-page for %s (db-flags 0x%x)\n", + "leaf2-sub", "nested dupsort db", mc->tree->flags); + /* fall through */ + __fallthrough; + case P_LEAF | P_DUPFIX: + if (unlikely((mc->checking & z_dupfix) == 0)) + 
rc = bad_page( + mp, + "unexpected leaf2-page for non-dupfix (sub)tree (db-flags 0x%x)\n", + mc->tree->flags); + break; + case P_BRANCH: + break; + } + + if (unlikely(mp->upper < mp->lower || (mp->lower & 1) || + PAGEHDRSZ + mp->upper > env->ps)) + rc = bad_page(mp, "invalid page lower(%u)/upper(%u) with limit %zu\n", + mp->lower, mp->upper, page_space(env)); + + const char *const end_of_page = ptr_disp(mp, env->ps); + const size_t nkeys = page_numkeys(mp); + STATIC_ASSERT(P_BRANCH == 1); + if (unlikely(nkeys <= (uint8_t)(mp->flags & P_BRANCH))) { + if ((!(mc->flags & z_inner) || mc->tree->items) && + (!(mc->checking & z_updating) || + !(is_modifable(mc->txn, mp) || (mp->flags & P_SUBP)))) + rc = + bad_page(mp, "%s-page nkeys (%zu) < %u\n", + is_branch(mp) ? "branch" : "leaf", nkeys, 1 + is_branch(mp)); + } + + const size_t ksize_max = keysize_max(env->ps, 0); + const size_t leaf2_ksize = mp->dupfix_ksize; + if (is_dupfix_leaf(mp)) { + if (unlikely((mc->flags & z_inner) == 0 || + (mc->tree->flags & MDBX_DUPFIXED) == 0)) + rc = bad_page(mp, "unexpected leaf2-page (db-flags 0x%x)\n", + mc->tree->flags); + else if (unlikely(leaf2_ksize != mc->tree->dupfix_size)) + rc = bad_page(mp, "invalid leaf2_ksize %zu\n", leaf2_ksize); + else if (unlikely(((leaf2_ksize & nkeys) ^ mp->upper) & 1)) + rc = bad_page( + mp, "invalid page upper (%u) for nkeys %zu with leaf2-length %zu\n", + mp->upper, nkeys, leaf2_ksize); + } else { + if (unlikely((mp->upper & 1) || + PAGEHDRSZ + mp->upper + nkeys * sizeof(node_t) + nkeys - 1 > + env->ps)) + rc = + bad_page(mp, "invalid page upper (%u) for nkeys %zu with limit %zu\n", + mp->upper, nkeys, page_space(env)); + } + + MDBX_val here, prev = {0, 0}; + clc_t v_clc = value_clc(mc); + for (size_t i = 0; i < nkeys; ++i) { + if (is_dupfix_leaf(mp)) { + const char *const key = page_dupfix_ptr(mp, i, mc->tree->dupfix_size); + if (unlikely(end_of_page < key + leaf2_ksize)) { + rc = bad_page(mp, "leaf2-item beyond (%zu) page-end\n", + key + 
leaf2_ksize - end_of_page); + continue; + } + + if (unlikely(leaf2_ksize != mc->clc->k.lmin)) { + if (unlikely(leaf2_ksize < mc->clc->k.lmin || + leaf2_ksize > mc->clc->k.lmax)) + rc = bad_page(mp, + "leaf2-item size (%zu) <> min/max length (%zu/%zu)\n", + leaf2_ksize, mc->clc->k.lmin, mc->clc->k.lmax); + else + mc->clc->k.lmin = mc->clc->k.lmax = leaf2_ksize; + } + if ((mc->checking & z_ignord) == 0) { + here.iov_base = (void *)key; + here.iov_len = leaf2_ksize; + if (prev.iov_base && unlikely(mc->clc->k.cmp(&prev, &here) >= 0)) + rc = bad_page(mp, "leaf2-item #%zu wrong order (%s >= %s)\n", i, + DKEY(&prev), DVAL(&here)); + prev = here; + } + } else { + const node_t *const node = page_node(mp, i); + const char *const node_end = ptr_disp(node, NODESIZE); + if (unlikely(node_end > end_of_page)) { + rc = bad_page(mp, "node[%zu] (%zu) beyond page-end\n", i, + node_end - end_of_page); + continue; + } + const size_t ksize = node_ks(node); + if (unlikely(ksize > ksize_max)) + rc = bad_page(mp, "node[%zu] too long key (%zu)\n", i, ksize); + const char *const key = node_key(node); + if (unlikely(end_of_page < key + ksize)) { + rc = bad_page(mp, "node[%zu] key (%zu) beyond page-end\n", i, + key + ksize - end_of_page); + continue; + } + if ((is_leaf(mp) || i > 0)) { + if (unlikely(ksize < mc->clc->k.lmin || ksize > mc->clc->k.lmax)) + rc = bad_page( + mp, "node[%zu] key size (%zu) <> min/max key-length (%zu/%zu)\n", + i, ksize, mc->clc->k.lmin, mc->clc->k.lmax); + if ((mc->checking & z_ignord) == 0) { + here.iov_base = (void *)key; + here.iov_len = ksize; + if (prev.iov_base && unlikely(mc->clc->k.cmp(&prev, &here) >= 0)) + rc = bad_page(mp, "node[%zu] key wrong order (%s >= %s)\n", i, + DKEY(&prev), DVAL(&here)); + prev = here; + } + } + if (is_branch(mp)) { + if ((mc->checking & z_updating) == 0 && i == 0 && unlikely(ksize != 0)) + rc = bad_page(mp, "branch-node[%zu] wrong 0-node key-length (%zu)\n", + i, ksize); + const pgno_t ref = node_pgno(node); + if (unlikely(ref < 
MIN_PAGENO) || + (unlikely(ref >= mc->txn->geo.first_unallocated) && + (unlikely(ref >= mc->txn->geo.now) || + !(mc->checking & z_retiring)))) + rc = bad_page(mp, "branch-node[%zu] wrong pgno (%u)\n", i, ref); + if (unlikely(node_flags(node))) + rc = bad_page(mp, "branch-node[%zu] wrong flags (%u)\n", i, + node_flags(node)); + continue; + } + + switch (node_flags(node)) { + default: + rc = + bad_page(mp, "invalid node[%zu] flags (%u)\n", i, node_flags(node)); + break; + case N_BIGDATA /* data on large-page */: + case 0 /* usual */: + case N_SUBDATA /* sub-db */: + case N_SUBDATA | N_DUPDATA /* dupsorted sub-tree */: + case N_DUPDATA /* short sub-page */: + break; + } + + const size_t dsize = node_ds(node); + const char *const data = node_data(node); + if (node_flags(node) & N_BIGDATA) { + if (unlikely(end_of_page < data + sizeof(pgno_t))) { + rc = bad_page( + mp, "node-%s(%zu of %zu, %zu bytes) beyond (%zu) page-end\n", + "bigdata-pgno", i, nkeys, dsize, data + dsize - end_of_page); + continue; + } + if (unlikely(dsize <= v_clc.lmin || dsize > v_clc.lmax)) + rc = bad_page( + mp, + "big-node data size (%zu) <> min/max value-length (%zu/%zu)\n", + dsize, v_clc.lmin, v_clc.lmax); + if (unlikely(node_size_len(node_ks(node), dsize) <= + mc->txn->env->leaf_nodemax) && + mc->tree != &mc->txn->dbs[FREE_DBI]) + poor_page(mp, "too small data (%zu bytes) for bigdata-node", dsize); + + if ((mc->checking & z_retiring) == 0) { + const pgr_t lp = + page_get_large(mc, node_largedata_pgno(node), mp->txnid); + if (unlikely(lp.err != MDBX_SUCCESS)) + return lp.err; + cASSERT(mc, page_type(lp.page) == P_LARGE); + const unsigned npages = largechunk_npages(env, dsize); + if (unlikely(lp.page->pages != npages)) { + if (lp.page->pages < npages) + rc = bad_page(lp.page, + "too less n-pages %u for bigdata-node (%zu bytes)", + lp.page->pages, dsize); + else if (mc->tree != &mc->txn->dbs[FREE_DBI]) + poor_page(lp.page, + "extra n-pages %u for bigdata-node (%zu bytes)", + lp.page->pages, 
dsize); + } + } + continue; + } + + if (unlikely(end_of_page < data + dsize)) { + rc = bad_page(mp, + "node-%s(%zu of %zu, %zu bytes) beyond (%zu) page-end\n", + "data", i, nkeys, dsize, data + dsize - end_of_page); + continue; + } + + switch (node_flags(node)) { + default: + /* wrong, but already handled */ + continue; + case 0 /* usual */: + if (unlikely(dsize < v_clc.lmin || dsize > v_clc.lmax)) { + rc = bad_page( + mp, "node-data size (%zu) <> min/max value-length (%zu/%zu)\n", + dsize, v_clc.lmin, v_clc.lmax); + continue; + } + break; + case N_SUBDATA /* sub-db */: + if (unlikely(dsize != sizeof(tree_t))) { + rc = bad_page(mp, "invalid sub-db record size (%zu)\n", dsize); + continue; + } + break; + case N_SUBDATA | N_DUPDATA /* dupsorted sub-tree */: + if (unlikely(dsize != sizeof(tree_t))) { + rc = bad_page(mp, "invalid nested-db record size (%zu, expect %zu)\n", + dsize, sizeof(tree_t)); + continue; + } + break; + case N_DUPDATA /* short sub-page */: + if (unlikely(dsize <= PAGEHDRSZ)) { + rc = bad_page(mp, "invalid nested/sub-page record size (%zu)\n", + dsize); + continue; + } else { + const page_t *const sp = (page_t *)data; + switch (sp->flags & + /* ignore legacy P_DIRTY flag */ ~P_LEGACY_DIRTY) { + case P_LEAF | P_SUBP: + case P_LEAF | P_DUPFIX | P_SUBP: + break; + default: + rc = bad_page(mp, "invalid nested/sub-page flags (0x%02x)\n", + sp->flags); + continue; + } + + const char *const end_of_subpage = data + dsize; + const intptr_t nsubkeys = page_numkeys(sp); + if (unlikely(nsubkeys == 0) && !(mc->checking & z_updating) && + mc->tree->items) + rc = bad_page(mp, "no keys on a %s-page\n", + is_dupfix_leaf(sp) ? 
"leaf2-sub" : "leaf-sub"); + + MDBX_val sub_here, sub_prev = {0, 0}; + for (int ii = 0; ii < nsubkeys; ii++) { + if (is_dupfix_leaf(sp)) { + /* DUPFIX pages have no entries[] or node headers */ + const size_t sub_ksize = sp->dupfix_ksize; + const char *const sub_key = + page_dupfix_ptr(sp, ii, mc->tree->dupfix_size); + if (unlikely(end_of_subpage < sub_key + sub_ksize)) { + rc = bad_page(mp, "nested-leaf2-key beyond (%zu) nested-page\n", + sub_key + sub_ksize - end_of_subpage); + continue; + } + + if (unlikely(sub_ksize != v_clc.lmin)) { + if (unlikely(sub_ksize < v_clc.lmin || sub_ksize > v_clc.lmax)) + rc = bad_page(mp, + "nested-leaf2-key size (%zu) <> min/max " + "value-length (%zu/%zu)\n", + sub_ksize, v_clc.lmin, v_clc.lmax); + else + v_clc.lmin = v_clc.lmax = sub_ksize; + } + if ((mc->checking & z_ignord) == 0) { + sub_here.iov_base = (void *)sub_key; + sub_here.iov_len = sub_ksize; + if (sub_prev.iov_base && + unlikely(v_clc.cmp(&sub_prev, &sub_here) >= 0)) + rc = bad_page(mp, + "nested-leaf2-key #%u wrong order (%s >= %s)\n", + ii, DKEY(&sub_prev), DVAL(&sub_here)); + sub_prev = sub_here; + } + } else { + const node_t *const sub_node = page_node(sp, ii); + const char *const sub_node_end = ptr_disp(sub_node, NODESIZE); + if (unlikely(sub_node_end > end_of_subpage)) { + rc = bad_page(mp, "nested-node beyond (%zu) nested-page\n", + end_of_subpage - sub_node_end); + continue; + } + if (unlikely(node_flags(sub_node) != 0)) + rc = bad_page(mp, "nested-node invalid flags (%u)\n", + node_flags(sub_node)); + + const size_t sub_ksize = node_ks(sub_node); + const char *const sub_key = node_key(sub_node); + const size_t sub_dsize = node_ds(sub_node); + /* char *sub_data = node_data(sub_node); */ + + if (unlikely(sub_ksize < v_clc.lmin || sub_ksize > v_clc.lmax)) + rc = bad_page(mp, + "nested-node-key size (%zu) <> min/max " + "value-length (%zu/%zu)\n", + sub_ksize, v_clc.lmin, v_clc.lmax); + if ((mc->checking & z_ignord) == 0) { + sub_here.iov_base = (void *)sub_key; 
+ sub_here.iov_len = sub_ksize; + if (sub_prev.iov_base && + unlikely(v_clc.cmp(&sub_prev, &sub_here) >= 0)) + rc = bad_page(mp, + "nested-node-key #%u wrong order (%s >= %s)\n", + ii, DKEY(&sub_prev), DVAL(&sub_here)); + sub_prev = sub_here; + } + if (unlikely(sub_dsize != 0)) + rc = bad_page(mp, "nested-node non-empty data size (%zu)\n", + sub_dsize); + if (unlikely(end_of_subpage < sub_key + sub_ksize)) + rc = bad_page(mp, "nested-node-key beyond (%zu) nested-page\n", + sub_key + sub_ksize - end_of_subpage); + } + } + } + break; + } + } + } + return rc; +} + +static __always_inline int check_page_header(const uint16_t ILL, + const page_t *page, + MDBX_txn *const txn, + const txnid_t front) { + if (unlikely(page->flags & ILL)) { + if (ILL == P_ILL_BITS || (page->flags & P_ILL_BITS)) + return bad_page(page, "invalid page's flags (%u)\n", page->flags); + else if (ILL & P_LARGE) { + assert((ILL & (P_BRANCH | P_LEAF | P_DUPFIX)) == 0); + assert(page->flags & (P_BRANCH | P_LEAF | P_DUPFIX)); + return bad_page(page, "unexpected %s instead of %s (%u)\n", + "large/overflow", "branch/leaf/leaf2", page->flags); + } else if (ILL & (P_BRANCH | P_LEAF | P_DUPFIX)) { + assert((ILL & P_BRANCH) && (ILL & P_LEAF) && (ILL & P_DUPFIX)); + assert(page->flags & (P_BRANCH | P_LEAF | P_DUPFIX)); + return bad_page(page, "unexpected %s instead of %s (%u)\n", + "branch/leaf/leaf2", "large/overflow", page->flags); + } else { + assert(false); + } + } + + if (unlikely(page->txnid > front) && + unlikely(page->txnid > txn->front_txnid || front < txn->txnid)) + return bad_page( + page, + "invalid page' txnid (%" PRIaTXN ") for %s' txnid (%" PRIaTXN ")\n", + page->txnid, + (front == txn->front_txnid && front != txn->txnid) ? "front-txn" + : "parent-page", + front); + + if (((ILL & P_LARGE) || !is_largepage(page)) && + (ILL & (P_BRANCH | P_LEAF | P_DUPFIX)) == 0) { + /* Контроль четности page->upper тут либо приводит к ложным ошибкам, + * либо слишком дорог по количеству операций. 
Заковырка в том, что upper + * может быть нечетным на DUPFIX-страницах, при нечетном количестве + * элементов нечетной длины. Поэтому четность page->upper здесь не + * проверяется, но соответствующие полные проверки есть в page_check(). */ + if (unlikely(page->upper < page->lower || (page->lower & 1) || + PAGEHDRSZ + page->upper > txn->env->ps)) + return bad_page(page, + "invalid page' lower(%u)/upper(%u) with limit %zu\n", + page->lower, page->upper, page_space(txn->env)); + + } else if ((ILL & P_LARGE) == 0) { + const pgno_t npages = page->pages; + if (unlikely(npages < 1) || unlikely(npages >= MAX_PAGENO / 2)) + return bad_page(page, "invalid n-pages (%u) for large-page\n", npages); + if (unlikely(page->pgno + npages > txn->geo.first_unallocated)) + return bad_page( + page, + "end of large-page beyond (%u) allocated space (%u next-pgno)\n", + page->pgno + npages, txn->geo.first_unallocated); + } else { + assert(false); + } + return MDBX_SUCCESS; +} + +__cold static __noinline pgr_t check_page_complete(const uint16_t ILL, + page_t *page, + const MDBX_cursor *const mc, + const txnid_t front) { + pgr_t r = {page, check_page_header(ILL, page, mc->txn, front)}; + if (likely(r.err == MDBX_SUCCESS)) + r.err = page_check(mc, page); + if (unlikely(r.err != MDBX_SUCCESS)) + mc->txn->flags |= MDBX_TXN_ERROR; + return r; +} + +static __always_inline pgr_t page_get_inline(const uint16_t ILL, + const MDBX_cursor *const mc, + const pgno_t pgno, + const txnid_t front) { + MDBX_txn *const txn = mc->txn; + tASSERT(txn, front <= txn->front_txnid); + + pgr_t r; + if (unlikely(pgno >= txn->geo.first_unallocated)) { + ERROR("page #%" PRIaPGNO " beyond next-pgno", pgno); + r.page = nullptr; + r.err = MDBX_PAGE_NOTFOUND; + bailout: + txn->flags |= MDBX_TXN_ERROR; + return r; + } + + eASSERT(txn->env, ((txn->flags ^ txn->env->flags) & MDBX_WRITEMAP) == 0); + r.page = pgno2page(txn->env, pgno); + if ((txn->flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0) { + const MDBX_txn *spiller = 
txn; + do { + /* Spilled pages were dirtied in this txn and flushed + * because the dirty list got full. Bring this page + * back in from the map (but don't unspill it here, + * leave that unless page_touch happens again). */ + if (unlikely(spiller->flags & MDBX_TXN_SPILLS) && + spill_search(spiller, pgno)) + break; + + const size_t i = dpl_search(spiller, pgno); + tASSERT(txn, (intptr_t)i > 0); + if (spiller->tw.dirtylist->items[i].pgno == pgno) { + r.page = spiller->tw.dirtylist->items[i].ptr; + break; + } + + spiller = spiller->parent; + } while (unlikely(spiller)); + } + + if (unlikely(r.page->pgno != pgno)) { + r.err = bad_page( + r.page, "pgno mismatch (%" PRIaPGNO ") != expected (%" PRIaPGNO ")\n", + r.page->pgno, pgno); + goto bailout; + } + + if (unlikely(mc->checking & z_pagecheck)) + return check_page_complete(ILL, r.page, mc, front); + +#if MDBX_DISABLE_VALIDATION + r.err = MDBX_SUCCESS; +#else + r.err = check_page_header(ILL, r.page, txn, front); + if (unlikely(r.err != MDBX_SUCCESS)) + goto bailout; +#endif /* MDBX_DISABLE_VALIDATION */ + return r; +} + +pgr_t page_get_any(const MDBX_cursor *const mc, const pgno_t pgno, + const txnid_t front) { + return page_get_inline(P_ILL_BITS, mc, pgno, front); +} + +__hot pgr_t page_get_three(const MDBX_cursor *const mc, const pgno_t pgno, + const txnid_t front) { + return page_get_inline(P_ILL_BITS | P_LARGE, mc, pgno, front); +} + +pgr_t page_get_large(const MDBX_cursor *const mc, const pgno_t pgno, + const txnid_t front) { + return page_get_inline(P_ILL_BITS | P_BRANCH | P_LEAF | P_DUPFIX, mc, pgno, + front); +} diff --git a/src/page-iov.c b/src/page-iov.c new file mode 100644 index 00000000..700ff5d0 --- /dev/null +++ b/src/page-iov.c @@ -0,0 +1,198 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +int iov_init(MDBX_txn *const txn, iov_ctx_t *ctx, size_t items, size_t npages, + mdbx_filehandle_t fd, bool 
check_coherence) { + ctx->env = txn->env; + ctx->ior = &txn->env->ioring; + ctx->fd = fd; + ctx->coherency_timestamp = + (check_coherence || txn->env->lck->pgops.incoherence.weak) + ? 0 + : UINT64_MAX /* не выполнять сверку */; + ctx->err = osal_ioring_prepare(ctx->ior, items, + pgno_align2os_bytes(txn->env, npages)); + if (likely(ctx->err == MDBX_SUCCESS)) { +#if MDBX_NEED_WRITTEN_RANGE + ctx->flush_begin = MAX_PAGENO; + ctx->flush_end = MIN_PAGENO; +#endif /* MDBX_NEED_WRITTEN_RANGE */ + osal_ioring_reset(ctx->ior); + } + return ctx->err; +} + +static void iov_callback4dirtypages(iov_ctx_t *ctx, size_t offset, void *data, + size_t bytes) { + MDBX_env *const env = ctx->env; + eASSERT(env, (env->flags & MDBX_WRITEMAP) == 0); + + page_t *wp = (page_t *)data; + eASSERT(env, wp->pgno == bytes2pgno(env, offset)); + eASSERT(env, bytes2pgno(env, bytes) >= (is_largepage(wp) ? wp->pages : 1u)); + eASSERT(env, (wp->flags & P_ILL_BITS) == 0); + + if (likely(ctx->err == MDBX_SUCCESS)) { + const page_t *const rp = ptr_disp(env->dxb_mmap.base, offset); + VALGRIND_MAKE_MEM_DEFINED(rp, bytes); + MDBX_ASAN_UNPOISON_MEMORY_REGION(rp, bytes); + osal_flush_incoherent_mmap(rp, bytes, globals.sys_pagesize); + /* check with timeout as the workaround + * for https://libmdbx.dqdkfa.ru/dead-github/issues/269 + * + * Проблема проявляется только при неупорядоченности: если записанная + * последней мета-страница "обгоняет" ранее записанные, т.е. когда + * записанное в файл позже становится видимым в отображении раньше, + * чем записанное ранее. + * + * Исходно здесь всегда выполнялась полная сверка. Это давало полную + * гарантию защиты от проявления проблемы, но порождало накладные расходы. + * В некоторых сценариях наблюдалось снижение производительности до 10-15%, + * а в синтетических тестах до 30%. 
Конечно никто не вникал в причины, + * а просто останавливался на мнении "libmdbx не быстрее LMDB", + * например: https://clck.ru/3386er + * + * Поэтому после серии экспериментов и тестов реализовано следующее: + * 0. Посредством опции сборки MDBX_FORCE_CHECK_MMAP_COHERENCY=1 + * можно включить полную сверку после записи. + * Остальные пункты являются взвешенным компромиссом между полной + * гарантией обнаружения проблемы и бесполезными затратами на системах + * без этого недостатка. + * 1. При старте транзакций проверяется соответствие выбранной мета-страницы + * корневым страницам b-tree. Эта проверка показала себя + * достаточной без сверки после записи. При обнаружении "некогерентности" + * эти случаи подсчитываются, а при их ненулевом счетчике выполняется + * полная сверка. Таким образом, произойдет переключение в режим полной + * сверки, если показавшая себя достаточной проверка заметит проявление + * проблемы хотя бы раз. + * 2. Сверка не выполняется при фиксации транзакции, так как: + * - при наличии проблемы "не-когерентности" (при отложенном копировании + * или обновлении PTE, после возврата из write-syscall), проверка + * в этом процессе не гарантирует актуальность данных в другом + * процессе, который может запустить транзакцию сразу после коммита; + * - сверка только последнего блока позволяет почти восстановить + * производительность в больших транзакциях, но одновременно размывает + * уверенность в отсутствии сбоев, чем обесценивает всю затею; + * - после записи данных будет записана мета-страница, соответствие + * которой корневым страницам b-tree проверяется при старте + * транзакций, и только эта проверка показала себя достаточной; + * 3. При спиллинге производится полная сверка записанных страниц. Тут был + * соблазн сверять не полностью, а например начало и конец каждого блока. + * Но при спиллинге возможна ситуация повторного вытеснения страниц, в + * том числе large/overflow.
При этом возникает риск прочитать в текущей + * транзакции старую версию страницы, до повторной записи. В этом случае + * могут возникать крайне редкие невоспроизводимые ошибки. С учетом того + * что спиллинг выполняет крайне редко, решено отказаться от экономии + * в пользу надежности. */ +#ifndef MDBX_FORCE_CHECK_MMAP_COHERENCY +#define MDBX_FORCE_CHECK_MMAP_COHERENCY 0 +#endif /* MDBX_FORCE_CHECK_MMAP_COHERENCY */ + if ((MDBX_FORCE_CHECK_MMAP_COHERENCY || + ctx->coherency_timestamp != UINT64_MAX) && + unlikely(memcmp(wp, rp, bytes))) { + ctx->coherency_timestamp = 0; + env->lck->pgops.incoherence.weak = + (env->lck->pgops.incoherence.weak >= INT32_MAX) + ? INT32_MAX + : env->lck->pgops.incoherence.weak + 1; + WARNING("catch delayed/non-arrived page %" PRIaPGNO " %s", wp->pgno, + "(workaround for incoherent flaw of unified page/buffer cache)"); + do + if (coherency_timeout(&ctx->coherency_timestamp, wp->pgno, env) != + MDBX_RESULT_TRUE) { + ctx->err = MDBX_PROBLEM; + break; + } + while (unlikely(memcmp(wp, rp, bytes))); + } + } + + if (likely(bytes == env->ps)) + page_shadow_release(env, wp, 1); + else { + do { + eASSERT(env, wp->pgno == bytes2pgno(env, offset)); + eASSERT(env, (wp->flags & P_ILL_BITS) == 0); + size_t npages = is_largepage(wp) ? 
wp->pages : 1u; + size_t chunk = pgno2bytes(env, npages); + eASSERT(env, bytes >= chunk); + page_t *next = ptr_disp(wp, chunk); + page_shadow_release(env, wp, npages); + wp = next; + offset += chunk; + bytes -= chunk; + } while (bytes); + } +} + +static void iov_complete(iov_ctx_t *ctx) { + if ((ctx->env->flags & MDBX_WRITEMAP) == 0) + osal_ioring_walk(ctx->ior, ctx, iov_callback4dirtypages); + osal_ioring_reset(ctx->ior); +} + +int iov_write(iov_ctx_t *ctx) { + eASSERT(ctx->env, !iov_empty(ctx)); + osal_ioring_write_result_t r = osal_ioring_write(ctx->ior, ctx->fd); +#if MDBX_ENABLE_PGOP_STAT + ctx->env->lck->pgops.wops.weak += r.wops; +#endif /* MDBX_ENABLE_PGOP_STAT */ + ctx->err = r.err; + if (unlikely(ctx->err != MDBX_SUCCESS)) + ERROR("Write error: %s", mdbx_strerror(ctx->err)); + iov_complete(ctx); + return ctx->err; +} + +int iov_page(MDBX_txn *txn, iov_ctx_t *ctx, page_t *dp, size_t npages) { + MDBX_env *const env = txn->env; + tASSERT(txn, ctx->err == MDBX_SUCCESS); + tASSERT(txn, dp->pgno >= MIN_PAGENO && dp->pgno < txn->geo.first_unallocated); + tASSERT(txn, is_modifable(txn, dp)); + tASSERT(txn, !(dp->flags & ~(P_BRANCH | P_LEAF | P_DUPFIX | P_LARGE))); + + if (is_shadowed(txn, dp)) { + tASSERT(txn, !(txn->flags & MDBX_WRITEMAP)); + dp->txnid = txn->txnid; + tASSERT(txn, is_spilled(txn, dp)); +#if MDBX_AVOID_MSYNC + doit:; +#endif /* MDBX_AVOID_MSYNC */ + int err = osal_ioring_add(ctx->ior, pgno2bytes(env, dp->pgno), dp, + pgno2bytes(env, npages)); + if (unlikely(err != MDBX_SUCCESS)) { + ctx->err = err; + if (unlikely(err != MDBX_RESULT_TRUE)) { + iov_complete(ctx); + return err; + } + err = iov_write(ctx); + tASSERT(txn, iov_empty(ctx)); + if (likely(err == MDBX_SUCCESS)) { + err = osal_ioring_add(ctx->ior, pgno2bytes(env, dp->pgno), dp, + pgno2bytes(env, npages)); + if (unlikely(err != MDBX_SUCCESS)) { + iov_complete(ctx); + return ctx->err = err; + } + } + tASSERT(txn, ctx->err == MDBX_SUCCESS); + } + } else { + tASSERT(txn, txn->flags & 
MDBX_WRITEMAP); +#if MDBX_AVOID_MSYNC + goto doit; +#endif /* MDBX_AVOID_MSYNC */ + } + +#if MDBX_NEED_WRITTEN_RANGE + ctx->flush_begin = + (ctx->flush_begin < dp->pgno) ? ctx->flush_begin : dp->pgno; + ctx->flush_end = (ctx->flush_end > dp->pgno + (pgno_t)npages) + ? ctx->flush_end + : dp->pgno + (pgno_t)npages; +#endif /* MDBX_NEED_WRITTEN_RANGE */ + return MDBX_SUCCESS; +} diff --git a/src/page-iov.h b/src/page-iov.h new file mode 100644 index 00000000..397f6fbe --- /dev/null +++ b/src/page-iov.h @@ -0,0 +1,38 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +#if !(defined(_WIN32) || defined(_WIN64)) +#define MDBX_WRITETHROUGH_THRESHOLD_DEFAULT 2 +#endif + +struct iov_ctx { + MDBX_env *env; + osal_ioring_t *ior; + mdbx_filehandle_t fd; + int err; +#ifndef MDBX_NEED_WRITTEN_RANGE +#define MDBX_NEED_WRITTEN_RANGE 1 +#endif /* MDBX_NEED_WRITTEN_RANGE */ +#if MDBX_NEED_WRITTEN_RANGE + pgno_t flush_begin; + pgno_t flush_end; +#endif /* MDBX_NEED_WRITTEN_RANGE */ + uint64_t coherency_timestamp; +}; + +MDBX_INTERNAL __must_check_result int +iov_init(MDBX_txn *const txn, iov_ctx_t *ctx, size_t items, size_t npages, + mdbx_filehandle_t fd, bool check_coherence); + +static inline bool iov_empty(const iov_ctx_t *ctx) { + return osal_ioring_used(ctx->ior) == 0; +} + +MDBX_INTERNAL __must_check_result int iov_page(MDBX_txn *txn, iov_ctx_t *ctx, + page_t *dp, size_t npages); + +MDBX_INTERNAL __must_check_result int iov_write(iov_ctx_t *ctx); diff --git a/src/page-ops.c b/src/page-ops.c new file mode 100644 index 00000000..b25c860e --- /dev/null +++ b/src/page-ops.c @@ -0,0 +1,772 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +static inline tree_t *outer_tree(MDBX_cursor *mc) { + cASSERT(mc, (mc->flags & z_inner) != 0); + subcur_t *mx = container_of(mc->tree, subcur_t, 
nested_tree); + cursor_couple_t *couple = container_of(mx, cursor_couple_t, inner); + cASSERT(mc, mc->tree == &couple->outer.subcur->nested_tree); + cASSERT(mc, &mc->clc->k == &couple->outer.clc->v); + return couple->outer.tree; +} + +pgr_t page_new(MDBX_cursor *mc, const unsigned flags) { + cASSERT(mc, (flags & P_LARGE) == 0); + pgr_t ret = gc_alloc_single(mc); + if (unlikely(ret.err != MDBX_SUCCESS)) + return ret; + + DEBUG("db %zu allocated new page %" PRIaPGNO, cursor_dbi(mc), ret.page->pgno); + ret.page->flags = (uint16_t)flags; + cASSERT(mc, *cursor_dbi_state(mc) & DBI_DIRTY); + cASSERT(mc, mc->txn->flags & MDBX_TXN_DIRTY); +#if MDBX_ENABLE_PGOP_STAT + mc->txn->env->lck->pgops.newly.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + + STATIC_ASSERT(P_BRANCH == 1); + const unsigned is_branch = flags & P_BRANCH; + + ret.page->lower = 0; + ret.page->upper = (indx_t)(mc->txn->env->ps - PAGEHDRSZ); + mc->tree->branch_pages += is_branch; + mc->tree->leaf_pages += 1 - is_branch; + if (unlikely(mc->flags & z_inner)) { + tree_t *outer = outer_tree(mc); + outer->branch_pages += is_branch; + outer->leaf_pages += 1 - is_branch; + } + return ret; +} + +pgr_t page_new_large(MDBX_cursor *mc, const size_t npages) { + pgr_t ret = likely(npages == 1) ? 
gc_alloc_single(mc) + : gc_alloc_ex(mc, npages, ALLOC_DEFAULT); + if (unlikely(ret.err != MDBX_SUCCESS)) + return ret; + + DEBUG("dbi %zu allocated new large-page %" PRIaPGNO ", num %zu", + cursor_dbi(mc), ret.page->pgno, npages); + ret.page->flags = P_LARGE; + cASSERT(mc, *cursor_dbi_state(mc) & DBI_DIRTY); + cASSERT(mc, mc->txn->flags & MDBX_TXN_DIRTY); +#if MDBX_ENABLE_PGOP_STAT + mc->txn->env->lck->pgops.newly.weak += npages; +#endif /* MDBX_ENABLE_PGOP_STAT */ + + mc->tree->large_pages += (pgno_t)npages; + ret.page->pages = (pgno_t)npages; + cASSERT(mc, !(mc->flags & z_inner)); + return ret; +} + +__hot void page_copy(page_t *const dst, const page_t *const src, + const size_t size) { + STATIC_ASSERT(UINT16_MAX > MDBX_MAX_PAGESIZE - PAGEHDRSZ); + STATIC_ASSERT(MDBX_MIN_PAGESIZE > PAGEHDRSZ + NODESIZE * 4); + void *copy_dst = dst; + const void *copy_src = src; + size_t copy_len = size; + if (src->flags & P_DUPFIX) { + copy_len = PAGEHDRSZ + src->dupfix_ksize * page_numkeys(src); + if (unlikely(copy_len > size)) + goto bailout; + } else if ((src->flags & P_LARGE) == 0) { + size_t upper = src->upper, lower = src->lower; + intptr_t unused = upper - lower; + /* If page isn't full, just copy the used portion. Adjust + * alignment so memcpy may copy words instead of bytes. 
*/ + if (unused > MDBX_CACHELINE_SIZE * 3) { + lower = ceil_powerof2(lower + PAGEHDRSZ, sizeof(void *)); + upper = floor_powerof2(upper + PAGEHDRSZ, sizeof(void *)); + if (unlikely(upper > copy_len)) + goto bailout; + memcpy(copy_dst, copy_src, lower); + copy_dst = ptr_disp(copy_dst, upper); + copy_src = ptr_disp(copy_src, upper); + copy_len -= upper; + } + } + memcpy(copy_dst, copy_src, copy_len); + return; + +bailout: + if (src->flags & P_DUPFIX) + bad_page(src, "%s addr %p, n-keys %zu, ksize %u", + "invalid/corrupted source page", __Wpedantic_format_voidptr(src), + page_numkeys(src), src->dupfix_ksize); + else + bad_page(src, "%s addr %p, upper %u", "invalid/corrupted source page", + __Wpedantic_format_voidptr(src), src->upper); + memset(dst, -1, size); +} + +__cold pgr_t __must_check_result page_unspill(MDBX_txn *const txn, + const page_t *const mp) { + VERBOSE("unspill page %" PRIaPGNO, mp->pgno); + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0); + tASSERT(txn, is_spilled(txn, mp)); + const MDBX_txn *scan = txn; + pgr_t ret; + do { + tASSERT(txn, (scan->flags & MDBX_TXN_SPILLS) != 0); + const size_t si = spill_search(scan, mp->pgno); + if (!si) + continue; + const unsigned npages = is_largepage(mp) ? mp->pages : 1; + ret.page = page_shadow_alloc(txn, npages); + if (unlikely(!ret.page)) { + ret.err = MDBX_ENOMEM; + return ret; + } + page_copy(ret.page, mp, pgno2bytes(txn->env, npages)); + if (scan == txn) { + /* If in current txn, this page is no longer spilled. + * If it happens to be the last page, truncate the spill list. + * Otherwise mark it as deleted by setting the LSB. */ + spill_remove(txn, si, npages); + } /* otherwise, if belonging to a parent txn, the + * page remains spilled until child commits */ + + ret.err = page_dirty(txn, ret.page, npages); + if (unlikely(ret.err != MDBX_SUCCESS)) + return ret; +#if MDBX_ENABLE_PGOP_STAT + txn->env->lck->pgops.unspill.weak += npages; +#endif /* MDBX_ENABLE_PGOP_STAT */ + ret.page->flags |= (scan == txn) ? 
0 : P_SPILLED; + ret.err = MDBX_SUCCESS; + return ret; + } while (likely((scan = scan->parent) != nullptr && + (scan->flags & MDBX_TXN_SPILLS) != 0)); + ERROR("Page %" PRIaPGNO " mod-txnid %" PRIaTXN + " not found in the spill-list(s), current txn %" PRIaTXN + " front %" PRIaTXN ", root txn %" PRIaTXN " front %" PRIaTXN, + mp->pgno, mp->txnid, txn->txnid, txn->front_txnid, + txn->env->basal_txn->txnid, txn->env->basal_txn->front_txnid); + ret.err = MDBX_PROBLEM; + ret.page = nullptr; + return ret; +} +/* Touches a page that is already modifiable in this txn: re-registers it through page_dirty() when slot n of the dirty list holds a different pgno (reachable only with MDBX_AVOID_MSYNC), otherwise copies txn->tw.dirtylru into the size_t located right before the listed page (skipped for MDBX_AVOID_MSYNC together with MDBX_WRITEMAP). */ +__hot int page_touch_modifable(MDBX_txn *txn, const page_t *const mp) { + tASSERT(txn, is_modifable(txn, mp) && txn->tw.dirtylist); + tASSERT(txn, !is_largepage(mp) && !is_subpage(mp)); + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + + const size_t n = dpl_search(txn, mp->pgno); + if (MDBX_AVOID_MSYNC && + unlikely(txn->tw.dirtylist->items[n].pgno != mp->pgno)) { /* slot n holds another pgno */ + tASSERT(txn, (txn->flags & MDBX_WRITEMAP)); + tASSERT(txn, n > 0 && n <= txn->tw.dirtylist->length + 1); + VERBOSE("unspill page %" PRIaPGNO, mp->pgno); +#if MDBX_ENABLE_PGOP_STAT + txn->env->lck->pgops.unspill.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + return page_dirty(txn, (page_t *)mp, 1); + } + + tASSERT(txn, n > 0 && n <= txn->tw.dirtylist->length); + tASSERT(txn, txn->tw.dirtylist->items[n].pgno == mp->pgno && + txn->tw.dirtylist->items[n].ptr == mp); + if (!MDBX_AVOID_MSYNC || (txn->flags & MDBX_WRITEMAP) == 0) { + size_t *const ptr = + ptr_disp(txn->tw.dirtylist->items[n].ptr, -(ptrdiff_t)sizeof(size_t)); + *ptr = txn->tw.dirtylru; /* presumably the page's LRU stamp - confirm against the dirty-list code */ + } + return MDBX_SUCCESS; +} +/* Makes a page that is not modifiable in this txn writable: sub-pages just get txn->front_txnid, frozen pages are copied into a newly allocated page, spilled pages are unspilled, and the remaining (parent-owned) pages are cloned; cursors pointing at the old page are then redirected to the new one. */ +__hot int page_touch_unmodifable(MDBX_txn *txn, MDBX_cursor *mc, + const page_t *const mp) { + tASSERT(txn, !is_modifable(txn, mp) && !is_largepage(mp)); + if (is_subpage(mp)) { + ((page_t *)mp)->txnid = txn->front_txnid; + return MDBX_SUCCESS; + } + + int rc; + page_t *np; + if (is_frozen(txn, mp)) { + /* CoW the page */ + rc = pnl_need(&txn->tw.retired_pages, 1); + if (unlikely(rc != MDBX_SUCCESS)) +
goto fail; + const pgr_t par = gc_alloc_single(mc); + rc = par.err; + np = par.page; + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + + const pgno_t pgno = np->pgno; + DEBUG("touched db %d page %" PRIaPGNO " -> %" PRIaPGNO, cursor_dbi_dbg(mc), + mp->pgno, pgno); + tASSERT(txn, mp->pgno != pgno); + pnl_append_prereserved(txn->tw.retired_pages, mp->pgno); + /* Update the parent page, if any, to point to the new page */ + if (likely(mc->top)) { + page_t *parent = mc->pg[mc->top - 1]; + node_t *node = page_node(parent, mc->ki[mc->top - 1]); + node_set_pgno(node, pgno); + } else { + mc->tree->root = pgno; + } + +#if MDBX_ENABLE_PGOP_STAT + txn->env->lck->pgops.cow.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + page_copy(np, mp, txn->env->ps); + np->pgno = pgno; + np->txnid = txn->front_txnid; + } else if (is_spilled(txn, mp)) { + pgr_t pur = page_unspill(txn, mp); + np = pur.page; + rc = pur.err; + if (likely(rc == MDBX_SUCCESS)) { + tASSERT(txn, np != nullptr); + goto done; + } + goto fail; + } else { + if (unlikely(!txn->parent)) { + ERROR("Unexpected not frozen/modifiable/spilled but shadowed %s " + "page %" PRIaPGNO " mod-txnid %" PRIaTXN "," + " without parent transaction, current txn %" PRIaTXN + " front %" PRIaTXN, + is_branch(mp) ? 
"branch" : "leaf", mp->pgno, mp->txnid, + mc->txn->txnid, mc->txn->front_txnid); + rc = MDBX_PROBLEM; + goto fail; + } + + DEBUG("clone db %d page %" PRIaPGNO, cursor_dbi_dbg(mc), mp->pgno); + tASSERT(txn, + txn->tw.dirtylist->length <= PAGELIST_LIMIT + MDBX_PNL_GRANULATE); + /* No - copy it */ + np = page_shadow_alloc(txn, 1); + if (unlikely(!np)) { + rc = MDBX_ENOMEM; + goto fail; + } + page_copy(np, mp, txn->env->ps); + + /* insert a clone of parent's dirty page, so don't touch dirtyroom */ + rc = page_dirty(txn, np, 1); + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + +#if MDBX_ENABLE_PGOP_STAT + txn->env->lck->pgops.clone.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + } + +done: + /* Adjust cursors pointing to mp */ + mc->pg[mc->top] = np; + MDBX_cursor *m2 = txn->cursors[cursor_dbi(mc)]; + if (mc->flags & z_inner) { + for (; m2; m2 = m2->next) { + MDBX_cursor *m3 = &m2->subcur->cursor; + if (m3->top < mc->top) + continue; + if (m3->pg[mc->top] == mp) + m3->pg[mc->top] = np; + } + } else { + for (; m2; m2 = m2->next) { + if (m2->top < mc->top) + continue; + if (m2->pg[mc->top] == mp) { + m2->pg[mc->top] = np; + if (is_leaf(np) && inner_pointed(m2)) + cursor_inner_refresh(m2, np, m2->ki[mc->top]); + } + } + } + return MDBX_SUCCESS; + +fail: + txn->flags |= MDBX_TXN_ERROR; + return rc; +} + +page_t *page_shadow_alloc(MDBX_txn *txn, size_t num) { + MDBX_env *env = txn->env; + page_t *np = env->shadow_reserve; + size_t size = env->ps; + if (likely(num == 1 && np)) { + eASSERT(env, env->shadow_reserve_len > 0); + MDBX_ASAN_UNPOISON_MEMORY_REGION(np, size); + VALGRIND_MEMPOOL_ALLOC(env, ptr_disp(np, -(ptrdiff_t)sizeof(size_t)), + size + sizeof(size_t)); + VALGRIND_MAKE_MEM_DEFINED(&page_next(np), sizeof(page_t *)); + env->shadow_reserve = page_next(np); + env->shadow_reserve_len -= 1; + } else { + size = pgno2bytes(env, num); + void *const ptr = osal_malloc(size + sizeof(size_t)); + if (unlikely(!ptr)) { + txn->flags |= MDBX_TXN_ERROR; + return nullptr; + } + 
VALGRIND_MEMPOOL_ALLOC(env, ptr, size + sizeof(size_t)); + np = ptr_disp(ptr, sizeof(size_t)); + } + + if ((env->flags & MDBX_NOMEMINIT) == 0) { + /* For a single page alloc, we init everything after the page header. + * For multi-page, we init the final page; if the caller needed that + * many pages they will be filling in at least up to the last page. */ + size_t skip = PAGEHDRSZ; + if (num > 1) + skip += pgno2bytes(env, num - 1); + memset(ptr_disp(np, skip), 0, size - skip); + } +#if MDBX_DEBUG + np->pgno = 0; +#endif + VALGRIND_MAKE_MEM_UNDEFINED(np, size); + np->flags = 0; + np->pages = (pgno_t)num; + return np; +} + +void page_shadow_release(MDBX_env *env, page_t *dp, size_t npages) { + VALGRIND_MAKE_MEM_UNDEFINED(dp, pgno2bytes(env, npages)); + MDBX_ASAN_UNPOISON_MEMORY_REGION(dp, pgno2bytes(env, npages)); + if (unlikely(env->flags & MDBX_PAGEPERTURB)) + memset(dp, -1, pgno2bytes(env, npages)); + if (likely(npages == 1 && + env->shadow_reserve_len < env->options.dp_reserve_limit)) { + MDBX_ASAN_POISON_MEMORY_REGION(dp, env->ps); + MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(dp), sizeof(page_t *)); + page_next(dp) = env->shadow_reserve; + VALGRIND_MEMPOOL_FREE(env, ptr_disp(dp, -(ptrdiff_t)sizeof(size_t))); + env->shadow_reserve = dp; + env->shadow_reserve_len += 1; + } else { + /* large pages just get freed directly */ + void *const ptr = ptr_disp(dp, -(ptrdiff_t)sizeof(size_t)); + VALGRIND_MEMPOOL_FREE(env, ptr); + osal_free(ptr); + } +} + +__cold static void page_kill(MDBX_txn *txn, page_t *mp, pgno_t pgno, + size_t npages) { + MDBX_env *const env = txn->env; + DEBUG("kill %zu page(s) %" PRIaPGNO, npages, pgno); + eASSERT(env, pgno >= NUM_METAS && npages); + if (!is_frozen(txn, mp)) { + const size_t bytes = pgno2bytes(env, npages); + memset(mp, -1, bytes); + mp->pgno = pgno; + if ((txn->flags & MDBX_WRITEMAP) == 0) + osal_pwrite(env->lazy_fd, mp, bytes, pgno2bytes(env, pgno)); + } else { + struct iovec iov[MDBX_AUXILARY_IOV_MAX]; + iov[0].iov_len = 
env->ps; + iov[0].iov_base = ptr_disp(env->page_auxbuf, env->ps); + size_t iov_off = pgno2bytes(env, pgno), n = 1; + while (--npages) { + iov[n] = iov[0]; /* every entry reuses the same one-page buffer */ + if (++n == MDBX_AUXILARY_IOV_MAX) { + osal_pwritev(env->lazy_fd, iov, MDBX_AUXILARY_IOV_MAX, iov_off); + iov_off += pgno2bytes(env, MDBX_AUXILARY_IOV_MAX); + n = 0; + } + } + osal_pwritev(env->lazy_fd, iov, n, iov_off); /* NOTE(review): n is 0 here when the initial npages is a multiple of MDBX_AUXILARY_IOV_MAX - confirm osal_pwritev() tolerates an empty vector */ + } +} +/* Tells whether page `pgno` may go to the loose list: the list must be shorter than dp_loose_limit and, with MDBX_ENABLE_REFUND, the page must lie more than dp_loose_limit pages below first_unallocated unless first_unallocated itself does not exceed that limit. */ +static inline bool suitable4loose(const MDBX_txn *txn, pgno_t pgno) { + /* TODO: + * 1) when "sequence economy" is enabled, check that the page is not + * adjacent to any of the pages already present in reclaimed. + * 2) consider dropping half of the loose list into reclaimed + once the loose list grows large. */ + return txn->tw.loose_count < txn->env->options.dp_loose_limit && + (!MDBX_ENABLE_REFUND || + /* skip pages near to the end in favor of compactification */ + txn->geo.first_unallocated > + pgno + txn->env->options.dp_loose_limit || + txn->geo.first_unallocated <= txn->env->options.dp_loose_limit); +} + +/* Retire, loosen or free a single page. + * + * For dirty pages, saves single pages to a list for future reuse in this same + * txn. It has been pulled from the GC and already resides on the dirty list, + * but has been deleted. Use these pages first before pulling again from the GC. + * + * If the page wasn't dirtied in this txn, just add it + * to this txn's free list. */ +int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno, + page_t *mp /* maybe null */, + unsigned pageflags /* maybe unknown/zero */) { + int rc; + MDBX_txn *const txn = mc->txn; + tASSERT(txn, !mp || (mp->pgno == pgno && mp->flags == pageflags)); + + /* During deleting entire subtrees, it is reasonable and possible to avoid + * reading leaf pages, i.e. significantly reduce hard page-faults & IOPs: + * - mp is null, i.e. the page has not yet been read; + * - pagetype is known and the P_LEAF bit is set; + * - we can determine the page status via scanning the lists + * of dirty and spilled pages.
+ * + * On the other hand, this could be suboptimal for WRITEMAP mode, since + * requires support the list of dirty pages and avoid explicit spilling. + * So for flexibility and avoid extra internal dependencies we just + * fallback to reading if dirty list was not allocated yet. */ + size_t di = 0, si = 0, npages = 1; + enum page_status { + unknown, + frozen, + spilled, + shadowed, + modifable + } status = unknown; + + if (unlikely(!mp)) { + if (ASSERT_ENABLED() && pageflags) { + pgr_t check; + check = page_get_any(mc, pgno, txn->front_txnid); + if (unlikely(check.err != MDBX_SUCCESS)) + return check.err; + tASSERT(txn, ((unsigned)check.page->flags & ~P_SPILLED) == + (pageflags & ~P_FROZEN)); + tASSERT(txn, !(pageflags & P_FROZEN) || is_frozen(txn, check.page)); + } + if (pageflags & P_FROZEN) { + status = frozen; + if (ASSERT_ENABLED()) { + for (MDBX_txn *scan = txn; scan; scan = scan->parent) { + tASSERT(txn, !txn->tw.spilled.list || !spill_search(scan, pgno)); + tASSERT(txn, !scan->tw.dirtylist || !debug_dpl_find(scan, pgno)); + } + } + goto status_done; + } else if (pageflags && txn->tw.dirtylist) { + if ((di = dpl_exist(txn, pgno)) != 0) { + mp = txn->tw.dirtylist->items[di].ptr; + tASSERT(txn, is_modifable(txn, mp)); + status = modifable; + goto status_done; + } + if ((si = spill_search(txn, pgno)) != 0) { + status = spilled; + goto status_done; + } + for (MDBX_txn *parent = txn->parent; parent; parent = parent->parent) { + if (dpl_exist(parent, pgno)) { + status = shadowed; + goto status_done; + } + if (spill_search(parent, pgno)) { + status = spilled; + goto status_done; + } + } + status = frozen; + goto status_done; + } + + pgr_t pg = page_get_any(mc, pgno, txn->front_txnid); + if (unlikely(pg.err != MDBX_SUCCESS)) + return pg.err; + mp = pg.page; + tASSERT(txn, !pageflags || mp->flags == pageflags); + pageflags = mp->flags; + } + + if (is_frozen(txn, mp)) { + status = frozen; + tASSERT(txn, !is_modifable(txn, mp)); + tASSERT(txn, !is_spilled(txn, mp)); + 
tASSERT(txn, !is_shadowed(txn, mp)); + tASSERT(txn, !debug_dpl_find(txn, pgno)); + tASSERT(txn, !txn->tw.spilled.list || !spill_search(txn, pgno)); + } else if (is_modifable(txn, mp)) { + status = modifable; + if (txn->tw.dirtylist) + di = dpl_exist(txn, pgno); + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) || !is_spilled(txn, mp)); + tASSERT(txn, !txn->tw.spilled.list || !spill_search(txn, pgno)); + } else if (is_shadowed(txn, mp)) { + status = shadowed; + tASSERT(txn, !txn->tw.spilled.list || !spill_search(txn, pgno)); + tASSERT(txn, !debug_dpl_find(txn, pgno)); + } else { + tASSERT(txn, is_spilled(txn, mp)); + status = spilled; + si = spill_search(txn, pgno); + tASSERT(txn, !debug_dpl_find(txn, pgno)); + } + +status_done: + if (likely((pageflags & P_LARGE) == 0)) { + STATIC_ASSERT(P_BRANCH == 1); + const bool is_branch = pageflags & P_BRANCH; + cASSERT(mc, ((pageflags & P_LEAF) == 0) == is_branch); + if (unlikely(mc->flags & z_inner)) { + tree_t *outer = outer_tree(mc); + cASSERT(mc, !is_branch || outer->branch_pages > 0); + outer->branch_pages -= is_branch; + cASSERT(mc, is_branch || outer->leaf_pages > 0); + outer->leaf_pages -= 1 - is_branch; + } + cASSERT(mc, !is_branch || mc->tree->branch_pages > 0); + mc->tree->branch_pages -= is_branch; + cASSERT(mc, is_branch || mc->tree->leaf_pages > 0); + mc->tree->leaf_pages -= 1 - is_branch; + } else { + npages = mp->pages; + cASSERT(mc, mc->tree->large_pages >= npages); + mc->tree->large_pages -= (pgno_t)npages; + } + + if (status == frozen) { + retire: + DEBUG("retire %zu page %" PRIaPGNO, npages, pgno); + rc = pnl_append_span(&txn->tw.retired_pages, pgno, npages); + tASSERT(txn, dpl_check(txn)); + return rc; + } + + /* Возврат страниц в нераспределенный "хвост" БД. + * Содержимое страниц не уничтожается, а для вложенных транзакций граница + * нераспределенного "хвоста" БД сдвигается только при их коммите. 
*/ + if (MDBX_ENABLE_REFUND && + unlikely(pgno + npages == txn->geo.first_unallocated)) { + const char *kind = nullptr; + if (status == modifable) { + /* Страница испачкана в этой транзакции, но до этого могла быть + * аллоцирована, испачкана и пролита в одной из родительских транзакций. + * Её МОЖНО вытолкнуть в нераспределенный хвост. */ + kind = "dirty"; + /* Remove from dirty list */ + page_wash(txn, di, mp, npages); + } else if (si) { + /* Страница пролита в этой транзакции, т.е. она аллоцирована + * и запачкана в этой или одной из родительских транзакций. + * Её МОЖНО вытолкнуть в нераспределенный хвост. */ + kind = "spilled"; + tASSERT(txn, status == spilled); + spill_remove(txn, si, npages); + } else { + /* Страница аллоцирована, запачкана и возможно пролита в одной + * из родительских транзакций. + * Её МОЖНО вытолкнуть в нераспределенный хвост. */ + kind = "parent's"; + if (ASSERT_ENABLED() && mp) { + kind = nullptr; + for (MDBX_txn *parent = txn->parent; parent; parent = parent->parent) { + if (spill_search(parent, pgno)) { + kind = "parent-spilled"; + tASSERT(txn, status == spilled); + break; + } + if (mp == debug_dpl_find(parent, pgno)) { + kind = "parent-dirty"; + tASSERT(txn, status == shadowed); + break; + } + } + tASSERT(txn, kind != nullptr); + } + tASSERT(txn, status == spilled || status == shadowed); + } + DEBUG("refunded %zu %s page %" PRIaPGNO, npages, kind, pgno); + txn->geo.first_unallocated = pgno; + txn_refund(txn); + return MDBX_SUCCESS; + } + + if (status == modifable) { + /* Dirty page from this transaction */ + /* If suitable we can reuse it through loose list */ + if (likely(npages == 1 && suitable4loose(txn, pgno)) && + (di || !txn->tw.dirtylist)) { + DEBUG("loosen dirty page %" PRIaPGNO, pgno); + if (MDBX_DEBUG != 0 || unlikely(txn->env->flags & MDBX_PAGEPERTURB)) + memset(page_data(mp), -1, txn->env->ps - PAGEHDRSZ); + mp->txnid = INVALID_TXNID; + mp->flags = P_LOOSE; + page_next(mp) = txn->tw.loose_pages; + txn->tw.loose_pages = 
mp; + txn->tw.loose_count++; +#if MDBX_ENABLE_REFUND + txn->tw.loose_refund_wl = (pgno + 2 > txn->tw.loose_refund_wl) + ? pgno + 2 + : txn->tw.loose_refund_wl; +#endif /* MDBX_ENABLE_REFUND */ + VALGRIND_MAKE_MEM_NOACCESS(page_data(mp), txn->env->ps - PAGEHDRSZ); + MDBX_ASAN_POISON_MEMORY_REGION(page_data(mp), txn->env->ps - PAGEHDRSZ); + return MDBX_SUCCESS; + } + +#if !MDBX_DEBUG && !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) + if (unlikely(txn->env->flags & MDBX_PAGEPERTURB)) +#endif + { + /* The page could have been modified in one of the parent transactions, + * including being spilled later and then loaded and modified again. + * In both cases it must neither be wiped on disk nor marked inaccessible + * for asan and/or valgrind */ + for (MDBX_txn *parent = txn->parent; + parent && (parent->flags & MDBX_TXN_SPILLS); + parent = parent->parent) { + if (spill_intersect(parent, pgno, npages)) + goto skip_invalidate; + if (dpl_intersect(parent, pgno, npages)) + goto skip_invalidate; + } + +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) + if (MDBX_DEBUG != 0 || unlikely(txn->env->flags & MDBX_PAGEPERTURB)) +#endif + page_kill(txn, mp, pgno, npages); + if ((txn->flags & MDBX_WRITEMAP) == 0) { + VALGRIND_MAKE_MEM_NOACCESS(page_data(pgno2page(txn->env, pgno)), + pgno2bytes(txn->env, npages) - PAGEHDRSZ); + MDBX_ASAN_POISON_MEMORY_REGION(page_data(pgno2page(txn->env, pgno)), + pgno2bytes(txn->env, npages) - + PAGEHDRSZ); + } + } + skip_invalidate: + + /* wash dirty page */ + page_wash(txn, di, mp, npages); + + reclaim: + DEBUG("reclaim %zu %s page %" PRIaPGNO, npages, "dirty", pgno); + rc = pnl_insert_span(&txn->tw.relist, pgno, npages); + tASSERT(txn, + pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - + MDBX_ENABLE_REFUND)); + tASSERT(txn, dpl_check(txn)); + return rc; + } + + if (si) { + /* Page was spilled in this txn */ + spill_remove(txn, si, npages); + /* Страница могла быть выделена и затем пролита в этой транзакции, + * тогда её
необходимо поместить в reclaimed-список. + * Либо она могла быть выделена в одной из родительских транзакций и затем + * пролита в этой транзакции, тогда её необходимо поместить в + * retired-список для последующей фильтрации при коммите. */ + for (MDBX_txn *parent = txn->parent; parent; parent = parent->parent) { + if (dpl_exist(parent, pgno)) + goto retire; + } + /* Страница точно была выделена в этой транзакции + * и теперь может быть использована повторно. */ + goto reclaim; + } + + if (status == shadowed) { + /* Dirty page MUST BE a clone from (one of) parent transaction(s). */ + if (ASSERT_ENABLED()) { + const page_t *parent_dp = nullptr; + /* Check parent(s)'s dirty lists. */ + for (MDBX_txn *parent = txn->parent; parent && !parent_dp; + parent = parent->parent) { + tASSERT(txn, !spill_search(parent, pgno)); + parent_dp = debug_dpl_find(parent, pgno); + } + tASSERT(txn, parent_dp && (!mp || parent_dp == mp)); + } + /* Страница была выделена в родительской транзакции и теперь может быть + * использована повторно, но только внутри этой транзакции, либо дочерних. + */ + goto reclaim; + } + + /* Страница может входить в доступный читателям MVCC-снимок, либо же она + * могла быть выделена, а затем пролита в одной из родительских + * транзакций. Поэтому пока помещаем её в retired-список, который будет + * фильтроваться относительно dirty- и spilled-списков родительских + * транзакций при коммите дочерних транзакций, либо же будет записан + * в GC в неизменном виде. 
*/ + goto retire; +} + +__hot int __must_check_result page_dirty(MDBX_txn *txn, page_t *mp, + size_t npages) { + tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); + mp->txnid = txn->front_txnid; + if (!txn->tw.dirtylist) { + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); + txn->tw.writemap_dirty_npages += npages; + tASSERT(txn, txn->tw.spilled.list == nullptr); + return MDBX_SUCCESS; + } + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + +#if xMDBX_DEBUG_SPILLING == 2 + txn->env->debug_dirtied_act += 1; + ENSURE(txn->env, txn->env->debug_dirtied_act < txn->env->debug_dirtied_est); + ENSURE(txn->env, txn->tw.dirtyroom + txn->tw.loose_count > 0); +#endif /* xMDBX_DEBUG_SPILLING == 2 */ + + int rc; + if (unlikely(txn->tw.dirtyroom == 0)) { + if (txn->tw.loose_count) { + page_t *lp = txn->tw.loose_pages; + DEBUG("purge-and-reclaim loose page %" PRIaPGNO, lp->pgno); + rc = pnl_insert_span(&txn->tw.relist, lp->pgno, 1); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + size_t di = dpl_search(txn, lp->pgno); + tASSERT(txn, txn->tw.dirtylist->items[di].ptr == lp); + dpl_remove(txn, di); + MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *)); + VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *)); + txn->tw.loose_pages = page_next(lp); + txn->tw.loose_count--; + txn->tw.dirtyroom++; + if (!MDBX_AVOID_MSYNC || !(txn->flags & MDBX_WRITEMAP)) + page_shadow_release(txn->env, lp, 1); + } else { + ERROR("Dirtyroom is depleted, DPL length %zu", txn->tw.dirtylist->length); + if (!MDBX_AVOID_MSYNC || !(txn->flags & MDBX_WRITEMAP)) + page_shadow_release(txn->env, mp, npages); + return MDBX_TXN_FULL; + } + } + + rc = dpl_append(txn, mp->pgno, mp, npages); + if (unlikely(rc != MDBX_SUCCESS)) { + bailout: + txn->flags |= MDBX_TXN_ERROR; + return rc; + } + txn->tw.dirtyroom--; + tASSERT(txn, dpl_check(txn)); + return MDBX_SUCCESS; +} + +size_t page_subleaf2_reserve(const MDBX_env *const env, size_t host_page_room, + 
size_t subpage_len, size_t item_len) { + eASSERT(env, (subpage_len & 1) == 0); + eASSERT(env, env->subpage_reserve_prereq > env->subpage_room_threshold + + env->subpage_reserve_limit && + env->leaf_nodemax >= env->subpage_limit + NODESIZE); + size_t reserve = 0; + for (size_t n = 0; + n < 5 && reserve + item_len <= env->subpage_reserve_limit && + EVEN_CEIL(subpage_len + item_len) <= env->subpage_limit && + host_page_room >= + env->subpage_reserve_prereq + EVEN_CEIL(subpage_len + item_len); + ++n) { + subpage_len += item_len; + reserve += item_len; + } + return reserve + (subpage_len & 1); +} diff --git a/src/page-ops.h b/src/page-ops.h new file mode 100644 index 00000000..5e58ab77 --- /dev/null +++ b/src/page-ops.h @@ -0,0 +1,179 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +MDBX_INTERNAL int __must_check_result tree_search_finalize(MDBX_cursor *mc, + const MDBX_val *key, + int flags); +MDBX_INTERNAL int tree_search_lowest(MDBX_cursor *mc); + +enum page_search_flags { + Z_MODIFY = 1, + Z_ROOTONLY = 2, + Z_FIRST = 4, + Z_LAST = 8, +}; +MDBX_INTERNAL int __must_check_result tree_search(MDBX_cursor *mc, + const MDBX_val *key, + int flags); + +#define MDBX_SPLIT_REPLACE MDBX_APPENDDUP /* newkey is not new */ +MDBX_INTERNAL int __must_check_result page_split(MDBX_cursor *mc, + const MDBX_val *const newkey, + MDBX_val *const newdata, + pgno_t newpgno, + const unsigned naf); + +/*----------------------------------------------------------------------------*/ + +MDBX_INTERNAL int MDBX_PRINTF_ARGS(2, 3) + bad_page(const page_t *mp, const char *fmt, ...); + +MDBX_INTERNAL void MDBX_PRINTF_ARGS(2, 3) + poor_page(const page_t *mp, const char *fmt, ...); + +MDBX_NOTHROW_PURE_FUNCTION static inline bool is_frozen(const MDBX_txn *txn, + const page_t *mp) { + return mp->txnid < txn->txnid; +} + +MDBX_NOTHROW_PURE_FUNCTION static inline bool is_spilled(const MDBX_txn 
*txn, + const page_t *mp) { + return mp->txnid == txn->txnid; +} +/* True when the page's txnid is greater than txn->txnid. */ +MDBX_NOTHROW_PURE_FUNCTION static inline bool is_shadowed(const MDBX_txn *txn, + const page_t *mp) { + return mp->txnid > txn->txnid; +} +/* True when the page's txnid does not exceed txn->front_txnid. */ +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool +is_correct(const MDBX_txn *txn, const page_t *mp) { + return mp->txnid <= txn->front_txnid; +} +/* True when the page's txnid equals txn->front_txnid. */ +MDBX_NOTHROW_PURE_FUNCTION static inline bool is_modifable(const MDBX_txn *txn, + const page_t *mp) { + return mp->txnid == txn->front_txnid; +} + +MDBX_INTERNAL int __must_check_result page_check(const MDBX_cursor *const mc, + const page_t *const mp); + +MDBX_INTERNAL pgr_t page_get_any(const MDBX_cursor *const mc, const pgno_t pgno, + const txnid_t front); + +MDBX_INTERNAL pgr_t page_get_three(const MDBX_cursor *const mc, + const pgno_t pgno, const txnid_t front); + +MDBX_INTERNAL pgr_t page_get_large(const MDBX_cursor *const mc, + const pgno_t pgno, const txnid_t front); +/* Wrapper over page_get_three(): stores the resulting page pointer into *mp and returns the error code. */ +static inline int __must_check_result page_get(const MDBX_cursor *mc, + const pgno_t pgno, page_t **mp, + const txnid_t front) { + pgr_t ret = page_get_three(mc, pgno, front); + *mp = ret.page; + return ret.err; +} + +/*----------------------------------------------------------------------------*/ + +MDBX_INTERNAL int __must_check_result page_dirty(MDBX_txn *txn, page_t *mp, + size_t npages); +MDBX_INTERNAL pgr_t page_new(MDBX_cursor *mc, const unsigned flags); +MDBX_INTERNAL pgr_t page_new_large(MDBX_cursor *mc, const size_t npages); +MDBX_INTERNAL int page_touch_modifable(MDBX_txn *txn, const page_t *const mp); +MDBX_INTERNAL int page_touch_unmodifable(MDBX_txn *txn, MDBX_cursor *mc, + const page_t *const mp); +/* Makes the page on top of the cursor stack writable for the cursor's txn, dispatching on whether it is already modifiable. */ +static inline int page_touch(MDBX_cursor *mc) { + page_t *const mp = mc->pg[mc->top]; + MDBX_txn *txn = mc->txn; + + tASSERT(txn, mc->txn->flags & MDBX_TXN_DIRTY); + tASSERT(txn, + F_ISSET(*cursor_dbi_state(mc), DBI_LINDO | DBI_VALID | DBI_DIRTY)); + tASSERT(txn, !is_largepage(mp)); + if (ASSERT_ENABLED()) { + if
(mc->flags & z_inner) { + subcur_t *mx = container_of(mc->tree, subcur_t, nested_tree); + cursor_couple_t *couple = container_of(mx, cursor_couple_t, inner); + tASSERT(txn, mc->tree == &couple->outer.subcur->nested_tree); + tASSERT(txn, &mc->clc->k == &couple->outer.clc->v); + tASSERT(txn, *couple->outer.dbi_state & DBI_DIRTY); + } + tASSERT(txn, dpl_check(txn)); + } + + if (is_modifable(txn, mp)) { + if (!txn->tw.dirtylist) { + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) && !MDBX_AVOID_MSYNC); + return MDBX_SUCCESS; + } + return is_subpage(mp) ? MDBX_SUCCESS : page_touch_modifable(txn, mp); + } + return page_touch_unmodifable(txn, mc, mp); +} + +MDBX_INTERNAL void page_copy(page_t *const dst, const page_t *const src, + const size_t size); +MDBX_INTERNAL pgr_t __must_check_result page_unspill(MDBX_txn *const txn, + const page_t *const mp); + +MDBX_INTERNAL page_t *page_shadow_alloc(MDBX_txn *txn, size_t num); + +MDBX_INTERNAL void page_shadow_release(MDBX_env *env, page_t *dp, + size_t npages); + +MDBX_INTERNAL int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno, + page_t *mp /* maybe null */, + unsigned pageflags /* maybe unknown/zero */); + +static inline int page_retire(MDBX_cursor *mc, page_t *mp) { + return page_retire_ex(mc, mp->pgno, mp, mp->flags); +} + +static inline void page_wash(MDBX_txn *txn, size_t di, page_t *const mp, + const size_t npages) { + tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); + mp->txnid = INVALID_TXNID; + mp->flags = P_BAD; + + if (txn->tw.dirtylist) { + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + tASSERT(txn, + MDBX_AVOID_MSYNC || (di && txn->tw.dirtylist->items[di].ptr == mp)); + if (!MDBX_AVOID_MSYNC || di) { + dpl_remove_ex(txn, di, npages); + txn->tw.dirtyroom++; + tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == + (txn->parent ? 
txn->parent->tw.dirtyroom + : txn->env->options.dp_limit)); + if (!MDBX_AVOID_MSYNC || !(txn->flags & MDBX_WRITEMAP)) { + page_shadow_release(txn->env, mp, npages); + return; + } + } + } else { + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) && !MDBX_AVOID_MSYNC && !di); + txn->tw.writemap_dirty_npages -= (txn->tw.writemap_dirty_npages > npages) + ? npages + : txn->tw.writemap_dirty_npages; + } + VALGRIND_MAKE_MEM_UNDEFINED(mp, PAGEHDRSZ); + VALGRIND_MAKE_MEM_NOACCESS(page_data(mp), + pgno2bytes(txn->env, npages) - PAGEHDRSZ); + MDBX_ASAN_POISON_MEMORY_REGION(page_data(mp), + pgno2bytes(txn->env, npages) - PAGEHDRSZ); +} + +MDBX_INTERNAL size_t page_subleaf2_reserve(const MDBX_env *const env, + size_t host_page_room, + size_t subpage_len, size_t item_len); + +#define page_next(mp) \ + (*(page_t **)ptr_disp((mp)->entries, sizeof(void *) - sizeof(uint32_t))) diff --git a/src/page-search.c b/src/page-search.c new file mode 100644 index 00000000..db985b8d --- /dev/null +++ b/src/page-search.c @@ -0,0 +1,147 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \note Please refer to the COPYRIGHT file for explanations license change, +/// credits and acknowledgments. +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +/* Search for the lowest key under the current branch page. + * This just bypasses a numkeys check in the current page + * before calling tree_search_finalize(), because the callers + * are all in situations where the current page is known to + * be underfilled. 
*/ +__hot int tree_search_lowest(MDBX_cursor *mc) { + cASSERT(mc, mc->top >= 0); + page_t *mp = mc->pg[mc->top]; + cASSERT(mc, is_branch(mp)); + + node_t *node = page_node(mp, 0); /* node #0 of the current branch page */ + int err = page_get(mc, node_pgno(node), &mp, mp->txnid); + if (unlikely(err != MDBX_SUCCESS)) + return err; + + mc->ki[mc->top] = 0; + err = cursor_push(mc, mp, 0); + if (unlikely(err != MDBX_SUCCESS)) + return err; + return tree_search_finalize(mc, nullptr, Z_FIRST); +} +/* Positions the cursor on the tree root, reading the root page unless mc->pg[0] already is that page, then touches it for Z_MODIFY and continues with tree_search_finalize() unless Z_ROOTONLY is set. Returns MDBX_NOTFOUND for an empty tree; a blocked txn yields MDBX_BAD_TXN, and every failure routed through `bailout` calls be_poor(mc) first. */ +__hot int tree_search(MDBX_cursor *mc, const MDBX_val *key, int flags) { + int err; + if (unlikely(mc->txn->flags & MDBX_TXN_BLOCKED)) { + DEBUG("%s", "transaction has failed, must abort"); + err = MDBX_BAD_TXN; + bailout: + be_poor(mc); + return err; + } + + const size_t dbi = cursor_dbi(mc); + if (unlikely(*cursor_dbi_state(mc) & DBI_STALE)) { + err = sdb_fetch(mc->txn, dbi); + if (unlikely(err != MDBX_SUCCESS)) + goto bailout; + } + + const pgno_t root = mc->tree->root; + if (unlikely(root == P_INVALID)) { + DEBUG("%s", "tree is empty"); + cASSERT(mc, is_poor(mc)); + return MDBX_NOTFOUND; + } + + cASSERT(mc, root >= NUM_METAS && root < mc->txn->geo.first_unallocated); + if (mc->top < 0 || mc->pg[0]->pgno != root) { + txnid_t pp_txnid = mc->tree->mod_txnid; + pp_txnid = /* tree->mod_txnid maybe zero in a legacy DB */ pp_txnid + ? pp_txnid + : mc->txn->txnid; + if ((mc->txn->flags & MDBX_TXN_RDONLY) == 0) { + MDBX_txn *scan = mc->txn; + do + if ((scan->flags & MDBX_TXN_DIRTY) && + (dbi == MAIN_DBI || (scan->dbi_state[dbi] & DBI_DIRTY))) { + /* After nested transactions are committed, mod_txnid > front is possible */ + pp_txnid = scan->front_txnid; + break; + } + while (unlikely((scan = scan->parent) != nullptr)); + } + err = page_get(mc, root, &mc->pg[0], pp_txnid); + if (unlikely(err != MDBX_SUCCESS)) + goto bailout; + } + + mc->top = 0; + mc->ki[0] = (flags & Z_LAST) ?
page_numkeys(mc->pg[0]) - 1 : 0; + DEBUG("db %d root page %" PRIaPGNO " has flags 0x%X", cursor_dbi_dbg(mc), + root, mc->pg[0]->flags); + + if (flags & Z_MODIFY) { + err = page_touch(mc); + if (unlikely(err != MDBX_SUCCESS)) + goto bailout; + } + + if (flags & Z_ROOTONLY) + return MDBX_SUCCESS; + + return tree_search_finalize(mc, key, flags); +} + +__hot __noinline int tree_search_finalize(MDBX_cursor *mc, const MDBX_val *key, + int flags) { + cASSERT(mc, !is_poor(mc)); + DKBUF_DEBUG; + int err; + page_t *mp = mc->pg[mc->top]; + intptr_t ki = (flags & Z_FIRST) ? 0 : page_numkeys(mp) - 1; + while (is_branch(mp)) { + DEBUG("branch page %" PRIaPGNO " has %zu keys", mp->pgno, page_numkeys(mp)); + cASSERT(mc, page_numkeys(mp) > 1); + DEBUG("found index 0 to page %" PRIaPGNO, node_pgno(page_node(mp, 0))); + + if ((flags & (Z_FIRST | Z_LAST)) == 0) { + const struct node_search_result nsr = node_search(mc, key); + if (likely(nsr.node)) + ki = mc->ki[mc->top] + (intptr_t)nsr.exact - 1; + DEBUG("following index %zu for key [%s]", ki, DKEY_DEBUG(key)); + } + + err = page_get(mc, node_pgno(page_node(mp, ki)), &mp, mp->txnid); + if (unlikely(err != MDBX_SUCCESS)) + goto bailout; + + mc->ki[mc->top] = (indx_t)ki; + ki = (flags & Z_FIRST) ? 0 : page_numkeys(mp) - 1; + err = cursor_push(mc, mp, ki); + if (unlikely(err != MDBX_SUCCESS)) + goto bailout; + + if (flags & Z_MODIFY) { + err = page_touch(mc); + if (unlikely(err != MDBX_SUCCESS)) + goto bailout; + mp = mc->pg[mc->top]; + } + } + + if (!MDBX_DISABLE_VALIDATION && unlikely(!check_leaf_type(mc, mp))) { + ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", + mp->pgno, mp->flags); + err = MDBX_CORRUPTED; + bailout: + be_poor(mc); + return err; + } + + DEBUG("found leaf page %" PRIaPGNO " for key [%s]", mp->pgno, + DKEY_DEBUG(key)); + /* Логически верно, но (в текущем понимании) нет необходимости. + Однако, стоит ещё по-проверять/по-тестировать. 
+ Возможно есть сценарий, в котором очистка флагов всё-таки требуется. + + be_filled(mc); */ + return MDBX_SUCCESS; +} diff --git a/src/pnl.c b/src/pnl.c new file mode 100644 index 00000000..e8825c6d --- /dev/null +++ b/src/pnl.c @@ -0,0 +1,254 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +MDBX_INTERNAL pnl_t pnl_alloc(size_t size) { + size_t bytes = pnl_size2bytes(size); + pnl_t pnl = osal_malloc(bytes); + if (likely(pnl)) { +#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) + bytes = malloc_usable_size(pnl); +#endif /* malloc_usable_size */ + pnl[0] = pnl_bytes2size(bytes); + assert(pnl[0] >= size); + pnl += 1; + *pnl = 0; + } + return pnl; +} + +MDBX_INTERNAL void pnl_free(pnl_t pnl) { + if (likely(pnl)) + osal_free(pnl - 1); +} + +MDBX_INTERNAL void pnl_shrink(pnl_t __restrict *__restrict ppnl) { + assert(pnl_bytes2size(pnl_size2bytes(MDBX_PNL_INITIAL)) >= MDBX_PNL_INITIAL && + pnl_bytes2size(pnl_size2bytes(MDBX_PNL_INITIAL)) < + MDBX_PNL_INITIAL * 3 / 2); + assert(MDBX_PNL_GETSIZE(*ppnl) <= PAGELIST_LIMIT && + MDBX_PNL_ALLOCLEN(*ppnl) >= MDBX_PNL_GETSIZE(*ppnl)); + MDBX_PNL_SETSIZE(*ppnl, 0); + if (unlikely(MDBX_PNL_ALLOCLEN(*ppnl) > + MDBX_PNL_INITIAL * (MDBX_PNL_PREALLOC_FOR_RADIXSORT ? 
8 : 4) - + MDBX_CACHELINE_SIZE / sizeof(pgno_t))) { + size_t bytes = pnl_size2bytes(MDBX_PNL_INITIAL * 2); + pnl_t pnl = osal_realloc(*ppnl - 1, bytes); + if (likely(pnl)) { +#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) + bytes = malloc_usable_size(pnl); +#endif /* malloc_usable_size */ + *pnl = pnl_bytes2size(bytes); + *ppnl = pnl + 1; + } + } +} + +MDBX_INTERNAL int pnl_reserve(pnl_t __restrict *__restrict ppnl, + const size_t wanna) { + const size_t allocated = MDBX_PNL_ALLOCLEN(*ppnl); + assert(MDBX_PNL_GETSIZE(*ppnl) <= PAGELIST_LIMIT && + MDBX_PNL_ALLOCLEN(*ppnl) >= MDBX_PNL_GETSIZE(*ppnl)); + if (likely(allocated >= wanna)) + return MDBX_SUCCESS; + + if (unlikely(wanna > /* paranoia */ PAGELIST_LIMIT)) { + ERROR("PNL too long (%zu > %zu)", wanna, (size_t)PAGELIST_LIMIT); + return MDBX_TXN_FULL; + } + + const size_t size = (wanna + wanna - allocated < PAGELIST_LIMIT) + ? wanna + wanna - allocated + : PAGELIST_LIMIT; + size_t bytes = pnl_size2bytes(size); + pnl_t pnl = osal_realloc(*ppnl - 1, bytes); + if (likely(pnl)) { +#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) + bytes = malloc_usable_size(pnl); +#endif /* malloc_usable_size */ + *pnl = pnl_bytes2size(bytes); + assert(*pnl >= wanna); + *ppnl = pnl + 1; + return MDBX_SUCCESS; + } + return MDBX_ENOMEM; +} + +static __always_inline int __must_check_result pnl_append_stepped( + unsigned step, __restrict pnl_t *ppnl, pgno_t pgno, size_t n) { + assert(n > 0); + int rc = pnl_need(ppnl, n); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + const pnl_t pnl = *ppnl; + if (likely(n == 1)) { + pnl_append_prereserved(pnl, pgno); + return MDBX_SUCCESS; + } + +#if MDBX_PNL_ASCENDING + size_t w = MDBX_PNL_GETSIZE(pnl); + do { + pnl[++w] = pgno; + pgno += step; + } while (--n); + MDBX_PNL_SETSIZE(pnl, w); +#else + size_t w = MDBX_PNL_GETSIZE(pnl) + n; + MDBX_PNL_SETSIZE(pnl, w); + do { + pnl[w--] = pgno; + pgno += step; + } while (--n); +#endif + 
return MDBX_SUCCESS; +} + +__hot MDBX_INTERNAL int __must_check_result +spill_append_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n) { + return pnl_append_stepped(2, ppnl, pgno << 1, n); +} + +__hot MDBX_INTERNAL int __must_check_result +pnl_append_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n) { + return pnl_append_stepped(1, ppnl, pgno, n); +} + +__hot MDBX_INTERNAL int __must_check_result +pnl_insert_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n) { + assert(n > 0); + int rc = pnl_need(ppnl, n); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + const pnl_t pnl = *ppnl; + size_t r = MDBX_PNL_GETSIZE(pnl), w = r + n; + MDBX_PNL_SETSIZE(pnl, w); + while (r && MDBX_PNL_DISORDERED(pnl[r], pgno)) + pnl[w--] = pnl[r--]; + + for (pgno_t fill = MDBX_PNL_ASCENDING ? pgno + n : pgno; w > r; --w) + pnl[w] = MDBX_PNL_ASCENDING ? --fill : fill++; + + return MDBX_SUCCESS; +} + +__hot __noinline MDBX_INTERNAL bool pnl_check(const const_pnl_t pnl, + const size_t limit) { + assert(limit >= MIN_PAGENO - MDBX_ENABLE_REFUND); + if (likely(MDBX_PNL_GETSIZE(pnl))) { + if (unlikely(MDBX_PNL_GETSIZE(pnl) > PAGELIST_LIMIT)) + return false; + if (unlikely(MDBX_PNL_LEAST(pnl) < MIN_PAGENO)) + return false; + if (unlikely(MDBX_PNL_MOST(pnl) >= limit)) + return false; + + if ((!MDBX_DISABLE_VALIDATION || AUDIT_ENABLED()) && + likely(MDBX_PNL_GETSIZE(pnl) > 1)) { + const pgno_t *scan = MDBX_PNL_BEGIN(pnl); + const pgno_t *const end = MDBX_PNL_END(pnl); + pgno_t prev = *scan++; + do { + if (unlikely(!MDBX_PNL_ORDERED(prev, *scan))) + return false; + prev = *scan; + } while (likely(++scan != end)); + } + } + return true; +} + +static __always_inline void +pnl_merge_inner(pgno_t *__restrict dst, const pgno_t *__restrict src_a, + const pgno_t *__restrict src_b, + const pgno_t *__restrict const src_b_detent) { + do { +#if MDBX_HAVE_CMOV + const bool flag = MDBX_PNL_ORDERED(*src_b, *src_a); +#if defined(__LCC__) || __CLANG_PREREQ(13, 0) + // lcc 1.26: 13ШК (подготовка и первая 
итерация) + 7ШК (цикл), БЕЗ loop-mode + // gcc>=7: cmp+jmp с возвратом в тело цикла (WTF?) + // gcc<=6: cmov×3 + // clang<=12: cmov×3 + // clang>=13: cmov, set+add/sub + *dst = flag ? *src_a-- : *src_b--; +#else + // gcc: cmov, cmp+set+add/sub + // clang<=5: cmov×2, set+add/sub + // clang>=6: cmov, set+add/sub + *dst = flag ? *src_a : *src_b; + src_b += (ptrdiff_t)flag - 1; + src_a -= flag; +#endif + --dst; +#else /* MDBX_HAVE_CMOV */ + while (MDBX_PNL_ORDERED(*src_b, *src_a)) + *dst-- = *src_a--; + *dst-- = *src_b--; +#endif /* !MDBX_HAVE_CMOV */ + } while (likely(src_b > src_b_detent)); +} + +__hot MDBX_INTERNAL size_t pnl_merge(pnl_t dst, const pnl_t src) { + assert(pnl_check_allocated(dst, MAX_PAGENO + 1)); + assert(pnl_check(src, MAX_PAGENO + 1)); + const size_t src_len = MDBX_PNL_GETSIZE(src); + const size_t dst_len = MDBX_PNL_GETSIZE(dst); + size_t total = dst_len; + assert(MDBX_PNL_ALLOCLEN(dst) >= total); + if (likely(src_len > 0)) { + total += src_len; + if (!MDBX_DEBUG && total < (MDBX_HAVE_CMOV ? 21 : 12)) + goto avoid_call_libc_for_short_cases; + if (dst_len == 0 || + MDBX_PNL_ORDERED(MDBX_PNL_LAST(dst), MDBX_PNL_FIRST(src))) + memcpy(MDBX_PNL_END(dst), MDBX_PNL_BEGIN(src), src_len * sizeof(pgno_t)); + else if (MDBX_PNL_ORDERED(MDBX_PNL_LAST(src), MDBX_PNL_FIRST(dst))) { + memmove(MDBX_PNL_BEGIN(dst) + src_len, MDBX_PNL_BEGIN(dst), + dst_len * sizeof(pgno_t)); + memcpy(MDBX_PNL_BEGIN(dst), MDBX_PNL_BEGIN(src), + src_len * sizeof(pgno_t)); + } else { + avoid_call_libc_for_short_cases: + dst[0] = /* the detent */ (MDBX_PNL_ASCENDING ? 
0 : P_INVALID); + pnl_merge_inner(dst + total, dst + dst_len, src + src_len, src); + } + MDBX_PNL_SETSIZE(dst, total); + } + assert(pnl_check_allocated(dst, MAX_PAGENO + 1)); + return total; +} + +#if MDBX_PNL_ASCENDING +#define MDBX_PNL_EXTRACT_KEY(ptr) (*(ptr)) +#else +#define MDBX_PNL_EXTRACT_KEY(ptr) (P_INVALID - *(ptr)) +#endif +RADIXSORT_IMPL(pgno, pgno_t, MDBX_PNL_EXTRACT_KEY, + MDBX_PNL_PREALLOC_FOR_RADIXSORT, 0) + +SORT_IMPL(pgno_sort, false, pgno_t, MDBX_PNL_ORDERED) + +__hot __noinline MDBX_INTERNAL void pnl_sort_nochk(pnl_t pnl) { + if (likely(MDBX_PNL_GETSIZE(pnl) < MDBX_RADIXSORT_THRESHOLD) || + unlikely(!pgno_radixsort(&MDBX_PNL_FIRST(pnl), MDBX_PNL_GETSIZE(pnl)))) + pgno_sort(MDBX_PNL_BEGIN(pnl), MDBX_PNL_END(pnl)); +} + +SEARCH_IMPL(pgno_bsearch, pgno_t, pgno_t, MDBX_PNL_ORDERED) + +__hot __noinline MDBX_INTERNAL size_t pnl_search_nochk(const pnl_t pnl, + pgno_t pgno) { + const pgno_t *begin = MDBX_PNL_BEGIN(pnl); + const pgno_t *it = pgno_bsearch(begin, MDBX_PNL_GETSIZE(pnl), pgno); + const pgno_t *end = begin + MDBX_PNL_GETSIZE(pnl); + assert(it >= begin && it <= end); + if (it != begin) + assert(MDBX_PNL_ORDERED(it[-1], pgno)); + if (it != end) + assert(!MDBX_PNL_ORDERED(it[0], pgno)); + return it - begin + 1; +} diff --git a/src/pnl.h b/src/pnl.h new file mode 100644 index 00000000..8995b54d --- /dev/null +++ b/src/pnl.h @@ -0,0 +1,161 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \note Please refer to the COPYRIGHT file for explanations license change, +/// credits and acknowledgments. +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +/* An PNL is an Page Number List, a sorted array of IDs. + * + * The first element of the array is a counter for how many actual page-numbers + * are in the list. By default PNLs are sorted in descending order, this allow + * cut off a page with lowest pgno (at the tail) just truncating the list. 
The + * sort order of PNLs is controlled by the MDBX_PNL_ASCENDING build option. */ +typedef pgno_t *pnl_t; +typedef const pgno_t *const_pnl_t; + +#if MDBX_PNL_ASCENDING +#define MDBX_PNL_ORDERED(first, last) ((first) < (last)) +#define MDBX_PNL_DISORDERED(first, last) ((first) >= (last)) +#else +#define MDBX_PNL_ORDERED(first, last) ((first) > (last)) +#define MDBX_PNL_DISORDERED(first, last) ((first) <= (last)) +#endif + +#define MDBX_PNL_GRANULATE_LOG2 10 +#define MDBX_PNL_GRANULATE (1 << MDBX_PNL_GRANULATE_LOG2) +#define MDBX_PNL_INITIAL \ + (MDBX_PNL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t)) + +#define MDBX_PNL_ALLOCLEN(pl) ((pl)[-1]) +#define MDBX_PNL_GETSIZE(pl) ((size_t)((pl)[0])) +#define MDBX_PNL_SETSIZE(pl, size) \ + do { \ + const size_t __size = size; \ + assert(__size < INT_MAX); \ + (pl)[0] = (pgno_t)__size; \ + } while (0) +#define MDBX_PNL_FIRST(pl) ((pl)[1]) +#define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_GETSIZE(pl)]) +#define MDBX_PNL_BEGIN(pl) (&(pl)[1]) +#define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_GETSIZE(pl) + 1]) + +#if MDBX_PNL_ASCENDING +#define MDBX_PNL_EDGE(pl) ((pl) + 1) +#define MDBX_PNL_LEAST(pl) MDBX_PNL_FIRST(pl) +#define MDBX_PNL_MOST(pl) MDBX_PNL_LAST(pl) +#else +#define MDBX_PNL_EDGE(pl) ((pl) + MDBX_PNL_GETSIZE(pl)) +#define MDBX_PNL_LEAST(pl) MDBX_PNL_LAST(pl) +#define MDBX_PNL_MOST(pl) MDBX_PNL_FIRST(pl) +#endif + +#define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_GETSIZE(pl) + 1) * sizeof(pgno_t)) +#define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_GETSIZE(pl) == 0) + +MDBX_MAYBE_UNUSED static inline size_t pnl_size2bytes(size_t size) { + assert(size > 0 && size <= PAGELIST_LIMIT); +#if MDBX_PNL_PREALLOC_FOR_RADIXSORT + + size += size; +#endif /* MDBX_PNL_PREALLOC_FOR_RADIXSORT */ + STATIC_ASSERT(MDBX_ASSUME_MALLOC_OVERHEAD + + (PAGELIST_LIMIT * (MDBX_PNL_PREALLOC_FOR_RADIXSORT + 1) + + MDBX_PNL_GRANULATE + 3) * + sizeof(pgno_t) < + SIZE_MAX / 4 * 3); + size_t bytes = + ceil_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(pgno_t) * (size 
+ 3), + MDBX_PNL_GRANULATE * sizeof(pgno_t)) - + MDBX_ASSUME_MALLOC_OVERHEAD; + return bytes; +} + +/* Converts an allocation size in bytes into the number of list items it can + * hold: three pgno_t words are reserved and, with + * MDBX_PNL_PREALLOC_FOR_RADIXSORT, half of the remainder is set aside. */ +MDBX_MAYBE_UNUSED static inline pgno_t pnl_bytes2size(const size_t bytes) { + size_t size = bytes / sizeof(pgno_t); + assert(size > 3); + size -= 3; +#if MDBX_PNL_PREALLOC_FOR_RADIXSORT + size >>= 1; +#endif /* MDBX_PNL_PREALLOC_FOR_RADIXSORT */ + /* The upper bound is checked after the halving: pnl_size2bytes() doubles the + * item count when MDBX_PNL_PREALLOC_FOR_RADIXSORT is set, so the raw word + * count may legitimately be about twice the PAGELIST_LIMIT. */ + assert(size <= PAGELIST_LIMIT + /* alignment gap */ 65536); + return (pgno_t)size; +} + +MDBX_INTERNAL pnl_t pnl_alloc(size_t size); + +MDBX_INTERNAL void pnl_free(pnl_t pnl); + +MDBX_INTERNAL int pnl_reserve(pnl_t __restrict *__restrict ppnl, + const size_t wanna); + +/* Ensures room for `num` more items: returns MDBX_SUCCESS at once when the + * allocated length already suffices, otherwise the result of pnl_reserve(). */ +MDBX_MAYBE_UNUSED static inline int __must_check_result +pnl_need(pnl_t __restrict *__restrict ppnl, size_t num) { + assert(MDBX_PNL_GETSIZE(*ppnl) <= PAGELIST_LIMIT && + MDBX_PNL_ALLOCLEN(*ppnl) >= MDBX_PNL_GETSIZE(*ppnl)); + assert(num <= PAGELIST_LIMIT); + const size_t wanna = MDBX_PNL_GETSIZE(*ppnl) + num; + return likely(MDBX_PNL_ALLOCLEN(*ppnl) >= wanna) ? MDBX_SUCCESS + : pnl_reserve(ppnl, wanna); +} + +/* Appends pgno to a list that already has spare capacity (asserted); with + * AUDIT_ENABLED() it also asserts that pgno is not present yet. */ +MDBX_MAYBE_UNUSED static inline void +pnl_append_prereserved(__restrict pnl_t pnl, pgno_t pgno) { + assert(MDBX_PNL_GETSIZE(pnl) < MDBX_PNL_ALLOCLEN(pnl)); + if (AUDIT_ENABLED()) { + for (size_t i = MDBX_PNL_GETSIZE(pnl); i > 0; --i) + assert(pgno != pnl[i]); + } + *pnl += 1; + MDBX_PNL_LAST(pnl) = pgno; +} + +MDBX_INTERNAL void pnl_shrink(pnl_t __restrict *__restrict ppnl); + +MDBX_INTERNAL int __must_check_result spill_append_span(__restrict pnl_t *ppnl, + pgno_t pgno, size_t n); + +MDBX_INTERNAL int __must_check_result pnl_append_span(__restrict pnl_t *ppnl, + pgno_t pgno, size_t n); + +MDBX_INTERNAL int __must_check_result pnl_insert_span(__restrict pnl_t *ppnl, + pgno_t pgno, size_t n); + +MDBX_INTERNAL size_t pnl_search_nochk(const pnl_t pnl, pgno_t pgno); + +MDBX_INTERNAL void pnl_sort_nochk(pnl_t pnl); + +MDBX_INTERNAL bool pnl_check(const const_pnl_t pnl, const size_t limit); + +MDBX_MAYBE_UNUSED static inline bool pnl_check_allocated(const const_pnl_t 
pnl, + const size_t limit) { + return pnl == nullptr || (MDBX_PNL_ALLOCLEN(pnl) >= MDBX_PNL_GETSIZE(pnl) && + pnl_check(pnl, limit)); +} + +/* Sorts the list with pnl_sort_nochk(); when assertions are enabled the result + * is then verified by pnl_check() against limit4check. */ +MDBX_MAYBE_UNUSED static inline void pnl_sort(pnl_t pnl, size_t limit4check) { + pnl_sort_nochk(pnl); + assert(pnl_check(pnl, limit4check)); + (void)limit4check; +} + +/* Checked wrapper around pnl_search_nochk(): asserts that the list passes + * pnl_check_allocated() and that pgno < limit, then returns whatever + * pnl_search_nochk() reports for pgno. */ +MDBX_MAYBE_UNUSED static inline size_t pnl_search(const pnl_t pnl, pgno_t pgno, + size_t limit) { + assert(pnl_check_allocated(pnl, limit)); + if (MDBX_HAVE_CMOV) { + /* The cmov-accelerated binary search may read (but not use) one element + * past the end of the data; that element lies within the allocated memory + * region but is not initialized. */ + VALGRIND_MAKE_MEM_DEFINED(MDBX_PNL_END(pnl), sizeof(pgno_t)); + } + assert(pgno < limit); + (void)limit; + size_t n = pnl_search_nochk(pnl, pgno); + if (MDBX_HAVE_CMOV) { + VALGRIND_MAKE_MEM_UNDEFINED(MDBX_PNL_END(pnl), sizeof(pgno_t)); + } + return n; +} + +MDBX_INTERNAL size_t pnl_merge(pnl_t dst, const pnl_t src); diff --git a/src/base.h b/src/preface.h similarity index 70% rename from src/base.h rename to src/preface.h index 5144e195..83576d6c 100644 --- a/src/base.h +++ b/src/preface.h @@ -1,19 +1,162 @@ -/* - * Copyright 2015-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 #pragma once +/* Undefine the NDEBUG if debugging is enforced by MDBX_DEBUG */ +#if (defined(MDBX_DEBUG) && MDBX_DEBUG > 0) || \ + (defined(MDBX_FORCE_ASSERTIONS) && MDBX_FORCE_ASSERTIONS) +#undef NDEBUG +#endif + +/*----------------------------------------------------------------------------*/ + +/** Disables using GNU/Linux libc extensions. + * \ingroup build_option + * \note This option couldn't be moved to the options.h since dependent + * control macros/defined should be prepared before include the options.h */ +#ifndef MDBX_DISABLE_GNU_SOURCE +#define MDBX_DISABLE_GNU_SOURCE 0 +#endif +#if MDBX_DISABLE_GNU_SOURCE +#undef _GNU_SOURCE +#elif (defined(__linux__) || defined(__gnu_linux__)) && !defined(_GNU_SOURCE) +#define _GNU_SOURCE +#endif /* MDBX_DISABLE_GNU_SOURCE */ + +/* Should be defined before any includes */ +#if !defined(_FILE_OFFSET_BITS) && !defined(__ANDROID_API__) && \ + !defined(ANDROID) +#define _FILE_OFFSET_BITS 64 +#endif /* _FILE_OFFSET_BITS */ + +#if defined(__APPLE__) && !defined(_DARWIN_C_SOURCE) +#define _DARWIN_C_SOURCE +#endif /* _DARWIN_C_SOURCE */ + +#if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) && \ + !defined(__USE_MINGW_ANSI_STDIO) +#define __USE_MINGW_ANSI_STDIO 1 +#endif /* MinGW */ + +#if defined(_WIN32) || defined(_WIN64) || defined(_WINDOWS) + +#ifndef _WIN32_WINNT +#define _WIN32_WINNT 0x0601 /* Windows 7 */ +#endif /* _WIN32_WINNT */ + +#if !defined(_CRT_SECURE_NO_WARNINGS) +#define _CRT_SECURE_NO_WARNINGS +#endif /* _CRT_SECURE_NO_WARNINGS */ +#if !defined(UNICODE) +#define UNICODE +#endif /* UNICODE */ + +#if !defined(_NO_CRT_STDIO_INLINE) && MDBX_BUILD_SHARED_LIBRARY && \ + !defined(xMDBX_TOOLS) && MDBX_WITHOUT_MSVC_CRT +#define _NO_CRT_STDIO_INLINE +#endif /* _NO_CRT_STDIO_INLINE */ + +#elif !defined(_POSIX_C_SOURCE) +#define _POSIX_C_SOURCE 200809L +#endif /* Windows */ + +#ifdef 
__cplusplus + +#ifndef NOMINMAX +#define NOMINMAX +#endif /* NOMINMAX */ + +/* Workaround for modern libstdc++ with CLANG < 4.x */ +#if defined(__SIZEOF_INT128__) && !defined(__GLIBCXX_TYPE_INT_N_0) && \ + defined(__clang__) && __clang_major__ < 4 +#define __GLIBCXX_BITSIZE_INT_N_0 128 +#define __GLIBCXX_TYPE_INT_N_0 __int128 +#endif /* Workaround for modern libstdc++ with CLANG < 4.x */ + +#ifdef _MSC_VER +/* Workaround for MSVC' header `extern "C"` vs `std::` redefinition bug */ +#if defined(__SANITIZE_ADDRESS__) && !defined(_DISABLE_VECTOR_ANNOTATION) +#define _DISABLE_VECTOR_ANNOTATION +#endif /* _DISABLE_VECTOR_ANNOTATION */ +#endif /* _MSC_VER */ + +#endif /* __cplusplus */ + +#ifdef _MSC_VER +#if _MSC_FULL_VER < 190024234 +/* Actually libmdbx was not tested with compilers older than 19.00.24234 (Visual + * Studio 2015 Update 3). But you could remove this #error and try to continue + * at your own risk. In such case please don't rise up an issues related ONLY to + * old compilers. + * + * NOTE: + * Unfortunately, there are several different builds of "Visual Studio" that + * are called "Visual Studio 2015 Update 3". + * + * The 190024234 is used here because it is minimal version of Visual Studio + * that was used for build and testing libmdbx in recent years. Soon this + * value will be increased to 19.0.24241.7, since build and testing using + * "Visual Studio 2015" will be performed only at https://ci.appveyor.com. + * + * Please ask Microsoft (but not us) for information about version differences + * and how to and where you can obtain the latest "Visual Studio 2015" build + * with all fixes. + */ +#error \ + "At least \"Microsoft C/C++ Compiler\" version 19.00.24234 (Visual Studio 2015 Update 3) is required." +#endif +#if _MSC_VER > 1800 +#pragma warning(disable : 4464) /* relative include path contains '..' */ +#endif +#if _MSC_VER > 1913 +#pragma warning(disable : 5045) /* will insert Spectre mitigation... 
*/ +#endif +#if _MSC_VER > 1914 +#pragma warning( \ + disable : 5105) /* winbase.h(9531): warning C5105: macro expansion \ + producing 'defined' has undefined behavior */ +#endif +#if _MSC_VER < 1920 +/* avoid "error C2219: syntax error: type qualifier must be after '*'" */ +#define __restrict +#endif +#if _MSC_VER > 1930 +#pragma warning(disable : 6235) /* is always a constant */ +#pragma warning(disable : 6237) /* is never evaluated and might \ + have side effects */ +#endif +#pragma warning(disable : 4710) /* 'xyz': function not inlined */ +#pragma warning(disable : 4711) /* function 'xyz' selected for automatic \ + inline expansion */ +#pragma warning(disable : 4201) /* nonstandard extension used: nameless \ + struct/union */ +#pragma warning(disable : 4702) /* unreachable code */ +#pragma warning(disable : 4706) /* assignment within conditional expression */ +#pragma warning(disable : 4127) /* conditional expression is constant */ +#pragma warning(disable : 4324) /* 'xyz': structure was padded due to \ + alignment specifier */ +#pragma warning(disable : 4310) /* cast truncates constant value */ +#pragma warning(disable : 4820) /* bytes padding added after data member for \ + alignment */ +#pragma warning(disable : 4548) /* expression before comma has no effect; \ + expected expression with side - effect */ +#pragma warning(disable : 4366) /* the result of the unary '&' operator may be \ + unaligned */ +#pragma warning(disable : 4200) /* nonstandard extension used: zero-sized \ + array in struct/union */ +#pragma warning(disable : 4204) /* nonstandard extension used: non-constant \ + aggregate initializer */ +#pragma warning( \ + disable : 4505) /* unreferenced local function has been removed */ +#endif /* _MSC_VER (warnings) */ + +#if defined(__GNUC__) && __GNUC__ < 9 +#pragma GCC diagnostic ignored "-Wattributes" +#endif /* GCC < 9 */ + +#include "../mdbx.h" + /*----------------------------------------------------------------------------*/ /* Microsoft 
compiler generates a lot of warning for self includes... */ @@ -28,20 +171,9 @@ * not guaranteed. Specify /EHsc */ #endif /* _MSC_VER (warnings) */ -#if defined(_WIN32) || defined(_WIN64) -#if !defined(_CRT_SECURE_NO_WARNINGS) -#define _CRT_SECURE_NO_WARNINGS -#endif /* _CRT_SECURE_NO_WARNINGS */ -#if !defined(_NO_CRT_STDIO_INLINE) && MDBX_BUILD_SHARED_LIBRARY && \ - !defined(xMDBX_TOOLS) && MDBX_WITHOUT_MSVC_CRT -#define _NO_CRT_STDIO_INLINE -#endif -#elif !defined(_POSIX_C_SOURCE) -#define _POSIX_C_SOURCE 200809L -#endif /* Windows */ - /*----------------------------------------------------------------------------*/ /* basic C99 includes */ + #include #include #include @@ -55,21 +187,6 @@ #include #include -#if (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF -#error \ - "Sanity checking failed: Two's complement, reasonably sized integer types" -#endif - -#ifndef SSIZE_MAX -#define SSIZE_MAX INTPTR_MAX -#endif - -#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul || defined(_WIN64) -#define MDBX_WORDBITS 64 -#else -#define MDBX_WORDBITS 32 -#endif /* MDBX_WORDBITS */ - /*----------------------------------------------------------------------------*/ /* feature testing */ @@ -81,6 +198,14 @@ #define __has_include(x) (0) #endif +#ifndef __has_attribute +#define __has_attribute(x) (0) +#endif + +#ifndef __has_cpp_attribute +#define __has_cpp_attribute(x) 0 +#endif + #ifndef __has_feature #define __has_feature(x) (0) #endif @@ -89,6 +214,10 @@ #define __has_extension(x) (0) #endif +#ifndef __has_builtin +#define __has_builtin(x) (0) +#endif + #if __has_feature(thread_sanitizer) #define __SANITIZE_THREAD__ 1 #endif @@ -124,6 +253,47 @@ #endif #endif /* __GLIBC_PREREQ */ +/*----------------------------------------------------------------------------*/ +/* pre-requirements */ + +#if (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF +#error \ + "Sanity checking failed: Two's complement, reasonably sized integer types" 
+#endif + +#ifndef SSIZE_MAX +#define SSIZE_MAX INTPTR_MAX +#endif + +#if defined(__GNUC__) && !__GNUC_PREREQ(4, 2) +/* Actually libmdbx was not tested with compilers older than GCC 4.2. + * But you could ignore this warning at your own risk. + * In such case please don't rise up an issues related ONLY to old compilers. + */ +#warning "libmdbx required GCC >= 4.2" +#endif + +#if defined(__clang__) && !__CLANG_PREREQ(3, 8) +/* Actually libmdbx was not tested with CLANG older than 3.8. + * But you could ignore this warning at your own risk. + * In such case please don't rise up an issues related ONLY to old compilers. + */ +#warning "libmdbx required CLANG >= 3.8" +#endif + +#if defined(__GLIBC__) && !__GLIBC_PREREQ(2, 12) +/* Actually libmdbx was not tested with something older than glibc 2.12. + * But you could ignore this warning at your own risk. + * In such case please don't rise up an issues related ONLY to old systems. + */ +#warning "libmdbx was only tested with GLIBC >= 2.12." +#endif + +#ifdef __SANITIZE_THREAD__ +#warning \ + "libmdbx don't compatible with ThreadSanitizer, you will get a lot of false-positive issues." +#endif /* __SANITIZE_THREAD__ */ + /*----------------------------------------------------------------------------*/ /* C11' alignas() */ @@ -240,12 +410,14 @@ __extern_C key_t ftok(const char *, int); #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif /* WIN32_LEAN_AND_MEAN */ -#include -#include #include #include #include +/* После подгрузки windows.h, чтобы избежать проблем со сборкой MINGW и т.п. 
*/ +#include +#include + #else /*----------------------------------------------------------------------*/ #include @@ -502,10 +674,11 @@ __extern_C key_t ftok(const char *, int); #ifndef container_of #define container_of(ptr, type, member) \ - ((type *)((char *)(ptr)-offsetof(type, member))) + ((type *)((char *)(ptr) - offsetof(type, member))) #endif /* container_of */ /*----------------------------------------------------------------------------*/ +/* useful attributes */ #ifndef __always_inline #if defined(__GNUC__) || __has_attribute(__always_inline__) @@ -513,7 +686,7 @@ __extern_C key_t ftok(const char *, int); #elif defined(_MSC_VER) #define __always_inline __forceinline #else -#define __always_inline +#define __always_inline __inline #endif #endif /* __always_inline */ @@ -639,16 +812,6 @@ __extern_C key_t ftok(const char *, int); #endif #endif /* __anonymous_struct_extension__ */ -#ifndef expect_with_probability -#if defined(__builtin_expect_with_probability) || \ - __has_builtin(__builtin_expect_with_probability) || __GNUC_PREREQ(9, 0) -#define expect_with_probability(expr, value, prob) \ - __builtin_expect_with_probability(expr, value, prob) -#else -#define expect_with_probability(expr, value, prob) (expr) -#endif -#endif /* expect_with_probability */ - #ifndef MDBX_WEAK_IMPORT_ATTRIBUTE #ifdef WEAK_IMPORT_ATTRIBUTE #define MDBX_WEAK_IMPORT_ATTRIBUTE WEAK_IMPORT_ATTRIBUTE @@ -662,6 +825,32 @@ __extern_C key_t ftok(const char *, int); #endif #endif /* MDBX_WEAK_IMPORT_ATTRIBUTE */ +#if !defined(__thread) && (defined(_MSC_VER) || defined(__DMC__)) +#define __thread __declspec(thread) +#endif /* __thread */ + +#ifndef MDBX_EXCLUDE_FOR_GPROF +#ifdef ENABLE_GPROF +#define MDBX_EXCLUDE_FOR_GPROF \ + __attribute__((__no_instrument_function__, \ + __no_profile_instrument_function__)) +#else +#define MDBX_EXCLUDE_FOR_GPROF +#endif /* ENABLE_GPROF */ +#endif /* MDBX_EXCLUDE_FOR_GPROF */ + 
+/*----------------------------------------------------------------------------*/ + +#ifndef expect_with_probability +#if defined(__builtin_expect_with_probability) || \ + __has_builtin(__builtin_expect_with_probability) || __GNUC_PREREQ(9, 0) +#define expect_with_probability(expr, value, prob) \ + __builtin_expect_with_probability(expr, value, prob) +#else +#define expect_with_probability(expr, value, prob) (expr) +#endif +#endif /* expect_with_probability */ + #ifndef MDBX_GOOFY_MSVC_STATIC_ANALYZER #ifdef _PREFAST_ #define MDBX_GOOFY_MSVC_STATIC_ANALYZER 1 @@ -684,7 +873,17 @@ __extern_C key_t ftok(const char *, int); #define MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(warn_id) #endif /* MDBX_GOOFY_MSVC_STATIC_ANALYZER */ +#ifndef FLEXIBLE_ARRAY_MEMBERS +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ + (!defined(__cplusplus) && defined(_MSC_VER)) +#define FLEXIBLE_ARRAY_MEMBERS 1 +#else +#define FLEXIBLE_ARRAY_MEMBERS 0 +#endif +#endif /* FLEXIBLE_ARRAY_MEMBERS */ + /*----------------------------------------------------------------------------*/ +/* Valgrind and Address Sanitizer */ #if defined(ENABLE_MEMCHECK) #include @@ -766,10 +965,69 @@ template char (&__ArraySizeHelper(T (&array)[N]))[N]; #define STATIC_ASSERT(expr) STATIC_ASSERT_MSG(expr, #expr) #endif -#ifndef __Wpedantic_format_voidptr -MDBX_MAYBE_UNUSED MDBX_PURE_FUNCTION static __inline const void * -__Wpedantic_format_voidptr(const void *ptr) { - return ptr; -} -#define __Wpedantic_format_voidptr(ARG) __Wpedantic_format_voidptr(ARG) -#endif /* __Wpedantic_format_voidptr */ +/*----------------------------------------------------------------------------*/ + +#if defined(_MSC_VER) && _MSC_VER >= 1900 +/* LY: MSVC 2015/2017/2019 has buggy/inconsistent PRIuPTR/PRIxPTR macros + * for internal format-args checker. 
*/ +#undef PRIuPTR +#undef PRIiPTR +#undef PRIdPTR +#undef PRIxPTR +#define PRIuPTR "Iu" +#define PRIiPTR "Ii" +#define PRIdPTR "Id" +#define PRIxPTR "Ix" +#define PRIuSIZE "zu" +#define PRIiSIZE "zi" +#define PRIdSIZE "zd" +#define PRIxSIZE "zx" +#endif /* fix PRI*PTR for _MSC_VER */ + +#ifndef PRIuSIZE +#define PRIuSIZE PRIuPTR +#define PRIiSIZE PRIiPTR +#define PRIdSIZE PRIdPTR +#define PRIxSIZE PRIxPTR +#endif /* PRI*SIZE macros for MSVC */ + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +/*----------------------------------------------------------------------------*/ + +#if __has_warning("-Wnested-anon-types") +#if defined(__clang__) +#pragma clang diagnostic ignored "-Wnested-anon-types" +#elif defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wnested-anon-types" +#else +#pragma warning disable "nested-anon-types" +#endif +#endif /* -Wnested-anon-types */ + +#if __has_warning("-Wconstant-logical-operand") +#if defined(__clang__) +#pragma clang diagnostic ignored "-Wconstant-logical-operand" +#elif defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wconstant-logical-operand" +#else +#pragma warning disable "constant-logical-operand" +#endif +#endif /* -Wconstant-logical-operand */ + +#if defined(__LCC__) && (__LCC__ <= 121) +/* bug #2798 */ +#pragma diag_suppress alignment_reduction_ignored +#elif defined(__ICC) +#pragma warning(disable : 3453 1366) +#elif __has_warning("-Walignment-reduction-ignored") +#if defined(__clang__) +#pragma clang diagnostic ignored "-Walignment-reduction-ignored" +#elif defined(__GNUC__) +#pragma GCC diagnostic ignored "-Walignment-reduction-ignored" +#else +#pragma warning disable "alignment-reduction-ignored" +#endif +#endif /* -Walignment-reduction-ignored */ diff --git a/src/proto.h b/src/proto.h new file mode 100644 index 00000000..5c752405 --- /dev/null +++ b/src/proto.h @@ -0,0 +1,119 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + 
+#include "essentials.h" + +/* Internal prototypes */ + +/* audit.c */ +MDBX_INTERNAL int audit_ex(MDBX_txn *txn, size_t retired_stored, + bool dont_filter_gc); + +/* mvcc-readers.c */ +MDBX_INTERNAL bsr_t mvcc_bind_slot(MDBX_env *env, const uintptr_t tid); +MDBX_MAYBE_UNUSED MDBX_INTERNAL pgno_t mvcc_largest_this(MDBX_env *env, + pgno_t largest); +MDBX_INTERNAL txnid_t mvcc_shapshot_oldest(MDBX_env *const env, + const txnid_t steady); +MDBX_INTERNAL pgno_t mvcc_snapshot_largest(const MDBX_env *env, + pgno_t last_used_page); +/* Declared exactly once: a second, redundant prototype of the same function + * (differing only in the parameter name) used to follow mvcc_cleanup_dead. */ +MDBX_INTERNAL txnid_t mvcc_kick_laggards(MDBX_env *env, + const txnid_t straggler); +MDBX_INTERNAL int mvcc_cleanup_dead(MDBX_env *env, int rlocked, int *dead); + +/* dxb.c */ +MDBX_INTERNAL int dxb_setup(MDBX_env *env, const int lck_rc, + const mdbx_mode_t mode_bits); +MDBX_INTERNAL int __must_check_result +dxb_read_header(MDBX_env *env, meta_t *meta, const int lck_exclusive, + const mdbx_mode_t mode_bits); +enum resize_mode { implicit_grow, impilict_shrink, explicit_resize }; +MDBX_INTERNAL int __must_check_result dxb_resize(MDBX_env *const env, + const pgno_t used_pgno, + const pgno_t size_pgno, + pgno_t limit_pgno, + const enum resize_mode mode); +MDBX_INTERNAL int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, + const bool enable, const bool force_whole); +MDBX_INTERNAL int __must_check_result dxb_sync_locked(MDBX_env *env, + unsigned flags, + meta_t *const pending, + troika_t *const troika); +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) +MDBX_INTERNAL void dxb_sanitize_tail(MDBX_env *env, MDBX_txn *txn); +#else +/* No-op stub used when neither ENABLE_MEMCHECK nor __SANITIZE_ADDRESS__ is + * defined. */ +static inline void dxb_sanitize_tail(MDBX_env *env, MDBX_txn *txn) { + (void)env; + (void)txn; +} +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ + +/* txn.c */ +MDBX_INTERNAL bool txn_refund(MDBX_txn *txn); +MDBX_INTERNAL txnid_t txn_snapshot_oldest(const MDBX_txn *const txn); +MDBX_INTERNAL int txn_abort(MDBX_txn 
*txn); +MDBX_INTERNAL int txn_renew(MDBX_txn *txn, unsigned flags); + +#define TXN_END_NAMES \ + {"committed", "empty-commit", "abort", "reset", \ + "reset-tmp", "fail-begin", "fail-beginchild"} +enum { + /* txn_end operation number, for logging */ + TXN_END_COMMITTED, + TXN_END_PURE_COMMIT, + TXN_END_ABORT, + TXN_END_RESET, + TXN_END_RESET_TMP, + TXN_END_FAIL_BEGIN, + TXN_END_FAIL_BEGINCHILD, + + TXN_END_OPMASK = 0x0F /* mask for txn_end() operation number */, + TXN_END_UPDATE = 0x10 /* update env state (DBIs) */, + TXN_END_FREE = 0x20 /* free txn unless it is env.basal_txn */, + TXN_END_EOTDONE = 0x40 /* txn's cursors already closed */, + TXN_END_SLOT = 0x80 /* release any reader slot if NOSTICKYTHREADS */ +}; +MDBX_INTERNAL int txn_end(MDBX_txn *txn, const unsigned mode); +MDBX_INTERNAL int txn_write(MDBX_txn *txn, iov_ctx_t *ctx); + +/* env.c */ +MDBX_INTERNAL int env_open(MDBX_env *env, mdbx_mode_t mode); +MDBX_INTERNAL int env_info(const MDBX_env *env, const MDBX_txn *txn, + MDBX_envinfo *out, size_t bytes, troika_t *troika); +MDBX_INTERNAL int env_sync(MDBX_env *env, bool force, bool nonblock); +MDBX_INTERNAL int env_close(MDBX_env *env, bool resurrect_after_fork); +MDBX_INTERNAL bool env_txn0_owned(const MDBX_env *env); +MDBX_INTERNAL void env_options_init(MDBX_env *env); +MDBX_INTERNAL void env_options_adjust_defaults(MDBX_env *env); +MDBX_INTERNAL int __must_check_result env_page_auxbuffer(MDBX_env *env); +MDBX_INTERNAL unsigned env_setup_pagesize(MDBX_env *env, const size_t pagesize); + +/* tree.c */ +MDBX_INTERNAL int tree_drop(MDBX_cursor *mc, const bool may_have_subDBs); +MDBX_INTERNAL int __must_check_result tree_rebalance(MDBX_cursor *mc); +MDBX_INTERNAL int __must_check_result tree_propagate_key(MDBX_cursor *mc, + const MDBX_val *key); +MDBX_INTERNAL void recalculate_merge_thresholds(MDBX_env *env); + +/* subdb.c */ +MDBX_INTERNAL int __must_check_result sdb_fetch(MDBX_txn *txn, size_t dbi); +MDBX_INTERNAL int __must_check_result sdb_setup(const 
MDBX_env *env, + kvx_t *const kvx, + const tree_t *const db); + +/* coherency.c */ +MDBX_INTERNAL bool coherency_check_meta(const MDBX_env *env, + const volatile meta_t *meta, + bool report); +MDBX_INTERNAL int coherency_check_head(MDBX_txn *txn, const meta_ptr_t head, + uint64_t *timestamp); +MDBX_INTERNAL int coherency_check_written(const MDBX_env *env, + const txnid_t txnid, + const volatile meta_t *meta, + const intptr_t pgno, + uint64_t *timestamp); +MDBX_INTERNAL int coherency_timeout(uint64_t *timestamp, intptr_t pgno, + const MDBX_env *env); diff --git a/src/range-estimate.c b/src/range-estimate.c new file mode 100644 index 00000000..51b19538 --- /dev/null +++ b/src/range-estimate.c @@ -0,0 +1,394 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +typedef struct diff_result { + ptrdiff_t diff; + intptr_t level; + ptrdiff_t root_nkeys; +} diff_t; + +/* calculates: r = x - y */ +__hot static int cursor_diff(const MDBX_cursor *const __restrict x, + const MDBX_cursor *const __restrict y, + diff_t *const __restrict r) { + r->diff = 0; + r->level = 0; + r->root_nkeys = 0; + + if (unlikely(x->signature != cur_signature_live)) + return (x->signature == cur_signature_ready4dispose) ? MDBX_EINVAL + : MDBX_EBADSIGN; + + if (unlikely(y->signature != cur_signature_live)) + return (y->signature == cur_signature_ready4dispose) ? MDBX_EINVAL + : MDBX_EBADSIGN; + + int rc = check_txn(x->txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(x->txn != y->txn)) + return MDBX_BAD_TXN; + + if (unlikely(y->dbi_state != x->dbi_state)) + return MDBX_EINVAL; + + const intptr_t depth = (x->top < y->top) ? 
x->top : y->top; + if (unlikely(depth < 0)) + return MDBX_ENODATA; + + r->root_nkeys = page_numkeys(x->pg[0]); + intptr_t nkeys = r->root_nkeys; + for (;;) { + if (unlikely(y->pg[r->level] != x->pg[r->level])) { + ERROR("Mismatch cursors's pages at %zu level", r->level); + return MDBX_PROBLEM; + } + r->diff = x->ki[r->level] - y->ki[r->level]; + if (r->diff) + break; + r->level += 1; + if (r->level > depth) { + r->diff = CMP2INT(x->flags & z_eof_hard, y->flags & z_eof_hard); + return MDBX_SUCCESS; + } + nkeys = page_numkeys(x->pg[r->level]); + } + + while (unlikely(r->diff == 1) && likely(r->level < depth)) { + r->level += 1; + /* DB'PAGEs: 0------------------>MAX + * + * CURSORs: y < x + * STACK[i ]: | + * STACK[+1]: ...y++N|0++x... + */ + nkeys = page_numkeys(y->pg[r->level]); + r->diff = (nkeys - y->ki[r->level]) + x->ki[r->level]; + assert(r->diff > 0); + } + + while (unlikely(r->diff == -1) && likely(r->level < depth)) { + r->level += 1; + /* DB'PAGEs: 0------------------>MAX + * + * CURSORs: x < y + * STACK[i ]: | + * STACK[+1]: ...x--N|0--y... 
+ */ + nkeys = page_numkeys(x->pg[r->level]); + r->diff = -(nkeys - x->ki[r->level]) - y->ki[r->level]; + assert(r->diff < 0); + } + + return MDBX_SUCCESS; +} + +__hot static ptrdiff_t estimate(const tree_t *tree, + diff_t *const __restrict dr) { + /* root: branch-page => scale = leaf-factor * branch-factor^(N-1) + * level-1: branch-page(s) => scale = leaf-factor * branch-factor^2 + * level-2: branch-page(s) => scale = leaf-factor * branch-factor + * level-N: branch-page(s) => scale = leaf-factor + * leaf-level: leaf-page(s) => scale = 1 + */ + ptrdiff_t btree_power = (ptrdiff_t)tree->height - 2 - (ptrdiff_t)dr->level; + if (btree_power < 0) + return dr->diff; + + ptrdiff_t estimated = + (ptrdiff_t)tree->items * dr->diff / (ptrdiff_t)tree->leaf_pages; + if (btree_power == 0) + return estimated; + + if (tree->height < 4) { + assert(dr->level == 0 && btree_power == 1); + return (ptrdiff_t)tree->items * dr->diff / (ptrdiff_t)dr->root_nkeys; + } + + /* average_branchpage_fillfactor = total(branch_entries) / branch_pages + total(branch_entries) = leaf_pages + branch_pages - 1 (root page) */ + const size_t log2_fixedpoint = sizeof(size_t) - 1; + const size_t half = UINT64_C(1) << (log2_fixedpoint - 1); + const size_t factor = + ((tree->leaf_pages + tree->branch_pages - 1) << log2_fixedpoint) / + tree->branch_pages; + while (1) { + switch ((size_t)btree_power) { + default: { + const size_t square = (factor * factor + half) >> log2_fixedpoint; + const size_t quad = (square * square + half) >> log2_fixedpoint; + do { + estimated = estimated * quad + half; + estimated >>= log2_fixedpoint; + btree_power -= 4; + } while (btree_power >= 4); + continue; + } + case 3: + estimated = estimated * factor + half; + estimated >>= log2_fixedpoint; + __fallthrough /* fall through */; + case 2: + estimated = estimated * factor + half; + estimated >>= log2_fixedpoint; + __fallthrough /* fall through */; + case 1: + estimated = estimated * factor + half; + estimated >>= log2_fixedpoint; + 
__fallthrough /* fall through */;
+    case 0:
+      if (unlikely(estimated > (ptrdiff_t)tree->items))
+        return (ptrdiff_t)tree->items;
+      if (unlikely(estimated < -(ptrdiff_t)tree->items))
+        return -(ptrdiff_t)tree->items;
+      return estimated;
+    }
+  }
+}
+
+/* Estimates the signed distance, in items, between the positions of two
+ * cursors: the result is computed as `last - first`.
+ *
+ * Returns MDBX_EINVAL if any argument is null, otherwise the error reported
+ * by cursor_diff(), or MDBX_SUCCESS with the estimate in *distance_items
+ * (left at 0 when the positions are indistinguishable). */
+__hot int mdbx_estimate_distance(const MDBX_cursor *first,
+                                 const MDBX_cursor *last,
+                                 ptrdiff_t *distance_items) {
+  if (unlikely(first == nullptr || last == nullptr ||
+               distance_items == nullptr))
+    return MDBX_EINVAL;
+
+  *distance_items = 0;
+  diff_t dr;
+  int rc = cursor_diff(last, first, &dr);
+  if (unlikely(rc != MDBX_SUCCESS))
+    return rc;
+
+  cASSERT(first, dr.diff || inner_pointed(first) == inner_pointed(last));
+  if (unlikely(dr.diff == 0) && inner_pointed(first)) {
+    /* Same outer position: compare the nested (inner) cursors instead. */
+    first = &first->subcur->cursor;
+    last = &last->subcur->cursor;
+    /* Keep the same operand order as the outer comparison above, i.e.
+     * `last - first`; swapping the operands would flip the sign of the
+     * estimate for cursors positioned on the same key. */
+    rc = cursor_diff(last, first, &dr);
+    if (unlikely(rc != MDBX_SUCCESS))
+      return rc;
+  }
+
+  if (likely(dr.diff != 0))
+    *distance_items = estimate(first->tree, &dr);
+
+  return MDBX_SUCCESS;
+}
+
+__hot int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key,
+                             MDBX_val *data, MDBX_cursor_op move_op,
+                             ptrdiff_t *distance_items) {
+  if (unlikely(cursor == nullptr || distance_items == nullptr ||
+               move_op == MDBX_GET_CURRENT || move_op == MDBX_GET_MULTIPLE))
+    return MDBX_EINVAL;
+
+  if (unlikely(cursor->signature != cur_signature_live))
+    return (cursor->signature == cur_signature_ready4dispose) ?
MDBX_EINVAL + : MDBX_EBADSIGN; + + int rc = check_txn(cursor->txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!is_pointed(cursor))) + return MDBX_ENODATA; + + cursor_couple_t next; + rc = cursor_init(&next.outer, cursor->txn, cursor_dbi(cursor)); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + cursor_cpstk(cursor, &next.outer); + if (cursor->tree->flags & MDBX_DUPSORT) { + subcur_t *mx = &container_of(cursor, cursor_couple_t, outer)->inner; + cursor_cpstk(&mx->cursor, &next.inner.cursor); + } + + MDBX_val stub_data; + if (data == nullptr) { + const unsigned mask = + 1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | 1 << MDBX_SET_KEY; + if (unlikely(mask & (1 << move_op))) + return MDBX_EINVAL; + stub_data.iov_base = nullptr; + stub_data.iov_len = 0; + data = &stub_data; + } + + MDBX_val stub_key; + if (key == nullptr) { + const unsigned mask = 1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | + 1 << MDBX_SET_KEY | 1 << MDBX_SET | + 1 << MDBX_SET_RANGE; + if (unlikely(mask & (1 << move_op))) + return MDBX_EINVAL; + stub_key.iov_base = nullptr; + stub_key.iov_len = 0; + key = &stub_key; + } + + next.outer.signature = cur_signature_live; + rc = cursor_ops(&next.outer, key, data, move_op); + if (unlikely(rc != MDBX_SUCCESS && + (rc != MDBX_NOTFOUND || !is_pointed(&next.outer)))) + return rc; + + if (move_op == MDBX_LAST) { + next.outer.flags |= z_eof_hard; + next.inner.cursor.flags |= z_eof_hard; + } + return mdbx_estimate_distance(cursor, &next.outer, distance_items); +} + +__hot int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, + const MDBX_val *begin_key, + const MDBX_val *begin_data, + const MDBX_val *end_key, const MDBX_val *end_data, + ptrdiff_t *size_items) { + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!size_items)) + return MDBX_EINVAL; + + if (unlikely(begin_data && + (begin_key == nullptr || begin_key == MDBX_EPSILON))) + return MDBX_EINVAL; 
+ + if (unlikely(end_data && (end_key == nullptr || end_key == MDBX_EPSILON))) + return MDBX_EINVAL; + + if (unlikely(begin_key == MDBX_EPSILON && end_key == MDBX_EPSILON)) + return MDBX_EINVAL; + + cursor_couple_t begin; + /* LY: first, initialize cursor to refresh a DB in case it have DB_STALE */ + rc = cursor_init(&begin.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(begin.outer.tree->items == 0)) { + *size_items = 0; + return MDBX_SUCCESS; + } + + if (!begin_key) { + if (unlikely(!end_key)) { + /* LY: FIRST..LAST case */ + *size_items = (ptrdiff_t)begin.outer.tree->items; + return MDBX_SUCCESS; + } + rc = outer_first(&begin.outer, nullptr, nullptr); + if (unlikely(end_key == MDBX_EPSILON)) { + /* LY: FIRST..+epsilon case */ + return (rc == MDBX_SUCCESS) + ? mdbx_cursor_count(&begin.outer, (size_t *)size_items) + : rc; + } + } else { + if (unlikely(begin_key == MDBX_EPSILON)) { + if (end_key == nullptr) { + /* LY: -epsilon..LAST case */ + rc = outer_last(&begin.outer, nullptr, nullptr); + return (rc == MDBX_SUCCESS) + ? mdbx_cursor_count(&begin.outer, (size_t *)size_items) + : rc; + } + /* LY: -epsilon..value case */ + assert(end_key != MDBX_EPSILON); + begin_key = end_key; + } else if (unlikely(end_key == MDBX_EPSILON)) { + /* LY: value..+epsilon case */ + assert(begin_key != MDBX_EPSILON); + end_key = begin_key; + } + if (end_key && !begin_data && !end_data && + (begin_key == end_key || + begin.outer.clc->k.cmp(begin_key, end_key) == 0)) { + /* LY: single key case */ + rc = cursor_seek(&begin.outer, (MDBX_val *)begin_key, nullptr, MDBX_SET) + .err; + if (unlikely(rc != MDBX_SUCCESS)) { + *size_items = 0; + return (rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc; + } + *size_items = 1; + if (inner_pointed(&begin.outer)) + *size_items = + (sizeof(*size_items) >= sizeof(begin.inner.nested_tree.items) || + begin.inner.nested_tree.items <= PTRDIFF_MAX) + ? 
(size_t)begin.inner.nested_tree.items + : PTRDIFF_MAX; + + return MDBX_SUCCESS; + } else { + MDBX_val proxy_key = *begin_key; + MDBX_val proxy_data = {nullptr, 0}; + if (begin_data) + proxy_data = *begin_data; + rc = cursor_seek(&begin.outer, &proxy_key, &proxy_data, + MDBX_SET_LOWERBOUND) + .err; + } + } + + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc != MDBX_NOTFOUND || !is_pointed(&begin.outer)) + return rc; + } + + cursor_couple_t end; + rc = cursor_init(&end.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + if (!end_key) { + rc = outer_last(&end.outer, nullptr, nullptr); + end.outer.flags |= z_eof_hard; + end.inner.cursor.flags |= z_eof_hard; + } else { + MDBX_val proxy_key = *end_key; + MDBX_val proxy_data = {nullptr, 0}; + if (end_data) + proxy_data = *end_data; + rc = cursor_seek(&end.outer, &proxy_key, &proxy_data, MDBX_SET_LOWERBOUND) + .err; + } + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc != MDBX_NOTFOUND || !is_pointed(&end.outer)) + return rc; + } + + rc = mdbx_estimate_distance(&begin.outer, &end.outer, size_items); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + assert(*size_items >= -(ptrdiff_t)begin.outer.tree->items && + *size_items <= (ptrdiff_t)begin.outer.tree->items); + +#if 0 /* LY: Was decided to returns as-is (i.e. negative) the estimation \ + * results for an inverted ranges. 
*/ + + /* Commit 8ddfd1f34ad7cf7a3c4aa75d2e248ca7e639ed63 + Change-Id: If59eccf7311123ab6384c4b93f9b1fed5a0a10d1 */ + + if (*size_items < 0) { + /* LY: inverted range case */ + *size_items += (ptrdiff_t)begin.outer.tree->items; + } else if (*size_items == 0 && begin_key && end_key) { + int cmp = begin.outer.kvx->cmp(&origin_begin_key, &origin_end_key); + if (cmp == 0 && cursor_pointed(begin.inner.cursor.flags) && + begin_data && end_data) + cmp = begin.outer.kvx->v.cmp(&origin_begin_data, &origin_end_data); + if (cmp > 0) { + /* LY: inverted range case with empty scope */ + *size_items = (ptrdiff_t)begin.outer.tree->items; + } + } + assert(*size_items >= 0 && + *size_items <= (ptrdiff_t)begin.outer.tree->items); +#endif + + return MDBX_SUCCESS; +} diff --git a/src/refund.c b/src/refund.c new file mode 100644 index 00000000..3742e569 --- /dev/null +++ b/src/refund.c @@ -0,0 +1,229 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +#if MDBX_ENABLE_REFUND +static void refund_reclaimed(MDBX_txn *txn) { + /* Scanning in descend order */ + pgno_t first_unallocated = txn->geo.first_unallocated; + const pnl_t pnl = txn->tw.relist; + tASSERT(txn, + MDBX_PNL_GETSIZE(pnl) && MDBX_PNL_MOST(pnl) == first_unallocated - 1); +#if MDBX_PNL_ASCENDING + size_t i = MDBX_PNL_GETSIZE(pnl); + tASSERT(txn, pnl[i] == first_unallocated - 1); + while (--first_unallocated, --i > 0 && pnl[i] == first_unallocated - 1) + ; + MDBX_PNL_SETSIZE(pnl, i); +#else + size_t i = 1; + tASSERT(txn, pnl[i] == first_unallocated - 1); + size_t len = MDBX_PNL_GETSIZE(pnl); + while (--first_unallocated, ++i <= len && pnl[i] == first_unallocated - 1) + ; + MDBX_PNL_SETSIZE(pnl, len -= i - 1); + for (size_t move = 0; move < len; ++move) + pnl[1 + move] = pnl[i + move]; +#endif + VERBOSE("refunded %" PRIaPGNO " pages: %" PRIaPGNO " -> %" PRIaPGNO, + txn->geo.first_unallocated - first_unallocated, + 
txn->geo.first_unallocated, first_unallocated); + txn->geo.first_unallocated = first_unallocated; + tASSERT(txn, + pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - 1)); +} + +static void refund_loose(MDBX_txn *txn) { + tASSERT(txn, txn->tw.loose_pages != nullptr); + tASSERT(txn, txn->tw.loose_count > 0); + + dpl_t *const dl = txn->tw.dirtylist; + if (dl) { + tASSERT(txn, dl->length >= txn->tw.loose_count); + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + } else { + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); + } + + pgno_t onstack[MDBX_CACHELINE_SIZE * 8 / sizeof(pgno_t)]; + pnl_t suitable = onstack; + + if (!dl || dl->length - dl->sorted > txn->tw.loose_count) { + /* Dirty list is useless since unsorted. */ + if (pnl_bytes2size(sizeof(onstack)) < txn->tw.loose_count) { + suitable = pnl_alloc(txn->tw.loose_count); + if (unlikely(!suitable)) + return /* this is not a reason for transaction fail */; + } + + /* Collect loose-pages which may be refunded. */ + tASSERT(txn, + txn->geo.first_unallocated >= MIN_PAGENO + txn->tw.loose_count); + pgno_t most = MIN_PAGENO; + size_t w = 0; + for (const page_t *lp = txn->tw.loose_pages; lp; lp = page_next(lp)) { + tASSERT(txn, lp->flags == P_LOOSE); + tASSERT(txn, txn->geo.first_unallocated > lp->pgno); + if (likely(txn->geo.first_unallocated - txn->tw.loose_count <= + lp->pgno)) { + tASSERT(txn, + w < ((suitable == onstack) ? pnl_bytes2size(sizeof(onstack)) + : MDBX_PNL_ALLOCLEN(suitable))); + suitable[++w] = lp->pgno; + most = (lp->pgno > most) ? lp->pgno : most; + } + MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *)); + VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *)); + } + + if (most + 1 == txn->geo.first_unallocated) { + /* Sort suitable list and refund pages at the tail. */ + MDBX_PNL_SETSIZE(suitable, w); + pnl_sort(suitable, MAX_PAGENO + 1); + + /* Scanning in descend order */ + const intptr_t step = MDBX_PNL_ASCENDING ? 
-1 : 1; + const intptr_t begin = + MDBX_PNL_ASCENDING ? MDBX_PNL_GETSIZE(suitable) : 1; + const intptr_t end = + MDBX_PNL_ASCENDING ? 0 : MDBX_PNL_GETSIZE(suitable) + 1; + tASSERT(txn, suitable[begin] >= suitable[end - step]); + tASSERT(txn, most == suitable[begin]); + + for (intptr_t i = begin + step; i != end; i += step) { + if (suitable[i] != most - 1) + break; + most -= 1; + } + const size_t refunded = txn->geo.first_unallocated - most; + DEBUG("refund-suitable %zu pages %" PRIaPGNO " -> %" PRIaPGNO, refunded, + most, txn->geo.first_unallocated); + txn->geo.first_unallocated = most; + txn->tw.loose_count -= refunded; + if (dl) { + txn->tw.dirtyroom += refunded; + dl->pages_including_loose -= refunded; + assert(txn->tw.dirtyroom <= txn->env->options.dp_limit); + + /* Filter-out dirty list */ + size_t r = 0; + w = 0; + if (dl->sorted) { + do { + if (dl->items[++r].pgno < most) { + if (++w != r) + dl->items[w] = dl->items[r]; + } + } while (r < dl->sorted); + dl->sorted = w; + } + while (r < dl->length) { + if (dl->items[++r].pgno < most) { + if (++w != r) + dl->items[w] = dl->items[r]; + } + } + dpl_setlen(dl, w); + tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == + (txn->parent ? txn->parent->tw.dirtyroom + : txn->env->options.dp_limit)); + } + goto unlink_loose; + } + } else { + /* Dirtylist is mostly sorted, just refund loose pages at the end. */ + dpl_sort(txn); + tASSERT(txn, + dl->length < 2 || dl->items[1].pgno < dl->items[dl->length].pgno); + tASSERT(txn, dl->sorted == dl->length); + + /* Scan dirtylist tail-forward and cutoff suitable pages. 
*/ + size_t n; + for (n = dl->length; dl->items[n].pgno == txn->geo.first_unallocated - 1 && + dl->items[n].ptr->flags == P_LOOSE; + --n) { + tASSERT(txn, n > 0); + page_t *dp = dl->items[n].ptr; + DEBUG("refund-sorted page %" PRIaPGNO, dp->pgno); + tASSERT(txn, dp->pgno == dl->items[n].pgno); + txn->geo.first_unallocated -= 1; + } + dpl_setlen(dl, n); + + if (dl->sorted != dl->length) { + const size_t refunded = dl->sorted - dl->length; + dl->sorted = dl->length; + txn->tw.loose_count -= refunded; + txn->tw.dirtyroom += refunded; + dl->pages_including_loose -= refunded; + tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == + (txn->parent ? txn->parent->tw.dirtyroom + : txn->env->options.dp_limit)); + + /* Filter-out loose chain & dispose refunded pages. */ + unlink_loose: + for (page_t *__restrict *__restrict link = &txn->tw.loose_pages; *link;) { + page_t *dp = *link; + tASSERT(txn, dp->flags == P_LOOSE); + MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(dp), sizeof(page_t *)); + VALGRIND_MAKE_MEM_DEFINED(&page_next(dp), sizeof(page_t *)); + if (txn->geo.first_unallocated > dp->pgno) { + link = &page_next(dp); + } else { + *link = page_next(dp); + if ((txn->flags & MDBX_WRITEMAP) == 0) + page_shadow_release(txn->env, dp, 1); + } + } + } + } + + tASSERT(txn, dpl_check(txn)); + if (suitable != onstack) + pnl_free(suitable); + txn->tw.loose_refund_wl = txn->geo.first_unallocated; +} + +bool txn_refund(MDBX_txn *txn) { + const pgno_t before = txn->geo.first_unallocated; + + if (txn->tw.loose_pages && + txn->tw.loose_refund_wl > txn->geo.first_unallocated) + refund_loose(txn); + + while (true) { + if (MDBX_PNL_GETSIZE(txn->tw.relist) == 0 || + MDBX_PNL_MOST(txn->tw.relist) != txn->geo.first_unallocated - 1) + break; + + refund_reclaimed(txn); + if (!txn->tw.loose_pages || + txn->tw.loose_refund_wl <= txn->geo.first_unallocated) + break; + + const pgno_t memo = txn->geo.first_unallocated; + refund_loose(txn); + if (memo == txn->geo.first_unallocated) + break; + } 
+ + if (before == txn->geo.first_unallocated) + return false; + + if (txn->tw.spilled.list) + /* Squash deleted pagenums if we refunded any */ + spill_purge(txn); + + return true; +} + +#else /* MDBX_ENABLE_REFUND */ + +bool txn_refund(MDBX_txn *txn) { + (void)txn; + /* No online auto-compactification. */ + return false; +} + +#endif /* MDBX_ENABLE_REFUND */ diff --git a/src/sort.h b/src/sort.h new file mode 100644 index 00000000..3169e317 --- /dev/null +++ b/src/sort.h @@ -0,0 +1,485 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// +/// \file sort.h +/// \brief Маркосы реализующие сортировку и двоичный поиск + +#pragma once + +#define MDBX_RADIXSORT_THRESHOLD 142 + +/* --------------------------------------------------------------------------- + * LY: State of the art quicksort-based sorting, with internal stack + * and network-sort for small chunks. + * Thanks to John M. Gamble for the http://pages.ripco.net/~jgamble/nw.html */ + +#if MDBX_HAVE_CMOV +#define SORT_CMP_SWAP(TYPE, CMP, a, b) \ + do { \ + const TYPE swap_tmp = (a); \ + const bool swap_cmp = expect_with_probability(CMP(swap_tmp, b), 0, .5); \ + (a) = swap_cmp ? swap_tmp : b; \ + (b) = swap_cmp ? 
b : swap_tmp; \ + } while (0) +#else +#define SORT_CMP_SWAP(TYPE, CMP, a, b) \ + do \ + if (expect_with_probability(!CMP(a, b), 0, .5)) { \ + const TYPE swap_tmp = (a); \ + (a) = (b); \ + (b) = swap_tmp; \ + } \ + while (0) +#endif + +// 3 comparators, 3 parallel operations +// o-----^--^--o +// | | +// o--^--|--v--o +// | | +// o--v--v-----o +// +// [[1,2]] +// [[0,2]] +// [[0,1]] +#define SORT_NETWORK_3(TYPE, CMP, begin) \ + do { \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ + } while (0) + +// 5 comparators, 3 parallel operations +// o--^--^--------o +// | | +// o--v--|--^--^--o +// | | | +// o--^--v--|--v--o +// | | +// o--v-----v-----o +// +// [[0,1],[2,3]] +// [[0,2],[1,3]] +// [[1,2]] +#define SORT_NETWORK_4(TYPE, CMP, begin) \ + do { \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ + } while (0) + +// 9 comparators, 5 parallel operations +// o--^--^-----^-----------o +// | | | +// o--|--|--^--v-----^--^--o +// | | | | | +// o--|--v--|--^--^--|--v--o +// | | | | | +// o--|-----v--|--v--|--^--o +// | | | | +// o--v--------v-----v--v--o +// +// [[0,4],[1,3]] +// [[0,2]] +// [[2,4],[0,1]] +// [[2,3],[1,4]] +// [[1,2],[3,4]] +#define SORT_NETWORK_5(TYPE, CMP, begin) \ + do { \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[4]); \ + } while (0) + +// 12 comparators, 6 
parallel operations +// o-----^--^--^-----------------o +// | | | +// o--^--|--v--|--^--------^-----o +// | | | | | +// o--v--v-----|--|--^--^--|--^--o +// | | | | | | +// o-----^--^--v--|--|--|--v--v--o +// | | | | | +// o--^--|--v-----v--|--v--------o +// | | | +// o--v--v-----------v-----------o +// +// [[1,2],[4,5]] +// [[0,2],[3,5]] +// [[0,1],[3,4],[2,5]] +// [[0,3],[1,4]] +// [[2,4],[1,3]] +// [[2,3]] +#define SORT_NETWORK_6(TYPE, CMP, begin) \ + do { \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[5]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[5]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[5]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ + } while (0) + +// 16 comparators, 6 parallel operations +// o--^--------^-----^-----------------o +// | | | +// o--|--^-----|--^--v--------^--^-----o +// | | | | | | +// o--|--|--^--v--|--^-----^--|--v-----o +// | | | | | | | +// o--|--|--|-----v--|--^--v--|--^--^--o +// | | | | | | | | +// o--v--|--|--^-----v--|--^--v--|--v--o +// | | | | | | +// o-----v--|--|--------v--v-----|--^--o +// | | | | +// o--------v--v-----------------v--v--o +// +// [[0,4],[1,5],[2,6]] +// [[0,2],[1,3],[4,6]] +// [[2,4],[3,5],[0,1]] +// [[2,3],[4,5]] +// [[1,4],[3,6]] +// [[1,2],[3,4],[5,6]] +#define SORT_NETWORK_7(TYPE, CMP, begin) \ + do { \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[5]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[6]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[6]); \ + 
SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[5]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[5]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[6]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[5], begin[6]); \ + } while (0) + +// 19 comparators, 6 parallel operations +// o--^--------^-----^-----------------o +// | | | +// o--|--^-----|--^--v--------^--^-----o +// | | | | | | +// o--|--|--^--v--|--^-----^--|--v-----o +// | | | | | | | +// o--|--|--|--^--v--|--^--v--|--^--^--o +// | | | | | | | | | +// o--v--|--|--|--^--v--|--^--v--|--v--o +// | | | | | | | +// o-----v--|--|--|--^--v--v-----|--^--o +// | | | | | | +// o--------v--|--v--|--^--------v--v--o +// | | | +// o-----------v-----v--v--------------o +// +// [[0,4],[1,5],[2,6],[3,7]] +// [[0,2],[1,3],[4,6],[5,7]] +// [[2,4],[3,5],[0,1],[6,7]] +// [[2,3],[4,5]] +// [[1,4],[3,6]] +// [[1,2],[3,4],[5,6]] +#define SORT_NETWORK_8(TYPE, CMP, begin) \ + do { \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[5]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[6]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[7]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[6]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[5], begin[7]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[5]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[6], begin[7]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[5]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], 
begin[6]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[5], begin[6]); \ + } while (0) + +#define SORT_INNER(TYPE, CMP, begin, end, len) \ + switch (len) { \ + default: \ + assert(false); \ + __unreachable(); \ + case 0: \ + case 1: \ + break; \ + case 2: \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ + break; \ + case 3: \ + SORT_NETWORK_3(TYPE, CMP, begin); \ + break; \ + case 4: \ + SORT_NETWORK_4(TYPE, CMP, begin); \ + break; \ + case 5: \ + SORT_NETWORK_5(TYPE, CMP, begin); \ + break; \ + case 6: \ + SORT_NETWORK_6(TYPE, CMP, begin); \ + break; \ + case 7: \ + SORT_NETWORK_7(TYPE, CMP, begin); \ + break; \ + case 8: \ + SORT_NETWORK_8(TYPE, CMP, begin); \ + break; \ + } + +#define SORT_SWAP(TYPE, a, b) \ + do { \ + const TYPE swap_tmp = (a); \ + (a) = (b); \ + (b) = swap_tmp; \ + } while (0) + +#define SORT_PUSH(low, high) \ + do { \ + top->lo = (low); \ + top->hi = (high); \ + ++top; \ + } while (0) + +#define SORT_POP(low, high) \ + do { \ + --top; \ + low = top->lo; \ + high = top->hi; \ + } while (0) + +#define SORT_IMPL(NAME, EXPECT_LOW_CARDINALITY_OR_PRESORTED, TYPE, CMP) \ + \ + static inline bool NAME##_is_sorted(const TYPE *first, const TYPE *last) { \ + while (++first <= last) \ + if (expect_with_probability(CMP(first[0], first[-1]), 1, .1)) \ + return false; \ + return true; \ + } \ + \ + typedef struct { \ + TYPE *lo, *hi; \ + } NAME##_stack; \ + \ + __hot static void NAME(TYPE *const __restrict begin, \ + TYPE *const __restrict end) { \ + NAME##_stack stack[sizeof(size_t) * CHAR_BIT], *__restrict top = stack; \ + \ + TYPE *__restrict hi = end - 1; \ + TYPE *__restrict lo = begin; \ + while (true) { \ + const ptrdiff_t len = hi - lo; \ + if (len < 8) { \ + SORT_INNER(TYPE, CMP, lo, hi + 1, len + 1); \ + if (unlikely(top == stack)) \ + break; \ + SORT_POP(lo, hi); \ + continue; \ + } \ + \ + TYPE *__restrict mid = lo + (len >> 1); \ + SORT_CMP_SWAP(TYPE, 
CMP, *lo, *mid); \ + SORT_CMP_SWAP(TYPE, CMP, *mid, *hi); \ + SORT_CMP_SWAP(TYPE, CMP, *lo, *mid); \ + \ + TYPE *right = hi - 1; \ + TYPE *left = lo + 1; \ + while (1) { \ + while (expect_with_probability(CMP(*left, *mid), 0, .5)) \ + ++left; \ + while (expect_with_probability(CMP(*mid, *right), 0, .5)) \ + --right; \ + if (unlikely(left > right)) { \ + if (EXPECT_LOW_CARDINALITY_OR_PRESORTED) { \ + if (NAME##_is_sorted(lo, right)) \ + lo = right + 1; \ + if (NAME##_is_sorted(left, hi)) \ + hi = left; \ + } \ + break; \ + } \ + SORT_SWAP(TYPE, *left, *right); \ + mid = (mid == left) ? right : (mid == right) ? left : mid; \ + ++left; \ + --right; \ + } \ + \ + if (right - lo > hi - left) { \ + SORT_PUSH(lo, right); \ + lo = left; \ + } else { \ + SORT_PUSH(left, hi); \ + hi = right; \ + } \ + } \ + \ + if (AUDIT_ENABLED()) { \ + for (TYPE *scan = begin + 1; scan < end; ++scan) \ + assert(CMP(scan[-1], scan[0])); \ + } \ + } + +/*------------------------------------------------------------------------------ + * LY: radix sort for large chunks */ + +#define RADIXSORT_IMPL(NAME, TYPE, EXTRACT_KEY, BUFFER_PREALLOCATED, END_GAP) \ + \ + __hot static bool NAME##_radixsort(TYPE *const begin, const size_t length) { \ + TYPE *tmp; \ + if (BUFFER_PREALLOCATED) { \ + tmp = begin + length + END_GAP; \ + /* memset(tmp, 0xDeadBeef, sizeof(TYPE) * length); */ \ + } else { \ + tmp = osal_malloc(sizeof(TYPE) * length); \ + if (unlikely(!tmp)) \ + return false; \ + } \ + \ + size_t key_shift = 0, key_diff_mask; \ + do { \ + struct { \ + pgno_t a[256], b[256]; \ + } counters; \ + memset(&counters, 0, sizeof(counters)); \ + \ + key_diff_mask = 0; \ + size_t prev_key = EXTRACT_KEY(begin) >> key_shift; \ + TYPE *r = begin, *end = begin + length; \ + do { \ + const size_t key = EXTRACT_KEY(r) >> key_shift; \ + counters.a[key & 255]++; \ + counters.b[(key >> 8) & 255]++; \ + key_diff_mask |= prev_key ^ key; \ + prev_key = key; \ + } while (++r != end); \ + \ + pgno_t ta = 0, tb = 0; \ + 
for (size_t i = 0; i < 256; ++i) { \ + const pgno_t ia = counters.a[i]; \ + counters.a[i] = ta; \ + ta += ia; \ + const pgno_t ib = counters.b[i]; \ + counters.b[i] = tb; \ + tb += ib; \ + } \ + \ + r = begin; \ + do { \ + const size_t key = EXTRACT_KEY(r) >> key_shift; \ + tmp[counters.a[key & 255]++] = *r; \ + } while (++r != end); \ + \ + if (unlikely(key_diff_mask < 256)) { \ + memcpy(begin, tmp, ptr_dist(end, begin)); \ + break; \ + } \ + end = (r = tmp) + length; \ + do { \ + const size_t key = EXTRACT_KEY(r) >> key_shift; \ + begin[counters.b[(key >> 8) & 255]++] = *r; \ + } while (++r != end); \ + \ + key_shift += 16; \ + } while (key_diff_mask >> 16); \ + \ + if (!(BUFFER_PREALLOCATED)) \ + osal_free(tmp); \ + return true; \ + } + +/*------------------------------------------------------------------------------ + * LY: Binary search */ + +#if defined(__clang__) && __clang_major__ > 4 && defined(__ia32__) +#define WORKAROUND_FOR_CLANG_OPTIMIZER_BUG(size, flag) \ + do \ + __asm __volatile("" \ + : "+r"(size) \ + : "r" /* the `b` constraint is more suitable here, but \ + cause CLANG to allocate and push/pop an one more \ + register, so using the `r` which avoids this. */ \ + (flag)); \ + while (0) +#else +#define WORKAROUND_FOR_CLANG_OPTIMIZER_BUG(size, flag) \ + do { \ + /* nope for non-clang or non-x86 */; \ + } while (0) +#endif /* Workaround for CLANG */ + +/* *INDENT-OFF* */ +/* clang-format off */ +#define SEARCH_IMPL(NAME, TYPE_LIST, TYPE_ARG, CMP) \ + static __always_inline const TYPE_LIST *NAME( \ + const TYPE_LIST *it, size_t length, const TYPE_ARG item) { \ + const TYPE_LIST *const begin = it, *const end = begin + length; \ + \ + if (MDBX_HAVE_CMOV) \ + do { \ + /* Адаптивно-упрощенный шаг двоичного поиска: \ + * - без переходов при наличии cmov или аналога; \ + * - допускает лишние итерации; \ + * - но ищет пока size > 2, что требует дозавершения поиска \ + * среди остающихся 0-1-2 элементов. 
*/ \ + const TYPE_LIST *const middle = it + (length >> 1); \ + length = (length + 1) >> 1; \ + const bool flag = expect_with_probability(CMP(*middle, item), 0, .5); \ + WORKAROUND_FOR_CLANG_OPTIMIZER_BUG(length, flag); \ + it = flag ? middle : it; \ + } while (length > 2); \ + else \ + while (length > 2) { \ + /* Вариант с использованием условного перехода. Основное отличие в \ + * том, что при "не равно" (true от компаратора) переход делается на 1 \ + * ближе к концу массива. Алгоритмически это верно и обеспечивает \ + * чуть-чуть более быструю сходимость, но зато требует больше \ + * вычислений при true от компаратора. Также ВАЖНО(!) не допускается \ + * спекулятивное выполнение при size == 0. */ \ + const TYPE_LIST *const middle = it + (length >> 1); \ + length = (length + 1) >> 1; \ + const bool flag = expect_with_probability(CMP(*middle, item), 0, .5); \ + if (flag) { \ + it = middle + 1; \ + length -= 1; \ + } \ + } \ + it += length > 1 && expect_with_probability(CMP(*it, item), 0, .5); \ + it += length > 0 && expect_with_probability(CMP(*it, item), 0, .5); \ + \ + if (AUDIT_ENABLED()) { \ + for (const TYPE_LIST *scan = begin; scan < it; ++scan) \ + assert(CMP(*scan, item)); \ + for (const TYPE_LIST *scan = it; scan < end; ++scan) \ + assert(!CMP(*scan, item)); \ + (void)begin, (void)end; \ + } \ + \ + return it; \ + } +/* *INDENT-ON* */ +/* clang-format on */ diff --git a/src/spill.c b/src/spill.c new file mode 100644 index 00000000..3368acfb --- /dev/null +++ b/src/spill.c @@ -0,0 +1,484 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +void spill_remove(MDBX_txn *txn, size_t idx, size_t npages) { + tASSERT(txn, idx > 0 && idx <= MDBX_PNL_GETSIZE(txn->tw.spilled.list) && + txn->tw.spilled.least_removed > 0); + txn->tw.spilled.least_removed = (idx < txn->tw.spilled.least_removed) + ? 
idx + : txn->tw.spilled.least_removed; + txn->tw.spilled.list[idx] |= 1; + MDBX_PNL_SETSIZE(txn->tw.spilled.list, + MDBX_PNL_GETSIZE(txn->tw.spilled.list) - + (idx == MDBX_PNL_GETSIZE(txn->tw.spilled.list))); + + while (unlikely(npages > 1)) { + const pgno_t pgno = (txn->tw.spilled.list[idx] >> 1) + 1; + if (MDBX_PNL_ASCENDING) { + if (++idx > MDBX_PNL_GETSIZE(txn->tw.spilled.list) || + (txn->tw.spilled.list[idx] >> 1) != pgno) + return; + } else { + if (--idx < 1 || (txn->tw.spilled.list[idx] >> 1) != pgno) + return; + txn->tw.spilled.least_removed = (idx < txn->tw.spilled.least_removed) + ? idx + : txn->tw.spilled.least_removed; + } + txn->tw.spilled.list[idx] |= 1; + MDBX_PNL_SETSIZE(txn->tw.spilled.list, + MDBX_PNL_GETSIZE(txn->tw.spilled.list) - + (idx == MDBX_PNL_GETSIZE(txn->tw.spilled.list))); + --npages; + } +} + +pnl_t spill_purge(MDBX_txn *txn) { + tASSERT(txn, txn->tw.spilled.least_removed > 0); + const pnl_t sl = txn->tw.spilled.list; + if (txn->tw.spilled.least_removed != INT_MAX) { + size_t len = MDBX_PNL_GETSIZE(sl), r, w; + for (w = r = txn->tw.spilled.least_removed; r <= len; ++r) { + sl[w] = sl[r]; + w += 1 - (sl[r] & 1); + } + for (size_t i = 1; i < w; ++i) + tASSERT(txn, (sl[i] & 1) == 0); + MDBX_PNL_SETSIZE(sl, w - 1); + txn->tw.spilled.least_removed = INT_MAX; + } else { + for (size_t i = 1; i <= MDBX_PNL_GETSIZE(sl); ++i) + tASSERT(txn, (sl[i] & 1) == 0); + } + return sl; +} + +/*----------------------------------------------------------------------------*/ + +static int spill_page(MDBX_txn *txn, iov_ctx_t *ctx, page_t *dp, + const size_t npages) { + tASSERT(txn, !(txn->flags & MDBX_WRITEMAP)); +#if MDBX_ENABLE_PGOP_STAT + txn->env->lck->pgops.spill.weak += npages; +#endif /* MDBX_ENABLE_PGOP_STAT */ + const pgno_t pgno = dp->pgno; + int err = iov_page(txn, ctx, dp, npages); + if (likely(err == MDBX_SUCCESS)) + err = spill_append_span(&txn->tw.spilled.list, pgno, npages); + return err; +} + +/* Set unspillable LRU-label for dirty pages 
watched by txn. + * Returns the number of pages marked as unspillable. */ +static size_t spill_cursor_keep(const MDBX_txn *const txn, + const MDBX_cursor *mc) { + tASSERT(txn, (txn->flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0); + size_t keep = 0; + while (!is_poor(mc)) { + tASSERT(txn, mc->top >= 0); + const page_t *mp; + intptr_t i = 0; + do { + mp = mc->pg[i]; + tASSERT(txn, !is_subpage(mp)); + if (is_modifable(txn, mp)) { + size_t const n = dpl_search(txn, mp->pgno); + if (txn->tw.dirtylist->items[n].pgno == mp->pgno && + /* do not count a page twice */ dpl_age(txn, n)) { + size_t *const ptr = ptr_disp(txn->tw.dirtylist->items[n].ptr, + -(ptrdiff_t)sizeof(size_t)); + *ptr = txn->tw.dirtylru; /* stamp with the current LRU label, so the age is 0 */ + tASSERT(txn, dpl_age(txn, n) == 0); + ++keep; + } + } + } while (++i <= mc->top); + + tASSERT(txn, is_leaf(mp)); + if (!mc->subcur || mc->ki[mc->top] >= page_numkeys(mp)) + break; + if (!(node_flags(page_node(mp, mc->ki[mc->top])) & N_SUBDATA)) + break; + mc = &mc->subcur->cursor; /* go on with the nested cursor */ + } + return keep; +} +/* Calls dpl_lru_turn(), then runs spill_cursor_keep() for m0 (when given) and + * for every other cursor of each DBI visited by TXN_FOREACH_DBI_ALL that has + * DBI_DIRTY | DBI_VALID set and whose root is not P_INVALID. + * Returns the total number of pages marked as unspillable. */ +static size_t spill_txn_keep(MDBX_txn *txn, MDBX_cursor *m0) { + tASSERT(txn, (txn->flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0); + dpl_lru_turn(txn); + size_t keep = m0 ? spill_cursor_keep(txn, m0) : 0; + + TXN_FOREACH_DBI_ALL(txn, dbi) { + if (F_ISSET(txn->dbi_state[dbi], DBI_DIRTY | DBI_VALID) && + txn->dbs[dbi].root != P_INVALID) + for (MDBX_cursor *mc = txn->cursors[dbi]; mc; mc = mc->next) + if (mc != m0) + keep += spill_cursor_keep(txn, mc); + } + + return keep; +} + +/* Returns the spilling priority (0..255) for a dirty page: + * 0 = should be spilled; + * ... + * > 255 = must not be spilled.
*/ +MDBX_NOTHROW_PURE_FUNCTION static unsigned +spill_prio(const MDBX_txn *txn, const size_t i, const uint32_t reciprocal) { + dpl_t *const dl = txn->tw.dirtylist; + const uint32_t age = dpl_age(txn, i); + const size_t npages = dpl_npages(dl, i); + const pgno_t pgno = dl->items[i].pgno; + if (age == 0) { + DEBUG("skip %s %zu page %" PRIaPGNO, "keep", npages, pgno); + return 256; + } + + page_t *const dp = dl->items[i].ptr; + if (dp->flags & (P_LOOSE | P_SPILLED)) { + DEBUG("skip %s %zu page %" PRIaPGNO, + (dp->flags & P_LOOSE) ? "loose" : "parent-spilled", npages, pgno); + return 256; + } + + /* Can't spill twice, + * make sure it's not already in a parent's spill list(s). */ + MDBX_txn *parent = txn->parent; + if (parent && (parent->flags & MDBX_TXN_SPILLS)) { + do + if (spill_intersect(parent, pgno, npages)) { + DEBUG("skip-2 parent-spilled %zu page %" PRIaPGNO, npages, pgno); + dp->flags |= P_SPILLED; + return 256; + } + while ((parent = parent->parent) != nullptr); + } + + tASSERT(txn, age * (uint64_t)reciprocal < UINT32_MAX); + unsigned prio = age * reciprocal >> 24; + tASSERT(txn, prio < 256); + if (likely(npages == 1)) + return prio = 256 - prio; + + /* make a large/overflow pages be likely to spill */ + size_t factor = npages | npages >> 1; + factor |= factor >> 2; + factor |= factor >> 4; + factor |= factor >> 8; + factor |= factor >> 16; + factor = (size_t)prio * log2n_powerof2(factor + 1) + /* golden ratio */ 157; + factor = (factor < 256) ? 255 - factor : 0; + tASSERT(txn, factor < 256 && factor < (256 - prio)); + return prio = (unsigned)factor; +} + +static size_t spill_gate(const MDBX_env *env, intptr_t part, + const size_t total) { + const intptr_t spill_min = + env->options.spill_min_denominator + ? (total + env->options.spill_min_denominator - 1) / + env->options.spill_min_denominator + : 1; + const intptr_t spill_max = + total - (env->options.spill_max_denominator + ? total / env->options.spill_max_denominator + : 0); + part = (part < spill_max) ? 
part : spill_max; + part = (part > spill_min) ? part : spill_min; + eASSERT(env, part >= 0 && (size_t)part <= total); + return (size_t)part; +} +/* Slow path of spilling, entered from txn_spill(): gains dirty-room either by + * msync (MDBX_WRITEMAP) or by writing out dirty pages chosen by LRU priority + * which are not held by cursors. Returns MDBX_SUCCESS or MDBX_TXN_FULL + * depending on the room available afterwards; on a failure it sets + * MDBX_TXN_ERROR in txn->flags and returns the error code. */ +__cold int spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, + const intptr_t wanna_spill_entries, + const intptr_t wanna_spill_npages, + const size_t need) { + tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); + + int rc = MDBX_SUCCESS; + if (unlikely(txn->tw.loose_count >= + (txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose + : txn->tw.writemap_dirty_npages))) + goto done; /* nothing is dirty except loose pages */ + + const size_t dirty_entries = + txn->tw.dirtylist ? (txn->tw.dirtylist->length - txn->tw.loose_count) : 1; + const size_t dirty_npages = + (txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose + : txn->tw.writemap_dirty_npages) - + txn->tw.loose_count; + const size_t need_spill_entries = + spill_gate(txn->env, wanna_spill_entries, dirty_entries); + const size_t need_spill_npages = + spill_gate(txn->env, wanna_spill_npages, dirty_npages); + + const size_t need_spill = (need_spill_entries > need_spill_npages) + ?
need_spill_entries + : need_spill_npages; + if (!need_spill) + goto done; + + if (txn->flags & MDBX_WRITEMAP) { /* msync the map instead of writing pages */ + NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "msync", + dirty_entries, dirty_npages); + const MDBX_env *env = txn->env; + tASSERT(txn, txn->tw.spilled.list == nullptr); + rc = osal_msync(&txn->env->dxb_mmap, 0, + pgno_align2os_bytes(env, txn->geo.first_unallocated), + MDBX_SYNC_KICK); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; +#if MDBX_AVOID_MSYNC + MDBX_ANALYSIS_ASSUME(txn->tw.dirtylist != nullptr); + tASSERT(txn, dpl_check(txn)); + env->lck->unsynced_pages.weak += + txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count; + dpl_clear(txn->tw.dirtylist); + txn->tw.dirtyroom = env->options.dp_limit - txn->tw.loose_count; + for (page_t *lp = txn->tw.loose_pages; lp != nullptr; lp = page_next(lp)) { + tASSERT(txn, lp->flags == P_LOOSE); + rc = dpl_append(txn, lp->pgno, lp, 1); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *)); + VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *)); + } + tASSERT(txn, dpl_check(txn)); +#else + tASSERT(txn, txn->tw.dirtylist == nullptr); + env->lck->unsynced_pages.weak += txn->tw.writemap_dirty_npages; + txn->tw.writemap_spilled_npages += txn->tw.writemap_dirty_npages; + txn->tw.writemap_dirty_npages = 0; +#endif /* MDBX_AVOID_MSYNC */ + goto done; + } + + NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "write", + need_spill_entries, need_spill_npages); + MDBX_ANALYSIS_ASSUME(txn->tw.dirtylist != nullptr); + tASSERT(txn, txn->tw.dirtylist->length - txn->tw.loose_count >= 1); + tASSERT(txn, txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count >= + need_spill_npages); + if (!txn->tw.spilled.list) { + txn->tw.spilled.least_removed = INT_MAX; + txn->tw.spilled.list = pnl_alloc(need_spill); + if (unlikely(!txn->tw.spilled.list)) { + rc = MDBX_ENOMEM; + bailout: + txn->flags |= MDBX_TXN_ERROR; + return
rc; + } + } else { + /* purge deleted slots */ + spill_purge(txn); + rc = pnl_reserve(&txn->tw.spilled.list, need_spill); + (void)rc /* ignore since the resulting list may be shorter + and pnl_append() will increase pnl on demand */ + ; + } + + /* Sort so that writing to disk is more sequential */ + dpl_t *const dl = dpl_sort(txn); + + /* Preserve pages which may soon be dirtied again */ + const size_t unspillable = spill_txn_keep(txn, m0); + if (unspillable + txn->tw.loose_count >= dl->length) { +#if xMDBX_DEBUG_SPILLING == 1 /* avoid false failure in debug mode */ + if (likely(txn->tw.dirtyroom + txn->tw.loose_count >= need)) + return MDBX_SUCCESS; +#endif /* xMDBX_DEBUG_SPILLING */ + ERROR("all %zu dirty pages are unspillable since referenced " + "by a cursor(s), use fewer cursors or increase " + "MDBX_opt_txn_dp_limit", + unspillable); + goto done; + } + + /* Subtask: spill some of the pages to disk according to LRU, + * while taking important adjustments into account: + * - it is better to spill old large/overflow pages, since this frees + * more memory, and also because they are (as currently understood) much + * less often modified again; + * - other things being equal it is better to spill adjacent pages, so + * there are fewer I/O operations; + * - preferably spend less time on this than std::partial_sort_copy would; + * + * Solution: + * - Quantize the whole range of lru-labels down to 256 values and use a + * single pass of an 8-bit radix-sort.
This yields 256 levels of + * "freshness", including the lru-label value beyond which older pages + * must be spilled; + * - Move sequentially towards increasing page numbers + * and spill pages whose lru-label is older than the cut-off value, + * until enough has been spilled; + * - On meeting pages adjacent to the ones being spilled, spill them too to + * reduce the number of I/O operations, provided they fall into the first + * half between the spilled and the freshest lru-labels; + * - additionally, large/overflow pages are deliberately aged while sorting, + * thereby raising their chances of being spilled. */ + + /* get min/max of LRU-labels */ + uint32_t age_max = 0; + for (size_t i = 1; i <= dl->length; ++i) { + const uint32_t age = dpl_age(txn, i); + age_max = (age_max >= age) ? age_max : age; + } + + VERBOSE("lru-head %u, age-max %u", txn->tw.dirtylru, age_max); + + /* half of 8-bit radix-sort */ + pgno_t radix_entries[256], radix_npages[256]; + memset(&radix_entries, 0, sizeof(radix_entries)); + memset(&radix_npages, 0, sizeof(radix_npages)); + size_t spillable_entries = 0, spillable_npages = 0; + const uint32_t reciprocal = (UINT32_C(255) << 24) / (age_max + 1); + for (size_t i = 1; i <= dl->length; ++i) { + const unsigned prio = spill_prio(txn, i, reciprocal); + size_t *const ptr = ptr_disp(dl->items[i].ptr, -(ptrdiff_t)sizeof(size_t)); + TRACE("page %" PRIaPGNO + ", lru %zu, is_multi %c, npages %u, age %u of %u, prio %u", + dl->items[i].pgno, *ptr, (dl->items[i].npages > 1) ?
'Y' : 'N', + dpl_npages(dl, i), dpl_age(txn, i), age_max, prio); + if (prio < 256) { + radix_entries[prio] += 1; + spillable_entries += 1; + const pgno_t npages = dpl_npages(dl, i); + radix_npages[prio] += npages; + spillable_npages += npages; + } + } + + tASSERT(txn, spillable_npages >= spillable_entries); + pgno_t spilled_entries = 0, spilled_npages = 0; + if (likely(spillable_entries > 0)) { /* pick cut-off priorities by the histogram */ + size_t prio2spill = 0, prio2adjacent = 128, + amount_entries = radix_entries[0], amount_npages = radix_npages[0]; + for (size_t i = 1; i < 256; i++) { + if (amount_entries < need_spill_entries || + amount_npages < need_spill_npages) { + prio2spill = i; + prio2adjacent = i + (257 - i) / 2; + amount_entries += radix_entries[i]; + amount_npages += radix_npages[i]; + } else if (amount_entries + amount_entries < + spillable_entries + need_spill_entries + /* EQUIVALENT TO: amount - need_spill < spillable - amount */ + || amount_npages + amount_npages < + spillable_npages + need_spill_npages) { + prio2adjacent = i; + amount_entries += radix_entries[i]; + amount_npages += radix_npages[i]; + } else + break; + } + + VERBOSE("prio2spill %zu, prio2adjacent %zu, spillable %zu/%zu," + " wanna-spill %zu/%zu, amount %zu/%zu", + prio2spill, prio2adjacent, spillable_entries, spillable_npages, + need_spill_entries, need_spill_npages, amount_entries, + amount_npages); + tASSERT(txn, prio2spill < prio2adjacent && prio2adjacent <= 256); + + iov_ctx_t ctx; + rc = iov_init( + txn, &ctx, amount_entries, amount_npages, +#if defined(_WIN32) || defined(_WIN64) + txn->env->ioring.overlapped_fd ?
txn->env->ioring.overlapped_fd : +#endif + txn->env->lazy_fd, + true); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + size_t r = 0, w = 0; + pgno_t last = 0; + while (r < dl->length && (spilled_entries < need_spill_entries || + spilled_npages < need_spill_npages)) { + dl->items[++w] = dl->items[++r]; + unsigned prio = spill_prio(txn, w, reciprocal); + if (prio > prio2spill && + (prio >= prio2adjacent || last != dl->items[w].pgno)) + continue; /* above the cut-off and not a co-spill candidate */ + + const size_t e = w; + last = dpl_endpgno(dl, w); + while (--w && dpl_endpgno(dl, w) == dl->items[w + 1].pgno && + spill_prio(txn, w, reciprocal) < prio2adjacent) + ; /* extend the span backwards over adjacent pages below prio2adjacent */ + + for (size_t i = w; ++i <= e;) { + const unsigned npages = dpl_npages(dl, i); + prio = spill_prio(txn, i, reciprocal); + DEBUG("%sspill[%zu] %u page %" PRIaPGNO " (age %d, prio %u)", + (prio > prio2spill) ? "co-" : "", i, npages, dl->items[i].pgno, + dpl_age(txn, i), prio); + tASSERT(txn, prio < 256); + ++spilled_entries; + spilled_npages += npages; + rc = spill_page(txn, &ctx, dl->items[i].ptr, npages); + if (unlikely(rc != MDBX_SUCCESS)) + goto failed; + } + } + + VERBOSE("spilled entries %u, spilled npages %u", spilled_entries, + spilled_npages); + tASSERT(txn, spillable_entries == 0 || spilled_entries > 0); + tASSERT(txn, spilled_npages >= spilled_entries); + + failed: /* move the remaining items down, closing the gap of spilled ones */ + while (r < dl->length) + dl->items[++w] = dl->items[++r]; + tASSERT(txn, r - w == spilled_entries || rc != MDBX_SUCCESS); + + dl->sorted = dpl_setlen(dl, w); + txn->tw.dirtyroom += spilled_entries; + txn->tw.dirtylist->pages_including_loose -= spilled_npages; + tASSERT(txn, dpl_check(txn)); + + if (!iov_empty(&ctx)) { + tASSERT(txn, rc == MDBX_SUCCESS); + rc = iov_write(&ctx); + } + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + txn->env->lck->unsynced_pages.weak += spilled_npages; + pnl_sort(txn->tw.spilled.list, (size_t)txn->geo.first_unallocated << 1); + txn->flags |= MDBX_TXN_SPILLS; + NOTICE("spilled %u dirty-entries, %u dirty-npages, now have %zu dirty-room",
spilled_entries, spilled_npages, txn->tw.dirtyroom); + } else { + tASSERT(txn, rc == MDBX_SUCCESS); + for (size_t i = 1; i <= dl->length; ++i) { + page_t *dp = dl->items[i].ptr; + VERBOSE( + "unspillable[%zu]: pgno %u, npages %u, flags 0x%04X, age %u, prio %u", + i, dp->pgno, dpl_npages(dl, i), dp->flags, dpl_age(txn, i), + spill_prio(txn, i, reciprocal)); + } + } + +#if xMDBX_DEBUG_SPILLING == 2 + if (txn->tw.loose_count + txn->tw.dirtyroom <= need / 2 + 1) + ERROR("dirty-list length: before %zu, after %zu, parent %zi, loose %zu; " + "needed %zu, spillable %zu; " + "spilled %u dirty-entries, now have %zu dirty-room", + dl->length + spilled_entries, dl->length, + (txn->parent && txn->parent->tw.dirtylist) + ? (intptr_t)txn->parent->tw.dirtylist->length + : -1, + txn->tw.loose_count, need, spillable_entries, spilled_entries, + txn->tw.dirtyroom); + ENSURE(txn->env, txn->tw.loose_count + txn->tw.dirtyroom > need / 2); +#endif /* xMDBX_DEBUG_SPILLING */ + +done: /* success needs dirtyroom + loose_count > min(need, CURSOR_STACK_SIZE) */ + return likely(txn->tw.dirtyroom + txn->tw.loose_count > + ((need > CURSOR_STACK_SIZE) ? CURSOR_STACK_SIZE : need)) + ?
MDBX_SUCCESS + : MDBX_TXN_FULL; +} diff --git a/src/spill.h b/src/spill.h new file mode 100644 index 00000000..f4c427dd --- /dev/null +++ b/src/spill.h @@ -0,0 +1,86 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +MDBX_INTERNAL void spill_remove(MDBX_txn *txn, size_t idx, size_t npages); +MDBX_INTERNAL pnl_t spill_purge(MDBX_txn *txn); +MDBX_INTERNAL int spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, + const intptr_t wanna_spill_entries, + const intptr_t wanna_spill_npages, + const size_t need); +/*----------------------------------------------------------------------------*/ + +static inline size_t spill_search(const MDBX_txn *txn, pgno_t pgno) { + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + const pnl_t pnl = txn->tw.spilled.list; + if (likely(!pnl)) + return 0; + pgno <<= 1; + size_t n = pnl_search(pnl, pgno, (size_t)MAX_PAGENO + MAX_PAGENO + 1); + return (n <= MDBX_PNL_GETSIZE(pnl) && pnl[n] == pgno) ? n : 0; +} + +static inline bool spill_intersect(const MDBX_txn *txn, pgno_t pgno, + size_t npages) { + const pnl_t pnl = txn->tw.spilled.list; + if (likely(!pnl)) + return false; + const size_t len = MDBX_PNL_GETSIZE(pnl); + if (LOG_ENABLED(MDBX_LOG_EXTRA)) { + DEBUG_EXTRA("PNL len %zu [", len); + for (size_t i = 1; i <= len; ++i) + DEBUG_EXTRA_PRINT(" %li", (pnl[i] & 1) ?
-(long)(pnl[i] >> 1) + : (long)(pnl[i] >> 1)); + DEBUG_EXTRA_PRINT("%s\n", "]"); + } + const pgno_t spilled_range_begin = pgno << 1; + const pgno_t spilled_range_last = ((pgno + (pgno_t)npages) << 1) - 1; +#if MDBX_PNL_ASCENDING + const size_t n = + pnl_search(pnl, spilled_range_begin, (size_t)(MAX_PAGENO + 1) << 1); + tASSERT(txn, n && (n == MDBX_PNL_GETSIZE(pnl) + 1 || + spilled_range_begin <= pnl[n])); + const bool rc = n <= MDBX_PNL_GETSIZE(pnl) && pnl[n] <= spilled_range_last; +#else + const size_t n = + pnl_search(pnl, spilled_range_last, (size_t)MAX_PAGENO + MAX_PAGENO + 1); + tASSERT(txn, n && (n == MDBX_PNL_GETSIZE(pnl) + 1 || + spilled_range_last >= pnl[n])); + const bool rc = n <= MDBX_PNL_GETSIZE(pnl) && pnl[n] >= spilled_range_begin; +#endif + if (ASSERT_ENABLED()) { + bool check = false; + for (size_t i = 0; i < npages; ++i) + check |= spill_search(txn, (pgno_t)(pgno + i)) != 0; + tASSERT(txn, check == rc); + } + return rc; +} + +static inline int txn_spill(MDBX_txn *const txn, MDBX_cursor *const m0, + const size_t need) { + tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); + tASSERT(txn, !m0 || cursor_is_tracked(m0)); + + const intptr_t wanna_spill_entries = + txn->tw.dirtylist ? (need - txn->tw.dirtyroom - txn->tw.loose_count) : 0; + const intptr_t wanna_spill_npages = + need + + (txn->tw.dirtylist ? 
txn->tw.dirtylist->pages_including_loose + : txn->tw.writemap_dirty_npages) - + txn->tw.loose_count - txn->env->options.dp_limit; + + /* production mode */ + if (likely(wanna_spill_npages < 1 && wanna_spill_entries < 1) +#if xMDBX_DEBUG_SPILLING == 1 + /* debug mode: always try to spill if xMDBX_DEBUG_SPILLING == 1 */ + && txn->txnid % 23 > 11 +#endif + ) + return MDBX_SUCCESS; + + return spill_slowpath(txn, m0, wanna_spill_entries, wanna_spill_npages, need); +} diff --git a/src/subdb.c b/src/subdb.c new file mode 100644 index 00000000..c1481035 --- /dev/null +++ b/src/subdb.c @@ -0,0 +1,104 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +int sdb_setup(const MDBX_env *env, kvx_t *const kvx, const tree_t *const db) { + if (unlikely(!check_sdb_flags(db->flags))) { + ERROR("incompatible or invalid db.flags (0x%x) ", db->flags); + return MDBX_INCOMPATIBLE; + } + if (unlikely(!kvx->clc.k.cmp)) { + kvx->clc.k.cmp = builtin_keycmp(db->flags); + kvx->clc.v.cmp = builtin_datacmp(db->flags); + } + + kvx->clc.k.lmin = keysize_min(db->flags); + kvx->clc.k.lmax = env_keysize_max(env, db->flags); + kvx->clc.v.lmin = valsize_min(db->flags); + kvx->clc.v.lmax = env_valsize_max(env, db->flags); + + if ((db->flags & (MDBX_DUPFIXED | MDBX_INTEGERDUP)) != 0 && db->dupfix_size) { + if (!MDBX_DISABLE_VALIDATION && + unlikely(db->dupfix_size < kvx->clc.v.lmin || + db->dupfix_size > kvx->clc.v.lmax)) { + ERROR("db.dupfix_size (%u) <> min/max value-length (%zu/%zu)", + db->dupfix_size, kvx->clc.v.lmin, kvx->clc.v.lmax); + return MDBX_CORRUPTED; + } + kvx->clc.v.lmin = kvx->clc.v.lmax = db->dupfix_size; + } + return MDBX_SUCCESS; +} + +int sdb_fetch(MDBX_txn *txn, size_t dbi) { + cursor_couple_t couple; + int rc = cursor_init(&couple.outer, txn, MAIN_DBI); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + kvx_t *const kvx = &txn->env->kvs[dbi]; + rc = tree_search(&couple.outer, &kvx->name, 0); 
+ if (unlikely(rc != MDBX_SUCCESS)) { + bailout: + NOTICE("dbi %zu refs to inaccessible subDB `%*s` for txn %" PRIaTXN + " (err %d)", + dbi, (int)kvx->name.iov_len, (const char *)kvx->name.iov_base, + txn->txnid, rc); + return (rc == MDBX_NOTFOUND) ? MDBX_BAD_DBI : rc; + } + + MDBX_val data; + struct node_search_result nsr = node_search(&couple.outer, &kvx->name); + if (unlikely(!nsr.exact)) { + rc = MDBX_NOTFOUND; + goto bailout; + } + if (unlikely((node_flags(nsr.node) & (N_DUPDATA | N_SUBDATA)) != N_SUBDATA)) { + NOTICE("dbi %zu refs to not a named subDB `%*s` for txn %" PRIaTXN " (%s)", + dbi, (int)kvx->name.iov_len, (const char *)kvx->name.iov_base, + txn->txnid, "wrong flags"); + return MDBX_INCOMPATIBLE; /* not a named DB */ + } + + rc = node_read(&couple.outer, nsr.node, &data, + couple.outer.pg[couple.outer.top]); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(data.iov_len != sizeof(tree_t))) { + NOTICE("dbi %zu refs to not a named subDB `%*s` for txn %" PRIaTXN " (%s)", + dbi, (int)kvx->name.iov_len, (const char *)kvx->name.iov_base, + txn->txnid, "wrong rec-size"); + return MDBX_INCOMPATIBLE; /* not a named DB */ + } + + uint16_t flags = UNALIGNED_PEEK_16(data.iov_base, tree_t, flags); + /* The txn may not know this DBI, or another process may + * have dropped and recreated the DB with other flags. 
*/ + tree_t *const db = &txn->dbs[dbi]; + if (unlikely((db->flags & DB_PERSISTENT_FLAGS) != flags)) { + NOTICE("dbi %zu refs to the re-created subDB `%*s` for txn %" PRIaTXN + " with different flags (present 0x%X != wanna 0x%X)", + dbi, (int)kvx->name.iov_len, (const char *)kvx->name.iov_base, + txn->txnid, db->flags & DB_PERSISTENT_FLAGS, flags); + return MDBX_INCOMPATIBLE; + } + + memcpy(db, data.iov_base, sizeof(tree_t)); +#if !MDBX_DISABLE_VALIDATION + const txnid_t pp_txnid = couple.outer.pg[couple.outer.top]->txnid; + tASSERT(txn, txn->front_txnid >= pp_txnid); + if (unlikely(db->mod_txnid > pp_txnid)) { + ERROR("db.mod_txnid (%" PRIaTXN ") > page-txnid (%" PRIaTXN ")", + db->mod_txnid, pp_txnid); + return MDBX_CORRUPTED; + } +#endif /* !MDBX_DISABLE_VALIDATION */ + rc = sdb_setup(txn->env, kvx, db); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + txn->dbi_state[dbi] &= ~DBI_STALE; + return MDBX_SUCCESS; +} diff --git a/src/tls.c b/src/tls.c new file mode 100644 index 00000000..cdfdda3e --- /dev/null +++ b/src/tls.c @@ -0,0 +1,610 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +typedef struct rthc_entry { + MDBX_env *env; +} rthc_entry_t; + +#if MDBX_DEBUG +#define RTHC_INITIAL_LIMIT 1 +#else +#define RTHC_INITIAL_LIMIT 16 +#endif + +static unsigned rthc_count, rthc_limit = RTHC_INITIAL_LIMIT; +static rthc_entry_t rthc_table_static[RTHC_INITIAL_LIMIT]; +static rthc_entry_t *rthc_table = rthc_table_static; + +static int uniq_peek(const osal_mmap_t *pending, osal_mmap_t *scan) { + int rc; + uint64_t bait; + lck_t *const pending_lck = pending->lck; + lck_t *const scan_lck = scan->lck; + if (pending_lck) { + bait = atomic_load64(&pending_lck->bait_uniqueness, mo_AcquireRelease); + rc = MDBX_SUCCESS; + } else { + bait = 0 /* hush MSVC warning */; + rc = osal_msync(scan, 0, sizeof(lck_t), MDBX_SYNC_DATA); + if (rc == MDBX_SUCCESS) + rc = osal_pread(pending->fd, 
&bait, sizeof(scan_lck->bait_uniqueness), + offsetof(lck_t, bait_uniqueness)); + } + if (likely(rc == MDBX_SUCCESS) && + bait == atomic_load64(&scan_lck->bait_uniqueness, mo_AcquireRelease)) + rc = MDBX_RESULT_TRUE; + + TRACE("uniq-peek: %s, bait 0x%016" PRIx64 ",%s rc %d", + pending_lck ? "mem" : "file", bait, + (rc == MDBX_RESULT_TRUE) ? " found," : (rc ? " FAILED," : ""), rc); + return rc; +} + +static int uniq_poke(const osal_mmap_t *pending, osal_mmap_t *scan, + uint64_t *abra) { + if (*abra == 0) { + const uintptr_t tid = osal_thread_self(); + uintptr_t uit = 0; + memcpy(&uit, &tid, (sizeof(tid) < sizeof(uit)) ? sizeof(tid) : sizeof(uit)); + *abra = rrxmrrxmsx_0(osal_monotime() + UINT64_C(5873865991930747) * uit); + } + const uint64_t cadabra = + rrxmrrxmsx_0(*abra + UINT64_C(7680760450171793) * (unsigned)osal_getpid()) + << 24 | + *abra >> 40; + lck_t *const scan_lck = scan->lck; + atomic_store64(&scan_lck->bait_uniqueness, cadabra, mo_AcquireRelease); + *abra = *abra * UINT64_C(6364136223846793005) + 1; + return uniq_peek(pending, scan); +} + +__cold int rthc_uniq_check(const osal_mmap_t *pending, MDBX_env **found) { + *found = nullptr; + uint64_t salt = 0; + for (size_t i = 0; i < rthc_count; ++i) { + MDBX_env *const scan = rthc_table[i].env; + if (!scan->lck_mmap.lck || &scan->lck_mmap == pending) + continue; + int err = + atomic_load64(&scan->lck_mmap.lck->bait_uniqueness, mo_AcquireRelease) + ? uniq_peek(pending, &scan->lck_mmap) + : uniq_poke(pending, &scan->lck_mmap, &salt); + if (err == MDBX_ENODATA) { + uint64_t length = 0; + if (likely(osal_filesize(pending->fd, &length) == MDBX_SUCCESS && + length == 0)) { + /* LY: skip checking since LCK-file is empty, i.e. just created. 
*/ + DEBUG("%s", "unique (new/empty lck)"); + return MDBX_SUCCESS; + } + } + if (err == MDBX_RESULT_TRUE) + err = uniq_poke(pending, &scan->lck_mmap, &salt); + if (err == MDBX_RESULT_TRUE) { + (void)osal_msync(&scan->lck_mmap, 0, sizeof(lck_t), MDBX_SYNC_KICK); + err = uniq_poke(pending, &scan->lck_mmap, &salt); + } + if (err == MDBX_RESULT_TRUE) { + err = uniq_poke(pending, &scan->lck_mmap, &salt); + *found = scan; + DEBUG("found %p", __Wpedantic_format_voidptr(*found)); + return MDBX_SUCCESS; + } + if (unlikely(err != MDBX_SUCCESS)) { + DEBUG("failed rc %d", err); + return err; + } + } + + DEBUG("%s", "unique"); + return MDBX_SUCCESS; +} + +//------------------------------------------------------------------------------ + +#if defined(_WIN32) || defined(_WIN64) +static CRITICAL_SECTION rthc_critical_section; +#else + +static pthread_mutex_t rthc_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t rthc_cond = PTHREAD_COND_INITIALIZER; +static osal_thread_key_t rthc_key; +static mdbx_atomic_uint32_t rthc_pending; + +static inline uint64_t rthc_signature(const void *addr, uint8_t kind) { + uint64_t salt = osal_thread_self() * UINT64_C(0xA2F0EEC059629A17) ^ + UINT64_C(0x01E07C6FDB596497) * (uintptr_t)(addr); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return salt << 8 | kind; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return (uint64_t)kind << 56 | salt >> 8; +#else +#error "FIXME: Unsupported byte order" +#endif /* __BYTE_ORDER__ */ +} + +#define MDBX_THREAD_RTHC_REGISTERED(addr) rthc_signature(addr, 0x0D) +#define MDBX_THREAD_RTHC_COUNTED(addr) rthc_signature(addr, 0xC0) +static __thread uint64_t rthc_thread_state +#if __has_attribute(tls_model) && \ + (defined(__PIC__) || defined(__pic__) || MDBX_BUILD_SHARED_LIBRARY) + __attribute__((tls_model("local-dynamic"))) +#endif + ; + +#if defined(__APPLE__) && defined(__SANITIZE_ADDRESS__) && \ + !defined(MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS) +/* Avoid ASAN-trap due the target TLS-variable feed by Darwin's 
tlv_free() */ +#define MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS \ + __attribute__((__no_sanitize_address__, __noinline__)) +#else +#define MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS inline +#endif + +MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS static uint64_t rthc_read(const void *rthc) { + return *(volatile uint64_t *)rthc; +} + +MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS static uint64_t +rthc_compare_and_clean(const void *rthc, const uint64_t signature) { +#if MDBX_64BIT_CAS + return atomic_cas64((mdbx_atomic_uint64_t *)rthc, signature, 0); +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return atomic_cas32((mdbx_atomic_uint32_t *)rthc, (uint32_t)signature, 0); +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return atomic_cas32((mdbx_atomic_uint32_t *)rthc, (uint32_t)(signature >> 32), + 0); +#else +#error "FIXME: Unsupported byte order" +#endif +} + +static inline int rthc_atexit(void (*dtor)(void *), void *obj, + void *dso_symbol) { +#ifndef MDBX_HAVE_CXA_THREAD_ATEXIT_IMPL +#if defined(LIBCXXABI_HAS_CXA_THREAD_ATEXIT_IMPL) || \ + defined(HAVE___CXA_THREAD_ATEXIT_IMPL) || __GLIBC_PREREQ(2, 18) || \ + defined(BIONIC) +#define MDBX_HAVE_CXA_THREAD_ATEXIT_IMPL 1 +#else +#define MDBX_HAVE_CXA_THREAD_ATEXIT_IMPL 0 +#endif +#endif /* MDBX_HAVE_CXA_THREAD_ATEXIT_IMPL */ + +#ifndef MDBX_HAVE_CXA_THREAD_ATEXIT +#if defined(LIBCXXABI_HAS_CXA_THREAD_ATEXIT) || \ + defined(HAVE___CXA_THREAD_ATEXIT) +#define MDBX_HAVE_CXA_THREAD_ATEXIT 1 +#elif !MDBX_HAVE_CXA_THREAD_ATEXIT_IMPL && \ + (defined(__linux__) || defined(__gnu_linux__)) +#define MDBX_HAVE_CXA_THREAD_ATEXIT 1 +#else +#define MDBX_HAVE_CXA_THREAD_ATEXIT 0 +#endif +#endif /* MDBX_HAVE_CXA_THREAD_ATEXIT */ + + int rc = MDBX_ENOSYS; +#if MDBX_HAVE_CXA_THREAD_ATEXIT_IMPL && !MDBX_HAVE_CXA_THREAD_ATEXIT +#define __cxa_thread_atexit __cxa_thread_atexit_impl +#endif +#if MDBX_HAVE_CXA_THREAD_ATEXIT || defined(__cxa_thread_atexit) + extern int __cxa_thread_atexit(void (*dtor)(void *), void *obj, + void *dso_symbol) MDBX_WEAK_IMPORT_ATTRIBUTE; + if 
(&__cxa_thread_atexit) + rc = __cxa_thread_atexit(dtor, obj, dso_symbol); +#elif defined(__APPLE__) || defined(_DARWIN_C_SOURCE) + extern void _tlv_atexit(void (*termfunc)(void *objAddr), void *objAddr) + MDBX_WEAK_IMPORT_ATTRIBUTE; + if (&_tlv_atexit) { + (void)dso_symbol; + _tlv_atexit(dtor, obj); + rc = 0; + } +#else + (void)dtor; + (void)obj; + (void)dso_symbol; +#endif + return rc; +} + +__cold void workaround_glibc_bug21031(void) { + /* Workaround for https://sourceware.org/bugzilla/show_bug.cgi?id=21031 + * + * Due to a race between pthread_key_delete() and __nptl_deallocate_tsd(), + * the destructor(s) of thread-local-storage object(s) may be running + * in other thread(s) and be blocked or not finished yet. + * In such a case we get a SEGFAULT after this library DSO is unloaded. + * + * So by yielding a few timeslices we give such destructor(s) + * a chance to complete and thereby avoid the segfault. */ + sched_yield(); + sched_yield(); + sched_yield(); +} +#endif /* !Windows */ + +void rthc_lock(void) { +#if defined(_WIN32) || defined(_WIN64) + EnterCriticalSection(&rthc_critical_section); +#else + ENSURE(nullptr, osal_pthread_mutex_lock(&rthc_mutex) == 0); +#endif +} + +void rthc_unlock(void) { +#if defined(_WIN32) || defined(_WIN64) + LeaveCriticalSection(&rthc_critical_section); +#else + ENSURE(nullptr, pthread_mutex_unlock(&rthc_mutex) == 0); +#endif +} + +static inline int thread_key_create(osal_thread_key_t *key) { + int rc; +#if defined(_WIN32) || defined(_WIN64) + *key = TlsAlloc(); + rc = (*key != TLS_OUT_OF_INDEXES) ? 
MDBX_SUCCESS : GetLastError(); +#else + rc = pthread_key_create(key, nullptr); +#endif + TRACE("&key = %p, value %" PRIuPTR ", rc %d", __Wpedantic_format_voidptr(key), + (uintptr_t)*key, rc); + return rc; +} + +void thread_rthc_set(osal_thread_key_t key, const void *value) { +#if defined(_WIN32) || defined(_WIN64) + ENSURE(nullptr, TlsSetValue(key, (void *)value)); +#else + const uint64_t sign_registered = + MDBX_THREAD_RTHC_REGISTERED(&rthc_thread_state); + const uint64_t sign_counted = MDBX_THREAD_RTHC_COUNTED(&rthc_thread_state); + if (value && unlikely(rthc_thread_state != sign_registered && + rthc_thread_state != sign_counted)) { + rthc_thread_state = sign_registered; + TRACE("thread registered 0x%" PRIxPTR, osal_thread_self()); + if (rthc_atexit(rthc_thread_dtor, &rthc_thread_state, + (void *)&mdbx_version /* dso_anchor */)) { + ENSURE(nullptr, pthread_setspecific(rthc_key, &rthc_thread_state) == 0); + rthc_thread_state = sign_counted; + const unsigned count_before = atomic_add32(&rthc_pending, 1); + ENSURE(nullptr, count_before < INT_MAX); + NOTICE("fallback to pthreads' tsd, key %" PRIuPTR ", count %u", + (uintptr_t)rthc_key, count_before); + (void)count_before; + } + } + ENSURE(nullptr, pthread_setspecific(key, value) == 0); +#endif +} + +/* dtor called for thread, i.e. 
for all mdbx's environment objects */ +__cold void rthc_thread_dtor(void *rthc) { + rthc_lock(); + const uint32_t current_pid = osal_getpid(); +#if defined(_WIN32) || defined(_WIN64) + TRACE(">> pid %d, thread 0x%" PRIxPTR ", module %p", current_pid, + osal_thread_self(), rthc); +#else + TRACE(">> pid %d, thread 0x%" PRIxPTR ", rthc %p", current_pid, + osal_thread_self(), rthc); +#endif + + for (size_t i = 0; i < rthc_count; ++i) { + MDBX_env *const env = rthc_table[i].env; + if (env->pid != current_pid) + continue; + if (!(env->flags & ENV_TXKEY)) + continue; + reader_slot_t *const reader = thread_rthc_get(env->me_txkey); + reader_slot_t *const begin = &env->lck_mmap.lck->rdt[0]; + reader_slot_t *const end = &env->lck_mmap.lck->rdt[env->max_readers]; + if (reader < begin || reader >= end) + continue; +#if !defined(_WIN32) && !defined(_WIN64) + if (pthread_setspecific(env->me_txkey, nullptr) != 0) { + TRACE("== thread 0x%" PRIxPTR + ", rthc %p: ignore race with tsd-key deletion", + osal_thread_self(), __Wpedantic_format_voidptr(reader)); + continue /* ignore race with tsd-key deletion by mdbx_env_close() */; + } +#endif + + TRACE("== thread 0x%" PRIxPTR + ", rthc %p, [%zi], %p ... 
%p (%+i), rtch-pid %i, " + "current-pid %i", + osal_thread_self(), __Wpedantic_format_voidptr(reader), i, + __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end), + (int)(reader - begin), reader->pid.weak, current_pid); + if (atomic_load32(&reader->pid, mo_Relaxed) == current_pid) { + TRACE("==== thread 0x%" PRIxPTR ", rthc %p, cleanup", osal_thread_self(), + __Wpedantic_format_voidptr(reader)); + (void)atomic_cas32(&reader->pid, current_pid, 0); + atomic_store32(&env->lck->rdt_refresh_flag, true, mo_Relaxed); + } + } + +#if defined(_WIN32) || defined(_WIN64) + TRACE("<< thread 0x%" PRIxPTR ", module %p", osal_thread_self(), rthc); + rthc_unlock(); +#else + const uint64_t sign_registered = MDBX_THREAD_RTHC_REGISTERED(rthc); + const uint64_t sign_counted = MDBX_THREAD_RTHC_COUNTED(rthc); + const uint64_t state = rthc_read(rthc); + if (state == sign_registered && + rthc_compare_and_clean(rthc, sign_registered)) { + TRACE("== thread 0x%" PRIxPTR + ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", + osal_thread_self(), rthc, osal_getpid(), "registered", state); + } else if (state == sign_counted && + rthc_compare_and_clean(rthc, sign_counted)) { + TRACE("== thread 0x%" PRIxPTR + ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", + osal_thread_self(), rthc, osal_getpid(), "counted", state); + ENSURE(nullptr, atomic_sub32(&rthc_pending, 1) > 0); + } else { + WARNING("thread 0x%" PRIxPTR + ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", + osal_thread_self(), rthc, osal_getpid(), "wrong", state); + } + + if (atomic_load32(&rthc_pending, mo_AcquireRelease) == 0) { + TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, wake", osal_thread_self(), + rthc, osal_getpid()); + ENSURE(nullptr, pthread_cond_broadcast(&rthc_cond) == 0); + } + + TRACE("<< thread 0x%" PRIxPTR ", rthc %p", osal_thread_self(), rthc); + /* Allow tail call optimization, i.e. 
gcc should generate the jmp instruction + * instead of a call for pthread_mutex_unlock() and therefore CPU could not + * return to current DSO's code section, which may be unloaded immediately + * after the mutex got released. */ + pthread_mutex_unlock(&rthc_mutex); +#endif +} + +__cold int rthc_register(MDBX_env *const env) { + TRACE(">> env %p, rthc_count %u, rthc_limit %u", + __Wpedantic_format_voidptr(env), rthc_count, rthc_limit); + + int rc = MDBX_SUCCESS; + for (size_t i = 0; i < rthc_count; ++i) + if (unlikely(rthc_table[i].env == env)) { + rc = MDBX_PANIC; + goto bailout; + } + + env->me_txkey = 0; + if (unlikely(rthc_count == rthc_limit)) { + rthc_entry_t *new_table = + osal_realloc((rthc_table == rthc_table_static) ? nullptr : rthc_table, + sizeof(rthc_entry_t) * rthc_limit * 2); + if (unlikely(new_table == nullptr)) { + rc = MDBX_ENOMEM; + goto bailout; + } + if (rthc_table == rthc_table_static) + memcpy(new_table, rthc_table, sizeof(rthc_entry_t) * rthc_limit); + rthc_table = new_table; + rthc_limit *= 2; + } + + if ((env->flags & MDBX_NOSTICKYTHREADS) == 0) { + rc = thread_key_create(&env->me_txkey); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + env->flags |= ENV_TXKEY; + } + + rthc_table[rthc_count].env = env; + TRACE("== [%i] = env %p, key %" PRIuPTR, rthc_count, + __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey); + ++rthc_count; + +bailout: + TRACE("<< env %p, key %" PRIuPTR ", rthc_count %u, rthc_limit %u, rc %d", + __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey, rthc_count, + rthc_limit, rc); + return rc; +} + +__cold static int rthc_drown(MDBX_env *const env) { + const uint32_t current_pid = osal_getpid(); + int rc = MDBX_SUCCESS; + MDBX_env *inprocess_neighbor = nullptr; + if (likely(env->lck_mmap.lck && current_pid == env->pid)) { + reader_slot_t *const begin = &env->lck_mmap.lck->rdt[0]; + reader_slot_t *const end = &env->lck_mmap.lck->rdt[env->max_readers]; + TRACE("== %s env %p pid %d, readers %p ...%p, 
current-pid %d", + (current_pid == env->pid) ? "cleanup" : "skip", + __Wpedantic_format_voidptr(env), env->pid, + __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end), + current_pid); + bool cleaned = false; + for (reader_slot_t *r = begin; r < end; ++r) { + if (atomic_load32(&r->pid, mo_Relaxed) == current_pid) { + atomic_store32(&r->pid, 0, mo_AcquireRelease); + TRACE("== cleanup %p", __Wpedantic_format_voidptr(r)); + cleaned = true; + } + } + if (cleaned) + atomic_store32(&env->lck_mmap.lck->rdt_refresh_flag, true, mo_Relaxed); + rc = rthc_uniq_check(&env->lck_mmap, &inprocess_neighbor); + if (!inprocess_neighbor && env->registered_reader_pid && + env->lck_mmap.fd != INVALID_HANDLE_VALUE) { + int err = lck_rpid_clear(env); + rc = rc ? rc : err; + } + } + int err = lck_destroy(env, inprocess_neighbor, current_pid); + env->pid = 0; + return rc ? rc : err; +} + +__cold int rthc_remove(MDBX_env *const env) { + TRACE(">>> env %p, key %zu, rthc_count %u, rthc_limit %u", + __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey, rthc_count, + rthc_limit); + + int rc = MDBX_SUCCESS; + if (likely(env->pid)) + rc = rthc_drown(env); + + for (size_t i = 0; i < rthc_count; ++i) { + if (rthc_table[i].env == env) { + if (--rthc_count > 0) + rthc_table[i] = rthc_table[rthc_count]; + else if (rthc_table != rthc_table_static) { + void *tmp = rthc_table; + rthc_table = rthc_table_static; + rthc_limit = RTHC_INITIAL_LIMIT; + osal_memory_barrier(); + osal_free(tmp); + } + break; + } + } + + TRACE("<<< %p, key %zu, rthc_count %u, rthc_limit %u", + __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey, rthc_count, + rthc_limit); + return rc; +} + +#if !defined(_WIN32) && !defined(_WIN64) +__cold void rthc_afterfork(void) { + NOTICE("drown %d rthc entries", rthc_count); + for (size_t i = 0; i < rthc_count; ++i) { + MDBX_env *const env = rthc_table[i].env; + NOTICE("drown env %p", __Wpedantic_format_voidptr(env)); + if (env->lck_mmap.lck) + 
osal_munmap(&env->lck_mmap); + if (env->dxb_mmap.base) { + osal_munmap(&env->dxb_mmap); +#ifdef ENABLE_MEMCHECK + VALGRIND_DISCARD(env->valgrind_handle); + env->valgrind_handle = -1; +#endif /* ENABLE_MEMCHECK */ + } + env->lck = lckless_stub(env); + rthc_drown(env); + } + if (rthc_table != rthc_table_static) + osal_free(rthc_table); + rthc_count = 0; + rthc_table = rthc_table_static; + rthc_limit = RTHC_INITIAL_LIMIT; + rthc_pending.weak = 0; +} +#endif /* ! Windows */ + +__cold void rthc_ctor(void) { +#if defined(_WIN32) || defined(_WIN64) + InitializeCriticalSection(&rthc_critical_section); +#else + ENSURE(nullptr, pthread_atfork(nullptr, nullptr, rthc_afterfork) == 0); + ENSURE(nullptr, pthread_key_create(&rthc_key, rthc_thread_dtor) == 0); + TRACE("pid %d, &mdbx_rthc_key = %p, value 0x%x", osal_getpid(), + __Wpedantic_format_voidptr(&rthc_key), (unsigned)rthc_key); +#endif +} + +__cold void rthc_dtor(const uint32_t current_pid) { + rthc_lock(); +#if !defined(_WIN32) && !defined(_WIN64) + uint64_t *rthc = pthread_getspecific(rthc_key); + TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status 0x%08" PRIx64 + ", left %d", + osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, + rthc ? 
rthc_read(rthc) : ~UINT64_C(0), + atomic_load32(&rthc_pending, mo_Relaxed)); + if (rthc) { + const uint64_t sign_registered = MDBX_THREAD_RTHC_REGISTERED(rthc); + const uint64_t sign_counted = MDBX_THREAD_RTHC_COUNTED(rthc); + const uint64_t state = rthc_read(rthc); + if (state == sign_registered && + rthc_compare_and_clean(rthc, sign_registered)) { + TRACE("== thread 0x%" PRIxPTR + ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", + osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, + "registered", state); + } else if (state == sign_counted && + rthc_compare_and_clean(rthc, sign_counted)) { + TRACE("== thread 0x%" PRIxPTR + ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", + osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, + "counted", state); + ENSURE(nullptr, atomic_sub32(&rthc_pending, 1) > 0); + } else { + WARNING("thread 0x%" PRIxPTR + ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", + osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, + "wrong", state); + } + } + + struct timespec abstime; + ENSURE(nullptr, clock_gettime(CLOCK_REALTIME, &abstime) == 0); + abstime.tv_nsec += 1000000000l / 10; + if (abstime.tv_nsec >= 1000000000l) { + abstime.tv_nsec -= 1000000000l; + abstime.tv_sec += 1; + } +#if MDBX_DEBUG > 0 + abstime.tv_sec += 600; +#endif + + for (unsigned left; + (left = atomic_load32(&rthc_pending, mo_AcquireRelease)) > 0;) { + NOTICE("tls-cleanup: pid %d, pending %u, wait for...", current_pid, left); + const int rc = pthread_cond_timedwait(&rthc_cond, &rthc_mutex, &abstime); + if (rc && rc != EINTR) + break; + } + thread_key_delete(rthc_key); +#endif + + for (size_t i = 0; i < rthc_count; ++i) { + MDBX_env *const env = rthc_table[i].env; + if (env->pid != current_pid) + continue; + if (!(env->flags & ENV_TXKEY)) + continue; + reader_slot_t *const begin = &env->lck_mmap.lck->rdt[0]; + reader_slot_t *const end = &env->lck_mmap.lck->rdt[env->max_readers]; + 
thread_key_delete(env->me_txkey); + bool cleaned = false; + for (reader_slot_t *reader = begin; reader < end; ++reader) { + TRACE("== [%zi] = key %" PRIuPTR ", %p ... %p, rthc %p (%+i), " + "rthc-pid %i, current-pid %i", + i, (uintptr_t)env->me_txkey, __Wpedantic_format_voidptr(begin), + __Wpedantic_format_voidptr(end), __Wpedantic_format_voidptr(reader), + (int)(reader - begin), reader->pid.weak, current_pid); + if (atomic_load32(&reader->pid, mo_Relaxed) == current_pid) { + (void)atomic_cas32(&reader->pid, current_pid, 0); + TRACE("== cleanup %p", __Wpedantic_format_voidptr(reader)); + cleaned = true; + } + } + if (cleaned) + atomic_store32(&env->lck->rdt_refresh_flag, true, mo_Relaxed); + } + + rthc_limit = rthc_count = 0; + if (rthc_table != rthc_table_static) + osal_free(rthc_table); + rthc_table = nullptr; + rthc_unlock(); + +#if defined(_WIN32) || defined(_WIN64) + DeleteCriticalSection(&rthc_critical_section); +#else + /* LY: yielding a few timeslices to give a more chance + * to racing destructor(s) for completion. */ + workaround_glibc_bug21031(); +#endif +} diff --git a/src/tls.h b/src/tls.h new file mode 100644 index 00000000..5bcbfa23 --- /dev/null +++ b/src/tls.h @@ -0,0 +1,43 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +MDBX_INTERNAL void rthc_ctor(void); +MDBX_INTERNAL void rthc_dtor(const uint32_t current_pid); +MDBX_INTERNAL void rthc_lock(void); +MDBX_INTERNAL void rthc_unlock(void); + +MDBX_INTERNAL int rthc_register(MDBX_env *const env); +MDBX_INTERNAL int rthc_remove(MDBX_env *const env); +MDBX_INTERNAL int rthc_uniq_check(const osal_mmap_t *pending, MDBX_env **found); + +/* dtor called for thread, i.e. 
for all mdbx's environment objects */ +MDBX_INTERNAL void rthc_thread_dtor(void *rthc); + +static inline void *thread_rthc_get(osal_thread_key_t key) { +#if defined(_WIN32) || defined(_WIN64) + return TlsGetValue(key); +#else + return pthread_getspecific(key); +#endif +} + +MDBX_INTERNAL void thread_rthc_set(osal_thread_key_t key, const void *value); + +#if !defined(_WIN32) && !defined(_WIN64) +MDBX_INTERNAL void rthc_afterfork(void); +MDBX_INTERNAL void workaround_glibc_bug21031(void); +#endif /* !Windows */ + +static inline void thread_key_delete(osal_thread_key_t key) { + TRACE("key = %" PRIuPTR, (uintptr_t)key); +#if defined(_WIN32) || defined(_WIN64) + ENSURE(nullptr, TlsFree(key)); +#else + ENSURE(nullptr, pthread_key_delete(key) == 0); + workaround_glibc_bug21031(); +#endif +} diff --git a/src/mdbx_chk.c b/src/tools/chk.c similarity index 95% rename from src/mdbx_chk.c rename to src/tools/chk.c index 12431b10..80e37a7c 100644 --- a/src/mdbx_chk.c +++ b/src/tools/chk.c @@ -1,17 +1,8 @@ -/* mdbx_chk.c - memory-mapped database check tool */ - -/* - * Copyright 2015-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
*/ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// +/// mdbx_chk.c - memory-mapped database check tool +/// #ifdef _MSC_VER #if _MSC_VER > 1800 @@ -21,7 +12,7 @@ #endif /* _MSC_VER (warnings) */ #define xMDBX_TOOLS /* Avoid using internal eASSERT() */ -#include "internals.h" +#include "essentials.h" #include @@ -59,8 +50,7 @@ static void signal_handler(int sig) { #define EXIT_FAILURE_CHECK_MAJOR (EXIT_FAILURE + 1) #define EXIT_FAILURE_CHECK_MINOR EXIT_FAILURE -enum MDBX_env_flags_t env_flags = - MDBX_RDONLY | MDBX_EXCLUSIVE | MDBX_VALIDATION; +MDBX_env_flags_t env_flags = MDBX_RDONLY | MDBX_EXCLUSIVE | MDBX_VALIDATION; MDBX_env *env; MDBX_txn *txn; unsigned verbose = 0; @@ -70,8 +60,8 @@ int stuck_meta = -1; MDBX_chk_context_t chk; bool turn_meta = false; bool force_turn_meta = false; -enum MDBX_chk_flags_t chk_flags = MDBX_CHK_DEFAULTS; -enum MDBX_chk_stage chk_stage = MDBX_chk_none; +MDBX_chk_flags_t chk_flags = MDBX_CHK_DEFAULTS; +MDBX_chk_stage_t chk_stage = MDBX_chk_none; static MDBX_chk_line_t line_struct; static size_t anchor_lineno; @@ -105,7 +95,7 @@ static bool silently(enum MDBX_chk_severity severity) { chk.scope ? chk.scope->verbosity >> MDBX_chk_severity_prio_shift : verbose + (MDBX_chk_result >> MDBX_chk_severity_prio_shift); int prio = (severity >> MDBX_chk_severity_prio_shift); - if (chk.scope && chk.scope->stage == MDBX_chk_traversal_subdbs && verbose < 2) + if (chk.scope && chk.scope->stage == MDBX_chk_subdbs && verbose < 2) prio += 1; return quiet || cutoff < ((prio > 0) ? 
prio : 0); } @@ -398,7 +388,7 @@ static int conclude(MDBX_chk_context_t *ctx) { " at txn-id #%" PRIi64 "...", ctx->result.recent_txnid); flush(); - err = error_fn("mdbx_env_pgwalk", mdbx_env_sync_ex(ctx->env, true, false)); + err = error_fn("mdbx_env_sync_ex", mdbx_env_sync_ex(ctx->env, true, false)); if (err == MDBX_SUCCESS) { ctx->result.problems_meta -= 1; ctx->result.total_problems -= 1; diff --git a/src/mdbx_copy.c b/src/tools/copy.c similarity index 87% rename from src/mdbx_copy.c rename to src/tools/copy.c index e73f143a..8a962e73 100644 --- a/src/mdbx_copy.c +++ b/src/tools/copy.c @@ -1,17 +1,10 @@ -/* mdbx_copy.c - memory-mapped database backup tool */ - -/* - * Copyright 2015-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . */ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \note Please refer to the COPYRIGHT file for explanations license change, +/// credits and acknowledgments. 
+/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// +/// mdbx_copy.c - memory-mapped database backup tool +/// #ifdef _MSC_VER #if _MSC_VER > 1800 @@ -21,7 +14,7 @@ #endif /* _MSC_VER (warnings) */ #define xMDBX_TOOLS /* Avoid using internal eASSERT() */ -#include "internals.h" +#include "essentials.h" #if defined(_WIN32) || defined(_WIN64) #include "wingetopt.h" @@ -60,7 +53,7 @@ static void usage(const char *prog) { int main(int argc, char *argv[]) { int rc; - MDBX_env *env = NULL; + MDBX_env *env = nullptr; const char *progname = argv[0], *act; unsigned flags = MDBX_RDONLY; unsigned cpflags = 0; @@ -123,7 +116,7 @@ int main(int argc, char *argv[]) { "mdbx_copy %s (%s, T-%s)\nRunning for copy %s to %s...\n", mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, argv[1], (argc == 2) ? "stdout" : argv[2]); - fflush(NULL); + fflush(nullptr); } act = "opening environment"; diff --git a/src/mdbx_drop.c b/src/tools/drop.c similarity index 87% rename from src/mdbx_drop.c rename to src/tools/drop.c index b3107218..483073b4 100644 --- a/src/mdbx_drop.c +++ b/src/tools/drop.c @@ -1,19 +1,10 @@ -/* mdbx_drop.c - memory-mapped database delete tool */ - -/* - * Copyright 2021-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * - * Copyright 2016-2021 Howard Chu, Symas Corp. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . */ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \note Please refer to the COPYRIGHT file for explanations license change, +/// credits and acknowledgments. 
+/// \author Леонид Юрьев aka Leonid Yuriev \date 2021-2024 +/// +/// mdbx_drop.c - memory-mapped database delete tool +/// #ifdef _MSC_VER #if _MSC_VER > 1800 @@ -23,7 +14,7 @@ #endif /* _MSC_VER (warnings) */ #define xMDBX_TOOLS /* Avoid using internal eASSERT() */ -#include "internals.h" +#include "essentials.h" #include @@ -162,7 +153,7 @@ int main(int argc, char *argv[]) { goto env_close; } - rc = mdbx_txn_begin(env, NULL, 0, &txn); + rc = mdbx_txn_begin(env, nullptr, 0, &txn); if (unlikely(rc != MDBX_SUCCESS)) { error("mdbx_txn_begin", rc); goto env_close; diff --git a/src/mdbx_dump.c b/src/tools/dump.c similarity index 79% rename from src/mdbx_dump.c rename to src/tools/dump.c index f918919b..2a5952b1 100644 --- a/src/mdbx_dump.c +++ b/src/tools/dump.c @@ -1,17 +1,10 @@ -/* mdbx_dump.c - memory-mapped database dump tool */ - -/* - * Copyright 2015-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . */ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \note Please refer to the COPYRIGHT file for explanations license change, +/// credits and acknowledgments. 
+/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// +/// mdbx_dump.c - memory-mapped database dump tool +/// #ifdef _MSC_VER #if _MSC_VER > 1800 @@ -21,7 +14,7 @@ #endif /* _MSC_VER (warnings) */ #define xMDBX_TOOLS /* Avoid using internal eASSERT() */ -#include "internals.h" +#include "essentials.h" #include @@ -37,7 +30,7 @@ typedef struct flagbit { flagbit dbflags[] = {{MDBX_REVERSEKEY, "reversekey"}, {MDBX_DUPSORT, "dupsort"}, {MDBX_INTEGERKEY, "integerkey"}, - {MDBX_DUPFIXED, "dupfixed"}, + {MDBX_DUPFIXED, "dupfix"}, {MDBX_INTEGERDUP, "integerdup"}, {MDBX_REVERSEDUP, "reversedup"}, {0, nullptr}}; @@ -108,7 +101,7 @@ static void error(const char *func, int rc) { /* Dump in BDB-compatible format */ static int dump_sdb(MDBX_txn *txn, MDBX_dbi dbi, char *name) { - unsigned int flags; + unsigned flags; int rc = mdbx_dbi_flags(txn, dbi, &flags); if (unlikely(rc != MDBX_SUCCESS)) { error("mdbx_dbi_flags", rc); @@ -187,9 +180,11 @@ static int dump_sdb(MDBX_txn *txn, MDBX_dbi dbi, char *name) { return rc; } if (rescue) { - cursor->mc_checking |= CC_SKIPORD; - if (cursor->mc_xcursor) - cursor->mc_xcursor->mx_cursor.mc_checking |= CC_SKIPORD; + rc = mdbx_cursor_ignord(cursor); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_cursor_ignord", rc); + return rc; + } } while ((rc = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT)) == @@ -245,7 +240,7 @@ static int equal_or_greater(const MDBX_val *a, const MDBX_val *b) { } int main(int argc, char *argv[]) { - int i, rc; + int i, err; MDBX_env *env; MDBX_txn *txn; MDBX_dbi dbi; @@ -355,47 +350,47 @@ int main(int argc, char *argv[]) { fflush(nullptr); } - rc = mdbx_env_create(&env); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_env_create", rc); + err = mdbx_env_create(&env); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_env_create", err); return EXIT_FAILURE; } if (alldbs || subname) { - rc = mdbx_env_set_maxdbs(env, 2); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_env_set_maxdbs", rc); + err 
= mdbx_env_set_maxdbs(env, 2); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_env_set_maxdbs", err); goto env_close; } } - rc = mdbx_env_open( + err = mdbx_env_open( env, envname, envflags | (rescue ? MDBX_RDONLY | MDBX_EXCLUSIVE | MDBX_VALIDATION : MDBX_RDONLY), 0); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_env_open", rc); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_env_open", err); goto env_close; } if (warmup) { - rc = mdbx_env_warmup(env, nullptr, warmup_flags, 3600 * 65536); - if (MDBX_IS_ERROR(rc)) { - error("mdbx_env_warmup", rc); + err = mdbx_env_warmup(env, nullptr, warmup_flags, 3600 * 65536); + if (MDBX_IS_ERROR(err)) { + error("mdbx_env_warmup", err); goto env_close; } } - rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_txn_begin", rc); + err = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_txn_begin", err); goto env_close; } - rc = mdbx_dbi_open(txn, subname, MDBX_DB_ACCEDE, &dbi); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_dbi_open", rc); + err = mdbx_dbi_open(txn, subname, MDBX_DB_ACCEDE, &dbi); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_dbi_open", err); goto txn_abort; } @@ -403,24 +398,26 @@ int main(int argc, char *argv[]) { assert(dbi == MAIN_DBI); MDBX_cursor *cursor; - rc = mdbx_cursor_open(txn, MAIN_DBI, &cursor); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_cursor_open", rc); + err = mdbx_cursor_open(txn, MAIN_DBI, &cursor); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_cursor_open", err); goto txn_abort; } if (rescue) { - cursor->mc_checking |= CC_SKIPORD; - if (cursor->mc_xcursor) - cursor->mc_xcursor->mx_cursor.mc_checking |= CC_SKIPORD; + err = mdbx_cursor_ignord(cursor); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_cursor_ignord", err); + return err; + } } bool have_raw = false; int count = 0; MDBX_val key; while (MDBX_SUCCESS == - (rc = 
mdbx_cursor_get(cursor, &key, nullptr, MDBX_NEXT_NODUP))) { + (err = mdbx_cursor_get(cursor, &key, nullptr, MDBX_NEXT_NODUP))) { if (user_break) { - rc = MDBX_EINTR; + err = MDBX_EINTR; break; } @@ -428,7 +425,7 @@ int main(int argc, char *argv[]) { continue; subname = osal_realloc(buf4free, key.iov_len + 1); if (!subname) { - rc = MDBX_ENOMEM; + err = MDBX_ENOMEM; break; } @@ -437,15 +434,15 @@ int main(int argc, char *argv[]) { subname[key.iov_len] = '\0'; MDBX_dbi sub_dbi; - rc = mdbx_dbi_open_ex(txn, subname, MDBX_DB_ACCEDE, &sub_dbi, - rescue ? equal_or_greater : nullptr, - rescue ? equal_or_greater : nullptr); - if (unlikely(rc != MDBX_SUCCESS)) { - if (rc == MDBX_INCOMPATIBLE) { + err = mdbx_dbi_open_ex(txn, subname, MDBX_DB_ACCEDE, &sub_dbi, + rescue ? equal_or_greater : nullptr, + rescue ? equal_or_greater : nullptr); + if (unlikely(err != MDBX_SUCCESS)) { + if (err == MDBX_INCOMPATIBLE) { have_raw = true; continue; } - error("mdbx_dbi_open", rc); + error("mdbx_dbi_open", err); if (!rescue) break; } else { @@ -453,13 +450,13 @@ int main(int argc, char *argv[]) { if (list) { printf("%s\n", subname); } else { - rc = dump_sdb(txn, sub_dbi, subname); - if (unlikely(rc != MDBX_SUCCESS)) { + err = dump_sdb(txn, sub_dbi, subname); + if (unlikely(err != MDBX_SUCCESS)) { if (!rescue) break; if (!quiet) fprintf(stderr, "%s: %s: ignore %s for `%s` and continue\n", prog, - envname, mdbx_strerror(rc), subname); + envname, mdbx_strerror(err), subname); /* Here is a hack for rescue mode, don't do that: * - we should restart transaction in case error due * database corruption; @@ -468,21 +465,21 @@ int main(int argc, char *argv[]) { * - this is possible since DB is opened in read-only exclusive * mode and transaction is the same, i.e. has the same address * and so on. 
*/ - rc = mdbx_txn_reset(txn); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_txn_reset", rc); + err = mdbx_txn_reset(txn); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_txn_reset", err); goto env_close; } - rc = mdbx_txn_renew(txn); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_txn_renew", rc); + err = mdbx_txn_renew(txn); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_txn_renew", err); goto env_close; } } } - rc = mdbx_dbi_close(env, sub_dbi); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_dbi_close", rc); + err = mdbx_dbi_close(env, sub_dbi); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_dbi_close", err); break; } } @@ -491,20 +488,20 @@ int main(int argc, char *argv[]) { cursor = nullptr; if (have_raw && (!count /* || rescue */)) - rc = dump_sdb(txn, MAIN_DBI, nullptr); + err = dump_sdb(txn, MAIN_DBI, nullptr); else if (!count) { if (!quiet) fprintf(stderr, "%s: %s does not contain multiple databases\n", prog, envname); - rc = MDBX_NOTFOUND; + err = MDBX_NOTFOUND; } } else { - rc = dump_sdb(txn, dbi, subname); + err = dump_sdb(txn, dbi, subname); } - switch (rc) { + switch (err) { case MDBX_NOTFOUND: - rc = MDBX_SUCCESS; + err = MDBX_SUCCESS; case MDBX_SUCCESS: break; case MDBX_EINTR: @@ -512,8 +509,8 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Interrupted by signal/user\n"); break; default: - if (unlikely(rc != MDBX_SUCCESS)) - error("mdbx_cursor_get", rc); + if (unlikely(err != MDBX_SUCCESS)) + error("mdbx_cursor_get", err); } mdbx_dbi_close(env, dbi); @@ -523,5 +520,5 @@ env_close: mdbx_env_close(env); free(buf4free); - return rc ? EXIT_FAILURE : EXIT_SUCCESS; + return err ? 
EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/src/mdbx_load.c b/src/tools/load.c similarity index 97% rename from src/mdbx_load.c rename to src/tools/load.c index 75337f53..ade698f3 100644 --- a/src/mdbx_load.c +++ b/src/tools/load.c @@ -1,17 +1,10 @@ -/* mdbx_load.c - memory-mapped database load tool */ - -/* - * Copyright 2015-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . */ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \note Please refer to the COPYRIGHT file for explanations license change, +/// credits and acknowledgments. +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// +/// mdbx_load.c - memory-mapped database load tool +/// #ifdef _MSC_VER #if _MSC_VER > 1800 @@ -21,7 +14,7 @@ #endif /* _MSC_VER (warnings) */ #define xMDBX_TOOLS /* Avoid using internal eASSERT() */ -#include "internals.h" +#include "essentials.h" #include @@ -139,7 +132,7 @@ typedef struct flagbit { flagbit dbflags[] = { {MDBX_REVERSEKEY, S("reversekey")}, {MDBX_DUPSORT, S("duplicates")}, {MDBX_DUPSORT, S("dupsort")}, {MDBX_INTEGERKEY, S("integerkey")}, - {MDBX_DUPFIXED, S("dupfixed")}, {MDBX_INTEGERDUP, S("integerdup")}, + {MDBX_DUPFIXED, S("dupfix")}, {MDBX_INTEGERDUP, S("integerdup")}, {MDBX_REVERSEDUP, S("reversedup")}, {0, 0, nullptr}}; static int readhdr(void) { @@ -375,7 +368,7 @@ static int badend(void) { return errno ? 
errno : MDBX_ENODATA; } -static __inline int unhex(unsigned char *c2) { +static inline int unhex(unsigned char *c2) { int x, c; x = *c2++ & 0x4f; if (x & 0x40) diff --git a/src/mdbx_stat.c b/src/tools/stat.c similarity index 96% rename from src/mdbx_stat.c rename to src/tools/stat.c index 2059972d..8ad82f9c 100644 --- a/src/mdbx_stat.c +++ b/src/tools/stat.c @@ -1,17 +1,10 @@ -/* mdbx_stat.c - memory-mapped database status tool */ - -/* - * Copyright 2015-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . */ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \note Please refer to the COPYRIGHT file for explanations license change, +/// credits and acknowledgments. +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// +/// mdbx_stat.c - memory-mapped database status tool +/// #ifdef _MSC_VER #if _MSC_VER > 1800 @@ -21,7 +14,7 @@ #endif /* _MSC_VER (warnings) */ #define xMDBX_TOOLS /* Avoid using internal eASSERT() */ -#include "internals.h" +#include "essentials.h" #if defined(_WIN32) || defined(_WIN64) #include "wingetopt.h" diff --git a/src/wingetopt.c b/src/tools/wingetopt.c similarity index 100% rename from src/wingetopt.c rename to src/tools/wingetopt.c diff --git a/src/wingetopt.h b/src/tools/wingetopt.h similarity index 100% rename from src/wingetopt.h rename to src/tools/wingetopt.h diff --git a/src/tree.c b/src/tree.c new file mode 100644 index 00000000..e691ac4f --- /dev/null +++ b/src/tree.c @@ -0,0 +1,1645 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \note Please refer to the COPYRIGHT file for explanations license change, +/// credits and acknowledgments. 
+/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" +/* Builds a temporary copy of `csrc` inside the caller-provided `couple` and returns it: the outer cursor is reset (next, backup, subcur, clc and tree set to nullptr, checking set to z_pagecheck, top_and_flags set to 0) and bound to the source's txn and dbi_state; when is_inner(csrc) the nested inner cursor is initialized too, linked as outer.subcur and used as the destination. The destination then takes checking, tree and clc from `csrc` and its stack via cursor_cpstk(). The clone is not linked into txn->cursors here. */ +static MDBX_cursor *cursor_clone(const MDBX_cursor *csrc, + cursor_couple_t *couple) { + cASSERT(csrc, csrc->txn->txnid >= csrc->txn->env->lck->cached_oldest.weak); + couple->outer.next = nullptr; + couple->outer.backup = nullptr; + couple->outer.subcur = nullptr; + couple->outer.clc = nullptr; + couple->outer.txn = csrc->txn; + couple->outer.dbi_state = csrc->dbi_state; + couple->outer.checking = z_pagecheck; + couple->outer.tree = nullptr; + couple->outer.top_and_flags = 0; + + MDBX_cursor *cdst = &couple->outer; + if (is_inner(csrc)) { + couple->inner.cursor.next = nullptr; + couple->inner.cursor.backup = nullptr; + couple->inner.cursor.subcur = nullptr; + couple->inner.cursor.txn = csrc->txn; + couple->inner.cursor.dbi_state = csrc->dbi_state; + couple->outer.subcur = &couple->inner; + cdst = &couple->inner.cursor; + } + + cdst->checking = csrc->checking; + cdst->tree = csrc->tree; + cdst->clc = csrc->clc; + cursor_cpstk(csrc, cdst); + return cdst; +} + +/*----------------------------------------------------------------------------*/ +/* Recomputes env->merge_threshold and env->merge_threshold_gc from page_space(env): the former is the page space minus its merge_threshold_16dot16_percent share (product shifted right by 16), the latter is the page space minus one third when that option exceeds 19005, otherwise minus one quarter; both results are narrowed to uint16_t. */ +void recalculate_merge_thresholds(MDBX_env *env) { + const size_t bytes = page_space(env); + env->merge_threshold = + (uint16_t)(bytes - + (bytes * env->options.merge_threshold_16dot16_percent >> 16)); + env->merge_threshold_gc = + (uint16_t)(bytes - ((env->options.merge_threshold_16dot16_percent > 19005) + ? bytes / 3 /* 33 % */ + : bytes / 4 /* 25 % */)); +} +/* Retires all pages of the tree addressed by `mc`. After tree_search(mc, nullptr, Z_FIRST) it reserves room in txn->tw.retired_pages for the branch, leaf and large page counts, then walks the tree with cursor_sibling_right(): large-data pages and child pages go through page_retire_ex(), nested trees of N_SUBDATA nodes carrying N_DUPDATA are dropped recursively, and an N_SUBDATA node without N_DUPDATA fails with MDBX_INCOMPATIBLE; the root page is retired last via page_retire(). Leaf scanning is skipped when `may_have_subDBs` is false and the tree has no large pages. On return the cursor is made poor by be_poor(), and any non-success result also sets MDBX_TXN_ERROR on the transaction. */ +int tree_drop(MDBX_cursor *mc, const bool may_have_subDBs) { + MDBX_txn *txn = mc->txn; + int rc = tree_search(mc, nullptr, Z_FIRST); + if (likely(rc == MDBX_SUCCESS)) { + /* DUPSORT sub-DBs have no large-pages/subDBs. Omit scanning leaves. + * This also avoids any P_DUPFIX pages, which have no nodes. + * Also if the DB doesn't have sub-DBs and has no large/overflow + * pages, omit scanning leaves.
*/ + if (!(may_have_subDBs | mc->tree->large_pages)) + cursor_pop(mc); + + rc = pnl_need(&txn->tw.retired_pages, (size_t)mc->tree->branch_pages + + (size_t)mc->tree->leaf_pages + + (size_t)mc->tree->large_pages); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + page_t *stack[CURSOR_STACK_SIZE]; + for (intptr_t i = 0; i <= mc->top; ++i) + stack[i] = mc->pg[i]; + + while (mc->top >= 0) { + page_t *const mp = mc->pg[mc->top]; + const size_t nkeys = page_numkeys(mp); + if (is_leaf(mp)) { + cASSERT(mc, mc->top + 1 == mc->tree->height); + for (size_t i = 0; i < nkeys; i++) { + node_t *node = page_node(mp, i); + if (node_flags(node) & N_BIGDATA) { + rc = page_retire_ex(mc, node_largedata_pgno(node), nullptr, 0); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + if (!(may_have_subDBs | mc->tree->large_pages)) + goto pop; + } else if (node_flags(node) & N_SUBDATA) { + if (unlikely((node_flags(node) & N_DUPDATA) == 0)) { + rc = /* disallowing implicit subDB deletion */ MDBX_INCOMPATIBLE; + goto bailout; + } + rc = cursor_dupsort_setup(mc, node, mp); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + rc = tree_drop(&mc->subcur->cursor, false); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + } + } else { + cASSERT(mc, mc->top + 1 < mc->tree->height); + mc->checking |= z_retiring; + const unsigned pagetype = (is_frozen(txn, mp) ? P_FROZEN : 0) + + ((mc->top + 2 == mc->tree->height) + ? 
(mc->checking & (P_LEAF | P_DUPFIX)) + : P_BRANCH); + for (size_t i = 0; i < nkeys; i++) { + node_t *node = page_node(mp, i); + tASSERT(txn, (node_flags(node) & + (N_BIGDATA | N_SUBDATA | N_DUPDATA)) == 0); + const pgno_t pgno = node_pgno(node); + rc = page_retire_ex(mc, pgno, nullptr, pagetype); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + mc->checking -= z_retiring; + } + if (!mc->top) + break; + cASSERT(mc, nkeys > 0); + mc->ki[mc->top] = (indx_t)nkeys; + rc = cursor_sibling_right(mc); + if (unlikely(rc != MDBX_SUCCESS)) { + if (unlikely(rc != MDBX_NOTFOUND)) + goto bailout; + /* no more siblings, go back to beginning + * of previous level. */ + pop: + cursor_pop(mc); + mc->ki[0] = 0; + for (intptr_t i = 1; i <= mc->top; i++) { + mc->pg[i] = stack[i]; + mc->ki[i] = 0; + } + } + } + rc = page_retire(mc, mc->pg[0]); + } + +bailout: + be_poor(mc); + if (unlikely(rc != MDBX_SUCCESS)) + txn->flags |= MDBX_TXN_ERROR; + return rc; +} +/* Moves the node at csrc->ki[csrc->top] from the top page of `csrc` to position cdst->ki[cdst->top] on the top page of `cdst`; both pages must have the same page_type(), otherwise MDBX_TXN_ERROR is set and MDBX_PROBLEM is returned. Branch, leaf and dupfix-leaf pages are handled separately; for a branch destination without enough page_room() the function returns MDBX_RESULT_TRUE before calling page_touch(). After the move it adjusts the cursors tracked in txn->cursors (direction chosen by `fromleft`) and updates parent separator keys through tree_propagate_key(). Returns MDBX_SUCCESS or the error code of a failed helper. */ +static int node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, bool fromleft) { + int rc; + DKBUF_DEBUG; + + page_t *psrc = csrc->pg[csrc->top]; + page_t *pdst = cdst->pg[cdst->top]; + cASSERT(csrc, page_type(psrc) == page_type(pdst)); + cASSERT(csrc, csrc->tree == cdst->tree); + cASSERT(csrc, csrc->top == cdst->top); + if (unlikely(page_type(psrc) != page_type(pdst))) { + bailout: + ERROR("Wrong or mismatch pages's types (src %d, dst %d) to move node", + page_type(psrc), page_type(pdst)); + csrc->txn->flags |= MDBX_TXN_ERROR; + return MDBX_PROBLEM; + } + + MDBX_val key4move; + switch (page_type(psrc)) { + case P_BRANCH: { + const node_t *srcnode = page_node(psrc, csrc->ki[csrc->top]); + cASSERT(csrc, node_flags(srcnode) == 0); + const pgno_t srcpg = node_pgno(srcnode); + key4move.iov_len = node_ks(srcnode); + key4move.iov_base = node_key(srcnode); + + if (csrc->ki[csrc->top] == 0) { + const int8_t top = csrc->top; + cASSERT(csrc, top >= 0); + /* must find the lowest key below src */ + rc = tree_search_lowest(csrc); + page_t
*lowest_page = csrc->pg[csrc->top]; + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + cASSERT(csrc, is_leaf(lowest_page)); + if (unlikely(!is_leaf(lowest_page))) + goto bailout; + if (is_dupfix_leaf(lowest_page)) + key4move = page_dupfix_key(lowest_page, 0, csrc->tree->dupfix_size); + else { + const node_t *lowest_node = page_node(lowest_page, 0); + key4move.iov_len = node_ks(lowest_node); + key4move.iov_base = node_key(lowest_node); + } + + /* restore cursor after tree_search_lowest() */ + csrc->top = top; + csrc->ki[csrc->top] = 0; + + /* paranoia */ + cASSERT(csrc, psrc == csrc->pg[csrc->top]); + cASSERT(csrc, is_branch(psrc)); + if (unlikely(!is_branch(psrc))) + goto bailout; + } + + if (cdst->ki[cdst->top] == 0) { + cursor_couple_t couple; + MDBX_cursor *const mn = cursor_clone(cdst, &couple); + const int8_t top = cdst->top; + cASSERT(csrc, top >= 0); + + /* must find the lowest key below dst */ + rc = tree_search_lowest(mn); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + page_t *const lowest_page = mn->pg[mn->top]; + cASSERT(cdst, is_leaf(lowest_page)); + if (unlikely(!is_leaf(lowest_page))) + goto bailout; + MDBX_val key; + if (is_dupfix_leaf(lowest_page)) + key = page_dupfix_key(lowest_page, 0, mn->tree->dupfix_size); + else { + node_t *lowest_node = page_node(lowest_page, 0); + key.iov_len = node_ks(lowest_node); + key.iov_base = node_key(lowest_node); + } + + /* restore cursor after tree_search_lowest() */ + mn->top = top; + mn->ki[mn->top] = 0; + + const intptr_t delta = EVEN_CEIL(key.iov_len) - + EVEN_CEIL(node_ks(page_node(mn->pg[mn->top], 0))); + const intptr_t needed = branch_size(cdst->txn->env, &key4move) + delta; + const intptr_t have = page_room(pdst); + if (unlikely(needed > have)) + return MDBX_RESULT_TRUE; + + if (unlikely((rc = page_touch(csrc)) || (rc = page_touch(cdst)))) + return rc; + psrc = csrc->pg[csrc->top]; + pdst = cdst->pg[cdst->top]; + + couple.outer.next = mn->txn->cursors[cursor_dbi(mn)]; +
mn->txn->cursors[cursor_dbi(mn)] = &couple.outer; + rc = tree_propagate_key(mn, &key); + mn->txn->cursors[cursor_dbi(mn)] = couple.outer.next; + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } else { + const size_t needed = branch_size(cdst->txn->env, &key4move); + const size_t have = page_room(pdst); + if (unlikely(needed > have)) + return MDBX_RESULT_TRUE; + + if (unlikely((rc = page_touch(csrc)) || (rc = page_touch(cdst)))) + return rc; + psrc = csrc->pg[csrc->top]; + pdst = cdst->pg[cdst->top]; + } + + DEBUG("moving %s-node %u [%s] on page %" PRIaPGNO + " to node %u on page %" PRIaPGNO, + "branch", csrc->ki[csrc->top], DKEY_DEBUG(&key4move), psrc->pgno, + cdst->ki[cdst->top], pdst->pgno); + /* Add the node to the destination page. */ + rc = node_add_branch(cdst, cdst->ki[cdst->top], &key4move, srcpg); + } break; + + case P_LEAF: { + /* Mark src and dst as dirty. */ + if (unlikely((rc = page_touch(csrc)) || (rc = page_touch(cdst)))) + return rc; + psrc = csrc->pg[csrc->top]; + pdst = cdst->pg[cdst->top]; + const node_t *srcnode = page_node(psrc, csrc->ki[csrc->top]); + MDBX_val data; + data.iov_len = node_ds(srcnode); + data.iov_base = node_data(srcnode); + key4move.iov_len = node_ks(srcnode); + key4move.iov_base = node_key(srcnode); + DEBUG("moving %s-node %u [%s] on page %" PRIaPGNO + " to node %u on page %" PRIaPGNO, + "leaf", csrc->ki[csrc->top], DKEY_DEBUG(&key4move), psrc->pgno, + cdst->ki[cdst->top], pdst->pgno); + /* Add the node to the destination page. */ + rc = node_add_leaf(cdst, cdst->ki[cdst->top], &key4move, &data, + node_flags(srcnode)); + } break; + + case P_LEAF | P_DUPFIX: { + /* Mark src and dst as dirty. 
*/ + if (unlikely((rc = page_touch(csrc)) || (rc = page_touch(cdst)))) + return rc; + psrc = csrc->pg[csrc->top]; + pdst = cdst->pg[cdst->top]; + key4move = + page_dupfix_key(psrc, csrc->ki[csrc->top], csrc->tree->dupfix_size); + DEBUG("moving %s-node %u [%s] on page %" PRIaPGNO + " to node %u on page %" PRIaPGNO, + "leaf2", csrc->ki[csrc->top], DKEY_DEBUG(&key4move), psrc->pgno, + cdst->ki[cdst->top], pdst->pgno); + /* Add the node to the destination page. */ + rc = node_add_dupfix(cdst, cdst->ki[cdst->top], &key4move); + } break; + + default: + assert(false); + goto bailout; + } + + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + /* Delete the node from the source page. */ + node_del(csrc, key4move.iov_len); + + cASSERT(csrc, psrc == csrc->pg[csrc->top]); + cASSERT(cdst, pdst == cdst->pg[cdst->top]); + cASSERT(csrc, page_type(psrc) == page_type(pdst)); + + /* The csrc cursor here is always a temporary one, living on the stack inside tree_rebalance(), + * so there is no need to adjust it. */ + { + /* Adjust other cursors pointing to mp */ + MDBX_cursor *m2, *m3; + const size_t dbi = cursor_dbi(csrc); + cASSERT(csrc, csrc->top == cdst->top); + if (fromleft) { + /* If we're adding on the left, bump others up */ + for (m2 = csrc->txn->cursors[dbi]; m2; m2 = m2->next) { + m3 = (csrc->flags & z_inner) ?
&m2->subcur->cursor : m2; + if (!is_related(csrc, m3)) + continue; + + if (m3 != cdst && m3->pg[csrc->top] == pdst && + m3->ki[csrc->top] >= cdst->ki[csrc->top]) { + m3->ki[csrc->top] += 1; + } + + if (/* m3 != csrc && */ m3->pg[csrc->top] == psrc && + m3->ki[csrc->top] == csrc->ki[csrc->top]) { + m3->pg[csrc->top] = pdst; + m3->ki[csrc->top] = cdst->ki[cdst->top]; + cASSERT(csrc, csrc->top > 0); + m3->ki[csrc->top - 1] += 1; + } + + if (is_leaf(psrc) && inner_pointed(m3)) { + cASSERT(csrc, csrc->top == m3->top); + size_t nkeys = page_numkeys(m3->pg[csrc->top]); + if (likely(nkeys > m3->ki[csrc->top])) + cursor_inner_refresh(m3, m3->pg[csrc->top], m3->ki[csrc->top]); + } + } + } else { + /* Adding on the right, bump others down */ + for (m2 = csrc->txn->cursors[dbi]; m2; m2 = m2->next) { + m3 = (csrc->flags & z_inner) ? &m2->subcur->cursor : m2; + if (!is_related(csrc, m3)) + continue; + if (m3->pg[csrc->top] == psrc) { + if (!m3->ki[csrc->top]) { + m3->pg[csrc->top] = pdst; + m3->ki[csrc->top] = cdst->ki[cdst->top]; + cASSERT(csrc, csrc->top > 0 && m3->ki[csrc->top - 1] > 0); + m3->ki[csrc->top - 1] -= 1; + } else + m3->ki[csrc->top] -= 1; + + if (is_leaf(psrc) && inner_pointed(m3)) { + cASSERT(csrc, csrc->top == m3->top); + size_t nkeys = page_numkeys(m3->pg[csrc->top]); + if (likely(nkeys > m3->ki[csrc->top])) + cursor_inner_refresh(m3, m3->pg[csrc->top], m3->ki[csrc->top]); + } + } + } + } + } + + /* Update the parent separators. 
*/ + if (csrc->ki[csrc->top] == 0) { + cASSERT(csrc, csrc->top > 0); + if (csrc->ki[csrc->top - 1] != 0) { + MDBX_val key; + if (is_dupfix_leaf(psrc)) + key = page_dupfix_key(psrc, 0, csrc->tree->dupfix_size); + else { + node_t *srcnode = page_node(psrc, 0); + key.iov_len = node_ks(srcnode); + key.iov_base = node_key(srcnode); + } + DEBUG("update separator for source page %" PRIaPGNO " to [%s]", + psrc->pgno, DKEY_DEBUG(&key)); + + cursor_couple_t couple; + MDBX_cursor *const mn = cursor_clone(csrc, &couple); + cASSERT(csrc, mn->top > 0); + mn->top -= 1; + + couple.outer.next = mn->txn->cursors[cursor_dbi(mn)]; + mn->txn->cursors[cursor_dbi(mn)] = &couple.outer; + rc = tree_propagate_key(mn, &key); + mn->txn->cursors[cursor_dbi(mn)] = couple.outer.next; + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + if (is_branch(psrc)) { + const MDBX_val nullkey = {0, 0}; + const indx_t ix = csrc->ki[csrc->top]; + csrc->ki[csrc->top] = 0; + rc = tree_propagate_key(csrc, &nullkey); + csrc->ki[csrc->top] = ix; + cASSERT(csrc, rc == MDBX_SUCCESS); + } + } + + if (cdst->ki[cdst->top] == 0) { + cASSERT(cdst, cdst->top > 0); + if (cdst->ki[cdst->top - 1] != 0) { + MDBX_val key; + if (is_dupfix_leaf(pdst)) + key = page_dupfix_key(pdst, 0, cdst->tree->dupfix_size); + else { + node_t *srcnode = page_node(pdst, 0); + key.iov_len = node_ks(srcnode); + key.iov_base = node_key(srcnode); + } + DEBUG("update separator for destination page %" PRIaPGNO " to [%s]", + pdst->pgno, DKEY_DEBUG(&key)); + cursor_couple_t couple; + MDBX_cursor *const mn = cursor_clone(cdst, &couple); + cASSERT(cdst, mn->top > 0); + mn->top -= 1; + + couple.outer.next = mn->txn->cursors[cursor_dbi(mn)]; + mn->txn->cursors[cursor_dbi(mn)] = &couple.outer; + rc = tree_propagate_key(mn, &key); + mn->txn->cursors[cursor_dbi(mn)] = couple.outer.next; + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + if (is_branch(pdst)) { + const MDBX_val nullkey = {0, 0}; + const indx_t ix = cdst->ki[cdst->top]; + 
cdst->ki[cdst->top] = 0; + rc = tree_propagate_key(cdst, &nullkey); + cdst->ki[cdst->top] = ix; + cASSERT(cdst, rc == MDBX_SUCCESS); + } + } + + return MDBX_SUCCESS; +} +/* Merges the top page of `csrc` into the top page of `cdst` (never a root page: both tops are asserted to be above 0): appends every source node to the destination, removes the source entry from the parent page via node_del(), retargets cursors tracked in txn->cursors, retires the source page and then calls tree_rebalance() on the parent level of `cdst`. Returns MDBX_RESULT_TRUE when a branch merge would not fit into the destination (checked before page_touch()), MDBX_CURSOR_FULL when the stack of `cdst` cannot be restored after rebalancing (the cursor is made poor), MDBX_SUCCESS, or the error code of a failed helper. */ +static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { + MDBX_val key; + int rc; + + cASSERT(csrc, csrc != cdst); + cASSERT(csrc, cursor_is_tracked(csrc)); + cASSERT(cdst, cursor_is_tracked(cdst)); + const page_t *const psrc = csrc->pg[csrc->top]; + page_t *pdst = cdst->pg[cdst->top]; + DEBUG("merging page %" PRIaPGNO " into %" PRIaPGNO, psrc->pgno, pdst->pgno); + + cASSERT(csrc, page_type(psrc) == page_type(pdst)); + cASSERT(csrc, csrc->clc == cdst->clc && csrc->tree == cdst->tree); + cASSERT(csrc, csrc->top > 0); /* can't merge root page */ + cASSERT(cdst, cdst->top > 0); + cASSERT(cdst, cdst->top + 1 < cdst->tree->height || + is_leaf(cdst->pg[cdst->tree->height - 1])); + cASSERT(csrc, csrc->top + 1 < csrc->tree->height || + is_leaf(csrc->pg[csrc->tree->height - 1])); + cASSERT(cdst, csrc->txn->env->options.prefer_waf_insteadof_balance || + page_room(pdst) >= page_used(cdst->txn->env, psrc)); + const int pagetype = page_type(psrc); + + /* Move all nodes from src to dst */ + const size_t dst_nkeys = page_numkeys(pdst); + const size_t src_nkeys = page_numkeys(psrc); + cASSERT(cdst, dst_nkeys + src_nkeys >= (is_leaf(psrc) ? 1u : 2u)); + if (likely(src_nkeys)) { + size_t ii = dst_nkeys; + if (unlikely(pagetype & P_DUPFIX)) { + /* Mark dst as dirty.
*/ + rc = page_touch(cdst); + cASSERT(cdst, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + key.iov_len = csrc->tree->dupfix_size; + key.iov_base = page_data(psrc); + size_t i = 0; + do { + rc = node_add_dupfix(cdst, ii++, &key); + cASSERT(cdst, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + key.iov_base = ptr_disp(key.iov_base, key.iov_len); + } while (++i != src_nkeys); + } else { + node_t *srcnode = page_node(psrc, 0); + key.iov_len = node_ks(srcnode); + key.iov_base = node_key(srcnode); + if (pagetype & P_BRANCH) { + cursor_couple_t couple; + MDBX_cursor *const mn = cursor_clone(csrc, &couple); + + /* must find the lowest key below src */ + rc = tree_search_lowest(mn); + cASSERT(csrc, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + const page_t *mp = mn->pg[mn->top]; + if (likely(!is_dupfix_leaf(mp))) { + cASSERT(mn, is_leaf(mp)); + const node_t *lowest = page_node(mp, 0); + key.iov_len = node_ks(lowest); + key.iov_base = node_key(lowest); + } else { + cASSERT(mn, mn->top > csrc->top); + key = page_dupfix_key(mp, mn->ki[mn->top], csrc->tree->dupfix_size); + } + cASSERT(mn, key.iov_len >= csrc->clc->k.lmin); + cASSERT(mn, key.iov_len <= csrc->clc->k.lmax); + + const size_t dst_room = page_room(pdst); + const size_t src_used = page_used(cdst->txn->env, psrc); + const size_t space_needed = src_used - node_ks(srcnode) + key.iov_len; + if (unlikely(space_needed > dst_room)) + return MDBX_RESULT_TRUE; + } + + /* Mark dst as dirty. 
*/ + rc = page_touch(cdst); + cASSERT(cdst, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + size_t i = 0; + while (true) { + if (pagetype & P_LEAF) { + MDBX_val data; + data.iov_len = node_ds(srcnode); + data.iov_base = node_data(srcnode); + rc = node_add_leaf(cdst, ii++, &key, &data, node_flags(srcnode)); + } else { + cASSERT(csrc, node_flags(srcnode) == 0); + rc = node_add_branch(cdst, ii++, &key, node_pgno(srcnode)); + } + cASSERT(cdst, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (++i == src_nkeys) + break; + srcnode = page_node(psrc, i); + key.iov_len = node_ks(srcnode); + key.iov_base = node_key(srcnode); + } + } + + pdst = cdst->pg[cdst->top]; + DEBUG("dst page %" PRIaPGNO " now has %zu keys (%u.%u%% filled)", + pdst->pgno, page_numkeys(pdst), + page_fill_percentum_x10(cdst->txn->env, pdst) / 10, + page_fill_percentum_x10(cdst->txn->env, pdst) % 10); + + cASSERT(csrc, psrc == csrc->pg[csrc->top]); + cASSERT(cdst, pdst == cdst->pg[cdst->top]); + } + + /* Unlink the src page from parent and add to free list. */ + csrc->top -= 1; + node_del(csrc, 0); + if (csrc->ki[csrc->top] == 0) { + const MDBX_val nullkey = {0, 0}; + rc = tree_propagate_key(csrc, &nullkey); + cASSERT(csrc, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) { + csrc->top += 1; + return rc; + } + } + csrc->top += 1; + + cASSERT(csrc, psrc == csrc->pg[csrc->top]); + cASSERT(cdst, pdst == cdst->pg[cdst->top]); + + { + /* Adjust other cursors pointing to mp */ + MDBX_cursor *m2, *m3; + const size_t dbi = cursor_dbi(csrc); + for (m2 = csrc->txn->cursors[dbi]; m2; m2 = m2->next) { + m3 = (csrc->flags & z_inner) ? 
&m2->subcur->cursor : m2; + if (!is_related(csrc, m3)) + continue; + if (m3->pg[csrc->top] == psrc) { + m3->pg[csrc->top] = pdst; + m3->ki[csrc->top] += (indx_t)dst_nkeys; + m3->ki[csrc->top - 1] = cdst->ki[csrc->top - 1]; + } else if (m3->pg[csrc->top - 1] == csrc->pg[csrc->top - 1] && + m3->ki[csrc->top - 1] > csrc->ki[csrc->top - 1]) { + cASSERT(m3, m3->ki[csrc->top - 1] > 0 && + m3->ki[csrc->top - 1] <= + page_numkeys(m3->pg[csrc->top - 1])); + m3->ki[csrc->top - 1] -= 1; + } + + if (is_leaf(psrc) && inner_pointed(m3)) { + cASSERT(csrc, csrc->top == m3->top); + size_t nkeys = page_numkeys(m3->pg[csrc->top]); + if (likely(nkeys > m3->ki[csrc->top])) + cursor_inner_refresh(m3, m3->pg[csrc->top], m3->ki[csrc->top]); + } + } + } + + rc = page_retire(csrc, (page_t *)psrc); + cASSERT(csrc, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + cASSERT(cdst, cdst->tree->items > 0); + cASSERT(cdst, cdst->top + 1 <= cdst->tree->height); + cASSERT(cdst, cdst->top > 0); + page_t *const top_page = cdst->pg[cdst->top]; + const indx_t top_indx = cdst->ki[cdst->top]; + const int save_top = cdst->top; + const uint16_t save_height = cdst->tree->height; + cursor_pop(cdst); + rc = tree_rebalance(cdst); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + cASSERT(cdst, cdst->tree->items > 0); + cASSERT(cdst, cdst->top + 1 <= cdst->tree->height); + +#if MDBX_ENABLE_PGOP_STAT + cdst->txn->env->lck->pgops.merge.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + + if (is_leaf(cdst->pg[cdst->top])) { + /* LY: don't touch cursor if top-page is a LEAF */ + cASSERT(cdst, is_leaf(cdst->pg[cdst->top]) || + page_type(cdst->pg[cdst->top]) == pagetype); + return MDBX_SUCCESS; + } + + cASSERT(cdst, page_numkeys(top_page) == dst_nkeys + src_nkeys); + + if (unlikely(pagetype != page_type(top_page))) { + /* LY: LEAF-page becomes BRANCH, unable restore cursor's stack */ + goto bailout; + } + + if (top_page == cdst->pg[cdst->top]) { + /* LY: don't touch cursor if prev top-page 
already on the top */ + cASSERT(cdst, cdst->ki[cdst->top] == top_indx); + cASSERT(cdst, is_leaf(cdst->pg[cdst->top]) || + page_type(cdst->pg[cdst->top]) == pagetype); + return MDBX_SUCCESS; + } + + const int new_top = save_top - save_height + cdst->tree->height; + if (unlikely(new_top < 0 || new_top >= cdst->tree->height)) { + /* LY: out of range, unable restore cursor's stack */ + goto bailout; + } + + if (top_page == cdst->pg[new_top]) { + cASSERT(cdst, cdst->ki[new_top] == top_indx); + /* LY: restore cursor stack */ + cdst->top = (int8_t)new_top; + cASSERT(cdst, cdst->top + 1 < cdst->tree->height || + is_leaf(cdst->pg[cdst->tree->height - 1])); + cASSERT(cdst, is_leaf(cdst->pg[cdst->top]) || + page_type(cdst->pg[cdst->top]) == pagetype); + return MDBX_SUCCESS; + } + + page_t *const stub_page = (page_t *)(~(uintptr_t)top_page); + const indx_t stub_indx = top_indx; + if (save_height > cdst->tree->height && + ((cdst->pg[save_top] == top_page && cdst->ki[save_top] == top_indx) || + (cdst->pg[save_top] == stub_page && cdst->ki[save_top] == stub_indx))) { + /* LY: restore cursor stack */ + cdst->pg[new_top] = top_page; + cdst->ki[new_top] = top_indx; +#if MDBX_DEBUG + cdst->pg[new_top + 1] = nullptr; + cdst->ki[new_top + 1] = INT16_MAX; +#endif + cdst->top = (int8_t)new_top; + cASSERT(cdst, cdst->top + 1 < cdst->tree->height || + is_leaf(cdst->pg[cdst->tree->height - 1])); + cASSERT(cdst, is_leaf(cdst->pg[cdst->top]) || + page_type(cdst->pg[cdst->top]) == pagetype); + return MDBX_SUCCESS; + } + +bailout: + /* LY: unable restore cursor's stack */ + be_poor(cdst); + return MDBX_CURSOR_FULL; +} +/* Rebalances the page on top of `mc` when it holds fewer than minkeys keys (1 for a leaf, 2 for a branch) or has more free room than the merge threshold (env->merge_threshold, or env->merge_threshold_gc for the FREE_DBI tree); otherwise returns MDBX_SUCCESS at once. For a root page it retires an empty root (resetting tree->root and tree->height), collapses a branch root with a single key, and leaves subpages alone. For other pages it tries, in order, merging with the left sibling, merging with the right sibling, moving a node from the left and moving a node from the right, retrying with relaxed conditions before giving up with MDBX_PROBLEM. */ +int tree_rebalance(MDBX_cursor *mc) { + cASSERT(mc, cursor_is_tracked(mc)); + cASSERT(mc, mc->top >= 0); + cASSERT(mc, mc->top + 1 < mc->tree->height || + is_leaf(mc->pg[mc->tree->height - 1])); + const page_t *const tp = mc->pg[mc->top]; + const uint8_t pagetype = page_type(tp); + + STATIC_ASSERT(P_BRANCH == 1); + const size_t minkeys = (pagetype & P_BRANCH) + (size_t)1; + +
/* Pages emptier than this are candidates for merging. */ + size_t room_threshold = likely(mc->tree != &mc->txn->dbs[FREE_DBI]) + ? mc->txn->env->merge_threshold + : mc->txn->env->merge_threshold_gc; + + const size_t numkeys = page_numkeys(tp); + const size_t room = page_room(tp); + DEBUG("rebalancing %s page %" PRIaPGNO + " (has %zu keys, fill %u.%u%%, used %zu, room %zu bytes)", + is_leaf(tp) ? "leaf" : "branch", tp->pgno, numkeys, + page_fill_percentum_x10(mc->txn->env, tp) / 10, + page_fill_percentum_x10(mc->txn->env, tp) % 10, + page_used(mc->txn->env, tp), room); + cASSERT(mc, is_modifable(mc->txn, tp)); + + if (unlikely(numkeys < minkeys)) { + DEBUG("page %" PRIaPGNO " must be merged due keys < %zu threshold", + tp->pgno, minkeys); + } else if (unlikely(room > room_threshold)) { + DEBUG("page %" PRIaPGNO " should be merged due room %zu > %zu threshold", + tp->pgno, room, room_threshold); + } else { + DEBUG("no need to rebalance page %" PRIaPGNO ", room %zu < %zu threshold", + tp->pgno, room, room_threshold); + cASSERT(mc, mc->tree->items > 0); + return MDBX_SUCCESS; + } + + int rc; + if (mc->top == 0) { + page_t *const mp = mc->pg[0]; + const size_t nkeys = page_numkeys(mp); + cASSERT(mc, (mc->tree->items == 0) == (nkeys == 0)); + if (nkeys == 0) { + DEBUG("%s", "tree is completely empty"); + cASSERT(mc, is_leaf(mp)); + cASSERT(mc, (*cursor_dbi_state(mc) & DBI_DIRTY) != 0); + cASSERT(mc, mc->tree->branch_pages == 0 && mc->tree->large_pages == 0 && + mc->tree->leaf_pages == 1); + /* Adjust cursors pointing to mp */ + for (MDBX_cursor *m2 = mc->txn->cursors[cursor_dbi(mc)]; m2; + m2 = m2->next) { + MDBX_cursor *m3 = (mc->flags & z_inner) ? 
&m2->subcur->cursor : m2; + if (!is_poor(m3) && m3->pg[0] == mp) { + be_poor(m3); + m3->flags |= z_after_delete; + } + } + if (is_subpage(mp)) { + return MDBX_SUCCESS; + } else { + mc->tree->root = P_INVALID; + mc->tree->height = 0; + return page_retire(mc, mp); + } + } + if (is_subpage(mp)) { + DEBUG("%s", "Can't rebalance a subpage, ignoring"); + cASSERT(mc, is_leaf(tp)); + return MDBX_SUCCESS; + } + if (is_branch(mp) && nkeys == 1) { + DEBUG("%s", "collapsing root page!"); + mc->tree->root = node_pgno(page_node(mp, 0)); + rc = page_get(mc, mc->tree->root, &mc->pg[0], mp->txnid); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + mc->tree->height--; + mc->ki[0] = mc->ki[1]; + for (intptr_t i = 1; i < mc->tree->height; i++) { + mc->pg[i] = mc->pg[i + 1]; + mc->ki[i] = mc->ki[i + 1]; + } + + /* Adjust other cursors pointing to mp */ + for (MDBX_cursor *m2 = mc->txn->cursors[cursor_dbi(mc)]; m2; + m2 = m2->next) { + MDBX_cursor *m3 = (mc->flags & z_inner) ? &m2->subcur->cursor : m2; + if (is_related(mc, m3) && m3->pg[0] == mp) { + for (intptr_t i = 0; i < mc->tree->height; i++) { + m3->pg[i] = m3->pg[i + 1]; + m3->ki[i] = m3->ki[i + 1]; + } + m3->top -= 1; + } + } + cASSERT(mc, is_leaf(mc->pg[mc->top]) || + page_type(mc->pg[mc->top]) == pagetype); + cASSERT(mc, mc->top + 1 < mc->tree->height || + is_leaf(mc->pg[mc->tree->height - 1])); + return page_retire(mc, mp); + } + DEBUG("root page %" PRIaPGNO " doesn't need rebalancing (flags 0x%x)", + mp->pgno, mp->flags); + return MDBX_SUCCESS; + } + + /* The parent (branch page) must have at least 2 pointers, + * otherwise the tree is invalid. */ + const size_t pre_top = mc->top - 1; + cASSERT(mc, is_branch(mc->pg[pre_top])); + cASSERT(mc, !is_subpage(mc->pg[0])); + cASSERT(mc, page_numkeys(mc->pg[pre_top]) > 1); + + /* Leaf page fill factor is below the threshold. + * Try to move keys from left or right neighbor, or + * merge with a neighbor page. */ + + /* Find neighbors. 
*/ + cursor_couple_t couple; + MDBX_cursor *const mn = cursor_clone(mc, &couple); + + page_t *left = nullptr, *right = nullptr; + if (mn->ki[pre_top] > 0) { + rc = + page_get(mn, node_pgno(page_node(mn->pg[pre_top], mn->ki[pre_top] - 1)), + &left, mc->pg[mc->top]->txnid); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + cASSERT(mc, page_type(left) == page_type(mc->pg[mc->top])); + } + if (mn->ki[pre_top] + (size_t)1 < page_numkeys(mn->pg[pre_top])) { + rc = page_get( + mn, node_pgno(page_node(mn->pg[pre_top], mn->ki[pre_top] + (size_t)1)), + &right, mc->pg[mc->top]->txnid); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + cASSERT(mc, page_type(right) == page_type(mc->pg[mc->top])); + } + cASSERT(mc, left || right); + + const size_t ki_top = mc->ki[mc->top]; + const size_t ki_pre_top = mn->ki[pre_top]; + const size_t nkeys = page_numkeys(mn->pg[mn->top]); + + const size_t left_room = left ? page_room(left) : 0; + const size_t right_room = right ? page_room(right) : 0; + const size_t left_nkeys = left ? page_numkeys(left) : 0; + const size_t right_nkeys = right ? 
page_numkeys(right) : 0; + bool involve = false; +retry: + cASSERT(mc, mc->top > 0); + if (left_room > room_threshold && left_room >= right_room && + (is_modifable(mc->txn, left) || involve)) { + /* try merge with left */ + cASSERT(mc, left_nkeys >= minkeys); + mn->pg[mn->top] = left; + mn->ki[mn->top - 1] = (indx_t)(ki_pre_top - 1); + mn->ki[mn->top] = (indx_t)(left_nkeys - 1); + mc->ki[mc->top] = 0; + const size_t new_ki = ki_top + left_nkeys; + mn->ki[mn->top] += mc->ki[mn->top] + 1; + couple.outer.next = mn->txn->cursors[cursor_dbi(mn)]; + mn->txn->cursors[cursor_dbi(mn)] = &couple.outer; + rc = page_merge(mc, mn); + mn->txn->cursors[cursor_dbi(mn)] = couple.outer.next; + if (likely(rc != MDBX_RESULT_TRUE)) { + cursor_cpstk(mn, mc); + mc->ki[mc->top] = (indx_t)new_ki; + cASSERT(mc, rc || page_numkeys(mc->pg[mc->top]) >= minkeys); + return rc; + } + } + if (right_room > room_threshold && + (is_modifable(mc->txn, right) || involve)) { + /* try merge with right */ + cASSERT(mc, right_nkeys >= minkeys); + mn->pg[mn->top] = right; + mn->ki[mn->top - 1] = (indx_t)(ki_pre_top + 1); + mn->ki[mn->top] = 0; + mc->ki[mc->top] = (indx_t)nkeys; + couple.outer.next = mn->txn->cursors[cursor_dbi(mn)]; + mn->txn->cursors[cursor_dbi(mn)] = &couple.outer; + rc = page_merge(mn, mc); + mn->txn->cursors[cursor_dbi(mn)] = couple.outer.next; + if (likely(rc != MDBX_RESULT_TRUE)) { + mc->ki[mc->top] = (indx_t)ki_top; + cASSERT(mc, rc || page_numkeys(mc->pg[mc->top]) >= minkeys); + return rc; + } + } + + if (left_nkeys > minkeys && + (right_nkeys <= left_nkeys || right_room >= left_room) && + (is_modifable(mc->txn, left) || involve)) { + /* try move from left */ + mn->pg[mn->top] = left; + mn->ki[mn->top - 1] = (indx_t)(ki_pre_top - 1); + mn->ki[mn->top] = (indx_t)(left_nkeys - 1); + mc->ki[mc->top] = 0; + couple.outer.next = mn->txn->cursors[cursor_dbi(mn)]; + mn->txn->cursors[cursor_dbi(mn)] = &couple.outer; + rc = node_move(mn, mc, true); + mn->txn->cursors[cursor_dbi(mn)] = 
couple.outer.next; + if (likely(rc != MDBX_RESULT_TRUE)) { + mc->ki[mc->top] = (indx_t)(ki_top + 1); + cASSERT(mc, rc || page_numkeys(mc->pg[mc->top]) >= minkeys); + return rc; + } + } + if (right_nkeys > minkeys && (is_modifable(mc->txn, right) || involve)) { + /* try move from right */ + mn->pg[mn->top] = right; + mn->ki[mn->top - 1] = (indx_t)(ki_pre_top + 1); + mn->ki[mn->top] = 0; + mc->ki[mc->top] = (indx_t)nkeys; + couple.outer.next = mn->txn->cursors[cursor_dbi(mn)]; + mn->txn->cursors[cursor_dbi(mn)] = &couple.outer; + rc = node_move(mn, mc, false); + mn->txn->cursors[cursor_dbi(mn)] = couple.outer.next; + if (likely(rc != MDBX_RESULT_TRUE)) { + mc->ki[mc->top] = (indx_t)ki_top; + cASSERT(mc, rc || page_numkeys(mc->pg[mc->top]) >= minkeys); + return rc; + } + } + + if (nkeys >= minkeys) { + mc->ki[mc->top] = (indx_t)ki_top; + if (AUDIT_ENABLED()) + return cursor_check_updating(mc); + return MDBX_SUCCESS; + } + + if (mc->txn->env->options.prefer_waf_insteadof_balance && + likely(room_threshold > 0)) { + room_threshold = 0; + goto retry; + } + if (likely(!involve) && + (likely(mc->tree != &mc->txn->dbs[FREE_DBI]) || mc->txn->tw.loose_pages || + MDBX_PNL_GETSIZE(mc->txn->tw.relist) || + (mc->flags & z_gcu_preparation) || (mc->txn->flags & txn_gc_drained) || + room_threshold)) { + involve = true; + goto retry; + } + if (likely(room_threshold > 0)) { + room_threshold = 0; + goto retry; + } + + ERROR("Unable to merge/rebalance %s page %" PRIaPGNO + " (has %zu keys, fill %u.%u%%, used %zu, room %zu bytes)", + is_leaf(tp) ?
"leaf" : "branch", tp->pgno, numkeys, + page_fill_percentum_x10(mc->txn->env, tp) / 10, + page_fill_percentum_x10(mc->txn->env, tp) % 10, + page_used(mc->txn->env, tp), room); + return MDBX_PROBLEM; +} + +/* Splits the page at the top of cursor `mc` while inserting a new entry at mc->ki[mc->top]. Steps: allocate a sibling page (plus a new root branch page when mc->top == 0), choose the split index and separator key, add the separator to the parent (recursively calling page_split() on the parent when it lacks room), move or insert the nodes, then fix up the other cursors tracked for the same DBI. Parameters: `newkey` is the key being inserted; `newdata` is its data and may be null (the recursive branch-level call passes nullptr); `newpgno` is the child page number used when the new node goes into a branch page; `naf` carries node-add flags, of which MDBX_SPLIT_REPLACE and MDBX_RESERVE are tested here. Returns MDBX_SUCCESS or an error code; every failure that reaches the `done` label also sets MDBX_TXN_ERROR in mc->txn->flags. On success with MDBX_RESERVE, newdata->iov_base is pointed at the stored node data unless the node has N_BIGDATA. */ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, + MDBX_val *const newdata, pgno_t newpgno, const unsigned naf) { + unsigned flags; + int rc = MDBX_SUCCESS, foliage = 0; + MDBX_env *const env = mc->txn->env; + MDBX_val rkey, xdata; + page_t *tmp_ki_copy = nullptr; + DKBUF; + + page_t *const mp = mc->pg[mc->top]; + cASSERT(mc, (mp->flags & P_ILL_BITS) == 0); + + const size_t newindx = mc->ki[mc->top]; + size_t nkeys = page_numkeys(mp); + if (AUDIT_ENABLED()) { + rc = cursor_check_updating(mc); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + STATIC_ASSERT(P_BRANCH == 1); + const size_t minkeys = (mp->flags & P_BRANCH) + (size_t)1; /* 2 for branch pages, 1 for leaf pages (relies on P_BRANCH == 1) */ + + DEBUG(">> splitting %s-page %" PRIaPGNO + " and adding %zu+%zu [%s] at %i, nkeys %zi", + is_leaf(mp) ? "leaf" : "branch", mp->pgno, newkey->iov_len, + newdata ? newdata->iov_len : 0, DKEY_DEBUG(newkey), mc->ki[mc->top], + nkeys); + cASSERT(mc, nkeys + 1 >= minkeys * 2); + + /* Create a new sibling page. */ + pgr_t npr = page_new(mc, mp->flags); + if (unlikely(npr.err != MDBX_SUCCESS)) + return npr.err; + page_t *const sister = npr.page; + sister->dupfix_ksize = mp->dupfix_ksize; + DEBUG("new sibling: page %" PRIaPGNO, sister->pgno); + + /* Usually when splitting the root page, the cursor + * height is 1. But when called from tree_propagate_key, + * the cursor height may be greater because it walks + * up the stack while finding the branch slot to update.
*/ + intptr_t prev_top = mc->top - 1; + if (mc->top == 0) { + npr = page_new(mc, P_BRANCH); + rc = npr.err; + if (unlikely(rc != MDBX_SUCCESS)) + goto done; + page_t *const pp = npr.page; + /* shift current top to make room for new parent */ + cASSERT(mc, mc->tree->height > 0); +#if MDBX_DEBUG + memset(mc->pg + 3, 0, sizeof(mc->pg) - sizeof(mc->pg[0]) * 3); + memset(mc->ki + 3, -1, sizeof(mc->ki) - sizeof(mc->ki[0]) * 3); +#endif + mc->pg[2] = mc->pg[1]; + mc->ki[2] = mc->ki[1]; + mc->pg[1] = mc->pg[0]; + mc->ki[1] = mc->ki[0]; + mc->pg[0] = pp; + mc->ki[0] = 0; + mc->tree->root = pp->pgno; + DEBUG("root split! new root = %" PRIaPGNO, pp->pgno); + foliage = mc->tree->height++; /* keeps the pre-split height (asserted > 0 above); a non-zero foliage later marks that a root split happened */ + + /* Add left (implicit) pointer. */ + rc = node_add_branch(mc, 0, nullptr, mp->pgno); + if (unlikely(rc != MDBX_SUCCESS)) { + /* undo the pre-push */ + mc->pg[0] = mc->pg[1]; + mc->ki[0] = mc->ki[1]; + mc->tree->root = mp->pgno; + mc->tree->height--; + goto done; + } + mc->top = 1; + prev_top = 0; + if (AUDIT_ENABLED()) { + rc = cursor_check_updating(mc); + if (unlikely(rc != MDBX_SUCCESS)) + goto done; + } + } else { + DEBUG("parent branch page is %" PRIaPGNO, mc->pg[prev_top]->pgno); + } + + cursor_couple_t couple; + MDBX_cursor *const mn = cursor_clone(mc, &couple); + mn->pg[mn->top] = sister; + mn->ki[mn->top] = 0; + mn->ki[prev_top] = mc->ki[prev_top] + 1; + + size_t split_indx = + (newindx < nkeys) + ? /* split at the middle */ (nkeys + 1) >> 1 + : /* split at the end (i.e.
like append-mode ) */ nkeys - minkeys + 1; + eASSERT(env, split_indx >= minkeys && split_indx <= nkeys - minkeys + 1); + + cASSERT(mc, !is_branch(mp) || newindx > 0); + MDBX_val sepkey = {nullptr, 0}; + /* It is reasonable and possible to split the page at the begin */ + if (unlikely(newindx < minkeys)) { + split_indx = minkeys; + if (newindx == 0 && !(naf & MDBX_SPLIT_REPLACE)) { + split_indx = 0; + /* Checking for ability of splitting by the left-side insertion + * of a pure page with the new key */ + for (intptr_t i = 0; i < mc->top; ++i) + if (mc->ki[i]) { + sepkey = get_key(page_node(mc->pg[i], mc->ki[i])); + if (mc->clc->k.cmp(newkey, &sepkey) >= 0) + split_indx = minkeys; + break; + } + if (split_indx == 0) { + /* Save the current first key which was omitted on the parent branch + * page and should be updated if the new first entry will be added */ + if (is_dupfix_leaf(mp)) + sepkey = page_dupfix_key(mp, 0, mc->tree->dupfix_size); + else + sepkey = get_key(page_node(mp, 0)); + cASSERT(mc, mc->clc->k.cmp(newkey, &sepkey) < 0); + /* Avoiding rare complex cases of nested split the parent page(s) */ + if (page_room(mc->pg[prev_top]) < branch_size(env, &sepkey)) + split_indx = minkeys; + } + if (foliage) { + TRACE("pure-left: foliage %u, top %i, ptop %zu, split_indx %zi, " + "minkeys %zi, sepkey %s, parent-room %zu, need4split %zu", + foliage, mc->top, prev_top, split_indx, minkeys, + DKEY_DEBUG(&sepkey), page_room(mc->pg[prev_top]), + branch_size(env, &sepkey)); + TRACE("pure-left: newkey %s, newdata %s, newindx %zu", + DKEY_DEBUG(newkey), DVAL_DEBUG(newdata), newindx); + } + } + } + + const bool pure_right = split_indx == nkeys; + const bool pure_left = split_indx == 0; + if (unlikely(pure_right)) { + /* newindx == split_indx == nkeys */ + TRACE("no-split, but add new pure page at the %s", "right/after"); + cASSERT(mc, newindx == nkeys && split_indx == nkeys && minkeys == 1); + sepkey = *newkey; + } else if (unlikely(pure_left)) { + /* newindx == split_indx ==
0 */ + TRACE("pure-left: no-split, but add new pure page at the %s", + "left/before"); + cASSERT(mc, newindx == 0 && split_indx == 0 && minkeys == 1); + TRACE("pure-left: old-first-key is %s", DKEY_DEBUG(&sepkey)); + } else { + if (is_dupfix_leaf(sister)) { + /* Move half of the keys to the right sibling */ + const intptr_t distance = mc->ki[mc->top] - split_indx; + size_t ksize = mc->tree->dupfix_size; + void *const split = page_dupfix_ptr(mp, split_indx, ksize); + size_t rsize = (nkeys - split_indx) * ksize; + size_t lsize = (nkeys - split_indx) * sizeof(indx_t); + cASSERT(mc, mp->lower >= lsize); + mp->lower -= (indx_t)lsize; + cASSERT(mc, sister->lower + lsize <= UINT16_MAX); + sister->lower += (indx_t)lsize; + cASSERT(mc, mp->upper + rsize - lsize <= UINT16_MAX); + mp->upper += (indx_t)(rsize - lsize); + cASSERT(mc, sister->upper >= rsize - lsize); + sister->upper -= (indx_t)(rsize - lsize); + sepkey.iov_len = ksize; + sepkey.iov_base = (newindx != split_indx) ? split : newkey->iov_base; + if (distance < 0) { + cASSERT(mc, ksize >= sizeof(indx_t)); + void *const ins = page_dupfix_ptr(mp, mc->ki[mc->top], ksize); + memcpy(sister->entries, split, rsize); + sepkey.iov_base = sister->entries; + memmove(ptr_disp(ins, ksize), ins, + (split_indx - mc->ki[mc->top]) * ksize); + memcpy(ins, newkey->iov_base, ksize); + cASSERT(mc, UINT16_MAX - mp->lower >= (int)sizeof(indx_t)); + mp->lower += sizeof(indx_t); + cASSERT(mc, mp->upper >= ksize - sizeof(indx_t)); + mp->upper -= (indx_t)(ksize - sizeof(indx_t)); + cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->upper) & 1) == 0); + } else { + memcpy(sister->entries, split, distance * ksize); + void *const ins = page_dupfix_ptr(sister, distance, ksize); + memcpy(ins, newkey->iov_base, ksize); + memcpy(ptr_disp(ins, ksize), ptr_disp(split, distance * ksize), + rsize - distance * ksize); + cASSERT(mc, UINT16_MAX - sister->lower >= (int)sizeof(indx_t)); + sister->lower += sizeof(indx_t); + cASSERT(mc, sister->upper >= ksize -
sizeof(indx_t)); + sister->upper -= (indx_t)(ksize - sizeof(indx_t)); + cASSERT(mc, distance <= (int)UINT16_MAX); + mc->ki[mc->top] = (indx_t)distance; + cASSERT(mc, + (((ksize & page_numkeys(sister)) ^ sister->upper) & 1) == 0); + } + + if (AUDIT_ENABLED()) { + rc = cursor_check_updating(mc); + if (unlikely(rc != MDBX_SUCCESS)) + goto done; + rc = cursor_check_updating(mn); + if (unlikely(rc != MDBX_SUCCESS)) + goto done; + } + } else { + /* grab a page to hold a temporary copy */ + tmp_ki_copy = page_shadow_alloc(mc->txn, 1); + if (unlikely(tmp_ki_copy == nullptr)) { + rc = MDBX_ENOMEM; + goto done; + } + + const size_t max_space = page_space(env); + const size_t new_size = is_leaf(mp) ? leaf_size(env, newkey, newdata) + : branch_size(env, newkey); + + /* prepare to insert */ + size_t i = 0; + while (i < newindx) { + tmp_ki_copy->entries[i] = mp->entries[i]; + ++i; + } + tmp_ki_copy->entries[i] = (indx_t)-1; /* placeholder slot for the new node at newindx */ + while (++i <= nkeys) + tmp_ki_copy->entries[i] = mp->entries[i - 1]; + tmp_ki_copy->pgno = mp->pgno; + tmp_ki_copy->flags = mp->flags; + tmp_ki_copy->txnid = INVALID_TXNID; + tmp_ki_copy->lower = 0; + tmp_ki_copy->upper = (indx_t)max_space; + + /* The node being added may not fit into a half-page together with + * a count-wise half of the nodes from the source page. In the worst case, + * the half-page that receives the new node may get the largest nodes + * of the source page, while the other half gets only nodes with the + * shortest keys and empty data. Therefore, finding a suitable split + * boundary requires iterating over the nodes and summing up their sizes. + * + * However, with a plain count-based split (ignoring key and data sizes) + * each half-page will hold roughly half as many nodes. Therefore the new + * node is guaranteed to fit if its size does not exceed the space "freed" + * by the headers of the nodes that move to the other half-page.
In addition, every key holds at least one byte, + * except, in the worst case, a single key that may be of + * zero length. */ + + if (newindx == split_indx && nkeys >= 5) { + STATIC_ASSERT(P_BRANCH == 1); + split_indx += mp->flags & P_BRANCH; + } + eASSERT(env, split_indx >= minkeys && split_indx <= nkeys + 1 - minkeys); + const size_t dim_nodes = + (newindx >= split_indx) ? split_indx : nkeys - split_indx; + const size_t dim_used = (sizeof(indx_t) + NODESIZE + 1) * dim_nodes; + if (new_size >= dim_used) { + /* Search for best acceptable split point */ + i = (newindx < split_indx) ? 0 : nkeys; + intptr_t dir = (newindx < split_indx) ? 1 : -1; + size_t before = 0, after = new_size + page_used(env, mp); + size_t best_split = split_indx; + size_t best_shift = INT_MAX; + + TRACE("seek separator from %zu, step %zi, default %zu, new-idx %zu, " + "new-size %zu", + i, dir, split_indx, newindx, new_size); + do { + cASSERT(mc, i <= nkeys); + size_t size = new_size; + if (i != newindx) { + node_t *node = ptr_disp(mp, tmp_ki_copy->entries[i] + PAGEHDRSZ); + size = NODESIZE + node_ks(node) + sizeof(indx_t); + if (is_leaf(mp)) + size += (node_flags(node) & N_BIGDATA) ?
sizeof(pgno_t) + : node_ds(node); + size = EVEN_CEIL(size); + } + + before += size; + after -= size; + TRACE("step %zu, size %zu, before %zu, after %zu, max %zu", i, size, + before, after, max_space); + + if (before <= max_space && after <= max_space) { + const size_t split = i + (dir > 0); + if (split >= minkeys && split <= nkeys + 1 - minkeys) { + const size_t shift = branchless_abs(split_indx - split); + if (shift >= best_shift) + break; + best_shift = shift; + best_split = split; + if (!best_shift) + break; + } + } + i += dir; + } while (i < nkeys); + + split_indx = best_split; + TRACE("chosen %zu", split_indx); + } + eASSERT(env, split_indx >= minkeys && split_indx <= nkeys + 1 - minkeys); + + sepkey = *newkey; + if (split_indx != newindx) { + node_t *node = + ptr_disp(mp, tmp_ki_copy->entries[split_indx] + PAGEHDRSZ); + sepkey.iov_len = node_ks(node); + sepkey.iov_base = node_key(node); + } + } + } + DEBUG("separator is %zd [%s]", split_indx, DKEY_DEBUG(&sepkey)); + + bool did_split_parent = false; + /* Copy separator key to the parent. */ + if (page_room(mn->pg[prev_top]) < branch_size(env, &sepkey)) { + TRACE("need split parent branch-page for key %s", DKEY_DEBUG(&sepkey)); + cASSERT(mc, page_numkeys(mn->pg[prev_top]) > 2); + cASSERT(mc, !pure_left); + const int top = mc->top; + const int height = mc->tree->height; + mn->top -= 1; + did_split_parent = true; + couple.outer.next = mn->txn->cursors[cursor_dbi(mn)]; + mn->txn->cursors[cursor_dbi(mn)] = &couple.outer; + rc = page_split(mn, &sepkey, nullptr, sister->pgno, 0); + mn->txn->cursors[cursor_dbi(mn)] = couple.outer.next; + if (unlikely(rc != MDBX_SUCCESS)) + goto done; + cASSERT(mc, mc->top - top == mc->tree->height - height); + if (AUDIT_ENABLED()) { + rc = cursor_check_updating(mc); + if (unlikely(rc != MDBX_SUCCESS)) + goto done; + } + + /* root split? */ + prev_top += mc->top - top; + + /* Right page might now have changed parent. + * Check if left page also changed parent.
*/ + if (mn->pg[prev_top] != mc->pg[prev_top] && + mc->ki[prev_top] >= page_numkeys(mc->pg[prev_top])) { + for (intptr_t i = 0; i < prev_top; i++) { + mc->pg[i] = mn->pg[i]; + mc->ki[i] = mn->ki[i]; + } + mc->pg[prev_top] = mn->pg[prev_top]; + if (mn->ki[prev_top]) { + mc->ki[prev_top] = mn->ki[prev_top] - 1; + } else { + /* find right page's left sibling */ + mc->ki[prev_top] = mn->ki[prev_top]; + rc = cursor_sibling_left(mc); + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc == MDBX_NOTFOUND) /* improper mdbx_cursor_sibling() result */ { + ERROR("unexpected %i error going left sibling", rc); + rc = MDBX_PROBLEM; + } + goto done; + } + } + } + } else if (unlikely(pure_left)) { + page_t *ptop_page = mc->pg[prev_top]; + TRACE("pure-left: adding to parent page %u node[%u] left-leaf page #%u key " + "%s", + ptop_page->pgno, mc->ki[prev_top], sister->pgno, + DKEY(mc->ki[prev_top] ? newkey : nullptr)); + assert(mc->top == prev_top + 1); + mc->top = (uint8_t)prev_top; + rc = node_add_branch(mc, mc->ki[prev_top], + mc->ki[prev_top] ?
newkey : nullptr, sister->pgno); + cASSERT(mc, mp == mc->pg[prev_top + 1] && newindx == mc->ki[prev_top + 1] && + prev_top == mc->top); + + if (likely(rc == MDBX_SUCCESS) && mc->ki[prev_top] == 0) { + node_t *node = page_node(mc->pg[prev_top], 1); + TRACE("pure-left: update prev-first key on parent to %s", DKEY(&sepkey)); + cASSERT(mc, node_ks(node) == 0 && node_pgno(node) == mp->pgno); + cASSERT(mc, mc->top == prev_top && mc->ki[prev_top] == 0); + mc->ki[prev_top] = 1; + rc = tree_propagate_key(mc, &sepkey); + cASSERT(mc, mc->top == prev_top && mc->ki[prev_top] == 1); + cASSERT(mc, + mp == mc->pg[prev_top + 1] && newindx == mc->ki[prev_top + 1]); + mc->ki[prev_top] = 0; + } else { + TRACE("pure-left: no-need-update prev-first key on parent %s", + DKEY(&sepkey)); + } + + mc->top++; + if (unlikely(rc != MDBX_SUCCESS)) + goto done; + + node_t *node = page_node(mc->pg[prev_top], mc->ki[prev_top] + (size_t)1); + cASSERT(mc, node_pgno(node) == mp->pgno && mc->pg[prev_top] == ptop_page); + } else { + mn->top -= 1; + TRACE("add-to-parent the right-entry[%u] for new sibling-page", + mn->ki[prev_top]); + rc = node_add_branch(mn, mn->ki[prev_top], &sepkey, sister->pgno); + mn->top += 1; + if (unlikely(rc != MDBX_SUCCESS)) + goto done; + } + + if (unlikely(pure_left | pure_right)) { + mc->pg[mc->top] = sister; + mc->ki[mc->top] = 0; + switch (page_type(sister)) { + case P_LEAF: { + cASSERT(mc, newpgno == 0 || newpgno == P_INVALID); + rc = node_add_leaf(mc, 0, newkey, newdata, naf); + } break; + case P_LEAF | P_DUPFIX: { + cASSERT(mc, (naf & (N_BIGDATA | N_SUBDATA | N_DUPDATA)) == 0); + cASSERT(mc, newpgno == 0 || newpgno == P_INVALID); + rc = node_add_dupfix(mc, 0, newkey); + } break; + default: + rc = bad_page(sister, "wrong page-type %u\n", page_type(sister)); + } + if (unlikely(rc != MDBX_SUCCESS)) + goto done; + + if (pure_right) { + for (intptr_t i = 0; i < mc->top; i++) + mc->ki[i] = mn->ki[i]; + } else if (mc->ki[mc->top - 1] == 0) { + for (intptr_t i = 2; i <=
mc->top; ++i) + if (mc->ki[mc->top - i]) { + sepkey = get_key(page_node(mc->pg[mc->top - i], mc->ki[mc->top - i])); + if (mc->clc->k.cmp(newkey, &sepkey) < 0) { + mc->top -= (int8_t)i; + DEBUG("pure-left: update new-first on parent [%i] page %u key %s", + mc->ki[mc->top], mc->pg[mc->top]->pgno, DKEY(newkey)); + rc = tree_propagate_key(mc, newkey); + mc->top += (int8_t)i; + if (unlikely(rc != MDBX_SUCCESS)) + goto done; + } + break; + } + } + } else if (tmp_ki_copy) { /* !is_dupfix_leaf(mp) */ + /* Move nodes */ + mc->pg[mc->top] = sister; + size_t n = 0, ii = split_indx; + do { + TRACE("i %zu, nkeys %zu => n %zu, rp #%u", ii, nkeys, n, sister->pgno); + pgno_t pgno = 0; + MDBX_val *rdata = nullptr; + if (ii == newindx) { + rkey = *newkey; + if (is_leaf(mp)) + rdata = newdata; + else + pgno = newpgno; + flags = naf; + /* Update index for the new key. */ + mc->ki[mc->top] = (indx_t)n; + } else { + node_t *node = ptr_disp(mp, tmp_ki_copy->entries[ii] + PAGEHDRSZ); + rkey.iov_base = node_key(node); + rkey.iov_len = node_ks(node); + if (is_leaf(mp)) { + xdata.iov_base = node_data(node); + xdata.iov_len = node_ds(node); + rdata = &xdata; + } else + pgno = node_pgno(node); + flags = node_flags(node); + } + + switch (page_type(sister)) { + case P_BRANCH: { + cASSERT(mc, 0 == (uint16_t)flags); + /* First branch index doesn't need key data. */ + rc = node_add_branch(mc, n, n ?
&rkey : nullptr, pgno); + } break; + case P_LEAF: { + cASSERT(mc, pgno == 0); + cASSERT(mc, rdata != nullptr); + rc = node_add_leaf(mc, n, &rkey, rdata, flags); + } break; + /* case P_LEAF | P_DUPFIX: { + cASSERT(mc, (nflags & (N_BIGDATA | N_SUBDATA | N_DUPDATA)) == 0); + cASSERT(mc, gno == 0); + rc = mdbx_node_add_dupfix(mc, n, &rkey); + } break; */ + default: + rc = bad_page(sister, "wrong page-type %u\n", page_type(sister)); + } + if (unlikely(rc != MDBX_SUCCESS)) + goto done; + + ++n; + if (++ii > nkeys) { + ii = 0; + n = 0; + mc->pg[mc->top] = tmp_ki_copy; + TRACE("switch to mp #%u", tmp_ki_copy->pgno); + } + } while (ii != split_indx); + + TRACE("ii %zu, nkeys %zu, n %zu, pgno #%u", ii, nkeys, n, + mc->pg[mc->top]->pgno); + + nkeys = page_numkeys(tmp_ki_copy); + for (size_t i = 0; i < nkeys; i++) + mp->entries[i] = tmp_ki_copy->entries[i]; + mp->lower = tmp_ki_copy->lower; + mp->upper = tmp_ki_copy->upper; + memcpy(page_node(mp, nkeys - 1), page_node(tmp_ki_copy, nkeys - 1), + env->ps - tmp_ki_copy->upper - PAGEHDRSZ); + + /* reset back to original page */ + if (newindx < split_indx) { + mc->pg[mc->top] = mp; + } else { + mc->pg[mc->top] = sister; + mc->ki[prev_top]++; + /* Make sure ki is still valid. */ + if (mn->pg[prev_top] != mc->pg[prev_top] && + mc->ki[prev_top] >= page_numkeys(mc->pg[prev_top])) { + for (intptr_t i = 0; i <= prev_top; i++) { + mc->pg[i] = mn->pg[i]; + mc->ki[i] = mn->ki[i]; + } + } + } + } else if (newindx >= split_indx) { + mc->pg[mc->top] = sister; + mc->ki[prev_top]++; + /* Make sure ki is still valid. */ + if (mn->pg[prev_top] != mc->pg[prev_top] && + mc->ki[prev_top] >= page_numkeys(mc->pg[prev_top])) { + for (intptr_t i = 0; i <= prev_top; i++) { + mc->pg[i] = mn->pg[i]; + mc->ki[i] = mn->ki[i]; + } + } + } + + /* Adjust other cursors pointing to mp and/or to parent page */ + nkeys = page_numkeys(mp); + for (MDBX_cursor *m2 = mc->txn->cursors[cursor_dbi(mc)]; m2; m2 = m2->next) { + MDBX_cursor *m3 = (mc->flags & z_inner) ?
&m2->subcur->cursor : m2; + if (!is_pointed(m3) || m3 == mc) + continue; + if (foliage) { + /* sub cursors may be on different DB */ + if (m3->pg[0] != mp) + continue; + /* root split */ + for (intptr_t k = foliage; k >= 0; k--) { + m3->ki[k + 1] = m3->ki[k]; + m3->pg[k + 1] = m3->pg[k]; + } + m3->ki[0] = m3->ki[0] >= nkeys + pure_left; + m3->pg[0] = mc->pg[0]; + m3->top += 1; + } + + if (m3->top >= mc->top && m3->pg[mc->top] == mp && !pure_left) { + if (m3->ki[mc->top] >= newindx) + m3->ki[mc->top] += !(naf & MDBX_SPLIT_REPLACE); + if (m3->ki[mc->top] >= nkeys) { + m3->pg[mc->top] = sister; + cASSERT(mc, m3->ki[mc->top] >= nkeys); + m3->ki[mc->top] -= (indx_t)nkeys; + for (intptr_t i = 0; i < mc->top; i++) { + m3->ki[i] = mn->ki[i]; + m3->pg[i] = mn->pg[i]; + } + } + } else if (!did_split_parent && m3->top >= prev_top && + m3->pg[prev_top] == mc->pg[prev_top] && + m3->ki[prev_top] >= mc->ki[prev_top]) { + m3->ki[prev_top]++; /* also for the `pure-left` case */ + } + if (inner_pointed(m3) && is_leaf(mp)) + cursor_inner_refresh(m3, m3->pg[mc->top], m3->ki[mc->top]); + } + TRACE("mp #%u left: %zd, sister #%u left: %zd", mp->pgno, page_room(mp), + sister->pgno, page_room(sister)); + +done: + if (tmp_ki_copy) + page_shadow_release(env, tmp_ki_copy, 1); + + if (unlikely(rc != MDBX_SUCCESS)) + mc->txn->flags |= MDBX_TXN_ERROR; + else { + if (AUDIT_ENABLED()) + rc = cursor_check_updating(mc); + if (unlikely(naf & MDBX_RESERVE)) { + node_t *node = page_node(mc->pg[mc->top], mc->ki[mc->top]); + if (!(node_flags(node) & N_BIGDATA)) + newdata->iov_base = node_data(node); + } +#if MDBX_ENABLE_PGOP_STAT + env->lck->pgops.split.weak += 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + } + + DEBUG("<< mp #%u, rc %d", mp->pgno, rc); + return rc; +} + +/* Replaces the key of the node at mc->ki[mc->top] on page mc->pg[mc->top] with `key`. When the even-rounded key size changes, the node contents are shifted inside the page and the affected entry offsets and mp->upper are adjusted by the size delta. If the page does not have enough room for the growth, the node is deleted and re-inserted through page_split() with MDBX_SPLIT_REPLACE. Returns MDBX_SUCCESS, or the error produced by page_split() / cursor_check_updating() on the split path. */ int tree_propagate_key(MDBX_cursor *mc, const MDBX_val *key) { + page_t *mp; + node_t *node; + size_t len; + ptrdiff_t delta, ksize, oksize; + intptr_t ptr, i, nkeys, indx; + DKBUF_DEBUG; + + cASSERT(mc, cursor_is_tracked(mc)); + indx =
mc->ki[mc->top]; + mp = mc->pg[mc->top]; + node = page_node(mp, indx); + ptr = mp->entries[indx]; +#if MDBX_DEBUG + MDBX_val k2; + k2.iov_base = node_key(node); + k2.iov_len = node_ks(node); + DEBUG("update key %zi (offset %zu) [%s] to [%s] on page %" PRIaPGNO, indx, + ptr, DVAL_DEBUG(&k2), DKEY_DEBUG(key), mp->pgno); +#endif /* MDBX_DEBUG */ + + /* Sizes must be 2-byte aligned. */ + ksize = EVEN_CEIL(key->iov_len); + oksize = EVEN_CEIL(node_ks(node)); + delta = ksize - oksize; + + /* Shift node contents if EVEN_CEIL(key length) changed. */ + if (delta) { + if (delta > (int)page_room(mp)) { + /* not enough space left, do a delete and split */ + DEBUG("Not enough room, delta = %zd, splitting...", delta); + pgno_t pgno = node_pgno(node); + node_del(mc, 0); + int err = page_split(mc, key, nullptr, pgno, MDBX_SPLIT_REPLACE); + if (err == MDBX_SUCCESS && AUDIT_ENABLED()) + err = cursor_check_updating(mc); + return err; + } + + nkeys = page_numkeys(mp); + for (i = 0; i < nkeys; i++) { + if (mp->entries[i] <= ptr) { + cASSERT(mc, mp->entries[i] >= delta); + mp->entries[i] -= (indx_t)delta; + } + } + + void *const base = ptr_disp(mp, mp->upper + PAGEHDRSZ); + len = ptr - mp->upper + NODESIZE; + memmove(ptr_disp(base, -delta), base, len); /* slides the bytes from mp->upper through this node's header (NODESIZE) by -delta */ + cASSERT(mc, mp->upper >= delta); + mp->upper -= (indx_t)delta; + + node = page_node(mp, indx); + } + + /* But even if no shift was needed, update ksize */ + node_set_ks(node, key->iov_len); + + if (likely(key->iov_len /* to avoid UBSAN traps*/ != 0)) + memcpy(node_key(node), key->iov_base, key->iov_len); + return MDBX_SUCCESS; +} diff --git a/src/txl.c b/src/txl.c new file mode 100644 index 00000000..aca3758d --- /dev/null +++ b/src/txl.c @@ -0,0 +1,102 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +/* Converts a TXL capacity in items into an allocation size in bytes: two extra txnid_t slots and MDBX_ASSUME_MALLOC_OVERHEAD are added, the sum is passed to ceil_powerof2() with a granularity of txl_granulate items (presumably rounding up to that granularity - confirm against ceil_powerof2), and the assumed overhead is subtracted again. */ static inline size_t txl_size2bytes(const size_t size) { + assert(size > 0 && size <= txl_max * 2); + size_t bytes = +
ceil_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(txnid_t) * (size + 2), + txl_granulate * sizeof(txnid_t)) - + MDBX_ASSUME_MALLOC_OVERHEAD; + return bytes; +} + +/* Converts an allocation size in bytes into a TXL capacity in items, excluding the two extra txnid_t slots that txl_size2bytes() adds. */ static inline size_t txl_bytes2size(const size_t bytes) { + size_t size = bytes / sizeof(txnid_t); + assert(size > 2 && size <= txl_max * 2); + return size - 2; +} + +/* Allocates an empty TXL sized for txl_initial items. The first slot of the allocation stores the capacity; the returned pointer is advanced by one slot and its first element (the length) is set to 0. When osal_malloc() yields a null pointer, that null pointer is returned unchanged. */ MDBX_INTERNAL txl_t txl_alloc(void) { + size_t bytes = txl_size2bytes(txl_initial); + txl_t txl = osal_malloc(bytes); + if (likely(txl)) { +#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) + bytes = malloc_usable_size(txl); +#endif /* malloc_usable_size */ + txl[0] = txl_bytes2size(bytes); + assert(txl[0] >= txl_initial); + txl += 1; + *txl = 0; + } + return txl; +} + +/* Releases a TXL; a null list is ignored. The pointer passed to osal_free() is one slot before `txl`, i.e. the start of the allocation made by txl_alloc()/txl_reserve(). */ MDBX_INTERNAL void txl_free(txl_t txl) { + if (likely(txl)) + osal_free(txl - 1); +} + +/* Makes sure *ptxl has capacity for at least `wanna` items. Returns MDBX_SUCCESS when the capacity is already sufficient or after a successful reallocation (in which case *ptxl is updated), MDBX_TXN_FULL when `wanna` exceeds txl_max, and MDBX_ENOMEM when osal_realloc() fails (*ptxl is then left unchanged). The requested size is 2 * wanna - allocated, capped at txl_max. */ MDBX_INTERNAL int txl_reserve(txl_t __restrict *__restrict ptxl, + const size_t wanna) { + const size_t allocated = (size_t)MDBX_PNL_ALLOCLEN(*ptxl); + assert(MDBX_PNL_GETSIZE(*ptxl) <= txl_max && + MDBX_PNL_ALLOCLEN(*ptxl) >= MDBX_PNL_GETSIZE(*ptxl)); + if (likely(allocated >= wanna)) + return MDBX_SUCCESS; + + if (unlikely(wanna > /* paranoia */ txl_max)) { + ERROR("TXL too long (%zu > %zu)", wanna, (size_t)txl_max); + return MDBX_TXN_FULL; + } + + const size_t size = (wanna + wanna - allocated < txl_max) + ?
wanna + wanna - allocated + : txl_max; + size_t bytes = txl_size2bytes(size); + txl_t txl = osal_realloc(*ptxl - 1, bytes); + if (likely(txl)) { +#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) + bytes = malloc_usable_size(txl); +#endif /* malloc_usable_size */ + *txl = txl_bytes2size(bytes); + assert(*txl >= wanna); + *ptxl = txl + 1; + return MDBX_SUCCESS; + } + return MDBX_ENOMEM; +} + +/* Makes sure *ptxl can take `num` more items on top of its current length; calls txl_reserve() only when the current capacity is insufficient and returns its result, otherwise MDBX_SUCCESS. */ static __always_inline int __must_check_result +txl_need(txl_t __restrict *__restrict ptxl, size_t num) { + assert(MDBX_PNL_GETSIZE(*ptxl) <= txl_max && + MDBX_PNL_ALLOCLEN(*ptxl) >= MDBX_PNL_GETSIZE(*ptxl)); + assert(num <= PAGELIST_LIMIT); + const size_t wanna = (size_t)MDBX_PNL_GETSIZE(*ptxl) + num; + return likely(MDBX_PNL_ALLOCLEN(*ptxl) >= wanna) ? MDBX_SUCCESS + : txl_reserve(ptxl, wanna); +} + +/* Appends `id` without growing the list: increments the length stored in txl[0] and writes the new last element. The caller must have ensured free capacity (only asserted here). */ static __always_inline void txl_xappend(txl_t __restrict txl, txnid_t id) { + assert(MDBX_PNL_GETSIZE(txl) < MDBX_PNL_ALLOCLEN(txl)); + txl[0] += 1; + MDBX_PNL_LAST(txl) = id; +} + +#define TXNID_SORT_CMP(first, last) ((first) > (last)) +SORT_IMPL(txnid_sort, false, txnid_t, TXNID_SORT_CMP) +/* Sorts the list in place with the txnid_sort() routine generated by SORT_IMPL above. NOTE(review): the resulting order is whatever SORT_IMPL derives from TXNID_SORT_CMP ((first) > (last)) - presumably descending; confirm against SORT_IMPL. */ MDBX_INTERNAL void txl_sort(txl_t txl) { + txnid_sort(MDBX_PNL_BEGIN(txl), MDBX_PNL_END(txl)); +} + +/* Appends `id` to *ptxl. When the list is full it first asks txl_need() for txl_granulate more items and returns that error code on failure; otherwise returns MDBX_SUCCESS. *ptxl may be replaced by a reallocated list. */ MDBX_INTERNAL int __must_check_result txl_append(txl_t __restrict *ptxl, + txnid_t id) { + if (unlikely(MDBX_PNL_GETSIZE(*ptxl) == MDBX_PNL_ALLOCLEN(*ptxl))) { + int rc = txl_need(ptxl, txl_granulate); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + txl_xappend(*ptxl, id); + return MDBX_SUCCESS; +} diff --git a/src/txl.h b/src/txl.h new file mode 100644 index 00000000..a17fbee6 --- /dev/null +++ b/src/txl.h @@ -0,0 +1,26 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +/* List of txnid */ +typedef txnid_t *txl_t; +typedef const txnid_t *const_txl_t; + +/* Sizing rules for txl_t, counted in items: the allocation granularity, the initial capacity requested by txl_alloc(), and the upper limit enforced by txl_reserve(). */ enum txl_rules { + txl_granulate = 32, + txl_initial = + txl_granulate - 2 -
MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t), + txl_max = (1u << 26) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t) +}; + +MDBX_INTERNAL txl_t txl_alloc(void); + +MDBX_INTERNAL void txl_free(txl_t txl); + +MDBX_INTERNAL int __must_check_result txl_append(txl_t __restrict *ptxl, + txnid_t id); + +MDBX_INTERNAL void txl_sort(txl_t txl); diff --git a/src/txn.c b/src/txn.c new file mode 100644 index 00000000..17845876 --- /dev/null +++ b/src/txn.c @@ -0,0 +1,1947 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +/* Returns the result of mvcc_shapshot_oldest() for the transaction's environment, passing the troika txnid selected by troika.prefer_steady. */ __hot txnid_t txn_snapshot_oldest(const MDBX_txn *const txn) { + return mvcc_shapshot_oldest( + txn->env, txn->tw.troika.txnid[txn->tw.troika.prefer_steady]); +} + +/* Detaches the tracked-cursor list of every DBI from index 1 upward (FREE_DBI is skipped and asserted to have no cursors) and passes each cursor of those lists to cursor_eot() together with the `merge` flag. The next pointer is read before cursor_eot() is called on a cursor. */ static void done_cursors(MDBX_txn *txn, const bool merge) { + tASSERT(txn, txn->cursors[FREE_DBI] == nullptr); + TXN_FOREACH_DBI_FROM(txn, i, /* skip FREE_DBI */ 1) { + MDBX_cursor *mc = txn->cursors[i]; + if (mc) { + txn->cursors[i] = nullptr; + do { + MDBX_cursor *const next = mc->next; + cursor_eot(mc, merge); + mc = next; + } while (mc); + } + } +} + +/* Writes out the transaction's dirty pages through the I/O context `ctx`. The dirty list is sorted, every entry not flagged P_LOOSE is handed to iov_page(), and entries flagged P_LOOSE are compacted to the front of the list and kept. A failing iov_page() returns its error immediately, before any list bookkeeping. Pending data is then flushed with iov_write(); if that succeeded and ctx->fd is the environment's lazy_fd, the unsynced-page counter is increased and eoos_timestamp is set when it was zero. Finally the list length, pages_including_loose and dirtyroom are updated. Returns MDBX_SUCCESS or the first error code. */ int txn_write(MDBX_txn *txn, iov_ctx_t *ctx) { + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + dpl_t *const dl = dpl_sort(txn); + int rc = MDBX_SUCCESS; + size_t r, w, total_npages = 0; + for (w = 0, r = 1; r <= dl->length; ++r) { + page_t *dp = dl->items[r].ptr; + if (dp->flags & P_LOOSE) { + dl->items[++w] = dl->items[r]; + continue; + } + unsigned npages = dpl_npages(dl, r); + total_npages += npages; + rc = iov_page(txn, ctx, dp, npages); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + + if (!iov_empty(ctx)) { + tASSERT(txn, rc == MDBX_SUCCESS); + rc = iov_write(ctx); + } + + if (likely(rc == MDBX_SUCCESS) && ctx->fd == txn->env->lazy_fd) { + txn->env->lck->unsynced_pages.weak += total_npages; + if (!txn->env->lck->eoos_timestamp.weak) + txn->env->lck->eoos_timestamp.weak = osal_monotime(); + } + +
txn->tw.dirtylist->pages_including_loose -= total_npages; + while (r <= dl->length) + dl->items[++w] = dl->items[r++]; + + dl->sorted = dpl_setlen(dl, w); + /* r - 1 entries were scanned and w of them were kept, so r - 1 - w dirty-list slots are returned to dirtyroom */ txn->tw.dirtyroom += r - 1 - w; + tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == + (txn->parent ? txn->parent->tw.dirtyroom + : txn->env->options.dp_limit)); + tASSERT(txn, txn->tw.dirtylist->length == txn->tw.loose_count); + tASSERT(txn, txn->tw.dirtylist->pages_including_loose == txn->tw.loose_count); + return rc; +} + +/* Merge child txn into parent */ +static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, + const size_t parent_retired_len) { + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0); + dpl_t *const src = dpl_sort(txn); + + /* Remove refunded pages from parent's dirty list */ + dpl_t *const dst = dpl_sort(parent); + if (MDBX_ENABLE_REFUND) { + size_t n = dst->length; + while (n && dst->items[n].pgno >= parent->geo.first_unallocated) { + const unsigned npages = dpl_npages(dst, n); + page_shadow_release(txn->env, dst->items[n].ptr, npages); + --n; + } + parent->tw.dirtyroom += dst->sorted - n; + dst->sorted = dpl_setlen(dst, n); + tASSERT(parent, parent->tw.dirtyroom + parent->tw.dirtylist->length == + (parent->parent ? parent->parent->tw.dirtyroom + : parent->env->options.dp_limit)); + } + + /* Remove reclaimed pages from parent's dirty list */ + const pnl_t reclaimed_list = parent->tw.relist; + dpl_sift(parent, reclaimed_list, false); + + /* Move retired pages from parent's dirty & spilled list to reclaimed */ + size_t r, w, d, s, l; + for (r = w = parent_retired_len; + ++r <= MDBX_PNL_GETSIZE(parent->tw.retired_pages);) { + const pgno_t pgno = parent->tw.retired_pages[r]; + const size_t di = dpl_exist(parent, pgno); + const size_t si = !di ?
spill_search(parent, pgno) : 0; + unsigned npages; + const char *kind; + if (di) { + page_t *dp = dst->items[di].ptr; + tASSERT(parent, (dp->flags & ~(P_LEAF | P_DUPFIX | P_BRANCH | P_LARGE | + P_SPILLED)) == 0); + npages = dpl_npages(dst, di); + page_wash(parent, di, dp, npages); + kind = "dirty"; + l = 1; + if (unlikely(npages > l)) { + /* OVERFLOW-страница могла быть переиспользована по частям. Тогда + * в retired-списке может быть только начало последовательности, + * а остаток растащен по dirty, spilled и reclaimed спискам. Поэтому + * переносим в reclaimed с проверкой на обрыв последовательности. + * В любом случае, все осколки будут учтены и отфильтрованы, т.е. если + * страница была разбита на части, то важно удалить dirty-элемент, + * а все осколки будут учтены отдельно. */ + + /* Список retired страниц не сортирован, но для ускорения сортировки + * дополняется в соответствии с MDBX_PNL_ASCENDING */ +#if MDBX_PNL_ASCENDING + const size_t len = MDBX_PNL_GETSIZE(parent->tw.retired_pages); + while (r < len && parent->tw.retired_pages[r + 1] == pgno + l) { + ++r; + if (++l == npages) + break; + } +#else + while (w > parent_retired_len && + parent->tw.retired_pages[w - 1] == pgno + l) { + --w; + if (++l == npages) + break; + } +#endif + } + } else if (unlikely(si)) { + l = npages = 1; + spill_remove(parent, si, 1); + kind = "spilled"; + } else { + parent->tw.retired_pages[++w] = pgno; + continue; + } + + DEBUG("reclaim retired parent's %u -> %zu %s page %" PRIaPGNO, npages, l, + kind, pgno); + int err = pnl_insert_span(&parent->tw.relist, pgno, l); + ENSURE(txn->env, err == MDBX_SUCCESS); + } + MDBX_PNL_SETSIZE(parent->tw.retired_pages, w); + + /* Filter-out parent spill list */ + if (parent->tw.spilled.list && + MDBX_PNL_GETSIZE(parent->tw.spilled.list) > 0) { + const pnl_t sl = spill_purge(parent); + size_t len = MDBX_PNL_GETSIZE(sl); + if (len) { + /* Remove refunded pages from parent's spill list */ + if (MDBX_ENABLE_REFUND && + MDBX_PNL_MOST(sl) >= 
(parent->geo.first_unallocated << 1)) { +#if MDBX_PNL_ASCENDING + size_t i = MDBX_PNL_GETSIZE(sl); + assert(MDBX_PNL_MOST(sl) == MDBX_PNL_LAST(sl)); + do { + if ((sl[i] & 1) == 0) + DEBUG("refund parent's spilled page %" PRIaPGNO, sl[i] >> 1); + i -= 1; + } while (i && sl[i] >= (parent->geo.first_unallocated << 1)); + MDBX_PNL_SETSIZE(sl, i); +#else + assert(MDBX_PNL_MOST(sl) == MDBX_PNL_FIRST(sl)); + size_t i = 0; + do { + ++i; + if ((sl[i] & 1) == 0) + DEBUG("refund parent's spilled page %" PRIaPGNO, sl[i] >> 1); + } while (i < len && sl[i + 1] >= (parent->geo.first_unallocated << 1)); + MDBX_PNL_SETSIZE(sl, len -= i); + memmove(sl + 1, sl + 1 + i, len * sizeof(sl[0])); +#endif + } + tASSERT(txn, pnl_check_allocated(sl, (size_t)parent->geo.first_unallocated + << 1)); + + /* Remove reclaimed pages from parent's spill list */ + s = MDBX_PNL_GETSIZE(sl), r = MDBX_PNL_GETSIZE(reclaimed_list); + /* Scanning from end to begin */ + while (s && r) { + if (sl[s] & 1) { + --s; + continue; + } + const pgno_t spilled_pgno = sl[s] >> 1; + const pgno_t reclaimed_pgno = reclaimed_list[r]; + if (reclaimed_pgno != spilled_pgno) { + const bool cmp = MDBX_PNL_ORDERED(spilled_pgno, reclaimed_pgno); + s -= !cmp; + r -= cmp; + } else { + DEBUG("remove reclaimed parent's spilled page %" PRIaPGNO, + reclaimed_pgno); + spill_remove(parent, s, 1); + --s; + --r; + } + } + + /* Remove anything in our dirty list from parent's spill list */ + /* Scanning spill list in descend order */ + const intptr_t step = MDBX_PNL_ASCENDING ? -1 : 1; + s = MDBX_PNL_ASCENDING ? MDBX_PNL_GETSIZE(sl) : 1; + d = src->length; + while (d && (MDBX_PNL_ASCENDING ? 
s > 0 : s <= MDBX_PNL_GETSIZE(sl))) { + if (sl[s] & 1) { + s += step; + continue; + } + const pgno_t spilled_pgno = sl[s] >> 1; + const pgno_t dirty_pgno_form = src->items[d].pgno; + const unsigned npages = dpl_npages(src, d); + const pgno_t dirty_pgno_to = dirty_pgno_form + npages; + if (dirty_pgno_form > spilled_pgno) { + --d; + continue; + } + if (dirty_pgno_to <= spilled_pgno) { + s += step; + continue; + } + + DEBUG("remove dirtied parent's spilled %u page %" PRIaPGNO, npages, + dirty_pgno_form); + spill_remove(parent, s, 1); + s += step; + } + + /* Squash deleted pagenums if we deleted any */ + spill_purge(parent); + } + } + + /* Remove anything in our spill list from parent's dirty list */ + if (txn->tw.spilled.list) { + tASSERT(txn, + pnl_check_allocated(txn->tw.spilled.list, + (size_t)parent->geo.first_unallocated << 1)); + dpl_sift(parent, txn->tw.spilled.list, true); + tASSERT(parent, parent->tw.dirtyroom + parent->tw.dirtylist->length == + (parent->parent ? parent->parent->tw.dirtyroom + : parent->env->options.dp_limit)); + } + + /* Find length of merging our dirty list with parent's and release + * filter-out pages */ + for (l = 0, d = dst->length, s = src->length; d > 0 && s > 0;) { + page_t *sp = src->items[s].ptr; + tASSERT(parent, (sp->flags & ~(P_LEAF | P_DUPFIX | P_BRANCH | P_LARGE | + P_LOOSE | P_SPILLED)) == 0); + const unsigned s_npages = dpl_npages(src, s); + const pgno_t s_pgno = src->items[s].pgno; + + page_t *dp = dst->items[d].ptr; + tASSERT(parent, (dp->flags & ~(P_LEAF | P_DUPFIX | P_BRANCH | P_LARGE | + P_SPILLED)) == 0); + const unsigned d_npages = dpl_npages(dst, d); + const pgno_t d_pgno = dst->items[d].pgno; + + if (d_pgno >= s_pgno + s_npages) { + --d; + ++l; + } else if (d_pgno + d_npages <= s_pgno) { + if (sp->flags != P_LOOSE) { + sp->txnid = parent->front_txnid; + sp->flags &= ~P_SPILLED; + } + --s; + ++l; + } else { + dst->items[d--].ptr = nullptr; + page_shadow_release(txn->env, dp, d_npages); + } + } + assert(dst->sorted == 
dst->length); + tASSERT(parent, dst->detent >= l + d + s); + dst->sorted = l + d + s; /* the merged length */ + + while (s > 0) { + page_t *sp = src->items[s].ptr; + tASSERT(parent, (sp->flags & ~(P_LEAF | P_DUPFIX | P_BRANCH | P_LARGE | + P_LOOSE | P_SPILLED)) == 0); + if (sp->flags != P_LOOSE) { + sp->txnid = parent->front_txnid; + sp->flags &= ~P_SPILLED; + } + --s; + } + + /* Merge our dirty list into parent's, i.e. merge(dst, src) -> dst */ + if (dst->sorted >= dst->length) { + /* from end to begin with dst extending */ + for (l = dst->sorted, s = src->length, d = dst->length; s > 0 && d > 0;) { + if (unlikely(l <= d)) { + /* squash to get a gap of free space for merge */ + for (r = w = 1; r <= d; ++r) + if (dst->items[r].ptr) { + if (w != r) { + dst->items[w] = dst->items[r]; + dst->items[r].ptr = nullptr; + } + ++w; + } + VERBOSE("squash to begin for extending-merge %zu -> %zu", d, w - 1); + d = w - 1; + continue; + } + assert(l > d); + if (dst->items[d].ptr) { + dst->items[l--] = (dst->items[d].pgno > src->items[s].pgno) + ? dst->items[d--] + : src->items[s--]; + } else + --d; + } + if (s > 0) { + assert(l == s); + while (d > 0) { + assert(dst->items[d].ptr == nullptr); + --d; + } + do { + assert(l > 0); + dst->items[l--] = src->items[s--]; + } while (s > 0); + } else { + assert(l == d); + while (l > 0) { + assert(dst->items[l].ptr != nullptr); + --l; + } + } + } else { + /* from begin to end with shrinking (a lot of new large/overflow pages) */ + for (l = s = d = 1; s <= src->length && d <= dst->length;) { + if (unlikely(l >= d)) { + /* squash to get a gap of free space for merge */ + for (r = w = dst->length; r >= d; --r) + if (dst->items[r].ptr) { + if (w != r) { + dst->items[w] = dst->items[r]; + dst->items[r].ptr = nullptr; + } + --w; + } + VERBOSE("squash to end for shrinking-merge %zu -> %zu", d, w + 1); + d = w + 1; + continue; + } + assert(l < d); + if (dst->items[d].ptr) { + dst->items[l++] = (dst->items[d].pgno < src->items[s].pgno) + ? 
dst->items[d++] + : src->items[s++]; + } else + ++d; + } + if (s <= src->length) { + assert(dst->sorted - l == src->length - s); + while (d <= dst->length) { + assert(dst->items[d].ptr == nullptr); + --d; + } + do { + assert(l <= dst->sorted); + dst->items[l++] = src->items[s++]; + } while (s <= src->length); + } else { + assert(dst->sorted - l == dst->length - d); + while (l <= dst->sorted) { + assert(l <= d && d <= dst->length && dst->items[d].ptr); + dst->items[l++] = dst->items[d++]; + } + } + } + parent->tw.dirtyroom -= dst->sorted - dst->length; + assert(parent->tw.dirtyroom <= parent->env->options.dp_limit); + dpl_setlen(dst, dst->sorted); + parent->tw.dirtylru = txn->tw.dirtylru; + + /* В текущем понимании выгоднее пересчитать кол-во страниц, + * чем подмешивать лишние ветвления и вычисления в циклы выше. */ + dst->pages_including_loose = 0; + for (r = 1; r <= dst->length; ++r) + dst->pages_including_loose += dpl_npages(dst, r); + + tASSERT(parent, dpl_check(parent)); + dpl_free(txn); + + if (txn->tw.spilled.list) { + if (parent->tw.spilled.list) { + /* Must not fail since space was preserved above. 
*/ + pnl_merge(parent->tw.spilled.list, txn->tw.spilled.list); + pnl_free(txn->tw.spilled.list); + } else { + parent->tw.spilled.list = txn->tw.spilled.list; + parent->tw.spilled.least_removed = txn->tw.spilled.least_removed; + } + tASSERT(parent, dpl_check(parent)); + } + + parent->flags &= ~MDBX_TXN_HAS_CHILD; + if (parent->tw.spilled.list) { + assert(pnl_check_allocated(parent->tw.spilled.list, + (size_t)parent->geo.first_unallocated << 1)); + if (MDBX_PNL_GETSIZE(parent->tw.spilled.list)) + parent->flags |= MDBX_TXN_SPILLS; + } +} + +static void take_gcprof(MDBX_txn *txn, MDBX_commit_latency *latency) { + MDBX_env *const env = txn->env; + if (MDBX_ENABLE_PROFGC) { + pgop_stat_t *const ptr = &env->lck->pgops; + latency->gc_prof.work_counter = ptr->gc_prof.work.spe_counter; + latency->gc_prof.work_rtime_monotonic = + osal_monotime_to_16dot16(ptr->gc_prof.work.rtime_monotonic); + latency->gc_prof.work_xtime_cpu = + osal_monotime_to_16dot16(ptr->gc_prof.work.xtime_cpu); + latency->gc_prof.work_rsteps = ptr->gc_prof.work.rsteps; + latency->gc_prof.work_xpages = ptr->gc_prof.work.xpages; + latency->gc_prof.work_majflt = ptr->gc_prof.work.majflt; + + latency->gc_prof.self_counter = ptr->gc_prof.self.spe_counter; + latency->gc_prof.self_rtime_monotonic = + osal_monotime_to_16dot16(ptr->gc_prof.self.rtime_monotonic); + latency->gc_prof.self_xtime_cpu = + osal_monotime_to_16dot16(ptr->gc_prof.self.xtime_cpu); + latency->gc_prof.self_rsteps = ptr->gc_prof.self.rsteps; + latency->gc_prof.self_xpages = ptr->gc_prof.self.xpages; + latency->gc_prof.self_majflt = ptr->gc_prof.self.majflt; + + latency->gc_prof.wloops = ptr->gc_prof.wloops; + latency->gc_prof.coalescences = ptr->gc_prof.coalescences; + latency->gc_prof.wipes = ptr->gc_prof.wipes; + latency->gc_prof.flushes = ptr->gc_prof.flushes; + latency->gc_prof.kicks = ptr->gc_prof.kicks; + if (txn == env->basal_txn) + memset(&ptr->gc_prof, 0, sizeof(ptr->gc_prof)); + } else + memset(&latency->gc_prof, 0, 
sizeof(latency->gc_prof)); +} + +int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { + STATIC_ASSERT(MDBX_TXN_FINISHED == + MDBX_TXN_BLOCKED - MDBX_TXN_HAS_CHILD - MDBX_TXN_ERROR); + const uint64_t ts_0 = latency ? osal_monotime() : 0; + uint64_t ts_1 = 0, ts_2 = 0, ts_3 = 0, ts_4 = 0, ts_5 = 0, gc_cputime = 0; + + int rc = check_txn(txn, MDBX_TXN_FINISHED); + if (unlikely(rc != MDBX_SUCCESS)) { + if (latency) + memset(latency, 0, sizeof(*latency)); + return rc; + } + + MDBX_env *const env = txn->env; +#if MDBX_ENV_CHECKPID + if (unlikely(env->pid != osal_getpid())) { + env->flags |= ENV_FATAL_ERROR; + if (latency) + memset(latency, 0, sizeof(*latency)); + return MDBX_PANIC; + } +#endif /* MDBX_ENV_CHECKPID */ + + if (unlikely(txn->flags & MDBX_TXN_ERROR)) { + rc = MDBX_RESULT_TRUE; + goto fail; + } + + /* txn_end() mode for a commit which writes nothing */ + unsigned end_mode = + TXN_END_PURE_COMMIT | TXN_END_UPDATE | TXN_END_SLOT | TXN_END_FREE; + if (unlikely(txn->flags & MDBX_TXN_RDONLY)) + goto done; + + if ((txn->flags & MDBX_NOSTICKYTHREADS) && + unlikely(txn->owner != osal_thread_self())) { + rc = MDBX_THREAD_MISMATCH; + goto fail; + } + + if (txn->nested) { + rc = mdbx_txn_commit_ex(txn->nested, nullptr); + tASSERT(txn, txn->nested == nullptr); + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + } + + if (unlikely(txn != env->txn)) { + DEBUG("%s", "attempt to commit unknown transaction"); + rc = MDBX_EINVAL; + goto fail; + } + + if (txn->parent) { + tASSERT(txn, audit_ex(txn, 0, false) == 0); + eASSERT(env, txn != env->basal_txn); + MDBX_txn *const parent = txn->parent; + eASSERT(env, parent->signature == txn_signature); + eASSERT(env, + parent->nested == txn && (parent->flags & MDBX_TXN_HAS_CHILD) != 0); + eASSERT(env, dpl_check(txn)); + + if (txn->tw.dirtylist->length == 0 && !(txn->flags & MDBX_TXN_DIRTY) && + parent->n_dbi == txn->n_dbi) { + TXN_FOREACH_DBI_ALL(txn, i) { + tASSERT(txn, (txn->dbi_state[i] & DBI_DIRTY) == 0); + if 
((txn->dbi_state[i] & DBI_STALE) &&
+            !(parent->dbi_state[i] & DBI_STALE))
+          tASSERT(txn,
+                  memcmp(&parent->dbs[i], &txn->dbs[i], sizeof(tree_t)) == 0);
+      }
+
+      tASSERT(txn, memcmp(&parent->geo, &txn->geo, sizeof(parent->geo)) == 0);
+      tASSERT(txn, memcmp(&parent->canary, &txn->canary,
+                          sizeof(parent->canary)) == 0);
+      tASSERT(txn, !txn->tw.spilled.list ||
+                       MDBX_PNL_GETSIZE(txn->tw.spilled.list) == 0);
+      tASSERT(txn, txn->tw.loose_count == 0);
+
+      /* Fast completion of a pure nested transaction: nothing is dirty, so
+       * there is nothing to merge into the parent. */
+      VERBOSE("fast-complete pure nested txn %" PRIaTXN, txn->txnid);
+      end_mode = TXN_END_PURE_COMMIT | TXN_END_SLOT | TXN_END_FREE;
+      goto done;
+    }
+
+    /* Preserve space for spill list to avoid parent's state corruption
+     * if allocation fails.
+     * NOTE(review): parent->tw.retired_pages is read here as an integer
+     * length, not as a list pointer -- presumably the code that begins a
+     * nested transaction stashes the parent's retired count in this field;
+     * confirm against that code. */
+    const size_t parent_retired_len = (uintptr_t)parent->tw.retired_pages;
+    tASSERT(txn, parent_retired_len <= MDBX_PNL_GETSIZE(txn->tw.retired_pages));
+    const size_t retired_delta =
+        MDBX_PNL_GETSIZE(txn->tw.retired_pages) - parent_retired_len;
+    if (retired_delta) {
+      rc = pnl_need(&txn->tw.relist, retired_delta);
+      if (unlikely(rc != MDBX_SUCCESS))
+        goto fail;
+    }
+
+    if (txn->tw.spilled.list) {
+      if (parent->tw.spilled.list) {
+        rc = pnl_need(&parent->tw.spilled.list,
+                      MDBX_PNL_GETSIZE(txn->tw.spilled.list));
+        if (unlikely(rc != MDBX_SUCCESS))
+          goto fail;
+      }
+      spill_purge(txn);
+    }
+
+    /* Make sure the parent's dirty list can hold both lists. */
+    if (unlikely(txn->tw.dirtylist->length + parent->tw.dirtylist->length >
+                     parent->tw.dirtylist->detent &&
+                 !dpl_reserve(parent, txn->tw.dirtylist->length +
+                                          parent->tw.dirtylist->length))) {
+      rc = MDBX_ENOMEM;
+      goto fail;
+    }
+
+    //-------------------------------------------------------------------------
+    /* All reservations above succeeded; below the child's state is moved
+     * into the parent and no `goto fail` follows in this branch. */
+
+    parent->tw.gc.reclaimed = txn->tw.gc.reclaimed;
+    txn->tw.gc.reclaimed = nullptr;
+
+    parent->tw.retired_pages = txn->tw.retired_pages;
+    txn->tw.retired_pages = nullptr;
+
+    pnl_free(parent->tw.relist);
+    parent->tw.relist = txn->tw.relist;
+    txn->tw.relist = nullptr;
+    parent->tw.gc.time_acc = txn->tw.gc.time_acc;
+    parent->tw.gc.last_reclaimed = txn->tw.gc.last_reclaimed;
+
+    parent->geo = txn->geo;
+    parent->canary = txn->canary;
+    parent->flags |= txn->flags & MDBX_TXN_DIRTY;
+
+    /* Move loose pages to parent */
+#if MDBX_ENABLE_REFUND
+    parent->tw.loose_refund_wl = txn->tw.loose_refund_wl;
+#endif /* MDBX_ENABLE_REFUND */
+    parent->tw.loose_count = txn->tw.loose_count;
+    parent->tw.loose_pages = txn->tw.loose_pages;
+
+    /* Merge our cursors into parent's and close them */
+    done_cursors(txn, true);
+    end_mode |= TXN_END_EOTDONE;
+
+    /* Update parent's DBs array */
+    eASSERT(env, parent->n_dbi == txn->n_dbi);
+    TXN_FOREACH_DBI_ALL(txn, dbi) {
+      if (txn->dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)) {
+        parent->dbs[dbi] = txn->dbs[dbi];
+        /* preserve parent's status */
+        const uint8_t state =
+            txn->dbi_state[dbi] |
+            (parent->dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY));
+        DEBUG("dbi %zu dbi-state %s 0x%02x -> 0x%02x", dbi,
+              (parent->dbi_state[dbi] != state) ? "update" : "still",
+              parent->dbi_state[dbi], state);
+        parent->dbi_state[dbi] = state;
+      } else {
+        eASSERT(env,
+                txn->dbi_state[dbi] == (parent->dbi_state[dbi] &
+                                        ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY)));
+      }
+    }
+
+    /* A nested commit has no gc-update/audit/write/sync stages, so all the
+     * later timestamps collapse onto ts_1. */
+    if (latency) {
+      ts_1 = osal_monotime();
+      ts_2 = /* no gc-update */ ts_1;
+      ts_3 = /* no audit */ ts_2;
+      ts_4 = /* no write */ ts_3;
+      ts_5 = /* no sync */ ts_4;
+    }
+    txn_merge(parent, txn, parent_retired_len);
+    env->txn = parent;
+    parent->nested = nullptr;
+    tASSERT(parent, dpl_check(parent));
+
+#if MDBX_ENABLE_REFUND
+    txn_refund(parent);
+    if (ASSERT_ENABLED()) {
+      /* Check parent's loose pages not suitable for refund */
+      for (page_t *lp = parent->tw.loose_pages; lp; lp = page_next(lp)) {
+        tASSERT(parent, lp->pgno < parent->tw.loose_refund_wl &&
+                            lp->pgno + 1 < parent->geo.first_unallocated);
+        MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *));
+        VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *));
+      }
+      /* Check parent's reclaimed pages not suitable for refund */
+      if (MDBX_PNL_GETSIZE(parent->tw.relist))
+        tASSERT(parent, MDBX_PNL_MOST(parent->tw.relist) + 1 <
+                            parent->geo.first_unallocated);
+    }
+#endif /* MDBX_ENABLE_REFUND */
+
+    /* The nested transaction object is released here, so this branch skips
+     * the `done` label (and its txn_end() call) and goes straight to
+     * reporting latency. */
+    txn->signature = 0;
+    osal_free(txn);
+    tASSERT(parent, audit_ex(parent, 0, false) == 0);
+    rc = MDBX_SUCCESS;
+    goto provide_latency;
+  }
+
+  if (!txn->tw.dirtylist) {
+    tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC);
+  } else {
+    tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
+    tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length ==
+                     (txn->parent ? txn->parent->tw.dirtyroom
+                                  : env->options.dp_limit));
+  }
+  done_cursors(txn, false);
+  end_mode |= TXN_END_EOTDONE;
+
+  /* Nothing dirty and nothing spilled: an empty commit. */
+  if ((!txn->tw.dirtylist || txn->tw.dirtylist->length == 0) &&
+      (txn->flags & (MDBX_TXN_DIRTY | MDBX_TXN_SPILLS)) == 0) {
+    TXN_FOREACH_DBI_ALL(txn, i) {
+      tASSERT(txn, !(txn->dbi_state[i] & DBI_DIRTY));
+    }
+#if defined(MDBX_NOSUCCESS_EMPTY_COMMIT) && MDBX_NOSUCCESS_EMPTY_COMMIT
+    rc = txn_end(txn, end_mode);
+    if (unlikely(rc != MDBX_SUCCESS))
+      goto fail;
+    rc = MDBX_RESULT_TRUE;
+    goto provide_latency;
+#else
+    goto done;
+#endif /* MDBX_NOSUCCESS_EMPTY_COMMIT */
+  }
+
+  DEBUG("committing txn %" PRIaTXN " %p on env %p, root page %" PRIaPGNO
+        "/%" PRIaPGNO,
+        txn->txnid, (void *)txn, (void *)env, txn->dbs[MAIN_DBI].root,
+        txn->dbs[FREE_DBI].root);
+
+  if (txn->n_dbi > CORE_DBS) {
+    /* Update subDB root pointers */
+    cursor_couple_t cx;
+    rc = cursor_init(&cx.outer, txn, MAIN_DBI);
+    if (unlikely(rc != MDBX_SUCCESS))
+      goto fail;
+    /* Temporarily link the on-stack cursor at the head of MainDB's cursor
+     * list; it is unlinked again on every exit from the loop below. */
+    cx.outer.next = txn->cursors[MAIN_DBI];
+    txn->cursors[MAIN_DBI] = &cx.outer;
+    TXN_FOREACH_DBI_USER(txn, i) {
+      if ((txn->dbi_state[i] & DBI_DIRTY) == 0)
+        continue;
+      tree_t *const db = &txn->dbs[i];
+      DEBUG("update main's entry for sub-db %zu, mod_txnid %" PRIaTXN
+            " -> %" PRIaTXN,
+            i, db->mod_txnid, txn->txnid);
+      /* mod_txnid may be > front after committing nested transactions */
+      db->mod_txnid = txn->txnid;
+      MDBX_val data = {db, sizeof(tree_t)};
+      rc = cursor_put(&cx.outer, &env->kvs[i].name, &data, N_SUBDATA);
+      if (unlikely(rc != MDBX_SUCCESS)) {
+        txn->cursors[MAIN_DBI] = cx.outer.next;
+        goto fail;
+      }
+    }
+    txn->cursors[MAIN_DBI] = cx.outer.next;
+  }
+
+  ts_1 = latency ? osal_monotime() : 0;
+
+  gcu_t gcu_ctx;
+  gc_cputime = latency ? osal_cputime(nullptr) : 0;
+  rc = gc_update_init(txn, &gcu_ctx);
+  if (unlikely(rc != MDBX_SUCCESS))
+    goto fail;
+  rc = gc_update(txn, &gcu_ctx);
+  gc_cputime = latency ? osal_cputime(nullptr) - gc_cputime : 0;
+  if (unlikely(rc != MDBX_SUCCESS))
+    goto fail;
+
+  tASSERT(txn, txn->tw.loose_count == 0);
+  txn->dbs[FREE_DBI].mod_txnid = (txn->dbi_state[FREE_DBI] & DBI_DIRTY)
+                                     ? txn->txnid
+                                     : txn->dbs[FREE_DBI].mod_txnid;
+
+  txn->dbs[MAIN_DBI].mod_txnid = (txn->dbi_state[MAIN_DBI] & DBI_DIRTY)
+                                     ? txn->txnid
+                                     : txn->dbs[MAIN_DBI].mod_txnid;
+
+  ts_2 = latency ? osal_monotime() : 0;
+  ts_3 = ts_2;
+  if (AUDIT_ENABLED()) {
+    rc = audit_ex(txn, MDBX_PNL_GETSIZE(txn->tw.retired_pages), true);
+    ts_3 = osal_monotime();
+    if (unlikely(rc != MDBX_SUCCESS))
+      goto fail;
+  }
+
+  bool need_flush_for_nometasync = false;
+  const meta_ptr_t head = meta_recent(env, &txn->tw.troika);
+  const uint32_t meta_sync_txnid =
+      atomic_load32(&env->lck->meta_sync_txnid, mo_Relaxed);
+  /* sync prev meta */
+  if (head.is_steady && meta_sync_txnid != (uint32_t)head.txnid) {
+    /* Fix for a flaw inherited from LMDB:
+     *
+     * Everything is fine if none of the processes working with the DB use
+     * WRITEMAP. Then the meta page (updated, but not yet flushed to disk)
+     * gets persisted by the fdatasync() issued when this transaction's data
+     * is written.
+     *
+     * Everything is fine if all processes working with the DB use WRITEMAP
+     * without MDBX_AVOID_MSYNC. Then the meta page (updated, but not yet
+     * flushed to disk) gets persisted by the msync() issued when this
+     * transaction's data is written.
+     *
+     * But if the processes working with the DB use both methods -- sync()
+     * in MDBX_WRITEMAP mode as well as writes through a file descriptor --
+     * it becomes impossible to guarantee that both the previous
+     * transaction's meta page and the current transaction's data reach the
+     * disk by means of the single sync operation performed after writing
+     * the current transaction's data. Accordingly, the meta page has to be
+     * updated explicitly, which completely destroys the benefit of
+     * NOMETASYNC. */
+    const uint32_t txnid_dist =
+        ((txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC)
+            ? MDBX_NOMETASYNC_LAZY_FD
+            : MDBX_NOMETASYNC_LAZY_WRITEMAP;
+    /* The point of this "magic" is to avoid a separate fdatasync() or
+     * msync() call for guaranteed durability of the meta page that was
+     * "lazily" submitted for writing in the previous transaction but was
+     * not flushed to disk because MDBX_NOMETASYNC mode is active. */
+    if (
+#if defined(_WIN32) || defined(_WIN64)
+        !env->ioring.overlapped_fd &&
+#endif
+        meta_sync_txnid == (uint32_t)head.txnid - txnid_dist)
+      need_flush_for_nometasync = true;
+    else {
+      rc = meta_sync(env, head);
+      if (unlikely(rc != MDBX_SUCCESS)) {
+        ERROR("txn-%s: error %d", "presync-meta", rc);
+        goto fail;
+      }
+    }
+  }
+
+  if (txn->tw.dirtylist) {
+    tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
+    tASSERT(txn, txn->tw.loose_count == 0);
+
+    /* Pick the file handle used for writing the dirty pages. */
+    mdbx_filehandle_t fd =
+#if defined(_WIN32) || defined(_WIN64)
+        env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd;
+    (void)need_flush_for_nometasync;
+#else
+        (need_flush_for_nometasync || env->dsync_fd == INVALID_HANDLE_VALUE ||
+         txn->tw.dirtylist->length > env->options.writethrough_threshold ||
+         atomic_load64(&env->lck->unsynced_pages, mo_Relaxed))
+            ? env->lazy_fd
+            : env->dsync_fd;
+#endif /* Windows */
+
+    iov_ctx_t write_ctx;
+    rc = iov_init(txn, &write_ctx, txn->tw.dirtylist->length,
+                  txn->tw.dirtylist->pages_including_loose, fd, false);
+    if (unlikely(rc != MDBX_SUCCESS)) {
+      ERROR("txn-%s: error %d", "iov-init", rc);
+      goto fail;
+    }
+
+    rc = txn_write(txn, &write_ctx);
+    if (unlikely(rc != MDBX_SUCCESS)) {
+      ERROR("txn-%s: error %d", "write", rc);
+      goto fail;
+    }
+  } else {
+    /* No dirty list: only account the pages dirtied through the map. */
+    tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC);
+    env->lck->unsynced_pages.weak += txn->tw.writemap_dirty_npages;
+    if (!env->lck->eoos_timestamp.weak)
+      env->lck->eoos_timestamp.weak = osal_monotime();
+  }
+
+  /* TODO: use ctx.flush_begin & ctx.flush_end for range-sync */
+  ts_4 = latency ? osal_monotime() : 0;
+
+  /* Build the new meta page from the current head and the txn's state. */
+  meta_t meta;
+  memcpy(meta.magic_and_version, head.ptr_c->magic_and_version, 8);
+  meta.reserve16 = head.ptr_c->reserve16;
+  meta.validator_id = head.ptr_c->validator_id;
+  meta.extra_pagehdr = head.ptr_c->extra_pagehdr;
+  unaligned_poke_u64(4, meta.pages_retired,
+                     unaligned_peek_u64(4, head.ptr_c->pages_retired) +
+                         MDBX_PNL_GETSIZE(txn->tw.retired_pages));
+  meta.geometry = txn->geo;
+  meta.trees.gc = txn->dbs[FREE_DBI];
+  meta.trees.main = txn->dbs[MAIN_DBI];
+  meta.canary = txn->canary;
+
+  txnid_t commit_txnid = txn->txnid;
+#if MDBX_ENABLE_BIGFOOT
+  if (gcu_ctx.bigfoot > txn->txnid) {
+    commit_txnid = gcu_ctx.bigfoot;
+    TRACE("use @%" PRIaTXN " (+%zu) for commit bigfoot-txn", commit_txnid,
+          (size_t)(commit_txnid - txn->txnid));
+  }
+#endif
+  meta.unsafe_sign = DATASIGN_NONE;
+  meta_set_txnid(env, &meta, commit_txnid);
+
+  rc = dxb_sync_locked(env, env->flags | txn->flags | txn_shrink_allowed, &meta,
+                       &txn->tw.troika);
+
+  ts_5 = latency ?
osal_monotime() : 0; + if (unlikely(rc != MDBX_SUCCESS)) { + env->flags |= ENV_FATAL_ERROR; + ERROR("txn-%s: error %d", "sync", rc); + goto fail; + } + + end_mode = TXN_END_COMMITTED | TXN_END_UPDATE | TXN_END_EOTDONE; + +done: + if (latency) + take_gcprof(txn, latency); + rc = txn_end(txn, end_mode); + +provide_latency: + if (latency) { + latency->preparation = ts_1 ? osal_monotime_to_16dot16(ts_1 - ts_0) : 0; + latency->gc_wallclock = + (ts_2 > ts_1) ? osal_monotime_to_16dot16(ts_2 - ts_1) : 0; + latency->gc_cputime = gc_cputime ? osal_monotime_to_16dot16(gc_cputime) : 0; + latency->audit = (ts_3 > ts_2) ? osal_monotime_to_16dot16(ts_3 - ts_2) : 0; + latency->write = (ts_4 > ts_3) ? osal_monotime_to_16dot16(ts_4 - ts_3) : 0; + latency->sync = (ts_5 > ts_4) ? osal_monotime_to_16dot16(ts_5 - ts_4) : 0; + const uint64_t ts_6 = osal_monotime(); + latency->ending = ts_5 ? osal_monotime_to_16dot16(ts_6 - ts_5) : 0; + latency->whole = osal_monotime_to_16dot16_noUnderflow(ts_6 - ts_0); + } + return rc; + +fail: + txn->flags |= MDBX_TXN_ERROR; + if (latency) + take_gcprof(txn, latency); + txn_abort(txn); + goto provide_latency; +} + +int txn_abort(MDBX_txn *txn) { + if (txn->flags & MDBX_TXN_RDONLY) + /* LY: don't close DBI-handles */ + return txn_end(txn, TXN_END_ABORT | TXN_END_UPDATE | TXN_END_SLOT | + TXN_END_FREE); + + if (unlikely(txn->flags & MDBX_TXN_FINISHED)) + return MDBX_BAD_TXN; + + if (txn->nested) + txn_abort(txn->nested); + + tASSERT(txn, (txn->flags & MDBX_TXN_ERROR) || dpl_check(txn)); + return txn_end(txn, TXN_END_ABORT | TXN_END_SLOT | TXN_END_FREE); +} + +int txn_renew(MDBX_txn *txn, unsigned flags) { + MDBX_env *const env = txn->env; + int rc; + +#if MDBX_ENV_CHECKPID + if (unlikely(env->pid != osal_getpid())) { + env->flags |= ENV_FATAL_ERROR; + return MDBX_PANIC; + } +#endif /* MDBX_ENV_CHECKPID */ + + const uintptr_t tid = osal_thread_self(); + flags |= env->flags & (MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP); + if (flags & MDBX_TXN_RDONLY) { + 
eASSERT(env, (flags & ~(txn_ro_begin_flags | MDBX_WRITEMAP | + MDBX_NOSTICKYTHREADS)) == 0); + txn->flags = flags; + reader_slot_t *r = txn->to.reader; + STATIC_ASSERT(sizeof(uintptr_t) <= sizeof(r->tid)); + if (likely(env->flags & ENV_TXKEY)) { + eASSERT(env, !(env->flags & MDBX_NOSTICKYTHREADS)); + r = thread_rthc_get(env->me_txkey); + if (likely(r)) { + if (unlikely(!r->pid.weak) && + (globals.runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN)) { + thread_rthc_set(env->me_txkey, nullptr); + r = nullptr; + } else { + eASSERT(env, r->pid.weak == env->pid); + eASSERT(env, r->tid.weak == osal_thread_self()); + } + } + } else { + eASSERT(env, !env->lck_mmap.lck || (env->flags & MDBX_NOSTICKYTHREADS)); + } + + if (likely(r)) { + if (unlikely(r->pid.weak != env->pid || + r->txnid.weak < SAFE64_INVALID_THRESHOLD)) + return MDBX_BAD_RSLOT; + } else if (env->lck_mmap.lck) { + bsr_t brs = mvcc_bind_slot(env, tid); + if (unlikely(brs.err != MDBX_SUCCESS)) + return brs.err; + r = brs.rslot; + } + txn->to.reader = r; + STATIC_ASSERT(MDBX_TXN_RDONLY_PREPARE > MDBX_TXN_RDONLY); + if (flags & (MDBX_TXN_RDONLY_PREPARE - MDBX_TXN_RDONLY)) { + eASSERT(env, txn->txnid == 0); + eASSERT(env, txn->owner == 0); + eASSERT(env, txn->n_dbi == 0); + if (likely(r)) { + eASSERT(env, r->snapshot_pages_used.weak == 0); + eASSERT(env, r->txnid.weak >= SAFE64_INVALID_THRESHOLD); + atomic_store32(&r->snapshot_pages_used, 0, mo_Relaxed); + } + txn->flags = MDBX_TXN_RDONLY | MDBX_TXN_FINISHED; + return MDBX_SUCCESS; + } + txn->owner = tid; + + /* Seek & fetch the last meta */ + uint64_t timestamp = 0; + size_t loop = 0; + troika_t troika = meta_tap(env); + while (1) { + const meta_ptr_t head = + likely(env->stuck_meta < 0) + ? 
/* regular */ meta_recent(env, &troika) + : /* recovery mode */ meta_ptr(env, env->stuck_meta); + if (likely(r)) { + safe64_reset(&r->txnid, false); + atomic_store32(&r->snapshot_pages_used, + head.ptr_v->geometry.first_unallocated, mo_Relaxed); + atomic_store64( + &r->snapshot_pages_retired, + unaligned_peek_u64_volatile(4, head.ptr_v->pages_retired), + mo_Relaxed); + safe64_write(&r->txnid, head.txnid); + eASSERT(env, r->pid.weak == osal_getpid()); + eASSERT(env, r->tid.weak == ((env->flags & MDBX_NOSTICKYTHREADS) + ? 0 + : osal_thread_self())); + eASSERT(env, r->txnid.weak == head.txnid || + (r->txnid.weak >= SAFE64_INVALID_THRESHOLD && + head.txnid < env->lck->cached_oldest.weak)); + atomic_store32(&env->lck->rdt_refresh_flag, true, mo_AcquireRelease); + } else { + /* exclusive mode without lck */ + eASSERT(env, !env->lck_mmap.lck && env->lck == lckless_stub(env)); + } + jitter4testing(true); + + /* Snap the state from current meta-head */ + txn->txnid = head.txnid; + if (likely(env->stuck_meta < 0) && + unlikely(meta_should_retry(env, &troika) || + head.txnid < atomic_load64(&env->lck->cached_oldest, + mo_AcquireRelease))) { + if (unlikely(++loop > 42)) { + ERROR("bailout waiting for valid snapshot (%s)", + "metapages are too volatile"); + rc = MDBX_PROBLEM; + txn->txnid = INVALID_TXNID; + if (likely(r)) + safe64_reset(&r->txnid, false); + goto bailout; + } + timestamp = 0; + continue; + } + + rc = coherency_check_head(txn, head, ×tamp); + jitter4testing(false); + if (likely(rc == MDBX_SUCCESS)) + break; + + if (unlikely(rc != MDBX_RESULT_TRUE)) { + txn->txnid = INVALID_TXNID; + if (likely(r)) + safe64_reset(&r->txnid, false); + goto bailout; + } + } + + if (unlikely(txn->txnid < MIN_TXNID || txn->txnid > MAX_TXNID)) { + ERROR("%s", "environment corrupted by died writer, must shutdown!"); + if (likely(r)) + safe64_reset(&r->txnid, false); + txn->txnid = INVALID_TXNID; + rc = MDBX_CORRUPTED; + goto bailout; + } + ENSURE(env, + txn->txnid >= + /* paranoia is 
appropriate here */ env->lck->cached_oldest.weak); + tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY); + tASSERT(txn, check_sdb_flags(txn->dbs[MAIN_DBI].flags)); + } else { + eASSERT(env, (flags & ~(txn_rw_begin_flags | MDBX_TXN_SPILLS | + MDBX_WRITEMAP | MDBX_NOSTICKYTHREADS)) == 0); + if (unlikely(txn->owner == tid || + /* not recovery mode */ env->stuck_meta >= 0)) + return MDBX_BUSY; + lck_t *const lck = env->lck_mmap.lck; + if (lck && (env->flags & MDBX_NOSTICKYTHREADS) == 0 && + (globals.runtime_flags & MDBX_DBG_LEGACY_OVERLAP) == 0) { + const size_t snap_nreaders = + atomic_load32(&lck->rdt_length, mo_AcquireRelease); + for (size_t i = 0; i < snap_nreaders; ++i) { + if (atomic_load32(&lck->rdt[i].pid, mo_Relaxed) == env->pid && + unlikely(atomic_load64(&lck->rdt[i].tid, mo_Relaxed) == tid)) { + const txnid_t txnid = safe64_read(&lck->rdt[i].txnid); + if (txnid >= MIN_TXNID && txnid <= MAX_TXNID) + return MDBX_TXN_OVERLAPPING; + } + } + } + + /* Not yet touching txn == env->basal_txn, it may be active */ + jitter4testing(false); + rc = lck_txn_lock(env, !!(flags & MDBX_TXN_TRY)); + if (unlikely(rc)) + return rc; + if (unlikely(env->flags & ENV_FATAL_ERROR)) { + lck_txn_unlock(env); + return MDBX_PANIC; + } +#if defined(_WIN32) || defined(_WIN64) + if (unlikely(!env->dxb_mmap.base)) { + lck_txn_unlock(env); + return MDBX_EPERM; + } +#endif /* Windows */ + + txn->tw.troika = meta_tap(env); + const meta_ptr_t head = meta_recent(env, &txn->tw.troika); + uint64_t timestamp = 0; + while ("workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269") { + rc = coherency_check_head(txn, head, ×tamp); + if (likely(rc == MDBX_SUCCESS)) + break; + if (unlikely(rc != MDBX_RESULT_TRUE)) + goto bailout; + } + eASSERT(env, meta_txnid(head.ptr_v) == head.txnid); + txn->txnid = safe64_txnid_next(head.txnid); + if (unlikely(txn->txnid > MAX_TXNID)) { + rc = MDBX_TXN_FULL; + ERROR("txnid overflow, raise %d", rc); + goto bailout; + } + + tASSERT(txn, 
txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY); + tASSERT(txn, check_sdb_flags(txn->dbs[MAIN_DBI].flags)); + txn->flags = flags; + txn->nested = nullptr; + txn->tw.loose_pages = nullptr; + txn->tw.loose_count = 0; +#if MDBX_ENABLE_REFUND + txn->tw.loose_refund_wl = 0; +#endif /* MDBX_ENABLE_REFUND */ + MDBX_PNL_SETSIZE(txn->tw.retired_pages, 0); + txn->tw.spilled.list = nullptr; + txn->tw.spilled.least_removed = 0; + txn->tw.gc.time_acc = 0; + txn->tw.gc.last_reclaimed = 0; + if (txn->tw.gc.reclaimed) + MDBX_PNL_SETSIZE(txn->tw.gc.reclaimed, 0); + env->txn = txn; + + if ((txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) { + rc = dpl_alloc(txn); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + txn->tw.dirtyroom = txn->env->options.dp_limit; + txn->tw.dirtylru = MDBX_DEBUG ? UINT32_MAX / 3 - 42 : 0; + } else { + tASSERT(txn, txn->tw.dirtylist == nullptr); + txn->tw.dirtylist = nullptr; + txn->tw.dirtyroom = MAX_PAGENO; + txn->tw.dirtylru = 0; + } + eASSERT(env, txn->tw.writemap_dirty_npages == 0); + eASSERT(env, txn->tw.writemap_spilled_npages == 0); + } + + txn->front_txnid = + txn->txnid + ((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == 0); + + /* Setup db info */ + tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY); + tASSERT(txn, check_sdb_flags(txn->dbs[MAIN_DBI].flags)); + VALGRIND_MAKE_MEM_UNDEFINED(txn->dbi_state, env->max_dbi); +#if MDBX_ENABLE_DBI_SPARSE + txn->n_dbi = CORE_DBS; + VALGRIND_MAKE_MEM_UNDEFINED( + txn->dbi_sparse, + ceil_powerof2(env->max_dbi, CHAR_BIT * sizeof(txn->dbi_sparse[0])) / + CHAR_BIT); + txn->dbi_sparse[0] = (1 << CORE_DBS) - 1; +#else + txn->n_dbi = (env->n_dbi < 8) ? 
env->n_dbi : 8; + if (txn->n_dbi > CORE_DBS) + memset(txn->dbi_state + CORE_DBS, 0, txn->n_dbi - CORE_DBS); +#endif /* MDBX_ENABLE_DBI_SPARSE */ + txn->dbi_state[FREE_DBI] = DBI_LINDO | DBI_VALID; + txn->dbi_state[MAIN_DBI] = DBI_LINDO | DBI_VALID; + txn->cursors[FREE_DBI] = nullptr; + txn->cursors[MAIN_DBI] = nullptr; + txn->dbi_seqs[FREE_DBI] = 0; + txn->dbi_seqs[MAIN_DBI] = + atomic_load32(&env->dbi_seqs[MAIN_DBI], mo_AcquireRelease); + + if (unlikely(env->dbs_flags[MAIN_DBI] != + (DB_VALID | txn->dbs[MAIN_DBI].flags))) { + const bool need_txn_lock = env->basal_txn && env->basal_txn->owner != tid; + bool should_unlock = false; + if (need_txn_lock) { + rc = lck_txn_lock(env, true); + if (rc == MDBX_SUCCESS) + should_unlock = true; + else if (rc != MDBX_BUSY && rc != MDBX_EDEADLK) + goto bailout; + } + rc = osal_fastmutex_acquire(&env->dbi_lock); + if (likely(rc == MDBX_SUCCESS)) { + uint32_t seq = dbi_seq_next(env, MAIN_DBI); + /* проверяем повторно после захвата блокировки */ + if (env->dbs_flags[MAIN_DBI] != (DB_VALID | txn->dbs[MAIN_DBI].flags)) { + if (!need_txn_lock || should_unlock || + /* если нет активной пишущей транзакции, + * то следующая будет ждать на dbi_lock */ + !env->txn) { + if (env->dbs_flags[MAIN_DBI] != 0 || MDBX_DEBUG) + NOTICE("renew MainDB for %s-txn %" PRIaTXN + " since db-flags changes 0x%x -> 0x%x", + (txn->flags & MDBX_TXN_RDONLY) ? 
"ro" : "rw", txn->txnid, + env->dbs_flags[MAIN_DBI] & ~DB_VALID, + txn->dbs[MAIN_DBI].flags); + env->dbs_flags[MAIN_DBI] = DB_POISON; + atomic_store32(&env->dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease); + rc = sdb_setup(env, &env->kvs[MAIN_DBI], &txn->dbs[MAIN_DBI]); + if (likely(rc == MDBX_SUCCESS)) { + seq = dbi_seq_next(env, MAIN_DBI); + env->dbs_flags[MAIN_DBI] = DB_VALID | txn->dbs[MAIN_DBI].flags; + txn->dbi_seqs[MAIN_DBI] = atomic_store32(&env->dbi_seqs[MAIN_DBI], + seq, mo_AcquireRelease); + } + } else { + ERROR("MainDB db-flags changes 0x%x -> 0x%x ahead of read-txn " + "%" PRIaTXN, + txn->dbs[MAIN_DBI].flags, env->dbs_flags[MAIN_DBI] & ~DB_VALID, + txn->txnid); + rc = MDBX_INCOMPATIBLE; + } + } + ENSURE(env, osal_fastmutex_release(&env->dbi_lock) == MDBX_SUCCESS); + } else { + DEBUG("dbi_lock failed, err %d", rc); + } + if (should_unlock) + lck_txn_unlock(env); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + + if (unlikely(txn->dbs[FREE_DBI].flags != MDBX_INTEGERKEY)) { + ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB", + txn->dbs[FREE_DBI].flags); + rc = MDBX_INCOMPATIBLE; + goto bailout; + } + + tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY); + tASSERT(txn, check_sdb_flags(txn->dbs[MAIN_DBI].flags)); + if (unlikely(env->flags & ENV_FATAL_ERROR)) { + WARNING("%s", "environment had fatal error, must shutdown!"); + rc = MDBX_PANIC; + } else { + const size_t size_bytes = pgno2bytes(env, txn->geo.end_pgno); + const size_t used_bytes = pgno2bytes(env, txn->geo.first_unallocated); + const size_t required_bytes = + (txn->flags & MDBX_TXN_RDONLY) ? used_bytes : size_bytes; + eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current); + if (unlikely(required_bytes > env->dxb_mmap.current)) { + /* Размер БД (для пишущих транзакций) или используемых данных (для + * читающих транзакций) больше предыдущего/текущего размера внутри + * процесса, увеличиваем. Сюда также попадает случай увеличения верхней + * границы размера БД и отображения. 
В читающих транзакциях нельзя + * изменять размер файла, который может быть больше необходимого этой + * транзакции. */ + if (txn->geo.upper > MAX_PAGENO + 1 || + bytes2pgno(env, pgno2bytes(env, txn->geo.upper)) != txn->geo.upper) { + rc = MDBX_UNABLE_EXTEND_MAPSIZE; + goto bailout; + } + rc = dxb_resize(env, txn->geo.first_unallocated, txn->geo.end_pgno, + txn->geo.upper, implicit_grow); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current); + } else if (unlikely(size_bytes < env->dxb_mmap.current)) { + /* Размер БД меньше предыдущего/текущего размера внутри процесса, можно + * уменьшить, но всё сложнее: + * - размер файла согласован со всеми читаемыми снимками на момент + * коммита последней транзакции; + * - в читающей транзакции размер файла может быть больше и него нельзя + * изменять, в том числе менять madvise (меньша размера файла нельзя, + * а за размером нет смысла). + * - в пишущей транзакции уменьшать размер файла можно только после + * проверки размера читаемых снимков, но в этом нет смысла, так как + * это будет сделано при фиксации транзакции. + * + * В сухом остатке, можно только установить dxb_mmap.current равным + * размеру файла, а это проще сделать без вызова dxb_resize() и усложения + * внутренней логики. + * + * В этой тактике есть недостаток: если пишущите транзакции не регулярны, + * и при завершении такой транзакции файл БД остаётся не-уменьшеным из-за + * читающих транзакций использующих предыдущие снимки. 
*/ +#if defined(_WIN32) || defined(_WIN64) + imports.srwl_AcquireShared(&env->remap_guard); +#else + rc = osal_fastmutex_acquire(&env->remap_guard); +#endif + if (likely(rc == MDBX_SUCCESS)) { + eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current); + rc = osal_filesize(env->dxb_mmap.fd, &env->dxb_mmap.filesize); + if (likely(rc == MDBX_SUCCESS)) { + eASSERT(env, env->dxb_mmap.filesize >= required_bytes); + if (env->dxb_mmap.current > env->dxb_mmap.filesize) + env->dxb_mmap.current = + (env->dxb_mmap.limit < env->dxb_mmap.filesize) + ? env->dxb_mmap.limit + : (size_t)env->dxb_mmap.filesize; + } +#if defined(_WIN32) || defined(_WIN64) + imports.srwl_ReleaseShared(&env->remap_guard); +#else + int err = osal_fastmutex_release(&env->remap_guard); + if (unlikely(err) && likely(rc == MDBX_SUCCESS)) + rc = err; +#endif + } + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + eASSERT(env, pgno2bytes(env, txn->geo.first_unallocated) <= + env->dxb_mmap.current); + eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current); + if (txn->flags & MDBX_TXN_RDONLY) { +#if defined(_WIN32) || defined(_WIN64) + if (((used_bytes > env->geo_in_bytes.lower && env->geo_in_bytes.shrink) || + (globals.running_under_Wine && + /* under Wine acquisition of remap_guard is always required, + * since Wine doesn't support section extending, + * i.e. in both cases unmap+map are required.
*/ + used_bytes < env->geo_in_bytes.upper && env->geo_in_bytes.grow)) && + /* avoid recursive use SRW */ (txn->flags & MDBX_NOSTICKYTHREADS) == + 0) { + txn->flags |= txn_shrink_allowed; + imports.srwl_AcquireShared(&env->remap_guard); + } +#endif /* Windows */ + } else { + tASSERT(txn, txn == env->basal_txn); + MDBX_cursor *const gc = ptr_disp(txn, sizeof(MDBX_txn)); + rc = cursor_init(gc, txn, FREE_DBI); + if (rc != MDBX_SUCCESS) + goto bailout; + } + dxb_sanitize_tail(env, txn); + return MDBX_SUCCESS; + } +bailout: + tASSERT(txn, rc != MDBX_SUCCESS); + txn_end(txn, TXN_END_SLOT | TXN_END_FAIL_BEGIN); + return rc; +} + +int txn_end(MDBX_txn *txn, const unsigned mode) { + MDBX_env *env = txn->env; + static const char *const names[] = TXN_END_NAMES; + + DEBUG("%s txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO + "/%" PRIaPGNO, + names[mode & TXN_END_OPMASK], txn->txnid, + (txn->flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, (void *)env, + txn->dbs[MAIN_DBI].root, txn->dbs[FREE_DBI].root); + + if (!(mode & TXN_END_EOTDONE)) /* !(already closed cursors) */ + done_cursors(txn, false); + + int rc = MDBX_SUCCESS; + if (txn->flags & MDBX_TXN_RDONLY) { + if (txn->to.reader) { + reader_slot_t *slot = txn->to.reader; + eASSERT(env, slot->pid.weak == env->pid); + if (likely(!(txn->flags & MDBX_TXN_FINISHED))) { + ENSURE(env, txn->txnid >= + /* paranoia is appropriate here */ env->lck + ->cached_oldest.weak); + eASSERT(env, txn->txnid == slot->txnid.weak && + slot->txnid.weak >= env->lck->cached_oldest.weak); + dxb_sanitize_tail(env, nullptr); + atomic_store32(&slot->snapshot_pages_used, 0, mo_Relaxed); + safe64_reset(&slot->txnid, false); + atomic_store32(&env->lck->rdt_refresh_flag, true, mo_Relaxed); + } else { + eASSERT(env, slot->pid.weak == env->pid); + eASSERT(env, slot->txnid.weak >= SAFE64_INVALID_THRESHOLD); + } + if (mode & TXN_END_SLOT) { + if ((env->flags & ENV_TXKEY) == 0) + atomic_store32(&slot->pid, 0, mo_Relaxed); + txn->to.reader = nullptr; + } + 
} +#if defined(_WIN32) || defined(_WIN64) + if (txn->flags & txn_shrink_allowed) + imports.srwl_ReleaseShared(&env->remap_guard); +#endif + txn->n_dbi = 0; /* prevent further DBI activity */ + txn->flags = MDBX_TXN_RDONLY | MDBX_TXN_FINISHED; + txn->owner = 0; + } else if (!(txn->flags & MDBX_TXN_FINISHED)) { + ENSURE(env, + txn->txnid >= + /* paranoia is appropriate here */ env->lck->cached_oldest.weak); + if (txn == env->basal_txn) + dxb_sanitize_tail(env, nullptr); + + txn->flags = MDBX_TXN_FINISHED; + env->txn = txn->parent; + pnl_free(txn->tw.spilled.list); + txn->tw.spilled.list = nullptr; + if (txn == env->basal_txn) { + eASSERT(env, txn->parent == nullptr); + /* Export or close DBI handles created in this txn */ + rc = dbi_update(txn, mode & TXN_END_UPDATE); + pnl_shrink(&txn->tw.retired_pages); + pnl_shrink(&txn->tw.relist); + if (!(env->flags & MDBX_WRITEMAP)) + dpl_release_shadows(txn); + /* The writer mutex was locked in mdbx_txn_begin. */ + lck_txn_unlock(env); + } else { + eASSERT(env, txn->parent != nullptr); + MDBX_txn *const parent = txn->parent; + eASSERT(env, parent->signature == txn_signature); + eASSERT(env, parent->nested == txn && + (parent->flags & MDBX_TXN_HAS_CHILD) != 0); + eASSERT(env, + pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - + MDBX_ENABLE_REFUND)); + eASSERT(env, memcmp(&txn->tw.troika, &parent->tw.troika, + sizeof(troika_t)) == 0); + + txn->owner = 0; + if (txn->tw.gc.reclaimed) { + eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) >= + (uintptr_t)parent->tw.gc.reclaimed); + MDBX_PNL_SETSIZE(txn->tw.gc.reclaimed, + (uintptr_t)parent->tw.gc.reclaimed); + parent->tw.gc.reclaimed = txn->tw.gc.reclaimed; + } + + if (txn->tw.retired_pages) { + eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.retired_pages) >= + (uintptr_t)parent->tw.retired_pages); + MDBX_PNL_SETSIZE(txn->tw.retired_pages, + (uintptr_t)parent->tw.retired_pages); + parent->tw.retired_pages = txn->tw.retired_pages; + } + + parent->nested = nullptr; + 
parent->flags &= ~MDBX_TXN_HAS_CHILD; + parent->tw.dirtylru = txn->tw.dirtylru; + tASSERT(parent, dpl_check(parent)); + tASSERT(parent, audit_ex(parent, 0, false) == 0); + dpl_release_shadows(txn); + dpl_free(txn); + pnl_free(txn->tw.relist); + + if (parent->geo.upper != txn->geo.upper || + parent->geo.now != txn->geo.now) { + /* undo resize performed by child txn */ + rc = dxb_resize(env, parent->geo.first_unallocated, parent->geo.now, + parent->geo.upper, impilict_shrink); + if (rc == MDBX_EPERM) { + /* unable undo resize (it is regular for Windows), + * therefore promote size changes from child to the parent txn */ + WARNING("unable undo resize performed by child txn, promote to " + "the parent (%u->%u, %u->%u)", + txn->geo.now, parent->geo.now, txn->geo.upper, + parent->geo.upper); + parent->geo.now = txn->geo.now; + parent->geo.upper = txn->geo.upper; + parent->flags |= MDBX_TXN_DIRTY; + rc = MDBX_SUCCESS; + } else if (unlikely(rc != MDBX_SUCCESS)) { + ERROR("error %d while undo resize performed by child txn, fail " + "the parent", + rc); + parent->flags |= MDBX_TXN_ERROR; + if (!env->dxb_mmap.base) + env->flags |= ENV_FATAL_ERROR; + } + } + } + } + + eASSERT(env, txn == env->basal_txn || txn->owner == 0); + if ((mode & TXN_END_FREE) != 0 && txn != env->basal_txn) { + txn->signature = 0; + osal_free(txn); + } + + return rc; +} + +/*----------------------------------------------------------------------------*/ + +int mdbx_txn_renew(MDBX_txn *txn) { + if (unlikely(!txn)) + return MDBX_EINVAL; + + if (unlikely(txn->signature != txn_signature)) + return MDBX_EBADSIGN; + + if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0)) + return MDBX_EINVAL; + + int rc; + if (unlikely(txn->owner != 0 || !(txn->flags & MDBX_TXN_FINISHED))) { + rc = mdbx_txn_reset(txn); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + + rc = txn_renew(txn, MDBX_TXN_RDONLY); + if (rc == MDBX_SUCCESS) { + tASSERT(txn, txn->owner == osal_thread_self()); + DEBUG("renew txn %" PRIaTXN "%c %p on 
env %p, root page %" PRIaPGNO + "/%" PRIaPGNO, + txn->txnid, (txn->flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, + (void *)txn->env, txn->dbs[MAIN_DBI].root, txn->dbs[FREE_DBI].root); + } + return rc; +} + +int mdbx_txn_set_userctx(MDBX_txn *txn, void *ctx) { + int rc = check_txn(txn, MDBX_TXN_FINISHED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + txn->userctx = ctx; + return MDBX_SUCCESS; +} + +void *mdbx_txn_get_userctx(const MDBX_txn *txn) { + return check_txn(txn, MDBX_TXN_FINISHED) ? nullptr : txn->userctx; +} + +int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, + MDBX_txn **ret, void *context) { + if (unlikely(!ret)) + return MDBX_EINVAL; + *ret = nullptr; + + if (unlikely((flags & ~txn_rw_begin_flags) && (flags & ~txn_ro_begin_flags))) + return MDBX_EINVAL; + + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(env->flags & MDBX_RDONLY & ~flags)) /* write txn in RDONLY env */ + return MDBX_EACCESS; + + MDBX_txn *txn = nullptr; + if (parent) { + /* Nested transactions: Max 1 child, write txns only, no writemap */ + rc = check_txn_rw(parent, + MDBX_TXN_RDONLY | MDBX_WRITEMAP | MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (env->options.spill_parent4child_denominator) { + /* Spill dirty-pages of parent to provide dirtyroom for child txn */ + rc = txn_spill(parent, nullptr, + parent->tw.dirtylist->length / + env->options.spill_parent4child_denominator); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + tASSERT(parent, audit_ex(parent, 0, false) == 0); + + flags |= parent->flags & (txn_rw_begin_flags | MDBX_TXN_SPILLS | + MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP); + } else if (flags & MDBX_TXN_RDONLY) { + if ((env->flags & MDBX_NOSTICKYTHREADS) == 0 && env->txn && + unlikely(env->basal_txn->owner == osal_thread_self()) && + (globals.runtime_flags & MDBX_DBG_LEGACY_OVERLAP) == 0) + return MDBX_TXN_OVERLAPPING; + } else { + /* Reuse preallocated 
write txn. However, do not touch it until + * txn_renew() succeeds, since it currently may be active. */ + txn = env->basal_txn; + goto renew; + } + + const intptr_t bitmap_bytes = +#if MDBX_ENABLE_DBI_SPARSE + ceil_powerof2(env->max_dbi, CHAR_BIT * sizeof(txn->dbi_sparse[0])) / + CHAR_BIT; +#else + 0; +#endif /* MDBX_ENABLE_DBI_SPARSE */ + STATIC_ASSERT(sizeof(txn->tw) > sizeof(txn->to)); + const size_t base = (flags & MDBX_TXN_RDONLY) + ? sizeof(MDBX_txn) - sizeof(txn->tw) + sizeof(txn->to) + : sizeof(MDBX_txn); + const size_t size = + base + + ((flags & MDBX_TXN_RDONLY) + ? (size_t)bitmap_bytes + env->max_dbi * sizeof(txn->dbi_seqs[0]) + : 0) + + env->max_dbi * (sizeof(txn->dbs[0]) + sizeof(txn->cursors[0]) + + sizeof(txn->dbi_state[0])); + txn = osal_malloc(size); + if (unlikely(txn == nullptr)) { + DEBUG("calloc: %s", "failed"); + return MDBX_ENOMEM; + } +#if MDBX_DEBUG + memset(txn, 0xCD, size); + VALGRIND_MAKE_MEM_UNDEFINED(txn, size); +#endif /* MDBX_DEBUG */ + MDBX_ANALYSIS_ASSUME(size > base); + memset(txn, 0, + (MDBX_GOOFY_MSVC_STATIC_ANALYZER && base > size) ? size : base); + txn->dbs = ptr_disp(txn, base); + txn->cursors = ptr_disp(txn->dbs, env->max_dbi * sizeof(txn->dbs[0])); +#if MDBX_DEBUG + txn->cursors[FREE_DBI] = nullptr; /* avoid SIGSEGV in an assertion later */ +#endif + txn->dbi_state = + ptr_disp(txn, size - env->max_dbi * sizeof(txn->dbi_state[0])); + txn->flags = flags; + txn->env = env; + + if (parent) { + tASSERT(parent, dpl_check(parent)); +#if MDBX_ENABLE_DBI_SPARSE + txn->dbi_sparse = parent->dbi_sparse; +#endif /* MDBX_ENABLE_DBI_SPARSE */ + txn->dbi_seqs = parent->dbi_seqs; + txn->geo = parent->geo; + rc = dpl_alloc(txn); + if (likely(rc == MDBX_SUCCESS)) { + const size_t len = + MDBX_PNL_GETSIZE(parent->tw.relist) + parent->tw.loose_count; + txn->tw.relist = + pnl_alloc((len > MDBX_PNL_INITIAL) ? 
len : MDBX_PNL_INITIAL); + if (unlikely(!txn->tw.relist)) + rc = MDBX_ENOMEM; + } + if (unlikely(rc != MDBX_SUCCESS)) { + nested_failed: + pnl_free(txn->tw.relist); + dpl_free(txn); + osal_free(txn); + return rc; + } + + /* Move loose pages to reclaimed list */ + if (parent->tw.loose_count) { + do { + page_t *lp = parent->tw.loose_pages; + tASSERT(parent, lp->flags == P_LOOSE); + rc = pnl_insert_span(&parent->tw.relist, lp->pgno, 1); + if (unlikely(rc != MDBX_SUCCESS)) + goto nested_failed; + MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *)); + VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *)); + parent->tw.loose_pages = page_next(lp); + /* Remove from dirty list */ + page_wash(parent, dpl_exist(parent, lp->pgno), lp, 1); + } while (parent->tw.loose_pages); + parent->tw.loose_count = 0; +#if MDBX_ENABLE_REFUND + parent->tw.loose_refund_wl = 0; +#endif /* MDBX_ENABLE_REFUND */ + tASSERT(parent, dpl_check(parent)); + } + txn->tw.dirtyroom = parent->tw.dirtyroom; + txn->tw.dirtylru = parent->tw.dirtylru; + + dpl_sort(parent); + if (parent->tw.spilled.list) + spill_purge(parent); + + tASSERT(txn, MDBX_PNL_ALLOCLEN(txn->tw.relist) >= + MDBX_PNL_GETSIZE(parent->tw.relist)); + memcpy(txn->tw.relist, parent->tw.relist, + MDBX_PNL_SIZEOF(parent->tw.relist)); + eASSERT(env, pnl_check_allocated( + txn->tw.relist, + (txn->geo.first_unallocated /* LY: intentional assignment + here, only for assertion */ + = parent->geo.first_unallocated) - + MDBX_ENABLE_REFUND)); + + txn->tw.gc.time_acc = parent->tw.gc.time_acc; + txn->tw.gc.last_reclaimed = parent->tw.gc.last_reclaimed; + if (parent->tw.gc.reclaimed) { + txn->tw.gc.reclaimed = parent->tw.gc.reclaimed; + parent->tw.gc.reclaimed = + (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->tw.gc.reclaimed); + } + + txn->tw.retired_pages = parent->tw.retired_pages; + parent->tw.retired_pages = + (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->tw.retired_pages); + + txn->txnid = parent->txnid; + txn->front_txnid = 
parent->front_txnid + 1; +#if MDBX_ENABLE_REFUND + txn->tw.loose_refund_wl = 0; +#endif /* MDBX_ENABLE_REFUND */ + txn->canary = parent->canary; + parent->flags |= MDBX_TXN_HAS_CHILD; + parent->nested = txn; + txn->parent = parent; + txn->owner = parent->owner; + txn->tw.troika = parent->tw.troika; + + txn->cursors[FREE_DBI] = nullptr; + txn->cursors[MAIN_DBI] = nullptr; + txn->dbi_state[FREE_DBI] = + parent->dbi_state[FREE_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); + txn->dbi_state[MAIN_DBI] = + parent->dbi_state[MAIN_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); + memset(txn->dbi_state + CORE_DBS, 0, + (txn->n_dbi = parent->n_dbi) - CORE_DBS); + memcpy(txn->dbs, parent->dbs, sizeof(txn->dbs[0]) * CORE_DBS); + + tASSERT(parent, parent->tw.dirtyroom + parent->tw.dirtylist->length == + (parent->parent ? parent->parent->tw.dirtyroom + : parent->env->options.dp_limit)); + tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == + (txn->parent ? txn->parent->tw.dirtyroom + : txn->env->options.dp_limit)); + env->txn = txn; + tASSERT(parent, parent->cursors[FREE_DBI] == nullptr); + rc = parent->cursors[MAIN_DBI] + ? 
cursor_shadow(parent->cursors[MAIN_DBI], txn, MAIN_DBI) + : MDBX_SUCCESS; + if (AUDIT_ENABLED() && ASSERT_ENABLED()) { + txn->signature = txn_signature; + tASSERT(txn, audit_ex(txn, 0, false) == 0); + } + if (unlikely(rc != MDBX_SUCCESS)) + txn_end(txn, TXN_END_FAIL_BEGINCHILD); + } else { /* MDBX_TXN_RDONLY */ + txn->dbi_seqs = + ptr_disp(txn->cursors, env->max_dbi * sizeof(txn->cursors[0])); +#if MDBX_ENABLE_DBI_SPARSE + txn->dbi_sparse = ptr_disp(txn->dbi_state, -bitmap_bytes); +#endif /* MDBX_ENABLE_DBI_SPARSE */ + renew: + rc = txn_renew(txn, flags); + } + + if (unlikely(rc != MDBX_SUCCESS)) { + if (txn != env->basal_txn) + osal_free(txn); + } else { + if (flags & (MDBX_TXN_RDONLY_PREPARE - MDBX_TXN_RDONLY)) + eASSERT(env, txn->flags == (MDBX_TXN_RDONLY | MDBX_TXN_FINISHED)); + else if (flags & MDBX_TXN_RDONLY) + eASSERT(env, (txn->flags & + ~(MDBX_NOSTICKYTHREADS | MDBX_TXN_RDONLY | MDBX_WRITEMAP | + /* Win32: SRWL flag */ txn_shrink_allowed)) == 0); + else { + eASSERT(env, (txn->flags & ~(MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP | + txn_shrink_allowed | MDBX_NOMETASYNC | + MDBX_SAFE_NOSYNC | MDBX_TXN_SPILLS)) == 0); + assert(!txn->tw.spilled.list && !txn->tw.spilled.least_removed); + } + txn->signature = txn_signature; + txn->userctx = context; + *ret = txn; + DEBUG("begin txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO + "/%" PRIaPGNO, + txn->txnid, (flags & MDBX_TXN_RDONLY) ? 
'r' : 'w', (void *)txn, + (void *)env, txn->dbs[MAIN_DBI].root, txn->dbs[FREE_DBI].root); + } + + return rc; +} + +int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) { + int rc = check_txn(txn, MDBX_TXN_FINISHED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!info)) + return MDBX_EINVAL; + + MDBX_env *const env = txn->env; +#if MDBX_ENV_CHECKPID + if (unlikely(env->pid != osal_getpid())) { + env->flags |= ENV_FATAL_ERROR; + return MDBX_PANIC; + } +#endif /* MDBX_ENV_CHECKPID */ + + info->txn_id = txn->txnid; + info->txn_space_used = pgno2bytes(env, txn->geo.first_unallocated); + + if (txn->flags & MDBX_TXN_RDONLY) { + meta_ptr_t head; + uint64_t head_retired; + troika_t troika = meta_tap(env); + do { + /* fetch info from volatile head */ + head = meta_recent(env, &troika); + head_retired = unaligned_peek_u64_volatile(4, head.ptr_v->pages_retired); + info->txn_space_limit_soft = pgno2bytes(env, head.ptr_v->geometry.now); + info->txn_space_limit_hard = pgno2bytes(env, head.ptr_v->geometry.upper); + info->txn_space_leftover = + pgno2bytes(env, head.ptr_v->geometry.now - + head.ptr_v->geometry.first_unallocated); + } while (unlikely(meta_should_retry(env, &troika))); + + info->txn_reader_lag = head.txnid - info->txn_id; + info->txn_space_dirty = info->txn_space_retired = 0; + uint64_t reader_snapshot_pages_retired; + if (txn->to.reader && + head_retired > + (reader_snapshot_pages_retired = atomic_load64( + &txn->to.reader->snapshot_pages_retired, mo_Relaxed))) { + info->txn_space_dirty = info->txn_space_retired = pgno2bytes( + env, (pgno_t)(head_retired - reader_snapshot_pages_retired)); + + size_t retired_next_reader = 0; + lck_t *const lck = env->lck_mmap.lck; + if (scan_rlt && info->txn_reader_lag > 1 && lck) { + /* find next more recent reader */ + txnid_t next_reader = head.txnid; + const size_t snap_nreaders = + atomic_load32(&lck->rdt_length, mo_AcquireRelease); + for (size_t i = 0; i < snap_nreaders; ++i) { + retry: 
+ if (atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease)) { + jitter4testing(true); + const txnid_t snap_txnid = safe64_read(&lck->rdt[i].txnid); + const uint64_t snap_retired = atomic_load64( + &lck->rdt[i].snapshot_pages_retired, mo_AcquireRelease); + if (unlikely(snap_retired != + atomic_load64(&lck->rdt[i].snapshot_pages_retired, + mo_Relaxed)) || + snap_txnid != safe64_read(&lck->rdt[i].txnid)) + goto retry; + if (snap_txnid <= txn->txnid) { + retired_next_reader = 0; + break; + } + if (snap_txnid < next_reader) { + next_reader = snap_txnid; + retired_next_reader = pgno2bytes( + env, (pgno_t)(snap_retired - + atomic_load64( + &txn->to.reader->snapshot_pages_retired, + mo_Relaxed))); + } + } + } + } + info->txn_space_dirty = retired_next_reader; + } + } else { + info->txn_space_limit_soft = pgno2bytes(env, txn->geo.now); + info->txn_space_limit_hard = pgno2bytes(env, txn->geo.upper); + info->txn_space_retired = + pgno2bytes(env, txn->nested ? (size_t)txn->tw.retired_pages + : MDBX_PNL_GETSIZE(txn->tw.retired_pages)); + info->txn_space_leftover = pgno2bytes(env, txn->tw.dirtyroom); + info->txn_space_dirty = pgno2bytes( + env, txn->tw.dirtylist ? 
txn->tw.dirtylist->pages_including_loose + : (txn->tw.writemap_dirty_npages + + txn->tw.writemap_spilled_npages)); + info->txn_reader_lag = INT64_MAX; + lck_t *const lck = env->lck_mmap.lck; + if (scan_rlt && lck) { + txnid_t oldest_snapshot = txn->txnid; + const size_t snap_nreaders = + atomic_load32(&lck->rdt_length, mo_AcquireRelease); + if (snap_nreaders) { + oldest_snapshot = txn_snapshot_oldest(txn); + if (oldest_snapshot == txn->txnid - 1) { + /* check if there is at least one reader */ + bool exists = false; + for (size_t i = 0; i < snap_nreaders; ++i) { + if (atomic_load32(&lck->rdt[i].pid, mo_Relaxed) && + txn->txnid > safe64_read(&lck->rdt[i].txnid)) { + exists = true; + break; + } + } + oldest_snapshot += !exists; + } + } + info->txn_reader_lag = txn->txnid - oldest_snapshot; + } + } + + return MDBX_SUCCESS; +} + +MDBX_env *mdbx_txn_env(const MDBX_txn *txn) { + if (unlikely(!txn || txn->signature != txn_signature || + txn->env->signature.weak != env_signature)) + return nullptr; + return txn->env; +} + +uint64_t mdbx_txn_id(const MDBX_txn *txn) { + if (unlikely(!txn || txn->signature != txn_signature)) + return 0; + return txn->txnid; +} + +int mdbx_txn_flags(const MDBX_txn *txn) { + STATIC_ASSERT( + (MDBX_TXN_INVALID & + (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | + MDBX_TXN_HAS_CHILD | txn_gc_drained | txn_shrink_allowed | + txn_rw_begin_flags | txn_ro_begin_flags)) == 0); + if (unlikely(!txn || txn->signature != txn_signature)) + return MDBX_TXN_INVALID; + assert(0 == (int)(txn->flags & MDBX_TXN_INVALID)); + return txn->flags; +} + +int mdbx_txn_reset(MDBX_txn *txn) { + int rc = check_txn(txn, 0); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + /* This call is only valid for read-only txns */ + if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0)) + return MDBX_EINVAL; + + /* LY: don't close DBI-handles */ + rc = txn_end(txn, TXN_END_RESET | TXN_END_UPDATE); + if (rc == MDBX_SUCCESS) { + tASSERT(txn, txn->signature == 
txn_signature); + tASSERT(txn, txn->owner == 0); + } + return rc; +} + +int mdbx_txn_break(MDBX_txn *txn) { + do { + int rc = check_txn(txn, 0); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + txn->flags |= MDBX_TXN_ERROR; + if (txn->flags & MDBX_TXN_RDONLY) + break; + txn = txn->nested; + } while (txn); + return MDBX_SUCCESS; +} + +int mdbx_txn_abort(MDBX_txn *txn) { + int rc = check_txn(txn, 0); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = check_env(txn->env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if ((txn->flags & (MDBX_TXN_RDONLY | MDBX_NOSTICKYTHREADS)) == + MDBX_NOSTICKYTHREADS && + unlikely(txn->owner != osal_thread_self())) { + mdbx_txn_break(txn); + return MDBX_THREAD_MISMATCH; + } + + return txn_abort(txn); +} diff --git a/src/unaligned.h b/src/unaligned.h new file mode 100644 index 00000000..0dcbb3f2 --- /dev/null +++ b/src/unaligned.h @@ -0,0 +1,242 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +/*------------------------------------------------------------------------------ + * Unaligned access */ + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static inline size_t +field_alignment(size_t alignment_baseline, size_t field_offset) { + size_t merge = alignment_baseline | (size_t)field_offset; + return merge & -(int)merge; +} + +/* read-thunk for UB-sanitizer */ +MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t +peek_u8(const uint8_t *__restrict ptr) { + return *ptr; +} + +/* write-thunk for UB-sanitizer */ +static inline void poke_u8(uint8_t *__restrict ptr, const uint8_t v) { + *ptr = v; +} + +static inline void *bcopy_2(void *__restrict dst, const void *__restrict src) { + uint8_t *__restrict d = (uint8_t *)dst; + const uint8_t *__restrict s = (uint8_t *)src; + d[0] = s[0]; + d[1] = s[1]; + return d; +} + +static inline void *bcopy_4(void *const __restrict dst, + const void *const __restrict src) { + uint8_t *__restrict d = 
(uint8_t *)dst; + const uint8_t *__restrict s = (uint8_t *)src; + d[0] = s[0]; + d[1] = s[1]; + d[2] = s[2]; + d[3] = s[3]; + return d; +} + +static inline void *bcopy_8(void *const __restrict dst, + const void *const __restrict src) { + uint8_t *__restrict d = (uint8_t *)dst; + const uint8_t *__restrict s = (uint8_t *)src; + d[0] = s[0]; + d[1] = s[1]; + d[2] = s[2]; + d[3] = s[3]; + d[4] = s[4]; + d[5] = s[5]; + d[6] = s[6]; + d[7] = s[7]; + return d; +} + +MDBX_NOTHROW_PURE_FUNCTION static inline uint16_t +unaligned_peek_u16(const size_t expected_alignment, const void *const ptr) { + assert((uintptr_t)ptr % expected_alignment == 0); + if (MDBX_UNALIGNED_OK >= 2 || (expected_alignment % sizeof(uint16_t)) == 0) + return *(const uint16_t *)ptr; + else { +#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ + defined(_M_X64) || defined(_M_IA64) + return *(const __unaligned uint16_t *)ptr; +#else + uint16_t v; + bcopy_2((uint8_t *)&v, (const uint8_t *)ptr); + return v; +#endif /* _MSC_VER || __unaligned */ + } +} + +static inline void unaligned_poke_u16(const size_t expected_alignment, + void *const __restrict ptr, + const uint16_t v) { + assert((uintptr_t)ptr % expected_alignment == 0); + if (MDBX_UNALIGNED_OK >= 2 || (expected_alignment % sizeof(v)) == 0) + *(uint16_t *)ptr = v; + else { +#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ + defined(_M_X64) || defined(_M_IA64) + *((uint16_t __unaligned *)ptr) = v; +#else + bcopy_2((uint8_t *)ptr, (const uint8_t *)&v); +#endif /* _MSC_VER || __unaligned */ + } +} + +MDBX_NOTHROW_PURE_FUNCTION static inline uint32_t +unaligned_peek_u32(const size_t expected_alignment, + const void *const __restrict ptr) { + assert((uintptr_t)ptr % expected_alignment == 0); + if (MDBX_UNALIGNED_OK >= 4 || (expected_alignment % sizeof(uint32_t)) == 0) + return *(const uint32_t *)ptr; + else if ((expected_alignment % sizeof(uint16_t)) == 0) { + const uint16_t lo = + ((const uint16_t 
*)ptr)[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__]; + const uint16_t hi = + ((const uint16_t *)ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__]; + return lo | (uint32_t)hi << 16; + } else { +#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ + defined(_M_X64) || defined(_M_IA64) + return *(const __unaligned uint32_t *)ptr; +#else + uint32_t v; + bcopy_4((uint8_t *)&v, (const uint8_t *)ptr); + return v; +#endif /* _MSC_VER || __unaligned */ + } +} + +static inline void unaligned_poke_u32(const size_t expected_alignment, + void *const __restrict ptr, + const uint32_t v) { + assert((uintptr_t)ptr % expected_alignment == 0); + if (MDBX_UNALIGNED_OK >= 4 || (expected_alignment % sizeof(v)) == 0) + *(uint32_t *)ptr = v; + else if ((expected_alignment % sizeof(uint16_t)) == 0) { + ((uint16_t *)ptr)[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__] = (uint16_t)v; + ((uint16_t *)ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__] = + (uint16_t)(v >> 16); + } else { +#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ + defined(_M_X64) || defined(_M_IA64) + *((uint32_t __unaligned *)ptr) = v; +#else + bcopy_4((uint8_t *)ptr, (const uint8_t *)&v); +#endif /* _MSC_VER || __unaligned */ + } +} + +MDBX_NOTHROW_PURE_FUNCTION static inline uint64_t +unaligned_peek_u64(const size_t expected_alignment, + const void *const __restrict ptr) { + assert((uintptr_t)ptr % expected_alignment == 0); + if (MDBX_UNALIGNED_OK >= 8 || (expected_alignment % sizeof(uint64_t)) == 0) + return *(const uint64_t *)ptr; + else if ((expected_alignment % sizeof(uint32_t)) == 0) { + const uint32_t lo = + ((const uint32_t *)ptr)[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__]; + const uint32_t hi = + ((const uint32_t *)ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__]; + return lo | (uint64_t)hi << 32; + } else { +#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ + defined(_M_X64) || defined(_M_IA64) + return *(const __unaligned uint64_t *)ptr; +#else + uint64_t v; + 
bcopy_8((uint8_t *)&v, (const uint8_t *)ptr); + return v; +#endif /* _MSC_VER || __unaligned */ + } +} + +static inline uint64_t +unaligned_peek_u64_volatile(const size_t expected_alignment, + const volatile void *const __restrict ptr) { + assert((uintptr_t)ptr % expected_alignment == 0); + assert(expected_alignment % sizeof(uint32_t) == 0); + if (MDBX_UNALIGNED_OK >= 8 || (expected_alignment % sizeof(uint64_t)) == 0) + return *(const volatile uint64_t *)ptr; + else { +#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ + defined(_M_X64) || defined(_M_IA64) + return *(const volatile __unaligned uint64_t *)ptr; +#else + const uint32_t lo = ((const volatile uint32_t *) + ptr)[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__]; + const uint32_t hi = ((const volatile uint32_t *) + ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__]; + return lo | (uint64_t)hi << 32; +#endif /* _MSC_VER || __unaligned */ + } +} + +static inline void unaligned_poke_u64(const size_t expected_alignment, + void *const __restrict ptr, + const uint64_t v) { + assert((uintptr_t)ptr % expected_alignment == 0); + if (MDBX_UNALIGNED_OK >= 8 || (expected_alignment % sizeof(v)) == 0) + *(uint64_t *)ptr = v; + else if ((expected_alignment % sizeof(uint32_t)) == 0) { + ((uint32_t *)ptr)[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__] = (uint32_t)v; + ((uint32_t *)ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__] = + (uint32_t)(v >> 32); + } else { +#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ + defined(_M_X64) || defined(_M_IA64) + *((uint64_t __unaligned *)ptr) = v; +#else + bcopy_8((uint8_t *)ptr, (const uint8_t *)&v); +#endif /* _MSC_VER || __unaligned */ + } +} + +#define UNALIGNED_PEEK_8(ptr, struct, field) \ + peek_u8(ptr_disp(ptr, offsetof(struct, field))) +#define UNALIGNED_POKE_8(ptr, struct, field, value) \ + poke_u8(ptr_disp(ptr, offsetof(struct, field)), value) + +#define UNALIGNED_PEEK_16(ptr, struct, field) \ + unaligned_peek_u16(1, ptr_disp(ptr, offsetof(struct, 
field))) +#define UNALIGNED_POKE_16(ptr, struct, field, value) \ + unaligned_poke_u16(1, ptr_disp(ptr, offsetof(struct, field)), value) + +#define UNALIGNED_PEEK_32(ptr, struct, field) \ + unaligned_peek_u32(1, ptr_disp(ptr, offsetof(struct, field))) +#define UNALIGNED_POKE_32(ptr, struct, field, value) \ + unaligned_poke_u32(1, ptr_disp(ptr, offsetof(struct, field)), value) + +#define UNALIGNED_PEEK_64(ptr, struct, field) \ + unaligned_peek_u64(1, ptr_disp(ptr, offsetof(struct, field))) +#define UNALIGNED_POKE_64(ptr, struct, field, value) \ + unaligned_poke_u64(1, ptr_disp(ptr, offsetof(struct, field)), value) + +MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t +peek_pgno(const void *const __restrict ptr) { + if (sizeof(pgno_t) == sizeof(uint32_t)) + return (pgno_t)unaligned_peek_u32(1, ptr); + else if (sizeof(pgno_t) == sizeof(uint64_t)) + return (pgno_t)unaligned_peek_u64(1, ptr); + else { + pgno_t pgno; + memcpy(&pgno, ptr, sizeof(pgno)); + return pgno; + } +} + +static inline void poke_pgno(void *const __restrict ptr, const pgno_t pgno) { + if (sizeof(pgno) == sizeof(uint32_t)) + unaligned_poke_u32(1, ptr, pgno); + else if (sizeof(pgno) == sizeof(uint64_t)) + unaligned_poke_u64(1, ptr, pgno); + else + memcpy(ptr, &pgno, sizeof(pgno)); +} diff --git a/src/utils.c b/src/utils.c new file mode 100644 index 00000000..317b3dd7 --- /dev/null +++ b/src/utils.c @@ -0,0 +1,35 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL unsigned +log2n_powerof2(size_t value_uintptr) { + assert(value_uintptr > 0 && value_uintptr < INT32_MAX && + is_powerof2(value_uintptr)); + assert((value_uintptr & -(intptr_t)value_uintptr) == value_uintptr); + const uint32_t value_uint32 = (uint32_t)value_uintptr; +#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctz) + STATIC_ASSERT(sizeof(value_uint32) <= sizeof(unsigned)); + return 
__builtin_ctz(value_uint32); +#elif defined(_MSC_VER) + unsigned long index; + STATIC_ASSERT(sizeof(value_uint32) <= sizeof(long)); + _BitScanForward(&index, value_uint32); + return index; +#else + static const uint8_t debruijn_ctz32[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; + return debruijn_ctz32[(uint32_t)(value_uint32 * 0x077CB531ul) >> 27]; +#endif +} + +MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL uint64_t rrxmrrxmsx_0(uint64_t v) { + /* Pelle Evensen's mixer, https://bit.ly/2HOfynt */ + v ^= (v << 39 | v >> 25) ^ (v << 14 | v >> 50); + v *= UINT64_C(0xA24BAED4963EE407); + v ^= (v << 40 | v >> 24) ^ (v << 15 | v >> 49); + v *= UINT64_C(0x9FB21C651E98DF25); + return v ^ v >> 28; +} diff --git a/src/utils.h b/src/utils.h new file mode 100644 index 00000000..ec65379a --- /dev/null +++ b/src/utils.h @@ -0,0 +1,87 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +/* Test if the flags f are set in a flag word w. */ +#define F_ISSET(w, f) (((w) & (f)) == (f)) + +/* Round n up to an even number. */ +#define EVEN_CEIL(n) (((n) + 1UL) & -2L) /* sign-extending -2 to match n+1U */ + +/* Round n down to an even number. */ +#define EVEN_FLOOR(n) ((n) & ~(size_t)1) + +/* + * / + * | -1, a < b + * CMP2INT(a,b) = < 0, a == b + * | 1, a > b + * \ + */ +#define CMP2INT(a, b) (((a) != (b)) ? (((a) < (b)) ? 
-1 : 1) : 0) + +/* Pointer displacement without casting to char* to avoid pointer-aliasing */ +#define ptr_disp(ptr, disp) ((void *)(((intptr_t)(ptr)) + ((intptr_t)(disp)))) + +/* Pointer distance as signed number of bytes */ +#define ptr_dist(more, less) (((intptr_t)(more)) - ((intptr_t)(less))) + +#define MDBX_ASAN_POISON_MEMORY_REGION(addr, size) \ + do { \ + TRACE("POISON_MEMORY_REGION(%p, %zu) at %u", (void *)(addr), \ + (size_t)(size), __LINE__); \ + ASAN_POISON_MEMORY_REGION(addr, size); \ + } while (0) + +#define MDBX_ASAN_UNPOISON_MEMORY_REGION(addr, size) \ + do { \ + TRACE("UNPOISON_MEMORY_REGION(%p, %zu) at %u", (void *)(addr), \ + (size_t)(size), __LINE__); \ + ASAN_UNPOISON_MEMORY_REGION(addr, size); \ + } while (0) + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static inline size_t +branchless_abs(intptr_t value) { + assert(value > INT_MIN); + const size_t expanded_sign = + (size_t)(value >> (sizeof(value) * CHAR_BIT - 1)); + return ((size_t)value + expanded_sign) ^ expanded_sign; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static inline bool +is_powerof2(size_t x) { + return (x & (x - 1)) == 0; +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static inline size_t +floor_powerof2(size_t value, size_t granularity) { + assert(is_powerof2(granularity)); + return value & ~(granularity - 1); +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static inline size_t +ceil_powerof2(size_t value, size_t granularity) { + return floor_powerof2(value + granularity - 1, granularity); +} + +MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL unsigned +log2n_powerof2(size_t value_uintptr); + +MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL uint64_t rrxmrrxmsx_0(uint64_t v); + +struct monotime_cache { + uint64_t value; + int expire_countdown; +}; + +MDBX_MAYBE_UNUSED static inline uint64_t +monotime_since_cached(uint64_t begin_timestamp, struct monotime_cache *cache) { + if (cache->expire_countdown) + cache->expire_countdown -= 1; + else { + 
cache->value = osal_monotime(); + cache->expire_countdown = 42 / 3; + } + return cache->value - begin_timestamp; +} diff --git a/src/walk.c b/src/walk.c new file mode 100644 index 00000000..431d812f --- /dev/null +++ b/src/walk.c @@ -0,0 +1,314 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +typedef struct walk_ctx { + void *userctx; + walk_options_t options; + int deep; + walk_func *visitor; + MDBX_txn *txn; + MDBX_cursor *cursor; +} walk_ctx_t; + +__cold static int walk_sdb(walk_ctx_t *ctx, walk_sdb_t *sdb); + +static page_type_t walk_page_type(const page_t *mp) { + if (mp) + switch (mp->flags & ~P_SPILLED) { + case P_BRANCH: + return page_branch; + case P_LEAF: + return page_leaf; + case P_LEAF | P_DUPFIX: + return page_dupfix_leaf; + case P_LARGE: + return page_large; + } + return page_broken; +} + +static page_type_t walk_subpage_type(const page_t *sp) { + switch (sp->flags & /* ignore legacy P_DIRTY flag */ ~P_LEGACY_DIRTY) { + case P_LEAF | P_SUBP: + return page_sub_leaf; + case P_LEAF | P_DUPFIX | P_SUBP: + return page_sub_dupfix_leaf; + default: + return page_sub_broken; + } +} + +/* Depth-first tree traversal. */ +__cold static int walk_pgno(walk_ctx_t *ctx, walk_sdb_t *sdb, const pgno_t pgno, + txnid_t parent_txnid) { + assert(pgno != P_INVALID); + page_t *mp = nullptr; + int err = page_get(ctx->cursor, pgno, &mp, parent_txnid); + + const page_type_t type = walk_page_type(mp); + const size_t nentries = mp ? page_numkeys(mp) : 0; + size_t header_size = + (mp && !is_dupfix_leaf(mp)) ? PAGEHDRSZ + mp->lower : PAGEHDRSZ; + size_t payload_size = 0; + size_t unused_size = + (mp ? 
page_room(mp) : ctx->txn->env->ps - header_size) - payload_size; + size_t align_bytes = 0; + + for (size_t i = 0; err == MDBX_SUCCESS && i < nentries; ++i) { + if (type == page_dupfix_leaf) { + /* DUPFIX pages have no entries[] or node headers */ + payload_size += mp->dupfix_ksize; + continue; + } + + const node_t *node = page_node(mp, i); + header_size += NODESIZE; + const size_t node_key_size = node_ks(node); + payload_size += node_key_size; + + if (type == page_branch) { + assert(i > 0 || node_ks(node) == 0); + align_bytes += node_key_size & 1; + continue; + } + + const size_t node_data_size = node_ds(node); + assert(type == page_leaf); + switch (node_flags(node)) { + case 0 /* usual node */: + payload_size += node_data_size; + align_bytes += (node_key_size + node_data_size) & 1; + break; + + case N_BIGDATA /* long data on the large/overflow page */: { + const pgno_t large_pgno = node_largedata_pgno(node); + const size_t over_payload = node_data_size; + const size_t over_header = PAGEHDRSZ; + + assert(err == MDBX_SUCCESS); + pgr_t lp = page_get_large(ctx->cursor, large_pgno, mp->txnid); + const size_t npages = + ((err = lp.err) == MDBX_SUCCESS) ? lp.page->pages : 1; + const size_t pagesize = pgno2bytes(ctx->txn->env, npages); + const size_t over_unused = pagesize - over_payload - over_header; + const int rc = ctx->visitor(large_pgno, npages, ctx->userctx, ctx->deep, + sdb, pagesize, page_large, err, 1, + over_payload, over_header, over_unused); + if (unlikely(rc != MDBX_SUCCESS)) + return (rc == MDBX_RESULT_TRUE) ? 
MDBX_SUCCESS : rc; + payload_size += sizeof(pgno_t); + align_bytes += node_key_size & 1; + } break; + + case N_SUBDATA /* sub-db */: { + if (unlikely(node_data_size != sizeof(tree_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid subDb node size", (unsigned)node_data_size); + assert(err == MDBX_CORRUPTED); + err = MDBX_CORRUPTED; + } + header_size += node_data_size; + align_bytes += (node_key_size + node_data_size) & 1; + } break; + + case N_SUBDATA | N_DUPDATA /* dupsorted sub-tree */: + if (unlikely(node_data_size != sizeof(tree_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid sub-tree node size", (unsigned)node_data_size); + assert(err == MDBX_CORRUPTED); + err = MDBX_CORRUPTED; + } + header_size += node_data_size; + align_bytes += (node_key_size + node_data_size) & 1; + break; + + case N_DUPDATA /* short sub-page */: { + if (unlikely(node_data_size <= PAGEHDRSZ || (node_data_size & 1))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid sub-page node size", (unsigned)node_data_size); + assert(err == MDBX_CORRUPTED); + err = MDBX_CORRUPTED; + break; + } + + const page_t *const sp = node_data(node); + const page_type_t subtype = walk_subpage_type(sp); + const size_t nsubkeys = page_numkeys(sp); + if (unlikely(subtype == page_sub_broken)) { + ERROR("%s/%d: %s 0x%x", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid sub-page flags", sp->flags); + assert(err == MDBX_CORRUPTED); + err = MDBX_CORRUPTED; + } + + size_t subheader_size = + is_dupfix_leaf(sp) ? 
PAGEHDRSZ : PAGEHDRSZ + sp->lower; + size_t subunused_size = page_room(sp); + size_t subpayload_size = 0; + size_t subalign_bytes = 0; + + for (size_t ii = 0; err == MDBX_SUCCESS && ii < nsubkeys; ++ii) { + if (subtype == page_sub_dupfix_leaf) { + /* DUPFIX pages have no entries[] or node headers */ + subpayload_size += sp->dupfix_ksize; + } else { + assert(subtype == page_sub_leaf); + const node_t *subnode = page_node(sp, ii); + const size_t subnode_size = node_ks(subnode) + node_ds(subnode); + subheader_size += NODESIZE; + subpayload_size += subnode_size; + subalign_bytes += subnode_size & 1; + if (unlikely(node_flags(subnode) != 0)) { + ERROR("%s/%d: %s 0x%x", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "unexpected sub-node flags", node_flags(subnode)); + assert(err == MDBX_CORRUPTED); + err = MDBX_CORRUPTED; + } + } + } + + const int rc = + ctx->visitor(pgno, 0, ctx->userctx, ctx->deep + 1, sdb, + node_data_size, subtype, err, nsubkeys, subpayload_size, + subheader_size, subunused_size + subalign_bytes); + if (unlikely(rc != MDBX_SUCCESS)) + return (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc; + header_size += subheader_size; + unused_size += subunused_size; + payload_size += subpayload_size; + align_bytes += subalign_bytes + (node_key_size & 1); + } break; + + default: + ERROR("%s/%d: %s 0x%x", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid node flags", node_flags(node)); + assert(err == MDBX_CORRUPTED); + err = MDBX_CORRUPTED; + } + } + + const int rc = ctx->visitor( + pgno, 1, ctx->userctx, ctx->deep, sdb, ctx->txn->env->ps, type, err, + nentries, payload_size, header_size, unused_size + align_bytes); + if (unlikely(rc != MDBX_SUCCESS)) + return (rc == MDBX_RESULT_TRUE) ? 
MDBX_SUCCESS : rc; + + for (size_t i = 0; err == MDBX_SUCCESS && i < nentries; ++i) { + if (type == page_dupfix_leaf) + continue; + + node_t *node = page_node(mp, i); + if (type == page_branch) { + assert(err == MDBX_SUCCESS); + ctx->deep += 1; + err = walk_pgno(ctx, sdb, node_pgno(node), mp->txnid); + ctx->deep -= 1; + if (unlikely(err != MDBX_SUCCESS)) { + if (err == MDBX_RESULT_TRUE) + break; + return err; + } + continue; + } + + assert(type == page_leaf); + switch (node_flags(node)) { + default: + continue; + + case N_SUBDATA /* sub-db */: + if (unlikely(node_ds(node) != sizeof(tree_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid sub-tree node size", (unsigned)node_ds(node)); + assert(err == MDBX_CORRUPTED); + err = MDBX_CORRUPTED; + } else { + tree_t aligned_db; + memcpy(&aligned_db, node_data(node), sizeof(aligned_db)); + walk_sdb_t subdb = {{node_key(node), node_ks(node)}, nullptr, nullptr}; + subdb.internal = &aligned_db; + assert(err == MDBX_SUCCESS); + ctx->deep += 1; + err = walk_sdb(ctx, &subdb); + ctx->deep -= 1; + } + break; + + case N_SUBDATA | N_DUPDATA /* dupsorted sub-tree */: + if (unlikely(node_ds(node) != sizeof(tree_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid dupsort sub-tree node size", (unsigned)node_ds(node)); + assert(err == MDBX_CORRUPTED); + err = MDBX_CORRUPTED; + } else { + tree_t aligned_db; + memcpy(&aligned_db, node_data(node), sizeof(aligned_db)); + assert(err == MDBX_SUCCESS); + err = cursor_dupsort_setup(ctx->cursor, node, mp); + if (likely(err == MDBX_SUCCESS)) { + assert(ctx->cursor->subcur == + &container_of(ctx->cursor, cursor_couple_t, outer)->inner); + ctx->cursor = &ctx->cursor->subcur->cursor; + ctx->deep += 1; + sdb->nested = &aligned_db; + err = walk_pgno(ctx, sdb, aligned_db.root, mp->txnid); + sdb->nested = nullptr; + ctx->deep -= 1; + subcur_t *inner_xcursor = container_of(ctx->cursor, subcur_t, cursor); + cursor_couple_t *couple = + 
container_of(inner_xcursor, cursor_couple_t, inner); + ctx->cursor = &couple->outer; + } + } + break; + } + } + + return MDBX_SUCCESS; +} + +__cold static int walk_sdb(walk_ctx_t *ctx, walk_sdb_t *sdb) { + tree_t *const db = sdb->internal; + if (unlikely(db->root == P_INVALID)) + return MDBX_SUCCESS; /* empty db */ + + kvx_t kvx = {.clc = {.k = {.lmin = INT_MAX}, .v = {.lmin = INT_MAX}}}; + cursor_couple_t couple; + int rc = cursor_init4walk(&couple, ctx->txn, db, &kvx); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + const uint8_t cursor_checking = (ctx->options & dont_check_keys_ordering) + ? z_pagecheck | z_ignord + : z_pagecheck; + couple.outer.checking |= cursor_checking; + couple.inner.cursor.checking |= cursor_checking; + couple.outer.next = ctx->cursor; + couple.outer.top_and_flags = z_disable_tree_search_fastpath; + ctx->cursor = &couple.outer; + rc = walk_pgno(ctx, sdb, db->root, + db->mod_txnid ? db->mod_txnid : ctx->txn->txnid); + ctx->cursor = couple.outer.next; + return rc; +} + +__cold int walk_pages(MDBX_txn *txn, walk_func *visitor, void *user, + walk_options_t options) { + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + walk_ctx_t ctx = { + .txn = txn, .userctx = user, .visitor = visitor, .options = options}; + walk_sdb_t sdb = {.name = {.iov_base = MDBX_CHK_GC}, + .internal = &txn->dbs[FREE_DBI]}; + rc = walk_sdb(&ctx, &sdb); + if (!MDBX_IS_ERROR(rc)) { + sdb.name.iov_base = MDBX_CHK_MAIN; + sdb.internal = &txn->dbs[MAIN_DBI]; + rc = walk_sdb(&ctx, &sdb); + } + return rc; +} diff --git a/src/walk.h b/src/walk.h new file mode 100644 index 00000000..7ac5a48e --- /dev/null +++ b/src/walk.h @@ -0,0 +1,23 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +#include "essentials.h" + +typedef struct walk_sdb { + MDBX_val name; + tree_t *internal, *nested; +} walk_sdb_t; + +typedef int walk_func(const size_t pgno, const 
unsigned number, void *const ctx, + const int deep, const walk_sdb_t *subdb, + const size_t page_size, const page_type_t page_type, + const MDBX_error_t err, const size_t nentries, + const size_t payload_bytes, const size_t header_bytes, + const size_t unused_bytes); + +typedef enum walk_options { dont_check_keys_ordering = 1 } walk_options_t; + +MDBX_INTERNAL int walk_pages(MDBX_txn *txn, walk_func *visitor, void *user, + walk_options_t options); diff --git a/src/windows-import.c b/src/windows-import.c new file mode 100644 index 00000000..0e702c3e --- /dev/null +++ b/src/windows-import.c @@ -0,0 +1,158 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#if defined(_WIN32) || defined(_WIN64) + +#include "internals.h" + +//------------------------------------------------------------------------------ +// Stub for slim read-write lock +// Portion Copyright (C) 1995-2002 Brad Wilson + +static void WINAPI stub_srwlock_Init(osal_srwlock_t *srwl) { + srwl->readerCount = srwl->writerCount = 0; +} + +static void WINAPI stub_srwlock_AcquireShared(osal_srwlock_t *srwl) { + while (true) { + assert(srwl->writerCount >= 0 && srwl->readerCount >= 0); + + // If there's a writer already, spin without unnecessarily + // interlocking the CPUs + if (srwl->writerCount != 0) { + SwitchToThread(); + continue; + } + + // Add to the readers list + _InterlockedIncrement(&srwl->readerCount); + + // Check for writers again (we may have been preempted). If + // there are no writers writing or waiting, then we're done. 
+ if (srwl->writerCount == 0) + break; + + // Remove from the readers list, spin, try again + _InterlockedDecrement(&srwl->readerCount); + SwitchToThread(); + } +} + +static void WINAPI stub_srwlock_ReleaseShared(osal_srwlock_t *srwl) { + assert(srwl->readerCount > 0); + _InterlockedDecrement(&srwl->readerCount); +} + +static void WINAPI stub_srwlock_AcquireExclusive(osal_srwlock_t *srwl) { + while (true) { + assert(srwl->writerCount >= 0 && srwl->readerCount >= 0); + + // If there's a writer already, spin without unnecessarily + // interlocking the CPUs + if (srwl->writerCount != 0) { + SwitchToThread(); + continue; + } + + // See if we can become the writer (expensive, because it inter- + // locks the CPUs, so writing should be an infrequent process) + if (_InterlockedExchange(&srwl->writerCount, 1) == 0) + break; + } + + // Now we're the writer, but there may be outstanding readers. + // Spin until there aren't any more; new readers will wait now + // that we're the writer. + while (srwl->readerCount != 0) { + assert(srwl->writerCount >= 0 && srwl->readerCount >= 0); + SwitchToThread(); + } +} + +static void WINAPI stub_srwlock_ReleaseExclusive(osal_srwlock_t *srwl) { + assert(srwl->writerCount == 1 && srwl->readerCount >= 0); + srwl->writerCount = 0; +} + +static uint64_t WINAPI stub_GetTickCount64(void) { + LARGE_INTEGER Counter, Frequency; + return (QueryPerformanceFrequency(&Frequency) && + QueryPerformanceCounter(&Counter)) + ? 
Counter.QuadPart * 1000ul / Frequency.QuadPart + : 0; +} + +//------------------------------------------------------------------------------ + +struct libmdbx_imports imports; + +#if __GNUC_PREREQ(8, 0) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-function-type" +#endif /* GCC/MINGW */ + +#define MDBX_IMPORT(HANDLE, ENTRY) \ + imports.ENTRY = (MDBX_##ENTRY)GetProcAddress(HANDLE, #ENTRY) + +void windows_import(void) { + const HINSTANCE hNtdll = GetModuleHandleA("ntdll.dll"); + if (hNtdll) { + globals.running_under_Wine = !!GetProcAddress(hNtdll, "wine_get_version"); + if (!globals.running_under_Wine) { + MDBX_IMPORT(hNtdll, NtFsControlFile); + MDBX_IMPORT(hNtdll, NtExtendSection); + ENSURE(nullptr, imports.NtExtendSection); + } + } + + const HINSTANCE hKernel32dll = GetModuleHandleA("kernel32.dll"); + if (hKernel32dll) { + MDBX_IMPORT(hKernel32dll, GetFileInformationByHandleEx); + MDBX_IMPORT(hKernel32dll, GetTickCount64); + if (!imports.GetTickCount64) + imports.GetTickCount64 = stub_GetTickCount64; + if (!globals.running_under_Wine) { + MDBX_IMPORT(hKernel32dll, SetFileInformationByHandle); + MDBX_IMPORT(hKernel32dll, GetVolumeInformationByHandleW); + MDBX_IMPORT(hKernel32dll, GetFinalPathNameByHandleW); + MDBX_IMPORT(hKernel32dll, PrefetchVirtualMemory); + MDBX_IMPORT(hKernel32dll, SetFileIoOverlappedRange); + } + } + + const osal_srwlock_t_function srwlock_init = + (osal_srwlock_t_function)(hKernel32dll + ? 
GetProcAddress(hKernel32dll, + "InitializeSRWLock") + : nullptr); + if (srwlock_init) { + imports.srwl_Init = srwlock_init; + imports.srwl_AcquireShared = (osal_srwlock_t_function)GetProcAddress( + hKernel32dll, "AcquireSRWLockShared"); + imports.srwl_ReleaseShared = (osal_srwlock_t_function)GetProcAddress( + hKernel32dll, "ReleaseSRWLockShared"); + imports.srwl_AcquireExclusive = (osal_srwlock_t_function)GetProcAddress( + hKernel32dll, "AcquireSRWLockExclusive"); + imports.srwl_ReleaseExclusive = (osal_srwlock_t_function)GetProcAddress( + hKernel32dll, "ReleaseSRWLockExclusive"); + } else { + imports.srwl_Init = stub_srwlock_Init; + imports.srwl_AcquireShared = stub_srwlock_AcquireShared; + imports.srwl_ReleaseShared = stub_srwlock_ReleaseShared; + imports.srwl_AcquireExclusive = stub_srwlock_AcquireExclusive; + imports.srwl_ReleaseExclusive = stub_srwlock_ReleaseExclusive; + } + + const HINSTANCE hAdvapi32dll = GetModuleHandleA("advapi32.dll"); + if (hAdvapi32dll) { + MDBX_IMPORT(hAdvapi32dll, RegGetValueA); + } +} + +#undef MDBX_IMPORT + +#if __GNUC_PREREQ(8, 0) +#pragma GCC diagnostic pop +#endif /* GCC/MINGW */ + +#endif /* Windows */ diff --git a/src/windows-import.h b/src/windows-import.h new file mode 100644 index 00000000..7a6c8789 --- /dev/null +++ b/src/windows-import.h @@ -0,0 +1,136 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#pragma once + +typedef union osal_srwlock { + __anonymous_struct_extension__ struct { + long volatile readerCount; + long volatile writerCount; + }; + RTL_SRWLOCK native; +} osal_srwlock_t; + +typedef void(WINAPI *osal_srwlock_t_function)(osal_srwlock_t *); + +#if _WIN32_WINNT < 0x0600 /* prior to Windows Vista */ +typedef enum _FILE_INFO_BY_HANDLE_CLASS { + FileBasicInfo, + FileStandardInfo, + FileNameInfo, + FileRenameInfo, + FileDispositionInfo, + FileAllocationInfo, + FileEndOfFileInfo, + FileStreamInfo, + FileCompressionInfo, + FileAttributeTagInfo, + 
FileIdBothDirectoryInfo, + FileIdBothDirectoryRestartInfo, + FileIoPriorityHintInfo, + FileRemoteProtocolInfo, + MaximumFileInfoByHandleClass +} FILE_INFO_BY_HANDLE_CLASS, + *PFILE_INFO_BY_HANDLE_CLASS; + +typedef struct _FILE_END_OF_FILE_INFO { + LARGE_INTEGER EndOfFile; +} FILE_END_OF_FILE_INFO, *PFILE_END_OF_FILE_INFO; + +#define REMOTE_PROTOCOL_INFO_FLAG_LOOPBACK 0x00000001 +#define REMOTE_PROTOCOL_INFO_FLAG_OFFLINE 0x00000002 + +typedef struct _FILE_REMOTE_PROTOCOL_INFO { + USHORT StructureVersion; + USHORT StructureSize; + DWORD Protocol; + USHORT ProtocolMajorVersion; + USHORT ProtocolMinorVersion; + USHORT ProtocolRevision; + USHORT Reserved; + DWORD Flags; + struct { + DWORD Reserved[8]; + } GenericReserved; + struct { + DWORD Reserved[16]; + } ProtocolSpecificReserved; +} FILE_REMOTE_PROTOCOL_INFO, *PFILE_REMOTE_PROTOCOL_INFO; + +#endif /* _WIN32_WINNT < 0x0600 (prior to Windows Vista) */ + +typedef BOOL(WINAPI *MDBX_GetFileInformationByHandleEx)( + _In_ HANDLE hFile, _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass, + _Out_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize); + +typedef BOOL(WINAPI *MDBX_GetVolumeInformationByHandleW)( + _In_ HANDLE hFile, _Out_opt_ LPWSTR lpVolumeNameBuffer, + _In_ DWORD nVolumeNameSize, _Out_opt_ LPDWORD lpVolumeSerialNumber, + _Out_opt_ LPDWORD lpMaximumComponentLength, + _Out_opt_ LPDWORD lpFileSystemFlags, + _Out_opt_ LPWSTR lpFileSystemNameBuffer, _In_ DWORD nFileSystemNameSize); + +typedef DWORD(WINAPI *MDBX_GetFinalPathNameByHandleW)(_In_ HANDLE hFile, + _Out_ LPWSTR lpszFilePath, + _In_ DWORD cchFilePath, + _In_ DWORD dwFlags); + +typedef BOOL(WINAPI *MDBX_SetFileInformationByHandle)( + _In_ HANDLE hFile, _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass, + _Out_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize); + +typedef NTSTATUS(NTAPI *MDBX_NtFsControlFile)( + IN HANDLE FileHandle, IN OUT HANDLE Event, + IN OUT PVOID /* PIO_APC_ROUTINE */ ApcRoutine, IN OUT PVOID ApcContext, + OUT PIO_STATUS_BLOCK 
IoStatusBlock, IN ULONG FsControlCode, + IN OUT PVOID InputBuffer, IN ULONG InputBufferLength, + OUT OPTIONAL PVOID OutputBuffer, IN ULONG OutputBufferLength); + +typedef uint64_t(WINAPI *MDBX_GetTickCount64)(void); + +#if !defined(_WIN32_WINNT_WIN8) || _WIN32_WINNT < _WIN32_WINNT_WIN8 +typedef struct _WIN32_MEMORY_RANGE_ENTRY { + PVOID VirtualAddress; + SIZE_T NumberOfBytes; +} WIN32_MEMORY_RANGE_ENTRY, *PWIN32_MEMORY_RANGE_ENTRY; +#endif /* Windows 8.x */ + +typedef BOOL(WINAPI *MDBX_PrefetchVirtualMemory)( + HANDLE hProcess, ULONG_PTR NumberOfEntries, + PWIN32_MEMORY_RANGE_ENTRY VirtualAddresses, ULONG Flags); + +typedef enum _SECTION_INHERIT { ViewShare = 1, ViewUnmap = 2 } SECTION_INHERIT; + +typedef NTSTATUS(NTAPI *MDBX_NtExtendSection)(IN HANDLE SectionHandle, + IN PLARGE_INTEGER NewSectionSize); + +typedef LSTATUS(WINAPI *MDBX_RegGetValueA)(HKEY hkey, LPCSTR lpSubKey, + LPCSTR lpValue, DWORD dwFlags, + LPDWORD pdwType, PVOID pvData, + LPDWORD pcbData); + +NTSYSAPI ULONG RtlRandomEx(PULONG Seed); + +typedef BOOL(WINAPI *MDBX_SetFileIoOverlappedRange)(HANDLE FileHandle, + PUCHAR OverlappedRangeStart, + ULONG Length); + +struct libmdbx_imports { + osal_srwlock_t_function srwl_Init; + osal_srwlock_t_function srwl_AcquireShared; + osal_srwlock_t_function srwl_ReleaseShared; + osal_srwlock_t_function srwl_AcquireExclusive; + osal_srwlock_t_function srwl_ReleaseExclusive; + MDBX_NtExtendSection NtExtendSection; + MDBX_GetFileInformationByHandleEx GetFileInformationByHandleEx; + MDBX_GetVolumeInformationByHandleW GetVolumeInformationByHandleW; + MDBX_GetFinalPathNameByHandleW GetFinalPathNameByHandleW; + MDBX_SetFileInformationByHandle SetFileInformationByHandle; + MDBX_NtFsControlFile NtFsControlFile; + MDBX_PrefetchVirtualMemory PrefetchVirtualMemory; + MDBX_GetTickCount64 GetTickCount64; + MDBX_RegGetValueA RegGetValueA; + MDBX_SetFileIoOverlappedRange SetFileIoOverlappedRange; +}; + +MDBX_INTERNAL void windows_import(void); diff --git a/test/CMakeLists.txt 
b/test/CMakeLists.txt index 61531a57..fa7ada56 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,3 +1,6 @@ +## Copyright (c) 2020-2024 Леонид Юрьев aka Leonid Yuriev +## SPDX-License-Identifier: Apache-2.0 + enable_language(CXX) include(../cmake/compiler.cmake) @@ -37,6 +40,7 @@ if(NOT MDBX_BUILD_CXX) endif() add_executable(mdbx_test ${LIBMDBX_TEST_SOURCES}) +target_compile_definitions(mdbx_test PRIVATE MDBX_BUILD_TEST=1 MDBX_BUILD_CXX=1) if(MDBX_CXX_STANDARD) set_target_properties(mdbx_test PROPERTIES @@ -47,6 +51,13 @@ set_target_properties(mdbx_test PROPERTIES INTERPROCEDURAL_OPTIMIZATION $) target_setup_options(mdbx_test) +if(NOT MDBX_BUILD_CXX) + target_compile_definitions(mdbx_test PRIVATE MDBX_BUILD_CXX=1) + if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + target_compile_definitions(mdbx_test PRIVATE MDBX_WITHOUT_MSVC_CRT=0) + endif() +endif() + if(NOT MDBX_BUILD_CXX AND LIBCXX_FILESYSTEM) if(CMAKE_COMPILER_IS_ELBRUSCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 1.25.23 AND NOT CMAKE_VERSION VERSION_LESS 3.13) @@ -73,9 +84,9 @@ if(UNIX AND NOT SUBPROJECT) target_include_directories(test_extra_upsert_alldups PRIVATE "${PROJECT_SOURCE_DIR}") target_link_libraries(test_extra_upsert_alldups ${TOOL_MDBX_LIB}) - add_executable(test_extra_dupfixed_addodd extra/dupfixed_addodd.c) - target_include_directories(test_extra_dupfixed_addodd PRIVATE "${PROJECT_SOURCE_DIR}") - target_link_libraries(test_extra_dupfixed_addodd ${TOOL_MDBX_LIB}) + add_executable(test_extra_dupfix_addodd extra/dupfix_addodd.c) + target_include_directories(test_extra_dupfix_addodd PRIVATE "${PROJECT_SOURCE_DIR}") + target_link_libraries(test_extra_dupfix_addodd ${TOOL_MDBX_LIB}) if(MDBX_BUILD_CXX) add_executable(test_extra_maindb_ordinal extra/maindb_ordinal.c++) @@ -85,11 +96,11 @@ if(UNIX AND NOT SUBPROJECT) set_target_properties(test_extra_maindb_ordinal PROPERTIES CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) endif() - add_executable(test_extra_dupfixed_multiple 
extra/dupfixed_multiple.c++) - target_include_directories(test_extra_dupfixed_multiple PRIVATE "${PROJECT_SOURCE_DIR}") - target_link_libraries(test_extra_dupfixed_multiple ${TOOL_MDBX_LIB}) + add_executable(test_extra_dupfix_multiple extra/dupfix_multiple.c++) + target_include_directories(test_extra_dupfix_multiple PRIVATE "${PROJECT_SOURCE_DIR}") + target_link_libraries(test_extra_dupfix_multiple ${TOOL_MDBX_LIB}) if(MDBX_CXX_STANDARD) - set_target_properties(test_extra_dupfixed_multiple PROPERTIES + set_target_properties(test_extra_dupfix_multiple PROPERTIES CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) endif() add_executable(test_extra_hex_base64_base58 extra/hex_base64_base58.c++) @@ -187,10 +198,10 @@ else() if(UNIX AND NOT SUBPROJECT) add_test(NAME extra_upsert_alldups COMMAND test_extra_upsert_alldups) - add_test(NAME extra_dupfixed_addodd COMMAND test_extra_dupfixed_addodd) + add_test(NAME extra_dupfix_addodd COMMAND test_extra_dupfix_addodd) if(MDBX_BUILD_CXX) add_test(NAME extra_maindb_ordinal COMMAND test_extra_maindb_ordinal) - add_test(NAME extra_dupfixed_multiple COMMAND test_extra_dupfixed_multiple) + add_test(NAME extra_dupfix_multiple COMMAND test_extra_dupfix_multiple) add_test(NAME extra_hex_base64_base58 COMMAND test_extra_hex_base64_base58) add_test(NAME extra_doubtless_positioning COMMAND test_extra_doubtless_positioning) if (ENABLE_MEMCHECK) diff --git a/test/append.c++ b/test/append.c++ index d5f9ff40..064dbf3d 100644 --- a/test/append.c++ +++ b/test/append.c++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #include "test.h++" diff --git a/test/base.h++ b/test/base.h++ index 5cc3beb2..a2d2cfbd 100644 --- a/test/base.h++ +++ b/test/base.h++ @@ -1,42 +1,11 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #pragma once -#ifndef NOMINMAX -#define NOMINMAX -#endif +#include "../src/essentials.h" -/* Workaround for modern libstdc++ with CLANG < 4.x */ -#if defined(__SIZEOF_INT128__) && !defined(__GLIBCXX_TYPE_INT_N_0) && \ - defined(__clang__) && __clang_major__ < 4 -#define __GLIBCXX_BITSIZE_INT_N_0 128 -#define __GLIBCXX_TYPE_INT_N_0 __int128 -#endif /* Workaround for modern libstdc++ with CLANG < 4.x */ - -#if defined(_WIN32) || defined(_WIN64) || defined(_WINDOWS) -#ifndef _WIN32_WINNT -#define _WIN32_WINNT 0x0601 /* Windows 7 */ -#endif #ifdef _MSC_VER -/* Workaround for MSVC' header `extern "C"` vs `std::` redefinition bug */ -#if defined(__SANITIZE_ADDRESS__) && !defined(_DISABLE_VECTOR_ANNOTATION) -#define _DISABLE_VECTOR_ANNOTATION -#endif /* _DISABLE_VECTOR_ANNOTATION */ -#ifndef _CRT_SECURE_NO_WARNINGS -#define _CRT_SECURE_NO_WARNINGS -#endif /* _CRT_SECURE_NO_WARNINGS */ #pragma warning(push, 1) #pragma warning(disable : 4548) /* expression before comma has no effect; \ expected expression with side - effect */ @@ -47,6 +16,7 @@ is not guaranteed. 
Specify /EHsc */ #endif /* _MSC_VER (warnings) */ +#if defined(_WIN32) || defined(_WIN64) || defined(_WINDOWS) /* If you wish to build your application for a previous Windows platform, * include WinSDKVer.h and set the _WIN32_WINNT macro to the platform you * wish to support before including SDKDDKVer.h. @@ -55,10 +25,6 @@ #include #endif /* WINDOWS */ -#ifdef __APPLE__ -#define _DARWIN_C_SOURCE -#endif - #include #include #include @@ -96,18 +62,11 @@ #include #include -#define MDBX_INTERNAL_FUNC -#define MDBX_INTERNAL_VAR_PROTO extern -#define MDBX_INTERNAL_VAR_INSTA +#define MDBX_INTERNAL #define xMDBX_TOOLS /* Avoid using internal eASSERT() */ #include "../mdbx.h++" -#include "../src/base.h" #include "../src/osal.h" -#if !defined(__thread) && (defined(_MSC_VER) || defined(__DMC__)) -#define __thread __declspec(thread) -#endif /* __thread */ - #include "../src/options.h" #ifdef _MSC_VER diff --git a/test/cases.c++ b/test/cases.c++ index a7a51897..3d380f61 100644 --- a/test/cases.c++ +++ b/test/cases.c++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #include "test.h++" diff --git a/test/chrono.c++ b/test/chrono.c++ index 2099b105..f9f20862 100644 --- a/test/chrono.c++ +++ b/test/chrono.c++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #include "test.h++" diff --git a/test/chrono.h++ b/test/chrono.h++ index c4c7cd6e..5908509f 100644 --- a/test/chrono.h++ +++ b/test/chrono.h++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/test/config.c++ b/test/config.c++ index d0e14e86..a06b99d2 100644 --- a/test/config.c++ +++ b/test/config.c++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #include "test.h++" diff --git a/test/config.h++ b/test/config.h++ index 336c3447..4da0ed3a 100644 --- a/test/config.h++ +++ b/test/config.h++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/test/copy.c++ b/test/copy.c++ index 93ae77c8..7ab96c24 100644 --- a/test/copy.c++ +++ b/test/copy.c++ @@ -1,3 +1,6 @@ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 + #include "test.h++" class testcase_copy : public testcase { diff --git a/test/dead.c++ b/test/dead.c++ index 6d372d7b..d4bbbc19 100644 --- a/test/dead.c++ +++ b/test/dead.c++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #include "test.h++" diff --git a/test/extra/doubtless_positioning.c++ b/test/extra/doubtless_positioning.c++ index aff30792..e1f070b1 100644 --- a/test/extra/doubtless_positioning.c++ +++ b/test/extra/doubtless_positioning.c++ @@ -1,3 +1,6 @@ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 + #include "mdbx.h++" #include #include diff --git a/test/extra/dupfixed_addodd.c b/test/extra/dupfix_addodd.c similarity index 100% rename from test/extra/dupfixed_addodd.c rename to test/extra/dupfix_addodd.c diff --git a/test/extra/dupfixed_multiple.c++ b/test/extra/dupfix_multiple.c++ similarity index 98% rename from test/extra/dupfixed_multiple.c++ rename to test/extra/dupfix_multiple.c++ index f45b8892..5a70be88 100644 --- a/test/extra/dupfixed_multiple.c++ +++ b/test/extra/dupfix_multiple.c++ @@ -1,3 +1,6 @@ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 + #include "mdbx.h++" #include #include diff --git a/test/extra/hex_base64_base58.c++ b/test/extra/hex_base64_base58.c++ index 879e8f7b..652c9e33 100644 --- a/test/extra/hex_base64_base58.c++ +++ b/test/extra/hex_base64_base58.c++ @@ -1,3 +1,6 @@ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 + #include "mdbx.h++" #include #include diff --git a/test/extra/maindb_ordinal.c++ b/test/extra/maindb_ordinal.c++ index 14742f14..b38c04ef 100644 --- a/test/extra/maindb_ordinal.c++ +++ b/test/extra/maindb_ordinal.c++ @@ -1,3 +1,6 @@ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 + #include "mdbx.h++" #include #include diff --git a/test/fork.c++ b/test/fork.c++ index 81af98b4..29c95ff8 100644 --- a/test/fork.c++ +++ b/test/fork.c++ @@ -1,16 +1,5 @@ -/* - * 
Copyright 2023 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #include "test.h++" diff --git a/test/hill.c++ b/test/hill.c++ index 42e82a86..6689a0db 100644 --- a/test/hill.c++ +++ b/test/hill.c++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #include "test.h++" diff --git a/test/jitter.c++ b/test/jitter.c++ index 473fc21a..b868c9c5 100644 --- a/test/jitter.c++ +++ b/test/jitter.c++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #include "test.h++" diff --git a/test/keygen.c++ b/test/keygen.c++ index 6420f86b..7e0df590 100644 --- a/test/keygen.c++ +++ b/test/keygen.c++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #include "test.h++" diff --git a/test/keygen.h++ b/test/keygen.h++ index 4f1a5764..80a87d0e 100644 --- a/test/keygen.h++ +++ b/test/keygen.h++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/test/log.c++ b/test/log.c++ index 1c1c51fa..0a3d4355 100644 --- a/test/log.c++ +++ b/test/log.c++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. 
- * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #include "test.h++" diff --git a/test/log.h++ b/test/log.h++ index ca9e04ce..838e8de1 100644 --- a/test/log.h++ +++ b/test/log.h++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #pragma once @@ -101,6 +90,7 @@ void log_trouble(const char *where, const char *what, int errnum); void log_flush(void); bool log_enabled(const logging::loglevel priority); +#undef TRACE #ifdef _DEBUG #define TRACE(...) log_trace(__VA_ARGS__) #else diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index c74623d5..d3e30763 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -1,5 +1,8 @@ #!/usr/bin/env bash +# Леонид Юрьев aka Leonid Yuriev +# SPDX-License-Identifier: Apache-2.0 + LIST=basic FROM=1 UPTO=9999999 diff --git a/test/main.c++ b/test/main.c++ index e4081b7b..7c07f2fe 100644 --- a/test/main.c++ +++ b/test/main.c++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. 
- * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #include "test.h++" diff --git a/test/nested.c++ b/test/nested.c++ index 9b954329..a90c4d37 100644 --- a/test/nested.c++ +++ b/test/nested.c++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #include "test.h++" #include diff --git a/test/osal-unix.c++ b/test/osal-unix.c++ index d5a5a5bc..91cf7da5 100644 --- a/test/osal-unix.c++ +++ b/test/osal-unix.c++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #include "test.h++" @@ -51,11 +40,11 @@ #if __cplusplus >= 201103L #include -MDBX_MAYBE_UNUSED static __inline int atomic_decrement(std::atomic_int *p) { +MDBX_MAYBE_UNUSED static inline int atomic_decrement(std::atomic_int *p) { return std::atomic_fetch_sub(p, 1) - 1; } #else -MDBX_MAYBE_UNUSED static __inline int atomic_decrement(volatile int *p) { +MDBX_MAYBE_UNUSED static inline int atomic_decrement(volatile int *p) { #if defined(__GNUC__) || defined(__clang__) return __sync_sub_and_fetch(p, 1); #elif defined(_MSC_VER) @@ -351,8 +340,6 @@ static void handler_SIGCHLD(int signum) { ++sigalarm_head; } -mdbx_pid_t osal_getpid(void) { return getpid(); } - int osal_delay(unsigned seconds) { return sleep(seconds) ? errno : 0; } int osal_actor_start(const actor_config &config, mdbx_pid_t &pid) { diff --git a/test/osal-windows.c++ b/test/osal-windows.c++ index 5df510c6..54a4ed15 100644 --- a/test/osal-windows.c++ +++ b/test/osal-windows.c++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #include "test.h++" @@ -112,8 +101,6 @@ int osal_waitfor(unsigned id) { return waitstatus2errcode(rc); } -mdbx_pid_t osal_getpid(void) { return GetCurrentProcessId(); } - int osal_delay(unsigned seconds) { Sleep(seconds * 1000u); return 0; @@ -300,7 +287,7 @@ int osal_actor_start(const actor_config &config, mdbx_pid_t &pid) { failure_perror("QueryFullProcessImageName()", GetLastError()); if (exename[1] != ':') { - exename_size = GetModuleFileName(NULL, exename, sizeof(exename)); + exename_size = GetModuleFileNameA(NULL, exename, sizeof(exename)); if (exename_size >= sizeof(exename)) return ERROR_BAD_LENGTH; } diff --git a/test/osal.h++ b/test/osal.h++ index dd7a5a21..7d11dbf3 100644 --- a/test/osal.h++ +++ b/test/osal.h++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #pragma once @@ -28,7 +17,6 @@ void osal_wait4barrier(void); bool osal_progress_push(bool active); -mdbx_pid_t osal_getpid(void); int osal_delay(unsigned seconds); void osal_udelay(size_t us); void osal_yield(void); diff --git a/test/stochastic_small.sh b/test/stochastic_small.sh index 136fc7a7..ffdd212e 100755 --- a/test/stochastic_small.sh +++ b/test/stochastic_small.sh @@ -1,5 +1,8 @@ #!/usr/bin/env bash +# Леонид Юрьев aka Leonid Yuriev +# SPDX-License-Identifier: Apache-2.0 + LIST=--hill FROM=1 UPTO=9999999 diff --git a/test/test.c++ b/test/test.c++ index 7386e50c..21900731 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #include "test.h++" @@ -802,15 +791,7 @@ void testcase::speculum_check_cursor(const char *where, const char *stage, const MDBX_cursor_op op) const { MDBX_val cursor_key = {0, 0}; MDBX_val cursor_data = {0, 0}; - int err; - if (it != speculum.end() && std::next(it) == speculum.end() && - op == MDBX_PREV && (config.params.table_flags & MDBX_DUPSORT)) { - /* Workaround for MDBX/LMDB flaw */ - err = mdbx_cursor_get(cursor, &cursor_key, &cursor_data, MDBX_LAST); - if (err == MDBX_SUCCESS) - err = mdbx_cursor_get(cursor, &cursor_key, &cursor_data, MDBX_LAST_DUP); - } else - err = mdbx_cursor_get(cursor, &cursor_key, &cursor_data, op); + int err = mdbx_cursor_get(cursor, &cursor_key, &cursor_data, op); return speculum_check_cursor(where, stage, it, err, cursor_key, cursor_data); } @@ -1265,14 +1246,14 @@ bool testcase::check_batch_get() { bool rc = true; MDBX_val pairs[42]; size_t count = 0xDeadBeef; - MDBX_cursor_op batch_op; batch_err = mdbx_cursor_get_batch(batch_cursor, &count, pairs, - ARRAY_LENGTH(pairs), batch_op = MDBX_FIRST); + ARRAY_LENGTH(pairs), MDBX_FIRST); size_t i, n = 0; while (batch_err == MDBX_SUCCESS || batch_err == MDBX_RESULT_TRUE) { for (i = 0; i < count; i += 2) { mdbx::slice k, v; - check_err = mdbx_cursor_get(check_cursor, &k, &v, MDBX_NEXT); + check_err = + mdbx_cursor_get(check_cursor, &k, &v, n ? MDBX_NEXT : MDBX_FIRST); if (check_err != MDBX_SUCCESS) failure_perror("batch-verify: mdbx_cursor_get(MDBX_NEXT)", check_err); if (k != pairs[i] || v != pairs[i + 1]) { @@ -1286,14 +1267,13 @@ bool testcase::check_batch_get() { sizeof(dump_value_batch))); rc = false; } + ++n; } - n += i / 2; - batch_op = (batch_err == MDBX_RESULT_TRUE) ? 
MDBX_GET_CURRENT : MDBX_NEXT; batch_err = mdbx_cursor_get_batch(batch_cursor, &count, pairs, - ARRAY_LENGTH(pairs), batch_op); + ARRAY_LENGTH(pairs), MDBX_NEXT); } if (batch_err != MDBX_NOTFOUND) { - log_error("mdbx_cursor_get_batch(), op %u, err %d", batch_op, batch_err); + log_error("mdbx_cursor_get_batch(), err %d", batch_err); rc = false; } diff --git a/test/test.h++ b/test/test.h++ index c854c8e6..583e539a 100644 --- a/test/test.h++ +++ b/test/test.h++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/test/try.c++ b/test/try.c++ index 50c959c9..708122ac 100644 --- a/test/try.c++ +++ b/test/try.c++ @@ -1,3 +1,6 @@ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 + #include "test.h++" class testcase_try : public testcase { diff --git a/test/ttl.c++ b/test/ttl.c++ index 064021d5..d22be384 100644 --- a/test/ttl.c++ +++ b/test/ttl.c++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #include "test.h++" #include diff --git a/test/utils.c++ b/test/utils.c++ index c5a33d54..f96f4ffd 100644 --- a/test/utils.c++ +++ b/test/utils.c++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #include "test.h++" #include diff --git a/test/utils.h++ b/test/utils.h++ index 5698f5fc..0dd7c4d8 100644 --- a/test/utils.h++ +++ b/test/utils.h++ @@ -1,16 +1,5 @@ -/* - * Copyright 2017-2024 Leonid Yuriev - * and other libmdbx authors: please see AUTHORS file. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// \copyright SPDX-License-Identifier: Apache-2.0 #pragma once #include "base.h++" @@ -41,7 +30,7 @@ #if _MSC_FULL_VER < 190024215 #pragma message( \ - "It is recommended to use Visual Studio 2015 (MSC 19.0) or newer.") + "It is recommended to use Visual Studio 2015 (MSC 19.0) or newer.") #endif #define bswap64(v) _byteswap_uint64(v) @@ -70,7 +59,7 @@ #ifdef __bswap_64 #define bswap64(v) __bswap_64(v) #else -static __inline uint64_t bswap64(uint64_t v) { +static inline uint64_t bswap64(uint64_t v) { return v << 56 | v >> 56 | ((v << 40) & UINT64_C(0x00ff000000000000)) | ((v << 24) & UINT64_C(0x0000ff0000000000)) | ((v << 8) & UINT64_C(0x000000ff00000000)) | @@ -85,7 +74,7 @@ static __inline uint64_t bswap64(uint64_t v) { #ifdef __bswap_32 #define bswap32(v) __bswap_32(v) #else -static __inline uint32_t bswap32(uint32_t v) { +static inline uint32_t bswap32(uint32_t v) { return v << 24 | v >> 24 | ((v << 8) & UINT32_C(0x00ff0000)) | ((v >> 8) & UINT32_C(0x0000ff00)); } @@ -96,7 +85,7 @@ static __inline uint32_t bswap32(uint32_t v) { #ifdef __bswap_16 #define bswap16(v) __bswap_16(v) #else -static __inline uint16_t bswap16(uint16_t v) { return v << 8 | v >> 8; } +static inline uint16_t bswap16(uint16_t v) { return v << 8 | v >> 8; } #endif #endif /* bswap16 */ @@ -147,7 +136,7 @@ static __inline uint16_t bswap16(uint16_t v) { return v << 8 | v >> 8; } namespace unaligned { -template static __inline T load(const void *ptr) { +template static inline T load(const void *ptr) { if (MDBX_UNALIGNED_OK >= sizeof(T)) return *(const T *)ptr; else { @@ -162,7 +151,7 @@ template static __inline T load(const void *ptr) { } } -template static __inline void store(void *ptr, const T &value) { +template static inline void store(void *ptr, const T &value) { if (MDBX_UNALIGNED_OK >= sizeof(T)) *(T *)ptr = value; else { @@ -180,22 +169,22 @@ template static __inline void store(void *ptr, const T &value) { 
//----------------------------------------------------------------------------- #ifndef rot64 -static __inline uint64_t rot64(uint64_t v, unsigned s) { +static inline uint64_t rot64(uint64_t v, unsigned s) { return (v >> s) | (v << (64 - s)); } #endif /* rot64 */ -static __inline bool is_power2(size_t x) { return (x & (x - 1)) == 0; } +static inline bool is_power2(size_t x) { return (x & (x - 1)) == 0; } #undef roundup2 -static __inline size_t roundup2(size_t value, size_t granularity) { +static inline size_t roundup2(size_t value, size_t granularity) { assert(is_power2(granularity)); return (value + granularity - 1) & ~(granularity - 1); } //----------------------------------------------------------------------------- -static __inline void memory_barrier(void) { +static inline void memory_barrier(void) { #if __has_extension(c_atomic) || __has_extension(cxx_atomic) __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); #elif defined(__ATOMIC_SEQ_CST) @@ -225,7 +214,7 @@ static __inline void memory_barrier(void) { #endif } -static __inline void cpu_relax() { +static inline void cpu_relax() { #if defined(__ia32__) _mm_pause(); #elif defined(_WIN32) || defined(_WIN64) || defined(_WINDOWS) || \ diff --git a/test/valgrind_suppress.txt b/test/valgrind_suppress.txt index c01054ac..92ab677a 100644 --- a/test/valgrind_suppress.txt +++ b/test/valgrind_suppress.txt @@ -3,7 +3,7 @@ Memcheck:Param msync(start) ... - fun:sync_locked* + fun:dxb_sync_locked* } { msync-whole-mmap-2 @@ -24,7 +24,7 @@ Memcheck:Param msync(start) ... 
- fun:wipe_steady* + fun:meta_wipe_steady* } { msync-meta From 12eb2df57d1868987355aabf77bdf7544f47cdc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 May 2024 22:30:47 +0300 Subject: [PATCH 177/443] =?UTF-8?q?mdbx:=20=D1=83=D0=B4=D0=B0=D0=BB=D0=B5?= =?UTF-8?q?=D0=BD=D0=B8=D0=B5=20`DEFAULT=5FMAPSIZE`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/api-env.c | 72 ++++++++++++++++++++++++++----------------------- src/dxb.c | 2 +- src/internals.h | 5 ---- 3 files changed, 39 insertions(+), 40 deletions(-) diff --git a/src/api-env.c b/src/api-env.c index 9115370e..1e8988b6 100644 --- a/src/api-env.c +++ b/src/api-env.c @@ -3,35 +3,35 @@ #include "internals.h" -__cold static intptr_t reasonable_db_maxsize(intptr_t *cached_result) { - if (*cached_result == 0) { +__cold static intptr_t reasonable_db_maxsize(void) { + static intptr_t cached_result; + if (cached_result == 0) { intptr_t pagesize, total_ram_pages; if (unlikely(mdbx_get_sysraminfo(&pagesize, &total_ram_pages, nullptr) != MDBX_SUCCESS)) - return *cached_result = MAX_MAPSIZE32 /* the 32-bit limit is good enough - for fallback */ - ; + /* the 32-bit limit is good enough for fallback */ + return cached_result = MAX_MAPSIZE32; if (unlikely((size_t)total_ram_pages * 2 > MAX_MAPSIZE / (size_t)pagesize)) - return *cached_result = MAX_MAPSIZE; + return cached_result = MAX_MAPSIZE; assert(MAX_MAPSIZE >= (size_t)(total_ram_pages * pagesize * 2)); /* Suggesting should not be more than golden ratio of the size of RAM. */ - *cached_result = (intptr_t)((size_t)total_ram_pages * 207 >> 7) * pagesize; + cached_result = (intptr_t)((size_t)total_ram_pages * 207 >> 7) * pagesize; /* Round to the nearest human-readable granulation. 
*/ for (size_t unit = MEGABYTE; unit; unit <<= 5) { - const size_t floor = floor_powerof2(*cached_result, unit); - const size_t ceil = ceil_powerof2(*cached_result, unit); - const size_t threshold = (size_t)*cached_result >> 4; + const size_t floor = floor_powerof2(cached_result, unit); + const size_t ceil = ceil_powerof2(cached_result, unit); + const size_t threshold = (size_t)cached_result >> 4; const bool down = - *cached_result - floor < ceil - *cached_result || ceil > MAX_MAPSIZE; - if (threshold < (down ? *cached_result - floor : ceil - *cached_result)) + cached_result - floor < ceil - cached_result || ceil > MAX_MAPSIZE; + if (threshold < (down ? cached_result - floor : ceil - cached_result)) break; - *cached_result = down ? floor : ceil; + cached_result = down ? floor : ceil; } } - return *cached_result; + return cached_result; } __cold static int check_alternative_lck_absent(const pathchar_t *lck_pathname) { @@ -1023,7 +1023,6 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, } #endif /* MDBX_DEBUG */ - intptr_t reasonable_maxsize_cache = 0; if (env->dxb_mmap.base) { /* env already mapped */ if (unlikely(env->flags & MDBX_RDONLY)) @@ -1089,16 +1088,17 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, pagesize = MDBX_MIN_PAGESIZE; /* choose pagesize */ - intptr_t max_size = (size_now > size_lower) ? size_now : size_lower; - max_size = (size_upper > max_size) ? size_upper : max_size; - if (max_size < 0 /* default */) - max_size = DEFAULT_MAPSIZE; - else if (max_size == 0 /* minimal */) - max_size = MIN_MAPSIZE; - else if (max_size >= (intptr_t)MAX_MAPSIZE /* maximal */) - max_size = reasonable_db_maxsize(&reasonable_maxsize_cache); + intptr_t top = (size_now > size_lower) ? 
size_now : size_lower; + if (size_upper > top) + top = size_upper; + if (top < 0 /* default */) + top = reasonable_db_maxsize(); + else if (top == 0 /* minimal */) + top = MIN_MAPSIZE; + else if (top >= (intptr_t)MAX_MAPSIZE /* maximal */) + top = MAX_MAPSIZE; - while (max_size > pagesize * (int64_t)(MAX_PAGENO + 1) && + while (top > pagesize * (int64_t)(MAX_PAGENO + 1) && pagesize < MDBX_MAX_PAGESIZE) pagesize <<= 1; } @@ -1116,7 +1116,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, size_lower = MIN_PAGENO * pagesize; } if (size_lower >= INTPTR_MAX) { - size_lower = reasonable_db_maxsize(&reasonable_maxsize_cache); + size_lower = reasonable_db_maxsize(); if ((size_t)size_lower / pagesize > MAX_PAGENO + 1) size_lower = pagesize * (MAX_PAGENO + 1); } @@ -1127,27 +1127,31 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, size_now = size_upper; } if (size_now >= INTPTR_MAX) { - size_now = reasonable_db_maxsize(&reasonable_maxsize_cache); + size_now = reasonable_db_maxsize(); if ((size_t)size_now / pagesize > MAX_PAGENO + 1) size_now = pagesize * (MAX_PAGENO + 1); } if (size_upper <= 0) { - if (size_now >= reasonable_db_maxsize(&reasonable_maxsize_cache) / 2) - size_upper = reasonable_db_maxsize(&reasonable_maxsize_cache); - else if (MAX_MAPSIZE != MAX_MAPSIZE32 && - (size_t)size_now >= MAX_MAPSIZE32 / 2 && + if (growth_step == 0 || size_upper == 0) + size_upper = size_now; + else if (size_now >= reasonable_db_maxsize() / 2) + size_upper = reasonable_db_maxsize(); + else if ((size_t)size_now >= MAX_MAPSIZE32 / 2 && (size_t)size_now <= MAX_MAPSIZE32 / 4 * 3) size_upper = MAX_MAPSIZE32; else { - size_upper = size_now + size_now; - if ((size_t)size_upper < DEFAULT_MAPSIZE * 2) - size_upper = DEFAULT_MAPSIZE * 2; + size_upper = ceil_powerof2(((size_t)size_now < MAX_MAPSIZE / 4) + ? 
size_now + size_now + : size_now + size_now / 2, + MEGABYTE * MDBX_WORDBITS * MDBX_WORDBITS / 32); + if ((size_t)size_upper > MAX_MAPSIZE) + size_upper = MAX_MAPSIZE; } if ((size_t)size_upper / pagesize > (MAX_PAGENO + 1)) size_upper = pagesize * (MAX_PAGENO + 1); } else if (size_upper >= INTPTR_MAX) { - size_upper = reasonable_db_maxsize(&reasonable_maxsize_cache); + size_upper = reasonable_db_maxsize(); if ((size_t)size_upper / pagesize > MAX_PAGENO + 1) size_upper = pagesize * (MAX_PAGENO + 1); } diff --git a/src/dxb.c b/src/dxb.c index cacf5975..9da9c009 100644 --- a/src/dxb.c +++ b/src/dxb.c @@ -589,7 +589,7 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, if (!env->geo_in_bytes.now) { /* set defaults if not configured */ - err = mdbx_env_set_geometry(env, 0, -1, DEFAULT_MAPSIZE, -1, -1, -1); + err = mdbx_env_set_geometry(env, 0, -1, -1, -1, -1, -1); if (unlikely(err != MDBX_SUCCESS)) return err; } diff --git a/src/internals.h b/src/internals.h index 45d83725..d3c36bc9 100644 --- a/src/internals.h +++ b/src/internals.h @@ -494,11 +494,6 @@ struct MDBX_env { /* pseudo-error code, not exposed outside libmdbx */ #define MDBX_NO_ROOT (MDBX_LAST_ADDED_ERRCODE + 33) -/* Default size of memory map. - * This is certainly too small for any actual applications. Apps should - * always set the size explicitly using mdbx_env_set_geometry(). */ -#define DEFAULT_MAPSIZE MEGABYTE - /* Number of slots in the reader table. * This value was chosen somewhat arbitrarily. The 61 is a prime number, * and such readers plus a couple mutexes fit into single 4KB page. 
From 2f66eb9fec486b44bb4d568d0098fad6d4fe8812 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 15 May 2024 00:09:58 +0300 Subject: [PATCH 178/443] =?UTF-8?q?mdbx-tools:=20=D0=BF=D1=80=D0=B5=D0=BE?= =?UTF-8?q?=D0=B1=D1=80=D0=B0=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20?= =?UTF-8?q?=D0=B8=D0=B7=20hex=20=D0=B1=D0=B5=D0=B7=20=D0=BF=D0=B5=D1=80?= =?UTF-8?q?=D0=B5=D1=85=D0=BE=D0=B4=D0=BE=D0=B2=20=D0=B2=20`mdbx=5Fload`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tools/load.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/tools/load.c b/src/tools/load.c index ade698f3..77a81864 100644 --- a/src/tools/load.c +++ b/src/tools/load.c @@ -369,16 +369,15 @@ static int badend(void) { } static inline int unhex(unsigned char *c2) { - int x, c; - x = *c2++ & 0x4f; - if (x & 0x40) - x -= 55; - c = x << 4; - x = *c2 & 0x4f; - if (x & 0x40) - x -= 55; - c |= x; - return c; + int8_t hi = c2[0]; + hi = (hi | 0x20) - 'a'; + hi += 10 + ((hi >> 7) & 39); + + int8_t lo = c2[1]; + lo = (lo | 0x20) - 'a'; + lo += 10 + ((lo >> 7) & 39); + + return hi << 4 | lo; } __hot static int readline(MDBX_val *out, MDBX_val *buf) { From 0e831f42cc5a4e3f769e669f327a42e5a7f5b758 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 20 May 2024 11:14:23 +0300 Subject: [PATCH 179/443] =?UTF-8?q?mdbx-testing:=20=D0=B8=D0=B7=D0=BC?= =?UTF-8?q?=D0=B5=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=83=D1=80=D0=BE=D0=B2?= =?UTF-8?q?=D0=BD=D1=8F=20=D0=BB=D0=BE=D0=B3=D0=B8=D1=80=D0=BE=D0=B2=D0=B0?= =?UTF-8?q?=D0=BD=D0=B8=D1=8F=20=D0=BF=D0=BE-=D1=83=D0=BC=D0=BE=D0=BB?= =?UTF-8?q?=D1=87=D0=B0=D0=BD=D0=B8=D1=8E.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
test/main.c++ | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/main.c++ b/test/main.c++ index 7c07f2fe..6b482807 100644 --- a/test/main.c++ +++ b/test/main.c++ @@ -115,10 +115,14 @@ MDBX_NORETURN void usage(void) { void actor_params::set_defaults(const std::string &tmpdir) { pathname_log = ""; loglevel = -#if defined(NDEBUG) || defined(_WIN32) || defined(_WIN64) +#if MDBX_DEBUG < 1 + logging::verbose; +#elif MDBX_DEBUG > 1 + logging::trace; +#elif defined(_WIN32) || defined(_WIN64) || defined(__APPLE__) logging::verbose; #else - logging::trace; + logging::debug; #endif pathname_db = tmpdir + "mdbx-test.db"; From 5792eb31eb868498fe817358ba91895c96a2cfc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 20 May 2024 14:36:50 +0300 Subject: [PATCH 180/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D1=86=D0=B8=D0=B9?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20subpage:=20limit,=20room=5Fthreshold,?= =?UTF-8?q?=20reserve=5Fprereq,=20reserve=5Flimit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 44 +++++++++++++++++++++++- src/env-opts.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++ src/env.c | 10 +----- src/internals.h | 7 ++++ src/page-ops.c | 31 ++++++++++++++--- src/page-ops.h | 2 +- src/proto.h | 1 + 7 files changed, 169 insertions(+), 15 deletions(-) diff --git a/mdbx.h b/mdbx.h index 37cfd10f..7a7b25e0 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2344,7 +2344,49 @@ typedef enum MDBX_option { * будет УМЕНЬШАТЬ неравномерность заполнения страниц. 
* * \see MDBX_opt_merge_threshold_16dot16_percent */ - MDBX_opt_prefer_waf_insteadof_balance + MDBX_opt_prefer_waf_insteadof_balance, + + /** \brief Задаёт в % максимальный размер вложенных страниц, используемых для + * размещения небольшого количества мульти-значений связанных с одним ключом. + * + * Использование вложенных страниц, вместо выноса значений на отдельные + * страницы вложенного дерева, позволяет уменьшить объем неиспользуемого места + * и этим увеличить плотность размещения данных. + * + * Но с увеличением размера вложенных страниц требуется больше листовых + * страниц основного дерева, что также увеличивает высоту основного дерева. + * Кроме этого, изменение данных на вложенных страницах требует дополнительных + * копирований, поэтому стоимость может быть больше во многих сценариях. + * + * min 12.5% (8192), max 100% (65535), default = 100% */ + MDBX_opt_subpage_limit, + + /** \brief Задаёт в % минимальный объём свободного места на основной странице, + * при отсутствии которого вложенные страницы выносятся в отдельное дерево. + * + * min 0, max 100% (65535), default = 0 */ + MDBX_opt_subpage_room_threshold, + + /** \brief Задаёт в % минимальный объём свободного места на основной странице, + * при наличии которого, производится резервирование места во вложенной. + * + * Если на основной странице свободного места недостаточно, то вложенная + * страница будет минимального размера. В свою очередь, при отсутствии резерва + * во вложенной странице, каждое добавление в неё элементов будет требовать + * переформирования основной страницы с переносом всех узлов данных. + * + * Поэтому резервирование места, как правило, выгодно в сценариях с + * интенсивным добавлением коротких мульти-значений, например при + * индексировании. Но уменьшает плотность размещения данных, соответственно + * увеличивает объем БД и операций ввода-вывода.
+ * + * min 0, max 100% (65535), default = 42% (27525) */ + MDBX_opt_subpage_reserve_prereq, + + /** \brief Задаёт в % ограничение резервирования места на вложенных страницах. + * + * min 0, max 100% (65535), default = 4.2% (2753) */ + MDBX_opt_subpage_reserve_limit } MDBX_option_t; /** \brief Sets the value of a extra runtime options for an environment. diff --git a/src/env-opts.c b/src/env-opts.c index c1e6324d..659fb5fa 100644 --- a/src/env-opts.c +++ b/src/env-opts.c @@ -31,6 +31,26 @@ static bool default_prefer_waf_insteadof_balance(const MDBX_env *env) { return false; } +static uint16_t default_subpage_limit(const MDBX_env *env) { + (void)env; + return 65535 /* 100% */; +} + +static uint16_t default_subpage_room_threshold(const MDBX_env *env) { + (void)env; + return 0 /* 0% */; +} + +static uint16_t default_subpage_reserve_prereq(const MDBX_env *env) { + (void)env; + return 27525 /* 42% */; +} + +static uint16_t default_subpage_reserve_limit(const MDBX_env *env) { + (void)env; + return 2753 /* 4.2% */; +} + void env_options_init(MDBX_env *env) { env->options.rp_augment_limit = MDBX_PNL_INITIAL; env->options.dp_reserve_limit = MDBX_PNL_INITIAL; @@ -50,6 +70,11 @@ void env_options_init(MDBX_env *env) { #endif /* Linux */ MDBX_WRITETHROUGH_THRESHOLD_DEFAULT; #endif /* Windows */ + + env->options.subpage.limit = default_subpage_limit(env); + env->options.subpage.room_threshold = default_subpage_room_threshold(env); + env->options.subpage.reserve_prereq = default_subpage_reserve_prereq(env); + env->options.subpage.reserve_limit = default_subpage_reserve_limit(env); } void env_options_adjust_defaults(MDBX_env *env) { @@ -318,6 +343,54 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, env->options.prefer_waf_insteadof_balance = value != 0; break; + case MDBX_opt_subpage_limit: + if (value == /* default */ UINT64_MAX) { + env->options.subpage.limit = default_subpage_limit(env); + recalculate_subpage_thresholds(env); + } else if (value > 
65535) + err = MDBX_EINVAL; + else { + env->options.subpage.limit = (uint16_t)value; + recalculate_subpage_thresholds(env); + } + break; + + case MDBX_opt_subpage_room_threshold: + if (value == /* default */ UINT64_MAX) { + env->options.subpage.room_threshold = default_subpage_room_threshold(env); + recalculate_subpage_thresholds(env); + } else if (value > 65535) + err = MDBX_EINVAL; + else { + env->options.subpage.room_threshold = (uint16_t)value; + recalculate_subpage_thresholds(env); + } + break; + + case MDBX_opt_subpage_reserve_prereq: + if (value == /* default */ UINT64_MAX) { + env->options.subpage.reserve_prereq = default_subpage_reserve_prereq(env); + recalculate_subpage_thresholds(env); + } else if (value > 65535) + err = MDBX_EINVAL; + else { + env->options.subpage.reserve_prereq = (uint16_t)value; + recalculate_subpage_thresholds(env); + } + break; + + case MDBX_opt_subpage_reserve_limit: + if (value == /* default */ UINT64_MAX) { + env->options.subpage.reserve_limit = default_subpage_reserve_limit(env); + recalculate_subpage_thresholds(env); + } else if (value > 65535) + err = MDBX_EINVAL; + else { + env->options.subpage.reserve_limit = (uint16_t)value; + recalculate_subpage_thresholds(env); + } + break; + default: return MDBX_EINVAL; } @@ -411,6 +484,22 @@ __cold int mdbx_env_get_option(const MDBX_env *env, const MDBX_option_t option, *pvalue = env->options.prefer_waf_insteadof_balance; break; + case MDBX_opt_subpage_limit: + *pvalue = env->options.subpage.limit; + break; + + case MDBX_opt_subpage_room_threshold: + *pvalue = env->options.subpage.room_threshold; + break; + + case MDBX_opt_subpage_reserve_prereq: + *pvalue = env->options.subpage.reserve_prereq; + break; + + case MDBX_opt_subpage_reserve_limit: + *pvalue = env->options.subpage.reserve_limit; + break; + default: return MDBX_EINVAL; } diff --git a/src/env.c b/src/env.c index 10fcfc29..2d5dadc5 100644 --- a/src/env.c +++ b/src/env.c @@ -58,15 +58,7 @@ __cold unsigned 
env_setup_pagesize(MDBX_env *env, const size_t pagesize) { eASSERT(env, pgno2bytes(env, 1) == pagesize); eASSERT(env, bytes2pgno(env, pagesize + pagesize) == 2); recalculate_merge_thresholds(env); - - /* TODO: recalculate me_subpage_xyz values from MDBX_opt_subpage_xyz. */ - env->subpage_limit = env->leaf_nodemax - NODESIZE; - env->subpage_room_threshold = 0; - env->subpage_reserve_prereq = env->leaf_nodemax; - env->subpage_reserve_limit = env->subpage_limit / 42; - eASSERT(env, env->subpage_reserve_prereq > - env->subpage_room_threshold + env->subpage_reserve_limit); - eASSERT(env, env->leaf_nodemax >= env->subpage_limit + NODESIZE); + recalculate_subpage_thresholds(env); const pgno_t max_pgno = bytes2pgno(env, MAX_MAPSIZE); if (!env->options.flags.non_auto.dp_limit) { diff --git a/src/internals.h b/src/internals.h index d3c36bc9..e986e6c5 100644 --- a/src/internals.h +++ b/src/internals.h @@ -408,6 +408,13 @@ struct MDBX_env { bool prefault_write; bool prefer_waf_insteadof_balance; /* Strive to minimize WAF instead of balancing pages fullment */ + struct { + uint16_t limit; + uint16_t room_threshold; + uint16_t reserve_prereq; + uint16_t reserve_limit; + } subpage; + union { unsigned all; /* tracks options with non-auto values but tuned by user */ diff --git a/src/page-ops.c b/src/page-ops.c index b25c860e..d07cde07 100644 --- a/src/page-ops.c +++ b/src/page-ops.c @@ -752,12 +752,35 @@ __hot int __must_check_result page_dirty(MDBX_txn *txn, page_t *mp, return MDBX_SUCCESS; } -size_t page_subleaf2_reserve(const MDBX_env *const env, size_t host_page_room, +void recalculate_subpage_thresholds(MDBX_env *env) { + size_t whole = env->leaf_nodemax - NODESIZE; + env->subpage_limit = (whole * env->options.subpage.limit + 32767) >> 16; + whole = env->subpage_limit; + env->subpage_reserve_limit = + (whole * env->options.subpage.reserve_limit + 32767) >> 16; + eASSERT(env, env->leaf_nodemax >= env->subpage_limit + NODESIZE); + eASSERT(env, env->subpage_limit >= 
env->subpage_reserve_limit); + + whole = env->leaf_nodemax; + env->subpage_room_threshold = + (whole * env->options.subpage.room_threshold + 32767) >> 16; + env->subpage_reserve_prereq = + (whole * env->options.subpage.reserve_prereq + 32767) >> 16; + if (env->subpage_room_threshold + env->subpage_reserve_limit > + (intptr_t)page_space(env)) + env->subpage_reserve_prereq = page_space(env); + else if (env->subpage_reserve_prereq < + env->subpage_room_threshold + env->subpage_reserve_limit) + env->subpage_reserve_prereq = + env->subpage_room_threshold + env->subpage_reserve_limit; + eASSERT(env, env->subpage_reserve_prereq > + env->subpage_room_threshold + env->subpage_reserve_limit); +} + +size_t page_subleaf2_reserve(const MDBX_env *env, size_t host_page_room, size_t subpage_len, size_t item_len) { eASSERT(env, (subpage_len & 1) == 0); - eASSERT(env, env->subpage_reserve_prereq > env->subpage_room_threshold + - env->subpage_reserve_limit && - env->leaf_nodemax >= env->subpage_limit + NODESIZE); + eASSERT(env, env->leaf_nodemax >= env->subpage_limit + NODESIZE); size_t reserve = 0; for (size_t n = 0; n < 5 && reserve + item_len <= env->subpage_reserve_limit && diff --git a/src/page-ops.h b/src/page-ops.h index 5e58ab77..63cdd0b5 100644 --- a/src/page-ops.h +++ b/src/page-ops.h @@ -171,7 +171,7 @@ static inline void page_wash(MDBX_txn *txn, size_t di, page_t *const mp, pgno2bytes(txn->env, npages) - PAGEHDRSZ); } -MDBX_INTERNAL size_t page_subleaf2_reserve(const MDBX_env *const env, +MDBX_INTERNAL size_t page_subleaf2_reserve(const MDBX_env *env, size_t host_page_room, size_t subpage_len, size_t item_len); diff --git a/src/proto.h b/src/proto.h index 5c752405..ebee21a3 100644 --- a/src/proto.h +++ b/src/proto.h @@ -97,6 +97,7 @@ MDBX_INTERNAL int __must_check_result tree_rebalance(MDBX_cursor *mc); MDBX_INTERNAL int __must_check_result tree_propagate_key(MDBX_cursor *mc, const MDBX_val *key); MDBX_INTERNAL void recalculate_merge_thresholds(MDBX_env *env); 
+MDBX_INTERNAL void recalculate_subpage_thresholds(MDBX_env *env); /* subdb.c */ MDBX_INTERNAL int __must_check_result sdb_fetch(MDBX_txn *txn, size_t dbi); From b940ae8fada78530c832dc8ae2666694e13a8c05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 20 May 2024 18:18:18 +0300 Subject: [PATCH 181/443] =?UTF-8?q?mdbx++:=20=D0=B8=D1=81=D0=BF=D0=BE?= =?UTF-8?q?=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20`\n`?= =?UTF-8?q?=20=D0=B2=D0=BC=D0=B5=D1=81=D1=82=D0=BE=20`std::endl`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mdbx.c++ | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/mdbx.c++ b/src/mdbx.c++ index cc5b35ef..aa3fb285 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -241,6 +241,21 @@ struct temp_buffer { } // namespace +#ifndef MDBX_CXX_ENDL +/* Манипулятор std::endl выталкивает буферизированный вывод, что здесь не + * требуется. + * + * Кроме этого, при сборке libmdbx для символов по умолчанию выключается + * видимость вне DSO, из-за чего обращение к std::endl иногда укачивает + * линковщики, если компилятор ошибочно формирует direct access к global weak + * symbol, коим является std::endl.
*/ +#if 0 +#define MDBX_CXX_ENDL ::std::endl +#else +#define MDBX_CXX_ENDL "\n" +#endif +#endif /* MDBX_CXX_ENDL */ + //------------------------------------------------------------------------------ namespace mdbx { @@ -666,7 +681,7 @@ char *to_hex::write_bytes(char *__restrict const dest, size_t dest_size) const { unsigned width = 0; for (const auto end = source.end_byte_ptr(); src != end; ++src) { if (wrap_width && width >= wrap_width) { - out << ::std::endl; + out << MDBX_CXX_ENDL; width = 0; } const int8_t hi = *src >> 4; @@ -861,7 +876,7 @@ char *to_base58::write_bytes(char *__restrict const dest, while (MDBX_LIKELY(begin < end) && *begin == 0) { out.put('1'); if (wrap_width && ++width >= wrap_width) { - out << ::std::endl; + out << MDBX_CXX_ENDL; width = 0; } ++begin; @@ -875,7 +890,7 @@ char *to_base58::write_bytes(char *__restrict const dest, for (size_t i = 0; i < chunk.length(); ++i) { out.put(chunk.char_ptr()[i]); if (wrap_width && ++width >= wrap_width) { - out << ::std::endl; + out << MDBX_CXX_ENDL; width = 0; } } @@ -1052,7 +1067,7 @@ char *to_base64::write_bytes(char *__restrict const dest, src += 3; out.write(&buf.front(), 4); if (wrap_width && (width += 4) >= wrap_width && left) { - out << ::std::endl; + out << MDBX_CXX_ENDL; width = 0; } continue; From e9c122af68eb27755c33c4bf1f4ed49ad35f6b57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 21 May 2024 18:20:06 +0300 Subject: [PATCH 182/443] =?UTF-8?q?mdbx-windows:=20=D1=87=D0=B8=D1=81?= =?UTF-8?q?=D1=82=D0=BA=D0=B0=20=D1=80=D0=B5=D0=B7=D1=83=D0=BB=D1=8C=D1=82?= =?UTF-8?q?=D0=B0=D1=82=D0=BE=D0=B2=20`FormatMessageA()`=20=D0=BE=D1=82=20?= =?UTF-8?q?=D0=BA=D0=BE=D0=BD=D1=86=D0=B5=D0=B2=D1=8B=D1=85=20=D0=BF=D0=B5?= =?UTF-8?q?=D1=80=D0=B5=D0=B2=D0=BE=D0=B4=D0=BE=D0=B2=20=D1=81=D1=82=D1=80?= =?UTF-8?q?=D0=BE=D0=BA.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- src/misc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/misc.c b/src/misc.c index 70acce0b..5c61d641 100644 --- a/src/misc.c +++ b/src/misc.c @@ -177,10 +177,13 @@ __cold const char *mdbx_strerror_r(int errnum, char *buf, size_t buflen) { const char *msg = mdbx_liberr2str(errnum); if (!msg && buflen > 0 && buflen < INT_MAX) { #if defined(_WIN32) || defined(_WIN64) - const DWORD size = FormatMessageA( + DWORD size = FormatMessageA( FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, nullptr, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buf, (DWORD)buflen, nullptr); + while (size && buf[size - 1] <= ' ') + --size; + buf[size] = 0; return size ? buf : "FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM) failed"; #elif defined(_GNU_SOURCE) && defined(__GLIBC__) /* GNU-specific */ @@ -231,10 +234,13 @@ __cold const char *mdbx_strerror(int errnum) { const char *mdbx_strerror_r_ANSI2OEM(int errnum, char *buf, size_t buflen) { const char *msg = mdbx_liberr2str(errnum); if (!msg && buflen > 0 && buflen < INT_MAX) { - const DWORD size = FormatMessageA( + DWORD size = FormatMessageA( FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, nullptr, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buf, (DWORD)buflen, nullptr); + while (size && buf[size - 1] <= ' ') + --size; + buf[size] = 0; if (!size) msg = "FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM) failed"; else if (!CharToOemBuffA(buf, buf, size)) From e12b4ab7482a675e1891686c77edf67e6d9a2ae0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 22 May 2024 01:19:54 +0300 Subject: [PATCH 183/443] =?UTF-8?q?mdbx-testing:=20=D0=BA=D0=BE=D1=81?= =?UTF-8?q?=D1=82=D1=8B=D0=BB=D1=8C=20=D0=B4=D0=BB=D1=8F=20MSVC=20ARM/ARM6?= =?UTF-8?q?4=20=D0=B4=D0=BB=D1=8F=20=D0=BF=D1=80=D0=B5=D0=B4=D0=BE=D1=82?= 
=?UTF-8?q?=D0=B2=D1=80=D0=B0=D1=89=D0=B5=D0=BD=D0=B8=D1=8F=20ICE.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/chrono.c++ | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/chrono.c++ b/test/chrono.c++ index f9f20862..093d938c 100644 --- a/test/chrono.c++ +++ b/test/chrono.c++ @@ -31,7 +31,16 @@ uint32_t us2fractional(uint32_t us) { } uint32_t fractional2us(uint32_t fractional) { +#if !(defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)) + /* Смеяться или плакать, но все существующие на май 2024 компиляторы Microsoft + * для ARM/ARM64, уже порядка 10 лет, падают на этом коде из-за внутренней + * ошибки (aka ICE). */ return uint32_t((fractional * uint64_t(USEC_PER_SEC)) >> 32); +#else + static_assert(USEC_PER_SEC % 16 == 0, "WTF?"); + /* Crutch for MSVC ARM/ARM64 compilers to avoid internal compiler error. */ + return UInt32x32To64(fractional, USEC_PER_SEC / 16) >> 28; +#endif } #ifndef MSEC_PER_SEC From 8e29fb1f26fc8f883808c8d0468f2d8549283cd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 22 May 2024 21:23:43 +0300 Subject: [PATCH 184/443] =?UTF-8?q?mdbx-testing:=20=D0=B8=D1=81=D0=BF?= =?UTF-8?q?=D1=80=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF?= =?UTF-8?q?=D0=B5=D1=87=D0=B0=D1=82=D0=BA=D0=B8=20=D0=B2=20keygen-setup.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/keygen.c++ | 2 +- test/keygen.h++ | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/keygen.c++ b/test/keygen.c++ index 7e0df590..a3879770 100644 --- a/test/keygen.c++ +++ b/test/keygen.c++ @@ -266,7 +266,7 @@ void maker::setup(const config::actor_params_pod &actor, value_essentials.bits - essentials::value_age_minwidth || mapping.split >= mapping.width) mapping.split -= 1; - if (split != mapping.width) + if (split
!= mapping.split) log_notice("keygen: reduce mapping-split from %u to %u", split, mapping.split); diff --git a/test/keygen.h++ b/test/keygen.h++ index 80a87d0e..a9d69dfe 100644 --- a/test/keygen.h++ +++ b/test/keygen.h++ @@ -101,9 +101,9 @@ class maker { uint16_t flags{0}; uint32_t maxlen{0}; serial_t mask{0}; - unsigned bits; + unsigned bits{0}; } key_essentials, value_essentials; - unsigned value_age_bits; + unsigned value_age_bits{0}; serial_t value_age_mask{0}; static serial_t mk_begin(serial_t serial, const essentials &params, From a2753c9ae10a435657407e57dcce998bc62b74be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 23 May 2024 12:42:59 +0300 Subject: [PATCH 185/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20`gc=5Fupdate()`=20=D1=81=20=D0=BE?= =?UTF-8?q?=D1=82=D0=BA=D0=BB=D1=8E=D1=87=D0=B5=D0=BD=D0=B8=D0=B5=D0=BC=20?= =?UTF-8?q?=D0=BD=D0=BE=D0=B2=D0=BE=D0=B3=D0=BE/=D0=BD=D0=B5=D1=81=D1=82?= =?UTF-8?q?=D0=B0=D0=B1=D0=B8=D0=BB=D1=8C=D0=BD=D0=BE=D0=B3=D0=BE=20=D0=BA?= =?UTF-8?q?=D0=BE=D0=B4=D0=B0.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/gc-put.c | 145 ++++++++++++++++++++++++++++++++++----------------- src/gc.h | 16 ++++-- 2 files changed, 109 insertions(+), 52 deletions(-) diff --git a/src/gc-put.c b/src/gc-put.c index 81106b7a..e5a036d9 100644 --- a/src/gc-put.c +++ b/src/gc-put.c @@ -3,8 +3,12 @@ #include "internals.h" -MDBX_MAYBE_UNUSED static inline const char *dbg_prefix(gcu_t *ctx) { - return ctx->lifo ? " lifo" : " fifo"; +MDBX_NOTHROW_PURE_FUNCTION static bool is_lifo(const MDBX_txn *txn) { + return (txn->env->flags & MDBX_LIFORECLAIM) != 0; +} + +MDBX_MAYBE_UNUSED static inline const char *dbg_prefix(const gcu_t *ctx) { + return is_lifo(ctx->cursor.txn) ?
" lifo" : " fifo"; } static inline size_t backlog_size(MDBX_txn *txn) { @@ -314,7 +318,7 @@ static int gcu_retired(MDBX_txn *txn, gcu_t *ctx) { #endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */ if (retired_pages_before == MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { - const size_t at = (ctx->lifo == MDBX_PNL_ASCENDING) + const size_t at = (is_lifo(txn) == MDBX_PNL_ASCENDING) ? left - chunk : ctx->retired_stored; pgno_t *const begin = txn->tw.retired_pages + at; @@ -387,7 +391,7 @@ typedef struct gcu_rid_result { static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, const size_t left) { rid_t r; - if (ctx->lifo) { + if (is_lifo(txn)) { if (txn->tw.gc.reclaimed == nullptr) { txn->tw.gc.reclaimed = txl_alloc(); if (unlikely(!txn->tw.gc.reclaimed)) { @@ -584,35 +588,43 @@ int gc_update(MDBX_txn *txn, gcu_t *ctx) { MDBX_env *const env = txn->env; ctx->cursor.next = txn->cursors[FREE_DBI]; txn->cursors[FREE_DBI] = &ctx->cursor; + int rc; + + // tASSERT(txn, MDBX_PNL_GETSIZE(txn->tw.retired_pages) || + // ctx->cleaned_slot < + // (txn->tw.gc.reclaimed ? + // MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0) + // || ctx->cleaned_id < txn->tw.gc.last_reclaimed); - pgno_t prev_first_unallocated = 0; /* txn->tw.relist[] can grow and shrink during this call. * txn->tw.gc.last_reclaimed and txn->tw.retired_pages[] can only grow. * But page numbers cannot disappear from txn->tw.retired_pages[]. 
*/ +#if MDBX_ENABLE_GC_EXPERIMENTAL retry_clean_adj: ctx->reserve_adj = 0; +#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ retry: - ctx->loop += prev_first_unallocated == txn->geo.first_unallocated; - prev_first_unallocated = txn->geo.first_unallocated; + ctx->loop += ctx->prev_first_unallocated == txn->geo.first_unallocated; + TRACE(">> restart, loop %u", ctx->loop); - if (ctx->loop) - TRACE("%s", " >> restart"); - int rc = MDBX_SUCCESS; tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); tASSERT(txn, dpl_check(txn)); if (unlikely(/* paranoia */ ctx->loop > ((MDBX_DEBUG > 0) ? 12 : 42))) { - ERROR("too more loops %zu, bailout", ctx->loop); + ERROR("too more loops %u, bailout", ctx->loop); rc = MDBX_PROBLEM; goto bailout; } - if (unlikely(ctx->dense)) { + if (unlikely(ctx->dense || + ctx->prev_first_unallocated > txn->geo.first_unallocated)) { rc = clean_stored_retired(txn, ctx); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } + ctx->prev_first_unallocated = txn->geo.first_unallocated; + rc = MDBX_SUCCESS; ctx->reserved = 0; ctx->cleaned_slot = 0; ctx->reused_slot = 0; @@ -623,19 +635,11 @@ retry: /* Come back here after each Put() in case retired-list changed */ TRACE("%s", " >> continue"); - if (ctx->retired_stored != MDBX_PNL_GETSIZE(txn->tw.retired_pages) && - (ctx->loop == 1 || ctx->retired_stored > env->maxgc_large1page || - MDBX_PNL_GETSIZE(txn->tw.retired_pages) > env->maxgc_large1page)) { - rc = prepare_backlog(txn, ctx); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); MDBX_val key, data; - if (ctx->lifo) { + if (is_lifo(txn)) { if (ctx->cleaned_slot < (txn->tw.gc.reclaimed ? 
MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0)) { ctx->reserved = 0; @@ -654,11 +658,9 @@ retry: continue; if (unlikely(rc != MDBX_SUCCESS)) goto bailout; - if (likely(!ctx->dense)) { - rc = prepare_backlog(txn, ctx); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } + rc = prepare_backlog(txn, ctx); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; tASSERT(txn, ctx->cleaned_id <= env->lck->cached_oldest.weak); TRACE("%s: cleanup-reclaimed-id [%zu]%" PRIaTXN, dbg_prefix(ctx), ctx->cleaned_slot, ctx->cleaned_id); @@ -671,7 +673,8 @@ retry: } } else { /* Удаляем оставшиеся вынутые из GC записи. */ - while (ctx->cleaned_id <= txn->tw.gc.last_reclaimed) { + while (txn->tw.gc.last_reclaimed && + ctx->cleaned_id <= txn->tw.gc.last_reclaimed) { rc = outer_first(&ctx->cursor, &key, nullptr); if (rc == MDBX_NOTFOUND) break; @@ -692,11 +695,9 @@ retry: ctx->cleaned_id = unaligned_peek_u64(4, key.iov_base); if (ctx->cleaned_id > txn->tw.gc.last_reclaimed) break; - if (likely(!ctx->dense)) { - rc = prepare_backlog(txn, ctx); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } + rc = prepare_backlog(txn, ctx); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; tASSERT(txn, ctx->cleaned_id <= txn->tw.gc.last_reclaimed); tASSERT(txn, ctx->cleaned_id <= env->lck->cached_oldest.weak); TRACE("%s: cleanup-reclaimed-id %" PRIaTXN, dbg_prefix(ctx), @@ -744,7 +745,9 @@ retry: env->maxgc_large1page / 2)) { TRACE("%s: reclaimed-list changed %zu -> %zu, retry", dbg_prefix(ctx), ctx->amount, MDBX_PNL_GETSIZE(txn->tw.relist)); +#if MDBX_ENABLE_GC_EXPERIMENTAL ctx->reserve_adj += ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.relist); +#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ goto retry; } ctx->amount = MDBX_PNL_GETSIZE(txn->tw.relist); @@ -768,6 +771,7 @@ retry: if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } +#if MDBX_ENABLE_GC_EXPERIMENTAL const size_t left = ctx->amount - ctx->reserved - ctx->reserve_adj; TRACE("%s: amount %zu, reserved %zd, reserve_adj %zu, left %zd, " 
"lifo-reclaimed-slots %zu, " @@ -775,6 +779,15 @@ retry: dbg_prefix(ctx), ctx->amount, ctx->reserved, ctx->reserve_adj, left, txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0, ctx->reused_slot); +#else + const size_t left = ctx->amount - ctx->reserved; + TRACE("%s: amount %zu, reserved %zd, left %zd, " + "lifo-reclaimed-slots %zu, " + "reused-gc-slots %zu", + dbg_prefix(ctx), ctx->amount, ctx->reserved, left, + txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0, + ctx->reused_slot); +#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ if (0 >= (intptr_t)left) break; @@ -897,7 +910,9 @@ retry: TRACE("%s", " >> filling"); /* Fill in the reserved records */ +#if MDBX_ENABLE_GC_EXPERIMENTAL size_t excess_slots = 0; +#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ ctx->fill_idx = txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot @@ -913,15 +928,12 @@ retry: size_t left = ctx->amount, excess = 0; if (txn->tw.gc.reclaimed == nullptr) { - tASSERT(txn, ctx->lifo == 0); + tASSERT(txn, is_lifo(txn) == 0); rc = outer_first(&ctx->cursor, &key, &data); - if (unlikely(rc != MDBX_SUCCESS)) { - if (rc == MDBX_NOTFOUND && ctx->reserve_adj) - goto retry_clean_adj; + if (unlikely(rc != MDBX_SUCCESS)) goto bailout; - } } else { - tASSERT(txn, ctx->lifo != 0); + tASSERT(txn, is_lifo(txn) != 0); } while (true) { @@ -929,29 +941,37 @@ retry: TRACE("%s: left %zu of %zu", dbg_prefix(ctx), left, MDBX_PNL_GETSIZE(txn->tw.relist)); if (txn->tw.gc.reclaimed == nullptr) { - tASSERT(txn, ctx->lifo == 0); + tASSERT(txn, is_lifo(txn) == 0); fill_gc_id = unaligned_peek_u64(4, key.iov_base); if (ctx->fill_idx == 0 || fill_gc_id > txn->tw.gc.last_reclaimed) { +#if MDBX_ENABLE_GC_EXPERIMENTAL if (!left) break; +#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ NOTICE("** restart: reserve depleted (fill_idx %zu, fill_id %" PRIaTXN " > last_reclaimed %" PRIaTXN ", left %zu", ctx->fill_idx, fill_gc_id, txn->tw.gc.last_reclaimed, left); +#if MDBX_ENABLE_GC_EXPERIMENTAL 
ctx->reserve_adj = (ctx->reserve_adj > left) ? ctx->reserve_adj - left : 0; +#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ goto retry; } ctx->fill_idx -= 1; } else { - tASSERT(txn, ctx->lifo != 0); + tASSERT(txn, is_lifo(txn) != 0); if (ctx->fill_idx >= MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)) { +#if MDBX_ENABLE_GC_EXPERIMENTAL if (!left) break; +#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ NOTICE("** restart: reserve depleted (fill_idx %zu >= " "gc.reclaimed %zu, left %zu", ctx->fill_idx, MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed), left); +#if MDBX_ENABLE_GC_EXPERIMENTAL ctx->reserve_adj = (ctx->reserve_adj > left) ? ctx->reserve_adj - left : 0; +#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ goto retry; } ctx->fill_idx += 1; @@ -978,12 +998,14 @@ retry: if (unlikely(chunk > left)) { const size_t delta = chunk - left; excess += delta; + TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, dbg_prefix(ctx), chunk, + left, fill_gc_id); +#if MDBX_ENABLE_GC_EXPERIMENTAL if (!left) { excess_slots += 1; goto next; } - TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, dbg_prefix(ctx), chunk, - left, fill_gc_id); +#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ if ((ctx->loop < 5 && delta > (ctx->loop / 2)) || delta > env->maxgc_large1page) data.iov_len = (left + 1) * sizeof(pgno_t); @@ -999,8 +1021,10 @@ retry: NOTICE("** restart: reclaimed-list changed (%zu -> %zu, loose +%zu)", ctx->amount, MDBX_PNL_GETSIZE(txn->tw.relist), txn->tw.loose_count); +#if MDBX_ENABLE_GC_EXPERIMENTAL if (ctx->loop < 5 || (ctx->loop > 10 && (ctx->loop & 1))) goto retry_clean_adj; +#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ goto retry; } @@ -1036,28 +1060,39 @@ retry: goto bailout; } +#if MDBX_ENABLE_GC_EXPERIMENTAL next: +#else + if (left == 0) + break; +#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ + if (txn->tw.gc.reclaimed == nullptr) { - tASSERT(txn, ctx->lifo == 0); + tASSERT(txn, is_lifo(txn) == 0); rc = outer_next(&ctx->cursor, &key, &data, MDBX_NEXT); if (unlikely(rc != MDBX_SUCCESS)) { - if (rc != MDBX_NOTFOUND) - goto bailout; 
- rc = MDBX_SUCCESS; - break; +#if MDBX_ENABLE_GC_EXPERIMENTAL + if (rc == MDBX_NOTFOUND && !left) { + rc = MDBX_SUCCESS; + break; + } +#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ + goto bailout; } } else { - tASSERT(txn, ctx->lifo != 0); + tASSERT(txn, is_lifo(txn) != 0); } } if (excess) { +#if MDBX_ENABLE_GC_EXPERIMENTAL size_t n = excess, adj = excess; while (n >= env->maxgc_large1page) adj -= n /= env->maxgc_large1page; ctx->reserve_adj += adj; TRACE("%s: extra %zu reserved space, adj +%zu (%zu)", dbg_prefix(ctx), excess, adj, ctx->reserve_adj); +#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ } } @@ -1069,6 +1104,7 @@ retry: goto retry; } +#if MDBX_ENABLE_GC_EXPERIMENTAL if (unlikely(excess_slots)) { const bool will_retry = ctx->loop < 5 || excess_slots > 1; NOTICE("** %s: reserve excess (excess-slots %zu, filled-slot %zu, adj %zu, " @@ -1078,6 +1114,17 @@ retry: if (will_retry) goto retry; } +#else + if (unlikely(ctx->fill_idx != (txn->tw.gc.reclaimed + ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) + : 0))) { + const bool will_retry = ctx->loop < 9; + NOTICE("** %s: reserve excess (filled-idx %zu, loop %u)", + will_retry ? 
"restart" : "ignore", ctx->fill_idx, ctx->loop); + if (will_retry) + goto retry; + } +#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ tASSERT(txn, txn->tw.gc.reclaimed == nullptr || ctx->cleaned_slot == MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)); @@ -1089,6 +1136,6 @@ bailout: #if MDBX_ENABLE_PROFGC env->lck->pgops.gc_prof.wloops += (uint32_t)ctx->loop; #endif /* MDBX_ENABLE_PROFGC */ - TRACE("<<< %zu loops, rc = %d", ctx->loop, rc); + TRACE("<<< %u loops, rc = %d", ctx->loop, rc); return rc; } diff --git a/src/gc.h b/src/gc.h index f77a3bd5..41e787ba 100644 --- a/src/gc.h +++ b/src/gc.h @@ -5,12 +5,22 @@ #include "essentials.h" +#ifndef MDBX_ENABLE_GC_EXPERIMENTAL +#define MDBX_ENABLE_GC_EXPERIMENTAL 0 +#elif !(MDBX_ENABLE_GC_EXPERIMENTAL == 0 || MDBX_ENABLE_GC_EXPERIMENTAL == 1) +#error MDBX_ENABLE_GC_EXPERIMENTAL must be defined as 0 or 1 +#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ + typedef struct gc_update_context { - size_t loop, reserve_adj; + unsigned loop; + pgno_t prev_first_unallocated; + bool dense; +#if MDBX_ENABLE_GC_EXPERIMENTAL + intptr_t reserve_adj; +#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ size_t retired_stored; size_t amount, reserved, cleaned_slot, reused_slot, fill_idx; txnid_t cleaned_id, rid; - bool lifo, dense; #if MDBX_ENABLE_BIGFOOT txnid_t bigfoot; #endif /* MDBX_ENABLE_BIGFOOT */ @@ -22,7 +32,7 @@ typedef struct gc_update_context { static inline int gc_update_init(MDBX_txn *txn, gcu_t *ctx) { memset(ctx, 0, offsetof(gcu_t, cursor)); - ctx->lifo = (txn->env->flags & MDBX_LIFORECLAIM) != 0; + ctx->dense = txn->txnid < MIN_TXNID; #if MDBX_ENABLE_BIGFOOT ctx->bigfoot = txn->txnid; #endif /* MDBX_ENABLE_BIGFOOT */ From 74ff4dba0a774c500a33edf3db34c00c71d87e29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 22 May 2024 21:33:58 +0300 Subject: [PATCH 186/443] =?UTF-8?q?mdbx-testing:=20=D0=B4=D0=BE=D0=B1?= 
=?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`tmux-battery`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/battery-tmux.sh | 38 ++++++++++++++++++++++++++++++++++++++ test/long_stochastic.sh | 10 ++++++++++ test/tmux.conf | 3 +++ 3 files changed, 51 insertions(+) create mode 100755 test/battery-tmux.sh create mode 100644 test/tmux.conf diff --git a/test/battery-tmux.sh b/test/battery-tmux.sh new file mode 100755 index 00000000..3fae3fc8 --- /dev/null +++ b/test/battery-tmux.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# Леонид Юрьев aka Leonid Yuriev +# SPDX-License-Identifier: Apache-2.0 + +TEST="./test/long_stochastic.sh --skip-make" +PREFIX="/dev/shm/mdbxtest-" + +tmux kill-session -t mdbx +rm -rf ${PREFIX}* +# git clean -x -f -d && make test-assertions +tmux -f ./test/tmux.conf new-session -d -s mdbx htop + +W=0 +for ps in min 4k max; do + for from in 1 30000; do + for n in 0 1 2 3 4 5 6 7; do + CMD="${TEST} --delay $((n * 7)) --page-size ${ps} --from ${from} --dir ${PREFIX}page-${ps}.from-${from}.${n}" + if [ $n -eq 0 ]; then + tmux new-window -t mdbx:$((++W)) -n "page-${ps}.from-${from}" -k -d "$CMD" + tmux select-layout -E tiled + else + tmux split-window -t mdbx:$W -l 20% -d $CMD + fi + done + for n in 0 1 2 3 4 5 6 7; do + CMD="${TEST} --delay $((3 + n * 7)) --page-size ${ps} --from ${from} --dir ${PREFIX}page-${ps}.from-${from}.${n}-extra" + if [ $n -eq 0 ]; then + tmux new-window -t mdbx:$((++W)) -n "page-${ps}.from-${from}-extra" -k -d "$CMD" + tmux select-layout -E tiled + else + tmux split-window -t mdbx:$W -l 20% -d $CMD + fi + done + done +done + +tmux attach -t mdbx diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index d3e30763..d761d73e 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -17,6 +17,7 @@ PAGESIZE=min DONT_CHECK_RAM=no EXTRA=no TAILLOG=0 +DELAY=0 while [ -n "$1" ] do @@ -40,6 +41,7 @@ do echo "--dont-check-ram-size Don't check available 
RAM" echo "--extra Iterate extra modes/flags" echo "--taillog Dump tail of test log on failure" + echo "--delay NN Delay NN seconds before run test" echo "--help Print this usage help and exit" exit -2 ;; @@ -158,6 +160,10 @@ do --extra) EXTRA=yes ;; + --delay) + DELAY=$(($2)) + shift + ;; *) echo "Unknown option '$1'" exit -2 @@ -437,6 +443,10 @@ function probe { #------------------------------------------------------------------------------ +if [ "$DELAY" != "0" ]; then + sleep $DELAY +fi + count=0 loop=0 cases='?' diff --git a/test/tmux.conf b/test/tmux.conf new file mode 100644 index 00000000..49a78caa --- /dev/null +++ b/test/tmux.conf @@ -0,0 +1,3 @@ +setw -g aggressive-resize on +set-option -g remain-on-exit on +# set-option -g remain-on-exit-format FAILED From 25efb587904a1b2034ec49ccf95315648ce155eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 9 Jun 2024 14:42:41 +0300 Subject: [PATCH 187/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D0=B3=D1=80=D1=83=D0=B7=D0=BE=D0=BA=20put/insert/upsert=20?= =?UTF-8?q?=D0=B4=D0=BB=D1=8F=20`mdbx::pair`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index f6912342..32b5dd3b 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4552,6 +4552,19 @@ public: inline value_result try_update_reserve(map_handle map, const slice &key, size_t value_length); + void put(map_handle map, const pair &kv, put_mode mode) { + return put(map, kv.key, kv.value, mode); + } + void insert(map_handle map, const pair &kv) { + return insert(map, kv.key, kv.value); + } + value_result try_insert(map_handle map, const pair &kv) { + return try_insert(map, kv.key, kv.value); + } + void upsert(map_handle 
map, const pair &kv) { + return upsert(map, kv.key, kv.value); + } + /// \brief Removes all values for given key. inline bool erase(map_handle map, const slice &key); @@ -4600,6 +4613,10 @@ public: /// to pages of nested b+tree of multimap's values. inline void append(map_handle map, const slice &key, const slice &value, bool multivalue_order_preserved = true); + inline void append(map_handle map, const pair &kv, + bool multivalue_order_preserved = true) { + return append(map, kv.key, kv.value, multivalue_order_preserved); + } size_t put_multiple(map_handle map, const slice &key, const size_t value_length, const void *values_array, @@ -5096,6 +5113,7 @@ public: inline MDBX_error_t put(const slice &key, slice *value, MDBX_put_flags_t flags) noexcept; + inline void put(const slice &key, slice value, put_mode mode); inline void insert(const slice &key, slice value); inline value_result try_insert(const slice &key, slice value); inline slice insert_reserve(const slice &key, size_t value_length); @@ -5109,6 +5127,15 @@ public: inline slice update_reserve(const slice &key, size_t value_length); inline value_result try_update_reserve(const slice &key, size_t value_length); + void put(const pair &kv, put_mode mode) { + return put(kv.key, kv.value, mode); + } + void insert(const pair &kv) { return insert(kv.key, kv.value); } + value_result try_insert(const pair &kv) { + return try_insert(kv.key, kv.value); + } + void upsert(const pair &kv) { return upsert(kv.key, kv.value); } + /// \brief Removes single key-value pair or all multi-values at the current /// cursor position. 
inline bool erase(bool whole_multivalue = false); @@ -7137,6 +7164,10 @@ inline MDBX_error_t cursor::put(const slice &key, slice *value, return MDBX_error_t(::mdbx_cursor_put(handle_, &key, value, flags)); } +inline void cursor::put(const slice &key, slice value, put_mode mode) { + error::success_or_throw(put(key, &value, MDBX_put_flags_t(mode))); +} + inline void cursor::insert(const slice &key, slice value) { error::success_or_throw( put(key, &value /* takes the present value in case MDBX_KEYEXIST */, From 3517db6178a1a02b8b8df9f1aecd373333458522 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 25 May 2024 23:20:34 +0300 Subject: [PATCH 188/443] =?UTF-8?q?mdbx-testing:=20=D0=B4=D0=BE=D0=B1?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`extra/crunched=5F?= =?UTF-8?q?delete'.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 8 + test/extra/crunched_delete.c++ | 414 +++++++++++++++++++++++++++++++++ 2 files changed, 422 insertions(+) create mode 100644 test/extra/crunched_delete.c++ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index fa7ada56..c334601d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -117,6 +117,13 @@ if(UNIX AND NOT SUBPROJECT) set_target_properties(test_extra_doubtless_positioning PROPERTIES CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) endif() + add_executable(test_extra_crunched_delete extra/crunched_delete.c++) + target_include_directories(test_extra_crunched_delete PRIVATE "${PROJECT_SOURCE_DIR}") + target_link_libraries(test_extra_crunched_delete ${TOOL_MDBX_LIB}) + if(MDBX_CXX_STANDARD) + set_target_properties(test_extra_crunched_delete PROPERTIES + CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) + endif() endif() endif() @@ -207,6 +214,7 @@ else() if (ENABLE_MEMCHECK) 
set_tests_properties(extra_doubtless_positioning PROPERTIES TIMEOUT 10800) endif() + add_test(NAME extra_crunched_delete COMMAND test_extra_crunched_delete) endif() endif() diff --git a/test/extra/crunched_delete.c++ b/test/extra/crunched_delete.c++ new file mode 100644 index 00000000..0693ec97 --- /dev/null +++ b/test/extra/crunched_delete.c++ @@ -0,0 +1,414 @@ +#include "mdbx.h++" + +#include +#include +#include +#include + +#if MDBX_DEBUG || !defined(NDEBUG) +#define NN 1024 +#else +#define NN 16384 +#endif + +std::string format_va(const char *fmt, va_list ap) { + va_list ones; + va_copy(ones, ap); +#ifdef _MSC_VER + int needed = _vscprintf(fmt, ap); +#else + int needed = vsnprintf(nullptr, 0, fmt, ap); +#endif + assert(needed >= 0); + std::string result; + result.reserve(size_t(needed + 1)); + result.resize(size_t(needed), '\0'); + assert(int(result.capacity()) > needed); + int actual = vsnprintf(const_cast(result.data()), result.capacity(), + fmt, ones); + assert(actual == needed); + (void)actual; + va_end(ones); + return result; +} + +std::string format(const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + std::string result = format_va(fmt, ap); + va_end(ap); + return result; +} + +struct acase { + unsigned klen_min, klen_max; + unsigned vlen_min, vlen_max; + unsigned dupmax_log2; + + acase(unsigned klen_min, unsigned klen_max, unsigned vlen_min, + unsigned vlen_max, unsigned dupmax_log2) + : klen_min(klen_min), klen_max(klen_max), vlen_min(vlen_min), + vlen_max(vlen_max), dupmax_log2(dupmax_log2) {} +}; + +// std::random_device rd; +std::mt19937_64 rnd; + +static unsigned prng_fast(uint32_t &seed) { + seed = seed * 1103515245 + 12345; + return seed >> 17; +} + +static mdbx::slice mk(mdbx::default_buffer &buf, unsigned min, unsigned max) { + uint32_t seed = rnd() % (NN + NN); + unsigned len = (min < max) ? 
min + prng_fast(seed) % (max - min) : min; + buf.clear_and_reserve(len); + for (unsigned i = 0; i < len; ++i) + buf.append_byte(prng_fast(seed)); + return buf.slice(); +} + +static mdbx::slice mk_key(mdbx::default_buffer &buf, const acase &thecase) { + return mk(buf, thecase.klen_min, thecase.klen_max); +} + +static mdbx::slice mk_val(mdbx::default_buffer &buf, const acase &thecase) { + return mk(buf, thecase.vlen_min, thecase.vlen_max); +} + +static std::string name(unsigned n) { return format("Commitment_%05u", n); } + +static mdbx::map_handle create_and_fill(mdbx::txn txn, const acase &thecase, + const unsigned n) { + auto map = txn.create_map(name(n), + (thecase.klen_min == thecase.klen_max && + (thecase.klen_min == 4 || thecase.klen_max == 8)) + ? mdbx::key_mode::ordinal + : mdbx::key_mode::usual, + (thecase.vlen_min == thecase.vlen_max) + ? mdbx::value_mode::multi_samelength + : mdbx::value_mode::multi); + + if (txn.get_map_stat(map).ms_entries < NN) { + mdbx::buffer k, v; + for (auto i = 0u; i < NN; i++) { + mk_key(k, thecase); + for (auto ii = thecase.dupmax_log2 + ? 1u + (rnd() & ((2u << thecase.dupmax_log2) - 1u)) + : 1u; + ii > 0; --ii) + txn.upsert(map, k, mk_val(v, thecase)); + } + } + return map; +} + +static void chunched_delete(mdbx::txn txn, const acase &thecase, + const unsigned n) { + // printf(">> %s, case #%i\n", __FUNCTION__, n); + mdbx::buffer k, v; + auto map = txn.open_map_accede(name(n)); + + { + auto cursor = txn.open_cursor(map); + while (true) { + const unsigned all = cursor.txn().get_map_stat(cursor.map()).ms_entries; + // printf("== seek random of %u\n", all); + + const char *last_op; + bool last_r; + + if ((last_op = "MDBX_GET_BOTH", + last_r = cursor.find_multivalue(mk_key(k, thecase), + mk_val(v, thecase), false)) || + rnd() % 3 == 0 || + (last_op = "MDBX_SET_RANGE", + last_r = cursor.lower_bound(mk_key(k, thecase), false))) { + int i = int(rnd() % 7) - 3; + // if (i) + // printf(" %s -> %s\n", last_op, last_r ? 
"true" : "false"); + // printf("== shift multi %i\n", i); + try { + while (i < 0 && (last_op = "MDBX_PREV_DUP", + last_r = cursor.to_current_prev_multi(false))) + ++i; + while (i > 0 && (last_op = "MDBX_NEXT_DUP", + last_r = cursor.to_current_next_multi(false))) + --i; + } catch (const mdbx::no_data &) { + printf("cursor_del() -> exception, last %s %s\n", last_op, + last_r ? "true" : "false"); + continue; + } + } + // printf(" %s -> %s\n", last_op, last_r ? "true" : "false"); + + if (all < 42) { + // printf("== erase-tail\n"); + break; + } + auto i = all % 17 + 1; + try { + last_r = cursor.erase(); + do { + // printf("== erase-chunk: %u\n", i); + // printf(" cursor_del() -> %s\n", last_r ? "true" : "false"); + } while (cursor.to_next(false) && --i > 0); + } catch (const mdbx::no_data &) { + printf("cursor_del() -> exception, last %s %s\n", last_op, + last_r ? "true" : "false"); + } + + // (void) last_op; + // (void) last_r; + } + + if (cursor.to_first(false)) + do + cursor.erase(); + while (cursor.to_next(false)); + } + + // printf("<< %s, case #%i\n", __FUNCTION__, n); +} + +static char log_buffer[1024]; + +static void logger_nofmt(MDBX_log_level_t loglevel, const char *function, + int line, const char *msg, unsigned length) noexcept { + (void)length; + (void)loglevel; + fprintf(stdout, "%s:%u %s", function, line, msg); +} + +bool outofrange_prev(mdbx::env env) { + mdbx::cursor_managed cursor; + const std::array items = { + {{"k1", "v1"}, {"k1", "v2"}, {"k2", "v1"}, {"k2", "v2"}}}; + + auto txn = env.start_write(); + auto multi = + txn.create_map("multi", mdbx::key_mode::usual, mdbx::value_mode::multi); + auto simple = txn.create_map("simple"); + txn.clear_map(multi); + txn.clear_map(simple); + + txn.insert(simple, items[0]); + txn.insert(simple, items[3]); + cursor.bind(txn, simple); + const auto simple_oor = cursor.lower_bound("k3"); + if (simple_oor) { + std::cerr << "simple-outofrange " << simple_oor << "\n"; + return false; + } + const auto simple_oor_prevdup 
= cursor.to_current_prev_multi(false); + if (simple_oor_prevdup) { + std::cerr << "simple-outofrange-prevdup " << simple_oor_prevdup << "\n"; + return false; + } + const auto simple_oor_prev = cursor.to_previous(false); + if (!simple_oor_prev || simple_oor_prev != items[3]) { + std::cerr << "simple-outofrange-prev " << simple_oor_prev << "\n"; + return false; + } + + txn.append(multi, items[0]); + txn.append(multi, items[1]); + txn.append(multi, items[2]); + txn.append(multi, items[3]); + cursor.bind(txn, multi); + const auto multi_oor = cursor.lower_bound("k3"); + if (multi_oor) { + std::cerr << "multi-outofrange " << multi_oor << "\n"; + return false; + } + const auto multi_oor_prevdup = cursor.to_current_prev_multi(false); + if (multi_oor_prevdup) { + std::cerr << "multi-outofrange-prevdup " << multi_oor_prevdup << "\n"; + return false; + } + const auto multi_oor_prev = cursor.to_previous(false); + if (!multi_oor_prev || multi_oor_prev != items[3]) { + std::cerr << "multi-outofrange-prev " << multi_oor_prev << "\n"; + return false; + } + + txn.commit(); + return true; +} + +bool next_prev_current(mdbx::env env) { + const std::array items = { + {{"k1", "v1"}, {"k1", "v2"}, {"k2", "v1"}, {"k2", "v2"}}}; + + auto txn = env.start_write(); + auto map = + txn.create_map("multi", mdbx::key_mode::usual, mdbx::value_mode::multi); + txn.clear_map(map); + for (const auto &i : items) + txn.upsert(map, i); + + auto cursor = txn.open_cursor(map); + const auto first = cursor.to_first(false); + if (!first || first != items[0]) { + std::cerr << "bad-first " << first << "\n"; + return false; + } + const auto next1 = cursor.to_next(false); + if (!next1 || next1 != items[1]) { + std::cerr << "bad-next-1 " << next1 << "\n"; + return false; + } + const auto next2 = cursor.to_next(false); + if (!next2 || next2 != items[2]) { + std::cerr << "bad-next-2 " << next2 << "\n"; + return false; + } + const auto prev1 = cursor.to_previous(false); + if (!prev1 || prev1 != items[1]) { + 
std::cerr << "bad-prev-1 " << prev1 << "\n"; + return false; + } + const auto prev2 = cursor.to_previous(false); + if (!prev2 || prev2 != items[0]) { + std::cerr << "bad-prev-2 " << prev2 << "\n"; + return false; + } + + if (!cursor.erase(false)) { + std::cerr << "bad-erase\n"; + return false; + } + + const auto after_del = cursor.current(false); + if (!after_del || after_del != items[1]) { + std::cerr << "bad-after-del, current " << after_del << "\n"; + return false; + } + const auto next_after_del1 = cursor.to_next(false); + if (!next_after_del1 || next_after_del1 != items[2]) { + std::cerr << "bad-next_after_del1 " << next_after_del1; + return false; + } + const auto next_after_del2 = cursor.to_next(false); + if (!next_after_del2 || next_after_del2 != items[3]) { + std::cerr << "bad-next_after_del2 " << next_after_del2; + return false; + } + const auto next_after_del3 = cursor.to_next(false); + if (next_after_del3) { + std::cerr << "bad-next_after_del3 " << next_after_del3; + return false; + } + txn.commit(); + return true; +} + +bool simple(mdbx::env env) { + const std::array items = { + {{"k0", "v0"}, {"k1", "v1"}, {"k2", "v2"}}}; + + auto txn = env.start_write(); + auto map = txn.create_map("simple"); + txn.clear_map(map); + for (const auto &i : items) + txn.insert(map, i); + + auto cursor = txn.open_cursor(map); + cursor.seek(items[1].key); + + const auto seek = cursor.current(false); + if (seek != items[1]) { + std::cerr << "bad-seek, current " << seek << "\n"; + return false; + } + if (!cursor.erase()) { + std::cerr << "bad-erase\n"; + return false; + } + + const auto next = cursor.to_next(false); + if (!next || next != items[2]) { + std::cerr << "bad-next " << next; + return false; + } + + const auto after_del = cursor.current(false); + if (!after_del || after_del != items[2]) { + std::cerr << "bad-after-del, current " << after_del << "\n"; + return false; + } + txn.commit(); + + txn = env.start_read(); + cursor.bind(txn, map); + +#define BAD_CODE 1 +#if 
BAD_CODE + const auto first = cursor.to_next(false); +#else + const auto first = cursor.to_first(false); +#endif + const auto second = cursor.to_next(false); + const auto eof = cursor.to_next(false); + + if (!first || first != items[0]) { + std::cerr << "bad-first " << first << "\n"; + return false; + } + if (!second || second != items[2]) { + std::cerr << "bad-second " << second << "\n"; + return false; + } + if (eof) { + std::cerr << "bad-eof " << eof << "\n"; + return false; + } + + return true; +} + +int main(int argc, const char *argv[]) { + (void)argc; + (void)argv; + + mdbx_setup_debug_nofmt(MDBX_LOG_NOTICE, MDBX_DBG_ASSERT, logger_nofmt, + log_buffer, sizeof(log_buffer)); + + const char *filename = "test-crunched-del"; + mdbx::env::remove(filename); + + std::vector testset; + // Там ключи разной длины - от 1 до 64 байт. + // Значения разной длины от 100 до 1000 байт. + testset.emplace_back(/* keylen_min */ 1, /* keylen_max */ 64, + /* datalen_min */ 100, /* datalen_max */ 4000, + /* dups_log2 */ 6); + // В одной таблице DupSort: path -> version_u64+data + // path - это префикс в дереве. Самые частые длины: 1-5 байт и 32-36 байт. 
+ testset.emplace_back(1, 5, 100, 1000, 8); + testset.emplace_back(32, 36, 100, 1000, 7); + // В другой DupSort: timestamp_u64 -> path + testset.emplace_back(8, 8, 1, 5, 10); + testset.emplace_back(8, 8, 32, 36, 9); + + mdbx::env_managed env(filename, mdbx::env_managed::create_parameters(), + mdbx::env::operate_parameters(42)); + if (!simple(env) || !next_prev_current(env) || !outofrange_prev(env)) + return EXIT_FAILURE; + + auto txn = env.start_write(); + for (unsigned i = 0; i < testset.size(); ++i) + create_and_fill(txn, testset[i], i); + txn.commit(); + + // mdbx_setup_debug_nofmt(MDBX_LOG_TRACE, MDBX_DBG_AUDIT | MDBX_DBG_ASSERT, + // logger_nofmt, log_buffer, sizeof(log_buffer)); + txn = env.start_write(); + for (unsigned i = 0; i < testset.size(); ++i) + chunched_delete(txn, testset[i], i); + txn.commit(); + + std::cout << "OK\n"; + return EXIT_SUCCESS; +} From a79a318d61c79fbbad9990abb65fab64d9c10e13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 26 May 2024 13:58:52 +0300 Subject: [PATCH 189/443] =?UTF-8?q?mdbx++:=20=D0=BF=D0=BE=D0=B4=D0=B4?= =?UTF-8?q?=D0=B5=D1=80=D0=B6=D0=BA=D0=B0=20`MDBX=5FVALIDATION`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 2 ++ src/mdbx.c++ | 2 ++ 2 files changed, 4 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index 32b5dd3b..91341ca2 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3685,6 +3685,8 @@ public: bool disable_readahead{false}; /// \copydoc MDBX_NOMEMINIT bool disable_clear_memory{false}; + /// \copydoc MDBX_VALIDATION + bool enable_validation{false}; MDBX_CXX11_CONSTEXPR operate_options() noexcept {} MDBX_CXX11_CONSTEXPR operate_options(const operate_options &) noexcept = default; diff --git a/src/mdbx.c++ b/src/mdbx.c++ index aa3fb285..7c27f1d8 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -1232,6 +1232,8 @@ env::operate_parameters::make_flags(bool 
accede, bool use_subdirectory) const { flags |= MDBX_NORDAHEAD; if (options.disable_clear_memory) flags |= MDBX_NOMEMINIT; + if (options.enable_validation) + flags |= MDBX_VALIDATION; if (mode != readonly) { if (options.nested_write_transactions) From bcd955aeb9891cfb4f5e9c7ce95fd476c229d817 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 28 May 2024 17:12:40 +0300 Subject: [PATCH 190/443] =?UTF-8?q?mdbx++:=20=D0=B8=D1=81=D0=BF=D1=80?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`append=5Fu8()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 91341ca2..2d982041 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -2895,7 +2895,7 @@ public: buffer &append(const void *src, size_t bytes) { if (MDBX_UNLIKELY(tailroom() < check_length(bytes))) MDBX_CXX20_UNLIKELY reserve_tailroom(bytes); - memcpy(slice_.byte_ptr() + size(), src, bytes); + memcpy(end_byte_ptr(), src, bytes); slice_.iov_len += bytes; return *this; } @@ -2964,7 +2964,7 @@ public: buffer &append_u8(uint_fast8_t u8) { if (MDBX_UNLIKELY(tailroom() < 1)) MDBX_CXX20_UNLIKELY reserve_tailroom(1); - *slice_.byte_ptr() = u8; + *slice_.end_byte_ptr() = uint8_t(u8); slice_.iov_len += 1; return *this; } @@ -2974,7 +2974,7 @@ public: buffer &append_u16(uint_fast16_t u16) { if (MDBX_UNLIKELY(tailroom() < 2)) MDBX_CXX20_UNLIKELY reserve_tailroom(2); - const auto ptr = slice_.byte_ptr(); + const auto ptr = slice_.end_byte_ptr(); ptr[0] = uint8_t(u16); ptr[1] = uint8_t(u16 >> 8); slice_.iov_len += 2; @@ -2984,7 +2984,7 @@ public: buffer &append_u24(uint_fast32_t u24) { if (MDBX_UNLIKELY(tailroom() < 3)) MDBX_CXX20_UNLIKELY reserve_tailroom(3); - const auto ptr = slice_.byte_ptr(); + const auto ptr = slice_.end_byte_ptr(); ptr[0] = uint8_t(u24); ptr[1] 
= uint8_t(u24 >> 8); ptr[2] = uint8_t(u24 >> 16); @@ -2995,7 +2995,7 @@ public: buffer &append_u32(uint_fast32_t u32) { if (MDBX_UNLIKELY(tailroom() < 4)) MDBX_CXX20_UNLIKELY reserve_tailroom(4); - const auto ptr = slice_.byte_ptr(); + const auto ptr = slice_.end_byte_ptr(); ptr[0] = uint8_t(u32); ptr[1] = uint8_t(u32 >> 8); ptr[2] = uint8_t(u32 >> 16); @@ -3007,7 +3007,7 @@ public: buffer &append_u48(uint_fast64_t u48) { if (MDBX_UNLIKELY(tailroom() < 6)) MDBX_CXX20_UNLIKELY reserve_tailroom(6); - const auto ptr = slice_.byte_ptr(); + const auto ptr = slice_.end_byte_ptr(); ptr[0] = uint8_t(u48); ptr[1] = uint8_t(u48 >> 8); ptr[2] = uint8_t(u48 >> 16); @@ -3021,7 +3021,7 @@ public: buffer &append_u64(uint_fast64_t u64) { if (MDBX_UNLIKELY(tailroom() < 8)) MDBX_CXX20_UNLIKELY reserve_tailroom(8); - const auto ptr = slice_.byte_ptr(); + const auto ptr = slice_.end_byte_ptr(); ptr[0] = uint8_t(u64); ptr[1] = uint8_t(u64 >> 8); ptr[2] = uint8_t(u64 >> 16); From f65642e38c40f23c2f2b222414e849be64341826 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 8 Jun 2024 20:44:56 +0300 Subject: [PATCH 191/443] =?UTF-8?q?mdbx:=20=D0=BE=D0=B1=D0=BD=D0=BE=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ChangeLog.md b/ChangeLog.md index b65bf5ff..c003df54 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -12,6 +12,13 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic Новое: + - Изменение лицензии на Apache 2.0, пояснения и подробности в файле `COPYRIGHT`. + + - Реструктуризация исходного кода с рефакторингом. + + - Переработка курсоров для унификации поведения, более регулярного + кода, уменьшения количества ветвлений и машинных операций. 
+ - Перенос функционала утилиты `mdbx_chk` внутрь библиотеки в виде функции `mdbx_env_chk() `для проверка целостности структуры БД, в том числе с вовлечением логики приложения. @@ -68,7 +75,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Опция `MDBX_opt_prefer_waf_insteadof_balance`. - - TODO: Опции `MDBX_opt_subpage_limit`, `MDBX_opt_subpage_room_threshold`, `MDBX_opt_subpage_reserve_prereq`, `MDBX_opt_subpage_reserve_limit`. + - Опции `MDBX_opt_subpage_limit`, `MDBX_opt_subpage_room_threshold`, `MDBX_opt_subpage_reserve_prereq`, `MDBX_opt_subpage_reserve_limit`. - Управление основной блокировкой lock/unlock/upgrade/downgrade для координации пишущих транзакций. @@ -99,6 +106,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Опция сборки `MDBX_USE_VALGRIND` заменена на общепринятую `ENABLE_MEMCHECK`. - В структуре `MDBX_envinfo` серии полей вида `meta1`, `meta2` и `meta3` заменены на массивы вида `meta[3]`. - В шаблонных классах и функциях С++ API по-умолчанию вместо `mdbx::legacy_buffer` использован тип `mdbx::default_buffer` использующий полиморфные аллокаторы С++ 17. + - Удаление `DEFAULT_MAPSIZE` и изменение геометрии по-умолчанию при создании БД. 
## v0.13.0 от 2023-04-23 From d4c09f9b785d73df35e75090ade8436cf038bbd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 12 Jun 2024 17:31:05 +0300 Subject: [PATCH 192/443] =?UTF-8?q?mdbx-testing:=20=D0=B8=D0=B7=D0=BC?= =?UTF-8?q?=D0=B5=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=BE=D1=80=D1=8F?= =?UTF-8?q?=D0=B4=D0=BA=D0=B0=20=D0=BF=D0=B5=D1=80=D0=B5=D0=B1=D0=BE=D1=80?= =?UTF-8?q?=D0=B0=20=D1=80=D0=B5=D0=B6=D0=B8=D0=BC=D0=BE=D0=B2=20=D0=B2=20?= =?UTF-8?q?"=D0=B4=D0=BE=D0=BB=D0=B3=D0=BE=D0=BC=20=D1=81=D1=82=D0=BE?= =?UTF-8?q?=D1=85=D0=B0=D1=81=D1=82=D0=B8=D1=87=D0=B5=D1=81=D0=BA=D0=BE?= =?UTF-8?q?=D0=BC".?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/long_stochastic.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index d761d73e..0e38a301 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -394,7 +394,7 @@ else fi if [ "$EXTRA" != "no" ]; then - options=(writemap lifo nostickythreads perturb nomeminit nordahead) + options=(perturb nomeminit nordahead writemap lifo nostickythreads) else options=(writemap lifo nostickythreads) fi From a10506fb6a08e01b1c57fb4764d902f141d3a2e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 13 Jun 2024 11:59:19 +0300 Subject: [PATCH 193/443] =?UTF-8?q?mdbx-testing:=20=D1=82=D0=B5=D1=85?= =?UTF-8?q?=D0=BD=D0=B8=D1=87=D0=B5=D1=81=D0=BA=D0=B8=D0=B9=20=D0=B2=D0=BE?= =?UTF-8?q?=D0=B7=D0=B2=D1=80=D0=B0=D1=82=20`bool`=20=D0=B8=D0=B7=20=D0=BF?= =?UTF-8?q?=D1=80=D0=BE=D0=B2=D0=B5=D1=80=D0=BE=D1=87=D0=BD=D1=8B=D1=85=20?= =?UTF-8?q?=D1=84=D1=83=D0=BD=D0=BA=D1=86=D0=B8=D0=B9=20=D0=B2=20=D1=82?= =?UTF-8?q?=D0=B5=D1=81=D1=82=D0=B5=20=D0=B4=D0=BB=D1=8F=20=D1=83=D0=B4?= 
=?UTF-8?q?=D0=BE=D0=B1=D1=81=D1=82=D0=B2=D0=B0=20ad-hoc=20=D0=B4=D0=BE?= =?UTF-8?q?=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D0=BE=D0=BA.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/test.c++ | 55 ++++++++++++++++++++++++++++----------------------- test/test.h++ | 8 ++++---- 2 files changed, 34 insertions(+), 29 deletions(-) diff --git a/test/test.c++ b/test/test.c++ index 21900731..1926bb83 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -726,7 +726,7 @@ void testcase::verbose(const char *where, const char *stage, const MDBX_val &k, mdbx_dump_val(&v, dump_value, sizeof(dump_value))); } -void testcase::speculum_check_iterator(const char *where, const char *stage, +bool testcase::speculum_check_iterator(const char *where, const char *stage, const testcase::SET::const_iterator &it, const MDBX_val &k, const MDBX_val &v) const { @@ -737,16 +737,17 @@ void testcase::speculum_check_iterator(const char *where, const char *stage, // mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), // mdbx_dump_val(&it_data, dump_value, sizeof(dump_value))); if (!is_samedata(it_key, k)) - failure("speculum-%s: %s key mismatch %s (must) != %s", where, stage, - mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), - mdbx_dump_val(&k, dump_value, sizeof(dump_value))); + return failure("speculum-%s: %s key mismatch %s (must) != %s", where, stage, + mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), + mdbx_dump_val(&k, dump_value, sizeof(dump_value))); if (!is_samedata(it_data, v)) - failure("speculum-%s: %s data mismatch %s (must) != %s", where, stage, - mdbx_dump_val(&it_data, dump_key, sizeof(dump_key)), - mdbx_dump_val(&v, dump_value, sizeof(dump_value))); + return failure("speculum-%s: %s data mismatch %s (must) != %s", where, + stage, mdbx_dump_val(&it_data, dump_key, sizeof(dump_key)), + mdbx_dump_val(&v, dump_value, sizeof(dump_value))); + return true; } -void testcase::failure(const char *fmt, ...) 
const { +bool testcase::failure(const char *fmt, ...) const { va_list ap; va_start(ap, fmt); fflush(nullptr); @@ -756,10 +757,11 @@ void testcase::failure(const char *fmt, ...) const { if (txn_guard) mdbx_txn_commit(const_cast(this)->txn_guard.release()); exit(EXIT_FAILURE); + return false; } #if SPECULUM_CURSORS -void testcase::speculum_check_cursor(const char *where, const char *stage, +bool testcase::speculum_check_cursor(const char *where, const char *stage, const testcase::SET::const_iterator &it, int cursor_err, const MDBX_val &cursor_key, const MDBX_val &cursor_data) const { @@ -767,25 +769,29 @@ void testcase::speculum_check_cursor(const char *where, const char *stage, // verbose(where, stage, it); if (cursor_err != MDBX_SUCCESS && cursor_err != MDBX_NOTFOUND && cursor_err != MDBX_RESULT_TRUE && cursor_err != MDBX_ENODATA) - failure("speculum-%s: %s %s %d %s", where, stage, "cursor-get", cursor_err, - mdbx_strerror(cursor_err)); + return failure("speculum-%s: %s %s %d %s", where, stage, "cursor-get", + cursor_err, mdbx_strerror(cursor_err)); char dump_key[32], dump_value[32]; if (it == speculum.end() && cursor_err != MDBX_NOTFOUND) - failure("speculum-%s: %s extra pair {%s, %s}", where, stage, - mdbx_dump_val(&cursor_key, dump_key, sizeof(dump_key)), - mdbx_dump_val(&cursor_data, dump_value, sizeof(dump_value))); + return failure("speculum-%s: %s extra pair {%s, %s}", where, stage, + mdbx_dump_val(&cursor_key, dump_key, sizeof(dump_key)), + mdbx_dump_val(&cursor_data, dump_value, sizeof(dump_value))); else if (it != speculum.end() && cursor_err == MDBX_NOTFOUND) { MDBX_val it_key = dataview2iov(it->first); MDBX_val it_data = dataview2iov(it->second); - failure("speculum-%s: %s lack pair {%s, %s}", where, stage, - mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), - mdbx_dump_val(&it_data, dump_value, sizeof(dump_value))); + return failure("speculum-%s: %s lack pair {%s, %s}", where, stage, + mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), + 
mdbx_dump_val(&it_data, dump_value, sizeof(dump_value))); } else if (cursor_err == MDBX_SUCCESS || cursor_err == MDBX_RESULT_TRUE) - speculum_check_iterator(where, stage, it, cursor_key, cursor_data); + return speculum_check_iterator(where, stage, it, cursor_key, cursor_data); + else { + assert(it == speculum.end() && cursor_err == MDBX_NOTFOUND); + return true; + } } -void testcase::speculum_check_cursor(const char *where, const char *stage, +bool testcase::speculum_check_cursor(const char *where, const char *stage, const testcase::SET::const_iterator &it, MDBX_cursor *cursor, const MDBX_cursor_op op) const { @@ -908,11 +914,10 @@ int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, check_seek_cursor = speculum_cursors[seek_check].get(); seek_check_key = akey->value; seek_check_data = adata->value; - seek_check_err = mdbx_cursor_get( - check_seek_cursor, &seek_check_key, &seek_check_data, - (config.params.table_flags & MDBX_DUPSORT) ? MDBX_GET_BOTH - : MDBX_SET_KEY); - if (seek_check_err != MDBX_SUCCESS && seek_check_err != MDBX_NOTFOUND) + seek_check_err = mdbx_cursor_get(check_seek_cursor, &seek_check_key, + &seek_check_data, MDBX_SET_LOWERBOUND); + if (seek_check_err != MDBX_SUCCESS && seek_check_err != MDBX_NOTFOUND && + seek_check_err != MDBX_RESULT_TRUE) failure("speculum-%s: %s pre-insert %d %s", "insert", "seek", seek_check_err, mdbx_strerror(seek_check_err)); #endif /* SPECULUM_CURSORS */ @@ -940,7 +945,7 @@ int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, #if SPECULUM_CURSORS if (insertion_result.second) { - if (seek_check_err != MDBX_NOTFOUND) { + if (seek_check_err == MDBX_SUCCESS) { log_error( "speculum.pre-insert-seek: unexpected %d {%s, %s}", seek_check_err, mdbx_dump_val(&seek_check_key, dump_key, sizeof(dump_key)), diff --git a/test/test.h++ b/test/test.h++ index 583e539a..3e0ac49c 100644 --- a/test/test.h++ +++ b/test/test.h++ @@ -194,16 +194,16 @@ protected: #if SPECULUM_CURSORS 
scoped_cursor_guard speculum_cursors[5 + 1]; void speculum_prepare_cursors(const Item &item); - void speculum_check_cursor(const char *where, const char *stage, + bool speculum_check_cursor(const char *where, const char *stage, const testcase::SET::const_iterator &it, int cursor_err, const MDBX_val &cursor_key, const MDBX_val &cursor_data) const; - void speculum_check_cursor(const char *where, const char *stage, + bool speculum_check_cursor(const char *where, const char *stage, const testcase::SET::const_iterator &it, MDBX_cursor *cursor, const MDBX_cursor_op op) const; #endif /* SPECULUM_CURSORS */ - void speculum_check_iterator(const char *where, const char *stage, + bool speculum_check_iterator(const char *where, const char *stage, const testcase::SET::const_iterator &it, const MDBX_val &k, const MDBX_val &v) const; @@ -271,7 +271,7 @@ protected: void signal(); bool should_continue(bool check_timeout_only = false) const; - void failure(const char *fmt, ...) const; + bool failure(const char *fmt, ...) 
const; void generate_pair(const keygen::serial_t serial, keygen::buffer &out_key, keygen::buffer &out_value, keygen::serial_t data_age) { keyvalue_maker.pair(serial, out_key, out_value, data_age, false); From 9670cf57092e939db852ee364cb8d6b5ed341ead Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 13 Jun 2024 23:56:44 +0300 Subject: [PATCH 194/443] =?UTF-8?q?mdbx-testing:=20=D0=B2=D1=8B=D0=B2?= =?UTF-8?q?=D0=BE=D0=B4=20"=D1=82=D0=B0=D0=B1=D0=BB=D0=BE"=20=D1=81=20?= =?UTF-8?q?=D0=B8=D0=BD=D1=84=D0=BE=D1=80=D0=BC=D0=B0=D1=86=D0=B8=D0=B5?= =?UTF-8?q?=D0=B9=20=D0=BE=20=D0=BF=D0=BE=D0=BB=D0=BE=D0=B6=D0=B5=D0=BD?= =?UTF-8?q?=D0=B8=D0=B8=20=D0=BA=D1=83=D1=80=D1=81=D0=BE=D1=80=D0=BE=D0=B2?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20=D1=83=D0=B4=D0=BE=D0=B1=D1=81=D1=82?= =?UTF-8?q?=D0=B2=D0=B0=20=D0=BE=D1=82=D0=BB=D0=B0=D0=B4=D0=BA=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/test.c++ | 150 +++++++++++++++++++++++++++++++++++++++++++++----- test/test.h++ | 8 ++- 2 files changed, 143 insertions(+), 15 deletions(-) diff --git a/test/test.c++ b/test/test.c++ index 1926bb83..d1e1059e 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -728,22 +728,26 @@ void testcase::verbose(const char *where, const char *stage, const MDBX_val &k, bool testcase::speculum_check_iterator(const char *where, const char *stage, const testcase::SET::const_iterator &it, - const MDBX_val &k, - const MDBX_val &v) const { + const MDBX_val &k, const MDBX_val &v, + MDBX_cursor *cursor) const { char dump_key[32], dump_value[32]; MDBX_val it_key = dataview2iov(it->first); MDBX_val it_data = dataview2iov(it->second); // log_verbose("speculum-%s: %s expect {%s, %s}", where, stage, // mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), // mdbx_dump_val(&it_data, dump_value, sizeof(dump_value))); - if (!is_samedata(it_key, k)) + if 
(!is_samedata(it_key, k)) { + speculum_render(it, cursor); return failure("speculum-%s: %s key mismatch %s (must) != %s", where, stage, mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), mdbx_dump_val(&k, dump_value, sizeof(dump_value))); - if (!is_samedata(it_data, v)) + } + if (!is_samedata(it_data, v)) { + speculum_render(it, cursor); return failure("speculum-%s: %s data mismatch %s (must) != %s", where, stage, mdbx_dump_val(&it_data, dump_key, sizeof(dump_key)), mdbx_dump_val(&v, dump_value, sizeof(dump_value))); + } return true; } @@ -761,32 +765,139 @@ bool testcase::failure(const char *fmt, ...) const { } #if SPECULUM_CURSORS + +static void speculum_render_cursor(const MDBX_val &ikey, const MDBX_val &ival, + const MDBX_cursor *cursor, + const MDBX_cursor *ref) { + scoped_cursor_guard guard(mdbx_cursor_create(nullptr)); + if (!guard) + failure("mdbx_cursor_create()"); + /* работаем с копией курсора, чтобы не влиять на состояние оригинала. */ + int err = mdbx_cursor_copy(cursor, guard.get()); + if (err) + failure("mdbx_cursor_copy(), err %d", err); + + MDBX_cursor *const clone = guard.get(); + char status[10], *s = status; + if (cursor == ref) { + *s++ = '_'; + *s++ = '_'; + } + + if (mdbx_cursor_eof(clone) == MDBX_RESULT_TRUE) + *s++ = 'e'; + if (mdbx_cursor_on_first(clone) == MDBX_RESULT_TRUE) + *s++ = 'F'; + if (mdbx_cursor_on_first_dup(clone) == MDBX_RESULT_TRUE) + *s++ = 'f'; + if (mdbx_cursor_on_last(clone) == MDBX_RESULT_TRUE) + *s++ = 'L'; + if (mdbx_cursor_on_last_dup(clone) == MDBX_RESULT_TRUE) + *s++ = 'l'; + + MDBX_val ckey, cval; + if (mdbx_cursor_get(clone, &ckey, &cval, MDBX_GET_CURRENT) != MDBX_SUCCESS) + *s++ = '!'; + else { + const int kcmp = + mdbx_cmp(mdbx_cursor_txn(clone), mdbx_cursor_dbi(clone), &ikey, &ckey); + if (kcmp < 0) + *s++ = '<'; + else if (kcmp > 0) + *s++ = '>'; + else { + *s++ = '='; + const int vcmp = mdbx_dcmp(mdbx_cursor_txn(clone), mdbx_cursor_dbi(clone), + &ival, &cval); + if (vcmp < 0) + *s++ = '<'; + else if (vcmp > 
0) + *s++ = '>'; + else + *s++ = '='; + } + } + + if (clone == ref) { + *s++ = '_'; + *s++ = '_'; + } + *s = '\0'; + + printf(" | %-10.10s", status); +} + +void testcase::speculum_render(const testcase::SET::const_iterator &it, + const MDBX_cursor *ref) const { + char dump_key[32], dump_value[32]; + + auto top = it; + int offset = 0; + while (offset > -5 && top != speculum.begin()) { + --top; + --offset; + } + printf("## %-20.20s %-20.20s | %-10.10s | %-10.10s | %-10.10s | %-10.10s | " + "%-10.10s | %-10.10s |\n", + "k0_1_2_3_4_5_6_7_8_9", "v0_1_2_3_4_5_6_7_8_9", "prev-prev", "prev", + "seek", "lowerbound", "next", "next-next"); + while (offset < 5 && top != speculum.end()) { + const MDBX_val ikey = dataview2iov(top->first); + const MDBX_val idata = dataview2iov(top->second); + printf("%+d) %20.20s %20.20s", offset, + mdbx_dump_val(&ikey, dump_key, sizeof(dump_key)), + mdbx_dump_val(&idata, dump_value, sizeof(dump_value))); + + speculum_render_cursor(ikey, idata, speculum_cursors[prev_prev].get(), ref); + speculum_render_cursor(ikey, idata, speculum_cursors[prev].get(), ref); + speculum_render_cursor(ikey, idata, speculum_cursors[seek_check].get(), + ref); + speculum_render_cursor(ikey, idata, speculum_cursors[lowerbound].get(), + ref); + speculum_render_cursor(ikey, idata, speculum_cursors[next].get(), ref); + speculum_render_cursor(ikey, idata, speculum_cursors[next_next].get(), ref); + + printf(" %s\n", "|"); + ++top; + ++offset; + } +} + bool testcase::speculum_check_cursor(const char *where, const char *stage, const testcase::SET::const_iterator &it, int cursor_err, const MDBX_val &cursor_key, - const MDBX_val &cursor_data) const { + const MDBX_val &cursor_data, + MDBX_cursor *cursor) const { // verbose(where, stage, cursor_key, cursor_data, cursor_err); // verbose(where, stage, it); if (cursor_err != MDBX_SUCCESS && cursor_err != MDBX_NOTFOUND && - cursor_err != MDBX_RESULT_TRUE && cursor_err != MDBX_ENODATA) + cursor_err != MDBX_RESULT_TRUE && cursor_err != 
MDBX_ENODATA) { + speculum_render(it, cursor); return failure("speculum-%s: %s %s %d %s", where, stage, "cursor-get", cursor_err, mdbx_strerror(cursor_err)); + } char dump_key[32], dump_value[32]; - if (it == speculum.end() && cursor_err != MDBX_NOTFOUND) + if (it == speculum.end() && cursor_err != MDBX_NOTFOUND && + cursor_err != MDBX_ENODATA) { + speculum_render(it, cursor); return failure("speculum-%s: %s extra pair {%s, %s}", where, stage, mdbx_dump_val(&cursor_key, dump_key, sizeof(dump_key)), mdbx_dump_val(&cursor_data, dump_value, sizeof(dump_value))); - else if (it != speculum.end() && cursor_err == MDBX_NOTFOUND) { + } else if (it != speculum.end() && + (cursor_err == MDBX_NOTFOUND || cursor_err == MDBX_ENODATA)) { + speculum_render(it, cursor); MDBX_val it_key = dataview2iov(it->first); MDBX_val it_data = dataview2iov(it->second); return failure("speculum-%s: %s lack pair {%s, %s}", where, stage, mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), mdbx_dump_val(&it_data, dump_value, sizeof(dump_value))); } else if (cursor_err == MDBX_SUCCESS || cursor_err == MDBX_RESULT_TRUE) - return speculum_check_iterator(where, stage, it, cursor_key, cursor_data); + return speculum_check_iterator(where, stage, it, cursor_key, cursor_data, + cursor); else { - assert(it == speculum.end() && cursor_err == MDBX_NOTFOUND); + assert(it == speculum.end() && + (cursor_err == MDBX_NOTFOUND || cursor_err == MDBX_ENODATA)); return true; } } @@ -798,7 +909,8 @@ bool testcase::speculum_check_cursor(const char *where, const char *stage, MDBX_val cursor_key = {0, 0}; MDBX_val cursor_data = {0, 0}; int err = mdbx_cursor_get(cursor, &cursor_key, &cursor_data, op); - return speculum_check_cursor(where, stage, it, err, cursor_key, cursor_data); + return speculum_check_cursor(where, stage, it, err, cursor_key, cursor_data, + cursor); } void testcase::speculum_prepare_cursors(const Item &item) { @@ -822,6 +934,7 @@ void testcase::speculum_prepare_cursors(const Item &item) { 
guard.reset(cursor); } + // mdbx_cursor_reset(speculum_cursors[seek_check].get()); const auto cursor_lowerbound = speculum_cursors[lowerbound].get(); const MDBX_val item_key = dataview2iov(item.first), item_data = dataview2iov(item.second); @@ -840,7 +953,7 @@ void testcase::speculum_prepare_cursors(const Item &item) { auto it_lowerbound = speculum.lower_bound(item); // verbose("prepare-cursors", "lowerbound", it_lowerbound); speculum_check_cursor("prepare-cursors", "lowerbound", it_lowerbound, err, - lowerbound_key, lowerbound_data); + lowerbound_key, lowerbound_data, cursor_lowerbound); const auto cursor_prev = speculum_cursors[prev].get(); err = mdbx_cursor_copy(cursor_lowerbound, cursor_prev); @@ -916,6 +1029,7 @@ int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, seek_check_data = adata->value; seek_check_err = mdbx_cursor_get(check_seek_cursor, &seek_check_key, &seek_check_data, MDBX_SET_LOWERBOUND); + // speculum_render(speculum.find(item), check_seek_cursor); if (seek_check_err != MDBX_SUCCESS && seek_check_err != MDBX_NOTFOUND && seek_check_err != MDBX_RESULT_TRUE) failure("speculum-%s: %s pre-insert %d %s", "insert", "seek", @@ -959,7 +1073,8 @@ int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, mdbx_dump_val(&seek_check_key, dump_key, sizeof(dump_key)), mdbx_dump_val(&seek_check_data, dump_value, sizeof(dump_value))); speculum_check_iterator("insert", "pre-seek", insertion_result.first, - seek_check_key, seek_check_data); + seek_check_key, seek_check_data, + check_seek_cursor); rc = false; } } @@ -999,6 +1114,8 @@ int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, } } } + // speculum_render(insertion_result.first, + // speculum_cursors[seek_check].get()); #endif /* SPECULUM_CURSORS */ } @@ -1049,6 +1166,12 @@ int testcase::remove(const keygen::buffer &akey, const keygen::buffer &adata) { item.second = iov2dataview(adata); #if SPECULUM_CURSORS speculum_prepare_cursors(item); 
+ // MDBX_cursor *check_seek_cursor = speculum_cursors[seek_check].get(); + // MDBX_val seek_check_key = akey->value; + // MDBX_val seek_check_data = adata->value; + // mdbx_cursor_get(check_seek_cursor, &seek_check_key, &seek_check_data, + // MDBX_SET_LOWERBOUND); + // speculum_render(speculum.find(item), check_seek_cursor); #endif /* SPECULUM_CURSORS */ } @@ -1075,6 +1198,7 @@ int testcase::remove(const keygen::buffer &akey, const keygen::buffer &adata) { } #if SPECULUM_CURSORS + speculum_render(it_found, speculum_cursors[seek_check].get()); if (it_found != speculum.begin()) { const auto cursor_prev = speculum_cursors[prev].get(); auto it_prev = it_found; diff --git a/test/test.h++ b/test/test.h++ index 3e0ac49c..f3375a6e 100644 --- a/test/test.h++ +++ b/test/test.h++ @@ -197,15 +197,19 @@ protected: bool speculum_check_cursor(const char *where, const char *stage, const testcase::SET::const_iterator &it, int cursor_err, const MDBX_val &cursor_key, - const MDBX_val &cursor_data) const; + const MDBX_val &cursor_data, + MDBX_cursor *cursor) const; bool speculum_check_cursor(const char *where, const char *stage, const testcase::SET::const_iterator &it, MDBX_cursor *cursor, const MDBX_cursor_op op) const; + void speculum_render(const testcase::SET::const_iterator &it, + const MDBX_cursor *ref) const; #endif /* SPECULUM_CURSORS */ bool speculum_check_iterator(const char *where, const char *stage, const testcase::SET::const_iterator &it, - const MDBX_val &k, const MDBX_val &v) const; + const MDBX_val &k, const MDBX_val &v, + MDBX_cursor *cursor) const; void verbose(const char *where, const char *stage, const testcase::SET::const_iterator &it) const; From bdd0b487ae08cada8dce088e12d0861575a8f628 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 20 Jun 2024 12:50:22 +0300 Subject: [PATCH 195/443] =?UTF-8?q?mdbx-doc:=20=D0=BA=D0=BE=D1=80=D1=80?= 
=?UTF-8?q?=D0=B5=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B8=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20Doxygen.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Doxyfile.in | 62 ++++-------------------------------------------- mdbx.h | 6 ++--- 2 files changed, 7 insertions(+), 61 deletions(-) diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index ca91f8b8..8158a457 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -1022,6 +1022,7 @@ EXCLUDE_SYMBOLS = NOMINMAX \ MDBX_HAVE_CXX20_CONCEPTS \ CONSTEXPR_ENUM_FLAGS_OPERATIONS \ DEFINE_ENUM_FLAG_OPERATORS \ + MDBX_DEPRECATED_ENUM \ bool \ false \ true \ @@ -1217,46 +1218,6 @@ USE_HTAGS = NO VERBATIM_HEADERS = YES -# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the -# clang parser (see: -# http://clang.llvm.org/) for more accurate parsing at the cost of reduced -# performance. This can be particularly helpful with template rich C++ code for -# which doxygen's built-in parser lacks the necessary type information. -# Note: The availability of this option depends on whether or not doxygen was -# generated with the -Duse_libclang=ON option for CMake. -# The default value is: NO. - -CLANG_ASSISTED_PARSING = NO - -# If the CLANG_ASSISTED_PARSING tag is set to YES and the CLANG_ADD_INC_PATHS -# tag is set to YES then doxygen will add the directory of each input to the -# include path. -# The default value is: YES. -# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. - -CLANG_ADD_INC_PATHS = YES - -# If clang assisted parsing is enabled you can provide the compiler with command -# line options that you would normally use when invoking the compiler. Note that -# the include paths will already be set by doxygen for the files and directories -# specified with INPUT and INCLUDE_PATH. -# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. 
- -CLANG_OPTIONS = - -# If clang assisted parsing is enabled you can provide the clang parser with the -# path to the directory containing a file called compile_commands.json. This -# file is the compilation database (see: -# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) containing the -# options used when the source files were built. This is equivalent to -# specifying the -p option to a clang tool, such as clang-check. These options -# will then be passed to the parser. Any options specified with CLANG_OPTIONS -# will be added as well. -# Note: The availability of this option depends on whether or not doxygen was -# generated with the -Duse_libclang=ON option for CMake. - -CLANG_DATABASE_PATH = - #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- @@ -1414,15 +1375,6 @@ HTML_COLORSTYLE_SAT = 100 HTML_COLORSTYLE_GAMMA = 80 -# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML -# page will contain the date and time when the page was generated. Setting this -# to YES can help to show when doxygen was last run and thus if the -# documentation is up to date. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_TIMESTAMP = NO - # If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML # documentation will contain a main index with vertical navigation menus that # are dynamically created via JavaScript. If disabled, the navigation index will @@ -2083,14 +2035,6 @@ LATEX_HIDE_INDICES = NO LATEX_BIB_STYLE = plain -# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated -# page will contain the date and time when the page was generated. Setting this -# to NO can help when comparing the output of multiple runs. -# The default value is: NO. 
-# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_TIMESTAMP = NO - # The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute) # path from which the emoji images will be read. If a relative path is entered, # it will be relative to the LATEX_OUTPUT directory. If left blank the @@ -2382,7 +2326,9 @@ PREDEFINED = DOXYGEN \ MDBX_CXX20_LIKELY=[[likely]] \ MDBX_CXX20_UNLIKELY=[[unlikely]] \ MDBX_MAYBE_UNUSED=[[maybe_unused]] \ - MDBX_DEPRECATED=[[deprecated]] + MDBX_DEPRECATED=[[deprecated]] \ + MDBX_DEPRECATED_ENUM=[[deprecated]] \ + "DEFINE_ENUM_FLAG_OPERATORS(ENUM)=" # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The diff --git a/mdbx.h b/mdbx.h index 7a7b25e0..cb3e2a51 100644 --- a/mdbx.h +++ b/mdbx.h @@ -523,7 +523,7 @@ typedef mode_t mdbx_mode_t; * - the proper implementation of DEFINE_ENUM_FLAG_OPERATORS for C++ required * the constexpr feature which is broken in most old compilers; * - DEFINE_ENUM_FLAG_OPERATORS may be defined broken as in the Windows SDK. */ -#ifndef DEFINE_ENUM_FLAG_OPERATORS +#if !defined(DEFINE_ENUM_FLAG_OPERATORS) && !defined(DOXYGEN) #ifdef __cplusplus #if !defined(__cpp_constexpr) || __cpp_constexpr < 200704L || \ @@ -1635,7 +1635,7 @@ typedef enum MDBX_db_flags { * application could determine the actual flags by \ref mdbx_dbi_flags(). */ MDBX_DB_ACCEDE = MDBX_ACCEDE } MDBX_db_flags_t; -DEFINE_ENUM_FLAG_OPERATORS(MDBX_db_flags_t) +DEFINE_ENUM_FLAG_OPERATORS(MDBX_db_flags) /** \brief Data changing flags * \ingroup c_crud @@ -6379,7 +6379,7 @@ typedef struct MDBX_chk_callbacks { * библиотеку. * * Проверка выполняется в несколько стадий, начиная с инициализации и до - * завершения, более подробно см \ref MDBX_chk_stage. О начале и завершении + * завершения, более подробно см \ref MDBX_chk_stage_t. 
О начале и завершении * каждой стадии код приложения уведомляется через соответствующие функции * обратного вызова, более подробно см \ref MDBX_chk_callbacks_t. * From d5fb37460bb0f97799e8dc6f1cc2e662ef9e415d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 21 Jun 2024 12:18:40 +0300 Subject: [PATCH 196/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=B0=D1=80=D1=8B?= =?UTF-8?q?=20=D0=BE=D0=BF=D0=B5=D1=87=D0=B0=D1=82=D0=BE=D0=BA=20=D0=B2=20?= =?UTF-8?q?COPYRIGHT.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- COPYRIGHT | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/COPYRIGHT b/COPYRIGHT index 23614d29..81fadf1b 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -28,9 +28,10 @@ Briefly: to change the license. Below are more detailed explanations. Кратко: - Исторически в 2015 году ранний исходный MDBX был заимствован из «LMDB - engine», созданной Howard Chu в 2011-2015, на основе - btree.c созданного Martin Hedenfalk в 2009-2010. + + Исторически в 2015 году ранний исходный код MDBX был заимствован из + «LMDB engine», созданной Howard Chu в 2011-2015, + на основе btree.c, ранее созданного Martin Hedenfalk . К 2024 году исходный код MDBX фактически переписан и имеет настолько мало общего с первоначальным заимствованием из LMDB, что я счел @@ -83,7 +84,7 @@ OpenLDAP, совершенно без намерения как-либо зад 2.1. Исходная лицензия OpenLDAP 2.8 и актуальная лицензия Apache 2.0 совпадают по базовым условиям. При этом лицензия Apache 2.0 уточняет, определяет и проясняет многие аспекты. Поэтому смену лицензии я склонен -трактовать как уточнение, но как принципиальное изменение, которое +трактовать как уточнение, но НЕ как принципиальное изменение, которое могло-бы нарушить чьи-либо права. 2.2. 
С процедурной точки зрения, у меня есть право сменить лицензию на From 7abeac762f36276ad035546b7caa015838784f40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 25 Jun 2024 23:57:10 +0300 Subject: [PATCH 197/443] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B2=D0=B5=D1=80=D0=BE=D1=8F?= =?UTF-8?q?=D1=82=D0=BD=D0=BE=D1=81=D1=82=D0=B8=20`SIGSEGV`=20=D0=BF=D1=80?= =?UTF-8?q?=D0=B8=20=D0=B2=D0=BA=D0=BB=D1=8E=D1=87=D0=B5=D0=BD=D0=B8=D0=B8?= =?UTF-8?q?=20=D0=BB=D0=BE=D0=B3=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8?= =?UTF-8?q?=D1=8F=20`MDBX=5FLOG=5FTRACE`=20=D0=B2=20=D0=BE=D1=82=D0=BB?= =?UTF-8?q?=D0=B0=D0=B4=D0=BE=D1=87=D0=BD=D1=8B=D1=85=20=D1=81=D0=B1=D0=BE?= =?UTF-8?q?=D1=80=D0=BA=D0=B0=D1=85.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/cursor.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cursor.c b/src/cursor.c index 524ac1b5..a6434539 100644 --- a/src/cursor.c +++ b/src/cursor.c @@ -775,9 +775,11 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, int err; DKBUF_DEBUG; MDBX_env *const env = mc->txn->env; + if (LOG_ENABLED(MDBX_LOG_DEBUG) && (flags & MDBX_RESERVE)) + data->iov_base = nullptr; DEBUG("==> put db %d key [%s], size %" PRIuPTR ", data [%s] size %" PRIuPTR, - cursor_dbi_dbg(mc), DKEY_DEBUG(key), key->iov_len, - DVAL_DEBUG((flags & MDBX_RESERVE) ? 
nullptr : data), data->iov_len); + cursor_dbi_dbg(mc), DKEY_DEBUG(key), key->iov_len, DVAL_DEBUG(data), + data->iov_len); if ((flags & MDBX_CURRENT) != 0 && (mc->flags & z_inner) == 0) { if (unlikely(flags & (MDBX_APPEND | MDBX_NOOVERWRITE))) From 69df6e6ac034b08e0b2ddbd532a136aef783b084 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 28 Jun 2024 12:14:44 +0300 Subject: [PATCH 198/443] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5?= =?UTF-8?q?=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20assert-?= =?UTF-8?q?=D0=BF=D1=80=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B8=20=D0=B2=D0=BD?= =?UTF-8?q?=D1=83=D1=82=D1=80=D0=B8=20`meta=5Foverride()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit После доработок/рефакторинга условие проверки стало неверным. --- src/meta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/meta.c b/src/meta.c index 5a4ced84..b7333a0a 100644 --- a/src/meta.c +++ b/src/meta.c @@ -475,7 +475,7 @@ __cold int __must_check_result meta_override(MDBX_env *env, size_t target, osal_flush_incoherent_mmap(env->dxb_mmap.base, pgno2bytes(env, NUM_METAS), globals.sys_pagesize); } - eASSERT(env, (!env->txn && !env->basal_txn) || + eASSERT(env, (!env->txn && (env->flags & ENV_ACTIVE) == 0) || (env->stuck_meta == (int)target && (env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == MDBX_EXCLUSIVE)); From 49c6e14b3060281cabe2c28cab525f2f1fa94fef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 30 Jun 2024 14:35:42 +0300 Subject: [PATCH 199/443] =?UTF-8?q?mdbx++:=20=D1=80=D0=B0=D1=81=D1=88?= =?UTF-8?q?=D0=B8=D1=80=D0=B5=D0=BD=D0=B8=D0=B5=20API=20=D0=BC=D0=B5=D1=82?= =?UTF-8?q?=D0=BE=D0=B4=D0=B0=D0=BC=D0=B8=20=D0=BF=D1=80=D0=B8=D0=BD=D0=B8?= 
=?UTF-8?q?=D0=BC=D0=B0=D1=8E=D1=89=D0=B8=D0=BC=D0=B8=20=D0=B8=D0=BC=D0=B5?= =?UTF-8?q?=D0=BD=D0=B0=20subDb=20=D1=87=D0=B5=D1=80=D0=B5=D0=B7=20`mdbx::?= =?UTF-8?q?slice`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 166 +++++++++++++++++++++++++-------------------------- src/mdbx.c++ | 33 +++------- 2 files changed, 90 insertions(+), 109 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 2d982041..9f52793d 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4384,11 +4384,18 @@ public: const ::std::string &name, const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single) const; + /// \brief Open existing key-value map. + inline map_handle open_map( + const ::mdbx::slice &name, + const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, + const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single) const; /// \brief Open existing key-value map. inline map_handle open_map_accede(const char *name) const; /// \brief Open existing key-value map. inline map_handle open_map_accede(const ::std::string &name) const; + /// \brief Open existing key-value map. + inline map_handle open_map_accede(const ::mdbx::slice &name) const; /// \brief Create new or open existing key-value map. inline map_handle @@ -4400,6 +4407,11 @@ public: create_map(const ::std::string &name, const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single); + /// \brief Create new or open existing key-value map. + inline map_handle + create_map(const ::mdbx::slice &name, + const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, + const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single); /// \brief Drops key-value map using handle. 
inline void drop_map(map_handle map); @@ -4411,6 +4423,10 @@ public: /// \return `True` if the key-value map existed and was deleted, either /// `false` if the key-value map did not exist and there is nothing to delete. inline bool drop_map(const ::std::string &name, bool throw_if_absent = false); + /// \brief Drop key-value map. + /// \return `True` if the key-value map existed and was deleted, either + /// `false` if the key-value map did not exist and there is nothing to delete. + bool drop_map(const ::mdbx::slice &name, bool throw_if_absent = false); /// \brief Clear key-value map. inline void clear_map(map_handle map); @@ -4421,12 +4437,17 @@ public: /// `false` if the key-value map did not exist and there is nothing to clear. inline bool clear_map(const ::std::string &name, bool throw_if_absent = false); + /// \return `True` if the key-value map existed and was cleared, either + /// `false` if the key-value map did not exist and there is nothing to clear. + bool clear_map(const ::mdbx::slice &name, bool throw_if_absent = false); /// \brief Переименовывает таблицу ключ-значение. inline void rename_map(map_handle map, const char *new_name); /// \brief Переименовывает таблицу ключ-значение. inline void rename_map(map_handle map, const ::std::string &new_name); /// \brief Переименовывает таблицу ключ-значение. + inline void rename_map(map_handle map, const ::mdbx::slice &new_name); + /// \brief Переименовывает таблицу ключ-значение. /// \return `True` если таблица существует и была переименована, либо /// `false` в случае отсутствия исходной таблицы. bool rename_map(const char *old_name, const char *new_name, @@ -4436,6 +4457,11 @@ public: /// `false` в случае отсутствия исходной таблицы. bool rename_map(const ::std::string &old_name, const ::std::string &new_name, bool throw_if_absent = false); + /// \brief Переименовывает таблицу ключ-значение. 
+ /// \return `True` если таблица существует и была переименована, либо + /// `false` в случае отсутствия исходной таблицы. + bool rename_map(const ::mdbx::slice &old_name, const ::mdbx::slice &new_name, + bool throw_if_absent = false); #if defined(DOXYGEN) || \ (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) @@ -4444,21 +4470,29 @@ public: inline map_handle open_map( const ::std::string_view &name, const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, - const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single) const; + const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single) const { + return open_map(::mdbx::slice(name), key_mode, value_mode); + } /// \brief Open existing key-value map. inline map_handle open_map_accede(const ::std::string_view &name) const; /// \brief Create new or open existing key-value map. inline map_handle create_map(const ::std::string_view &name, const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, - const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single); + const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single) { + return create_map(::mdbx::slice(name), key_mode, value_mode); + } /// \brief Drop key-value map. /// \return `True` if the key-value map existed and was deleted, either /// `false` if the key-value map did not exist and there is nothing to delete. - bool drop_map(const ::std::string_view &name, bool throw_if_absent = false); + bool drop_map(const ::std::string_view &name, bool throw_if_absent = false) { + return drop_map(::mdbx::slice(name), throw_if_absent); + } /// \return `True` if the key-value map existed and was cleared, either /// `false` if the key-value map did not exist and there is nothing to clear. 
- bool clear_map(const ::std::string_view &name, bool throw_if_absent = false); + bool clear_map(const ::std::string_view &name, bool throw_if_absent = false) { + return clear_map(::mdbx::slice(name), throw_if_absent); + } /// \brief Переименовывает таблицу ключ-значение. inline void rename_map(map_handle map, const ::std::string_view &new_name); /// \brief Переименовывает таблицу ключ-значение. @@ -4466,8 +4500,10 @@ public: /// `false` в случае отсутствия исходной таблицы. bool rename_map(const ::std::string_view &old_name, const ::std::string_view &new_name, - bool throw_if_absent = false); - + bool throw_if_absent = false) { + return rename_map(::mdbx::slice(old_name), ::mdbx::slice(new_name), + throw_if_absent); + } #endif /* __cpp_lib_string_view >= 201606L */ using map_stat = ::MDBX_stat; @@ -6427,6 +6463,17 @@ inline size_t txn::release_all_cursors(bool unbind) const { return size_t(err); } +inline ::mdbx::map_handle +txn::open_map(const ::mdbx::slice &name, const ::mdbx::key_mode key_mode, + const ::mdbx::value_mode value_mode) const { + ::mdbx::map_handle map; + error::success_or_throw(::mdbx_dbi_open2( + handle_, name, MDBX_db_flags_t(key_mode) | MDBX_db_flags_t(value_mode), + &map.dbi)); + assert(map.dbi != 0); + return map; +} + inline ::mdbx::map_handle txn::open_map(const char *name, const ::mdbx::key_mode key_mode, const ::mdbx::value_mode value_mode) const { @@ -6438,6 +6485,15 @@ txn::open_map(const char *name, const ::mdbx::key_mode key_mode, return map; } +inline ::mdbx::map_handle +txn::open_map_accede(const ::mdbx::slice &name) const { + ::mdbx::map_handle map; + error::success_or_throw( + ::mdbx_dbi_open2(handle_, name, MDBX_DB_ACCEDE, &map.dbi)); + assert(map.dbi != 0); + return map; +} + inline ::mdbx::map_handle txn::open_map_accede(const char *name) const { ::mdbx::map_handle map; error::success_or_throw( @@ -6446,6 +6502,18 @@ inline ::mdbx::map_handle txn::open_map_accede(const char *name) const { return map; } +inline 
::mdbx::map_handle txn::create_map(const ::mdbx::slice &name, + const ::mdbx::key_mode key_mode, + const ::mdbx::value_mode value_mode) { + ::mdbx::map_handle map; + error::success_or_throw(::mdbx_dbi_open2( + handle_, name, + MDBX_CREATE | MDBX_db_flags_t(key_mode) | MDBX_db_flags_t(value_mode), + &map.dbi)); + assert(map.dbi != 0); + return map; +} + inline ::mdbx::map_handle txn::create_map(const char *name, const ::mdbx::key_mode key_mode, const ::mdbx::value_mode value_mode) { @@ -6470,109 +6538,39 @@ inline void txn::rename_map(map_handle map, const char *new_name) { error::success_or_throw(::mdbx_dbi_rename(handle_, map, new_name)); } -#if defined(DOXYGEN) || \ - (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) - -inline ::mdbx::map_handle -txn::open_map(const ::std::string_view &name, const ::mdbx::key_mode key_mode, - const ::mdbx::value_mode value_mode) const { - ::mdbx::map_handle map; - error::success_or_throw(::mdbx_dbi_open2( - handle_, ::mdbx::slice(name), - MDBX_db_flags_t(key_mode) | MDBX_db_flags_t(value_mode), &map.dbi)); - assert(map.dbi != 0); - return map; -} - -inline ::mdbx::map_handle -txn::open_map_accede(const ::std::string_view &name) const { - ::mdbx::map_handle map; - error::success_or_throw( - ::mdbx_dbi_open2(handle_, ::mdbx::slice(name), MDBX_DB_ACCEDE, &map.dbi)); - assert(map.dbi != 0); - return map; -} - -inline ::mdbx::map_handle txn::create_map(const ::std::string_view &name, - const ::mdbx::key_mode key_mode, - const ::mdbx::value_mode value_mode) { - ::mdbx::map_handle map; - error::success_or_throw(::mdbx_dbi_open2( - handle_, ::mdbx::slice(name), - MDBX_CREATE | MDBX_db_flags_t(key_mode) | MDBX_db_flags_t(value_mode), - &map.dbi)); - assert(map.dbi != 0); - return map; -} - -inline void txn::rename_map(map_handle map, - const ::std::string_view &new_name) { - error::success_or_throw( - ::mdbx_dbi_rename2(handle_, map, ::mdbx::slice(new_name))); +inline void txn::rename_map(map_handle map, const 
::mdbx::slice &new_name) { + error::success_or_throw(::mdbx_dbi_rename2(handle_, map, new_name)); } inline ::mdbx::map_handle txn::open_map(const ::std::string &name, const ::mdbx::key_mode key_mode, const ::mdbx::value_mode value_mode) const { - return open_map(::std::string_view(name), key_mode, value_mode); + return open_map(::mdbx::slice(name), key_mode, value_mode); } inline ::mdbx::map_handle txn::open_map_accede(const ::std::string &name) const { - return open_map_accede(::std::string_view(name)); + return open_map_accede(::mdbx::slice(name)); } inline ::mdbx::map_handle txn::create_map(const ::std::string &name, const ::mdbx::key_mode key_mode, const ::mdbx::value_mode value_mode) { - return create_map(::std::string_view(name), key_mode, value_mode); + return create_map(::mdbx::slice(name), key_mode, value_mode); } inline bool txn::drop_map(const ::std::string &name, bool throw_if_absent) { - return drop_map(::std::string_view(name), throw_if_absent); + return drop_map(::mdbx::slice(name), throw_if_absent); } inline bool txn::clear_map(const ::std::string &name, bool throw_if_absent) { - return clear_map(::std::string_view(name), throw_if_absent); + return clear_map(::mdbx::slice(name), throw_if_absent); } inline void txn::rename_map(map_handle map, const ::std::string &new_name) { - return rename_map(map, ::std::string_view(new_name)); + return rename_map(map, ::mdbx::slice(new_name)); } -#else - -inline ::mdbx::map_handle -txn::open_map(const ::std::string &name, const ::mdbx::key_mode key_mode, - const ::mdbx::value_mode value_mode) const { - return open_map(name.c_str(), key_mode, value_mode); -} - -inline ::mdbx::map_handle -txn::open_map_accede(const ::std::string &name) const { - return open_map_accede(name.c_str()); -} - -inline ::mdbx::map_handle txn::create_map(const ::std::string &name, - const ::mdbx::key_mode key_mode, - const ::mdbx::value_mode value_mode) { - return create_map(name.c_str(), key_mode, value_mode); -} - -inline bool 
txn::drop_map(const ::std::string &name, bool throw_if_absent) { - return drop_map(name.c_str(), throw_if_absent); -} - -inline bool txn::clear_map(const ::std::string &name, bool throw_if_absent) { - return clear_map(name.c_str(), throw_if_absent); -} - -inline void txn::rename_map(map_handle map, const ::std::string &new_name) { - return rename_map(map, new_name.c_str()); -} - -#endif /* __cpp_lib_string_view >= 201606L */ - inline txn::map_stat txn::get_map_stat(map_handle map) const { txn::map_stat r; error::success_or_throw(::mdbx_dbi_stat(handle_, map.dbi, &r, sizeof(r))); diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 7c27f1d8..583b46a4 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -1618,13 +1618,9 @@ __cold bool txn::rename_map(const char *old_name, const char *new_name, } } -#if defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L - -__cold bool txn::drop_map(const ::std::string_view &name, - bool throw_if_absent) { +__cold bool txn::drop_map(const ::mdbx::slice &name, bool throw_if_absent) { map_handle map; - const int err = - ::mdbx_dbi_open2(handle_, mdbx::slice(name), MDBX_DB_ACCEDE, &map.dbi); + const int err = ::mdbx_dbi_open2(handle_, name, MDBX_DB_ACCEDE, &map.dbi); switch (err) { case MDBX_SUCCESS: drop_map(map); @@ -1639,11 +1635,9 @@ __cold bool txn::drop_map(const ::std::string_view &name, } } -__cold bool txn::clear_map(const ::std::string_view &name, - bool throw_if_absent) { +__cold bool txn::clear_map(const ::mdbx::slice &name, bool throw_if_absent) { map_handle map; - const int err = - ::mdbx_dbi_open2(handle_, mdbx::slice(name), MDBX_DB_ACCEDE, &map.dbi); + const int err = ::mdbx_dbi_open2(handle_, name, MDBX_DB_ACCEDE, &map.dbi); switch (err) { case MDBX_SUCCESS: clear_map(map); @@ -1658,12 +1652,11 @@ __cold bool txn::clear_map(const ::std::string_view &name, } } -__cold bool txn::rename_map(const ::std::string_view &old_name, - const ::std::string_view &new_name, +__cold bool txn::rename_map(const ::mdbx::slice 
&old_name, + const ::mdbx::slice &new_name, bool throw_if_absent) { map_handle map; - const int err = ::mdbx_dbi_open2(handle_, mdbx::slice(old_name), - MDBX_DB_ACCEDE, &map.dbi); + const int err = ::mdbx_dbi_open2(handle_, old_name, MDBX_DB_ACCEDE, &map.dbi); switch (err) { case MDBX_SUCCESS: rename_map(map, new_name); @@ -1681,20 +1674,10 @@ __cold bool txn::rename_map(const ::std::string_view &old_name, __cold bool txn::rename_map(const ::std::string &old_name, const ::std::string &new_name, bool throw_if_absent) { - return rename_map(::std::string_view(old_name), ::std::string_view(new_name), + return rename_map(::mdbx::slice(old_name), ::mdbx::slice(new_name), throw_if_absent); } -#else - -__cold bool txn::rename_map(const ::std::string &old_name, - const ::std::string &new_name, - bool throw_if_absent) { - return rename_map(old_name.c_str(), new_name.c_str(), throw_if_absent); -} - -#endif /* __cpp_lib_string_view >= 201606L */ - //------------------------------------------------------------------------------ void cursor_managed::close() { From b4f395be5070500c2b02dcb741fe084ee2b0f9a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 4 Jul 2024 14:03:00 +0300 Subject: [PATCH 200/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BD=D0=B5=D0=B4=D0=BE?= =?UTF-8?q?=D1=81=D1=82=D0=B0=D1=8E=D1=89=D0=B5=D0=B3=D0=BE=20=D0=BC=D0=B5?= =?UTF-8?q?=D1=82=D0=BE=D0=B4=D0=B0=20`mdbx::env::limits::max=5Fmap=5Fhand?= =?UTF-8?q?les()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index 9f52793d..58a2b7ce 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3840,6 +3840,9 @@ public: /// \brief Returns the maximal write transaction size (i.e. 
limit for /// summary volume of dirty pages) in bytes for specified page size. static inline size_t transaction_size_max(intptr_t pagesize); + + /// \brief Returns the maximum opened map handles, aka DBI-handles. + static inline size_t max_map_handles(void); }; /// \brief Returns the minimal database size in bytes for the environment. @@ -6125,6 +6128,8 @@ inline size_t env::limits::transaction_size_max(intptr_t pagesize) { return static_cast<size_t>(result); } +inline size_t env::limits::max_map_handles(void) { return MDBX_MAX_DBI; } + inline env::operate_parameters env::get_operation_parameters() const { const auto flags = get_flags(); return operate_parameters(max_maps(), max_readers(), From 9fbf0099f2f3d9ad7fccf9c231c2ea2a644fb82f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 4 Jul 2024 14:03:54 +0300 Subject: [PATCH 201/443] =?UTF-8?q?mdbx-doc:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`\see`=20=D1=81=D1=81?= =?UTF-8?q?=D1=8B=D0=BB=D0=BE=D0=BA=20=D0=BD=D0=B0=20`MDBX=5Fdb=5Fflags=5F?= =?UTF-8?q?t`=20=D0=B8=20`MDBX=5Fdbi=5Fstate=5Ft`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mdbx.h b/mdbx.h index cb3e2a51..b465d763 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4553,6 +4553,8 @@ DEFINE_ENUM_FLAG_OPERATORS(MDBX_dbi_state) /** \brief Retrieve the DB flags and status for a database handle. * \ingroup c_statinfo + * \see MDBX_db_flags_t + * \see MDBX_dbi_state_t * * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). * \param [in] dbi A database handle returned by \ref mdbx_dbi_open().
@@ -4564,7 +4566,8 @@ LIBMDBX_API int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, unsigned *state); /** \brief The shortcut to calling \ref mdbx_dbi_flags_ex() with `state=NULL` * for discarding it result. - * \ingroup c_statinfo */ + * \ingroup c_statinfo + * \see MDBX_db_flags_t */ LIBMDBX_INLINE_API(int, mdbx_dbi_flags, (const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags)) { unsigned state; From 9acbe885669be49c0ce874dc53705948500adad7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 5 Jul 2024 00:25:28 +0300 Subject: [PATCH 202/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fenumerate=5Fsubdb()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 40 ++++++++++++++++++++++ src/audit.c | 95 ++++++++++++++++------------------------------------- src/dbi.c | 80 ++++++++++++++++++++++++++++++++++++++++++++ src/dbi.h | 3 ++ 4 files changed, 152 insertions(+), 66 deletions(-) diff --git a/mdbx.h b/mdbx.h index b465d763..07140fbf 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4443,6 +4443,46 @@ LIBMDBX_API int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name); LIBMDBX_API int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *name); +/** \brief Функция обратного вызова для перечисления + * пользовательских именованных таблиц. + * + * \ingroup c_statinfo + * \see mdbx_enumerate_subdb() + * + * \param [in] ctx Указатель на контекст переданный аналогичным + * параметром в \ref mdbx_enumerate_subdb(). + * \param [in] txn Транзакция. + * \param [in] name Имя таблицы. + * \param [in] flags Флаги \ref MDBX_db_flags_t. + * \param [in] stat Базовая информация \ref MDBX_stat о таблице. + * \param [in] dbi Отличное от 0 значение DBI-дескриптора, + * если таковой был открыт для этой таблицы.
+ * Либо 0 если такого открытого дескриптора нет. + * + * \returns Ноль при успехе и продолжении перечисления, при возвращении другого + * значения оно будет немедленно возвращено вызывающему + * без продолжения перечисления. */ +typedef int(MDBX_subdb_enum_func)(void *ctx, const MDBX_txn *txn, + const MDBX_val *name, MDBX_db_flags_t flags, + const struct MDBX_stat *stat, + MDBX_dbi dbi) MDBX_CXX17_NOEXCEPT; + +/** \brief Enumerates the user-defined named tables (subDBs). + * \ingroup c_statinfo + * \see MDBX_subdb_enum_func + * + * \param [in] txn Транзакция запущенная посредством + * \ref mdbx_txn_begin(). + * \param [in] func Указатель на пользовательскую функцию-перечислитель + * с сигнатурой \ref MDBX_subdb_enum_func, + * которая будет вызвана для каждой таблицы. + * \param [in] ctx Указатель на некоторый контекст, который будет передан + * в функцию-перечислитель как есть. + * + * \returns Ненулевое значение кода ошибки, либо 0 при успешном выполнении. */ + LIBMDBX_API int mdbx_enumerate_subdb(const MDBX_txn *txn, + MDBX_subdb_enum_func *func, void *ctx); + /** \defgroup value2key Value-to-Key functions * \brief Value-to-Key functions to * \ref avoid_custom_comparators "avoid using custom comparators" diff --git a/src/audit.c b/src/audit.c index 7e6bee78..e816aa0a 100644 --- a/src/audit.c +++ b/src/audit.c @@ -3,28 +3,24 @@ #include "internals.h" -__cold static tree_t *audit_db_dig(const MDBX_txn *txn, const size_t dbi, - tree_t *fallback) { - const MDBX_txn *dig = txn; - do { - tASSERT(txn, txn->n_dbi == dig->n_dbi); - const uint8_t state = dbi_state(dig, dbi); - if (state & DBI_LINDO) - switch (state & (DBI_VALID | DBI_STALE | DBI_OLDEN)) { - case DBI_VALID: - case DBI_OLDEN: - return dig->dbs + dbi; - case 0: - return nullptr; - case DBI_VALID | DBI_STALE: - case DBI_OLDEN | DBI_STALE: - break; - default: - tASSERT(txn, !!"unexpected dig->dbi_state[dbi]"); - } - dig = dig->parent; - } while (dig); - return fallback; +struct audit_ctx { + size_t used; + uint8_t
*const done_bitmap; +}; + +static int audit_dbi(void *ctx, const MDBX_txn *txn, const MDBX_val *name, + MDBX_db_flags_t flags, const struct MDBX_stat *stat, + MDBX_dbi dbi) { + struct audit_ctx *audit_ctx = ctx; + (void)name; + (void)txn; + (void)flags; + audit_ctx->used += (size_t)stat->ms_branch_pages + + (size_t)stat->ms_leaf_pages + + (size_t)stat->ms_overflow_pages; + if (dbi) + audit_ctx->done_bitmap[dbi / CHAR_BIT] |= 1 << dbi % CHAR_BIT; + return MDBX_SUCCESS; } static size_t audit_db_used(const tree_t *db) { @@ -71,8 +67,6 @@ __cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, tASSERT(txn, rc == MDBX_NOTFOUND); const size_t done_bitmap_size = (txn->n_dbi + CHAR_BIT - 1) / CHAR_BIT; - uint8_t *const done_bitmap = alloca(done_bitmap_size); - memset(done_bitmap, 0, done_bitmap_size); if (txn->parent) { tASSERT(txn, txn->n_dbi == txn->parent->n_dbi && txn->n_dbi == txn->env->txn->n_dbi); @@ -82,51 +76,20 @@ __cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, #endif /* MDBX_ENABLE_DBI_SPARSE */ } - size_t used = NUM_METAS + - audit_db_used(audit_db_dig(txn, FREE_DBI, nullptr)) + - audit_db_used(audit_db_dig(txn, MAIN_DBI, nullptr)); - rc = cursor_init(&cx.outer, txn, MAIN_DBI); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; + struct audit_ctx ctx = {0, alloca(done_bitmap_size)}; + memset(ctx.done_bitmap, 0, done_bitmap_size); + ctx.used = NUM_METAS + audit_db_used(dbi_dig(txn, FREE_DBI, nullptr)) + + audit_db_used(dbi_dig(txn, MAIN_DBI, nullptr)); - rc = tree_search(&cx.outer, nullptr, Z_FIRST); - while (rc == MDBX_SUCCESS) { - page_t *mp = cx.outer.pg[cx.outer.top]; - for (size_t k = 0; k < page_numkeys(mp); k++) { - node_t *node = page_node(mp, k); - if (node_flags(node) != N_SUBDATA) - continue; - if (unlikely(node_ds(node) != sizeof(tree_t))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid dupsort sub-tree node size", (unsigned)node_ds(node)); - return MDBX_CORRUPTED; - } - - tree_t reside; - 
const tree_t *db = memcpy(&reside, node_data(node), sizeof(reside)); - const MDBX_val name = {node_key(node), node_ks(node)}; - for (size_t dbi = CORE_DBS; dbi < env->n_dbi; ++dbi) { - if (dbi >= txn->n_dbi || !(env->dbs_flags[dbi] & DB_VALID)) - continue; - if (env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[dbi].name)) - continue; - - done_bitmap[dbi / CHAR_BIT] |= 1 << dbi % CHAR_BIT; - db = audit_db_dig(txn, dbi, &reside); - break; - } - used += audit_db_used(db); - } - rc = cursor_sibling_right(&cx.outer); - } - tASSERT(txn, rc == MDBX_NOTFOUND); + rc = mdbx_enumerate_subdb(txn, audit_dbi, &ctx); + tASSERT(txn, rc == MDBX_SUCCESS); for (size_t dbi = CORE_DBS; dbi < txn->n_dbi; ++dbi) { - if (done_bitmap[dbi / CHAR_BIT] & (1 << dbi % CHAR_BIT)) + if (ctx.done_bitmap[dbi / CHAR_BIT] & (1 << dbi % CHAR_BIT)) continue; - const tree_t *db = audit_db_dig(txn, dbi, nullptr); + const tree_t *db = dbi_dig(txn, dbi, nullptr); if (db) - used += audit_db_used(db); + ctx.used += audit_db_used(db); else if (dbi_state(txn, dbi)) WARNING("audit %s@%" PRIaTXN ": unable account dbi %zd / \"%*s\", state 0x%02x", @@ -135,7 +98,7 @@ __cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, (const char *)env->kvs[dbi].name.iov_base, dbi_state(txn, dbi)); } - if (pending + gc + used == txn->geo.first_unallocated) + if (pending + gc + ctx.used == txn->geo.first_unallocated) return MDBX_SUCCESS; if ((txn->flags & MDBX_TXN_RDONLY) == 0) @@ -148,7 +111,7 @@ __cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, ERROR("audit @%" PRIaTXN ": %zu(pending) + %zu" "(gc) + %zu(count) = %zu(total) <> %zu" "(allocated)", - txn->txnid, pending, gc, used, pending + gc + used, + txn->txnid, pending, gc, ctx.used, pending + gc + ctx.used, (size_t)txn->geo.first_unallocated); return MDBX_PROBLEM; } diff --git a/src/dbi.c b/src/dbi.c index b8becf4d..5f2c8ccb 100644 --- a/src/dbi.c +++ b/src/dbi.c @@ -952,3 +952,83 @@ __cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi 
dbi, MDBX_stat *dest, stat_get(&txn->dbs[dbi], dest, bytes); return MDBX_SUCCESS; } + +__cold const tree_t *dbi_dig(const MDBX_txn *txn, const size_t dbi, + tree_t *fallback) { + const MDBX_txn *dig = txn; + do { + tASSERT(txn, txn->n_dbi == dig->n_dbi); + const uint8_t state = dbi_state(dig, dbi); + if (state & DBI_LINDO) + switch (state & (DBI_VALID | DBI_STALE | DBI_OLDEN)) { + case DBI_VALID: + case DBI_OLDEN: + return dig->dbs + dbi; + case 0: + return nullptr; + case DBI_VALID | DBI_STALE: + case DBI_OLDEN | DBI_STALE: + break; + default: + tASSERT(txn, !!"unexpected dig->dbi_state[dbi]"); + } + dig = dig->parent; + } while (dig); + return fallback; +} + +__cold int mdbx_enumerate_subdb(const MDBX_txn *txn, MDBX_subdb_enum_func *func, + void *ctx) { + if (unlikely(!func)) + return MDBX_EINVAL; + + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, MAIN_DBI); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + cx.outer.next = txn->cursors[MAIN_DBI]; + txn->cursors[MAIN_DBI] = &cx.outer; + for (rc = outer_first(&cx.outer, nullptr, nullptr); rc == MDBX_SUCCESS; + rc = outer_next(&cx.outer, nullptr, nullptr, MDBX_NEXT_NODUP)) { + node_t *node = + page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); + if (node_flags(node) != N_SUBDATA) + continue; + if (unlikely(node_ds(node) != sizeof(tree_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, + "invalid dupsort sub-tree node size", (unsigned)node_ds(node)); + rc = MDBX_CORRUPTED; + break; + } + + tree_t reside; + const tree_t *tree = memcpy(&reside, node_data(node), sizeof(reside)); + const MDBX_val name = {node_key(node), node_ks(node)}; + const MDBX_env *const env = txn->env; + MDBX_dbi dbi = 0; + for (size_t i = CORE_DBS; i < env->n_dbi; ++i) { + if (i >= txn->n_dbi || !(env->dbs_flags[i] & DB_VALID)) + continue; + if (env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[i].name)) + continue; + 
+ tree = dbi_dig(txn, i, &reside); + dbi = (MDBX_dbi)i; + break; + } + + MDBX_stat stat; + stat_get(tree, &stat, sizeof(stat)); + rc = func(ctx, txn, &name, tree->flags, &stat, dbi); + if (rc != MDBX_SUCCESS) + break; + } + txn->cursors[MAIN_DBI] = cx.outer.next; + + return (rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc; +} diff --git a/src/dbi.h b/src/dbi.h index 29c1bf93..401c1b59 100644 --- a/src/dbi.h +++ b/src/dbi.h @@ -131,3 +131,6 @@ MDBX_INTERNAL int dbi_open(MDBX_txn *txn, const MDBX_val *const name, MDBX_INTERNAL int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); + +MDBX_INTERNAL const tree_t *dbi_dig(const MDBX_txn *txn, const size_t dbi, + tree_t *fallback); From 3798d47a719a19b111896c22468abf139e333cee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 5 Jul 2024 20:33:43 +0300 Subject: [PATCH 203/443] =?UTF-8?q?mdbx-doc:=20=D0=BD=D0=B5=D1=81=D1=83?= =?UTF-8?q?=D1=89=D0=B5=D1=81=D1=82=D0=B2=D0=B5=D0=BD=D0=BD=D0=B0=D1=8F=20?= =?UTF-8?q?=D0=BA=D0=BE=D1=80=D1=80=D0=B5=D0=BA=D1=82=D0=B8=D1=80=D0=BE?= =?UTF-8?q?=D0=B2=D0=BA=D0=B0=20doxygen-=D0=BE=D0=BF=D0=B8=D1=81=D0=B0?= =?UTF-8?q?=D0=BD=D0=B8=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mdbx.h b/mdbx.h index 07140fbf..1f4c2bc2 100644 --- a/mdbx.h +++ b/mdbx.h @@ -3067,8 +3067,8 @@ LIBMDBX_INLINE_API(int, mdbx_env_close, (MDBX_env * env)) { * \param [in,out] env Экземпляр среды созданный функцией * \ref mdbx_env_create(). * - * \returns Ненулевое значение ошибки при сбое и 0 при успешном выполнении, - * некоторые возможные ошибки таковы: + * \returns Ненулевое значение кода ошибки, либо 0 при успешном выполнении. 
+ * Некоторые возможные ошибки таковы: * * \retval MDBX_BUSY В родительском процессе БД была открыта * в режиме \ref MDBX_EXCLUSIVE. @@ -4424,7 +4424,7 @@ MDBX_DEPRECATED LIBMDBX_API int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); -/** \brief Переименовает таблицу по DBI-хендлу. +/** \brief Переименовает таблицу по DBI-дескриптору. * \ingroup c_dbi * * Переименовывает пользовательскую именованную subDB связанную с передаваемым @@ -4437,7 +4437,7 @@ mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, * * \param [in] name Новое имя для переименования. * - * \returns Ненулевое значение ошибки при сбое и 0 при успешном выполнении. */ + * \returns Ненулевое значение кода ошибки, либо 0 при успешном выполнении. */ LIBMDBX_API int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name); /** \copydoc mdbx_dbi_rename() */ LIBMDBX_API int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, @@ -4480,7 +4480,7 @@ typedef int(MDBX_subdb_enum_func)(void *ctx, const MDBX_txn *txn, * в функцию-перечислитель как есть. * * \returns Ненулевое значение кода ошибки, либо 0 при успешном выполнении. */ - LIBMDBX_API int mdbx_enumerate_subdb(const MDBX_txn *txn, +LIBMDBX_API int mdbx_enumerate_subdb(const MDBX_txn *txn, MDBX_subdb_enum_func *func, void *ctx); /** \defgroup value2key Value-to-Key functions @@ -6178,7 +6178,7 @@ LIBMDBX_API int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target_meta); * нет препятствий к тому, чтобы другой процесс удалил БД и создал её заново с * другим размером страницы и/или изменением любых других параметров. * - * \returns Ненулевое значение ошибки при сбое и 0 при успешном выполнении. */ + * \returns Ненулевое значение кода ошибки, либо 0 при успешном выполнении. 
*/ LIBMDBX_API int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *info, size_t bytes); #if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) From 319753661a2c76be40adbed5fc06d61bb3b28078 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 5 Jul 2024 22:00:05 +0300 Subject: [PATCH 204/443] =?UTF-8?q?mdbx:=20=D1=80=D0=B5=D1=84=D0=B0=D0=BA?= =?UTF-8?q?=D1=82=D0=BE=D1=80=D0=B8=D0=BD=D0=B3=20`coherency=5Fcheck=5Fwri?= =?UTF-8?q?tten()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/coherency.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/src/coherency.c b/src/coherency.c index 4bab049f..5e491b1b 100644 --- a/src/coherency.c +++ b/src/coherency.c @@ -168,26 +168,24 @@ int coherency_check_written(const MDBX_env *env, const txnid_t txnid, uint64_t *timestamp) { const bool report = !(timestamp && *timestamp); const txnid_t head_txnid = meta_txnid(meta); - if (unlikely(head_txnid < MIN_TXNID || head_txnid < txnid)) { - if (report) { - env->lck->pgops.incoherence.weak = - (env->lck->pgops.incoherence.weak >= INT32_MAX) - ? INT32_MAX - : env->lck->pgops.incoherence.weak + 1; - WARNING("catch %s txnid %" PRIaTXN " for meta_%" PRIaPGNO " %s", - (head_txnid < MIN_TXNID) ? 
"invalid" : "unexpected", head_txnid, - bytes2pgno(env, ptr_dist(meta, env->dxb_mmap.base)), - "(workaround for incoherent flaw of unified page/buffer cache)"); + if (likely(head_txnid >= MIN_TXNID && head_txnid >= txnid)) { + if (likely( + coherency_check(env, head_txnid, &meta->trees.gc, meta, report))) { + eASSERT(env, meta->trees.gc.flags == MDBX_INTEGERKEY); + eASSERT(env, check_sdb_flags(meta->trees.main.flags)); + return MDBX_SUCCESS; } - return coherency_timeout(timestamp, pgno, env); + } else if (report) { + env->lck->pgops.incoherence.weak = + (env->lck->pgops.incoherence.weak >= INT32_MAX) + ? INT32_MAX + : env->lck->pgops.incoherence.weak + 1; + WARNING("catch %s txnid %" PRIaTXN " for meta_%" PRIaPGNO " %s", + (head_txnid < MIN_TXNID) ? "invalid" : "unexpected", head_txnid, + bytes2pgno(env, ptr_dist(meta, env->dxb_mmap.base)), + "(workaround for incoherent flaw of unified page/buffer cache)"); } - if (unlikely( - !coherency_check(env, head_txnid, &meta->trees.gc, meta, report))) - return coherency_timeout(timestamp, pgno, env); - - eASSERT(env, meta->trees.gc.flags == MDBX_INTEGERKEY); - eASSERT(env, check_sdb_flags(meta->trees.main.flags)); - return MDBX_SUCCESS; + return coherency_timeout(timestamp, pgno, env); } bool coherency_check_meta(const MDBX_env *env, const volatile meta_t *meta, From fe31958d46ac96acbfe14439196c7aa4bc1e467a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 6 Jul 2024 10:46:42 +0300 Subject: [PATCH 205/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20UUID=20=D0=B4=D0=BB=D1=8F=20?= =?UTF-8?q?=D0=B8=D0=B4=D0=B5=D0=BD=D1=82=D0=B8=D1=84=D0=B8=D0=BA=D0=B0?= =?UTF-8?q?=D1=86=D0=B8=D0=B8=20=D0=91=D0=94.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 2 +- mdbx.h | 5 + src/api-env.c | 14 ++- src/chk.c | 14 ++- 
src/dxb.c | 10 +- src/layout-dxb.h | 12 +-- src/meta.c | 21 +++-- src/meta.h | 5 + src/osal.c | 215 +++++++++++++++++++++++++++++++++---------- src/osal.h | 11 +++ src/txn.c | 1 + src/windows-import.c | 5 + src/windows-import.h | 3 + 13 files changed, 246 insertions(+), 72 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d533fa40..00334955 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -826,7 +826,7 @@ macro(libmdbx_setup_libs TARGET MODE) target_link_libraries(${TARGET} ${MODE} Threads::Threads) endif() if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - target_link_libraries(${TARGET} ${MODE} ntdll user32 kernel32 advapi32) + target_link_libraries(${TARGET} ${MODE} ntdll user32 kernel32 advapi32 ole32) if(MDBX_NTDLL_EXTRA_IMPLIB AND MDBX_WITHOUT_MSVC_CRT) target_link_libraries(${TARGET} ${MODE} ntdll_extra) endif() diff --git a/mdbx.h b/mdbx.h index 1f4c2bc2..8ee45375 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2745,6 +2745,11 @@ struct MDBX_envinfo { uint64_t fsync; /**< Number of explicit fsync-to-disk operations (not a pages) */ } mi_pgop_stat; + + /* GUID of the database DXB file. 
*/ + struct { + uint64_t x, y; + } mi_dxbid; }; #ifndef __cplusplus /** \ingroup c_statinfo */ diff --git a/src/api-env.c b/src/api-env.c index 1e8988b6..ce2d0755 100644 --- a/src/api-env.c +++ b/src/api-env.c @@ -727,6 +727,7 @@ static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, troika_t *const troika) { const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); + const size_t size_before_dxbid = offsetof(MDBX_envinfo, mi_dxbid); if (unlikely(env->flags & ENV_FATAL_ERROR)) return MDBX_PANIC; @@ -773,6 +774,8 @@ static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, memcpy(&out->mi_bootid.meta[0], &meta0->bootid, 16); memcpy(&out->mi_bootid.meta[1], &meta1->bootid, 16); memcpy(&out->mi_bootid.meta[2], &meta2->bootid, 16); + if (likely(bytes > size_before_dxbid)) + memcpy(&out->mi_dxbid, &meta0->dxbid, 16); } const volatile meta_t *txn_meta = head.ptr_v; @@ -895,8 +898,9 @@ __cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); + const size_t size_before_dxbid = offsetof(MDBX_envinfo, mi_dxbid); if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && - bytes != size_before_pgop_stat) + bytes != size_before_pgop_stat && bytes != size_before_dxbid) return MDBX_EINVAL; if (txn) { @@ -938,8 +942,9 @@ __cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); + const size_t size_before_dxbid = offsetof(MDBX_envinfo, mi_dxbid); if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && - bytes != size_before_pgop_stat) + bytes != size_before_pgop_stat && bytes != size_before_dxbid) return MDBX_EINVAL; 
memset(out, 0, bytes); @@ -993,8 +998,11 @@ __cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, const unsigned n = 0; out->mi_recent_txnid = constmeta_txnid(&header); out->mi_meta_sign[n] = unaligned_peek_u64(4, &header.sign); - if (likely(bytes > size_before_bootid)) + if (likely(bytes > size_before_bootid)) { memcpy(&out->mi_bootid.meta[n], &header.bootid, 16); + if (likely(bytes > size_before_dxbid)) + memcpy(&out->mi_dxbid, &header.dxbid, 16); + } bailout: env_close(&env, false); diff --git a/src/chk.c b/src/chk.c index 1e7cb6dd..88675536 100644 --- a/src/chk.c +++ b/src/chk.c @@ -1590,13 +1590,23 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) { return chk_error_rc(scope, err, "env_info"); MDBX_chk_line_t *line = - chk_puts(chk_line_begin(scope, MDBX_chk_info), "current boot-id "); + chk_puts(chk_line_begin(scope, MDBX_chk_info - + (1 << MDBX_chk_severity_prio_shift)), + "dxb-id "); + if (chk->envinfo.mi_dxbid.x | chk->envinfo.mi_dxbid.y) + line = chk_print(line, "%016" PRIx64 "-%016" PRIx64, + chk->envinfo.mi_dxbid.x, chk->envinfo.mi_dxbid.y); + else + line = chk_puts(line, "is absent"); + chk_line_end(line); + + line = chk_puts(chk_line_begin(scope, MDBX_chk_info), "current boot-id "); if (chk->envinfo.mi_bootid.current.x | chk->envinfo.mi_bootid.current.y) line = chk_print(line, "%016" PRIx64 "-%016" PRIx64, chk->envinfo.mi_bootid.current.x, chk->envinfo.mi_bootid.current.y); else - line = chk_puts(line, "unavailable"); + line = chk_puts(line, "is unavailable"); chk_line_end(line); err = osal_filesize(env->lazy_fd, &env->dxb_mmap.filesize); diff --git a/src/dxb.c b/src/dxb.c index 9da9c009..401115d9 100644 --- a/src/dxb.c +++ b/src/dxb.c @@ -1045,14 +1045,16 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, if ((env->flags & MDBX_RDONLY) == 0 && env->stuck_meta < 0 && (globals.runtime_flags & MDBX_DBG_DONT_UPGRADE) == 0) { - for (int n = 0; n < NUM_METAS; ++n) { + for (unsigned n = 0; n < NUM_METAS; ++n) { meta_t 
*const meta = METAPAGE(env, n); if (unlikely(unaligned_peek_u64(4, &meta->magic_and_version) != - MDBX_DATA_MAGIC)) { - const txnid_t txnid = constmeta_txnid(meta); + MDBX_DATA_MAGIC) || + (meta->dxbid.x | meta->dxbid.y) == 0) { + const txnid_t txnid = + meta_is_used(&troika, n) ? constmeta_txnid(meta) : 0; NOTICE("%s %s" "meta[%u], txnid %" PRIaTXN, - "updating db-format signature for", + "updating db-format/guid signature for", meta_is_steady(meta) ? "stead-" : "weak-", n, txnid); err = meta_override(env, n, txnid, meta); if (unlikely(err != MDBX_SUCCESS) && diff --git a/src/layout-dxb.h b/src/layout-dxb.h index 06b0e834..78f05aeb 100644 --- a/src/layout-dxb.h +++ b/src/layout-dxb.h @@ -91,15 +91,6 @@ typedef struct geo { }; } geo_t; -typedef union bin128 { - __anonymous_struct_extension__ struct { - uint64_t x, y; - }; - __anonymous_struct_extension__ struct { - uint32_t a, b, c, d; - }; -} bin128_t; - /* Meta page content. * A meta page is the start point for accessing a database snapshot. * Pages 0-2 are meta pages. */ @@ -158,6 +149,9 @@ typedef struct meta { * steady sync point. Zeros mean that no relevant information is available * from the system. 
*/ bin128_t bootid; + + /* GUID базы данных, начиная с v0.13.1 */ + bin128_t dxbid; } meta_t; #pragma pack(1) diff --git a/src/meta.c b/src/meta.c index b7333a0a..ee2a5aef 100644 --- a/src/meta.c +++ b/src/meta.c @@ -332,8 +332,8 @@ int meta_sync(const MDBX_env *env, const meta_ptr_t head) { return rc; } -__cold static page_t *meta_model(const MDBX_env *env, page_t *model, - size_t num) { +__cold static page_t *meta_model(const MDBX_env *env, page_t *model, size_t num, + const bin128_t *guid) { ENSURE(env, is_powerof2(env->ps)); ENSURE(env, env->ps >= MDBX_MIN_PAGESIZE); ENSURE(env, env->ps <= MDBX_MAX_PAGESIZE); @@ -373,6 +373,7 @@ __cold static page_t *meta_model(const MDBX_env *env, page_t *model, model_meta->trees.gc.flags = MDBX_INTEGERKEY; model_meta->trees.gc.root = P_INVALID; model_meta->trees.main.root = P_INVALID; + memcpy(&model_meta->dxbid, guid, sizeof(model_meta->dxbid)); meta_set_txnid(env, model_meta, MIN_TXNID + num); unaligned_poke_u64(4, model_meta->sign, meta_sign_calculate(model_meta)); eASSERT(env, coherency_check_meta(env, model_meta, true)); @@ -380,10 +381,11 @@ __cold static page_t *meta_model(const MDBX_env *env, page_t *model, } __cold meta_t *meta_init_triplet(const MDBX_env *env, void *buffer) { + const bin128_t guid = osal_guid(env); page_t *page0 = (page_t *)buffer; - page_t *page1 = meta_model(env, page0, 0); - page_t *page2 = meta_model(env, page1, 1); - meta_model(env, page2, 2); + page_t *page1 = meta_model(env, page0, 0, &guid); + page_t *page2 = meta_model(env, page1, 1, &guid); + meta_model(env, page2, 2, &guid); return page_meta(page2); } @@ -394,7 +396,8 @@ __cold int __must_check_result meta_override(MDBX_env *env, size_t target, if (unlikely(rc != MDBX_SUCCESS)) return rc; page_t *const page = env->page_auxbuf; - meta_model(env, page, target); + meta_model(env, page, target, + &((target == 0 && shape) ? 
shape : METAPAGE(env, 0))->dxbid); meta_t *const model = page_meta(page); meta_set_txnid(env, model, txnid); if (txnid) @@ -430,6 +433,12 @@ __cold int __must_check_result meta_override(MDBX_env *env, size_t target, } } } + + if (target == 0 && (model->dxbid.x | model->dxbid.y) == 0) { + const bin128_t guid = osal_guid(env); + memcpy(&model->dxbid, &guid, sizeof(model->dxbid)); + } + meta_sign_as_steady(model); rc = meta_validate(env, model, page, (pgno_t)target, nullptr); if (unlikely(MDBX_IS_ERROR(rc))) diff --git a/src/meta.h b/src/meta.h index 51a8d66f..706061c1 100644 --- a/src/meta.h +++ b/src/meta.h @@ -83,7 +83,12 @@ static inline meta_ptr_t meta_tail(const MDBX_env *env, return r; } +static inline bool meta_is_used(const troika_t *troika, unsigned n) { + return n == troika->recent || n == troika->prefer_steady; +} + static inline bool meta_bootid_match(const meta_t *meta) { + return memcmp(&meta->bootid, &globals.bootid, 16) == 0 && (globals.bootid.x | globals.bootid.y) != 0; } diff --git a/src/osal.c b/src/osal.c index d8d58392..1ae7dcf9 100644 --- a/src/osal.c +++ b/src/osal.c @@ -2931,17 +2931,40 @@ __cold static void bootid_collect(bin128_t *p, const void *s, size_t n) { bootid_shake(p); /* minor non-linear tomfoolery */ - const unsigned z = p->x % 61; + const unsigned z = p->x % 61 + 1; p->y = p->y << z | p->y >> (64 - z); bootid_shake(p); bootid_shake(p); - const unsigned q = p->x % 59; + const unsigned q = p->x % 59 + 1; p->y = p->y << q | p->y >> (64 - q); bootid_shake(p); bootid_shake(p); bootid_shake(p); } +static size_t hamming_weight(size_t v) { + const size_t m1 = (size_t)UINT64_C(0x5555555555555555); + const size_t m2 = (size_t)UINT64_C(0x3333333333333333); + const size_t m4 = (size_t)UINT64_C(0x0f0f0f0f0f0f0f0f); + const size_t h01 = (size_t)UINT64_C(0x0101010101010101); + v -= (v >> 1) & m1; + v = (v & m2) + ((v >> 2) & m2); + v = (v + (v >> 4)) & m4; + return (v * h01) >> (sizeof(v) * 8 - 8); +} + +static inline size_t hw64(uint64_t v) { + 
size_t r = hamming_weight((size_t)v); + if (sizeof(v) > sizeof(r)) + r += hamming_weight((size_t)(v >> sizeof(r) * 4 >> sizeof(r) * 4)); + return r; +} + +static bool check_uuid(bin128_t uuid) { + size_t hw = hw64(uuid.x) + hw64(uuid.y) + hw64(uuid.x ^ uuid.y); + return (hw >> 6) == 1; +} + #if defined(_WIN32) || defined(_WIN64) __cold static uint64_t windows_systemtime_ms() { @@ -3043,7 +3066,7 @@ bootid_parse_uuid(bin128_t *s, const void *p, const size_t n) { s->y += aligned.y; } else bootid_collect(s, p, n); - return true; + return check_uuid(*s); } if (n) @@ -3051,28 +3074,33 @@ bootid_parse_uuid(bin128_t *s, const void *p, const size_t n) { return false; } +#if defined(__linux__) || defined(__gnu_linux__) +__cold static bool proc_read_uuid(const char *path, bin128_t *target) { + const int fd = open(path, O_RDONLY | O_NOFOLLOW); + if (fd != -1) { + struct statfs fs; + char buf[42]; + const ssize_t len = + (fstatfs(fd, &fs) == 0 && fs.f_type == /* procfs */ 0x9FA0) + ? read(fd, buf, sizeof(buf)) + : -1; + const int err = close(fd); + assert(err == 0); + (void)err; + if (len > 0) + return bootid_parse_uuid(target, buf, len); + } + return false; +} +#endif /* Linux */ + __cold static bin128_t osal_bootid(void) { - bin128_t bin = {{0, 0}}; + bin128_t uuid = {{0, 0}}; bool got_machineid = false, got_boottime = false, got_bootseq = false; #if defined(__linux__) || defined(__gnu_linux__) - { - const int fd = - open("/proc/sys/kernel/random/boot_id", O_RDONLY | O_NOFOLLOW); - if (fd != -1) { - struct statfs fs; - char buf[42]; - const ssize_t len = - (fstatfs(fd, &fs) == 0 && fs.f_type == /* procfs */ 0x9FA0) - ? 
read(fd, buf, sizeof(buf)) - : -1; - const int err = close(fd); - assert(err == 0); - (void)err; - if (len > 0 && bootid_parse_uuid(&bin, buf, len)) - return bin; - } - } + if (proc_read_uuid("/proc/sys/kernel/random/boot_id", &uuid)) + return uuid; #endif /* Linux */ #if defined(__APPLE__) || defined(__MACH__) @@ -3080,16 +3108,15 @@ __cold static bin128_t osal_bootid(void) { char buf[42]; size_t len = sizeof(buf); if (!sysctlbyname("kern.bootsessionuuid", buf, &len, nullptr, 0) && - bootid_parse_uuid(&bin, buf, len)) - return bin; + bootid_parse_uuid(&uuid, buf, len)) + return uuid; #if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && \ __MAC_OS_X_VERSION_MIN_REQUIRED > 1050 - uuid_t uuid; + uuid_t hostuuid; struct timespec wait = {0, 1000000000u / 42}; - if (!gethostuuid(uuid, &wait) && - bootid_parse_uuid(&bin, uuid, sizeof(uuid))) - got_machineid = true; + if (!gethostuuid(hostuuid, &wait)) + got_machineid = bootid_parse_uuid(&uuid, hostuuid, sizeof(hostuuid)); #endif /* > 10.5 */ struct timeval boottime; @@ -3127,7 +3154,7 @@ __cold static bin128_t osal_bootid(void) { "MachineGuid", &buf.MachineGuid, &len) == ERROR_SUCCESS && len < sizeof(buf)) - got_machineid = bootid_parse_uuid(&bin, &buf.MachineGuid, len); + got_machineid = bootid_parse_uuid(&uuid, &buf.MachineGuid, len); if (!got_machineid) { /* again, Windows is madness */ @@ -3145,7 +3172,7 @@ __cold static bin128_t osal_bootid(void) { "DigitalProductId", &buf.DigitalProductId, &len) == ERROR_SUCCESS && len > 42 && len < sizeof(buf)) { - bootid_collect(&bin, &buf.DigitalProductId, len); + bootid_collect(&uuid, &buf.DigitalProductId, len); got_machineid = true; } len = sizeof(buf); @@ -3153,7 +3180,7 @@ __cold static bin128_t osal_bootid(void) { "DigitalProductId", &buf.DigitalProductId, &len) == ERROR_SUCCESS && len > 42 && len < sizeof(buf)) { - bootid_collect(&bin, &buf.DigitalProductId, len); + bootid_collect(&uuid, &buf.DigitalProductId, len); got_machineid = true; } len = sizeof(buf); @@ -3161,7 +3188,7 
@@ __cold static bin128_t osal_bootid(void) { "DigitalProductId", &buf.DigitalProductId, &len) == ERROR_SUCCESS && len > 42 && len < sizeof(buf)) { - bootid_collect(&bin, &buf.DigitalProductId, len); + bootid_collect(&uuid, &buf.DigitalProductId, len); got_machineid = true; } } @@ -3173,7 +3200,7 @@ __cold static bin128_t osal_bootid(void) { if (mdbx_RegGetValue(HKEY_LOCAL_MACHINE, HKLM_PrefetcherParams, "BootId", &buf.BootId, &len) == ERROR_SUCCESS && len > 1 && len < sizeof(buf)) { - bootid_collect(&bin, &buf.BootId, len); + bootid_collect(&uuid, &buf.BootId, len); got_bootseq = true; } @@ -3181,7 +3208,7 @@ __cold static bin128_t osal_bootid(void) { if (mdbx_RegGetValue(HKEY_LOCAL_MACHINE, HKLM_PrefetcherParams, "BaseTime", &buf.BaseTime, &len) == ERROR_SUCCESS && len >= sizeof(buf.BaseTime) && buf.BaseTime) { - bootid_collect(&bin, &buf.BaseTime, len); + bootid_collect(&uuid, &buf.BaseTime, len); got_boottime = true; } @@ -3197,7 +3224,7 @@ __cold static bin128_t osal_bootid(void) { buf.SysTimeOfDayInfoHacked.BootTime.QuadPart - buf.SysTimeOfDayInfoHacked.BootTimeBias; if (UnbiasedBootTime) { - bootid_collect(&bin, &UnbiasedBootTime, sizeof(UnbiasedBootTime)); + bootid_collect(&uuid, &UnbiasedBootTime, sizeof(UnbiasedBootTime)); got_boottime = true; } } @@ -3205,7 +3232,7 @@ __cold static bin128_t osal_bootid(void) { if (!got_boottime) { uint64_t boottime = windows_bootime(); if (boottime) { - bootid_collect(&bin, &boottime, sizeof(boottime)); + bootid_collect(&uuid, &boottime, sizeof(boottime)); got_boottime = true; } } @@ -3223,7 +3250,7 @@ __cold static bin128_t osal_bootid(void) { #endif mib, ARRAY_LENGTH(mib), &buf, &len, nullptr, 0) == 0) - got_machineid = bootid_parse_uuid(&bin, buf, len); + got_machineid = bootid_parse_uuid(&uuid, buf, len); } #endif /* CTL_HW && HW_UUID */ @@ -3238,7 +3265,7 @@ __cold static bin128_t osal_bootid(void) { #endif mib, ARRAY_LENGTH(mib), &buf, &len, nullptr, 0) == 0) - got_machineid = bootid_parse_uuid(&bin, buf, len); + 
got_machineid = bootid_parse_uuid(&uuid, buf, len); } #endif /* CTL_KERN && KERN_HOSTUUID */ @@ -3247,7 +3274,7 @@ __cold static bin128_t osal_bootid(void) { char buf[42]; size_t len = sizeof(buf); if (sysctlbyname("machdep.dmi.system-uuid", buf, &len, nullptr, 0) == 0) - got_machineid = bootid_parse_uuid(&bin, buf, len); + got_machineid = bootid_parse_uuid(&uuid, buf, len); } #endif /* __NetBSD__ */ @@ -3255,7 +3282,7 @@ __cold static bin128_t osal_bootid(void) { if (!got_machineid) { const int hostid = gethostid(); if (hostid > 0) { - bootid_collect(&bin, &hostid, sizeof(hostid)); + bootid_collect(&uuid, &hostid, sizeof(hostid)); got_machineid = true; } } @@ -3263,8 +3290,8 @@ __cold static bin128_t osal_bootid(void) { if (!got_machineid) { lack: - bin.x = bin.y = 0; - return bin; + uuid.x = uuid.y = 0; + return uuid; } /*--------------------------------------------------------------------------*/ @@ -3281,7 +3308,7 @@ __cold static bin128_t osal_bootid(void) { mib, ARRAY_LENGTH(mib), &boottime, &len, nullptr, 0) == 0 && len == sizeof(boottime) && boottime.tv_sec) { - bootid_collect(&bin, &boottime, len); + bootid_collect(&uuid, &boottime, len); got_boottime = true; } } @@ -3298,11 +3325,11 @@ __cold static bin128_t osal_bootid(void) { switch (kn->data_type) { case KSTAT_DATA_INT32: case KSTAT_DATA_UINT32: - bootid_collect(&bin, &kn->value, sizeof(int32_t)); + bootid_collect(&uuid, &kn->value, sizeof(int32_t)); got_boottime = true; case KSTAT_DATA_INT64: case KSTAT_DATA_UINT64: - bootid_collect(&bin, &kn->value, sizeof(int64_t)); + bootid_collect(&uuid, &kn->value, sizeof(int64_t)); got_boottime = true; } } @@ -3318,12 +3345,12 @@ __cold static bin128_t osal_bootid(void) { const struct utmpx id = {.ut_type = BOOT_TIME}; const struct utmpx *entry = getutxid(&id); if (entry) { - bootid_collect(&bin, entry, sizeof(*entry)); + bootid_collect(&uuid, entry, sizeof(*entry)); got_boottime = true; while (unlikely((entry = getutxid(&id)) != nullptr)) { /* have multiple 
reboot records, assuming we can distinguish next * bootsession even if RTC is wrong or absent */ - bootid_collect(&bin, entry, sizeof(*entry)); + bootid_collect(&uuid, entry, sizeof(*entry)); got_bootseq = true; } } @@ -3352,7 +3379,7 @@ __cold static bin128_t osal_bootid(void) { goto lack; } - return bin; + return uuid; } __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, @@ -3474,6 +3501,100 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, return MDBX_SUCCESS; } +/*----------------------------------------------------------------------------*/ + +#ifdef __FreeBSD__ +#include +#endif /* FreeBSD */ + +#if __GLIBC_PREREQ(2, 25) || defined(__FreeBSD__) || defined(__NetBSD__) || \ + defined(__BSD__) || defined(__bsdi__) || defined(__DragonFly__) || \ + defined(__APPLE__) || __has_include() +#include +#endif /* sys/random.h */ + +MDBX_INTERNAL bin128_t osal_guid(const MDBX_env *env) { + struct { + uint64_t begin, end, cputime; + uintptr_t thread, pid; + const void *x, *y; + bin128_t (*z)(const MDBX_env *env); + } salt; + + salt.begin = osal_monotime(); + bin128_t uuid = {{0, 0}}; + +#if defined(__linux__) || defined(__gnu_linux__) + if (proc_read_uuid("/proc/sys/kernel/random/uuid", &uuid) && check_uuid(uuid)) + return uuid; +#endif /* Linux */ + +#ifdef __FreeBSD__ + STATIC_ASSERT(sizeof(uuid) == sizeof(struct uuid)); + if (uuidgen((struct uuid *)&uuid, 1) == 0 && check_uuid(uuid)) + return uuid; +#endif /* FreeBSD */ + +#if defined(_WIN32) || defined(_WIN64) + if (imports.CoCreateGuid && imports.CoCreateGuid(&uuid) == 0 && + check_uuid(uuid)) + return uuid; + + HCRYPTPROV hCryptProv = 0; + if (CryptAcquireContextW(&hCryptProv, nullptr, nullptr, PROV_RSA_FULL, + CRYPT_VERIFYCONTEXT | CRYPT_SILENT)) { + const BOOL ok = + CryptGenRandom(hCryptProv, sizeof(uuid), (unsigned char *)&uuid); + CryptReleaseContext(hCryptProv, 0); + if (ok && check_uuid(uuid)) + return uuid; + } +#elif 
defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && defined(__IPHONE_8_0) +#if __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0 + if (CCRandomGenerateBytes(&uuid, sizeof(uuid)) == kCCSuccess && + check_uuid(uuid)) + return uuid; +#endif /* iOS >= 8.x */ +#else + const int fd = open("/dev/urandom", O_RDONLY); + if (fd != -1) { + const ssize_t len = read(fd, &uuid, sizeof(uuid)); + const int err = close(fd); + assert(err == 0); + (void)err; + if (len == sizeof(uuid) && check_uuid(uuid)) + return uuid; + } +#if (__GLIBC_PREREQ(2, 25) || defined(__FreeBSD__) || defined(__NetBSD__) || \ + defined(__BSD__) || defined(__bsdi__) || defined(__DragonFly__)) && \ + !defined(__APPLE__) && !defined(__ANDROID_API__) + if (getrandom(&uuid, sizeof(uuid), 0) == sizeof(uuid) && check_uuid(uuid)) + return uuid; +#elif defined(__OpenBSD__) || (defined(__sun) && defined(__SVR4)) || \ + (defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && \ + __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200) + if (getentropy(&uuid, sizeof(uuid)) == 0 && check_uuid(uuid)) + return uuid; +#endif /* getrandom() / getentropy() */ +#endif /* !Windows */ + + uuid = globals.bootid; + bootid_collect(&uuid, env, sizeof(*env)); + salt.thread = osal_thread_self(); + salt.pid = osal_getpid(); + salt.x = &salt; + salt.y = env; + salt.z = &osal_guid; + do { + salt.cputime = osal_cputime(nullptr); + salt.end = osal_monotime(); + bootid_collect(&uuid, &salt, sizeof(salt)); + } while (!check_uuid(uuid)); + return uuid; +} + +/*--------------------------------------------------------------------------*/ + void osal_ctor(void) { #if MDBX_HAVE_PWRITEV && defined(_SC_IOV_MAX) osal_iov_max = sysconf(_SC_IOV_MAX); diff --git a/src/osal.h b/src/osal.h index 15831c99..23669cf6 100644 --- a/src/osal.h +++ b/src/osal.h @@ -576,6 +576,17 @@ MDBX_INTERNAL void osal_dtor(void); MDBX_INTERNAL int osal_mb2w(const char *const src, wchar_t **const pdst); #endif /* Windows */ +typedef union bin128 { + __anonymous_struct_extension__ struct { + uint64_t x, y; + 
}; + __anonymous_struct_extension__ struct { + uint32_t a, b, c, d; + }; +} bin128_t; + +MDBX_INTERNAL bin128_t osal_guid(const MDBX_env *); + /*----------------------------------------------------------------------------*/ MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint64_t diff --git a/src/txn.c b/src/txn.c index 17845876..c3d32eb5 100644 --- a/src/txn.c +++ b/src/txn.c @@ -839,6 +839,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { meta.trees.gc = txn->dbs[FREE_DBI]; meta.trees.main = txn->dbs[MAIN_DBI]; meta.canary = txn->canary; + memcpy(&meta.dxbid, &head.ptr_c->dxbid, sizeof(meta.dxbid)); txnid_t commit_txnid = txn->txnid; #if MDBX_ENABLE_BIGFOOT diff --git a/src/windows-import.c b/src/windows-import.c index 0e702c3e..a401014c 100644 --- a/src/windows-import.c +++ b/src/windows-import.c @@ -147,6 +147,11 @@ void windows_import(void) { if (hAdvapi32dll) { MDBX_IMPORT(hAdvapi32dll, RegGetValueA); } + + const HINSTANCE hOle32dll = GetModuleHandleA("ole32.dll"); + if (hOle32dll) { + MDBX_IMPORT(hOle32dll, CoCreateGuid); + } } #undef MDBX_IMPORT diff --git a/src/windows-import.h b/src/windows-import.h index 7a6c8789..b7c461f9 100644 --- a/src/windows-import.h +++ b/src/windows-import.h @@ -109,6 +109,8 @@ typedef LSTATUS(WINAPI *MDBX_RegGetValueA)(HKEY hkey, LPCSTR lpSubKey, LPDWORD pdwType, PVOID pvData, LPDWORD pcbData); +typedef long(WINAPI *MDBX_CoCreateGuid)(bin128_t *guid); + NTSYSAPI ULONG RtlRandomEx(PULONG Seed); typedef BOOL(WINAPI *MDBX_SetFileIoOverlappedRange)(HANDLE FileHandle, @@ -131,6 +133,7 @@ struct libmdbx_imports { MDBX_GetTickCount64 GetTickCount64; MDBX_RegGetValueA RegGetValueA; MDBX_SetFileIoOverlappedRange SetFileIoOverlappedRange; + MDBX_CoCreateGuid CoCreateGuid; }; MDBX_INTERNAL void windows_import(void); From f335a16c92945c901aa7826ed85121d0ae4bbca9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= 
=?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 6 Jul 2024 14:18:15 +0300 Subject: [PATCH 206/443] =?UTF-8?q?mdbx-testsing:=20=D0=BE=D1=82=D0=BB?= =?UTF-8?q?=D1=8E=D1=87=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BB=D0=B8=D1=88=D0=BD?= =?UTF-8?q?=D0=B5=D0=B3=D0=BE/=D0=B2=D1=80=D0=B5=D0=BC=D0=B5=D0=BD=D0=BD?= =?UTF-8?q?=D0=BE=D0=B3=D0=BE=20=D0=BE=D1=82=D0=BB=D0=B0=D0=B4=D0=BE=D1=87?= =?UTF-8?q?=D0=BD=D0=BE=D0=B3=D0=BE=20=D0=B2=D1=8B=D0=B2=D0=BE=D0=B4=D0=B0?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/test.c++ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test.c++ b/test/test.c++ index d1e1059e..0451b162 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -1198,7 +1198,7 @@ int testcase::remove(const keygen::buffer &akey, const keygen::buffer &adata) { } #if SPECULUM_CURSORS - speculum_render(it_found, speculum_cursors[seek_check].get()); + // speculum_render(it_found, speculum_cursors[seek_check].get()); if (it_found != speculum.begin()) { const auto cursor_prev = speculum_cursors[prev].get(); auto it_prev = it_found; From ec0ada7b8cbed840dd08a7a8dfb6ea25a69196a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 9 Jul 2024 16:04:01 +0300 Subject: [PATCH 207/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=B0=D1=80=D0=BA=D0=BE?= =?UTF-8?q?=D0=B2=D0=BA=D0=B0=20=D1=87=D0=B8=D1=82=D0=B0=D1=8E=D1=89=D0=B8?= =?UTF-8?q?=D1=85=20=D1=82=D1=80=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8?= =?UTF-8?q?=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 135 +++++++++++++++++++++++++++++++++++---- src/api-env.c | 3 +- src/bits.md | 6 +- src/cogs.h | 10 ++- src/dbi.c | 2 +- src/internals.h | 3 +- src/layout-lck.h | 8 ++- src/lck-windows.c | 3 +- src/mvcc-readers.c | 154 +++++++++++++++++++++++++++++++++++++++------ src/proto.h | 12 ++-- 
src/txn.c | 143 +++++++++++++++++++++++++++++++---------- 11 files changed, 399 insertions(+), 80 deletions(-) diff --git a/mdbx.h b/mdbx.h index 8ee45375..5a956e64 100644 --- a/mdbx.h +++ b/mdbx.h @@ -1556,34 +1556,54 @@ typedef enum MDBX_txn_flags { MDBX_TXN_INVALID = INT32_MIN, /** Transaction is finished or never began. - * \note Transaction state flag. Returned from \ref mdbx_txn_flags() + * \note This is a transaction state flag. Returned from \ref mdbx_txn_flags() * but can't be used with \ref mdbx_txn_begin(). */ MDBX_TXN_FINISHED = 0x01, /** Transaction is unusable after an error. - * \note Transaction state flag. Returned from \ref mdbx_txn_flags() + * \note This is a transaction state flag. Returned from \ref mdbx_txn_flags() * but can't be used with \ref mdbx_txn_begin(). */ MDBX_TXN_ERROR = 0x02, /** Transaction must write, even if dirty list is empty. - * \note Transaction state flag. Returned from \ref mdbx_txn_flags() + * \note This is a transaction state flag. Returned from \ref mdbx_txn_flags() * but can't be used with \ref mdbx_txn_begin(). */ MDBX_TXN_DIRTY = 0x04, /** Transaction or a parent has spilled pages. - * \note Transaction state flag. Returned from \ref mdbx_txn_flags() + * \note This is a transaction state flag. Returned from \ref mdbx_txn_flags() * but can't be used with \ref mdbx_txn_begin(). */ MDBX_TXN_SPILLS = 0x08, /** Transaction has a nested child transaction. - * \note Transaction state flag. Returned from \ref mdbx_txn_flags() + * \note This is a transaction state flag. Returned from \ref mdbx_txn_flags() * but can't be used with \ref mdbx_txn_begin(). */ MDBX_TXN_HAS_CHILD = 0x10, - /** Most operations on the transaction are currently illegal. - * \note Transaction state flag. Returned from \ref mdbx_txn_flags() + /** Transaction is parked by \ref mdbx_txn_park(). + * \note This is a transaction state flag. Returned from \ref mdbx_txn_flags() * but can't be used with \ref mdbx_txn_begin(). 
*/ - MDBX_TXN_BLOCKED = MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_HAS_CHILD + MDBX_TXN_PARKED = 0x20, + + /** Transaction is parked by \ref mdbx_txn_park() with `autounpark=true`, + * and therefore it can be used without explicitly calling + * \ref mdbx_txn_unpark() first. + * \note This is a transaction state flag. Returned from \ref mdbx_txn_flags() + * but can't be used with \ref mdbx_txn_begin(). */ + MDBX_TXN_AUTOUNPARK = 0x40, + + /** The transaction was blocked using the \ref mdbx_txn_park() function, + * and then ousted by a write transaction because + * this transaction was interfered with garbage recycling. + * \note This is a transaction state flag. Returned from \ref mdbx_txn_flags() + * but can't be used with \ref mdbx_txn_begin(). */ + MDBX_TXN_OUSTED = 0x80, + + /** Most operations on the transaction are currently illegal. + * \note This is a transaction state flag. Returned from \ref mdbx_txn_flags() + * but can't be used with \ref mdbx_txn_begin(). */ + MDBX_TXN_BLOCKED = + MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_HAS_CHILD | MDBX_TXN_PARKED } MDBX_txn_flags_t; DEFINE_ENUM_FLAG_OPERATORS(MDBX_txn_flags) @@ -1962,8 +1982,11 @@ typedef enum MDBX_error { * corresponding DBI-handle could be (re)used */ MDBX_DANGLING_DBI = -30412, + /** Транзакция была асинхронно отменена/вытеснена */ + MDBX_OUSTED = -30411, + /* The last of MDBX-added error codes */ - MDBX_LAST_ADDED_ERRCODE = MDBX_DANGLING_DBI, + MDBX_LAST_ADDED_ERRCODE = MDBX_OUSTED, #if defined(_WIN32) || defined(_WIN64) MDBX_ENODATA = ERROR_HANDLE_EOF, @@ -3972,7 +3995,8 @@ mdbx_txn_env(const MDBX_txn *txn); * * \returns A transaction flags, valid if input is an valid transaction, * otherwise \ref MDBX_TXN_INVALID. */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_txn_flags(const MDBX_txn *txn); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API MDBX_txn_flags_t +mdbx_txn_flags(const MDBX_txn *txn); /** \brief Return the transaction's ID. 
* \ingroup c_statinfo @@ -4190,8 +4214,8 @@ LIBMDBX_API int mdbx_txn_break(MDBX_txn *txn); * transaction soon, and also locking overhead if \ref MDBX_NOSTICKYTHREADS is * in use. The reader table lock is released, but the table slot stays tied to * its thread or \ref MDBX_txn. Use \ref mdbx_txn_abort() to discard a reset - * handle, and to free its lock table slot if \ref MDBX_NOSTICKYTHREADS is in - * use. + * handle, and to free its lock table slot if \ref MDBX_NOSTICKYTHREADS + * is in use. * * Cursors opened within the transaction must not be used again after this * call, except with \ref mdbx_cursor_renew() and \ref mdbx_cursor_close(). @@ -4216,6 +4240,93 @@ LIBMDBX_API int mdbx_txn_break(MDBX_txn *txn); * \retval MDBX_EINVAL Transaction handle is NULL. */ LIBMDBX_API int mdbx_txn_reset(MDBX_txn *txn); +/** \brief Переводит читающую транзакцию в "припаркованное" состояние. + * \ingroup c_transactions + * + * Выполняющиеся читающие транзакции не позволяют перерабатывать старые + * MVCC-снимки данных, начиная с самой старой используемой/читаемой версии и все + * последующие. Припаркованная же транзакция может быть вытеснена транзакцией + * записи, если будет мешать переработке мусора (старых MVCC-снимков данных). + * А если вытеснения не произойдет, то восстановление (перевод в рабочее + * состояние и продолжение выполнение) читающей транзакции будет существенно + * дешевле. Таким образом, парковка транзакций позволяет предотвратить + * негативные последствия связанные с остановкой переработки мусора, + * одновременно сохранив накладные расходы на минимальном уровне. + * + * Для продолжения выполнения (чтения и/или использования данных) припаркованная + * транзакция должна быть восстановлена посредством \ref mdbx_txn_unpark(). 
+ * Для удобства использования и предотвращения лишних вызовов API, посредством + * параметра `autounpark`, предусмотрена возможность автоматической + * «распарковки» при использовании припаркованной транзакции в функциях API + * предполагающих чтение данных. + * + * \warning До восстановления/распарковки транзакции, вне зависимости от + * аргумента `autounpark`, нельзя допускать разыменования указателей полученных + * ранее при чтении данных в рамках припаркованной транзакции, так как + * MVCC-снимок в котором размещены эти данные не удерживается и может быть + * переработан в любой момент. + * + * Припаркованная транзакция без "распарковки" может быть прервана, сброшена + * или перезапущена в любой момент посредством \ref mdbx_txn_abort(), + * \ref mdbx_txn_reset() и \ref mdbx_txn_renew(), соответственно. + * + * \see long-lived-read + * \see mdbx_txn_unpark() + * \see mdbx_txn_flags() + * + * \param [in] txn Транзакция чтения запущенная посредством + * \ref mdbx_txn_begin(). + * + * \param [in] autounpark Позволяет включить автоматическую + * распарковку/восстановление транзакции при вызове + * функций API предполагающих чтение данных. + * + * \returns Ненулевое значение кода ошибки, либо 0 при успешном выполнении. */ +LIBMDBX_API int mdbx_txn_park(MDBX_txn *txn, bool autounpark); + +/** \brief Распарковывает ранее припаркованную читающую транзакцию. + * \ingroup c_transactions + * + * Функция пытается восстановить ранее припаркованную транзакцию. Если + * припаркованная транзакция была вытеснена ради переработки старых + * MVCC-снимков, то в зависимости от аргумента `restart_if_ousted` выполняется + * её перезапуск аналогично \ref mdbx_txn_renew(), либо транзакция сбрасывается + * и возвращается код ошибки \ref MDBX_OUSTED. + * + * \see long-lived-read + * \see mdbx_txn_park() + * \see mdbx_txn_flags() + * + * \param [in] txn Транзакция чтения запущенная посредством + * \ref mdbx_txn_begin() и затем припаркованная + * посредством \ref mdbx_txn_park(). 
+ * + * \param [in] restart_if_ousted Позволяет сразу выполнить перезапуск + * транзакции, если она была вытеснена. + * + * \returns Ненулевое значение кода ошибки, либо 0 при успешном выполнении. + * Некоторые специфические коды результата: + * + * \retval MDBX_SUCCESS Припаркованная транзакция успешно восстановлена, + * либо она не была припаркована. + * + * \retval MDBX_OUSTED Читающая транзакция была вытеснена пишущей + * транзакцией ради переработки старых MVCC-снимков, + * а аргумент `restart_if_ousted` был задан `false`. + * Транзакция сбрасывается в состояние аналогичное + * после вызова \ref mdbx_txn_reset(), но экземпляр + * (хендл) не освобождается и может быть использован + * повторно посредством \ref mdbx_txn_renew(), либо + * освобожден посредством \ref mdbx_txn_abort(). + * + * \retval MDBX_RESULT_TRUE Читающая транзакция была вытеснена, но теперь + * перезапущена для чтения другого (последнего) + * MVCC-снимка, так как `restart_if_ousted` был задан + * `true`. + * + * \retval MDBX_BAD_TXN Транзакция уже завершена, либо не была запущена. */ +LIBMDBX_API int mdbx_txn_unpark(MDBX_txn *txn, bool restart_if_ousted); + /** \brief Renew a read-only transaction. 
* \ingroup c_transactions * diff --git a/src/api-env.c b/src/api-env.c index ce2d0755..852d3394 100644 --- a/src/api-env.c +++ b/src/api-env.c @@ -1350,8 +1350,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, begin + atomic_load32(&env->lck_mmap.lck->rdt_length, mo_AcquireRelease); for (const reader_slot_t *reader = begin; reader < end; ++reader) { - if (reader->pid.weak == env->pid && reader->tid.weak && - reader->tid.weak != CurrentTid) { + if (reader->pid.weak == env->pid && reader->tid.weak != CurrentTid) { /* At least one thread may don't use SRWL */ rc = MDBX_EPERM; break; diff --git a/src/bits.md b/src/bits.md index 29154b67..b0712ee8 100644 --- a/src/bits.md +++ b/src/bits.md @@ -5,9 +5,9 @@ N | MASK | ENV | TXN | DB | PUT | DBI | NOD 2 |0000 0004|ALLOC_COLSC|TXN_DIRTY |DUPSORT | |DBI_FRESH |N_DUPDATA|P_LARGE | | 3 |0000 0008|ALLOC_SSCAN|TXN_SPILLS |INTEGERKEY| |DBI_CREAT | |P_META | | 4 |0000 0010|ALLOC_FIFO |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_BAD | | -5 |0000 0020| |TXN_DRAINED_GC|INTEGERDUP|NODUPDATA | | |P_DUPFIX | | -6 |0000 0040| | |REVERSEDUP|CURRENT |DBI_OLDEN | |P_SUBP | | -7 |0000 0080| | |DB_VALID |ALLDUPS |DBI_LINDO | | | | +5 |0000 0020| |TXN_PARKED |INTEGERDUP|NODUPDATA | | |P_DUPFIX | | +6 |0000 0040| |TXN_AUTOUNPARK|REVERSEDUP|CURRENT |DBI_OLDEN | |P_SUBP | | +7 |0000 0080| |TXN_DRAINED_GC|DB_VALID |ALLDUPS |DBI_LINDO | | | | 8 |0000 0100| _MAY_MOVE | | | | | | | <= | 9 |0000 0200| _MAY_UNMAP| | | | | | | <= | 10|0000 0400| | | | | | | | | diff --git a/src/cogs.h b/src/cogs.h index caaed0bd..f0677f23 100644 --- a/src/cogs.h +++ b/src/cogs.h @@ -469,8 +469,12 @@ static inline int check_txn(const MDBX_txn *txn, int bad_bits) { if (unlikely(txn->signature != txn_signature)) return MDBX_EBADSIGN; - if (unlikely(txn->flags & bad_bits)) - return MDBX_BAD_TXN; + if (bad_bits && unlikely(txn->flags & bad_bits)) { + if ((bad_bits & MDBX_TXN_PARKED) == 0) + return MDBX_BAD_TXN; + else + return 
txn_check_badbits_parked(txn, bad_bits); + } tASSERT(txn, (txn->flags & MDBX_TXN_FINISHED) || (txn->flags & MDBX_NOSTICKYTHREADS) == @@ -490,7 +494,7 @@ static inline int check_txn(const MDBX_txn *txn, int bad_bits) { } static inline int check_txn_rw(const MDBX_txn *txn, int bad_bits) { - int err = check_txn(txn, bad_bits); + int err = check_txn(txn, bad_bits & ~MDBX_TXN_PARKED); if (unlikely(err)) return err; diff --git a/src/dbi.c b/src/dbi.c index 5f2c8ccb..9a6d0169 100644 --- a/src/dbi.c +++ b/src/dbi.c @@ -864,7 +864,7 @@ int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, unsigned *state) { - int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); + int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR - MDBX_TXN_PARKED); if (unlikely(rc != MDBX_SUCCESS)) return rc; diff --git a/src/internals.h b/src/internals.h index e986e6c5..e15f7c09 100644 --- a/src/internals.h +++ b/src/internals.h @@ -154,7 +154,8 @@ enum txn_flags { txn_ro_begin_flags = MDBX_TXN_RDONLY | MDBX_TXN_RDONLY_PREPARE, txn_rw_begin_flags = MDBX_TXN_NOMETASYNC | MDBX_TXN_NOSYNC | MDBX_TXN_TRY, txn_shrink_allowed = UINT32_C(0x40000000), - txn_gc_drained = 0x20 /* GC was depleted up to oldest reader */, + txn_parked = MDBX_TXN_PARKED, + txn_gc_drained = 0x40 /* GC was depleted up to oldest reader */, txn_state_flags = MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID | txn_gc_drained diff --git a/src/layout-lck.h b/src/layout-lck.h index 27edec08..b24b20c6 100644 --- a/src/layout-lck.h +++ b/src/layout-lck.h @@ -8,7 +8,7 @@ #include "essentials.h" /* The version number for a database's lockfile format. */ -#define MDBX_LOCK_VERSION 5 +#define MDBX_LOCK_VERSION 6 #if MDBX_LOCKING == MDBX_LOCKING_WIN32FILES @@ -159,6 +159,12 @@ typedef struct reader_slot { * We simply re-init the table when we know that we're the only process * opening the lock file. 
*/ + /* Псевдо thread_id для пометки вытесненных читающих транзакций. */ +#define MDBX_TID_TXN_OUSTED (UINT64_MAX - 1) + + /* Псевдо thread_id для пометки припаркованных читающих транзакций. */ +#define MDBX_TID_TXN_PARKED UINT64_MAX + /* The thread ID of the thread owning this txn. */ mdbx_atomic_uint64_t tid; diff --git a/src/lck-windows.c b/src/lck-windows.c index 62710142..fb8aa78e 100644 --- a/src/lck-windows.c +++ b/src/lck-windows.c @@ -240,7 +240,8 @@ osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) { atomic_load32(&env->lck_mmap.lck->rdt_length, mo_AcquireRelease); const uintptr_t WriteTxnOwner = env->basal_txn ? env->basal_txn->owner : 0; for (const reader_slot_t *reader = begin; reader < end; ++reader) { - if (reader->pid.weak != env->pid || !reader->tid.weak) { + if (reader->pid.weak != env->pid || !reader->tid.weak || + reader->tid.weak >= MDBX_TID_TXN_OUSTED) { skip_lck: continue; } diff --git a/src/mvcc-readers.c b/src/mvcc-readers.c index 025881df..4bfdfa5b 100644 --- a/src/mvcc-readers.c +++ b/src/mvcc-readers.c @@ -3,7 +3,7 @@ #include "internals.h" -bsr_t mvcc_bind_slot(MDBX_env *env, const uintptr_t tid) { +bsr_t mvcc_bind_slot(MDBX_env *env) { eASSERT(env, env->lck_mmap.lck); eASSERT(env, env->lck->magic_and_version == MDBX_LOCK_MAGIC); eASSERT(env, env->lck->os_and_format == MDBX_LOCK_FORMAT); @@ -61,7 +61,8 @@ bsr_t mvcc_bind_slot(MDBX_env *env, const uintptr_t tid) { safe64_reset(&result.rslot->txnid, true); if (slot == nreaders) env->lck->rdt_length.weak = (uint32_t)++nreaders; - result.rslot->tid.weak = (env->flags & MDBX_NOSTICKYTHREADS) ? 0 : tid; + result.rslot->tid.weak = + (env->flags & MDBX_NOSTICKYTHREADS) ? 
0 : osal_thread_self(); atomic_store32(&result.rslot->pid, env->pid, mo_AcquireRelease); lck_rdt_unlock(env); @@ -318,6 +319,92 @@ __cold MDBX_INTERNAL int mvcc_cleanup_dead(MDBX_env *env, int rdt_locked, return rc; } +int txn_park(MDBX_txn *txn, bool autounpark) { + reader_slot_t *const rslot = txn->to.reader; + tASSERT(txn, (txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_RDONLY | + MDBX_TXN_PARKED)) == MDBX_TXN_RDONLY); + tASSERT(txn, txn->to.reader->tid.weak < MDBX_TID_TXN_OUSTED); + if (unlikely((txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_RDONLY | + MDBX_TXN_PARKED)) != MDBX_TXN_RDONLY)) + return MDBX_BAD_TXN; + + const uint32_t pid = atomic_load32(&rslot->pid, mo_Relaxed); + const uint64_t tid = atomic_load64(&rslot->tid, mo_Relaxed); + const uint64_t txnid = atomic_load64(&rslot->txnid, mo_Relaxed); + if (unlikely(pid != txn->env->pid)) { + ERROR("unexpected pid %u%s%u", pid, " != must ", txn->env->pid); + return MDBX_PROBLEM; + } + if (unlikely(tid != txn->owner || txnid != txn->txnid)) { + ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%0zx" + " and/or txn-id %" PRIaTXN "%s%" PRIaTXN, + tid, " != must ", txn->owner, txnid, " != must ", txn->txnid); + return MDBX_BAD_RSLOT; + } + + atomic_store64(&rslot->tid, MDBX_TID_TXN_PARKED, mo_AcquireRelease); + atomic_store32(&txn->env->lck->rdt_refresh_flag, true, mo_Relaxed); + txn->flags += + autounpark ? 
MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK : MDBX_TXN_PARKED; + return MDBX_SUCCESS; +} + +int txn_unpark(MDBX_txn *txn) { + if (unlikely((txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD | + MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) != + (MDBX_TXN_RDONLY | MDBX_TXN_PARKED))) + return MDBX_BAD_TXN; + + for (reader_slot_t *const rslot = txn->to.reader; rslot; atomic_yield()) { + const uint32_t pid = atomic_load32(&rslot->pid, mo_Relaxed); + uint64_t tid = safe64_read(&rslot->tid); + uint64_t txnid = safe64_read(&rslot->txnid); + if (unlikely(pid != txn->env->pid)) { + ERROR("unexpected pid %u%s%u", pid, " != expected ", txn->env->pid); + return MDBX_PROBLEM; + } + if (unlikely(tid == MDBX_TID_TXN_OUSTED || + txnid >= SAFE64_INVALID_THRESHOLD)) + break; + if (unlikely(tid != MDBX_TID_TXN_PARKED || txnid != txn->txnid)) { + ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%" PRIx64 + " and/or txn-id %" PRIaTXN "%s%" PRIaTXN, + tid, " != must ", MDBX_TID_TXN_OUSTED, txnid, " != must ", + txn->txnid); + break; + } + if (unlikely((txn->flags & MDBX_TXN_ERROR))) + break; + +#if MDBX_64BIT_CAS + if (unlikely(!atomic_cas64(&rslot->tid, MDBX_TID_TXN_PARKED, txn->owner))) + continue; +#else + atomic_store32(&rslot->tid.high, (uint32_t)((uint64_t)txn->owner >> 32), + mo_Relaxed); + if (unlikely(!atomic_cas32(&rslot->tid.low, (uint32_t)MDBX_TID_TXN_PARKED, + (uint32_t)txn->owner))) { + atomic_store32(&rslot->tid.high, (uint32_t)(MDBX_TID_TXN_PARKED >> 32), + mo_AcquireRelease); + continue; + } +#endif + txnid = safe64_read(&rslot->txnid); + tid = safe64_read(&rslot->tid); + if (unlikely(txnid != txn->txnid || tid != txn->owner)) { + ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%zx" + " and/or txn-id %" PRIaTXN "%s%" PRIaTXN, + tid, " != must ", txn->owner, txnid, " != must ", txn->txnid); + break; + } + txn->flags &= ~(MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK); + return MDBX_SUCCESS; + } + + int err = txn_end(txn, TXN_END_OUSTED | TXN_END_RESET | TXN_END_UPDATE); + return err ? 
err : MDBX_OUSTED; +} + __cold txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler) { DEBUG("DB size maxed out by reading #%" PRIaTXN, straggler); osal_memory_fence(mo_AcquireRelease, false); @@ -341,29 +428,61 @@ __cold txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler) { if (MDBX_IS_ERROR(mvcc_cleanup_dead(env, false, nullptr))) break; - if (!callback) - break; - reader_slot_t *stucked = nullptr; uint64_t hold_retired = 0; for (size_t i = 0; i < lck->rdt_length.weak; ++i) { - const uint64_t snap_retired = - atomic_load64(&lck->rdt[i].snapshot_pages_retired, mo_Relaxed); - const txnid_t rtxn = safe64_read(&lck->rdt[i].txnid); + uint32_t pid; + reader_slot_t *const rslot = &lck->rdt[i]; + txnid_t rtxn = safe64_read(&rslot->txnid); + retry: if (rtxn == straggler && - atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease)) { - hold_retired = snap_retired; - stucked = &lck->rdt[i]; + (pid = atomic_load32(&rslot->pid, mo_AcquireRelease)) != 0) { + const uint64_t tid = safe64_read(&rslot->tid); + if (tid == MDBX_TID_TXN_PARKED) { + /* Читающая транзакция была помечена владельцем как "припаркованная", + * т.е. подлежащая асинхронному прерыванию, либо восстановлению + * по активности читателя. + * + * Если первый CAS(slot->tid) будет успешным, то + * safe64_reset_compare() безопасно очистит txnid, либо откажется + * из-за того что читатель сбросил и/или перезапустил транзакцию. + * При этом читатель может не заметить вытеснения, если приступит + * к завершению транзакции. Все эти исходы нас устраивают. + * + * Если первый CAS(slot->tid) будет НЕ успешным, то значит читатель + * восстановил транзакцию, либо завершил её, либо даже освободил слот.
+ */ + bool ousted = +#if MDBX_64BIT_CAS + atomic_cas64(&rslot->tid, MDBX_TID_TXN_PARKED, + MDBX_TID_TXN_OUSTED); +#else + atomic_cas32(&rslot->tid.low, (uint32_t)MDBX_TID_TXN_PARKED, + (uint32_t)MDBX_TID_TXN_OUSTED); +#endif + if (likely(ousted)) { + ousted = safe64_reset_compare(&rslot->txnid, rtxn); + NOTICE("ousted-%s parked read-txn %" PRIaTXN + ", pid %u, tid 0x%" PRIx64, + ousted ? "complete" : "half", rtxn, pid, tid); + eASSERT(env, ousted || safe64_read(&rslot->txnid) > straggler); + continue; + } + rtxn = safe64_read(&rslot->txnid); + goto retry; + } + hold_retired = + atomic_load64(&lck->rdt[i].snapshot_pages_retired, mo_Relaxed); + stucked = rslot; } } - if (!stucked) + if (!callback || !stucked) break; uint32_t pid = atomic_load32(&stucked->pid, mo_AcquireRelease); - uint64_t tid = atomic_load64(&stucked->tid, mo_AcquireRelease); - if (safe64_read(&stucked->txnid) != straggler || !pid || - stucked->snapshot_pages_retired.weak != hold_retired) + uint64_t tid = safe64_read(&stucked->tid); + if (safe64_read(&stucked->txnid) != straggler || !pid) continue; const meta_ptr_t head = meta_recent(env, &env->txn->tw.troika); @@ -437,10 +556,7 @@ __cold int mdbx_thread_register(const MDBX_env *env) { return MDBX_RESULT_TRUE /* already registered */; } - const uintptr_t tid = osal_thread_self(); - if (env->txn && unlikely(env->basal_txn->owner == tid)) - return MDBX_TXN_OVERLAPPING; - return mvcc_bind_slot((MDBX_env *)env, tid).err; + return mvcc_bind_slot((MDBX_env *)env).err; } __cold int mdbx_thread_unregister(const MDBX_env *env) { diff --git a/src/proto.h b/src/proto.h index ebee21a3..a2aaa3e1 100644 --- a/src/proto.h +++ b/src/proto.h @@ -12,7 +12,7 @@ MDBX_INTERNAL int audit_ex(MDBX_txn *txn, size_t retired_stored, bool dont_filter_gc); /* mvcc-readers.c */ -MDBX_INTERNAL bsr_t mvcc_bind_slot(MDBX_env *env, const uintptr_t tid); +MDBX_INTERNAL bsr_t mvcc_bind_slot(MDBX_env *env); MDBX_MAYBE_UNUSED MDBX_INTERNAL pgno_t mvcc_largest_this(MDBX_env *env, pgno_t 
largest); MDBX_INTERNAL txnid_t mvcc_shapshot_oldest(MDBX_env *const env, @@ -56,10 +56,13 @@ MDBX_INTERNAL bool txn_refund(MDBX_txn *txn); MDBX_INTERNAL txnid_t txn_snapshot_oldest(const MDBX_txn *const txn); MDBX_INTERNAL int txn_abort(MDBX_txn *txn); MDBX_INTERNAL int txn_renew(MDBX_txn *txn, unsigned flags); +MDBX_INTERNAL int txn_park(MDBX_txn *txn, bool autounpark); +MDBX_INTERNAL int txn_unpark(MDBX_txn *txn); +MDBX_INTERNAL int txn_check_badbits_parked(const MDBX_txn *txn, int bad_bits); #define TXN_END_NAMES \ - {"committed", "empty-commit", "abort", "reset", \ - "reset-tmp", "fail-begin", "fail-beginchild"} + {"committed", "empty-commit", "abort", "reset", \ + "reset-tmp", "fail-begin", "fail-beginchild", "ousted"} enum { /* txn_end operation number, for logging */ TXN_END_COMMITTED, @@ -69,6 +72,7 @@ enum { TXN_END_RESET_TMP, TXN_END_FAIL_BEGIN, TXN_END_FAIL_BEGINCHILD, + TXN_END_OUSTED, TXN_END_OPMASK = 0x0F /* mask for txn_end() operation number */, TXN_END_UPDATE = 0x10 /* update env state (DBIs) */, @@ -76,7 +80,7 @@ enum { TXN_END_EOTDONE = 0x40 /* txn's cursors already closed */, TXN_END_SLOT = 0x80 /* release any reader slot if NOSTICKYTHREADS */ }; -MDBX_INTERNAL int txn_end(MDBX_txn *txn, const unsigned mode); +MDBX_INTERNAL int txn_end(MDBX_txn *txn, unsigned mode); MDBX_INTERNAL int txn_write(MDBX_txn *txn, iov_ctx_t *ctx); /* env.c */ diff --git a/src/txn.c b/src/txn.c index c3d32eb5..59c661f1 100644 --- a/src/txn.c +++ b/src/txn.c @@ -453,8 +453,8 @@ static void take_gcprof(MDBX_txn *txn, MDBX_commit_latency *latency) { } int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { - STATIC_ASSERT(MDBX_TXN_FINISHED == - MDBX_TXN_BLOCKED - MDBX_TXN_HAS_CHILD - MDBX_TXN_ERROR); + STATIC_ASSERT(MDBX_TXN_FINISHED == MDBX_TXN_BLOCKED - MDBX_TXN_HAS_CHILD - + MDBX_TXN_ERROR - MDBX_TXN_PARKED); const uint64_t ts_0 = latency ? 
osal_monotime() : 0; uint64_t ts_1 = 0, ts_2 = 0, ts_3 = 0, ts_4 = 0, ts_5 = 0, gc_cputime = 0; @@ -919,7 +919,6 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { } #endif /* MDBX_ENV_CHECKPID */ - const uintptr_t tid = osal_thread_self(); flags |= env->flags & (MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP); if (flags & MDBX_TXN_RDONLY) { eASSERT(env, (flags & ~(txn_ro_begin_flags | MDBX_WRITEMAP | @@ -949,7 +948,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { r->txnid.weak < SAFE64_INVALID_THRESHOLD)) return MDBX_BAD_RSLOT; } else if (env->lck_mmap.lck) { - bsr_t brs = mvcc_bind_slot(env, tid); + bsr_t brs = mvcc_bind_slot(env); if (unlikely(brs.err != MDBX_SUCCESS)) return brs.err; r = brs.rslot; @@ -968,7 +967,11 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { txn->flags = MDBX_TXN_RDONLY | MDBX_TXN_FINISHED; return MDBX_SUCCESS; } - txn->owner = tid; + txn->owner = (uintptr_t)r->tid.weak; + if ((env->flags & MDBX_NOSTICKYTHREADS) == 0 && env->txn && + unlikely(env->basal_txn->owner == txn->owner) && + (globals.runtime_flags & MDBX_DBG_LEGACY_OVERLAP) == 0) + return MDBX_TXN_OVERLAPPING; /* Seek & fetch the last meta */ uint64_t timestamp = 0; @@ -980,7 +983,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { ? 
/* regular */ meta_recent(env, &troika) : /* recovery mode */ meta_ptr(env, env->stuck_meta); if (likely(r)) { - safe64_reset(&r->txnid, false); + safe64_reset(&r->txnid, true); atomic_store32(&r->snapshot_pages_used, head.ptr_v->geometry.first_unallocated, mo_Relaxed); atomic_store64( @@ -1014,7 +1017,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { rc = MDBX_PROBLEM; txn->txnid = INVALID_TXNID; if (likely(r)) - safe64_reset(&r->txnid, false); + safe64_reset(&r->txnid, true); goto bailout; } timestamp = 0; @@ -1029,7 +1032,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { if (unlikely(rc != MDBX_RESULT_TRUE)) { txn->txnid = INVALID_TXNID; if (likely(r)) - safe64_reset(&r->txnid, false); + safe64_reset(&r->txnid, true); goto bailout; } } @@ -1037,7 +1040,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { if (unlikely(txn->txnid < MIN_TXNID || txn->txnid > MAX_TXNID)) { ERROR("%s", "environment corrupted by died writer, must shutdown!"); if (likely(r)) - safe64_reset(&r->txnid, false); + safe64_reset(&r->txnid, true); txn->txnid = INVALID_TXNID; rc = MDBX_CORRUPTED; goto bailout; @@ -1050,6 +1053,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { } else { eASSERT(env, (flags & ~(txn_rw_begin_flags | MDBX_TXN_SPILLS | MDBX_WRITEMAP | MDBX_NOSTICKYTHREADS)) == 0); + const uintptr_t tid = osal_thread_self(); if (unlikely(txn->owner == tid || /* not recovery mode */ env->stuck_meta >= 0)) return MDBX_BUSY; @@ -1165,7 +1169,8 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { if (unlikely(env->dbs_flags[MAIN_DBI] != (DB_VALID | txn->dbs[MAIN_DBI].flags))) { - const bool need_txn_lock = env->basal_txn && env->basal_txn->owner != tid; + const bool need_txn_lock = + env->basal_txn && env->basal_txn->owner != osal_thread_self(); bool should_unlock = false; if (need_txn_lock) { rc = lck_txn_lock(env, true); @@ -1330,7 +1335,7 @@ bailout: return rc; } -int txn_end(MDBX_txn *txn, const unsigned mode) { +int txn_end(MDBX_txn *txn, unsigned mode) { MDBX_env *env = txn->env; 
static const char *const names[] = TXN_END_NAMES; @@ -1349,14 +1354,27 @@ int txn_end(MDBX_txn *txn, const unsigned mode) { reader_slot_t *slot = txn->to.reader; eASSERT(env, slot->pid.weak == env->pid); if (likely(!(txn->flags & MDBX_TXN_FINISHED))) { - ENSURE(env, txn->txnid >= - /* paranoia is appropriate here */ env->lck - ->cached_oldest.weak); - eASSERT(env, txn->txnid == slot->txnid.weak && - slot->txnid.weak >= env->lck->cached_oldest.weak); + if (likely((txn->flags & MDBX_TXN_PARKED) == 0)) { + ENSURE(env, txn->txnid >= + /* paranoia is appropriate here */ env->lck + ->cached_oldest.weak); + eASSERT(env, txn->txnid == slot->txnid.weak && + slot->txnid.weak >= env->lck->cached_oldest.weak); + } else { + if ((mode & TXN_END_OUSTED) == 0 && + safe64_read(&slot->tid) == MDBX_TID_TXN_OUSTED) + mode += TXN_END_OUSTED; + do { + safe64_reset(&slot->txnid, false); + atomic_store64(&slot->tid, txn->owner, mo_AcquireRelease); + atomic_yield(); + } while ( + unlikely(safe64_read(&slot->txnid) < SAFE64_INVALID_THRESHOLD || + safe64_read(&slot->tid) != txn->owner)); + } dxb_sanitize_tail(env, nullptr); atomic_store32(&slot->snapshot_pages_used, 0, mo_Relaxed); - safe64_reset(&slot->txnid, false); + safe64_reset(&slot->txnid, true); atomic_store32(&env->lck->rdt_refresh_flag, true, mo_Relaxed); } else { eASSERT(env, slot->pid.weak == env->pid); @@ -1373,7 +1391,9 @@ int txn_end(MDBX_txn *txn, const unsigned mode) { imports.srwl_ReleaseShared(&env->remap_guard); #endif txn->n_dbi = 0; /* prevent further DBI activity */ - txn->flags = MDBX_TXN_RDONLY | MDBX_TXN_FINISHED; + txn->flags = (mode & TXN_END_OUSTED) + ? 
MDBX_TXN_RDONLY | MDBX_TXN_FINISHED | MDBX_TXN_OUSTED + : MDBX_TXN_RDONLY | MDBX_TXN_FINISHED; txn->owner = 0; } else if (!(txn->flags & MDBX_TXN_FINISHED)) { ENSURE(env, @@ -1483,16 +1503,17 @@ int mdbx_txn_renew(MDBX_txn *txn) { if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0)) return MDBX_EINVAL; - int rc; if (unlikely(txn->owner != 0 || !(txn->flags & MDBX_TXN_FINISHED))) { - rc = mdbx_txn_reset(txn); + int rc = mdbx_txn_reset(txn); if (unlikely(rc != MDBX_SUCCESS)) return rc; } - rc = txn_renew(txn, MDBX_TXN_RDONLY); + int rc = txn_renew(txn, MDBX_TXN_RDONLY); if (rc == MDBX_SUCCESS) { - tASSERT(txn, txn->owner == osal_thread_self()); + tASSERT(txn, txn->owner == (txn->flags & MDBX_NOSTICKYTHREADS) + ? 0 + : osal_thread_self()); DEBUG("renew txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->txnid, (txn->flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, @@ -1550,12 +1571,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, flags |= parent->flags & (txn_rw_begin_flags | MDBX_TXN_SPILLS | MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP); - } else if (flags & MDBX_TXN_RDONLY) { - if ((env->flags & MDBX_NOSTICKYTHREADS) == 0 && env->txn && - unlikely(env->basal_txn->owner == osal_thread_self()) && - (globals.runtime_flags & MDBX_DBG_LEGACY_OVERLAP) == 0) - return MDBX_TXN_OVERLAPPING; - } else { + } else if ((flags & MDBX_TXN_RDONLY) == 0) { /* Reuse preallocated write txn. However, do not touch it until * txn_renew() succeeds, since it currently may be active. 
*/ txn = env->basal_txn; @@ -1789,8 +1805,10 @@ int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) { info->txn_reader_lag = head.txnid - info->txn_id; info->txn_space_dirty = info->txn_space_retired = 0; - uint64_t reader_snapshot_pages_retired; + uint64_t reader_snapshot_pages_retired = 0; if (txn->to.reader && + ((txn->flags & MDBX_TXN_PARKED) == 0 || + safe64_read(&txn->to.reader->tid) != MDBX_TID_TXN_OUSTED) && head_retired > (reader_snapshot_pages_retired = atomic_load64( &txn->to.reader->snapshot_pages_retired, mo_Relaxed))) { @@ -1808,19 +1826,21 @@ int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) { retry: if (atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease)) { jitter4testing(true); + const uint64_t snap_tid = safe64_read(&lck->rdt[i].tid); const txnid_t snap_txnid = safe64_read(&lck->rdt[i].txnid); const uint64_t snap_retired = atomic_load64( &lck->rdt[i].snapshot_pages_retired, mo_AcquireRelease); if (unlikely(snap_retired != atomic_load64(&lck->rdt[i].snapshot_pages_retired, mo_Relaxed)) || - snap_txnid != safe64_read(&lck->rdt[i].txnid)) + snap_txnid != safe64_read(&lck->rdt[i].txnid) || + snap_tid != safe64_read(&lck->rdt[i].tid)) goto retry; if (snap_txnid <= txn->txnid) { retired_next_reader = 0; break; } - if (snap_txnid < next_reader) { + if (snap_txnid < next_reader && snap_tid >= MDBX_TID_TXN_OUSTED) { next_reader = snap_txnid; retired_next_reader = pgno2bytes( env, (pgno_t)(snap_retired - @@ -1885,7 +1905,7 @@ uint64_t mdbx_txn_id(const MDBX_txn *txn) { return txn->txnid; } -int mdbx_txn_flags(const MDBX_txn *txn) { +MDBX_txn_flags_t mdbx_txn_flags(const MDBX_txn *txn) { STATIC_ASSERT( (MDBX_TXN_INVALID & (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | @@ -1894,7 +1914,12 @@ int mdbx_txn_flags(const MDBX_txn *txn) { if (unlikely(!txn || txn->signature != txn_signature)) return MDBX_TXN_INVALID; assert(0 == (int)(txn->flags & MDBX_TXN_INVALID)); - return txn->flags; + + 
MDBX_txn_flags_t flags = txn->flags; + if (F_ISSET(flags, MDBX_TXN_PARKED | MDBX_TXN_RDONLY) && txn->to.reader && + safe64_read(&txn->to.reader->tid) == MDBX_TID_TXN_OUSTED) + flags |= MDBX_TXN_OUSTED; + return flags; } int mdbx_txn_reset(MDBX_txn *txn) { @@ -1946,3 +1971,55 @@ int mdbx_txn_abort(MDBX_txn *txn) { return txn_abort(txn); } + +int mdbx_txn_park(MDBX_txn *txn, bool autounpark) { + STATIC_ASSERT(MDBX_TXN_BLOCKED > MDBX_TXN_ERROR); + int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0)) + return MDBX_TXN_INVALID; + + if (unlikely((txn->flags & MDBX_TXN_ERROR))) { + rc = txn_end(txn, TXN_END_RESET | TXN_END_UPDATE); + return rc ? rc : MDBX_OUSTED; + } + + return txn_park(txn, autounpark); +} + +int mdbx_txn_unpark(MDBX_txn *txn, bool restart_if_ousted) { + STATIC_ASSERT(MDBX_TXN_BLOCKED > MDBX_TXN_PARKED + MDBX_TXN_ERROR); + int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_PARKED - MDBX_TXN_ERROR); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + if (unlikely(!F_ISSET(txn->flags, MDBX_TXN_RDONLY | MDBX_TXN_PARKED))) + return MDBX_SUCCESS; + + rc = txn_unpark(txn); + if (likely(rc != MDBX_OUSTED) || !restart_if_ousted) + return rc; + + tASSERT(txn, txn->flags & MDBX_TXN_FINISHED); + rc = txn_renew(txn, MDBX_TXN_RDONLY); + return (rc == MDBX_SUCCESS) ? MDBX_RESULT_TRUE : rc; +} + +int txn_check_badbits_parked(const MDBX_txn *txn, int bad_bits) { + tASSERT(txn, (bad_bits & MDBX_TXN_PARKED) && (txn->flags & bad_bits)); + /* Здесь осознано заложено отличие в поведении припаркованных транзакций: + * - некоторые функции (например mdbx_env_info_ex()), допускают + * использование поломанных транзакций (с флагом MDBX_TXN_ERROR), но + * не могут работать с припаркованными транзакциями (требуют распарковки). + * - но при распарковке поломанные транзакции завершаются. 
+ * - получается что транзакцию можно припарковать, потом поломать вызвав + * mdbx_txn_break(), но далее любое её использование приведет к завершению + * при распарковке. */ + if ((txn->flags & (bad_bits | MDBX_TXN_AUTOUNPARK)) != + (MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK)) + return MDBX_BAD_TXN; + + tASSERT(txn, bad_bits == MDBX_TXN_BLOCKED || + bad_bits == MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); + return mdbx_txn_unpark((MDBX_txn *)txn, false); +} From 23117062727aed537261f04dc69748542e384088 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 10 Jul 2024 22:31:41 +0300 Subject: [PATCH 208/443] =?UTF-8?q?mdbx-testing:=20=D1=82=D0=B5=D1=81?= =?UTF-8?q?=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20=D0=BF?= =?UTF-8?q?=D0=B0=D1=80=D0=BA=D0=BE=D0=B2=D0=BA=D0=B8=20=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/config.c++ | 12 +++++- test/config.h++ | 2 +- test/fork.c++ | 2 +- test/jitter.c++ | 3 ++ test/main.c++ | 4 +- test/osal-unix.c++ | 2 + test/osal-windows.c++ | 4 ++ test/osal.h++ | 1 + test/test.c++ | 93 +++++++++++++++++++++++++++++++++++++++++++ test/test.h++ | 4 +- 10 files changed, 120 insertions(+), 7 deletions(-) diff --git a/test/config.c++ b/test/config.c++ index a06b99d2..4732d95b 100644 --- a/test/config.c++ +++ b/test/config.c++ @@ -137,8 +137,16 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, if (strcmp(value_cstr, "rnd") == 0 || strcmp(value_cstr, "rand") == 0 || strcmp(value_cstr, "random") == 0) { value = minval; - if (maxval > minval) - value += (prng32() + UINT64_C(44263400549519813)) % (maxval - minval); + if (maxval > minval) { + uint64_t salt = (scale != entropy) + ? 
prng64() ^ UINT64_C(44263400549519813) + : (chrono::now_monotonic().fixedpoint ^ + UINT64_C(0xD85794512ED321FD)) * + UINT64_C(0x9120038359EAF3) ^ + chrono::now_realtime().fixedpoint * + UINT64_C(0x2FE5232BDC8E5F); + value += salt % (maxval - minval); + } if (scale == intkey) value &= ~3u; return true; diff --git a/test/config.h++ b/test/config.h++ index 4da0ed3a..12bc1b66 100644 --- a/test/config.h++ +++ b/test/config.h++ @@ -52,7 +52,7 @@ const char *keygencase2str(const keygen_case); namespace config { -enum scale_mode { no_scale, decimal, binary, duration, intkey }; +enum scale_mode { no_scale, decimal, binary, duration, intkey, entropy }; bool parse_option(int argc, char *const argv[], int &narg, const char *option, const char **value, const char *default_value = nullptr); diff --git a/test/fork.c++ b/test/fork.c++ index 29c95ff8..05fb250a 100644 --- a/test/fork.c++ +++ b/test/fork.c++ @@ -28,7 +28,7 @@ public: bool testcase_smoke4fork::open_dbi() { if (!dbi || dbi_invalid) { if (dbi_stable || - (mdbx_txn_flags(txn_guard.get()) & int(MDBX_TXN_RDONLY)) == 0) { + (mdbx_txn_flags(txn_guard.get()) & MDBX_TXN_RDONLY) == 0) { dbi = db_table_open(!dbi_stable); dbi_invalid = false; } diff --git a/test/jitter.c++ b/test/jitter.c++ index b868c9c5..8a4cd0b3 100644 --- a/test/jitter.c++ +++ b/test/jitter.c++ @@ -164,6 +164,9 @@ bool testcase_jitter::run() { failure_perror("mdbx_env_set_geometry-1", err); } } + if (flipcoin()) { + // err = + } txn_end(flipcoin()); if (global::config::geometry_jitter) { diff --git a/test/main.c++ b/test/main.c++ index 6b482807..9a7fb4df 100644 --- a/test/main.c++ +++ b/test/main.c++ @@ -394,7 +394,7 @@ int main(int argc, char *const argv[]) { continue; } if (config::parse_option(argc, argv, narg, "repeat", params.nrepeat, - config::no_scale)) + config::entropy)) continue; if (config::parse_option(argc, argv, narg, "threads", params.nthreads, config::no_scale, 1, 64)) @@ -443,7 +443,7 @@ int main(int argc, char *const argv[]) { 
params.keygen.mesh, 0, 64)) continue; if (config::parse_option(argc, argv, narg, "prng-seed", params.prng_seed, - config::no_scale)) { + config::entropy)) { prng_seed(params.prng_seed); continue; } diff --git a/test/osal-unix.c++ b/test/osal-unix.c++ index 91cf7da5..df340c1c 100644 --- a/test/osal-unix.c++ +++ b/test/osal-unix.c++ @@ -320,6 +320,8 @@ static void handler_SIGUSR(int signum) { } } +bool osal_multiactor_mode(void) { return overlord_pid != 0; } + bool osal_progress_push(bool active) { if (overlord_pid) { if (kill(overlord_pid, active ? SIGUSR1 : SIGUSR2)) diff --git a/test/osal-windows.c++ b/test/osal-windows.c++ index 54a4ed15..0ce04cc8 100644 --- a/test/osal-windows.c++ +++ b/test/osal-windows.c++ @@ -175,6 +175,10 @@ bool actor_config::osal_deserialize(const char *str, const char *end, typedef std::pair child; static std::unordered_map children; +bool osal_multiactor_mode(void) { + return hProgressActiveEvent || hProgressPassiveEvent; +} + bool osal_progress_push(bool active) { if (!children.empty()) { if (!SetEvent(active ? hProgressActiveEvent : hProgressPassiveEvent)) diff --git a/test/osal.h++ b/test/osal.h++ index 7d11dbf3..058f7078 100644 --- a/test/osal.h++ +++ b/test/osal.h++ @@ -16,6 +16,7 @@ int osal_actor_poll(mdbx_pid_t &pid, unsigned timeout); void osal_wait4barrier(void); bool osal_progress_push(bool active); +bool osal_multiactor_mode(void); int osal_delay(unsigned seconds); void osal_udelay(size_t us); diff --git a/test/test.c++ b/test/test.c++ index 0451b162..0c865116 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -212,6 +212,9 @@ void testcase::txn_begin(bool readonly, MDBX_txn_flags_t flags) { log_trace("== counter %u, env_warmup(flags %u), rc %d", counter, warmup_flags, err); } + + if (readonly && flipcoin()) + txn_probe_parking(); } int testcase::breakable_commit() { @@ -267,6 +270,9 @@ void testcase::txn_end(bool abort) { log_trace(">> txn_end(%s)", abort ? 
"abort" : "commit"); assert(txn_guard); + if (flipcoin()) + txn_probe_parking(); + MDBX_txn *txn = txn_guard.release(); if (abort) { int err = mdbx_txn_abort(txn); @@ -321,6 +327,13 @@ int testcase::breakable_restart() { int rc = MDBX_SUCCESS; if (txn_guard) rc = breakable_commit(); + if (flipcoin()) { + txn_begin(true); + txn_probe_parking(); + int err = mdbx_txn_abort(txn_guard.release()); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror("mdbx_txn_abort()", err); + } txn_begin(false, MDBX_TXN_READWRITE); if (cursor_guard) cursor_renew(); @@ -1426,3 +1439,83 @@ bool testcase::check_batch_get() { mdbx_cursor_close(batch_cursor); return rc; } + +bool testcase::txn_probe_parking() { + MDBX_txn_flags_t state = + mdbx_txn_flags(txn_guard.get()) & + (MDBX_TXN_RDONLY | MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK | + MDBX_TXN_OUSTED | MDBX_TXN_BLOCKED); + if (state != MDBX_TXN_RDONLY) + return true; + + const bool autounpark = flipcoin(); + int err = mdbx_txn_park(txn_guard.get(), autounpark); + if (err != MDBX_SUCCESS) + failure("mdbx_txn_park(), err %d", err); + + MDBX_txn_info txn_info; + if (flipcoin()) { + err = mdbx_txn_info(txn_guard.get(), &txn_info, flipcoin()); + if (err != MDBX_SUCCESS) + failure("mdbx_txn_info(1), state 0x%x, err %d", + state = mdbx_txn_flags(txn_guard.get()), err); + } + + if (osal_multiactor_mode() && !mode_readonly()) { + while (flipcoin() && + ((state = mdbx_txn_flags(txn_guard.get())) & MDBX_TXN_OUSTED) == 0) + osal_udelay(4242); + } + + if (flipcoin()) { + err = mdbx_txn_info(txn_guard.get(), &txn_info, flipcoin()); + if (err != MDBX_SUCCESS) + failure("mdbx_txn_info(2), state 0x%x, err %d", + state = mdbx_txn_flags(txn_guard.get()), err); + } + + if (flipcoin()) { + MDBX_envinfo env_info; + err = mdbx_env_info_ex(db_guard.get(), txn_guard.get(), &env_info, + sizeof(env_info)); + if (!autounpark) { + if (err != MDBX_BAD_TXN) + failure("mdbx_env_info_ex(autounpark=%s), flags 0x%x, unexpected err " + "%d, must %d", + autounpark ? 
"true" : "false", state, err, MDBX_BAD_TXN); + } else if (err != MDBX_SUCCESS) { + if (err != MDBX_OUSTED || + ((state = mdbx_txn_flags(txn_guard.get())) & MDBX_TXN_OUSTED) == 0) + failure("mdbx_env_info_ex(autounpark=%s), flags 0x%x, err %d", + autounpark ? "true" : "false", state, err); + else { + err = mdbx_txn_renew(txn_guard.get()); + if (err != MDBX_SUCCESS) + failure("mdbx_txn_renew(), state 0x%x, err %d", + state = mdbx_txn_flags(txn_guard.get()), err); + } + } + } + + const bool autorestart = flipcoin(); + err = mdbx_txn_unpark(txn_guard.get(), autorestart); + if (MDBX_IS_ERROR(err)) { + if (err != MDBX_OUSTED || autorestart) + failure("mdbx_txn_unpark(autounpark=%s, autorestart=%s), err %d", + autounpark ? "true" : "false", autorestart ? "true" : "false", + err); + else { + err = mdbx_txn_renew(txn_guard.get()); + if (err != MDBX_SUCCESS) + failure("mdbx_txn_renew(), state 0x%x, err %d", + state = mdbx_txn_flags(txn_guard.get()), err); + } + } + + state = mdbx_txn_flags(txn_guard.get()) & + (MDBX_TXN_RDONLY | MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK | + MDBX_TXN_OUSTED | MDBX_TXN_BLOCKED); + if (state != MDBX_TXN_RDONLY) + failure("unexpected txn-state 0x%x", state); + return state == MDBX_TXN_RDONLY; +} diff --git a/test/test.h++ b/test/test.h++ index f3375a6e..39ce1118 100644 --- a/test/test.h++ +++ b/test/test.h++ @@ -254,6 +254,8 @@ protected: void cursor_renew(); void txn_inject_writefault(void); void txn_inject_writefault(MDBX_txn *txn); + bool txn_probe_parking(); + void fetch_canary(); void update_canary(uint64_t increment); bool checkdata(const char *step, MDBX_dbi handle, MDBX_val key2check, @@ -275,7 +277,7 @@ protected: void signal(); bool should_continue(bool check_timeout_only = false) const; - bool failure(const char *fmt, ...) const; + bool MDBX_PRINTF_ARGS(2, 3) failure(const char *fmt, ...) 
const; void generate_pair(const keygen::serial_t serial, keygen::buffer &out_key, keygen::buffer &out_value, keygen::serial_t data_age) { keyvalue_maker.pair(serial, out_key, out_value, data_age, false); From 32df0ad1ebcb7d36ec744c94d8fa134873e7f011 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 12 Jul 2024 11:40:47 +0300 Subject: [PATCH 209/443] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=80=D0=B5=D0=B3=D1=80=D0=B5?= =?UTF-8?q?=D1=81=D1=81=D0=B0=20`SIGSEGV`=20=D0=BF=D1=80=D0=B8=20=D0=BE?= =?UTF-8?q?=D1=82=D0=BA=D1=80=D1=8B=D1=82=D0=B8=D0=B8=20=D0=91=D0=94=20?= =?UTF-8?q?=D1=81=20=D0=B8=D0=B7=D0=BC=D0=B5=D0=BD=D0=B5=D0=BD=D0=BD=D1=8B?= =?UTF-8?q?=D0=BC=20=D1=80=D0=B0=D0=B7=D0=BC=D0=B5=D1=80=D0=BE=D0=BC=20?= =?UTF-8?q?=D1=81=D1=82=D1=80=D0=B0=D0=BD=D0=B8=D1=86=D1=8B.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/api-env.c | 5 ----- src/dxb.c | 6 ++++++ src/env.c | 19 +++++++++++-------- src/meta.c | 5 +---- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/api-env.c b/src/api-env.c index 852d3394..4aa7bd2e 100644 --- a/src/api-env.c +++ b/src/api-env.c @@ -530,12 +530,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, base + bitmap_bytes + env->max_dbi * (sizeof(txn->dbs[0]) + sizeof(txn->cursors[0]) + sizeof(txn->dbi_seqs[0]) + sizeof(txn->dbi_state[0])); - rc = env_page_auxbuffer(env); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - memset(env->page_auxbuf, -1, env->ps * (size_t)2); - memset(ptr_disp(env->page_auxbuf, env->ps * (size_t)2), 0, env->ps); txn = osal_calloc(1, size); if (unlikely(!txn)) { rc = MDBX_ENOMEM; diff --git a/src/dxb.c b/src/dxb.c index 401115d9..c91880fe 100644 --- a/src/dxb.c +++ b/src/dxb.c @@ -641,6 +641,12 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, if (env->ps 
!= header.pagesize) env_setup_pagesize(env, header.pagesize); + if ((env->flags & MDBX_RDONLY) == 0) { + err = env_page_auxbuffer(env); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + const size_t used_bytes = pgno2bytes(env, header.geometry.first_unallocated); const size_t used_aligned2os_bytes = ceil_powerof2(used_bytes, globals.sys_pagesize); diff --git a/src/env.c b/src/env.c index 2d5dadc5..c4902765 100644 --- a/src/env.c +++ b/src/env.c @@ -10,10 +10,16 @@ bool env_txn0_owned(const MDBX_env *env) { } int env_page_auxbuffer(MDBX_env *env) { - return env->page_auxbuf ? MDBX_SUCCESS - : osal_memalign_alloc(globals.sys_pagesize, - env->ps * (size_t)NUM_METAS, - &env->page_auxbuf); + const int err = + env->page_auxbuf + ? MDBX_SUCCESS + : osal_memalign_alloc(globals.sys_pagesize, + env->ps * (size_t)NUM_METAS, &env->page_auxbuf); + if (likely(err == MDBX_SUCCESS)) { + memset(env->page_auxbuf, -1, env->ps * (size_t)2); + memset(ptr_disp(env->page_auxbuf, env->ps * (size_t)2), 0, env->ps); + } + return err; } __cold unsigned env_setup_pagesize(MDBX_env *env, const size_t pagesize) { @@ -22,11 +28,8 @@ __cold unsigned env_setup_pagesize(MDBX_env *env, const size_t pagesize) { ENSURE(env, is_powerof2(pagesize)); ENSURE(env, pagesize >= MDBX_MIN_PAGESIZE); ENSURE(env, pagesize <= MDBX_MAX_PAGESIZE); + ENSURE(env, !env->page_auxbuf && env->ps != pagesize); env->ps = (unsigned)pagesize; - if (env->page_auxbuf) { - osal_memalign_free(env->page_auxbuf); - env->page_auxbuf = nullptr; - } STATIC_ASSERT(MAX_GC1OVPAGE(MDBX_MIN_PAGESIZE) > 4); STATIC_ASSERT(MAX_GC1OVPAGE(MDBX_MAX_PAGESIZE) < PAGELIST_LIMIT); diff --git a/src/meta.c b/src/meta.c index ee2a5aef..7214335b 100644 --- a/src/meta.c +++ b/src/meta.c @@ -392,9 +392,6 @@ __cold meta_t *meta_init_triplet(const MDBX_env *env, void *buffer) { __cold int __must_check_result meta_override(MDBX_env *env, size_t target, txnid_t txnid, const meta_t *shape) { - int rc = env_page_auxbuffer(env); - if (unlikely(rc != 
MDBX_SUCCESS)) - return rc; page_t *const page = env->page_auxbuf; meta_model(env, page, target, &((target == 0 && shape) ? shape : METAPAGE(env, 0))->dxbid); @@ -440,7 +437,7 @@ __cold int __must_check_result meta_override(MDBX_env *env, size_t target, } meta_sign_as_steady(model); - rc = meta_validate(env, model, page, (pgno_t)target, nullptr); + int rc = meta_validate(env, model, page, (pgno_t)target, nullptr); if (unlikely(MDBX_IS_ERROR(rc))) return MDBX_PROBLEM; From bdfec14992d5fcac85ce412f2f594730c2cd4481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 12 Jul 2024 14:55:17 +0300 Subject: [PATCH 210/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index c003df54..5763b3c8 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -38,6 +38,16 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic использования блокировок, управляемый опцией сборки `MDBX_ENABLE_DBI_LOCKFREE`, которая включена по-умолчанию. +- Поддержка "парковки" читающих транзакций с их вытеснением ради + переработки старых MVCC-снимков и предотвращения проблем вызываемых + приостановкой переработки мусора. Механизм парковки и вытеснения + припаркованных транзакций является как дополнением, так и более простой + в использовании альтернативой обратному вызову + [Handle-Slow-Readers](https://libmdbx.dqdkfa.ru/group__c__err.html#ga2cb11b56414c282fe06dd942ae6cade6). + Для удобства функции `mdbx_txn_park()` и `mdbx_txn_unpack()` имеют + дополнительные аргументы, позволяющие запросить автоматическую + "распарковку" припаркованных и перезапуск вытесненных транзакций. 
+ - Расширение API позиционирования курсоров более удобными и очевидными операциями по аналогии условиям `<`, `<=`, `==`, `>=`, `>` как для ключей, так и для пар ключ-значение. @@ -65,6 +75,9 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Функция `mdbx_preopen_snapinfo()` для получения информации о БД без её открытия. + - Функция `mdbx_enumerate_subdb()` для получения информации + об именованных пользовательских таблицах. + - Поддержка функций логирования обратного вызова без функционала `vprintf()`, что существенно облегчает использование логирования в привязках к другим языкам программирования. @@ -75,13 +88,17 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Опция `MDBX_opt_prefer_waf_insteadof_balance`. - - Опции `MDBX_opt_subpage_limit`, `MDBX_opt_subpage_room_threshold`, `MDBX_opt_subpage_reserve_prereq`, `MDBX_opt_subpage_reserve_limit`. + - Опции `MDBX_opt_subpage_limit`, `MDBX_opt_subpage_room_threshold`, + `MDBX_opt_subpage_reserve_prereq`, `MDBX_opt_subpage_reserve_limit`. - Управление основной блокировкой lock/unlock/upgrade/downgrade для координации пишущих транзакций. - Функции `mdbx_limits_keysize_min()` и `mdbx_limits_valsize_min()` для получения нижней границы длины ключей и значений. + - Для идентификации БД добавлен UUID доступный в поле `mi_dxbid` структуры `MDBX_envinfo`, + получаемой посредством `mdbx_env_info_ex()`.
+ - Расширение и доработка C++ API: - добавлен тип `mdbx::cursor::estimation_result`, а поведение методов @@ -97,7 +114,8 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - добавлены статические методы `buffer::hex()`, `base64()`, `base58()`; - для транзакций и курсоров добавлены методы `get_/set_context`; - добавлен метод `cursor::clone()`; - - поддержка base58 переработана и приведена в соответствии с черновиком RFC, в текущем понимании теперь это одна из самых высокопроизводительных реализаций; + - поддержка base58 переработана и приведена в соответствии с черновиком RFC, + в текущем понимании теперь это одна из самых высокопроизводительных реализаций base58; - переработка `to_hex()` и `from_hex()`. Нарушение совместимости: From 8e8ac09e14dadb84c117f73df39ff46ae71c1e33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 13 Jul 2024 16:13:11 +0300 Subject: [PATCH 211/443] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5?= =?UTF-8?q?=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20`mdbx=5Fenum?= =?UTF-8?q?erate=5Fsubdb()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 13 ++++++++++--- src/dbi.c | 8 +++++--- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/mdbx.h b/mdbx.h index 5a956e64..6908f969 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4583,17 +4583,24 @@ typedef int(MDBX_subdb_enum_func)(void *ctx, const MDBX_txn *txn, const struct MDBX_stat *stat, MDBX_dbi dbi) MDBX_CXX17_NOEXCEPT; -/** \brief Enumerate the entries in the reader lock table. +/** \brief Перечисляет пользовательские именованные таблицы. + * + * Производит перечисление пользовательских именованных таблиц, вызывая + * специфицируемую пользователем функцию-визитер для каждой именованной таблицы.
+ * Перечисление продолжается до исчерпания именованных таблиц, либо до возврата + * отличного от нуля результата из заданной пользователем функции, которое будет + * сразу возвращено в качестве результата. + * * \ingroup c_statinfo * \see MDBX_subdb_enum_func * * \param [in] txn Транзакция запущенная посредством * \ref mdbx_txn_begin(). - * \param [in] func Указатель на пользовательскую функцию-перечислитель + * \param [in] func Указатель на пользовательскую функцию * с сигнатурой \ref MDBX_subdb_enum_func, * которая будет вызвана для каждой таблицы. * \param [in] ctx Указатель на некоторый контект, который будет передан - * в функцию-перечислитель как есть. + * в функцию `func()` как есть. * * \returns Ненулевое значение кода ошибки, либо 0 при успешном выполнении. */ LIBMDBX_API int mdbx_enumerate_subdb(const MDBX_txn *txn, diff --git a/src/dbi.c b/src/dbi.c index 9a6d0169..ffd1ca6a 100644 --- a/src/dbi.c +++ b/src/dbi.c @@ -1026,9 +1026,11 @@ __cold int mdbx_enumerate_subdb(const MDBX_txn *txn, MDBX_subdb_enum_func *func, stat_get(tree, &stat, sizeof(stat)); rc = func(ctx, txn, &name, tree->flags, &stat, dbi); if (rc != MDBX_SUCCESS) - break; + goto bailout; } - txn->cursors[MAIN_DBI] = cx.outer.next; + rc = (rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc; - return (rc == MDBX_NOTFOUND) ? 
MDBX_SUCCESS : rc; + bailout: + txn->cursors[MAIN_DBI] = cx.outer.next; + return rc; } From 8a04337e79cfc71bce45ce46a7593ff94837c3d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 13 Jul 2024 16:15:21 +0300 Subject: [PATCH 212/443] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5?= =?UTF-8?q?=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 5763b3c8..4a042104 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -38,15 +38,15 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic использования блокировок, управляемый опцией сборки `MDBX_ENABLE_DBI_LOCKFREE`, которая включена по-умолчанию. -- Поддержка "парковки" читающих транзакций с их вытеснением ради - переработки старых MVCC-снимков и предотвращения проблем вызываемых - приостановкой переработки мусора. Механизм парковки и вытеснения - припаркованных транзакций является как дополнением, так и более простой - в использовании альтернативой обратному вызову - [Handle-Slow-Readers](https://libmdbx.dqdkfa.ru/group__c__err.html#ga2cb11b56414c282fe06dd942ae6cade6). - Для удобства функции `mdbx_txn_park()` и `mdbx_txn_unpack()` имеют - дополнительные аргументы, позволяющие запросить автоматическую - "распарковку" припаркованных и перезапуск вытесненных транзакций. + - Поддержка "парковки" читающих транзакций с их вытеснением ради + переработки старых MVCC-снимков и предотвращения проблем вызываемых + приостановкой переработки мусора. 
Механизм парковки и вытеснения + припаркованных транзакций является как дополнением, так и более простой + в использовании альтернативой обратному вызову + [Handle-Slow-Readers](https://libmdbx.dqdkfa.ru/group__c__err.html#ga2cb11b56414c282fe06dd942ae6cade6). + Для удобства функции `mdbx_txn_park()` и `mdbx_txn_unpark()` имеют + дополнительные аргументы, позволяющие запросить автоматическую + "распарковку" припаркованных и перезапуск вытесненных транзакций. - Расширение API позиционирования курсоров более удобными и очевидными операциями по аналогии условиям `<`, `<=`, `==`, `>=`, `>` как для From f20addd7fc6c6fc7f14c539b652d93428b79f050 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 13 Jul 2024 17:03:06 +0300 Subject: [PATCH 213/443] =?UTF-8?q?mdbx-doc:=20=D0=B4=D0=BE=D1=80=D0=B0?= =?UTF-8?q?=D0=B1=D0=BE=D1=82=D0=BA=D0=B0=20doxygen-=D1=81=D1=81=D1=8B?= =?UTF-8?q?=D0=BB=D0=BE=D0=BA.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/_restrictions.md | 5 +++-- mdbx.h | 8 ++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/_restrictions.md b/docs/_restrictions.md index 64c54de6..7e905b5e 100644 --- a/docs/_restrictions.md +++ b/docs/_restrictions.md @@ -35,11 +35,12 @@ or debugging of a client application while retaining an active read transaction. LMDB this results in `MDB_MAP_FULL` error and subsequent write performance degradation. -MDBX mostly solve "long-lived" readers issue by using the +MDBX mostly solves the "long-lived" readers issue by offering to use a +transaction parking-and-ousting approach via \ref mdbx_txn_park(), Handle-Slow-Readers \ref MDBX_hsr_func callback which allows to abort long-lived read transactions, and using the \ref MDBX_LIFORECLAIM mode which addresses subsequent performance degradation.
The "next" version -of libmdbx (\ref MithrilDB) will completely solve this. +of libmdbx (aka \ref MithrilDB) will completely solve this. - Avoid suspending a process with active transactions. These would then be "long-lived" as above. diff --git a/mdbx.h b/mdbx.h index 6908f969..95caf473 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4270,9 +4270,10 @@ LIBMDBX_API int mdbx_txn_reset(MDBX_txn *txn); * или перезапущена в любой момент посредством \ref mdbx_txn_abort(), * \ref mdbx_txn_reset() и \ref mdbx_txn_renew(), соответственно. * - * \see long-lived-read * \see mdbx_txn_unpark() * \see mdbx_txn_flags() + * \see mdbx_env_set_hsr() + * \see Long-lived read transactions * * \param [in] txn Транзакция чтения запущенная посредством * \ref mdbx_txn_begin(). @@ -4293,9 +4294,9 @@ LIBMDBX_API int mdbx_txn_park(MDBX_txn *txn, bool autounpark); * её перезапуск аналогично \ref mdbx_txn_renew(), либо транзакция сбрасывается * и возвращается код ошибки \ref MDBX_OUSTED. * - * \see long-lived-read * \see mdbx_txn_park() * \see mdbx_txn_flags() + * \see Long-lived read transactions * * \param [in] txn Транзакция чтения запущенная посредством * \ref mdbx_txn_begin() и затем припаркованная @@ -6127,6 +6128,7 @@ LIBMDBX_API int mdbx_thread_unregister(const MDBX_env *env); * with a "long-lived" read transactions. * \see mdbx_env_set_hsr() * \see mdbx_env_get_hsr() + * \see mdbx_txn_park() * \see Long-lived read transactions * * Using this callback you can choose how to resolve the situation: @@ -6201,6 +6203,7 @@ typedef int(MDBX_hsr_func)(const MDBX_env *env, const MDBX_txn *txn, * * \see MDBX_hsr_func * \see mdbx_env_get_hsr() + * \see mdbx_txn_park() * \see Long-lived read transactions * * \param [in] env An environment handle returned @@ -6216,6 +6219,7 @@ LIBMDBX_API int mdbx_env_set_hsr(MDBX_env *env, MDBX_hsr_func *hsr_callback); * recycled. 
* \see MDBX_hsr_func * \see mdbx_env_set_hsr() + * \see mdbx_txn_park() * \see Long-lived read transactions * * \param [in] env An environment handle returned by \ref mdbx_env_create(). From 242ebefdb721e29f117868eceaac95f0d463b536 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 14 Jul 2024 23:42:24 +0300 Subject: [PATCH 214/443] =?UTF-8?q?mdbx-windows:=20=D0=B4=D0=BE=D0=B1?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=BE=D1=82?= =?UTF-8?q?=D0=B5=D1=80=D1=8F=D0=BD=D0=BD=D0=BE=D0=B3=D0=BE=20`#include=20?= =?UTF-8?q?`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/osal.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/osal.c b/src/osal.c index 1ae7dcf9..18edf932 100644 --- a/src/osal.c +++ b/src/osal.c @@ -3513,6 +3513,10 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, #include #endif /* sys/random.h */ +#if defined(_WIN32) || defined(_WIN64) +#include +#endif /* Windows */ + MDBX_INTERNAL bin128_t osal_guid(const MDBX_env *env) { struct { uint64_t begin, end, cputime; From 7dee88e27f180e9b51c4e40f7d4af6cffd2404b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 18 Jul 2024 01:10:59 +0300 Subject: [PATCH 215/443] =?UTF-8?q?mdbx-test:=20=D0=B2=D1=8B=D0=B2=D0=BE?= =?UTF-8?q?=D0=B4=20=D0=BA=D0=B0=D0=B4=D1=80=D0=BE=D0=B2=20=D1=81=D1=82?= =?UTF-8?q?=D0=B5=D0=BA=D0=B0=20=D0=B4=D0=BB=D1=8F=20=D1=80=D0=B5=D1=88?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D1=8F=20=D0=BF=D1=80=D0=BE=D0=B1=D0=BB=D0=B5?= =?UTF-8?q?=D0=BC=20Windows.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/osal.h | 2 +- test/test.c++ | 198 +++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 188 insertions(+), 12 
deletions(-) diff --git a/src/osal.h b/src/osal.h index 23669cf6..64f9eb41 100644 --- a/src/osal.h +++ b/src/osal.h @@ -93,7 +93,7 @@ typedef CRITICAL_SECTION osal_fastmutex_t; /* *INDENT-OFF* */ /* clang-format off */ #define __try -#define __except(COND) if (false) +#define __except(COND) if (/* (void)(COND), */ false) /* *INDENT-ON* */ /* clang-format on */ #endif /* stub for MSVC's __try/__except */ diff --git a/test/test.c++ b/test/test.c++ index 0c865116..079f50c7 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -634,21 +634,184 @@ bool testcase::checkdata(const char *step, MDBX_dbi handle, MDBX_val key2check, //----------------------------------------------------------------------------- -bool test_execute(const actor_config &config_const) { - const mdbx_pid_t pid = osal_getpid(); - actor_config config = config_const; +#ifdef _MSC_VER - if (global::singlemode) { - logging::setup(format("single_%s", testcase2str(config.testcase))); - } else { - logging::setup((logging::loglevel)config.params.loglevel, - format("child_%u.%u", config.actor_id, config.space_id)); - log_trace(">> wait4barrier"); - osal_wait4barrier(); - log_trace("<< wait4barrier"); +#include "dbghelp.h" +#pragma comment(lib, "Dbghelp.lib") + +static void dump_stack(CONTEXT *ctx, FILE *out) { + const int MaxNameLen = 256; + + BOOL result; + HANDLE process; + HANDLE thread; + HMODULE hModule; + STACKFRAME64 stack; + ULONG frame; + DWORD64 displacement; + DWORD disp; + + char buffer[sizeof(SYMBOL_INFO) + MAX_SYM_NAME * sizeof(TCHAR)]; + char module[MaxNameLen]; + PSYMBOL_INFO pSymbol = (PSYMBOL_INFO)buffer; + + // On x64, StackWalk64 modifies the context record, that could + // cause crashes, so we create a copy to prevent it + CONTEXT ctxCopy; + memcpy(&ctxCopy, ctx, sizeof(CONTEXT)); + memset(&stack, 0, sizeof(STACKFRAME64)); + + process = GetCurrentProcess(); + thread = GetCurrentThread(); + displacement = 0; +#if defined(_M_IX86) + stack.AddrPC.Offset = (*ctx).Eip; + stack.AddrPC.Mode = 
AddrModeFlat; + stack.AddrStack.Offset = (*ctx).Esp; + stack.AddrStack.Mode = AddrModeFlat; + stack.AddrFrame.Offset = (*ctx).Ebp; + stack.AddrFrame.Mode = AddrModeFlat; +#endif /* _M_IX86 */ + + SymInitialize(process, NULL, TRUE); + + for (frame = 0;; frame++) { + // get next call from stack + result = StackWalk64( +#if defined(_M_AMD64) + IMAGE_FILE_MACHINE_AMD64 +#elif defined(_M_ARM64) + IMAGE_FILE_MACHINE_ARM64 +#elif defined(_M_ARM) + IMAGE_FILE_MACHINE_ARM +#elif defined(_M_IX86) + IMAGE_FILE_MACHINE_I386 +#else +#error "FIXME" +#endif + , + process, thread, &stack, &ctxCopy, NULL, SymFunctionTableAccess64, + SymGetModuleBase64, NULL); + + if (!result) + break; + + // get symbol name for address + pSymbol->SizeOfStruct = sizeof(SYMBOL_INFO); + pSymbol->MaxNameLen = MAX_SYM_NAME; + SymFromAddr(process, (ULONG64)stack.AddrPC.Offset, &displacement, pSymbol); + + IMAGEHLP_LINE64 line; + line.SizeOfStruct = sizeof(IMAGEHLP_LINE64); + + // try to get line + if (SymGetLineFromAddr64(process, stack.AddrPC.Offset, &disp, &line)) { + fprintf(out, "\tat %s in %s: line: %lu: address: 0x%0" PRIx64 "\n", + pSymbol->Name, line.FileName, line.LineNumber, pSymbol->Address); + } else { + // failed to get line + fprintf(out, "\tat %s, address 0x%0" PRIx64 ".\n", pSymbol->Name, + pSymbol->Address); + hModule = NULL; + lstrcpyA(module, ""); + GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | + GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + (LPCTSTR)(stack.AddrPC.Offset), &hModule); + + // at least print module name + if (hModule != NULL) + GetModuleFileNameA(hModule, module, MaxNameLen); + + fprintf(out, "in %s\n", module); + } } + fflush(stderr); +} +static LONG seh_filter(struct _EXCEPTION_POINTERS *ExInfo, FILE *out) { + const char *caption = ""; + switch (ExInfo->ExceptionRecord->ExceptionCode) { + case EXCEPTION_BREAKPOINT: + caption = "BREAKPOINT"; + break; + case EXCEPTION_SINGLE_STEP: + caption = "SINGLE STEPT"; + break; + case STATUS_CONTROL_C_EXIT: + caption 
= "CONTROL-C"; + break; + case /* STATUS_INTERRUPTED */ 0xC0000515L: + caption = "INTERRUPTED"; + break; + case EXCEPTION_ACCESS_VIOLATION: + caption = "ACCESS VIOLATION"; + break; + case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: + caption = "ARRAY BOUNDS EXCEEDED"; + break; + case EXCEPTION_DATATYPE_MISALIGNMENT: + caption = "MISALIGNMENT"; + break; + case EXCEPTION_STACK_OVERFLOW: + caption = "STACK OVERFLOW"; + break; + case EXCEPTION_INVALID_DISPOSITION: + caption = "INVALID DISPOSITION"; + break; + case EXCEPTION_ILLEGAL_INSTRUCTION: + caption = "ILLEGAL INSTRUCTION"; + break; + case EXCEPTION_NONCONTINUABLE_EXCEPTION: + caption = "NONCONTINUABLE EXCEPTION"; + break; + case /* STATUS_STACK_BUFFER_OVERRUN, STATUS_BUFFER_OVERFLOW_PREVENTED */ + 0xC0000409L: + caption = "BUFFER OVERRUN"; + break; + case /* STATUS_ASSERTION_FAILURE */ 0xC0000420L: + caption = "ASSERTION FAILURE"; + break; + case /* STATUS_HEAP_CORRUPTION */ 0xC0000374L: + caption = "HEAP CORRUPTION"; + break; + case /* STATUS_CONTROL_STACK_VIOLATION */ 0xC00001B2L: + caption = "CONTROL STACK VIOLATION"; + break; + case EXCEPTION_FLT_DIVIDE_BY_ZERO: + caption = "FLT DIVIDE BY ZERO"; + break; + default: + caption = "(unknown)"; + break; + } + PVOID CodeAdress = ExInfo->ExceptionRecord->ExceptionAddress; + fprintf(out, "****************************************************\n"); + fprintf(out, "*** A Program Fault occurred:\n"); + fprintf(out, "*** Error code %08X: %s\n", + ExInfo->ExceptionRecord->ExceptionCode, caption); + fprintf(out, "****************************************************\n"); + fprintf(out, "*** Address: %08zX\n", (intptr_t)CodeAdress); + fprintf(out, "*** Flags: %08X\n", + ExInfo->ExceptionRecord->ExceptionFlags); + dump_stack(ExInfo->ContextRecord, out); + return EXCEPTION_EXECUTE_HANDLER; +} +#endif /* _MSC_VER */ + +static bool execute_thunk(const actor_config *const_config, + const mdbx_pid_t pid) { + actor_config config = *const_config; try { + if (global::singlemode) { + 
logging::setup(format("single_%s", testcase2str(config.testcase))); + } else { + logging::setup((logging::loglevel)config.params.loglevel, + format("child_%u.%u", config.actor_id, config.space_id)); + log_trace(">> wait4barrier"); + osal_wait4barrier(); + log_trace("<< wait4barrier"); + } + std::unique_ptr test(registry::create_actor(config, pid)); size_t iter = 0; do { @@ -686,6 +849,19 @@ bool test_execute(const actor_config &config_const) { } } +bool test_execute(const actor_config &config) { +#ifdef _MSC_VER + __try { +#endif + return execute_thunk(&config, osal_getpid()); +#ifdef _MSC_VER + } __except (seh_filter(GetExceptionInformation(), stderr)) { + fprintf(stderr, "Exception \n"); + return false; + } +#endif +} + //----------------------------------------------------------------------------- enum speculum_cursors : int { From c46270ec56aa4cacd5da24aefe0ff97840fee90a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 19 Jul 2024 15:46:08 +0300 Subject: [PATCH 216/443] =?UTF-8?q?mdbx-windows:=20=D0=B8=D1=81=D0=BF?= =?UTF-8?q?=D1=80=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=B0?= =?UTF-8?q?=D0=B4=D0=B5=D0=BD=D0=B8=D1=8F=20=D0=BF=D1=80=D0=B8=20=D0=BB?= =?UTF-8?q?=D0=BE=D0=B3=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B8=20?= =?UTF-8?q?=D0=BE=D1=88=D0=B8=D0=B1=D0=BA=D0=B8=20`WriteFileGather()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/osal.c | 55 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/src/osal.c b/src/osal.c index 18edf932..ddf637eb 100644 --- a/src/osal.c +++ b/src/osal.c @@ -872,12 +872,12 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { } else { r.err = (int)GetLastError(); if (unlikely(r.err != ERROR_IO_PENDING)) { - ERROR("%s: fd %p, item %p (%zu), pgno %u, bytes %zu, offset %" PRId64 
- ", err %d", + void *data = Ptr64ToPtr(item->sgv[0].Buffer); + ERROR("%s: fd %p, item %p (%zu), addr %p pgno %u, bytes %zu," + " offset %" PRId64 ", err %d", "WriteFileGather", fd, __Wpedantic_format_voidptr(item), - item - ior->pool, ((page_t *)item->single.iov_base)->pgno, - bytes, item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), - r.err); + item - ior->pool, data, ((page_t *)data)->pgno, bytes, + item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), r.err); goto bailout_rc; } assert(wait_for > ior->event_pool + ior->event_stack); @@ -894,22 +894,23 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { r.err = (int)GetLastError(); switch (r.err) { default: - ERROR("%s: fd %p, item %p (%zu), pgno %u, bytes %zu, offset %" PRId64 - ", err %d", + ERROR("%s: fd %p, item %p (%zu), addr %p pgno %u, bytes %zu," + " offset %" PRId64 ", err %d", "WriteFileEx", fd, __Wpedantic_format_voidptr(item), - item - ior->pool, ((page_t *)item->single.iov_base)->pgno, - bytes, item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), - r.err); + item - ior->pool, item->single.iov_base, + ((page_t *)item->single.iov_base)->pgno, bytes, + item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), r.err); goto bailout_rc; case ERROR_NOT_FOUND: case ERROR_USER_MAPPED_FILE: case ERROR_LOCK_VIOLATION: - WARNING( - "%s: fd %p, item %p (%zu), pgno %u, bytes %zu, offset %" PRId64 - ", err %d", - "WriteFileEx", fd, __Wpedantic_format_voidptr(item), - item - ior->pool, ((page_t *)item->single.iov_base)->pgno, bytes, - item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), r.err); + WARNING("%s: fd %p, item %p (%zu), addr %p pgno %u, bytes %zu," + " offset %" PRId64 ", err %d", + "WriteFileEx", fd, __Wpedantic_format_voidptr(item), + item - ior->pool, item->single.iov_base, + ((page_t *)item->single.iov_base)->pgno, bytes, + item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), + r.err); SleepEx(0, true); goto retry; case ERROR_INVALID_USER_BUFFER: @@ -927,10 +928,11 @@ 
osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { if (!WriteFile(fd, item->single.iov_base, (DWORD)bytes, &written, &item->ov)) { r.err = (int)GetLastError(); - ERROR("%s: fd %p, item %p (%zu), pgno %u, bytes %zu, offset %" PRId64 - ", err %d", + ERROR("%s: fd %p, item %p (%zu), addr %p pgno %u, bytes %zu," + " offset %" PRId64 ", err %d", "WriteFile", fd, __Wpedantic_format_voidptr(item), - item - ior->pool, ((page_t *)item->single.iov_base)->pgno, bytes, + item - ior->pool, item->single.iov_base, + ((page_t *)item->single.iov_base)->pgno, bytes, item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), r.err); goto bailout_rc; } else if (unlikely(written != bytes)) { @@ -984,7 +986,9 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { assert(ior->async_waiting == ior->async_completed); for (ior_item_t *item = ior->pool; item <= ior->last;) { size_t i = 1, bytes = item->single.iov_len - ior_WriteFile_flag; + void *data = item->single.iov_base; if (bytes & ior_WriteFile_flag) { + data = Ptr64ToPtr(item->sgv[0].Buffer); bytes = ior->pagesize; /* Zap: Reading invalid data from 'item->sgv' */ MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(6385); @@ -995,11 +999,10 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { if (!HasOverlappedIoCompleted(&item->ov)) { DWORD written = 0; if (unlikely(!GetOverlappedResult(fd, &item->ov, &written, true))) { - ERROR("%s: item %p (%zu), pgno %u, bytes %zu, offset %" PRId64 - ", err %d", + ERROR("%s: item %p (%zu), addr %p pgno %u, bytes %zu," + " offset %" PRId64 ", err %d", "GetOverlappedResult", __Wpedantic_format_voidptr(item), - item - ior->pool, ((page_t *)item->single.iov_base)->pgno, - bytes, + item - ior->pool, data, ((page_t *)data)->pgno, bytes, item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), (int)GetLastError()); goto bailout_geterr; @@ -1017,10 +1020,10 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { if ((r.err & 0x80000000) && GetOverlappedResult(nullptr, &item->ov, 
&written, true)) r.err = (int)GetLastError(); - ERROR("%s: item %p (%zu), pgno %u, bytes %zu, offset %" PRId64 - ", err %d", + ERROR("%s: item %p (%zu), addr %p pgno %u, bytes %zu," + " offset %" PRId64 ", err %d", "Result", __Wpedantic_format_voidptr(item), item - ior->pool, - ((page_t *)item->single.iov_base)->pgno, bytes, + data, ((page_t *)data)->pgno, bytes, item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), (int)GetLastError()); goto bailout_rc; From b1cc8b2e9f730e2007c78e923d85880d4e01f0c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 19 Jul 2024 20:43:28 +0300 Subject: [PATCH 217/443] =?UTF-8?q?mdbx-windows:=20=D0=B8=D1=81=D0=BF?= =?UTF-8?q?=D1=80=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BD=D0=B0?= =?UTF-8?q?=D1=80=D0=B5=D0=B7=D0=BA=D0=B8=20FILE=5FSEGMENT=5FELEMENT.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ошибка слишком грубая. Похоже при переработке I/O под Windows при `git pull --rebase` потерялся коммит. К повреждению БД проблема не приводила, так как сбой происходил во время записи данных с возвратом ERROR_INVALID_PARAMETER из системного вызова. 
--- src/osal.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/osal.c b/src/osal.c index ddf637eb..d99cd630 100644 --- a/src/osal.c +++ b/src/osal.c @@ -763,11 +763,10 @@ MDBX_INTERNAL int osal_ioring_add(osal_ioring_t *ior, const size_t offset, item->sgv[0].Buffer = PtrToPtr64(data); for (size_t i = 1; i < segments; ++i) { data = ptr_disp(data, ior->pagesize); - item->sgv[slots_used].Buffer = PtrToPtr64(data); + item->sgv[i].Buffer = PtrToPtr64(data); } - item->sgv[slots_used].Buffer = 0; + item->sgv[slots_used = segments].Buffer = 0; assert((item->single.iov_len & ior_WriteFile_flag) == 0); - slots_used = segments; } ior->last_bytes = bytes; ior_last_sgvcnt(ior, item) = slots_used; From 9d9a19ae17ddbb99a5d0f69819e79e3ce555f90b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 20 Jul 2024 13:15:27 +0300 Subject: [PATCH 218/443] =?UTF-8?q?mdbx:=20=D1=84=D0=BE=D1=80=D0=BC=D0=B0?= =?UTF-8?q?=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5/=D0=BF?= =?UTF-8?q?=D1=80=D0=BE=D0=B1=D0=B5=D0=BB=D1=8B/=D0=BA=D0=BE=D1=81=D0=BC?= =?UTF-8?q?=D0=B5=D1=82=D0=B8=D0=BA=D0=B0.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dbi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dbi.c b/src/dbi.c index ffd1ca6a..c043e1c9 100644 --- a/src/dbi.c +++ b/src/dbi.c @@ -1030,7 +1030,7 @@ __cold int mdbx_enumerate_subdb(const MDBX_txn *txn, MDBX_subdb_enum_func *func, } rc = (rc == MDBX_NOTFOUND) ? 
MDBX_SUCCESS : rc; - bailout: +bailout: txn->cursors[MAIN_DBI] = cx.outer.next; return rc; } From 5c643f72b5d7f74b756c2ede2b52b9d19c55bdd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 20 Jul 2024 13:19:25 +0300 Subject: [PATCH 219/443] =?UTF-8?q?mdbx-testing:=20=D1=83=D0=BC=D0=B5?= =?UTF-8?q?=D0=BD=D1=8C=D1=88=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B4=D0=BB=D0=B8?= =?UTF-8?q?=D1=82=D0=B5=D0=BB=D1=8C=D0=BD=D0=BE=D1=81=D1=82=D0=B8/=D0=B3?= =?UTF-8?q?=D0=BB=D1=83=D0=B1=D0=B8=D0=BD=D1=8B=20=D1=82=D0=B5=D1=81=D1=82?= =?UTF-8?q?=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20`extra/crunc?= =?UTF-8?q?hed=5Fdelete`=20=D0=BD=D0=B0=20MacOS.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/extra/crunched_delete.c++ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/extra/crunched_delete.c++ b/test/extra/crunched_delete.c++ index 0693ec97..edb91d5b 100644 --- a/test/extra/crunched_delete.c++ +++ b/test/extra/crunched_delete.c++ @@ -5,7 +5,7 @@ #include #include -#if MDBX_DEBUG || !defined(NDEBUG) +#if MDBX_DEBUG || !defined(NDEBUG) || defined(__APPLE__) #define NN 1024 #else #define NN 16384 From 6b2b15ebc842b410c66a31888d11c4156ce7b4b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 20 Jul 2024 14:05:16 +0300 Subject: [PATCH 220/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D1=83=D1=81?= =?UTF-8?q?=D0=BA=D0=B0=D0=B5=D0=BC=20=D0=BB=D0=B8=D1=88=D0=BD=D0=B8=D0=B5?= =?UTF-8?q?/=D1=83=D1=81=D1=82=D0=B0=D1=80=D0=B5=D0=B2=D1=88=D0=B8=D0=B5?= =?UTF-8?q?=20=D1=84=D0=BB=D0=B0=D0=B3=D0=B8=20=D0=B4=D0=BB=D1=8F=20GC/Fre?= =?UTF-8?q?eDB=20=D0=B4=D0=BB=D1=8F=20=D1=81=D1=82=D0=B0=D1=80=D1=8B=D1=85?= =?UTF-8?q?=20=D0=91=D0=94.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- src/coherency.c | 10 ++++++++++ src/dxb.c | 6 ++++-- src/meta.c | 4 +++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/coherency.c b/src/coherency.c index 5e491b1b..d8406da1 100644 --- a/src/coherency.c +++ b/src/coherency.c @@ -158,6 +158,16 @@ __hot int coherency_check_head(MDBX_txn *txn, const meta_ptr_t head, *timestamp == 0))) return coherency_timeout(timestamp, -1, txn->env); + if (unlikely(txn->dbs[FREE_DBI].flags != MDBX_INTEGERKEY)) { + if ((txn->dbs[FREE_DBI].flags & DB_PERSISTENT_FLAGS) != MDBX_INTEGERKEY || + unaligned_peek_u64(4, &head.ptr_c->magic_and_version) == + MDBX_DATA_MAGIC) { + ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB", + txn->dbs[FREE_DBI].flags); + return MDBX_INCOMPATIBLE; + } + txn->dbs[FREE_DBI].flags &= DB_PERSISTENT_FLAGS; + } tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY); tASSERT(txn, check_sdb_flags(txn->dbs[MAIN_DBI].flags)); return MDBX_SUCCESS; diff --git a/src/dxb.c b/src/dxb.c index c91880fe..f9a62d36 100644 --- a/src/dxb.c +++ b/src/dxb.c @@ -626,7 +626,8 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, pv2pages(header.geometry.shrink_pv), unaligned_peek_u64(4, header.txnid_a), durable_caption(&header)); - if (unlikely(header.trees.gc.flags != MDBX_INTEGERKEY)) { + if (unlikely((header.trees.gc.flags & DB_PERSISTENT_FLAGS) != + MDBX_INTEGERKEY)) { ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB", header.trees.gc.flags); return MDBX_INCOMPATIBLE; @@ -1055,7 +1056,8 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, meta_t *const meta = METAPAGE(env, n); if (unlikely(unaligned_peek_u64(4, &meta->magic_and_version) != MDBX_DATA_MAGIC) || - (meta->dxbid.x | meta->dxbid.y) == 0) { + (meta->dxbid.x | meta->dxbid.y) == 0 || + (meta->gc_flags & ~DB_PERSISTENT_FLAGS)) { const txnid_t txnid = meta_is_used(&troika, n) ? 
constmeta_txnid(meta) : 0; NOTICE("%s %s" diff --git a/src/meta.c b/src/meta.c index 7214335b..3711c747 100644 --- a/src/meta.c +++ b/src/meta.c @@ -541,7 +541,9 @@ __cold int meta_validate(MDBX_env *env, meta_t *const meta, return MDBX_RESULT_TRUE; } - if (unlikely(meta->trees.gc.flags != MDBX_INTEGERKEY)) { + if (unlikely(meta->trees.gc.flags != MDBX_INTEGERKEY) && + ((meta->trees.gc.flags & DB_PERSISTENT_FLAGS) != MDBX_INTEGERKEY || + magic_and_version == MDBX_DATA_MAGIC)) { WARNING("meta[%u] has invalid %s flags 0x%u, skip it", meta_number, "GC/FreeDB", meta->trees.gc.flags); return MDBX_INCOMPATIBLE; From 6941ec17bca5a77c2fe08d5173eabb1e65c42374 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 20 Jul 2024 16:43:28 +0300 Subject: [PATCH 221/443] =?UTF-8?q?mdbx:=20=D0=BE=D0=B1=D0=BD=D0=BE=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index 4a042104..e53a9ec3 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -126,6 +126,18 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - В шаблонных классах и функциях С++ API по-умолчанию вместо `mdbx::legacy_buffer` использован тип `mdbx::default_buffer` использующий полиморфные аллокаторы С++ 17. - Удаление `DEFAULT_MAPSIZE` и изменение геометрии по-умолчанию при создании БД. +Исправления: + + - Windows: устранение ошибки (потерянный коммит при git-rebase) при + формировании вектора сегментов для `WriteFileGather()`. При выполнении + условий необходимых для проявления ошибки, запись данных неизбежно + завершалась неудачей, поэтому проблема могла приводить к падениям и + невозможности зафиксировать транзакцию, но не к повреждению БД. 
+ В текущем понимании, вероятность проявления проблемы достаточно низкая, + так как выявлена она была на собственных синтетических тестах libmdbx и + соответствующих сообщений/жалоб от пользователей не поступало. + + ## v0.13.0 от 2023-04-23 From 0a36ed3ca152536ada7cb277ec204f65aa45cf64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 21 Jul 2024 22:26:35 +0300 Subject: [PATCH 222/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D0=B5=D1=87?= =?UTF-8?q?=D0=B0=D1=82=D0=BA=D0=B8=20=D0=B2=20=D0=B8=D0=BC=D0=B5=D0=BD?= =?UTF-8?q?=D0=B8=20`ior=5Fsgv=5Fgap4terminator`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/osal.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/osal.h b/src/osal.h index 64f9eb41..813baa9a 100644 --- a/src/osal.h +++ b/src/osal.h @@ -226,20 +226,20 @@ typedef struct osal_mmap { typedef struct ior_item { #if defined(_WIN32) || defined(_WIN64) OVERLAPPED ov; -#define ior_svg_gap4terminator 1 +#define ior_sgv_gap4terminator 1 #define ior_sgv_element FILE_SEGMENT_ELEMENT #else size_t offset; #if MDBX_HAVE_PWRITEV size_t sgvcnt; -#define ior_svg_gap4terminator 0 +#define ior_sgv_gap4terminator 0 #define ior_sgv_element struct iovec #endif /* MDBX_HAVE_PWRITEV */ #endif /* !Windows */ union { MDBX_val single; #if defined(ior_sgv_element) - ior_sgv_element sgv[1 + ior_svg_gap4terminator]; + ior_sgv_element sgv[1 + ior_sgv_gap4terminator]; #endif /* ior_sgv_element */ }; } ior_item_t; From ad0ba7a66127e7dffd38fc942e08a7eb0ef45d6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 23 Jul 2024 14:16:18 +0300 Subject: [PATCH 223/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= 
=?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=81=D0=B5=D0=BA=D1=86=D0=B8?= =?UTF-8?q?=D0=B8=20=D0=BE=20`v0.12.11`=20=D0=B2=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index e53a9ec3..36a4be43 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -165,6 +165,76 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic ******************************************************************************** +## v0.12.11 "Лиза и Соня" от 2024-07-23 + +Поддерживающий выпуск с исправлением обнаруженных ошибок и устранением недочетов, +в память об убитых в Крыму девочках 2 и 9 лет. + +Лиза и Соня погибли 23 Июня 2024 на глазах у родителей, в результате +удара по общественному городскому пляжу ракетами ATACMS с кассетными +боеприпасами. Всего пострадало более 150 граждан России, в том числе 27 +детей. Ракеты были выпущенными украинскими бандеровцами/фашистами, но +полетные задания формировались и загружались военными США, а управление +и наведение ATACAMS невозможно без использования орбитальной группировки +военных спутников США. + + +``` +git diff' stat: 29 commits, 14 files changed, 379 insertions(+), 151 deletions(-) +Signed-off-by: Леонид Юрьев (Leonid Yuriev) +``` + +Значимые исправления: + + - Исправление для ОС Windows нарезки `FILE_SEGMENT_ELEMENT`. + Похоже что был потерян коммит входе работы над оптимизацией пути записи + на диск в ОС Windows. В текущем понимании, вероятность проявления ошибки + достаточно низкая, так как выявлена она была синтетическими тестами в + ходе других доработок, а соответствующих сообщений/жалоб не поступало. К + повреждению БД ошибка не приводила, так как сбой происходил до записи + данных с возвратом `ERROR_INVALID_PARAMETER` из системного вызова, т.е. + либо ошибка не проявлялась, либо транзакция не фиксировалась. 
+ + - Устранение вероятности `SIGSEGV` при включении логирования + уровня `MDBX_LOG_TRACE` в отладочных сборках. + + - Исправление генерации исключения `key_exists` в C++ API. + + - Исправление опечаток в документации и README. + + - Исправление обработки курсоров, открытых в родительских транзакциях и + закрытых до завершения вложенных транзакций. В описанной ситуации + закрытые курсоры "воскрешались", что приводило к утечке памяти + выделенной под такие курсоры. + + - Костыль для MSVC ARM/ARM64 для предотвращения ICE (Internal Compiler Error). + + - Устранение `MDBX_EINVAL` для случая вызова `mdbx_env_remove(".")`. + + - Исправление инверсии bool-результата `env::remove()` в C++ API. + + - Исправление опечатки `равно`/`неравно` в условии внутри + `update_gc()`. Существенных последствий ошибки не было, но в + определенных сценариях, сходимость требовала еще одного цикла повтора + внутри update_gc(). + +Прочие доработки: + + - Проверка совместимости флагов GC/FreeDB на случай их изменения в будущих версиях. + - Очистка сообщений `FormatMessageA()` от концевых переводов строк. + - Уточнение макроса `__always_inline` для особо яблочных версий CLANG. + - Использование `\n` вместо `std::endl` в C++ API при . + - Проверка дополнительных и пока не используемых полей в meta-страницах. + - Отключение ненужной отладки внутри `txn_merge()`. + - Исправление условий и привязки к версиям компиляторов при формировании макроса `MDBX_DEPRECATED`. + - Больше атрибутов `__cold` для редко-используемых функций (backport). + - Добавление методов `buffer::append_bytes()` и `buffer::clear_and_reserve()`. + - Отключение установки признака фатальной ошибки для не-активной среды при отличии идентификатора процесса. 
+ + +-------------------------------------------------------------------------------- + + ## v0.12.10 "СЭМ" от 2024-03-12 Поддерживающий выпуск с исправлением обнаруженных ошибок и устранением недочетов From 9309aa7e12573d2150810707758a9d66851a9d7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 23 Jul 2024 15:58:59 +0300 Subject: [PATCH 224/443] =?UTF-8?q?mdbx:=20=D1=83=D1=82=D0=BE=D1=87=D0=BD?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BA=D0=BE=D0=BC=D0=BC=D0=B5=D0=BD?= =?UTF-8?q?=D1=82=D0=B0=D1=80=D0=B8=D0=B5=D0=B2=20=D0=B2=20=D0=BA=D0=BE?= =?UTF-8?q?=D0=B4=D0=B5=20(=D0=BA=D0=BE=D1=81=D0=BC=D0=B5=D1=82=D0=B8?= =?UTF-8?q?=D0=BA=D0=B0).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tree.c b/src/tree.c index e691ac4f..a1b47350 100644 --- a/src/tree.c +++ b/src/tree.c @@ -324,7 +324,7 @@ static int node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, bool fromleft) { const size_t dbi = cursor_dbi(csrc); cASSERT(csrc, csrc->top == cdst->top); if (fromleft) { - /* If we're adding on the left, bump others up */ + /* Перемещаем с левой страницы на правую, нужно сдвинуть ki на +1 */ for (m2 = csrc->txn->cursors[dbi]; m2; m2 = m2->next) { m3 = (csrc->flags & z_inner) ? &m2->subcur->cursor : m2; if (!is_related(csrc, m3)) @@ -351,7 +351,7 @@ static int node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, bool fromleft) { } } } else { - /* Adding on the right, bump others down */ + /* Перемещаем с правой страницы на левую, нужно сдвинуть ki на -1 */ for (m2 = csrc->txn->cursors[dbi]; m2; m2 = m2->next) { m3 = (csrc->flags & z_inner) ?
&m2->subcur->cursor : m2; if (!is_related(csrc, m3)) From 69aa9e0fe1be2f63e6cb52120068919407f0c737 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 23 Jul 2024 15:59:59 +0300 Subject: [PATCH 225/443] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BB=D0=B8=D1=88=D0=BD=D0=B5?= =?UTF-8?q?=D0=B9=20=D0=B8=D1=82=D0=B5=D1=80=D0=B0=D1=86=D0=B8=D0=B8=20?= =?UTF-8?q?=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B8=20`tree=5Frebalance()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Допускаем итерацию с не-вовлечением еще не-измененных страниц, только когда страницы для объединения доступны справа и слева, Т.е. допускаем итерацию для выбора лучшей альтернативы (справа или слева), и избегаем этой итерации когда альтернативы нет. --- src/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tree.c b/src/tree.c index a1b47350..a7357ccc 100644 --- a/src/tree.c +++ b/src/tree.c @@ -855,7 +855,7 @@ int tree_rebalance(MDBX_cursor *mc) { const size_t right_room = right ? page_room(right) : 0; const size_t left_nkeys = left ? page_numkeys(left) : 0; const size_t right_nkeys = right ? 
page_numkeys(right) : 0; - bool involve = false; + bool involve = !(left && right); retry: cASSERT(mc, mc->top > 0); if (left_room > room_threshold && left_room >= right_room && From 485d6d1f509ee73830f190a9e20e9828e22f1e02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 23 Jul 2024 16:15:05 +0300 Subject: [PATCH 226/443] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5?= =?UTF-8?q?=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0/=D0=B0=D0=BA?= =?UTF-8?q?=D1=82=D1=83=D0=B0=D0=BB=D0=B8=D0=B7=D0=B0=D1=86=D0=B8=D1=8F=20?= =?UTF-8?q?ChangeLog=20=D0=B4=D0=BB=D1=8F=20v0.13.x?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 43 +++++++++++++------------------------------ 1 file changed, 13 insertions(+), 30 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 36a4be43..cac91166 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -6,9 +6,9 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic ## v0.13.1 (в процессе подготовки выпуска) -Новая версия с существенным расширением API и добавлением функционала. -В том числе, с незначительным нарушением обратной совместимости API -библиотеки. +Новая версия со сменой лицензии, существенным расширением API, +добавлением функционала и внутренними переработками. В том числе, +с незначительным нарушением обратной совместимости API библиотеки. Новое: @@ -99,6 +99,10 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Для идентификации БД добавлен UUID доступный в поле `mi_dxbid` структуры `MDBX_envinfo`, получаемой посредством `mdbx_env_info_ex()`. + - Расширение API функциями lock/unlock/upgrade/downgrade основной блокировки. + + - Добавление в API функций `mdbx_cursor_unbind()` и `mdbx_txn_release_all_cursors()`. 
+ - Расширение и доработка C++ API: - добавлен тип `mdbx::cursor::estimation_result`, а поведение методов @@ -119,39 +123,13 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - переработка `to_hex()` и `from_hex()`. Нарушение совместимости: + - Опция `MDBX_COALESCE` объявлена устаревшей, так как соответствующий функционал всегда включен начиная с предыдущей версии 0.12. - Опция `MDBX_NOTLS` объявлена устаревшей и заменена на `MDBX_NOSTICKYTHREADS`. - Опция сборки `MDBX_USE_VALGRIND` заменена на общепринятую `ENABLE_MEMCHECK`. - В структуре `MDBX_envinfo` серии полей вида `meta1`, `meta2` и `meta3` заменены на массивы вида `meta[3]`. - В шаблонных классах и функциях С++ API по-умолчанию вместо `mdbx::legacy_buffer` использован тип `mdbx::default_buffer` использующий полиморфные аллокаторы С++ 17. - Удаление `DEFAULT_MAPSIZE` и изменение геометрии по-умолчанию при создании БД. - -Исправления: - - - Windows: устранение ошибки (потерянный коммит при git-rebase) при - формировании вектора сегментов для `WriteFileGather()`. При выполнении - условий необходимых для проявления ошибки, запись данных неизбежно - завершалась неудачей, поэтому проблема могла приводить к падениям и - невозможности зафиксировать транзакцию, но не к повреждению БД. - В текущем понимании, вероятность проявления проблемы достаточно низкая, - так как выявлена она была на собственных синтетических тестах libmdbx и - соответствующих сообщений/жалоб от пользователей не поступало. - - - -## v0.13.0 от 2023-04-23 - -Не выпуск, а начало ветки `0.13` с новым функционалом и изменением API. - -Новое: - - - Расширение API функционалом проверки целостности структуры БД, с - переработкой и переноса функционала утилиты `mdbx_chk` внутрь библиотеки. - - - Расширение API функциями lock/unlock/upgrade/downgrade основной блокировки. - - - Добавление в API функций `mdbx_cursor_unbind()` и `mdbx_txn_release_all_cursors()`. 
- - Возвращение `MDBX_TXN_INVALID` (`INT32_MIN`) вместо `-1` из `mdbx_txn_flags()` при передаче невалидной транзакции. @@ -161,6 +139,11 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Добавление `--read-var-info=yes` для Valgrind. - Вывод из `mdbx_chk` информации об уровне детализации/verbosity. +## v0.13.0 от 2023-04-23 + +Технический тэг, отмечающий начало ветки `0.13` +с новым функционалом и изменением API. + ******************************************************************************** From a430b3b288a1695dd3daf3facf09ebc98590e7e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 24 Jul 2024 11:22:59 +0300 Subject: [PATCH 227/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D0=B5=D1=87?= =?UTF-8?q?=D0=B0=D1=82=D0=BA=D0=B8=20`0x%u`=20=D0=B2=20=D0=BB=D0=BE=D0=B3?= =?UTF-8?q?=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/coherency.c | 4 ++-- src/dxb.c | 4 ++-- src/meta.c | 4 ++-- src/txn.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/coherency.c b/src/coherency.c index d8406da1..41986ac1 100644 --- a/src/coherency.c +++ b/src/coherency.c @@ -162,8 +162,8 @@ __hot int coherency_check_head(MDBX_txn *txn, const meta_ptr_t head, if ((txn->dbs[FREE_DBI].flags & DB_PERSISTENT_FLAGS) != MDBX_INTEGERKEY || unaligned_peek_u64(4, &head.ptr_c->magic_and_version) == MDBX_DATA_MAGIC) { - ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB", - txn->dbs[FREE_DBI].flags); + ERROR("unexpected/invalid db-flags 0x%x for %s", txn->dbs[FREE_DBI].flags, + "GC/FreeDB"); return MDBX_INCOMPATIBLE; } txn->dbs[FREE_DBI].flags &= DB_PERSISTENT_FLAGS; diff --git a/src/dxb.c b/src/dxb.c index f9a62d36..e15e0832 100644 --- a/src/dxb.c +++ 
b/src/dxb.c @@ -628,8 +628,8 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, if (unlikely((header.trees.gc.flags & DB_PERSISTENT_FLAGS) != MDBX_INTEGERKEY)) { - ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB", - header.trees.gc.flags); + ERROR("unexpected/invalid db-flags 0x%x for %s", header.trees.gc.flags, + "GC/FreeDB"); return MDBX_INCOMPATIBLE; } env->dbs_flags[FREE_DBI] = DB_VALID | MDBX_INTEGERKEY; diff --git a/src/meta.c b/src/meta.c index 3711c747..3f2bfcd6 100644 --- a/src/meta.c +++ b/src/meta.c @@ -544,13 +544,13 @@ __cold int meta_validate(MDBX_env *env, meta_t *const meta, if (unlikely(meta->trees.gc.flags != MDBX_INTEGERKEY) && ((meta->trees.gc.flags & DB_PERSISTENT_FLAGS) != MDBX_INTEGERKEY || magic_and_version == MDBX_DATA_MAGIC)) { - WARNING("meta[%u] has invalid %s flags 0x%u, skip it", meta_number, + WARNING("meta[%u] has invalid %s flags 0x%x, skip it", meta_number, "GC/FreeDB", meta->trees.gc.flags); return MDBX_INCOMPATIBLE; } if (unlikely(!check_sdb_flags(meta->trees.main.flags))) { - WARNING("meta[%u] has invalid %s flags 0x%u, skip it", meta_number, + WARNING("meta[%u] has invalid %s flags 0x%x, skip it", meta_number, "MainDB", meta->trees.main.flags); return MDBX_INCOMPATIBLE; } diff --git a/src/txn.c b/src/txn.c index 59c661f1..276692bc 100644 --- a/src/txn.c +++ b/src/txn.c @@ -1222,8 +1222,8 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { } if (unlikely(txn->dbs[FREE_DBI].flags != MDBX_INTEGERKEY)) { - ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB", - txn->dbs[FREE_DBI].flags); + ERROR("unexpected/invalid db-flags 0x%x for %s", txn->dbs[FREE_DBI].flags, + "GC/FreeDB"); rc = MDBX_INCOMPATIBLE; goto bailout; } From cb743d44fc6b7cbf69f4688d5218f595b382bbb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 24 Jul 2024 11:27:41 +0300 Subject: [PATCH 228/443] 
=?UTF-8?q?mdbx:=20=D0=BD=D0=BE=D0=B2=D1=8B=D0=B9/?= =?UTF-8?q?=D0=BF=D0=BE=D0=BF=D1=80=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=BD?= =?UTF-8?q?=D1=8B=D0=B9=20clang-format.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 2 +- src/chk.c | 2 +- src/lck-posix.c | 20 ++++++++++---------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/mdbx.h b/mdbx.h index 95caf473..edfd3df2 100644 --- a/mdbx.h +++ b/mdbx.h @@ -559,7 +559,7 @@ typedef mode_t mdbx_mode_t; } \ MDBX_NOSANITIZE_ENUM MDBX_CXX01_CONSTEXPR ENUM operator&(unsigned a, \ ENUM b) { \ - return ENUM(a & unsigned(b)); \ + return ENUM(a &unsigned(b)); \ } \ MDBX_NOSANITIZE_ENUM MDBX_CXX14_CONSTEXPR ENUM &operator&=(ENUM &a, \ ENUM b) { \ diff --git a/src/chk.c b/src/chk.c index 88675536..be28cf0a 100644 --- a/src/chk.c +++ b/src/chk.c @@ -580,8 +580,8 @@ static void histogram_acc(const size_t n, struct MDBX_chk_histogram *p) { // использованы еще не все слоты, добавляем интервал assert(i < size); if (p->ranges[i].count) { - assert(i < last); // раздвигаем + assert(i < last); #ifdef __COVERITY__ if (i < last) /* avoid Coverity false-positive issue */ #endif /* __COVERITY__ */ diff --git a/src/lck-posix.c b/src/lck-posix.c index 43ddd8ce..083817de 100644 --- a/src/lck-posix.c +++ b/src/lck-posix.c @@ -621,16 +621,16 @@ __cold MDBX_INTERNAL int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, if LCK already opened/used inside current process */ ; - /* FIXME: Unfortunately, there is no other reliable way but to long testing - * on each platform. On the other hand, behavior like FreeBSD is incorrect - * and we can expect it to be rare. Moreover, even on FreeBSD without - * additional in-process initialization, the probability of an problem - * occurring is vanishingly small, and the symptom is a return of EINVAL - * while locking a mutex. 
In other words, in the worst case, the problem - * results in an EINVAL error at the start of the transaction, but NOT data - * loss, nor database corruption, nor other fatal troubles. Thus, the code - * below I am inclined to think the workaround for erroneous platforms (like - * FreeBSD), rather than a defect of libmdbx. */ + /* FIXME: Unfortunately, there is no other reliable way but to long testing + * on each platform. On the other hand, behavior like FreeBSD is incorrect + * and we can expect it to be rare. Moreover, even on FreeBSD without + * additional in-process initialization, the probability of an problem + * occurring is vanishingly small, and the symptom is a return of EINVAL + * while locking a mutex. In other words, in the worst case, the problem + * results in an EINVAL error at the start of the transaction, but NOT data + * loss, nor database corruption, nor other fatal troubles. Thus, the code + * below I am inclined to think the workaround for erroneous platforms (like + * FreeBSD), rather than a defect of libmdbx. 
*/ #if defined(__FreeBSD__) /* seems that shared mutexes on FreeBSD required in-process initialization */ (void)global_uniqueness_flag; From 2e7d325cf1ac5ff0bb093e21516223b54d261bce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 24 Jul 2024 15:27:48 +0300 Subject: [PATCH 229/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=BE=D0=B4=D0=B4=D0=B5?= =?UTF-8?q?=D1=80=D0=B6=D0=BA=D0=B8=20`MDBX=5FOUSTED`=20=D0=B2=20`mdbx=5Fs?= =?UTF-8?q?trerror()`=20=D0=B8=20C++=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 3 ++- mdbx.h++ | 1 + src/mdbx.c++ | 3 ++- src/misc.c | 3 +++ 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/mdbx.h b/mdbx.h index edfd3df2..9ae8b798 100644 --- a/mdbx.h +++ b/mdbx.h @@ -1982,7 +1982,8 @@ typedef enum MDBX_error { * corresponding DBI-handle could be (re)used */ MDBX_DANGLING_DBI = -30412, - /** Транзакция была асинхронно отменена/вытеснена */ + /** The parked read transaction was ousted for the sake of + * recycling old MVCC snapshots.
*/ MDBX_OUSTED = -30411, /* The last of MDBX-added error codes */ diff --git a/mdbx.h++ b/mdbx.h++ index 58a2b7ce..9d2197bb 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -582,6 +582,7 @@ MDBX_DECLARE_EXCEPTION(transaction_full); MDBX_DECLARE_EXCEPTION(transaction_overlapping); MDBX_DECLARE_EXCEPTION(duplicated_lck_file); MDBX_DECLARE_EXCEPTION(dangling_map_id); +MDBX_DECLARE_EXCEPTION(transaction_ousted); #undef MDBX_DECLARE_EXCEPTION [[noreturn]] LIBMDBX_API void throw_too_small_target_buffer(); diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 583b46a4..adef7c34 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -339,7 +339,7 @@ DEFINE_EXCEPTION(transaction_full) DEFINE_EXCEPTION(transaction_overlapping) DEFINE_EXCEPTION(duplicated_lck_file) DEFINE_EXCEPTION(dangling_map_id) - +DEFINE_EXCEPTION(transaction_ousted) #undef DEFINE_EXCEPTION __cold const char *error::what() const noexcept { @@ -428,6 +428,7 @@ __cold void error::throw_exception() const { CASE_EXCEPTION(transaction_overlapping, MDBX_TXN_OVERLAPPING); CASE_EXCEPTION(duplicated_lck_file, MDBX_DUPLICATED_CLK); CASE_EXCEPTION(dangling_map_id, MDBX_DANGLING_DBI); + CASE_EXCEPTION(transaction_ousted, MDBX_OUSTED); #undef CASE_EXCEPTION default: if (is_mdbx_error()) diff --git a/src/misc.c b/src/misc.c index 5c61d641..b6839c11 100644 --- a/src/misc.c +++ b/src/misc.c @@ -168,6 +168,9 @@ __cold const char *mdbx_liberr2str(int errnum) { case MDBX_DANGLING_DBI: return "MDBX_DANGLING_DBI: Some cursors and/or other resources should be" " closed before subDb or corresponding DBI-handle could be (re)used"; + case MDBX_OUSTED: + return "MDBX_OUSTED: The parked read transaction was outed for the sake" + " of recycling old MVCC snapshots"; default: return nullptr; } From 7873118cdb4faeb745f230da72729ccea1c077bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 24 Jul 2024 19:58:02 +0300 Subject: [PATCH 230/443] 
=?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20(=D1=80=D0=B0=D1=81)?= =?UTF-8?q?=D0=BF=D0=B0=D1=80=D0=BA=D0=BE=D0=B2=D0=BA=D0=B8=20=D1=82=D1=80?= =?UTF-8?q?=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B9=20=D1=87=D1=82?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D1=8F=20=D0=B2=20C++=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 9d2197bb..7ef55c18 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4357,12 +4357,19 @@ public: //---------------------------------------------------------------------------- - /// \brief Reset a read-only transaction. + /// \brief Reset read-only transaction. inline void reset_reading(); - /// \brief Renew a read-only transaction. + /// \brief Renew read-only transaction. inline void renew_reading(); + /// \brief Park read-only transaction. + inline void park_reading(bool autounpark = true); + + /// \brief Resume parked read-only transaction. + /// \returns True if transaction was restarted while `restart_if_ousted=true`. + inline bool unpark_reading(bool restart_if_ousted = true); + /// \brief Start nested write transaction. 
txn_managed start_nested(); @@ -6450,6 +6457,14 @@ inline void txn::renew_reading() { error::success_or_throw(::mdbx_txn_renew(handle_)); } +inline void txn::park_reading(bool autounpark) { + error::success_or_throw(::mdbx_txn_park(handle_, autounpark)); +} + +inline bool txn::unpark_reading(bool restart_if_ousted) { + return error::boolean_or_throw(::mdbx_txn_unpark(handle_, restart_if_ousted)); +} + inline txn::info txn::get_info(bool scan_reader_lock_table) const { txn::info r; error::success_or_throw(::mdbx_txn_info(handle_, &r, scan_reader_lock_table)); From 9e3a36b74df1f1fd9ffb3547018b24ffea57469a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 27 Jul 2024 12:44:06 +0300 Subject: [PATCH 231/443] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5?= =?UTF-8?q?=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20`txn=5Fend()?= =?UTF-8?q?`=20=D0=B4=D0=BB=D1=8F=20=D1=83=D1=81=D1=82=D1=80=D0=B0=D0=BD?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D1=8F=20=D0=BB=D0=B8=D1=88=D0=BD=D0=B8=D1=85?= =?UTF-8?q?=20`MDBX=5FTXN=5FOUSTED`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/txn.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/txn.c b/src/txn.c index 276692bc..0742cb1c 100644 --- a/src/txn.c +++ b/src/txn.c @@ -1361,9 +1361,9 @@ int txn_end(MDBX_txn *txn, unsigned mode) { eASSERT(env, txn->txnid == slot->txnid.weak && slot->txnid.weak >= env->lck->cached_oldest.weak); } else { - if ((mode & TXN_END_OUSTED) == 0 && + if ((mode & TXN_END_OPMASK) != TXN_END_OUSTED && safe64_read(&slot->tid) == MDBX_TID_TXN_OUSTED) - mode += TXN_END_OUSTED; + mode = (mode & TXN_END_OPMASK) | TXN_END_OUSTED; do { safe64_reset(&slot->txnid, false); atomic_store64(&slot->tid, txn->owner, mo_AcquireRelease); @@ -1391,9 +1391,9 @@ int txn_end(MDBX_txn *txn, unsigned mode) { 
imports.srwl_ReleaseShared(&env->remap_guard); #endif txn->n_dbi = 0; /* prevent further DBI activity */ - txn->flags = (mode & TXN_END_OUSTED) - ? MDBX_TXN_RDONLY | MDBX_TXN_FINISHED | MDBX_TXN_OUSTED - : MDBX_TXN_RDONLY | MDBX_TXN_FINISHED; + txn->flags = ((mode & TXN_END_OPMASK) != TXN_END_OUSTED) + ? MDBX_TXN_RDONLY | MDBX_TXN_FINISHED + : MDBX_TXN_RDONLY | MDBX_TXN_FINISHED | MDBX_TXN_OUSTED; txn->owner = 0; } else if (!(txn->flags & MDBX_TXN_FINISHED)) { ENSURE(env, From dc7f15c63e3f5adf00847e7b97f7392ace40fc21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 27 Jul 2024 12:47:21 +0300 Subject: [PATCH 232/443] =?UTF-8?q?mdbx-tools:=20=D0=BE=D1=82=D0=BE=D0=B1?= =?UTF-8?q?=D1=80=D0=B0=D0=B6=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=81=D1=82=D0=B0?= =?UTF-8?q?=D1=82=D1=83=D1=81=D0=BE=D0=B2=20`parked/ousted`=20=D0=B4=D0=BB?= =?UTF-8?q?=D1=8F=20=D1=82=D1=80=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8?= =?UTF-8?q?=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tools/stat.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/tools/stat.c b/src/tools/stat.c index 8ad82f9c..76703a15 100644 --- a/src/tools/stat.c +++ b/src/tools/stat.c @@ -71,8 +71,14 @@ static int reader_list_func(void *ctx, int num, int slot, mdbx_pid_t pid, "pid", (int)sizeof(size_t) * 2, "thread", "txnid", "lag", "used", "retained"); - printf(" %3d)\t[%d]\t%6" PRIdSIZE " %*" PRIxPTR, num, slot, (size_t)pid, - (int)sizeof(size_t) * 2, (uintptr_t)thread); + if (thread < (mdbx_tid_t)((intptr_t)MDBX_TID_TXN_OUSTED)) + printf(" %3d)\t[%d]\t%6" PRIdSIZE " %*" PRIxPTR, num, slot, (size_t)pid, + (int)sizeof(size_t) * 2, (uintptr_t)thread); + else + printf(" %3d)\t[%d]\t%6" PRIdSIZE " %sed", num, slot, (size_t)pid, + ((uintptr_t)thread == (uintptr_t)MDBX_TID_TXN_PARKED) ? 
"park" + : "oust"); + if (txnid) printf(" %20" PRIu64 " %10" PRIu64 " %12.1fM %12.1fM\n", txnid, lag, bytes_used / 1048576.0, bytes_retained / 1048576.0); From 0a9d96affdd4d4c555522904400f6ca7db680b86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 1 Aug 2024 22:03:45 +0300 Subject: [PATCH 233/443] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D1=8F=20=D0=BB=D0=BE=D0=B6=D0=BD=D0=BE?= =?UTF-8?q?=D0=B9=20=D0=BE=D1=88=D0=B8=D0=B1=D0=BA=D0=B8=20=D0=BD=D0=B5-?= =?UTF-8?q?=D0=BA=D0=BE=D0=B3=D0=B5=D1=80=D0=B5=D0=BD=D1=82=D0=BD=D0=BE?= =?UTF-8?q?=D1=81=D1=82=D0=B8=20=D0=BF=D1=80=D0=B8=20=D0=B8=D1=81=D0=BF?= =?UTF-8?q?=D0=BE=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B8=20?= =?UTF-8?q?mdbx=5Fdbi=5Fsequence(MAIN=5FDBI)=20=D0=B1=D0=B5=D0=B7=20=D0=B4?= =?UTF-8?q?=D1=80=D1=83=D0=B3=D0=B8=D1=85=20=D0=B8=D0=B7=D0=BC=D0=B5=D0=BD?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Временная подпорка для coherency_check(), которую в перспективе следует заменить вместе с переделкой установки mod_txnid. Суть проблемы: - coherency_check() в качестве одного из критериев "когерентности" проверяет условие meta.maindb.mod_txnid == maindb.root->txnid; - при обновлении maindb.sequence высталяется DBI_DIRTY, что приведет к обновлению meta.maindb.mod_txnid = current_txnid; - однако, если в само дерево maindb обновление не вносились и оно не пустое, то корневая страницы останеться с прежним txnid и из-за этого ложно сработает coherency_check(). Временное (текущее) решение: Принудительно обновляем корневую страницу в описанной выше ситуации. Это устраняет проблему, но и не создает рисков регресса. 
Итоговое решение, которое предстоит реализовать: - изменить семантику установки/обновления mod_txnid, привязав его строго к изменению b-tree, но не атрибутов; - обновлять mod_txnid при фиксации вложенных транзакций; - для dbi-хендлов пользовательских subDb (видимо) можно оставить DBI_DIRTY в качестве признака необходимости обновления записи subDb в MainDB, при этом взводить DBI_DIRTY вместе с обновлением mod_txnid, в том числе при обновлении sequence. - для MAIN_DBI при обновлении sequence не следует взводить DBI_DIRTY и/или обновлять mod_txnid, а только взводить MDBX_TXN_DIRTY. - альтернативно, можно перераспределить флажки-признаки dbi_state, чтобы различать состояние dirty-tree и dirty-attributes. --- src/misc.c | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/src/misc.c b/src/misc.c index b6839c11..415eee1c 100644 --- a/src/misc.c +++ b/src/misc.c @@ -60,9 +60,48 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, return MDBX_RESULT_TRUE; tASSERT(txn, new > dbs->sequence); + if ((txn->dbi_state[dbi] & DBI_DIRTY) == 0) { + txn->flags |= MDBX_TXN_DIRTY; + txn->dbi_state[dbi] |= DBI_DIRTY; + if (unlikely(dbi == MAIN_DBI) && txn->dbs[MAIN_DBI].root != P_INVALID) { + /* LY: Временная подпорка для coherency_check(), которую в перспективе + * следует заменить вместе с переделкой установки mod_txnid. + * + * Суть проблемы: + * - coherency_check() в качестве одного из критериев "когерентности" + * проверяет условие meta.maindb.mod_txnid == maindb.root->txnid; + * - при обновлении maindb.sequence высталяется DBI_DIRTY, что приведет + * к обновлению meta.maindb.mod_txnid = current_txnid; + * - однако, если в само дерево maindb обновление не вносились и оно + * не пустое, то корневая страницы останеться с прежним txnid и из-за + * этого ложно сработает coherency_check(). + * + * Временное (текущее) решение: Принудительно обновляем корневую + * страницу в описанной выше ситуации. 
Это устраняет проблему, но и + * не создает рисков регресса. + * + * FIXME: Итоговое решение, которое предстоит реализовать: + * - изменить семантику установки/обновления mod_txnid, привязав его + * строго к изменению b-tree, но не атрибутов; + * - обновлять mod_txnid при фиксации вложенных транзакций; + * - для dbi-хендлов пользовательских subDb (видимо) можно оставить + * DBI_DIRTY в качестве признака необходимости обновления записи + * subDb в MainDB, при этом взводить DBI_DIRTY вместе с обновлением + * mod_txnid, в том числе при обновлении sequence. + * - для MAIN_DBI при обновлении sequence не следует взводить DBI_DIRTY + * и/или обновлять mod_txnid, а только взводить MDBX_TXN_DIRTY. + * - альтернативно, можно перераспределить флажки-признаки dbi_state, + * чтобы различать состояние dirty-tree и dirty-attributes. */ + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, MAIN_DBI); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + rc = tree_search(&cx.outer, nullptr, Z_MODIFY | Z_ROOTONLY); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + } dbs->sequence = new; - txn->flags |= MDBX_TXN_DIRTY; - txn->dbi_state[dbi] |= DBI_DIRTY; } return MDBX_SUCCESS; From f34ebc853d58652fdb514f68993dabb26d98000f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 31 Jul 2024 22:02:01 +0300 Subject: [PATCH 234/443] =?UTF-8?q?mdbx-testing:=20=D0=B4=D0=BE=D0=B1?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fdbi=5Fsequ?= =?UTF-8?q?ence()`=20=D0=B2=20jitter-=D1=81=D1=86=D0=B5=D0=BD=D0=B0=D1=80?= =?UTF-8?q?=D0=B8=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/jitter.c++ | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/jitter.c++ b/test/jitter.c++ index 8a4cd0b3..1f56978c 100644 --- a/test/jitter.c++ +++ b/test/jitter.c++ @@ -165,7 +165,11 @@ bool 
testcase_jitter::run() { } } if (flipcoin()) { - // err = + uint64_t unused; + err = mdbx_dbi_sequence(txn_guard.get(), MAIN_DBI, &unused, + mode_readonly() ? 0 : 1); + if (err) + failure_perror("mdbx_dbi_sequence()", err); } txn_end(flipcoin()); From e7488bc30cf00cde227ea3413da88fc834e17393 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 2 Aug 2024 12:12:29 +0300 Subject: [PATCH 235/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Ftxn=5Fcopy2fd()`=20?= =?UTF-8?q?=D0=B8=20`mdbx=5Ftxn=5Fcopy2pathname()`,=20=D0=B2=D0=BA=D0=BB?= =?UTF-8?q?=D1=8E=D1=87=D0=B0=D1=8F=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB=D0=BD?= =?UTF-8?q?=D0=B8=D1=82=D0=B5=D0=BB=D1=8C=D0=BD=D1=8B=D0=B5=20=D0=BE=D0=BF?= =?UTF-8?q?=D1=86=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 98 ++++++++++++++++- src/copy.c | 313 +++++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 325 insertions(+), 86 deletions(-) diff --git a/mdbx.h b/mdbx.h index 9ae8b798..0d04c73f 100644 --- a/mdbx.h +++ b/mdbx.h @@ -1713,7 +1713,24 @@ typedef enum MDBX_copy_flags { MDBX_CP_COMPACT = 1u, /** Force to make resizable copy, i.e. 
dynamic size instead of fixed */ - MDBX_CP_FORCE_DYNAMIC_SIZE = 2u + MDBX_CP_FORCE_DYNAMIC_SIZE = 2u, + + /** Don't explicitly flush the written data to an output media */ + MDBX_CP_DONT_FLUSH = 4u, + + /** Use read transaction parking during copying MVCC-snapshot + * \see mdbx_txn_park() */ + MDBX_CP_THROTTLE_MVCC = 8u, + + /** Abort/dispose passed transaction after copy + * \see mdbx_txn_copy2fd() \see mdbx_txn_copy2pathname() */ + MDBX_CP_DISPOSE_TXN = 16u, + + /** Enable renew/restart read transaction in case it use outdated + * MVCC shapshot, otherwise the \ref MDBX_MVCC_RETARDED will be returned + * \see mdbx_txn_copy2fd() \see mdbx_txn_copy2pathname() */ + MDBX_CP_RENEW_TXN = 32u + } MDBX_copy_flags_t; DEFINE_ENUM_FLAG_OPERATORS(MDBX_copy_flags) @@ -1986,8 +2003,12 @@ typedef enum MDBX_error { * recycling old MVCC snapshots. */ MDBX_OUSTED = -30411, + /** MVCC snapshot used by read transaction is outdated and could not be + * copied since corresponding meta-pages was overwritten. */ + MDBX_MVCC_RETARDED = -30410, + /* The last of MDBX-added error codes */ - MDBX_LAST_ADDED_ERRCODE = MDBX_OUSTED, + MDBX_LAST_ADDED_ERRCODE = MDBX_MVCC_RETARDED, #if defined(_WIN32) || defined(_WIN64) MDBX_ENODATA = ERROR_HANDLE_EOF, @@ -2582,6 +2603,8 @@ LIBMDBX_API int mdbx_env_deleteW(const wchar_t *pathname, * transaction. See long-lived transactions under \ref restrictions section. * * \note On Windows the \ref mdbx_env_copyW() is recommended to use. + * \see mdbx_env_copy2fd() + * \see mdbx_txn_copy2pathname() * * \param [in] env An environment handle returned by mdbx_env_create(). * It must have already been opened successfully. @@ -2608,12 +2631,56 @@ LIBMDBX_API int mdbx_env_deleteW(const wchar_t *pathname, LIBMDBX_API int mdbx_env_copy(MDBX_env *env, const char *dest, MDBX_copy_flags_t flags); +/** \brief Copy an MDBX environment by given read transaction to the specified + * path, with options. 
+ * \ingroup c_extra + * + * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. + * \note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under \ref restrictions section. + * + * \note On Windows the \ref mdbx_txn_copy2pathnameW() is recommended to use. + * \see mdbx_txn_copy2fd() + * \see mdbx_env_copy() + * + * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). + * \param [in] dest The pathname of a file in which the copy will reside. + * This file must not be already exist, but parent directory + * must be writable. + * \param [in] flags Specifies options for this operation. This parameter + * must be bitwise OR'ing together any of the constants + * described here: + * + * - \ref MDBX_CP_DEFAULTS + * Perform copy as-is without compaction, etc. + * + * - \ref MDBX_CP_COMPACT + * Perform compaction while copying: omit free pages and sequentially + * renumber all pages in output. This option consumes little bit more + * CPU for processing, but may running quickly than the default, on + * account skipping free pages. + * + * - \ref MDBX_CP_FORCE_DYNAMIC_SIZE + * Force to make resizable copy, i.e. dynamic size instead of fixed. + * + * \returns A non-zero error value on failure and 0 on success. */ +LIBMDBX_API int mdbx_txn_copy2pathname(MDBX_txn *txn, const char *dest, + MDBX_copy_flags_t flags); + #if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) /** \copydoc mdbx_env_copy() * \note Available only on Windows. * \see mdbx_env_copy() */ LIBMDBX_API int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest, MDBX_copy_flags_t flags); + +/** \copydoc mdbx_txn_copy2pathname() + * \note Available only on Windows. 
+ * \see mdbx_txn_copy2pathname() */ +LIBMDBX_API int mdbx_txn_copy2pathnameW(MDBX_txn *txn, const wchar_t *dest, + MDBX_copy_flags_t flags); #endif /* Windows */ /** \brief Copy an environment to the specified file descriptor, with @@ -2623,6 +2690,7 @@ LIBMDBX_API int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest, * This function may be used to make a backup of an existing environment. * No lockfile is created, since it gets recreated at need. * \see mdbx_env_copy() + * \see mdbx_txn_copy2fd() * * \note This call can trigger significant file size growth if run in * parallel with write transactions, because it employs a read-only @@ -2642,6 +2710,32 @@ LIBMDBX_API int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest, LIBMDBX_API int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, MDBX_copy_flags_t flags); +/** \brief Copy an environment by given read transaction to the specified file + * descriptor, with options. + * \ingroup c_extra + * + * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. + * \see mdbx_txn_copy2pathname() + * \see mdbx_env_copy2fd() + * + * \note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under \ref restrictions + * section. + * + * \note Fails if the environment has suffered a page leak and the destination + * file descriptor is associated with a pipe, socket, or FIFO. + * + * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). + * \param [in] fd The file descriptor to write the copy to. It must have + * already been opened for Write access. + * \param [in] flags Special options for this operation. \see mdbx_env_copy() + * + * \returns A non-zero error value on failure and 0 on success. 
*/ +LIBMDBX_API int mdbx_txn_copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, + MDBX_copy_flags_t flags); + /** \brief Statistics for a database in the environment * \ingroup c_statinfo * \see mdbx_env_stat_ex() \see mdbx_dbi_stat() */ diff --git a/src/copy.c b/src/copy.c index 4455bdf2..7802ae17 100644 --- a/src/copy.c +++ b/src/copy.c @@ -8,6 +8,7 @@ typedef struct compacting_context { MDBX_env *env; MDBX_txn *txn; + MDBX_copy_flags_t flags; pgno_t first_unallocated; osal_condpair_t condpair; volatile unsigned head; @@ -80,7 +81,11 @@ __cold static int compacting_toggle_write_buffers(ctx_t *ctx) { ctx->head += 1; osal_condpair_signal(&ctx->condpair, true); while (!ctx->error && ctx->head - ctx->tail == 2 /* both buffers in use */) { + if (ctx->flags & MDBX_CP_THROTTLE_MVCC) + mdbx_txn_park(ctx->txn, false); int err = osal_condpair_wait(&ctx->condpair, false); + if (err == MDBX_SUCCESS && (ctx->flags & MDBX_CP_THROTTLE_MVCC) != 0) + err = mdbx_txn_unpark(ctx->txn, false); if (err != MDBX_SUCCESS) ctx->error = err; } @@ -362,7 +367,7 @@ __cold static void meta_make_sizeable(meta_t *meta) { } } -__cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *read_txn, +__cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *txn, mdbx_filehandle_t fd, uint8_t *buffer, const bool dest_is_pipe, const MDBX_copy_flags_t flags) { @@ -370,36 +375,40 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *read_txn, uint8_t *const data_buffer = buffer + ceil_powerof2(meta_bytes, globals.sys_pagesize); meta_t *const meta = meta_init_triplet(env, buffer); - meta_set_txnid(env, meta, read_txn->txnid); + meta_set_txnid(env, meta, txn->txnid); if (flags & MDBX_CP_FORCE_DYNAMIC_SIZE) meta_make_sizeable(meta); /* copy canary sequences if present */ - if (read_txn->canary.v) { - meta->canary = read_txn->canary; + if (txn->canary.v) { + meta->canary = txn->canary; meta->canary.v = constmeta_txnid(meta); } - if (read_txn->dbs[MAIN_DBI].root == P_INVALID) { + if 
(txn->dbs[MAIN_DBI].root == P_INVALID) { /* When the DB is empty, handle it specially to * fix any breakage like page leaks from ITS#8174. */ - meta->trees.main.flags = read_txn->dbs[MAIN_DBI].flags; + meta->trees.main.flags = txn->dbs[MAIN_DBI].flags; compacting_fixup_meta(env, meta); if (dest_is_pipe) { + if (flags & MDBX_CP_THROTTLE_MVCC) + mdbx_txn_park(txn, false); int rc = osal_write(fd, buffer, meta_bytes); + if (likely(rc == MDBX_SUCCESS) && (flags & MDBX_CP_THROTTLE_MVCC) != 0) + rc = mdbx_txn_unpark(txn, false); if (unlikely(rc != MDBX_SUCCESS)) return rc; } } else { /* Count free pages + GC pages. */ cursor_couple_t couple; - int rc = cursor_init(&couple.outer, read_txn, FREE_DBI); + int rc = cursor_init(&couple.outer, txn, FREE_DBI); if (unlikely(rc != MDBX_SUCCESS)) return rc; - pgno_t gc_npages = read_txn->dbs[FREE_DBI].branch_pages + - read_txn->dbs[FREE_DBI].leaf_pages + - read_txn->dbs[FREE_DBI].large_pages; + pgno_t gc_npages = txn->dbs[FREE_DBI].branch_pages + + txn->dbs[FREE_DBI].leaf_pages + + txn->dbs[FREE_DBI].large_pages; MDBX_val key, data; rc = outer_first(&couple.outer, &key, &data); while (rc == MDBX_SUCCESS) { @@ -410,7 +419,7 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *read_txn, "invalid GC-record length", data.iov_len); return MDBX_CORRUPTED; } - if (unlikely(!pnl_check(pnl, read_txn->geo.first_unallocated))) { + if (unlikely(!pnl_check(pnl, txn->geo.first_unallocated))) { ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC-record content"); return MDBX_CORRUPTED; @@ -421,9 +430,8 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *read_txn, if (unlikely(rc != MDBX_NOTFOUND)) return rc; - meta->geometry.first_unallocated = - read_txn->geo.first_unallocated - gc_npages; - meta->trees.main = read_txn->dbs[MAIN_DBI]; + meta->geometry.first_unallocated = txn->geo.first_unallocated - gc_npages; + meta->trees.main = txn->dbs[MAIN_DBI]; ctx_t ctx; memset(&ctx, 0, sizeof(ctx)); @@ -437,16 
+445,21 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *read_txn, ctx.first_unallocated = NUM_METAS; ctx.env = env; ctx.fd = fd; - ctx.txn = read_txn; + ctx.txn = txn; + ctx.flags = flags; osal_thread_t thread; int thread_err = osal_thread_create(&thread, compacting_write_thread, &ctx); if (likely(thread_err == MDBX_SUCCESS)) { if (dest_is_pipe) { if (!meta->trees.main.mod_txnid) - meta->trees.main.mod_txnid = read_txn->txnid; + meta->trees.main.mod_txnid = txn->txnid; compacting_fixup_meta(env, meta); + if (flags & MDBX_CP_THROTTLE_MVCC) + mdbx_txn_park(txn, false); rc = osal_write(fd, buffer, meta_bytes); + if (likely(rc == MDBX_SUCCESS) && (flags & MDBX_CP_THROTTLE_MVCC) != 0) + rc = mdbx_txn_unpark(txn, false); } if (likely(rc == MDBX_SUCCESS)) rc = compacting_walk_tree(&ctx, &meta->trees.main); @@ -495,6 +508,9 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *read_txn, compacting_fixup_meta(env, meta); } + if (flags & MDBX_CP_THROTTLE_MVCC) + mdbx_txn_park(txn, false); + /* Extend file if required */ if (meta->geometry.now != meta->geometry.first_unallocated) { const size_t whole_size = pgno2bytes(env, meta->geometry.now); @@ -516,46 +532,78 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *read_txn, return MDBX_SUCCESS; } -__cold static int copy_asis(MDBX_env *env, MDBX_txn *read_txn, - mdbx_filehandle_t fd, uint8_t *buffer, - const bool dest_is_pipe, +//---------------------------------------------------------------------------- + +__cold static int copy_asis(MDBX_env *env, MDBX_txn *txn, mdbx_filehandle_t fd, + uint8_t *buffer, const bool dest_is_pipe, const MDBX_copy_flags_t flags) { - int rc = txn_end(read_txn, TXN_END_RESET_TMP); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - /* Temporarily block writers until we snapshot the meta pages */ - rc = lck_txn_lock(env, false); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - rc = txn_renew(read_txn, MDBX_TXN_RDONLY); - if (unlikely(rc != 
MDBX_SUCCESS)) { - lck_txn_unlock(env); - return rc; + bool should_unlock = false; + if ((txn->flags & MDBX_TXN_RDONLY) != 0 && (flags & MDBX_CP_RENEW_TXN) != 0) { + /* Try temporarily block writers until we snapshot the meta pages */ + int err = lck_txn_lock(env, true); + if (likely(err == MDBX_SUCCESS)) + should_unlock = true; + else if (unlikely(err != MDBX_BUSY)) + return err; } jitter4testing(false); + int rc = MDBX_SUCCESS; const size_t meta_bytes = pgno2bytes(env, NUM_METAS); - const troika_t troika = meta_tap(env); + troika_t troika = meta_tap(env); /* Make a snapshot of meta-pages, * but writing ones after the data was flushed */ +retry_snap_meta: memcpy(buffer, env->dxb_mmap.base, meta_bytes); - meta_t *const headcopy = /* LY: get pointer to the snapshot copy */ - ptr_disp(buffer, - ptr_dist(meta_recent(env, &troika).ptr_c, env->dxb_mmap.base)); - lck_txn_unlock(env); + const meta_ptr_t recent = meta_recent(env, &troika); + meta_t *headcopy = /* LY: get pointer to the snapshot copy */ + ptr_disp(buffer, ptr_dist(recent.ptr_c, env->dxb_mmap.base)); + jitter4testing(false); + if (txn->flags & MDBX_TXN_RDONLY) { + if (recent.txnid != txn->txnid) { + if (flags & MDBX_CP_RENEW_TXN) + rc = mdbx_txn_renew(txn); + else { + rc = MDBX_MVCC_RETARDED; + for (size_t n = 0; n < NUM_METAS; ++n) { + meta_t *const meta = page_meta(ptr_disp(buffer, pgno2bytes(env, n))); + if (troika.txnid[n] == txn->txnid && + ((/* is_steady */ (troika.fsm >> n) & 1) || rc != MDBX_SUCCESS)) { + rc = MDBX_SUCCESS; + headcopy = meta; + } else if (troika.txnid[n] > txn->txnid) + meta_set_txnid(env, meta, 0); + } + } + } + if (should_unlock) + lck_txn_unlock(env); + else { + troika_t snap = meta_tap(env); + if (memcmp(&troika, &snap, sizeof(troika_t)) && rc == MDBX_SUCCESS) { + troika = snap; + goto retry_snap_meta; + } + } + } + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + if (txn->flags & MDBX_TXN_RDONLY) + eASSERT(env, meta_txnid(headcopy) == txn->txnid); if (flags & 
MDBX_CP_FORCE_DYNAMIC_SIZE) meta_make_sizeable(headcopy); /* Update signature to steady */ meta_sign_as_steady(headcopy); /* Copy the data */ - const size_t whole_size = pgno_align2os_bytes(env, read_txn->geo.end_pgno); - const size_t used_size = pgno2bytes(env, read_txn->geo.first_unallocated); + const size_t whole_size = pgno_align2os_bytes(env, txn->geo.end_pgno); + const size_t used_size = pgno2bytes(env, txn->geo.first_unallocated); jitter4testing(false); + if (flags & MDBX_CP_THROTTLE_MVCC) + mdbx_txn_park(txn, false); + if (dest_is_pipe) rc = osal_write(fd, buffer, meta_bytes); @@ -570,7 +618,14 @@ __cold static int copy_asis(MDBX_env *env, MDBX_txn *read_txn, /* avoid use copyfilerange_unavailable() to ecryptfs due bugs */ not_the_same_filesystem = true; #endif /* MDBX_USE_COPYFILERANGE */ + for (size_t offset = meta_bytes; rc == MDBX_SUCCESS && offset < used_size;) { + if (flags & MDBX_CP_THROTTLE_MVCC) { + rc = mdbx_txn_unpark(txn, false); + if (unlikely(rc != MDBX_SUCCESS)) + break; + } + #if MDBX_USE_SENDFILE static bool sendfile_unavailable; if (dest_is_pipe && likely(!sendfile_unavailable)) { @@ -579,6 +634,8 @@ __cold static int copy_asis(MDBX_env *env, MDBX_txn *read_txn, sendfile(fd, env->lazy_fd, &in_offset, used_size - offset); if (likely(written > 0)) { offset = in_offset; + if (flags & MDBX_CP_THROTTLE_MVCC) + rc = mdbx_txn_park(txn, false); continue; } rc = MDBX_ENODATA; @@ -596,6 +653,8 @@ __cold static int copy_asis(MDBX_env *env, MDBX_txn *read_txn, env->lazy_fd, &in_offset, fd, &out_offset, used_size - offset, 0); if (likely(bytes_copied > 0)) { offset = in_offset; + if (flags & MDBX_CP_THROTTLE_MVCC) + rc = mdbx_txn_park(txn, false); continue; } rc = MDBX_ENODATA; @@ -619,6 +678,8 @@ __cold static int copy_asis(MDBX_env *env, MDBX_txn *read_txn, : used_size - offset; /* copy to avoid EFAULT in case swapped-out */ memcpy(data_buffer, ptr_disp(env->dxb_mmap.base, offset), chunk); + if (flags & MDBX_CP_THROTTLE_MVCC) + mdbx_txn_park(txn, 
false); rc = osal_write(fd, data_buffer, chunk); offset += chunk; } @@ -644,11 +705,22 @@ __cold static int copy_asis(MDBX_env *env, MDBX_txn *read_txn, return rc; } -__cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, - MDBX_copy_flags_t flags) { - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; +//---------------------------------------------------------------------------- + +__cold static int copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, + MDBX_copy_flags_t flags) { + if (unlikely(txn->flags & MDBX_TXN_DIRTY)) + return MDBX_BAD_TXN; + + int rc = MDBX_SUCCESS; + if (txn->flags & MDBX_TXN_RDONLY) { + if (flags & MDBX_CP_THROTTLE_MVCC) { + rc = mdbx_txn_park(txn, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + } else if (unlikely(flags & (MDBX_CP_THROTTLE_MVCC | MDBX_CP_RENEW_TXN))) + return MDBX_EINVAL; const int dest_is_pipe = osal_is_pipe(fd); if (MDBX_IS_ERROR(dest_is_pipe)) @@ -660,6 +732,7 @@ __cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, return rc; } + MDBX_env *const env = txn->env; const size_t buffer_size = pgno_align2os_bytes(env, NUM_METAS) + ceil_powerof2(((flags & MDBX_CP_COMPACT) @@ -672,15 +745,6 @@ __cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, if (unlikely(rc != MDBX_SUCCESS)) return rc; - MDBX_txn *read_txn = nullptr; - /* Do the lock/unlock of the reader mutex before starting the - * write txn. Otherwise other read txns could block writers. */ - rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &read_txn); - if (unlikely(rc != MDBX_SUCCESS)) { - osal_memalign_free(buffer); - return rc; - } - if (!dest_is_pipe) { /* Firstly write a stub to meta-pages. * Now we sure to incomplete copy will not be used. 
*/ @@ -688,22 +752,31 @@ __cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, rc = osal_write(fd, buffer, pgno2bytes(env, NUM_METAS)); } + if (likely(rc == MDBX_SUCCESS)) + rc = mdbx_txn_unpark(txn, false); if (likely(rc == MDBX_SUCCESS)) { memset(buffer, 0, pgno2bytes(env, NUM_METAS)); rc = ((flags & MDBX_CP_COMPACT) ? copy_with_compacting : copy_asis)( - env, read_txn, fd, buffer, dest_is_pipe, flags); + env, txn, fd, buffer, dest_is_pipe, flags); + + if (likely(rc == MDBX_SUCCESS)) + rc = mdbx_txn_unpark(txn, false); } - mdbx_txn_abort(read_txn); + + if (flags & MDBX_CP_THROTTLE_MVCC) + mdbx_txn_park(txn, true); + else if (flags & MDBX_CP_DISPOSE_TXN) + mdbx_txn_reset(txn); if (!dest_is_pipe) { - if (likely(rc == MDBX_SUCCESS)) + if (likely(rc == MDBX_SUCCESS) && (flags & MDBX_CP_DONT_FLUSH) == 0) rc = osal_fsync(fd, MDBX_SYNC_DATA | MDBX_SYNC_SIZE); /* Write actual meta */ if (likely(rc == MDBX_SUCCESS)) rc = osal_pwrite(fd, buffer, pgno2bytes(env, NUM_METAS), 0); - if (likely(rc == MDBX_SUCCESS)) + if (likely(rc == MDBX_SUCCESS) && (flags & MDBX_CP_DONT_FLUSH) == 0) rc = osal_fsync(fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); } @@ -711,38 +784,20 @@ __cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, return rc; } -__cold int mdbx_env_copy(MDBX_env *env, const char *dest_path, - MDBX_copy_flags_t flags) { -#if defined(_WIN32) || defined(_WIN64) - wchar_t *dest_pathW = nullptr; - int rc = osal_mb2w(dest_path, &dest_pathW); - if (likely(rc == MDBX_SUCCESS)) { - rc = mdbx_env_copyW(env, dest_pathW, flags); - osal_free(dest_pathW); - } - return rc; -} - -__cold int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest_path, - MDBX_copy_flags_t flags) { -#endif /* Windows */ - - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!dest_path)) +__cold static int copy2pathname(MDBX_txn *txn, const pathchar_t *dest_path, + MDBX_copy_flags_t flags) { + if (unlikely(!dest_path || *dest_path == '\0')) return 
MDBX_EINVAL; /* The destination path must exist, but the destination file must not. * We don't want the OS to cache the writes, since the source data is * already in the OS cache. */ - mdbx_filehandle_t newfd; - rc = osal_openfile(MDBX_OPEN_COPY, env, dest_path, &newfd, + mdbx_filehandle_t newfd = INVALID_HANDLE_VALUE; + int rc = osal_openfile(MDBX_OPEN_COPY, txn->env, dest_path, &newfd, #if defined(_WIN32) || defined(_WIN64) - (mdbx_mode_t)-1 + (mdbx_mode_t)-1 #else - S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP + S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP #endif ); @@ -767,7 +822,7 @@ __cold int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest_path, #endif /* Windows / POSIX */ if (rc == MDBX_SUCCESS) - rc = mdbx_env_copy2fd(env, newfd, flags); + rc = copy2fd(txn, newfd, flags); if (newfd != INVALID_HANDLE_VALUE) { int err = osal_closefile(newfd); @@ -776,6 +831,96 @@ __cold int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest_path, if (rc != MDBX_SUCCESS) (void)osal_removefile(dest_path); } - + return rc; +} + +//---------------------------------------------------------------------------- + +__cold int mdbx_txn_copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, + MDBX_copy_flags_t flags) { + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = copy2fd(txn, fd, flags); + if (flags & MDBX_CP_DISPOSE_TXN) + mdbx_txn_abort(txn); + return rc; +} + +__cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, + MDBX_copy_flags_t flags) { + if (unlikely(flags & (MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN))) + return MDBX_EINVAL; + + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + MDBX_txn *txn = nullptr; + rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = copy2fd(txn, fd, flags | MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN); + mdbx_txn_abort(txn); + return rc; +} + +__cold int mdbx_txn_copy2pathname(MDBX_txn *txn, const char *dest_path, + 
MDBX_copy_flags_t flags) { +#if defined(_WIN32) || defined(_WIN64) + wchar_t *dest_pathW = nullptr; + int rc = osal_mb2w(dest_path, &dest_pathW); + if (likely(rc == MDBX_SUCCESS)) { + rc = mdbx_txn_copy2pathnameW(txn, dest_pathW, flags); + osal_free(dest_pathW); + } + return rc; +} + +__cold int mdbx_txn_copy2pathnameW(MDBX_txn *txn, const wchar_t *dest_path, + MDBX_copy_flags_t flags) { +#endif /* Windows */ + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = copy2pathname(txn, dest_path, flags); + if (flags & MDBX_CP_DISPOSE_TXN) + mdbx_txn_abort(txn); + return rc; +} + +__cold int mdbx_env_copy(MDBX_env *env, const char *dest_path, + MDBX_copy_flags_t flags) { +#if defined(_WIN32) || defined(_WIN64) + wchar_t *dest_pathW = nullptr; + int rc = osal_mb2w(dest_path, &dest_pathW); + if (likely(rc == MDBX_SUCCESS)) { + rc = mdbx_env_copyW(env, dest_pathW, flags); + osal_free(dest_pathW); + } + return rc; +} + +__cold int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest_path, + MDBX_copy_flags_t flags) { +#endif /* Windows */ + if (unlikely(flags & (MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN))) + return MDBX_EINVAL; + + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + MDBX_txn *txn = nullptr; + rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = copy2pathname(txn, dest_path, + flags | MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN); + mdbx_txn_abort(txn); return rc; } From 4c0290b5764985f426e696876b2d997da5d0c97f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 2 Aug 2024 12:13:25 +0300 Subject: [PATCH 236/443] =?UTF-8?q?mdbx-testing:=20=D0=B4=D0=BE=D0=B1?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Ftxn=5Fcopy?= =?UTF-8?q?2pathname()`=20=D0=B2=20=D1=82=D0=B5=D1=81=D1=82=D0=BE=D0=B2?= 
=?UTF-8?q?=D1=8B=D0=B9=20=D1=81=D1=86=D0=B5=D0=BD=D0=B0=D1=80=D0=B8=D0=B9?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 8 ++++---- test/copy.c++ | 34 ++++++++++++++++++++++++++++------ 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/mdbx.h b/mdbx.h index 0d04c73f..aa2989c5 100644 --- a/mdbx.h +++ b/mdbx.h @@ -1719,16 +1719,16 @@ typedef enum MDBX_copy_flags { MDBX_CP_DONT_FLUSH = 4u, /** Use read transaction parking during copying MVCC-snapshot - * \see mdbx_txn_park() */ + * \see mdbx_txn_park() */ MDBX_CP_THROTTLE_MVCC = 8u, /** Abort/dispose passed transaction after copy - * \see mdbx_txn_copy2fd() \see mdbx_txn_copy2pathname() */ + * \see mdbx_txn_copy2fd() \see mdbx_txn_copy2pathname() */ MDBX_CP_DISPOSE_TXN = 16u, /** Enable renew/restart read transaction in case it use outdated - * MVCC shapshot, otherwise the \ref MDBX_MVCC_RETARDED will be returned - * \see mdbx_txn_copy2fd() \see mdbx_txn_copy2pathname() */ + * MVCC shapshot, otherwise the \ref MDBX_MVCC_RETARDED will be returned + * \see mdbx_txn_copy2fd() \see mdbx_txn_copy2pathname() */ MDBX_CP_RENEW_TXN = 32u } MDBX_copy_flags_t; diff --git a/test/copy.c++ b/test/copy.c++ index 7ab96c24..9ca4fe9c 100644 --- a/test/copy.c++ +++ b/test/copy.c++ @@ -20,12 +20,34 @@ void testcase_copy::copy_db(const bool with_compaction) { if (err != MDBX_SUCCESS && err != MDBX_RESULT_TRUE) failure_perror("osal_removefile()", err); - err = mdbx_env_copy(db_guard.get(), copy_pathname.c_str(), - with_compaction ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS); - if (unlikely(err != MDBX_SUCCESS)) - failure_perror(with_compaction ? "mdbx_env_copy(MDBX_CP_COMPACT)" - : "mdbx_env_copy(MDBX_CP_ASIS)", - err); + if (flipcoin()) { + err = mdbx_env_copy(db_guard.get(), copy_pathname.c_str(), + with_compaction ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror(with_compaction ? 
"mdbx_env_copy(MDBX_CP_COMPACT)" + : "mdbx_env_copy(MDBX_CP_ASIS)", + err); + } else { + const bool ro = mode_readonly() || flipcoin(); + const bool throttle = ro && flipcoin(); + const bool dynsize = flipcoin(); + const bool flush = flipcoin(); + const bool enable_renew = flipcoin(); + const MDBX_copy_flags_t flags = + (with_compaction ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS) | + (dynsize ? MDBX_CP_FORCE_DYNAMIC_SIZE : MDBX_CP_DEFAULTS) | + (throttle ? MDBX_CP_THROTTLE_MVCC : MDBX_CP_DEFAULTS) | + (flush ? MDBX_CP_DEFAULTS : MDBX_CP_DONT_FLUSH) | + (enable_renew ? MDBX_CP_RENEW_TXN : MDBX_CP_DEFAULTS); + txn_begin(ro); + err = mdbx_txn_copy2pathname(txn_guard.get(), copy_pathname.c_str(), flags); + if (unlikely(err != MDBX_SUCCESS && (!throttle || err != MDBX_OUSTED) && + (!enable_renew && err != MDBX_MVCC_RETARDED))) + failure_perror(with_compaction ? "mdbx_txn_copy2pathname(MDBX_CP_COMPACT)" + : "mdbx_txn_copy2pathname(MDBX_CP_ASIS)", + err); + txn_end(err != MDBX_SUCCESS || flipcoin()); + } } bool testcase_copy::run() { From b6b126195b8443efcef1b0633eaabe77aeb40b55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 2 Aug 2024 13:11:05 +0300 Subject: [PATCH 237/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=83=D0=BF=D1=83=D1=89?= =?UTF-8?q?=D0=B5=D0=BD=D0=BD=D0=BE=D0=B3=D0=BE=20`TXN=5FEND=5FEOTDONE`=20?= =?UTF-8?q?=D0=BF=D1=80=D0=B8=20=D1=81=D0=B1=D0=BE=D0=B5=20=D1=81=D1=82?= =?UTF-8?q?=D0=B0=D1=80=D1=82=D0=B0=20=D1=87=D0=B8=D1=82=D0=B0=D1=8E=D1=89?= =?UTF-8?q?=D0=B5=D0=B9=20=D1=82=D1=80=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86?= =?UTF-8?q?=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Упомянутый флажок отсутствовал в пути разрушения транзакции при ошибке её запуска. 
Из-за чего делалась попытка разрушить курсоры, что приводило к падению отладочных сборок, так как в них соответствующий массив намеренно заполнен некорректными указателями. --- src/txn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/txn.c b/src/txn.c index 0742cb1c..0e32d893 100644 --- a/src/txn.c +++ b/src/txn.c @@ -1331,7 +1331,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { } bailout: tASSERT(txn, rc != MDBX_SUCCESS); - txn_end(txn, TXN_END_SLOT | TXN_END_FAIL_BEGIN); + txn_end(txn, TXN_END_SLOT | TXN_END_EOTDONE | TXN_END_FAIL_BEGIN); return rc; } From d21ae28bb9a13cb18a13bc7c27d629f13a0cea17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 2 Aug 2024 19:03:34 +0300 Subject: [PATCH 238/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=B1=D1=80=D0=B0?= =?UTF-8?q?=D0=B1=D0=BE=D1=82=D0=BA=D0=B8=20`MDBX=5FMVCC=5FRETARDED`=20?= =?UTF-8?q?=D0=B2=20C++=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 1 + src/mdbx.c++ | 2 ++ src/misc.c | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index 7ef55c18..0d2e8d09 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -583,6 +583,7 @@ MDBX_DECLARE_EXCEPTION(transaction_overlapping); MDBX_DECLARE_EXCEPTION(duplicated_lck_file); MDBX_DECLARE_EXCEPTION(dangling_map_id); MDBX_DECLARE_EXCEPTION(transaction_ousted); +MDBX_DECLARE_EXCEPTION(mvcc_retarded); #undef MDBX_DECLARE_EXCEPTION [[noreturn]] LIBMDBX_API void throw_too_small_target_buffer(); diff --git a/src/mdbx.c++ b/src/mdbx.c++ index adef7c34..d6ca4c11 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -340,6 +340,7 @@ DEFINE_EXCEPTION(transaction_overlapping) DEFINE_EXCEPTION(duplicated_lck_file) DEFINE_EXCEPTION(dangling_map_id) DEFINE_EXCEPTION(transaction_ousted) +DEFINE_EXCEPTION(mvcc_retarded) 
#undef DEFINE_EXCEPTION __cold const char *error::what() const noexcept { @@ -429,6 +430,7 @@ __cold void error::throw_exception() const { CASE_EXCEPTION(duplicated_lck_file, MDBX_DUPLICATED_CLK); CASE_EXCEPTION(dangling_map_id, MDBX_DANGLING_DBI); CASE_EXCEPTION(transaction_ousted, MDBX_OUSTED); + CASE_EXCEPTION(mvcc_retarded, MDBX_MVCC_RETARDED); #undef CASE_EXCEPTION default: if (is_mdbx_error()) diff --git a/src/misc.c b/src/misc.c index 415eee1c..bf8246ab 100644 --- a/src/misc.c +++ b/src/misc.c @@ -210,6 +210,10 @@ __cold const char *mdbx_liberr2str(int errnum) { case MDBX_OUSTED: return "MDBX_OUSTED: The parked read transaction was outed for the sake" " of recycling old MVCC snapshots"; + case MDBX_MVCC_RETARDED: + return "MDBX_MVCC_RETARDED: MVCC snapshot used by read transaction" + " is outdated and could not be copied" + " since corresponding meta-pages was overwritten"; default: return nullptr; } From a0e278ff00f5ef288a7b539a896016e6df493590 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 2 Aug 2024 19:03:55 +0300 Subject: [PATCH 239/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=B1=D1=80=D0=B0?= =?UTF-8?q?=D0=B1=D0=BE=D1=82=D0=BA=D0=B8=20`MDBX=5FEDEADLK`=20=D0=B2=20?= =?UTF-8?q?=D0=A1++=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mdbx.c++ | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mdbx.c++ b/src/mdbx.c++ index d6ca4c11..f33e6617 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -362,6 +362,7 @@ __cold const char *error::what() const noexcept { ERROR_CASE(MDBX_EINTR); ERROR_CASE(MDBX_ENOFILE); ERROR_CASE(MDBX_EREMOTE); + ERROR_CASE(MDBX_EDEADLK); #undef ERROR_CASE default: return "SYSTEM"; From 9eef3c3541d83789456c34b6360c3065a4fa5a15 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 2 Aug 2024 19:23:15 +0300 Subject: [PATCH 240/443] =?UTF-8?q?mdbx-tools:=20=D0=B8=D1=81=D0=BF=D1=80?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=81=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BD=D0=B5=D0=BD=D0=B8=D1=8F=20thread=5Fid=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20=D0=BF=D1=80=D0=B8=D0=BF=D0=B0=D1=80=D0=BA=D0=BE?= =?UTF-8?q?=D0=B2=D0=B0=D0=BD=D0=BD=D1=8B=D1=85=20=D1=82=D1=80=D0=B0=D0=BD?= =?UTF-8?q?=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tools/stat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tools/stat.c b/src/tools/stat.c index 76703a15..b0a47601 100644 --- a/src/tools/stat.c +++ b/src/tools/stat.c @@ -76,8 +76,8 @@ static int reader_list_func(void *ctx, int num, int slot, mdbx_pid_t pid, (int)sizeof(size_t) * 2, (uintptr_t)thread); else printf(" %3d)\t[%d]\t%6" PRIdSIZE " %sed", num, slot, (size_t)pid, - ((uintptr_t)thread == (uintptr_t)MDBX_TID_TXN_PARKED) ? "park" - : "oust"); + (thread == (mdbx_tid_t)((uintptr_t)MDBX_TID_TXN_PARKED)) ? 
"park" + : "oust"); if (txnid) printf(" %20" PRIu64 " %10" PRIu64 " %12.1fM %12.1fM\n", txnid, lag, From 69f85af24260213ac509936d288106c9b488db16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 2 Aug 2024 22:49:23 +0300 Subject: [PATCH 241/443] =?UTF-8?q?mdbx-tools:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D1=86=D0=B8?= =?UTF-8?q?=D0=B9=20`-d`=20=D0=B8=20`-p`=20=D0=B4=D0=BB=D1=8F=20`mdbx=5Fco?= =?UTF-8?q?py`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/man1/mdbx_copy.1 | 20 ++++++++++++++++++++ src/tools/copy.c | 9 ++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/man1/mdbx_copy.1 b/src/man1/mdbx_copy.1 index 14663b8e..636bd754 100644 --- a/src/man1/mdbx_copy.1 +++ b/src/man1/mdbx_copy.1 @@ -14,6 +14,10 @@ mdbx_copy \- MDBX environment copy tool [\c .BR \-c ] [\c +.BR \-d ] +[\c +.BR \-p ] +[\c .BR \-n ] .B src_path [\c @@ -45,6 +49,22 @@ or unused pages will be omitted from the copy. This option will slow down the backup process as it is more CPU-intensive. Currently it fails if the environment has suffered a page leak. .TP +.BR \-d +Alters geometry to enforce the copy to be a dynamic size DB, +which could be growth and shrink by reasonable steps on the fly. +.TP +.BR \-p +Use read transaction parking/ousting during copying MVCC-snapshot. +This allows the writing transaction to oust the read +transaction used to copy the database if copying takes so long +that it will interfere with the recycling old MVCC snapshots +and may lead to an overflow of the database. +However, if the reading transaction is ousted the copy will +be aborted until successful completion. 
Thus, this option +allows copying the database without interfering with write +transactions and without a threat of database overflow, but at the cost +that copying will be aborted to prevent such conditions. +.TP .BR \-u Warms up the DB before copying via notifying OS kernel of subsequent access to the database pages. .TP diff --git a/src/tools/copy.c b/src/tools/copy.c index 8a962e73..aa6157f3 100644 --- a/src/tools/copy.c +++ b/src/tools/copy.c @@ -39,10 +39,13 @@ static void signal_handler(int sig) { static void usage(const char *prog) { fprintf( stderr, - "usage: %s [-V] [-q] [-c] [-u|U] src_path [dest_path]\n" + "usage: %s [-V] [-q] [-c] [-d] [-p] [-u|U] src_path [dest_path]\n" " -V\t\tprint version and exit\n" " -q\t\tbe quiet\n" " -c\t\tenable compactification (skip unused pages)\n" + " -d\t\tenforce copy to be a dynamic size DB\n" + " -p\t\tusing transaction parking/ousting during copying MVCC-snapshot\n" + " \t\tto avoid stopping recycling and overflowing the DB\n" " -u\t\twarmup database before copying\n" " -U\t\twarmup and try lock database pages in memory before copying\n" " src_path\tsource database\n" @@ -66,6 +69,10 @@ int main(int argc, char *argv[]) { flags |= MDBX_NOSUBDIR; else if (argv[1][1] == 'c' && argv[1][2] == '\0') cpflags |= MDBX_CP_COMPACT; + else if (argv[1][1] == 'd' && argv[1][2] == '\0') + cpflags |= MDBX_CP_FORCE_DYNAMIC_SIZE; + else if (argv[1][1] == 'p' && argv[1][2] == '\0') + cpflags |= MDBX_CP_THROTTLE_MVCC; else if (argv[1][1] == 'q' && argv[1][2] == '\0') quiet = true; else if (argv[1][1] == 'u' && argv[1][2] == '\0') From ee8c9225d6a27f76e64f693e73ad33055759dd4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 2 Aug 2024 23:24:17 +0300 Subject: [PATCH 242/443] =?UTF-8?q?mdbx-testing:=20=D0=BF=D0=BE=D0=B2?= =?UTF-8?q?=D1=82=D0=BE=D1=80=20=D1=81=D1=86=D0=B5=D0=BD=D0=B0=D1=80=D0=B8?=
=?UTF-8?q?=D1=8F=20=D1=81=20`mdbx=5Ftxn=5Fcopy2pathname()`=20=D0=B4=D0=BE?= =?UTF-8?q?=20=D0=BF=D0=BE=D0=BB=D1=83=D1=87=D0=B5=D0=BD=D0=B8=D1=8F=20?= =?UTF-8?q?=D1=83=D1=81=D0=BF=D0=B5=D1=88=D0=BD=D0=BE=D0=B9=20=D0=BA=D0=BE?= =?UTF-8?q?=D0=BF=D0=B8=D0=B8=20=D0=B4=D0=BB=D1=8F=20=D0=BF=D1=80=D0=B5?= =?UTF-8?q?=D0=B4=D0=BE=D1=82=D0=B2=D1=80=D0=B0=D1=89=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D1=8F=20=D1=81=D0=B1=D0=BE=D0=B5=D0=B2=20`test/CMakeLists.txt`?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/copy.c++ | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/test/copy.c++ b/test/copy.c++ index 9ca4fe9c..188fd1b1 100644 --- a/test/copy.c++ +++ b/test/copy.c++ @@ -28,25 +28,29 @@ void testcase_copy::copy_db(const bool with_compaction) { : "mdbx_env_copy(MDBX_CP_ASIS)", err); } else { - const bool ro = mode_readonly() || flipcoin(); - const bool throttle = ro && flipcoin(); - const bool dynsize = flipcoin(); - const bool flush = flipcoin(); - const bool enable_renew = flipcoin(); - const MDBX_copy_flags_t flags = - (with_compaction ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS) | - (dynsize ? MDBX_CP_FORCE_DYNAMIC_SIZE : MDBX_CP_DEFAULTS) | - (throttle ? MDBX_CP_THROTTLE_MVCC : MDBX_CP_DEFAULTS) | - (flush ? MDBX_CP_DEFAULTS : MDBX_CP_DONT_FLUSH) | - (enable_renew ? MDBX_CP_RENEW_TXN : MDBX_CP_DEFAULTS); - txn_begin(ro); - err = mdbx_txn_copy2pathname(txn_guard.get(), copy_pathname.c_str(), flags); - if (unlikely(err != MDBX_SUCCESS && (!throttle || err != MDBX_OUSTED) && - (!enable_renew && err != MDBX_MVCC_RETARDED))) - failure_perror(with_compaction ? 
"mdbx_txn_copy2pathname(MDBX_CP_COMPACT)" - : "mdbx_txn_copy2pathname(MDBX_CP_ASIS)", - err); - txn_end(err != MDBX_SUCCESS || flipcoin()); + do { + const bool ro = mode_readonly() || flipcoin(); + const bool throttle = ro && flipcoin(); + const bool dynsize = flipcoin(); + const bool flush = flipcoin(); + const bool enable_renew = flipcoin(); + const MDBX_copy_flags_t flags = + (with_compaction ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS) | + (dynsize ? MDBX_CP_FORCE_DYNAMIC_SIZE : MDBX_CP_DEFAULTS) | + (throttle ? MDBX_CP_THROTTLE_MVCC : MDBX_CP_DEFAULTS) | + (flush ? MDBX_CP_DEFAULTS : MDBX_CP_DONT_FLUSH) | + (enable_renew ? MDBX_CP_RENEW_TXN : MDBX_CP_DEFAULTS); + txn_begin(ro); + err = + mdbx_txn_copy2pathname(txn_guard.get(), copy_pathname.c_str(), flags); + if (unlikely(err != MDBX_SUCCESS && (!throttle || err != MDBX_OUSTED) && + (!enable_renew && err != MDBX_MVCC_RETARDED))) + failure_perror(with_compaction + ? "mdbx_txn_copy2pathname(MDBX_CP_COMPACT)" + : "mdbx_txn_copy2pathname(MDBX_CP_ASIS)", + err); + txn_end(err != MDBX_SUCCESS || flipcoin()); + } while (err != MDBX_SUCCESS); } } From dd0ee3f278552424190f2f0bf1e63779a10dece6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 3 Aug 2024 00:30:06 +0300 Subject: [PATCH 243/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ChangeLog.md b/ChangeLog.md index cac91166..3142f04c 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -103,6 +103,10 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Добавление в API функций `mdbx_cursor_unbind()` и `mdbx_txn_release_all_cursors()`. 
+ - Добавление в API функций `mdbx_txn_copy2pathname()` и `mdbx_txn_copy2fd()`. + + - Добавление в утилиту `mdbx_copy` опций `-d` и `-p`. + - Расширение и доработка C++ API: - добавлен тип `mdbx::cursor::estimation_result`, а поведение методов @@ -120,7 +124,10 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - добавлен метод `cursor::clone()`; - поддержка base58 переработана и приведена в соответствии с черновиком RFC, в текущем понимании теперь это одна из самых высокопроизводительных реализаций base58; - - переработка `to_hex()` и `from_hex()`. + - переработка `to_hex()` и `from_hex()`; + - добавлены перегрузки со `std::string_view` для методов `open_map`/`create_map`/`drop_map`/`clear_map`/`rename_map()`; + - добавлены перегрузки методов put/insert/upsert для `mdbx::pair`; + - добавлены методы принимающие имена таблиц/subDb через `mdbx::slice`. Нарушение совместимости: From dd5329c164da4fe34d7b49eeda21fe7b2fbb78f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 3 Aug 2024 12:48:23 +0300 Subject: [PATCH 244/443] =?UTF-8?q?mdbx-doc:=20=D0=BA=D0=BE=D1=80=D1=80?= =?UTF-8?q?=D0=B5=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0/=D0=B8?= =?UTF-8?q?=D1=81=D0=BF=D1=80=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20?= =?UTF-8?q?=D1=80=D0=B0=D0=B7=D0=BC=D0=B5=D1=82=D0=BA=D0=B8=20Doxygen.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/mdbx.h b/mdbx.h index aa2989c5..660f0d54 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2587,6 +2587,7 @@ LIBMDBX_API int mdbx_env_delete(const char *pathname, #if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) /** \copydoc mdbx_env_delete() + * \ingroup c_extra * \note Available only on Windows.
* \see mdbx_env_delete() */ LIBMDBX_API int mdbx_env_deleteW(const wchar_t *pathname, @@ -2671,12 +2672,14 @@ LIBMDBX_API int mdbx_txn_copy2pathname(MDBX_txn *txn, const char *dest, #if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) /** \copydoc mdbx_env_copy() + * \ingroup c_extra * \note Available only on Windows. * \see mdbx_env_copy() */ LIBMDBX_API int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest, MDBX_copy_flags_t flags); /** \copydoc mdbx_txn_copy2pathname() + * \ingroup c_extra * \note Available only on Windows. * \see mdbx_txn_copy2pathname() */ LIBMDBX_API int mdbx_txn_copy2pathnameW(MDBX_txn *txn, const wchar_t *dest, @@ -3342,6 +3345,7 @@ LIBMDBX_API int mdbx_env_get_path(const MDBX_env *env, const char **dest); #if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) /** \copydoc mdbx_env_get_path() + * \ingroup c_statinfo * \note Available only on Windows. * \see mdbx_env_get_path() */ LIBMDBX_API int mdbx_env_get_pathW(const MDBX_env *env, const wchar_t **dest); @@ -4609,16 +4613,18 @@ typedef int(MDBX_cmp_func)(const MDBX_val *a, * by current thread. */ LIBMDBX_API int mdbx_dbi_open(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi); -/** \copydoc mdbx_dbi_open() */ +/** \copydoc mdbx_dbi_open() + * \ingroup c_dbi */ LIBMDBX_API int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi); -/** \deprecated Please - * \ref avoid_custom_comparators "avoid using custom comparators" and use - * \ref mdbx_dbi_open() instead. - * +/** \brief Open or Create a named table in the environment + * with using custom comparison functions. * \ingroup c_dbi * + * \deprecated Please \ref avoid_custom_comparators + * "avoid using custom comparators" and use \ref mdbx_dbi_open() instead. + * * \param [in] txn transaction handle returned by \ref mdbx_txn_begin(). * \param [in] name The name of the database to open. 
If only a single * database is needed in the environment, @@ -4631,7 +4637,8 @@ LIBMDBX_API int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, MDBX_DEPRECATED LIBMDBX_API int mdbx_dbi_open_ex(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); -/** \copydoc mdbx_dbi_open_ex() */ +/** \copydoc mdbx_dbi_open_ex() + * \ingroup c_dbi */ MDBX_DEPRECATED LIBMDBX_API int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); @@ -4651,7 +4658,8 @@ mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, * * \returns Ненулевое значение кода ошибки, либо 0 при успешном выполнении. */ LIBMDBX_API int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name); -/** \copydoc mdbx_dbi_rename() */ +/** \copydoc mdbx_dbi_rename() + * \ingroup c_dbi */ LIBMDBX_API int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *name); @@ -6355,6 +6363,7 @@ LIBMDBX_API int mdbx_env_open_for_recovery(MDBX_env *env, const char *pathname, #if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) /** \copydoc mdbx_env_open_for_recovery() + * \ingroup c_extra * \note Available only on Windows. * \see mdbx_env_open_for_recovery() */ LIBMDBX_API int mdbx_env_open_for_recoveryW(MDBX_env *env, @@ -6405,6 +6414,7 @@ LIBMDBX_API int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *info, size_t bytes); #if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) /** \copydoc mdbx_preopen_snapinfo() + * \ingroup c_opening * \note Available only on Windows. 
* \see mdbx_preopen_snapinfo() */ LIBMDBX_API int mdbx_preopen_snapinfoW(const wchar_t *pathname, From 57e558a57d2521d16eb1d0248b0104e3f3758c82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 3 Aug 2024 13:25:44 +0300 Subject: [PATCH 245/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20=D1=82=D0=B5?= =?UTF-8?q?=D1=80=D0=BC=D0=B8=D0=BD=D0=B0=20"=D1=82=D0=B0=D0=B1=D0=BB?= =?UTF-8?q?=D0=B8=D1=86=D0=B0"=20=D0=B2=D0=BC=D0=B5=D1=81=D1=82=D0=BE=20"s?= =?UTF-8?q?ub-database".?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 4 +- ChangeLog.md | 3 +- README.md | 8 +- TODO.md | 4 +- mdbx.h | 428 +++++++++++++++++++-------------------- mdbx.h++ | 30 +-- src/alloy.c | 2 +- src/api-cursor.c | 2 +- src/audit.c | 2 +- src/chk.c | 170 ++++++++-------- src/cogs.h | 6 +- src/cold.c | 4 +- src/cursor.c | 6 +- src/dbi.c | 20 +- src/internals.h | 4 +- src/layout-dxb.h | 6 +- src/man1/mdbx_chk.1 | 6 +- src/man1/mdbx_drop.1 | 6 +- src/man1/mdbx_dump.1 | 8 +- src/man1/mdbx_load.1 | 8 +- src/man1/mdbx_stat.1 | 8 +- src/misc.c | 8 +- src/proto.h | 4 +- src/{subdb.c => table.c} | 8 +- src/tools/chk.c | 30 +-- src/tools/drop.c | 2 +- src/tools/dump.c | 8 +- src/tools/load.c | 4 +- src/tools/stat.c | 38 ++-- src/tree.c | 10 +- src/txn.c | 2 +- src/walk.c | 8 +- src/walk.h | 2 +- 33 files changed, 430 insertions(+), 429 deletions(-) rename src/{subdb.c => table.c} (93%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 00334955..1a378bfc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -138,7 +138,7 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/sort.h" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/spill.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/spill.h" AND - EXISTS 
"${CMAKE_CURRENT_SOURCE_DIR}/src/subdb.c" AND + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/table.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tls.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tls.h" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/chk.c" AND @@ -749,7 +749,7 @@ else() "${MDBX_SOURCE_DIR}/sort.h" "${MDBX_SOURCE_DIR}/spill.c" "${MDBX_SOURCE_DIR}/spill.h" - "${MDBX_SOURCE_DIR}/subdb.c" + "${MDBX_SOURCE_DIR}/table.c" "${MDBX_SOURCE_DIR}/tls.c" "${MDBX_SOURCE_DIR}/tls.h" "${MDBX_SOURCE_DIR}/tree.c" diff --git a/ChangeLog.md b/ChangeLog.md index 3142f04c..ddf1f60b 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -75,7 +75,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Функция `mdbx_preopen_snapinfo()` для получения информации о БД без её открытия. - - Функция `mdbx_enumerate_subdb()` для получение информации + - Функция `mdbx_enumerate_tables()` для получение информации об именованных пользовательских таблицах. - Поддержка функций логирования обратного вызова без функционала @@ -131,6 +131,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic Нарушение совместимости: + - Использование термина "таблица" вместо "subDb". - Опция `MDBX_COALESCE` объявлена устаревшей, так как соответствующий функционал всегда включен начиная с предыдущей версии 0.12. - Опция `MDBX_NOTLS` объявлена устаревшей и заменена на `MDBX_NOSTICKYTHREADS`. - Опция сборки `MDBX_USE_VALGRIND` заменена на общепринятую `ENABLE_MEMCHECK`. diff --git a/README.md b/README.md index 2fc4f514..d82599b6 100644 --- a/README.md +++ b/README.md @@ -160,7 +160,7 @@ $ cc --version [MVCC](https://en.wikipedia.org/wiki/Multiversion_concurrency_control) and [CoW](https://en.wikipedia.org/wiki/Copy-on-write). -- Multiple key-value sub-databases within a single datafile. +- Multiple key-value tables/sub-databases within a single datafile. - Range lookups, including range query estimation. @@ -204,7 +204,7 @@ transaction journal. 
No crash recovery needed. No maintenance is required. - **Value size**: minimum `0`, maximum `2146435072` (`0x7FF00000`) bytes for maps, ≈½ pagesize for multimaps (`2022` bytes for default 4K pagesize, `32742` bytes for 64K pagesize). - **Write transaction size**: up to `1327217884` pages (`4.944272` TiB for default 4K pagesize, `79.108351` TiB for 64K pagesize). - **Database size**: up to `2147483648` pages (≈`8.0` TiB for default 4K pagesize, ≈`128.0` TiB for 64K pagesize). -- **Maximum sub-databases**: `32765`. +- **Maximum tables/sub-databases**: `32765`. ## Gotchas @@ -298,7 +298,7 @@ and/or optimize query execution plans. 11. Ability to determine whether the particular data is on a dirty page or not, that allows to avoid copy-out before updates. -12. Extended information of whole-database, sub-databases, transactions, readers enumeration. +12. Extended information of whole-database, tables/sub-databases, transactions, readers enumeration. > _libmdbx_ provides a lot of information, including dirty and leftover pages > for a write transaction, reading lag and holdover space for read transactions. @@ -321,7 +321,7 @@ pair, to the first, to the last, or not set to anything. ## Other fixes and specifics 1. Fixed more than 10 significant errors, in particular: page leaks, -wrong sub-database statistics, segfault in several conditions, +wrong table/sub-database statistics, segfault in several conditions, nonoptimal page merge strategy, updating an existing record with a change in data size (including for multimap), etc. diff --git a/TODO.md b/TODO.md index bf40f6f4..13deca20 100644 --- a/TODO.md +++ b/TODO.md @@ -14,12 +14,12 @@ So currently most of the links are broken due to noted malicious ~~Github~~ sabo - [Migration guide from LMDB to MDBX](https://libmdbx.dqdkfa.ru/dead-github/issues/199). - [Support for RAW devices](https://libmdbx.dqdkfa.ru/dead-github/issues/124). - [Support MessagePack for Keys & Values](https://libmdbx.dqdkfa.ru/dead-github/issues/115). 
- - [Engage new terminology](https://libmdbx.dqdkfa.ru/dead-github/issues/137). - Packages for [Astra Linux](https://astralinux.ru/), [ALT Linux](https://www.altlinux.org/), [ROSA Linux](https://www.rosalinux.ru/), etc. Done ---- + - [Engage new terminology](https://libmdbx.dqdkfa.ru/dead-github/issues/137). - [More flexible support of asynchronous runtime/framework(s)](https://libmdbx.dqdkfa.ru/dead-github/issues/200). - [Move most of `mdbx_chk` functional to the library API](https://libmdbx.dqdkfa.ru/dead-github/issues/204). - [Simple careful mode for working with corrupted DB](https://libmdbx.dqdkfa.ru/dead-github/issues/223). @@ -37,6 +37,6 @@ Canceled ОС. Для этого необходимо снять отображение, изменить размер файла и затем отобразить обратно. В свою очередь, для это необходимо приостановить работающие с БД потоки выполняющие транзакции чтения, либо - готовые к такому выполнению. Но режиме MDBX_NOSTICKYTHREADS нет + готовые к такому выполнению. Но в режиме MDBX_NOSTICKYTHREADS нет возможности отслеживать работающие с БД потоки, а приостановка всех потоков неприемлема для большинства приложений. diff --git a/mdbx.h b/mdbx.h index 660f0d54..90c50771 100644 --- a/mdbx.h +++ b/mdbx.h @@ -68,7 +68,7 @@ credits and acknowledgments. \defgroup c_err Error handling \defgroup c_opening Opening & Closing \defgroup c_transactions Transactions - \defgroup c_dbi Databases + \defgroup c_dbi Tables \defgroup c_crud Create/Read/Update/Delete (see Quick Reference in details) \details @@ -79,9 +79,9 @@ Historically, libmdbx inherits the API basis from LMDB, where it is often difficult to select flags/options and functions for the desired operation. So it is recommend using this hints. -## Databases with UNIQUE keys +## Tables with UNIQUE keys -In databases created without the \ref MDBX_DUPSORT option, keys are always +In tables created without the \ref MDBX_DUPSORT option, keys are always unique. 
Thus always a single value corresponds to the each key, and so there are only a few cases of changing data. @@ -104,10 +104,10 @@ are only a few cases of changing data. |Extract (read & delete) value by the key |\ref mdbx_replace() with zero flag and parameter `new_data = NULL`|Returning a deleted value| -## Databases with NON-UNIQUE keys +## Tables with NON-UNIQUE keys -In databases created with the \ref MDBX_DUPSORT (Sorted Duplicates) option, keys -may be non unique. Such non-unique keys in a key-value database may be treated +In tables created with the \ref MDBX_DUPSORT (Sorted Duplicates) option, keys +may be non unique. Such non-unique keys in a key-value table may be treated as a duplicates or as like a multiple values corresponds to keys. @@ -713,8 +713,8 @@ void LIBMDBX_API NTAPI mdbx_module_handler(PVOID module, DWORD reason, /* OPACITY STRUCTURES *********************************************************/ /** \brief Opaque structure for a database environment. - * \details An environment supports multiple key-value sub-databases (aka - * key-value spaces or tables), all residing in the same shared-memory map. + * \details An environment supports multiple key-value tables (aka key-value + * maps, spaces or sub-databases), all residing in the same shared-memory map. * \see mdbx_env_create() \see mdbx_env_close() */ #ifndef __cplusplus typedef struct MDBX_env MDBX_env; @@ -724,7 +724,7 @@ struct MDBX_env; /** \brief Opaque structure for a transaction handle. * \ingroup c_transactions - * \details All database operations require a transaction handle. Transactions + * \details All table operations require a transaction handle. Transactions * may be read-only or read-write. 
* \see mdbx_txn_begin() \see mdbx_txn_commit() \see mdbx_txn_abort() */ #ifndef __cplusplus @@ -733,16 +733,16 @@ typedef struct MDBX_txn MDBX_txn; struct MDBX_txn; #endif -/** \brief A handle for an individual database (key-value spaces) in the +/** \brief A handle for an individual table (key-value spaces) in the * environment. * \ingroup c_dbi - * \details Zero handle is used internally (hidden Garbage Collection subDB). + * \details Zero handle is used internally (hidden Garbage Collection table). * So, any valid DBI-handle great than 0 and less than or equal * \ref MDBX_MAX_DBI. * \see mdbx_dbi_open() \see mdbx_dbi_close() */ typedef uint32_t MDBX_dbi; -/** \brief Opaque structure for navigating through a database +/** \brief Opaque structure for navigating through a table * \ingroup c_cursors * \see mdbx_cursor_create() \see mdbx_cursor_bind() \see mdbx_cursor_close() */ @@ -753,15 +753,15 @@ struct MDBX_cursor; #endif /** \brief Generic structure used for passing keys and data in and out of the - * database. + * table. * \anchor MDBX_val \see mdbx::slice \see mdbx::buffer * - * \details Values returned from the database are valid only until a subsequent + * \details Values returned from the table are valid only until a subsequent * update operation, or the end of the transaction. Do not modify or * free them, they commonly point into the database itself. * * Key sizes must be between 0 and \ref mdbx_env_get_maxkeysize() inclusive. - * The same applies to data sizes in databases with the \ref MDBX_DUPSORT flag. + * The same applies to data sizes in tables with the \ref MDBX_DUPSORT flag. * Other data items can in theory be from 0 to \ref MDBX_MAXDATASIZE bytes long. 
* * \note The notable difference between MDBX and LMDB is that MDBX support zero @@ -1607,7 +1607,7 @@ typedef enum MDBX_txn_flags { } MDBX_txn_flags_t; DEFINE_ENUM_FLAG_OPERATORS(MDBX_txn_flags) -/** \brief Database flags +/** \brief Table flags * \ingroup c_dbi * \anchor db_flags * \see mdbx_dbi_open() */ @@ -1643,15 +1643,15 @@ typedef enum MDBX_db_flags { /** Create DB if not already existing. */ MDBX_CREATE = UINT32_C(0x40000), - /** Opens an existing sub-database created with unknown flags. + /** Opens an existing table created with unknown flags. * - * The `MDBX_DB_ACCEDE` flag is intend to open a existing sub-database which + * The `MDBX_DB_ACCEDE` flag is intend to open a existing table which * was created with unknown flags (\ref MDBX_REVERSEKEY, \ref MDBX_DUPSORT, * \ref MDBX_INTEGERKEY, \ref MDBX_DUPFIXED, \ref MDBX_INTEGERDUP and * \ref MDBX_REVERSEDUP). * * In such cases, instead of returning the \ref MDBX_INCOMPATIBLE error, the - * sub-database will be opened with flags which it was created, and then an + * table will be opened with flags which it was created, and then an * application could determine the actual flags by \ref mdbx_dbi_flags(). */ MDBX_DB_ACCEDE = MDBX_ACCEDE } MDBX_db_flags_t; @@ -1668,7 +1668,7 @@ typedef enum MDBX_put_flags { /** For insertion: Don't write if the key already exists. */ MDBX_NOOVERWRITE = UINT32_C(0x10), - /** Has effect only for \ref MDBX_DUPSORT databases. + /** Has effect only for \ref MDBX_DUPSORT tables. * For upsertion: don't write if the key-value pair already exist. */ MDBX_NODUPDATA = UINT32_C(0x20), @@ -1678,7 +1678,7 @@ typedef enum MDBX_put_flags { * For deletion: remove only single entry at the current cursor position. */ MDBX_CURRENT = UINT32_C(0x40), - /** Has effect only for \ref MDBX_DUPSORT databases. + /** Has effect only for \ref MDBX_DUPSORT tables. * For deletion: remove all multi-values (aka duplicates) for given key. * For upsertion: replace all multi-values for given key with a new one. 
*/ MDBX_ALLDUPS = UINT32_C(0x80), @@ -1691,7 +1691,7 @@ typedef enum MDBX_put_flags { * Don't split full pages, continue on a new instead. */ MDBX_APPEND = UINT32_C(0x20000), - /** Has effect only for \ref MDBX_DUPSORT databases. + /** Has effect only for \ref MDBX_DUPSORT tables. * Duplicate data is being appended. * Don't split full pages, continue on a new instead. */ MDBX_APPENDDUP = UINT32_C(0x40000), @@ -1920,14 +1920,14 @@ typedef enum MDBX_error { * or explicit call of \ref mdbx_env_set_geometry(). */ MDBX_UNABLE_EXTEND_MAPSIZE = -30785, - /** Environment or database is not compatible with the requested operation + /** Environment or table is not compatible with the requested operation * or the specified flags. This can mean: * - The operation expects an \ref MDBX_DUPSORT / \ref MDBX_DUPFIXED - * database. + * table. * - Opening a named DB when the unnamed DB has \ref MDBX_DUPSORT / * \ref MDBX_INTEGERKEY. - * - Accessing a data record as a database, or vice versa. - * - The database was dropped and recreated with different flags. */ + * - Accessing a data record as a named table, or vice versa. + * - The table was dropped and recreated with different flags. 
*/ MDBX_INCOMPATIBLE = -30784, /** Invalid reuse of reader locktable slot, @@ -1939,8 +1939,8 @@ typedef enum MDBX_error { * or is invalid */ MDBX_BAD_TXN = -30782, - /** Invalid size or alignment of key or data for target database, - * either invalid subDB name */ + /** Invalid size or alignment of key or data for target table, + * either invalid table name */ MDBX_BAD_VALSIZE = -30781, /** The specified DBI-handle is invalid @@ -1995,7 +1995,7 @@ typedef enum MDBX_error { /** Alternative/Duplicate LCK-file is exists and should be removed manually */ MDBX_DUPLICATED_CLK = -30413, - /** Some cursors and/or other resources should be closed before subDb or + /** Some cursors and/or other resources should be closed before table or * corresponding DBI-handle could be (re)used */ MDBX_DANGLING_DBI = -30412, @@ -2134,11 +2134,11 @@ LIBMDBX_API int mdbx_env_create(MDBX_env **penv); * \ingroup c_settings * \see mdbx_env_set_option() \see mdbx_env_get_option() */ typedef enum MDBX_option { - /** \brief Controls the maximum number of named databases for the environment. + /** \brief Controls the maximum number of named tables for the environment. * - * \details By default only unnamed key-value database could used and + * \details By default only unnamed key-value table could used and * appropriate value should set by `MDBX_opt_max_db` to using any more named - * subDB(s). To reduce overhead, use the minimum sufficient value. This option + * table(s). To reduce overhead, use the minimum sufficient value. This option * may only set after \ref mdbx_env_create() and before \ref mdbx_env_open(). 
* * \see mdbx_env_set_maxdbs() \see mdbx_env_get_maxdbs() */ @@ -2739,12 +2739,12 @@ LIBMDBX_API int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, LIBMDBX_API int mdbx_txn_copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, MDBX_copy_flags_t flags); -/** \brief Statistics for a database in the environment +/** \brief Statistics for a table in the environment * \ingroup c_statinfo * \see mdbx_env_stat_ex() \see mdbx_dbi_stat() */ struct MDBX_stat { - uint32_t ms_psize; /**< Size of a database page. This is the same for all - databases. */ + uint32_t ms_psize; /**< Size of a table page. This is the same for all tables + in a database. */ uint32_t ms_depth; /**< Depth (height) of the B-tree */ uint64_t ms_branch_pages; /**< Number of internal (non-leaf) pages */ uint64_t ms_leaf_pages; /**< Number of leaf pages */ @@ -3084,7 +3084,7 @@ LIBMDBX_INLINE_API(int, mdbx_env_get_syncperiod, /** \brief Close the environment and release the memory map. * \ingroup c_opening * - * Only a single thread may call this function. All transactions, databases, + * Only a single thread may call this function. All transactions, tables, * and cursors must already be closed before calling this function. Attempts * to use any such handles after calling this function is UB and would cause * a `SIGSEGV`. The environment handle will be freed and must not be used again @@ -3243,7 +3243,7 @@ typedef enum MDBX_warmup_flags { * On successful, all currently allocated pages, both unused in GC and * containing payload, will be locked in memory until the environment closes, * or explicitly unblocked by using \ref MDBX_warmup_release, or the - * database geomenry will changed, including its auto-shrinking. + * database geometry will be changed, including its auto-shrinking.
*/ MDBX_warmup_lock = 4, /** Alters corresponding current resource limits to be enough for lock pages @@ -3259,8 +3259,9 @@ typedef enum MDBX_warmup_flags { } MDBX_warmup_flags_t; DEFINE_ENUM_FLAG_OPERATORS(MDBX_warmup_flags) -/** \brief Warms up the database by loading pages into memory, optionally lock - * ones. \ingroup c_settings +/** \brief Warms up the database by loading pages into memory, + * optionally lock ones. + * \ingroup c_settings * * Depending on the specified flags, notifies OS kernel about following access, * force loads the database pages, including locks ones in memory or releases @@ -3619,40 +3620,40 @@ MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_dbsize_max(intptr_t pagesize); /** \brief Returns maximal key size in bytes for given page size - * and database flags, or -1 if pagesize is invalid. + * and table flags, or -1 if pagesize is invalid. * \ingroup c_statinfo * \see db_flags */ MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_keysize_max(intptr_t pagesize, MDBX_db_flags_t flags); -/** \brief Returns minimal key size in bytes for given database flags. +/** \brief Returns minimal key size in bytes for given table flags. * \ingroup c_statinfo * \see db_flags */ MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_keysize_min(MDBX_db_flags_t flags); /** \brief Returns maximal data size in bytes for given page size - * and database flags, or -1 if pagesize is invalid. + * and table flags, or -1 if pagesize is invalid. * \ingroup c_statinfo * \see db_flags */ MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_valsize_max(intptr_t pagesize, MDBX_db_flags_t flags); -/** \brief Returns minimal data size in bytes for given database flags. +/** \brief Returns minimal data size in bytes for given table flags. 
* \ingroup c_statinfo * \see db_flags */ MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_valsize_min(MDBX_db_flags_t flags); /** \brief Returns maximal size of key-value pair to fit in a single page with - * the given size and database flags, or -1 if pagesize is invalid. + * the given size and table flags, or -1 if pagesize is invalid. * \ingroup c_statinfo * \see db_flags */ MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_pairsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags); /** \brief Returns maximal data size in bytes to fit in a leaf-page or - * single large/overflow-page with the given page size and database flags, + * single large/overflow-page with the given page size and table flags, * or -1 if pagesize is invalid. * \ingroup c_statinfo * \see db_flags */ @@ -3715,12 +3716,12 @@ LIBMDBX_INLINE_API(int, mdbx_env_get_maxreaders, return rc; } -/** \brief Set the maximum number of named databases for the environment. +/** \brief Set the maximum number of named tables for the environment. * \ingroup c_settings * - * This function is only needed if multiple databases will be used in the + * This function is only needed if multiple tables will be used in the * environment. Simpler applications that use the environment as a single - * unnamed database can ignore this option. + * unnamed table can ignore this option. * This function may only be called after \ref mdbx_env_create() and before * \ref mdbx_env_open(). * @@ -3730,7 +3731,7 @@ LIBMDBX_INLINE_API(int, mdbx_env_get_maxreaders, * \see mdbx_env_get_maxdbs() * * \param [in] env An environment handle returned by \ref mdbx_env_create(). - * \param [in] dbs The maximum number of databases. + * \param [in] dbs The maximum number of tables. 
* * \returns A non-zero error value on failure and 0 on success, * some possible errors are: @@ -3740,12 +3741,12 @@ LIBMDBX_INLINE_API(int, mdbx_env_set_maxdbs, (MDBX_env * env, MDBX_dbi dbs)) { return mdbx_env_set_option(env, MDBX_opt_max_db, dbs); } -/** \brief Get the maximum number of named databases for the environment. +/** \brief Get the maximum number of named tables for the environment. * \ingroup c_statinfo * \see mdbx_env_set_maxdbs() * * \param [in] env An environment handle returned by \ref mdbx_env_create(). - * \param [out] dbs Address to store the maximum number of databases. + * \param [out] dbs Address to store the maximum number of tables. * * \returns A non-zero error value on failure and 0 on success, * some possible errors are: @@ -3788,7 +3789,7 @@ LIBMDBX_API int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, * \ingroup c_statinfo * * \param [in] env An environment handle returned by \ref mdbx_env_create(). - * \param [in] flags Database options (\ref MDBX_DUPSORT, \ref MDBX_INTEGERKEY + * \param [in] flags Table options (\ref MDBX_DUPSORT, \ref MDBX_INTEGERKEY * and so on). \see db_flags * * \returns The maximum size of a key can write, @@ -3800,7 +3801,7 @@ mdbx_env_get_maxkeysize_ex(const MDBX_env *env, MDBX_db_flags_t flags); * \ingroup c_statinfo * * \param [in] env An environment handle returned by \ref mdbx_env_create(). - * \param [in] flags Database options (\ref MDBX_DUPSORT, \ref MDBX_INTEGERKEY + * \param [in] flags Table options (\ref MDBX_DUPSORT, \ref MDBX_INTEGERKEY * and so on). \see db_flags * * \returns The maximum size of a data can write, @@ -3815,11 +3816,11 @@ MDBX_DEPRECATED MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_env_get_maxkeysize(const MDBX_env *env); /** \brief Returns maximal size of key-value pair to fit in a single page - * for specified database flags. + * for specified table flags. * \ingroup c_statinfo * * \param [in] env An environment handle returned by \ref mdbx_env_create(). 
- * \param [in] flags Database options (\ref MDBX_DUPSORT, \ref MDBX_INTEGERKEY + * \param [in] flags Table options (\ref MDBX_DUPSORT, \ref MDBX_INTEGERKEY * and so on). \see db_flags * * \returns The maximum size of a data can write, @@ -3828,11 +3829,11 @@ MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_env_get_pairsize4page_max(const MDBX_env *env, MDBX_db_flags_t flags); /** \brief Returns maximal data size in bytes to fit in a leaf-page or - * single large/overflow-page for specified database flags. + * single large/overflow-page for specified table flags. * \ingroup c_statinfo * * \param [in] env An environment handle returned by \ref mdbx_env_create(). - * \param [in] flags Database options (\ref MDBX_DUPSORT, \ref MDBX_INTEGERKEY + * \param [in] flags Table options (\ref MDBX_DUPSORT, \ref MDBX_INTEGERKEY * and so on). \see db_flags * * \returns The maximum size of a data can write, @@ -4117,7 +4118,7 @@ mdbx_txn_id(const MDBX_txn *txn); * \see mdbx_txn_commit_ex() */ struct MDBX_commit_latency { /** \brief Duration of preparation (commit child transactions, update - * sub-databases records and cursors destroying). */ + * table's records and cursors destroying). */ uint32_t preparation; /** \brief Duration of GC update by wall clock. */ uint32_t gc_wallclock; @@ -4499,7 +4500,7 @@ LIBMDBX_API int mdbx_canary_put(MDBX_txn *txn, const MDBX_canary *canary); * \returns A non-zero error value on failure and 0 on success. 
*/ LIBMDBX_API int mdbx_canary_get(const MDBX_txn *txn, MDBX_canary *canary); -/** \brief A callback function used to compare two keys in a database +/** \brief A callback function used to compare two keys in a table * \ingroup c_crud * \see mdbx_cmp() \see mdbx_get_keycmp() * \see mdbx_get_datacmp \see mdbx_dcmp() @@ -4522,23 +4523,23 @@ LIBMDBX_API int mdbx_canary_get(const MDBX_txn *txn, MDBX_canary *canary); typedef int(MDBX_cmp_func)(const MDBX_val *a, const MDBX_val *b) MDBX_CXX17_NOEXCEPT; -/** \brief Open or Create a database in the environment. +/** \brief Open or Create a named table in the environment. * \ingroup c_dbi * - * A database handle denotes the name and parameters of a database, - * independently of whether such a database exists. The database handle may be - * discarded by calling \ref mdbx_dbi_close(). The old database handle is - * returned if the database was already open. The handle may only be closed + * A table handle denotes the name and parameters of a table, + * independently of whether such a table exists. The table handle may be + * discarded by calling \ref mdbx_dbi_close(). The old table handle is + * returned if the table was already open. The handle may only be closed * once. * * \note A notable difference between MDBX and LMDB is that MDBX make handles - * opened for existing databases immediately available for other transactions, + * opened for existing tables immediately available for other transactions, * regardless this transaction will be aborted or reset. The REASON for this is * to avoiding the requirement for multiple opening a same handles in * concurrent read transactions, and tracking of such open but hidden handles * until the completion of read transactions which opened them. * - * Nevertheless, the handle for the NEWLY CREATED database will be invisible + * Nevertheless, the handle for the NEWLY CREATED table will be invisible * for other transactions until the this write transaction is successfully * committed. 
If the write transaction is aborted the handle will be closed * automatically. After a successful commit the such handle will reside in the @@ -4547,15 +4548,15 @@ typedef int(MDBX_cmp_func)(const MDBX_val *a, * In contrast to LMDB, the MDBX allow this function to be called from multiple * concurrent transactions or threads in the same process. * - * To use named database (with name != NULL), \ref mdbx_env_set_maxdbs() + * To use named table (with name != NULL), \ref mdbx_env_set_maxdbs() * must be called before opening the environment. Table names are - * keys in the internal unnamed database, and may be read but not written. + * keys in the internal unnamed table, and may be read but not written. * * \param [in] txn transaction handle returned by \ref mdbx_txn_begin(). - * \param [in] name The name of the database to open. If only a single - * database is needed in the environment, + * \param [in] name The name of the table to open. If only a single + * table is needed in the environment, * this value may be NULL. - * \param [in] flags Special options for this database. This parameter must + * \param [in] flags Special options for this table. This parameter must * be bitwise OR'ing together any of the constants * described here: * @@ -4569,12 +4570,12 @@ typedef int(MDBX_cmp_func)(const MDBX_val *a, * uint64_t, and will be sorted as such. The keys must all be of the * same size and must be aligned while passing as arguments. * - \ref MDBX_DUPSORT - * Duplicate keys may be used in the database. Or, from another point of + * Duplicate keys may be used in the table. Or, from another point of * view, keys may have multiple data items, stored in sorted order. By * default keys must be unique and may have only a single data item. * - \ref MDBX_DUPFIXED * This flag may only be used in combination with \ref MDBX_DUPSORT. 
This - * option tells the library that the data items for this database are + * option tells the library that the data items for this table are * all the same size, which allows further optimizations in storage and * retrieval. When all data items are the same size, the * \ref MDBX_GET_MULTIPLE, \ref MDBX_NEXT_MULTIPLE and @@ -4589,7 +4590,7 @@ typedef int(MDBX_cmp_func)(const MDBX_val *a, * strings in reverse order (the comparison is performed in the direction * from the last byte to the first). * - \ref MDBX_CREATE - * Create the named database if it doesn't exist. This option is not + * Create the named table if it doesn't exist. This option is not * allowed in a read-only transaction or a read-only environment. * * \param [out] dbi Address where the new \ref MDBX_dbi handle @@ -4601,13 +4602,13 @@ typedef int(MDBX_cmp_func)(const MDBX_val *a, * * \returns A non-zero error value on failure and 0 on success, * some possible errors are: - * \retval MDBX_NOTFOUND The specified database doesn't exist in the + * \retval MDBX_NOTFOUND The specified table doesn't exist in the * environment and \ref MDBX_CREATE was not specified. - * \retval MDBX_DBS_FULL Too many databases have been opened. + * \retval MDBX_DBS_FULL Too many tables have been opened. * \see mdbx_env_set_maxdbs() - * \retval MDBX_INCOMPATIBLE Database is incompatible with given flags, + * \retval MDBX_INCOMPATIBLE Table is incompatible with given flags, * i.e. the passed flags is different with which the - * database was created, or the database was already + * table was created, or the table was already * opened with a different comparison function(s). * \retval MDBX_THREAD_MISMATCH Given transaction is not owned * by current thread. */ @@ -4626,12 +4627,12 @@ LIBMDBX_API int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, * "avoid using custom comparators" and use \ref mdbx_dbi_open() instead. * * \param [in] txn transaction handle returned by \ref mdbx_txn_begin(). 
- * \param [in] name The name of the database to open. If only a single - * database is needed in the environment, + * \param [in] name The name of the table to open. If only a single + * table is needed in the environment, * this value may be NULL. - * \param [in] flags Special options for this database. - * \param [in] keycmp Optional custom key comparison function for a database. - * \param [in] datacmp Optional custom data comparison function for a database. + * \param [in] flags Special options for this table. + * \param [in] keycmp Optional custom key comparison function for a table. + * \param [in] datacmp Optional custom data comparison function for a table. * \param [out] dbi Address where the new MDBX_dbi handle will be stored. * \returns A non-zero error value on failure and 0 on success. */ MDBX_DEPRECATED LIBMDBX_API int @@ -4643,15 +4644,16 @@ MDBX_DEPRECATED LIBMDBX_API int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); -/** \brief Переименовает таблицу по DBI-дескриптору. +/** \brief Переименовывает таблицу по DBI-дескриптору + * * \ingroup c_dbi * - * Переименовывает пользовательскую именованную subDB связанную с передаваемым + * Переименовывает пользовательскую именованную таблицу связанную с передаваемым * DBI-дескриптором. * * \param [in,out] txn Пишущая транзакция запущенная посредством * \ref mdbx_txn_begin(). - * \param [in] dbi Дескриптор таблицы (именованной пользовательской subDB) + * \param [in] dbi Дескриптор таблицы * открытый посредством \ref mdbx_dbi_open(). * * \param [in] name Новое имя для переименования. @@ -4667,10 +4669,10 @@ LIBMDBX_API int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, * пользовательских именованных таблиц. * * \ingroup c_statinfo - * \see mdbx_enumerate_subdb() + * \see mdbx_enumerate_tables() * * \param [in] ctx Указатель на контекст переданный аналогичным - * параметром в \ref mdbx_enumerate_subdb().
+ * параметром в \ref mdbx_enumerate_tables(). * \param [in] txn Транзазакция. * \param [in] name Имя таблицы. * \param [in] flags Флаги \ref MDBX_db_flags_t. @@ -4682,7 +4684,7 @@ LIBMDBX_API int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, * \returns Ноль при успехе и продолжении перечисления, при возвращении другого * значения оно будет немедленно возвращено вызывающему * без продолжения перечисления. */ -typedef int(MDBX_subdb_enum_func)(void *ctx, const MDBX_txn *txn, +typedef int(MDBX_table_enum_func)(void *ctx, const MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, const struct MDBX_stat *stat, MDBX_dbi dbi) MDBX_CXX17_NOEXCEPT; @@ -4696,19 +4698,19 @@ typedef int(MDBX_subdb_enum_func)(void *ctx, const MDBX_txn *txn, * сразу возвращено в качестве результата. * * \ingroup c_statinfo - * \see MDBX_subdb_enum_func + * \see MDBX_table_enum_func * * \param [in] txn Транзакция запущенная посредством * \ref mdbx_txn_begin(). * \param [in] func Указатель на пользовательскую функцию - * с сигнатурой \ref MDBX_subdb_enum_func, + * с сигнатурой \ref MDBX_table_enum_func, * которая будет вызвана для каждой таблицы. * \param [in] ctx Указатель на некоторый контект, который будет передан * в функцию `func()` как есть. * * \returns Ненулевое значение кода ошибки, либо 0 при успешном выполнении. */ -LIBMDBX_API int mdbx_enumerate_subdb(const MDBX_txn *txn, - MDBX_subdb_enum_func *func, void *ctx); +LIBMDBX_API int mdbx_enumerate_tables(const MDBX_txn *txn, + MDBX_table_enum_func *func, void *ctx); /** \defgroup value2key Value-to-Key functions * \brief Value-to-Key functions to @@ -4768,11 +4770,11 @@ MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int64_t mdbx_int64_from_key(const MDBX_val); /** end of value2key @} */ -/** \brief Retrieve statistics for a database. +/** \brief Retrieve statistics for a table. * \ingroup c_statinfo * * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). 
- * \param [in] dbi A database handle returned by \ref mdbx_dbi_open(). + * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). * \param [out] stat The address of an \ref MDBX_stat structure where * the statistics will be copied. * \param [in] bytes The size of \ref MDBX_stat. @@ -4786,11 +4788,11 @@ LIBMDBX_API int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *stat, size_t bytes); /** \brief Retrieve depth (bitmask) information of nested dupsort (multi-value) - * B+trees for given database. + * B+trees for given table. * \ingroup c_statinfo * * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). - * \param [in] dbi A database handle returned by \ref mdbx_dbi_open(). + * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). * \param [out] mask The address of an uint32_t value where the bitmask * will be stored. * @@ -4799,7 +4801,7 @@ LIBMDBX_API int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, * \retval MDBX_THREAD_MISMATCH Given transaction is not owned * by current thread. * \retval MDBX_EINVAL An invalid parameter was specified. - * \retval MDBX_RESULT_TRUE The dbi isn't a dupsort (multi-value) database. */ + * \retval MDBX_RESULT_TRUE The dbi isn't a dupsort (multi-value) table. */ LIBMDBX_API int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, uint32_t *mask); @@ -4818,13 +4820,13 @@ typedef enum MDBX_dbi_state { } MDBX_dbi_state_t; DEFINE_ENUM_FLAG_OPERATORS(MDBX_dbi_state) -/** \brief Retrieve the DB flags and status for a database handle. +/** \brief Retrieve the DB flags and status for a table handle. * \ingroup c_statinfo * \see MDBX_db_flags_t * \see MDBX_dbi_state_t * * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). - * \param [in] dbi A database handle returned by \ref mdbx_dbi_open(). + * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). * \param [out] flags Address where the flags will be returned. 
* \param [out] state Address where the state will be returned. * @@ -4841,10 +4843,10 @@ LIBMDBX_INLINE_API(int, mdbx_dbi_flags, return mdbx_dbi_flags_ex(txn, dbi, flags, &state); } -/** \brief Close a database handle. Normally unnecessary. +/** \brief Close a table handle. Normally unnecessary. * \ingroup c_dbi * - * Closing a database handle is not necessary, but lets \ref mdbx_dbi_open() + * Closing a table handle is not necessary, but lets \ref mdbx_dbi_open() * reuse the handle value. Usually it's better to set a bigger * \ref mdbx_env_set_maxdbs(), unless that value would be large. * @@ -4854,68 +4856,68 @@ LIBMDBX_INLINE_API(int, mdbx_dbi_flags, * (\ref MithrilDB) will solve this issue. * * Handles should only be closed if no other threads are going to reference - * the database handle or one of its cursors any further. Do not close a handle - * if an existing transaction has modified its database. Doing so can cause - * misbehavior from database corruption to errors like \ref MDBX_BAD_DBI + * the table handle or one of its cursors any further. Do not close a handle + * if an existing transaction has modified its table. Doing so can cause + * misbehavior from table corruption to errors like \ref MDBX_BAD_DBI * (since the DB name is gone). * * \param [in] env An environment handle returned by \ref mdbx_env_create(). - * \param [in] dbi A database handle returned by \ref mdbx_dbi_open(). + * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). * * \returns A non-zero error value on failure and 0 on success. */ LIBMDBX_API int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi); -/** \brief Empty or delete and close a database. +/** \brief Empty or delete and close a table. * \ingroup c_crud * * \see mdbx_dbi_close() \see mdbx_dbi_open() * * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). - * \param [in] dbi A database handle returned by \ref mdbx_dbi_open(). + * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). 
* \param [in] del `false` to empty the DB, `true` to delete it * from the environment and close the DB handle. * * \returns A non-zero error value on failure and 0 on success. */ LIBMDBX_API int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del); -/** \brief Get items from a database. +/** \brief Get items from a table. * \ingroup c_crud * - * This function retrieves key/data pairs from the database. The address + * This function retrieves key/data pairs from the table. The address * and length of the data associated with the specified key are returned * in the structure to which data refers. - * If the database supports duplicate keys (\ref MDBX_DUPSORT) then the + * If the table supports duplicate keys (\ref MDBX_DUPSORT) then the * first data item for the key will be returned. Retrieval of other * items requires the use of \ref mdbx_cursor_get(). * * \note The memory pointed to by the returned values is owned by the - * database. The caller MUST not dispose of the memory, and MUST not modify it + * table. The caller MUST not dispose of the memory, and MUST not modify it * in any way regardless in a read-only nor read-write transactions! - * For case a database opened without the \ref MDBX_WRITEMAP modification - * attempts likely will cause a `SIGSEGV`. However, when a database opened with + * For case a table opened without the \ref MDBX_WRITEMAP modification + * attempts likely will cause a `SIGSEGV`. However, when a table opened with * the \ref MDBX_WRITEMAP or in case values returned inside read-write * transaction are located on a "dirty" (modified and pending to commit) pages, * such modification will silently accepted and likely will lead to DB and/or * data corruption. * - * \note Values returned from the database are valid only until a + * \note Values returned from the table are valid only until a * subsequent update operation, or the end of the transaction. * * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). 
- * \param [in] dbi A database handle returned by \ref mdbx_dbi_open(). - * \param [in] key The key to search for in the database. + * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). + * \param [in] key The key to search for in the table. * \param [in,out] data The data corresponding to the key. * * \returns A non-zero error value on failure and 0 on success, * some possible errors are: * \retval MDBX_THREAD_MISMATCH Given transaction is not owned * by current thread. - * \retval MDBX_NOTFOUND The key was not in the database. + * \retval MDBX_NOTFOUND The key was not in the table. * \retval MDBX_EINVAL An invalid parameter was specified. */ LIBMDBX_API int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data); -/** \brief Get items from a database +/** \brief Get items from a table * and optionally number of data items for a given key. * * \ingroup c_crud @@ -4925,30 +4927,30 @@ LIBMDBX_API int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, * 1. If values_count is NOT NULL, then returns the count * of multi-values/duplicates for a given key. * 2. Updates BOTH the key and the data for pointing to the actual key-value - * pair inside the database. + * pair inside the table. * * \param [in] txn A transaction handle returned * by \ref mdbx_txn_begin(). - * \param [in] dbi A database handle returned by \ref mdbx_dbi_open(). - * \param [in,out] key The key to search for in the database. + * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). + * \param [in,out] key The key to search for in the table. * \param [in,out] data The data corresponding to the key. * \param [out] values_count The optional address to return number of values * associated with given key: * = 0 - in case \ref MDBX_NOTFOUND error; - * = 1 - exactly for databases + * = 1 - exactly for tables * WITHOUT \ref MDBX_DUPSORT; - * >= 1 for databases WITH \ref MDBX_DUPSORT. + * >= 1 for tables WITH \ref MDBX_DUPSORT. 
* * \returns A non-zero error value on failure and 0 on success, * some possible errors are: * \retval MDBX_THREAD_MISMATCH Given transaction is not owned * by current thread. - * \retval MDBX_NOTFOUND The key was not in the database. + * \retval MDBX_NOTFOUND The key was not in the table. * \retval MDBX_EINVAL An invalid parameter was specified. */ LIBMDBX_API int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, size_t *values_count); -/** \brief Get equal or great item from a database. +/** \brief Get equal or great item from a table. * \ingroup c_crud * * Briefly this function does the same as \ref mdbx_get() with a few @@ -4956,17 +4958,17 @@ LIBMDBX_API int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, * 1. Return equal or great (due comparison function) key-value * pair, but not only exactly matching with the key. * 2. On success return \ref MDBX_SUCCESS if key found exactly, - * and \ref MDBX_RESULT_TRUE otherwise. Moreover, for databases with + * and \ref MDBX_RESULT_TRUE otherwise. Moreover, for tables with * \ref MDBX_DUPSORT flag the data argument also will be used to match over * multi-value/duplicates, and \ref MDBX_SUCCESS will be returned only when * BOTH the key and the data match exactly. * 3. Updates BOTH the key and the data for pointing to the actual key-value - * pair inside the database. + * pair inside the table. * * \param [in] txn A transaction handle returned * by \ref mdbx_txn_begin(). - * \param [in] dbi A database handle returned by \ref mdbx_dbi_open(). - * \param [in,out] key The key to search for in the database. + * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). + * \param [in,out] key The key to search for in the table. * \param [in,out] data The data corresponding to the key. 
* * \returns A non-zero error value on failure and \ref MDBX_RESULT_FALSE @@ -4974,43 +4976,43 @@ LIBMDBX_API int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, * Some possible errors are: * \retval MDBX_THREAD_MISMATCH Given transaction is not owned * by current thread. - * \retval MDBX_NOTFOUND The key was not in the database. + * \retval MDBX_NOTFOUND The key was not in the table. * \retval MDBX_EINVAL An invalid parameter was specified. */ LIBMDBX_API int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data); -/** \brief Store items into a database. +/** \brief Store items into a table. * \ingroup c_crud * - * This function stores key/data pairs in the database. The default behavior + * This function stores key/data pairs in the table. The default behavior * is to enter the new key/data pair, replacing any previously existing key * if duplicates are disallowed, or adding a duplicate data item if * duplicates are allowed (see \ref MDBX_DUPSORT). * * \param [in] txn A transaction handle returned * by \ref mdbx_txn_begin(). - * \param [in] dbi A database handle returned by \ref mdbx_dbi_open(). - * \param [in] key The key to store in the database. + * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). + * \param [in] key The key to store in the table. * \param [in,out] data The data to store. * \param [in] flags Special options for this operation. * This parameter must be set to 0 or by bitwise OR'ing * together one or more of the values described here: * - \ref MDBX_NODUPDATA * Enter the new key-value pair only if it does not already appear - * in the database. This flag may only be specified if the database + * in the table. This flag may only be specified if the table * was opened with \ref MDBX_DUPSORT. The function will return - * \ref MDBX_KEYEXIST if the key/data pair already appears in the database. + * \ref MDBX_KEYEXIST if the key/data pair already appears in the table. 
* * - \ref MDBX_NOOVERWRITE * Enter the new key/data pair only if the key does not already appear - * in the database. The function will return \ref MDBX_KEYEXIST if the key - * already appears in the database, even if the database supports + * in the table. The function will return \ref MDBX_KEYEXIST if the key + * already appears in the table, even if the table supports * duplicates (see \ref MDBX_DUPSORT). The data parameter will be set * to point to the existing item. * * - \ref MDBX_CURRENT * Update an single existing entry, but not add new ones. The function will - * return \ref MDBX_NOTFOUND if the given key not exist in the database. + * return \ref MDBX_NOTFOUND if the given key not exist in the table. * In case multi-values for the given key, with combination of * the \ref MDBX_ALLDUPS will replace all multi-values, * otherwise return the \ref MDBX_EMULTIVAL. @@ -5022,10 +5024,10 @@ LIBMDBX_API int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, * transaction ends. This saves an extra memcpy if the data is being * generated later. MDBX does nothing else with this memory, the caller * is expected to modify all of the space requested. This flag must not - * be specified if the database was opened with \ref MDBX_DUPSORT. + * be specified if the table was opened with \ref MDBX_DUPSORT. * * - \ref MDBX_APPEND - * Append the given key/data pair to the end of the database. This option + * Append the given key/data pair to the end of the table. This option * allows fast bulk loading when keys are already known to be in the * correct order. Loading unsorted keys with this flag will cause * a \ref MDBX_EKEYMISMATCH error. @@ -5035,14 +5037,14 @@ LIBMDBX_API int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, * * - \ref MDBX_MULTIPLE * Store multiple contiguous data elements in a single request. This flag - * may only be specified if the database was opened with + * may only be specified if the table was opened with * \ref MDBX_DUPFIXED. 
With combination the \ref MDBX_ALLDUPS * will replace all multi-values. * The data argument must be an array of two \ref MDBX_val. The `iov_len` * of the first \ref MDBX_val must be the size of a single data element. * The `iov_base` of the first \ref MDBX_val must point to the beginning * of the array of contiguous data elements which must be properly aligned - * in case of database with \ref MDBX_INTEGERDUP flag. + * in case of table with \ref MDBX_INTEGERDUP flag. * The `iov_len` of the second \ref MDBX_val must be the count of the * number of data elements to store. On return this field will be set to * the count of the number of elements actually written. The `iov_base` of @@ -5054,7 +5056,7 @@ LIBMDBX_API int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, * some possible errors are: * \retval MDBX_THREAD_MISMATCH Given transaction is not owned * by current thread. - * \retval MDBX_KEYEXIST The key/value pair already exists in the database. + * \retval MDBX_KEYEXIST The key/value pair already exists in the table. * \retval MDBX_MAP_FULL The database is full, see \ref mdbx_env_set_mapsize(). * \retval MDBX_TXN_FULL The transaction has too many dirty pages. * \retval MDBX_EACCES An attempt was made to write @@ -5063,7 +5065,7 @@ LIBMDBX_API int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, LIBMDBX_API int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, MDBX_put_flags_t flags); -/** \brief Replace items in a database. +/** \brief Replace items in a table. * \ingroup c_crud * * This function allows to update or delete an existing value at the same time @@ -5078,7 +5080,7 @@ LIBMDBX_API int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, * field pointed by old_data argument to the appropriate value, without * performing any changes. * - * For databases with non-unique keys (i.e. with \ref MDBX_DUPSORT flag), + * For tables with non-unique keys (i.e. 
with \ref MDBX_DUPSORT flag), * another use case is also possible, when by old_data argument selects a * specific item from multi-value/duplicates with the same key for deletion or * update. To select this scenario in flags should simultaneously specify @@ -5088,8 +5090,8 @@ LIBMDBX_API int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, * * \param [in] txn A transaction handle returned * by \ref mdbx_txn_begin(). - * \param [in] dbi A database handle returned by \ref mdbx_dbi_open(). - * \param [in] key The key to store in the database. + * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). + * \param [in] key The key to store in the table. * \param [in] new_data The data to store, if NULL then deletion will * be performed. * \param [in,out] old_data The buffer for retrieve previous value as describe @@ -5118,24 +5120,24 @@ LIBMDBX_API int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, MDBX_preserve_func preserver, void *preserver_context); -/** \brief Delete items from a database. +/** \brief Delete items from a table. * \ingroup c_crud * - * This function removes key/data pairs from the database. + * This function removes key/data pairs from the table. * - * \note The data parameter is NOT ignored regardless the database does + * \note The data parameter is NOT ignored regardless the table does * support sorted duplicate data items or not. If the data parameter * is non-NULL only the matching data item will be deleted. Otherwise, if data * parameter is NULL, any/all value(s) for specified key will be deleted. * * This function will return \ref MDBX_NOTFOUND if the specified key/data - * pair is not in the database. + * pair is not in the table. * * \see \ref c_crud_hints "Quick reference for Insert/Update/Delete operations" * * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). - * \param [in] dbi A database handle returned by \ref mdbx_dbi_open(). - * \param [in] key The key to delete from the database. 
+ * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). + * \param [in] key The key to delete from the table. * \param [in] data The data to delete. * * \returns A non-zero error value on failure and 0 on success, @@ -5149,7 +5151,7 @@ LIBMDBX_API int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, /** \brief Create a cursor handle but not bind it to transaction nor DBI-handle. * \ingroup c_cursors * - * A cursor cannot be used when its database handle is closed. Nor when its + * A cursor cannot be used when its table handle is closed. Nor when its * transaction has ended, except with \ref mdbx_cursor_bind() and \ref * mdbx_cursor_renew(). Also it can be discarded with \ref mdbx_cursor_close(). * @@ -5200,7 +5202,7 @@ mdbx_cursor_get_userctx(const MDBX_cursor *cursor); * \ref mdbx_cursor_renew() but with specifying an arbitrary DBI-handle. * * A cursor may be associated with a new transaction, and referencing a new or - * the same database handle as it was created with. This may be done whether the + * the same table handle as it was created with. This may be done whether the * previous transaction is live or dead. * * \note In contrast to LMDB, the MDBX required that any opened cursors can be @@ -5210,7 +5212,7 @@ mdbx_cursor_get_userctx(const MDBX_cursor *cursor); * memory corruption and segfaults. * * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). - * \param [in] dbi A database handle returned by \ref mdbx_dbi_open(). + * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). * \param [in] cursor A cursor handle returned by \ref mdbx_cursor_create(). * * \returns A non-zero error value on failure and 0 on success, @@ -5264,7 +5266,7 @@ LIBMDBX_API int mdbx_cursor_reset(MDBX_cursor *cursor); * Using of the `mdbx_cursor_open()` is equivalent to calling * \ref mdbx_cursor_create() and then \ref mdbx_cursor_bind() functions. * - * A cursor cannot be used when its database handle is closed. 
Nor when its + * A cursor cannot be used when its table handle is closed. Nor when its * transaction has ended, except with \ref mdbx_cursor_bind() and \ref * mdbx_cursor_renew(). Also it can be discarded with \ref mdbx_cursor_close(). * @@ -5279,7 +5281,7 @@ LIBMDBX_API int mdbx_cursor_reset(MDBX_cursor *cursor); * memory corruption and segfaults. * * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). - * \param [in] dbi A database handle returned by \ref mdbx_dbi_open(). + * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). * \param [out] cursor Address where the new \ref MDBX_cursor handle will be * stored. * @@ -5361,7 +5363,7 @@ LIBMDBX_API int mdbx_cursor_renew(const MDBX_txn *txn, MDBX_cursor *cursor); MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API MDBX_txn * mdbx_cursor_txn(const MDBX_cursor *cursor); -/** \brief Return the cursor's database handle. +/** \brief Return the cursor's table handle. * \ingroup c_cursors * * \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open(). */ @@ -5407,7 +5409,7 @@ LIBMDBX_API int mdbx_cursor_compare(const MDBX_cursor *left, /** \brief Retrieve by cursor. * \ingroup c_crud * - * This function retrieves key/data pairs from the database. The address and + * This function retrieves key/data pairs from the table. The address and * length of the key are returned in the object to which key refers (except * for the case of the \ref MDBX_SET option, in which the key object is * unchanged), and the address and length of the data are returned in the object @@ -5496,12 +5498,11 @@ typedef int(MDBX_predicate_func)(void *context, MDBX_val *key, MDBX_val *value, * DSO-трансграничных вызовов. * * Функция принимает курсор, который должен быть привязан к некоторой транзакции - * и DBI-дескриптору таблицы (именованной пользовательской subDB), выполняет - * первоначальное позиционирование курсора определяемое аргументом `start_op`. 
- * Далее, производится оценка каждой пары ключ-значения посредством - * предоставляемой вами предикативной функции `predicate` и затем, при - * необходимости, переход к следующему элементу посредством операции `turn_op`, - * до наступления одного из четырех событий: + * и DBI-дескриптору таблицы, выполняет первоначальное позиционирование курсора + * определяемое аргументом `start_op`. Далее, производится оценка каждой пары + * ключ-значения посредством предоставляемой вами предикативной функции + * `predicate` и затем, при необходимости, переход к следующему элементу + * посредством операции `turn_op`, до наступления одного из четырех событий: * - достигается конец данных; * - возникнет ошибка при позиционировании курсора; * - оценочная функция вернет \ref MDBX_RESULT_TRUE, сигнализируя @@ -5565,13 +5566,12 @@ LIBMDBX_API int mdbx_cursor_scan(MDBX_cursor *cursor, * \ingroup c_crud * * Функция принимает курсор, который должен быть привязан к некоторой транзакции - * и DBI-дескриптору таблицы (именованной пользовательской subDB), выполняет - * первоначальное позиционирование курсора определяемое аргументом `from_op`. - * а также аргументами `from_key` и `from_value`. - * Далее, производится оценка каждой пары ключ-значения посредством - * предоставляемой вами предикативной функции `predicate` и затем, при - * необходимости, переход к следующему элементу посредством операции `turn_op`, - * до наступления одного из четырех событий: + * и DBI-дескриптору таблицы, выполняет первоначальное позиционирование курсора + * определяемое аргументом `from_op`. а также аргументами `from_key` и + * `from_value`. 
Далее, производится оценка каждой пары ключ-значения + * посредством предоставляемой вами предикативной функции `predicate` и затем, + * при необходимости, переход к следующему элементу посредством операции + * `turn_op`, до наступления одного из четырех событий: * - достигается конец данных; * - возникнет ошибка при позиционировании курсора; * - оценочная функция вернет \ref MDBX_RESULT_TRUE, сигнализируя @@ -5653,8 +5653,8 @@ LIBMDBX_API int mdbx_cursor_scan_from(MDBX_cursor *cursor, /** \brief Retrieve multiple non-dupsort key/value pairs by cursor. * \ingroup c_crud * - * This function retrieves multiple key/data pairs from the database without - * \ref MDBX_DUPSORT option. For `MDBX_DUPSORT` databases please + * This function retrieves multiple key/data pairs from the table without + * \ref MDBX_DUPSORT option. For `MDBX_DUPSORT` tables please * use \ref MDBX_GET_MULTIPLE and \ref MDBX_NEXT_MULTIPLE. * * The number of key and value items is returned in the `size_t count` @@ -5698,7 +5698,7 @@ LIBMDBX_API int mdbx_cursor_get_batch(MDBX_cursor *cursor, size_t *count, /** \brief Store by cursor. * \ingroup c_crud * - * This function stores key/data pairs into the database. The cursor is + * This function stores key/data pairs into the table. The cursor is * positioned at the new item, or on failure usually near it. * * \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open(). @@ -5719,14 +5719,14 @@ LIBMDBX_API int mdbx_cursor_get_batch(MDBX_cursor *cursor, size_t *count, * * - \ref MDBX_NODUPDATA * Enter the new key-value pair only if it does not already appear in the - * database. This flag may only be specified if the database was opened + * table. This flag may only be specified if the table was opened * with \ref MDBX_DUPSORT. The function will return \ref MDBX_KEYEXIST - * if the key/data pair already appears in the database. + * if the key/data pair already appears in the table. 
* * - \ref MDBX_NOOVERWRITE * Enter the new key/data pair only if the key does not already appear - * in the database. The function will return \ref MDBX_KEYEXIST if the key - * already appears in the database, even if the database supports + * in the table. The function will return \ref MDBX_KEYEXIST if the key + * already appears in the table, even if the table supports * duplicates (\ref MDBX_DUPSORT). * * - \ref MDBX_RESERVE @@ -5734,11 +5734,11 @@ LIBMDBX_API int mdbx_cursor_get_batch(MDBX_cursor *cursor, size_t *count, * data. Instead, return a pointer to the reserved space, which the * caller can fill in later - before the next update operation or the * transaction ends. This saves an extra memcpy if the data is being - * generated later. This flag must not be specified if the database + * generated later. This flag must not be specified if the table * was opened with \ref MDBX_DUPSORT. * * - \ref MDBX_APPEND - * Append the given key/data pair to the end of the database. No key + * Append the given key/data pair to the end of the table. No key * comparisons are performed. This option allows fast bulk loading when * keys are already known to be in the correct order. Loading unsorted * keys with this flag will cause a \ref MDBX_KEYEXIST error. @@ -5748,14 +5748,14 @@ LIBMDBX_API int mdbx_cursor_get_batch(MDBX_cursor *cursor, size_t *count, * * - \ref MDBX_MULTIPLE * Store multiple contiguous data elements in a single request. This flag - * may only be specified if the database was opened with + * may only be specified if the table was opened with * \ref MDBX_DUPFIXED. With combination the \ref MDBX_ALLDUPS * will replace all multi-values. * The data argument must be an array of two \ref MDBX_val. The `iov_len` * of the first \ref MDBX_val must be the size of a single data element. 
* The `iov_base` of the first \ref MDBX_val must point to the beginning * of the array of contiguous data elements which must be properly aligned - * in case of database with \ref MDBX_INTEGERDUP flag. + * in case of table with \ref MDBX_INTEGERDUP flag. * The `iov_len` of the second \ref MDBX_val must be the count of the * number of data elements to store. On return this field will be set to * the count of the number of elements actually written. The `iov_base` of @@ -5794,7 +5794,7 @@ LIBMDBX_API int mdbx_cursor_put(MDBX_cursor *cursor, const MDBX_val *key, * - \ref MDBX_ALLDUPS * or \ref MDBX_NODUPDATA (supported for compatibility) * Delete all of the data items for the current key. This flag has effect - * only for database(s) was created with \ref MDBX_DUPSORT. + * only for table(s) was created with \ref MDBX_DUPSORT. * * \see \ref c_crud_hints "Quick reference for Insert/Update/Delete operations" * @@ -5813,7 +5813,7 @@ LIBMDBX_API int mdbx_cursor_del(MDBX_cursor *cursor, MDBX_put_flags_t flags); /** \brief Return count of duplicates for current key. * \ingroup c_crud * - * This call is valid for all databases, but reasonable only for that support + * This call is valid for all tables, but reasonable only for that support * sorted duplicate data items \ref MDBX_DUPSORT. * * \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open(). @@ -5933,7 +5933,7 @@ mdbx_cursor_on_last_dup(const MDBX_cursor *cursor); * Please see notes on accuracy of the result in the details * of \ref c_rqest section. * - * Both cursors must be initialized for the same database and the same + * Both cursors must be initialized for the same table and the same * transaction. * * \param [in] first The first cursor for estimation. @@ -5982,7 +5982,7 @@ LIBMDBX_API int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, * * \param [in] txn A transaction handle returned * by \ref mdbx_txn_begin(). - * \param [in] dbi A database handle returned by \ref mdbx_dbi_open(). 
+ * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). * \param [in] begin_key The key of range beginning or NULL for explicit FIRST. * \param [in] begin_data Optional additional data to seeking among sorted * duplicates. @@ -6041,18 +6041,18 @@ LIBMDBX_API int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr); -/** \brief Sequence generation for a database. +/** \brief Sequence generation for a table. * \ingroup c_crud * * The function allows to create a linear sequence of unique positive integers - * for each database. The function can be called for a read transaction to + * for each table. The function can be called for a read transaction to * retrieve the current sequence value, and the increment must be zero. * Sequence changes become visible outside the current write transaction after * it is committed, and discarded on abort. * * \param [in] txn A transaction handle returned * by \ref mdbx_txn_begin(). - * \param [in] dbi A database handle returned by \ref mdbx_dbi_open(). + * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). * \param [out] result The optional address where the value of sequence * before the change will be stored. * \param [in] increment Value to increase the sequence, @@ -6065,17 +6065,17 @@ MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_is_dirty(const MDBX_txn *txn, LIBMDBX_API int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, uint64_t increment); -/** \brief Compare two keys according to a particular database. +/** \brief Compare two keys according to a particular table. * \ingroup c_crud * \see MDBX_cmp_func * * This returns a comparison as if the two data items were keys in the - * specified database. + * specified table. * * \warning There ss a Undefined behavior if one of arguments is invalid. * * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). 
- * \param [in] dbi A database handle returned by \ref mdbx_dbi_open(). + * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). * \param [in] a The first item to compare. * \param [in] b The second item to compare. * @@ -6085,22 +6085,22 @@ MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cmp(const MDBX_txn *txn, const MDBX_val *a, const MDBX_val *b); -/** \brief Returns default internal key's comparator for given database flags. +/** \brief Returns default internal key's comparator for given table flags. * \ingroup c_extra */ MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API MDBX_cmp_func * mdbx_get_keycmp(MDBX_db_flags_t flags); -/** \brief Compare two data items according to a particular database. +/** \brief Compare two data items according to a particular table. * \ingroup c_crud * \see MDBX_cmp_func * * This returns a comparison as if the two items were data items of the - * specified database. + * specified table. * * \warning There ss a Undefined behavior if one of arguments is invalid. * * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). - * \param [in] dbi A database handle returned by \ref mdbx_dbi_open(). + * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). * \param [in] a The first item to compare. * \param [in] b The second item to compare. 
* @@ -6110,7 +6110,7 @@ MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_dcmp(const MDBX_txn *txn, const MDBX_val *a, const MDBX_val *b); -/** \brief Returns default internal data's comparator for given database flags +/** \brief Returns default internal data's comparator for given table flags * \ingroup c_extra */ MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API MDBX_cmp_func * mdbx_get_datacmp(MDBX_db_flags_t flags); @@ -6477,7 +6477,7 @@ typedef enum MDBX_chk_stage { MDBX_chk_gc, MDBX_chk_space, MDBX_chk_maindb, - MDBX_chk_subdbs, + MDBX_chk_tables, MDBX_chk_conclude, MDBX_chk_unlock, MDBX_chk_finalize @@ -6517,7 +6517,7 @@ typedef struct MDBX_chk_scope { /** \brief Пользовательский тип для привязки дополнительных данных, * связанных с некоторой таблицей ключ-значение, при проверке целостности базы * данных. \see mdbx_env_chk() */ -typedef struct MDBX_chk_user_subdb_cookie MDBX_chk_user_subdb_cookie_t; +typedef struct MDBX_chk_user_table_cookie MDBX_chk_user_table_cookie_t; /** \brief Гистограмма с некоторой статистической информацией, * собираемой при проверке целостности БД. @@ -6532,8 +6532,8 @@ struct MDBX_chk_histogram { /** \brief Информация о некоторой таблицей ключ-значение, * при проверке целостности базы данных. * \see mdbx_env_chk() */ -typedef struct MDBX_chk_subdb { - MDBX_chk_user_subdb_cookie_t *cookie; +typedef struct MDBX_chk_table { + MDBX_chk_user_table_cookie_t *cookie; /** \brief Pseudo-name for MainDB */ #define MDBX_CHK_MAIN ((void *)((ptrdiff_t)0)) @@ -6564,7 +6564,7 @@ typedef struct MDBX_chk_subdb { /// Values length histogram struct MDBX_chk_histogram val_len; } histogram; -} MDBX_chk_subdb_t; +} MDBX_chk_table_t; /** \brief Контекст проверки целостности базы данных. 
* \see mdbx_env_chk() */ @@ -6576,17 +6576,17 @@ typedef struct MDBX_chk_context { uint8_t scope_nesting; struct { size_t total_payload_bytes; - size_t subdb_total, subdb_processed; + size_t table_total, table_processed; size_t total_unused_bytes, unused_pages; size_t processed_pages, reclaimable_pages, gc_pages, alloc_pages, backed_pages; size_t problems_meta, tree_problems, gc_tree_problems, kv_tree_problems, problems_gc, problems_kv, total_problems; uint64_t steady_txnid, recent_txnid; - /** Указатель на массив размером subdb_total с указателями на экземпляры - * структур MDBX_chk_subdb_t с информацией о всех таблицах ключ-значение, + /** Указатель на массив размером table_total с указателями на экземпляры + * структур MDBX_chk_table_t с информацией о всех таблицах ключ-значение, * включая MainDB и GC/FreeDB. */ - const MDBX_chk_subdb_t *const *subdbs; + const MDBX_chk_table_t *const *tables; } result; } MDBX_chk_context_t; @@ -6616,14 +6616,14 @@ typedef struct MDBX_chk_callbacks { void (*issue)(MDBX_chk_context_t *ctx, const char *object, uint64_t entry_number, const char *issue, const char *extra_fmt, va_list extra_args); - MDBX_chk_user_subdb_cookie_t *(*subdb_filter)(MDBX_chk_context_t *ctx, + MDBX_chk_user_table_cookie_t *(*table_filter)(MDBX_chk_context_t *ctx, const MDBX_val *name, MDBX_db_flags_t flags); - int (*subdb_conclude)(MDBX_chk_context_t *ctx, const MDBX_chk_subdb_t *subdb, + int (*table_conclude)(MDBX_chk_context_t *ctx, const MDBX_chk_table_t *table, MDBX_cursor *cursor, int err); - void (*subdb_dispose)(MDBX_chk_context_t *ctx, const MDBX_chk_subdb_t *subdb); + void (*table_dispose)(MDBX_chk_context_t *ctx, const MDBX_chk_table_t *table); - int (*subdb_handle_kv)(MDBX_chk_context_t *ctx, const MDBX_chk_subdb_t *subdb, + int (*table_handle_kv)(MDBX_chk_context_t *ctx, const MDBX_chk_table_t *table, size_t entry_number, const MDBX_val *key, const MDBX_val *value); diff --git a/mdbx.h++ b/mdbx.h++ index 0d2e8d09..9c4101f9 100644 --- a/mdbx.h++ 
+++ b/mdbx.h++ @@ -3537,8 +3537,8 @@ enum put_mode { /// instances, but does not destroys the represented underlying object from the /// own class destructor. /// -/// An environment supports multiple key-value sub-databases (aka key-value -/// spaces or tables), all residing in the same shared-memory map. +/// An environment supports multiple key-value tables (aka key-value +/// maps, spaces or sub-databases), all residing in the same shared-memory map. class LIBMDBX_API_TYPE env { friend class txn; @@ -4101,7 +4101,7 @@ public: /// environment is busy by other thread or none of the thresholds are reached. bool poll_sync_to_disk() { return sync_to_disk(false, true); } - /// \brief Close a key-value map (aka sub-database) handle. Normally + /// \brief Close a key-value map (aka table) handle. Normally /// unnecessary. /// /// Closing a database handle is not necessary, but lets \ref txn::open_map() @@ -4519,12 +4519,12 @@ public: #endif /* __cpp_lib_string_view >= 201606L */ using map_stat = ::MDBX_stat; - /// \brief Returns statistics for a sub-database. + /// \brief Returns statistics for a table. inline map_stat get_map_stat(map_handle map) const; /// \brief Returns depth (bitmask) information of nested dupsort (multi-value) /// B+trees for given database. inline uint32_t get_tree_deepmask(map_handle map) const; - /// \brief Returns information about key-value map (aka sub-database) handle. + /// \brief Returns information about key-value map (aka table) handle. inline map_handle::info get_handle_info(map_handle map) const; using canary = ::MDBX_canary; @@ -4536,39 +4536,39 @@ public: inline canary get_canary() const; /// Reads sequence generator associated with a key-value map (aka - /// sub-database). + /// table). inline uint64_t sequence(map_handle map) const; /// \brief Reads and increment sequence generator associated with a key-value - /// map (aka sub-database). + /// map (aka table). 
inline uint64_t sequence(map_handle map, uint64_t increment); /// \brief Compare two keys according to a particular key-value map (aka - /// sub-database). + /// table). inline int compare_keys(map_handle map, const slice &a, const slice &b) const noexcept; /// \brief Compare two values according to a particular key-value map (aka - /// sub-database). + /// table). inline int compare_values(map_handle map, const slice &a, const slice &b) const noexcept; /// \brief Compare keys of two pairs according to a particular key-value map - /// (aka sub-database). + /// (aka table). inline int compare_keys(map_handle map, const pair &a, const pair &b) const noexcept; /// \brief Compare values of two pairs according to a particular key-value map - /// (aka sub-database). + /// (aka table). inline int compare_values(map_handle map, const pair &a, const pair &b) const noexcept; - /// \brief Get value by key from a key-value map (aka sub-database). + /// \brief Get value by key from a key-value map (aka table). inline slice get(map_handle map, const slice &key) const; /// \brief Get first of multi-value and values count by key from a key-value - /// multimap (aka sub-database). + /// multimap (aka table). inline slice get(map_handle map, slice key, size_t &values_count) const; - /// \brief Get value by key from a key-value map (aka sub-database). + /// \brief Get value by key from a key-value map (aka table). inline slice get(map_handle map, const slice &key, const slice &value_at_absence) const; /// \brief Get first of multi-value and values count by key from a key-value - /// multimap (aka sub-database). + /// multimap (aka table). inline slice get(map_handle map, slice key, size_t &values_count, const slice &value_at_absence) const; /// \brief Get value for equal or great key from a database. 
diff --git a/src/alloy.c b/src/alloy.c index 7ca5f07b..f2cce532 100644 --- a/src/alloy.c +++ b/src/alloy.c @@ -41,7 +41,7 @@ #include "range-estimate.c" #include "refund.c" #include "spill.c" -#include "subdb.c" +#include "table.c" #include "tls.c" #include "tree.c" #include "txl.c" diff --git a/src/api-cursor.c b/src/api-cursor.c index 2f2506e5..c1e81aef 100644 --- a/src/api-cursor.c +++ b/src/api-cursor.c @@ -599,7 +599,7 @@ int mdbx_cursor_get_batch(MDBX_cursor *mc, size_t *count, MDBX_val *pairs, return MDBX_BAD_DBI; if (unlikely(mc->subcur)) - return MDBX_INCOMPATIBLE /* must be a non-dupsort subDB */; + return MDBX_INCOMPATIBLE /* must be a non-dupsort table */; switch (op) { case MDBX_NEXT: diff --git a/src/audit.c b/src/audit.c index e816aa0a..34a0f68f 100644 --- a/src/audit.c +++ b/src/audit.c @@ -81,7 +81,7 @@ __cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, ctx.used = NUM_METAS + audit_db_used(dbi_dig(txn, FREE_DBI, nullptr)) + audit_db_used(dbi_dig(txn, MAIN_DBI, nullptr)); - rc = mdbx_enumerate_subdb(txn, audit_dbi, &ctx); + rc = mdbx_enumerate_tables(txn, audit_dbi, &ctx); tASSERT(txn, rc == MDBX_SUCCESS); for (size_t dbi = CORE_DBS; dbi < txn->n_dbi; ++dbi) { diff --git a/src/chk.c b/src/chk.c index be28cf0a..2acda621 100644 --- a/src/chk.c +++ b/src/chk.c @@ -14,12 +14,12 @@ typedef struct MDBX_chk_internal { bool write_locked; uint8_t scope_depth; - MDBX_chk_subdb_t subdb_gc, subdb_main; + MDBX_chk_table_t table_gc, table_main; int16_t *pagemap; - MDBX_chk_subdb_t *last_lookup; + MDBX_chk_table_t *last_lookup; const void *last_nested; MDBX_chk_scope_t scope_stack[12]; - MDBX_chk_subdb_t *subdb[MDBX_MAX_DBI + CORE_DBS]; + MDBX_chk_table_t *table[MDBX_MAX_DBI + CORE_DBS]; MDBX_envinfo envinfo; troika_t troika; @@ -485,17 +485,17 @@ __cold static const char *chk_v2a(MDBX_chk_internal_t *chk, } __cold static void chk_dispose(MDBX_chk_internal_t *chk) { - assert(chk->subdb[FREE_DBI] == &chk->subdb_gc); - 
assert(chk->subdb[MAIN_DBI] == &chk->subdb_main); - for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb); ++i) { - MDBX_chk_subdb_t *const sdb = chk->subdb[i]; + assert(chk->table[FREE_DBI] == &chk->table_gc); + assert(chk->table[MAIN_DBI] == &chk->table_main); + for (size_t i = 0; i < ARRAY_LENGTH(chk->table); ++i) { + MDBX_chk_table_t *const sdb = chk->table[i]; if (sdb) { - chk->subdb[i] = nullptr; - if (chk->cb->subdb_dispose && sdb->cookie) { - chk->cb->subdb_dispose(chk->usr, sdb); + chk->table[i] = nullptr; + if (chk->cb->table_dispose && sdb->cookie) { + chk->cb->table_dispose(chk->usr, sdb); sdb->cookie = nullptr; } - if (sdb != &chk->subdb_gc && sdb != &chk->subdb_main) { + if (sdb != &chk->table_gc && sdb != &chk->table_main) { osal_free(sdb); } } @@ -640,7 +640,7 @@ histogram_print(MDBX_chk_scope_t *scope, MDBX_chk_line_t *line, //----------------------------------------------------------------------------- __cold static int chk_get_sdb(MDBX_chk_scope_t *const scope, - const walk_sdb_t *in, MDBX_chk_subdb_t **out) { + const walk_sdb_t *in, MDBX_chk_table_t **out) { MDBX_chk_internal_t *const chk = scope->internal; if (chk->last_lookup && chk->last_lookup->name.iov_base == in->name.iov_base) { @@ -648,15 +648,15 @@ __cold static int chk_get_sdb(MDBX_chk_scope_t *const scope, return MDBX_SUCCESS; } - for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb); ++i) { - MDBX_chk_subdb_t *sdb = chk->subdb[i]; + for (size_t i = 0; i < ARRAY_LENGTH(chk->table); ++i) { + MDBX_chk_table_t *sdb = chk->table[i]; if (!sdb) { - sdb = osal_calloc(1, sizeof(MDBX_chk_subdb_t)); + sdb = osal_calloc(1, sizeof(MDBX_chk_table_t)); if (unlikely(!sdb)) { *out = nullptr; - return chk_error_rc(scope, MDBX_ENOMEM, "alloc_subDB"); + return chk_error_rc(scope, MDBX_ENOMEM, "alloc_table"); } - chk->subdb[i] = sdb; + chk->table[i] = sdb; sdb->flags = in->internal->flags; sdb->id = -1; sdb->name = in->name; @@ -665,16 +665,16 @@ __cold static int chk_get_sdb(MDBX_chk_scope_t *const scope, if 
(sdb->id < 0) { sdb->id = (int)i; sdb->cookie = - chk->cb->subdb_filter - ? chk->cb->subdb_filter(chk->usr, &sdb->name, sdb->flags) + chk->cb->table_filter + ? chk->cb->table_filter(chk->usr, &sdb->name, sdb->flags) : (void *)(intptr_t)-1; } *out = (chk->last_lookup = sdb); return MDBX_SUCCESS; } } - chk_scope_issue(scope, "too many subDBs > %u", - (unsigned)ARRAY_LENGTH(chk->subdb) - CORE_DBS - /* meta */ 1); + chk_scope_issue(scope, "too many tables > %u", + (unsigned)ARRAY_LENGTH(chk->table) - CORE_DBS - /* meta */ 1); *out = nullptr; return MDBX_PROBLEM; } @@ -751,7 +751,7 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, MDBX_chk_context_t *const usr = chk->usr; MDBX_env *const env = usr->env; - MDBX_chk_subdb_t *sdb; + MDBX_chk_table_t *sdb; int err = chk_get_sdb(scope, sdb_info, &sdb); if (unlikely(err)) return err; @@ -773,7 +773,7 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, height -= sdb_info->internal->height; else { chk_object_issue(scope, "nested tree", pgno, "unexpected", - "subDb %s flags 0x%x, deep %i", chk_v2a(chk, &sdb->name), + "table %s flags 0x%x, deep %i", chk_v2a(chk, &sdb->name), sdb->flags, deep); nested = nullptr; } @@ -804,7 +804,7 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, histogram_acc(npages, &sdb->histogram.large_pages); if (sdb->flags & MDBX_DUPSORT) chk_object_issue(scope, "page", pgno, "unexpected", - "type %u, subDb %s flags 0x%x, deep %i", + "type %u, table %s flags 0x%x, deep %i", (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags, deep); break; @@ -821,7 +821,7 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, case page_dupfix_leaf: if (!nested) chk_object_issue(scope, "page", pgno, "unexpected", - "type %u, subDb %s flags 0x%x, deep %i", + "type %u, table %s flags 0x%x, deep %i", (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags, deep); /* fall through */ @@ -832,7 +832,7 @@ chk_pgvisitor(const 
size_t pgno, const unsigned npages, void *const ctx, sdb->pages.leaf += 1; if (height != sdb_info->internal->height) chk_object_issue(scope, "page", pgno, "wrong tree height", - "actual %i != %i subDb %s", height, + "actual %i != %i table %s", height, sdb_info->internal->height, chk_v2a(chk, &sdb->name)); } else { pagetype_caption = @@ -855,7 +855,7 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, sdb->pages.nested_subleaf += 1; if ((sdb->flags & MDBX_DUPSORT) == 0 || nested) chk_object_issue(scope, "page", pgno, "unexpected", - "type %u, subDb %s flags 0x%x, deep %i", + "type %u, table %s flags 0x%x, deep %i", (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags, deep); break; @@ -888,8 +888,8 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, deep); sdb->pages.all += 1; } else if (chk->pagemap[spanpgno]) { - const MDBX_chk_subdb_t *const rival = - chk->subdb[chk->pagemap[spanpgno] - 1]; + const MDBX_chk_table_t *const rival = + chk->table[chk->pagemap[spanpgno] - 1]; chk_object_issue(scope, "page", spanpgno, (branch && rival == sdb) ? 
"loop" : "already used", "%s-page: by %s, deep %i", pagetype_caption, @@ -978,11 +978,11 @@ __cold static int chk_tree(MDBX_chk_scope_t *const scope) { if (!chk->pagemap[n]) usr->result.unused_pages += 1; - MDBX_chk_subdb_t total; + MDBX_chk_table_t total; memset(&total, 0, sizeof(total)); total.pages.all = NUM_METAS; - for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb) && chk->subdb[i]; ++i) { - MDBX_chk_subdb_t *const sdb = chk->subdb[i]; + for (size_t i = 0; i < ARRAY_LENGTH(chk->table) && chk->table[i]; ++i) { + MDBX_chk_table_t *const sdb = chk->table[i]; total.payload_bytes += sdb->payload_bytes; total.lost_bytes += sdb->lost_bytes; total.pages.all += sdb->pages.all; @@ -1007,8 +1007,8 @@ __cold static int chk_tree(MDBX_chk_scope_t *const scope) { err = chk_scope_restore(scope, err); if (scope->verbosity > MDBX_chk_info) { - for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb) && chk->subdb[i]; ++i) { - MDBX_chk_subdb_t *const sdb = chk->subdb[i]; + for (size_t i = 0; i < ARRAY_LENGTH(chk->table) && chk->table[i]; ++i) { + MDBX_chk_table_t *const sdb = chk->table[i]; MDBX_chk_scope_t *inner = chk_scope_push(scope, 0, "tree %s:", chk_v2a(chk, &sdb->name)); if (sdb->pages.all == 0) @@ -1042,7 +1042,7 @@ __cold static int chk_tree(MDBX_chk_scope_t *const scope) { } line = histogram_dist(chk_line_feed(line), &sdb->histogram.deep, "tree deep density", "1", false); - if (sdb != &chk->subdb_gc && sdb->histogram.nested_tree.count) { + if (sdb != &chk->table_gc && sdb->histogram.nested_tree.count) { line = chk_print(chk_line_feed(line), "nested tree(s) %" PRIuSIZE, sdb->histogram.nested_tree.count); line = histogram_dist(line, &sdb->histogram.nested_tree, " density", @@ -1098,23 +1098,23 @@ __cold static int chk_tree(MDBX_chk_scope_t *const scope) { } typedef int(chk_kv_visitor)(MDBX_chk_scope_t *const scope, - MDBX_chk_subdb_t *sdb, const size_t record_number, + MDBX_chk_table_t *sdb, const size_t record_number, const MDBX_val *key, const MDBX_val *data); __cold static int 
chk_handle_kv(MDBX_chk_scope_t *const scope, - MDBX_chk_subdb_t *sdb, + MDBX_chk_table_t *sdb, const size_t record_number, const MDBX_val *key, const MDBX_val *data) { MDBX_chk_internal_t *const chk = scope->internal; int err = MDBX_SUCCESS; assert(sdb->cookie); - if (chk->cb->subdb_handle_kv) - err = chk->cb->subdb_handle_kv(chk->usr, sdb, record_number, key, data); + if (chk->cb->table_handle_kv) + err = chk->cb->table_handle_kv(chk->usr, sdb, record_number, key, data); return err ? err : chk_check_break(scope); } __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, - MDBX_chk_subdb_t *sdb, chk_kv_visitor *handler) { + MDBX_chk_table_t *sdb, chk_kv_visitor *handler) { MDBX_chk_internal_t *const chk = scope->internal; MDBX_chk_context_t *const usr = chk->usr; MDBX_env *const env = usr->env; @@ -1365,34 +1365,34 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, if (dbi != MAIN_DBI || (sdb->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP))) chk_object_issue(scope, "entry", record_count, - "unexpected sub-database", "node-flags 0x%x", + "unexpected table", "node-flags 0x%x", node_flags(node)); else if (data.iov_len != sizeof(tree_t)) chk_object_issue(scope, "entry", record_count, - "wrong sub-database node size", + "wrong table node size", "node-size %" PRIuSIZE " != %" PRIuSIZE, data.iov_len, sizeof(tree_t)); else if (scope->stage == MDBX_chk_maindb) - /* подсчитываем subDB при первом проходе */ + /* подсчитываем table при первом проходе */ sub_databases += 1; else { - /* обработка subDB при втором проходе */ + /* обработка table при втором проходе */ tree_t aligned_db; memcpy(&aligned_db, data.iov_base, sizeof(aligned_db)); walk_sdb_t sdb_info = {.name = key}; sdb_info.internal = &aligned_db; - MDBX_chk_subdb_t *subdb; - err = chk_get_sdb(scope, &sdb_info, &subdb); + MDBX_chk_table_t *table; + err = chk_get_sdb(scope, &sdb_info, &table); if (unlikely(err)) goto bailout; - if (subdb->cookie) { + if 
(table->cookie) { err = chk_scope_begin( - chk, 0, MDBX_chk_subdbs, subdb, &usr->result.problems_kv, - "Processing subDB %s...", chk_v2a(chk, &subdb->name)); + chk, 0, MDBX_chk_tables, table, &usr->result.problems_kv, + "Processing table %s...", chk_v2a(chk, &table->name)); if (likely(!err)) { - err = chk_db(usr->scope, (MDBX_dbi)-1, subdb, chk_handle_kv); + err = chk_db(usr->scope, (MDBX_dbi)-1, table, chk_handle_kv); if (err != MDBX_EINTR && err != MDBX_RESULT_TRUE) - usr->result.subdb_processed += 1; + usr->result.table_processed += 1; } err = chk_scope_restore(scope, err); if (unlikely(err)) @@ -1400,7 +1400,7 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, } else chk_line_end(chk_flush( chk_print(chk_line_begin(scope, MDBX_chk_processing), - "Skip processing %s...", chk_v2a(chk, &subdb->name)))); + "Skip processing %s...", chk_v2a(chk, &table->name)))); } } else if (handler) { err = handler(scope, sdb, record_count, &key, &data); @@ -1430,16 +1430,16 @@ bailout: chk_line_end(line); } if (scope->stage == MDBX_chk_maindb) - usr->result.subdb_total = sub_databases; - if (chk->cb->subdb_conclude) - err = chk->cb->subdb_conclude(usr, sdb, cursor, err); + usr->result.table_total = sub_databases; + if (chk->cb->table_conclude) + err = chk->cb->table_conclude(usr, sdb, cursor, err); MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution); line = chk_print(line, "summary: %" PRIuSIZE " records,", record_count); if (dups || (sdb->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP))) line = chk_print(line, " %" PRIuSIZE " dups,", dups); if (sub_databases || dbi == MAIN_DBI) - line = chk_print(line, " %" PRIuSIZE " sub-databases,", sub_databases); + line = chk_print(line, " %" PRIuSIZE " tables,", sub_databases); line = chk_print(line, " %" PRIuSIZE " key's bytes," " %" PRIuSIZE " data's bytes," @@ -1457,12 +1457,12 @@ bailout: } __cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, - MDBX_chk_subdb_t 
*sdb, + MDBX_chk_table_t *sdb, const size_t record_number, const MDBX_val *key, const MDBX_val *data) { MDBX_chk_internal_t *const chk = scope->internal; MDBX_chk_context_t *const usr = chk->usr; - assert(sdb == &chk->subdb_gc); + assert(sdb == &chk->table_gc); (void)sdb; const char *bad = ""; pgno_t *iptr = data->iov_base; @@ -1532,9 +1532,9 @@ __cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, if (id == 0) chk->pagemap[pgno] = -1 /* mark the pgno listed in GC */; else if (id > 0) { - assert(id - 1 <= (intptr_t)ARRAY_LENGTH(chk->subdb)); + assert(id - 1 <= (intptr_t)ARRAY_LENGTH(chk->table)); chk_object_issue(scope, "page", pgno, "already used", "by %s", - chk_v2a(chk, &chk->subdb[id - 1]->name)); + chk_v2a(chk, &chk->table[id - 1]->name)); } else chk_object_issue(scope, "page", pgno, "already listed in GC", nullptr); @@ -1832,13 +1832,13 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) { usr->result.problems_gc = usr->result.gc_tree_problems)); else { err = chk_scope_begin( - chk, -1, MDBX_chk_gc, &chk->subdb_gc, &usr->result.problems_gc, + chk, -1, MDBX_chk_gc, &chk->table_gc, &usr->result.problems_gc, "Processing %s by txn#%" PRIaTXN "...", subj_gc, txn->txnid); if (likely(!err)) - err = chk_db(usr->scope, FREE_DBI, &chk->subdb_gc, chk_handle_gc); + err = chk_db(usr->scope, FREE_DBI, &chk->table_gc, chk_handle_gc); line = chk_line_begin(scope, MDBX_chk_info); if (line) { - histogram_print(scope, line, &chk->subdb_gc.histogram.nested_tree, + histogram_print(scope, line, &chk->table_gc.histogram.nested_tree, "span(s)", "single", false); chk_line_end(line); } @@ -1970,32 +1970,32 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) { subj_main, subj_tree, usr->result.problems_kv = usr->result.kv_tree_problems)); else { - err = chk_scope_begin(chk, 0, MDBX_chk_maindb, &chk->subdb_main, + err = chk_scope_begin(chk, 0, MDBX_chk_maindb, &chk->table_main, &usr->result.problems_kv, "Processing %s...", subj_main); if (likely(!err)) - err = 
chk_db(usr->scope, MAIN_DBI, &chk->subdb_main, chk_handle_kv); + err = chk_db(usr->scope, MAIN_DBI, &chk->table_main, chk_handle_kv); chk_scope_restore(scope, err); - const char *const subj_subdbs = "sub-database(s)"; - if (usr->result.problems_kv && usr->result.subdb_total) + const char *const subj_tables = "table(s)"; + if (usr->result.problems_kv && usr->result.table_total) chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), - "Skip processing %s", subj_subdbs)); - else if (usr->result.problems_kv == 0 && usr->result.subdb_total == 0) + "Skip processing %s", subj_tables)); + else if (usr->result.problems_kv == 0 && usr->result.table_total == 0) chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "No %s", - subj_subdbs)); - else if (usr->result.problems_kv == 0 && usr->result.subdb_total) { + subj_tables)); + else if (usr->result.problems_kv == 0 && usr->result.table_total) { err = chk_scope_begin( - chk, 1, MDBX_chk_subdbs, nullptr, &usr->result.problems_kv, - "Processing %s by txn#%" PRIaTXN "...", subj_subdbs, txn->txnid); + chk, 1, MDBX_chk_tables, nullptr, &usr->result.problems_kv, + "Processing %s by txn#%" PRIaTXN "...", subj_tables, txn->txnid); if (!err) - err = chk_db(usr->scope, MAIN_DBI, &chk->subdb_main, nullptr); + err = chk_db(usr->scope, MAIN_DBI, &chk->table_main, nullptr); if (usr->scope->subtotal_issues) chk_line_end(chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution), "processed %" PRIuSIZE " of %" PRIuSIZE " %s, %" PRIuSIZE " problems(s)", - usr->result.subdb_processed, - usr->result.subdb_total, subj_subdbs, + usr->result.table_processed, + usr->result.table_total, subj_tables, usr->scope->subtotal_issues)); } chk_scope_restore(scope, err); @@ -2035,20 +2035,20 @@ __cold int mdbx_env_chk(MDBX_env *env, const struct MDBX_chk_callbacks *cb, chk->usr->env = env; chk->flags = flags; - chk->subdb_gc.id = -1; - chk->subdb_gc.name.iov_base = MDBX_CHK_GC; - chk->subdb[FREE_DBI] = &chk->subdb_gc; + chk->table_gc.id = -1; 
+ chk->table_gc.name.iov_base = MDBX_CHK_GC; + chk->table[FREE_DBI] = &chk->table_gc; - chk->subdb_main.id = -1; - chk->subdb_main.name.iov_base = MDBX_CHK_MAIN; - chk->subdb[MAIN_DBI] = &chk->subdb_main; + chk->table_main.id = -1; + chk->table_main.name.iov_base = MDBX_CHK_MAIN; + chk->table[MAIN_DBI] = &chk->table_main; chk->monotime_timeout = timeout_seconds_16dot16 ? osal_16dot16_to_monotime(timeout_seconds_16dot16) + osal_monotime() : 0; chk->usr->scope_nesting = 0; - chk->usr->result.subdbs = (const void *)&chk->subdb; + chk->usr->result.tables = (const void *)&chk->table; MDBX_chk_scope_t *const top = chk->scope_stack; top->verbosity = verbosity; @@ -2080,8 +2080,8 @@ __cold int mdbx_env_chk(MDBX_env *env, const struct MDBX_chk_callbacks *cb, // doit if (likely(!rc)) { - chk->subdb_gc.flags = ctx->txn->dbs[FREE_DBI].flags; - chk->subdb_main.flags = ctx->txn->dbs[MAIN_DBI].flags; + chk->table_gc.flags = ctx->txn->dbs[FREE_DBI].flags; + chk->table_main.flags = ctx->txn->dbs[MAIN_DBI].flags; rc = env_chk(top); } diff --git a/src/cogs.h b/src/cogs.h index f0677f23..c9d39332 100644 --- a/src/cogs.h +++ b/src/cogs.h @@ -39,10 +39,10 @@ MDBX_MAYBE_UNUSED MDBX_INTERNAL bool pv2pages_verify(void); * LEAF_NODE_MAX = even_floor(PAGESPACE / 2 - sizeof(indx_t)); * DATALEN_NO_OVERFLOW = LEAF_NODE_MAX - NODESIZE - KEYLEN_MAX; * - * - SubDatabase-node must fit into one leaf-page: - * SUBDB_NAME_MAX = LEAF_NODE_MAX - node_hdr_len - sizeof(tree_t); + * - Table-node must fit into one leaf-page: + * TABLE_NAME_MAX = LEAF_NODE_MAX - node_hdr_len - sizeof(tree_t); * - * - Dupsort values itself are a keys in a dupsort-subdb and couldn't be longer + * - Dupsort values itself are a keys in a dupsort-table and couldn't be longer * than the KEYLEN_MAX. 
But dupsort node must not great than LEAF_NODE_MAX, * since dupsort value couldn't be placed on a large/overflow page: * DUPSORT_DATALEN_MAX = min(KEYLEN_MAX, diff --git a/src/cold.c b/src/cold.c index d4425ae4..a37a04bb 100644 --- a/src/cold.c +++ b/src/cold.c @@ -187,7 +187,7 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { if (!(txn->dbs[MAIN_DBI].flags & MDBX_DUPSORT) && txn->dbs[MAIN_DBI].items /* TODO: use `md_subs` field */) { - /* scan and account not opened named subDBs */ + /* scan and account not opened named tables */ err = tree_search(&cx.outer, nullptr, Z_FIRST); while (err == MDBX_SUCCESS) { const page_t *mp = cx.outer.pg[cx.outer.top]; @@ -197,7 +197,7 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { continue; if (unlikely(node_ds(node) != sizeof(tree_t))) { ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid subDb node size", node_ds(node)); + "invalid table node size", node_ds(node)); return MDBX_CORRUPTED; } diff --git a/src/cursor.c b/src/cursor.c index a6434539..8ef2cdb5 100644 --- a/src/cursor.c +++ b/src/cursor.c @@ -860,7 +860,7 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, } } else { csr_t csr = - /* olddata may not be updated in case DUPFIX-page of dupfix-subDB */ + /* olddata may not be updated in case DUPFIX-page of dupfix-table */ cursor_seek(mc, (MDBX_val *)key, &old_data, MDBX_SET); rc = csr.err; exact = csr.exact; @@ -878,7 +878,7 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, eASSERT(env, data->iov_len == 0 && (old_data.iov_len == 0 || /* olddata may not be updated in case - DUPFIX-page of dupfix-subDB */ + DUPFIX-page of dupfix-table */ (mc->tree->flags & MDBX_DUPFIXED))); return MDBX_SUCCESS; } @@ -1630,7 +1630,7 @@ __hot int cursor_del(MDBX_cursor *mc, unsigned flags) { /* If sub-DB still has entries, we're done */ if (mc->subcur->nested_tree.items) { if (node_flags(node) & N_SUBDATA) { 
- /* update subDB info */ + /* update table info */ mc->subcur->nested_tree.mod_txnid = mc->txn->txnid; memcpy(node_data(node), &mc->subcur->nested_tree, sizeof(tree_t)); } else { diff --git a/src/dbi.c b/src/dbi.c index c043e1c9..bd2555bb 100644 --- a/src/dbi.c +++ b/src/dbi.c @@ -88,7 +88,7 @@ __noinline int dbi_import(MDBX_txn *txn, const size_t dbi) { if (parent) { /* вложенная пишущая транзакция */ int rc = dbi_check(parent, dbi); - /* копируем состояние subDB очищая new-флаги. */ + /* копируем состояние table очищая new-флаги. */ eASSERT(env, txn->dbi_seqs == parent->dbi_seqs); txn->dbi_state[dbi] = parent->dbi_state[dbi] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); @@ -259,15 +259,15 @@ int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, /* Если dbi уже использовался, то корректными считаем четыре варианта: * 1) user_flags равны MDBX_DB_ACCEDE - * = предполагаем что пользователь открывает существующую subDb, + * = предполагаем что пользователь открывает существующую table, * при этом код проверки не позволит установить другие компараторы. * 2) user_flags нулевые, а оба компаратора пустые/нулевые или равны текущим - * = предполагаем что пользователь открывает существующую subDb + * = предполагаем что пользователь открывает существующую table * старым способом с нулевыми с флагами по-умолчанию. 
* 3) user_flags совпадают, а компараторы не заданы или те же - * = предполагаем что пользователь открывает subDb указывая все параметры; - * 4) user_flags отличаются, но subDb пустая и задан флаг MDBX_CREATE - * = предполагаем что пользователь пересоздает subDb; + * = предполагаем что пользователь открывает table указывая все параметры; + * 4) user_flags отличаются, но table пустая и задан флаг MDBX_CREATE + * = предполагаем что пользователь пересоздает table; */ if ((user_flags & ~MDBX_CREATE) != (unsigned)(env->dbs_flags[dbi] & DB_PERSISTENT_FLAGS)) { @@ -291,7 +291,7 @@ int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, if (unlikely(txn->dbs[dbi].leaf_pages)) return /* FIXME: return extended info */ MDBX_INCOMPATIBLE; - /* Пересоздаём subDB если там пусто */ + /* Пересоздаём table если там пусто */ if (unlikely(txn->cursors[dbi])) return MDBX_DANGLING_DBI; env->dbs_flags[dbi] = DB_POISON; @@ -463,7 +463,7 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, return MDBX_INCOMPATIBLE; if (!MDBX_DISABLE_VALIDATION && unlikely(body.iov_len != sizeof(tree_t))) { ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid subDb node size", body.iov_len); + "invalid table node size", body.iov_len); return MDBX_CORRUPTED; } memcpy(&txn->dbs[slot], body.iov_base, sizeof(tree_t)); @@ -977,8 +977,8 @@ __cold const tree_t *dbi_dig(const MDBX_txn *txn, const size_t dbi, return fallback; } -__cold int mdbx_enumerate_subdb(const MDBX_txn *txn, MDBX_subdb_enum_func *func, - void *ctx) { +__cold int mdbx_enumerate_tables(const MDBX_txn *txn, + MDBX_table_enum_func *func, void *ctx) { if (unlikely(!func)) return MDBX_EINVAL; diff --git a/src/internals.h b/src/internals.h index e15f7c09..9d45fda6 100644 --- a/src/internals.h +++ b/src/internals.h @@ -96,7 +96,7 @@ typedef struct clc { size_t lmin, lmax; /* min/max length constraints */ } clc_t; -/* Вспомогательная информация о subDB. +/* Вспомогательная информация о table. 
* * Совокупность потребностей: * 1. Для транзакций и основного курсора нужны все поля. @@ -136,7 +136,7 @@ typedef struct clc2 { struct kvx { clc2_t clc; - MDBX_val name; /* имя subDB */ + MDBX_val name; /* имя table */ }; /* Non-shared DBI state flags inside transaction */ diff --git a/src/layout-dxb.h b/src/layout-dxb.h index 78f05aeb..aea506d2 100644 --- a/src/layout-dxb.h +++ b/src/layout-dxb.h @@ -191,7 +191,7 @@ typedef enum page_type { * * P_SUBP sub-pages are small leaf "pages" with duplicate data. * A node with flag N_DUPDATA but not N_SUBDATA contains a sub-page. - * (Duplicate data can also go in sub-databases, which use normal pages.) + * (Duplicate data can also go in tables, which use normal pages.) * * P_META pages contain meta_t, the start point of an MDBX snapshot. * @@ -225,7 +225,7 @@ typedef struct page { * Leaf node flags describe node contents. N_BIGDATA says the node's * data part is the page number of an overflow page with actual data. * N_DUPDATA and N_SUBDATA can be combined giving duplicate data in - * a sub-page/sub-database, and named databases (just N_SUBDATA). */ + * a sub-page/table, and named databases (just N_SUBDATA). */ typedef struct node { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ union { @@ -255,7 +255,7 @@ typedef struct node { typedef enum node_flags { N_BIGDATA = 0x01 /* data put on large page */, - N_SUBDATA = 0x02 /* data is a sub-database */, + N_SUBDATA = 0x02 /* data is a table */, N_DUPDATA = 0x04 /* data has duplicates */ } node_flags_t; diff --git a/src/man1/mdbx_chk.1 b/src/man1/mdbx_chk.1 index bc438e84..4ff7fc6a 100644 --- a/src/man1/mdbx_chk.1 +++ b/src/man1/mdbx_chk.1 @@ -22,7 +22,7 @@ mdbx_chk \- MDBX checking tool [\c .BR \-i ] [\c -.BI \-s \ subdb\fR] +.BI \-s \ table\fR] .BR \ dbpath .SH DESCRIPTION The @@ -69,8 +69,8 @@ pages. Ignore wrong order errors, which will likely false-positive if custom comparator(s) was used. .TP -.BR \-s \ subdb -Verify and show info only for a specific subdatabase. 
+.BR \-s \ table +Verify and show info only for a specific table. .TP .BR \-0 | \-1 | \-2 Using specific meta-page 0, or 2 for checking. diff --git a/src/man1/mdbx_drop.1 b/src/man1/mdbx_drop.1 index 7ae14f9b..99f8d370 100644 --- a/src/man1/mdbx_drop.1 +++ b/src/man1/mdbx_drop.1 @@ -11,7 +11,7 @@ mdbx_drop \- MDBX database delete tool [\c .BR \-d ] [\c -.BI \-s \ subdb\fR] +.BI \-s \ table\fR] [\c .BR \-n ] .BR \ dbpath @@ -28,8 +28,8 @@ Write the library version number to the standard output, and exit. .BR \-d Delete the specified database, don't just empty it. .TP -.BR \-s \ subdb -Operate on a specific subdatabase. If no database is specified, only the main database is dropped. +.BR \-s \ table +Operate on a specific table. If no table is specified, only the main table is dropped. .TP .BR \-n Dump an MDBX database which does not use subdirectories. diff --git a/src/man1/mdbx_dump.1 b/src/man1/mdbx_dump.1 index 51e6caca..ecd9618b 100644 --- a/src/man1/mdbx_dump.1 +++ b/src/man1/mdbx_dump.1 @@ -19,7 +19,7 @@ mdbx_dump \- MDBX environment export tool .BR \-p ] [\c .BR \-a \ | -.BI \-s \ subdb\fR] +.BI \-s \ table\fR] [\c .BR \-r ] [\c @@ -58,10 +58,10 @@ are considered printing characters, and databases dumped in this manner may be less portable to external systems. .TP .BR \-a -Dump all of the subdatabases in the environment. +Dump all of the tables in the environment. .TP -.BR \-s \ subdb -Dump a specific subdatabase. If no database is specified, only the main database is dumped. +.BR \-s \ table +Dump a specific table. If no database is specified, only the main table is dumped. .TP .BR \-r Rescure mode. Ignore some errors to dump corrupted DB. 
diff --git a/src/man1/mdbx_load.1 b/src/man1/mdbx_load.1 index b7fa87f7..6c2e16c7 100644 --- a/src/man1/mdbx_load.1 +++ b/src/man1/mdbx_load.1 @@ -16,7 +16,7 @@ mdbx_load \- MDBX environment import tool [\c .BI \-f \ file\fR] [\c -.BI \-s \ subdb\fR] +.BI \-s \ table\fR] [\c .BR \-N ] [\c @@ -71,11 +71,11 @@ on a database that uses custom compare functions. .BR \-f \ file Read from the specified file instead of from the standard input. .TP -.BR \-s \ subdb -Load a specific subdatabase. If no database is specified, data is loaded into the main database. +.BR \-s \ table +Load a specific table. If no table is specified, data is loaded into the main table. .TP .BR \-N -Don't overwrite existing records when loading into an already existing database; just skip them. +Don't overwrite existing records when loading into an already existing table; just skip them. .TP .BR \-T Load data from simple text files. The input must be paired lines of text, where the first diff --git a/src/man1/mdbx_stat.1 b/src/man1/mdbx_stat.1 index 997bdae2..2b87f201 100644 --- a/src/man1/mdbx_stat.1 +++ b/src/man1/mdbx_stat.1 @@ -21,7 +21,7 @@ mdbx_stat \- MDBX environment status tool .BR \-r [ r ]] [\c .BR \-a \ | -.BI \-s \ subdb\fR] +.BI \-s \ table\fR] .BR \ dbpath [\c .BR \-n ] @@ -61,10 +61,10 @@ table and clear them. The reader table will be printed again after the check is performed. .TP .BR \-a -Display the status of all of the subdatabases in the environment. +Display the status of all of the tables in the environment. .TP -.BR \-s \ subdb -Display the status of a specific subdatabase. +.BR \-s \ table +Display the status of a specific table. .TP .BR \-n Display the status of an MDBX database which does not use subdirectories. 
diff --git a/src/misc.c b/src/misc.c index bf8246ab..5e77e02a 100644 --- a/src/misc.c +++ b/src/misc.c @@ -84,9 +84,9 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, * - изменить семантику установки/обновления mod_txnid, привязав его * строго к изменению b-tree, но не атрибутов; * - обновлять mod_txnid при фиксации вложенных транзакций; - * - для dbi-хендлов пользовательских subDb (видимо) можно оставить + * - для dbi-хендлов пользовательских table (видимо) можно оставить * DBI_DIRTY в качестве признака необходимости обновления записи - * subDb в MainDB, при этом взводить DBI_DIRTY вместе с обновлением + * table в MainDB, при этом взводить DBI_DIRTY вместе с обновлением * mod_txnid, в том числе при обновлении sequence. * - для MAIN_DBI при обновлении sequence не следует взводить DBI_DIRTY * и/или обновлять mod_txnid, а только взводить MDBX_TXN_DIRTY. @@ -163,7 +163,7 @@ __cold const char *mdbx_liberr2str(int errnum) { "MDBX_BAD_TXN: Transaction is not valid for requested operation," " e.g. 
had errored and be must aborted, has a child, or is invalid", "MDBX_BAD_VALSIZE: Invalid size or alignment of key or data" - " for target database, either invalid subDB name", + " for target database, either invalid table name", "MDBX_BAD_DBI: The specified DBI-handle is invalid" " or changed by another thread/transaction", "MDBX_PROBLEM: Unexpected internal error, transaction should be aborted", @@ -206,7 +206,7 @@ __cold const char *mdbx_liberr2str(int errnum) { " please keep one and remove unused other"; case MDBX_DANGLING_DBI: return "MDBX_DANGLING_DBI: Some cursors and/or other resources should be" - " closed before subDb or corresponding DBI-handle could be (re)used"; + " closed before table or corresponding DBI-handle could be (re)used"; case MDBX_OUSTED: return "MDBX_OUSTED: The parked read transaction was outed for the sake" " of recycling old MVCC snapshots"; diff --git a/src/proto.h b/src/proto.h index a2aaa3e1..898bf512 100644 --- a/src/proto.h +++ b/src/proto.h @@ -96,14 +96,14 @@ MDBX_INTERNAL int __must_check_result env_page_auxbuffer(MDBX_env *env); MDBX_INTERNAL unsigned env_setup_pagesize(MDBX_env *env, const size_t pagesize); /* tree.c */ -MDBX_INTERNAL int tree_drop(MDBX_cursor *mc, const bool may_have_subDBs); +MDBX_INTERNAL int tree_drop(MDBX_cursor *mc, const bool may_have_tables); MDBX_INTERNAL int __must_check_result tree_rebalance(MDBX_cursor *mc); MDBX_INTERNAL int __must_check_result tree_propagate_key(MDBX_cursor *mc, const MDBX_val *key); MDBX_INTERNAL void recalculate_merge_thresholds(MDBX_env *env); MDBX_INTERNAL void recalculate_subpage_thresholds(MDBX_env *env); -/* subdb.c */ +/* table.c */ MDBX_INTERNAL int __must_check_result sdb_fetch(MDBX_txn *txn, size_t dbi); MDBX_INTERNAL int __must_check_result sdb_setup(const MDBX_env *env, kvx_t *const kvx, diff --git a/src/subdb.c b/src/table.c similarity index 93% rename from src/subdb.c rename to src/table.c index c1481035..1f4c6082 100644 --- a/src/subdb.c +++ b/src/table.c @@ -41,7 
+41,7 @@ int sdb_fetch(MDBX_txn *txn, size_t dbi) { rc = tree_search(&couple.outer, &kvx->name, 0); if (unlikely(rc != MDBX_SUCCESS)) { bailout: - NOTICE("dbi %zu refs to inaccessible subDB `%*s` for txn %" PRIaTXN + NOTICE("dbi %zu refs to inaccessible table `%*s` for txn %" PRIaTXN " (err %d)", dbi, (int)kvx->name.iov_len, (const char *)kvx->name.iov_base, txn->txnid, rc); @@ -55,7 +55,7 @@ int sdb_fetch(MDBX_txn *txn, size_t dbi) { goto bailout; } if (unlikely((node_flags(nsr.node) & (N_DUPDATA | N_SUBDATA)) != N_SUBDATA)) { - NOTICE("dbi %zu refs to not a named subDB `%*s` for txn %" PRIaTXN " (%s)", + NOTICE("dbi %zu refs to not a named table `%*s` for txn %" PRIaTXN " (%s)", dbi, (int)kvx->name.iov_len, (const char *)kvx->name.iov_base, txn->txnid, "wrong flags"); return MDBX_INCOMPATIBLE; /* not a named DB */ @@ -67,7 +67,7 @@ int sdb_fetch(MDBX_txn *txn, size_t dbi) { return rc; if (unlikely(data.iov_len != sizeof(tree_t))) { - NOTICE("dbi %zu refs to not a named subDB `%*s` for txn %" PRIaTXN " (%s)", + NOTICE("dbi %zu refs to not a named table `%*s` for txn %" PRIaTXN " (%s)", dbi, (int)kvx->name.iov_len, (const char *)kvx->name.iov_base, txn->txnid, "wrong rec-size"); return MDBX_INCOMPATIBLE; /* not a named DB */ @@ -78,7 +78,7 @@ int sdb_fetch(MDBX_txn *txn, size_t dbi) { * have dropped and recreated the DB with other flags. 
*/ tree_t *const db = &txn->dbs[dbi]; if (unlikely((db->flags & DB_PERSISTENT_FLAGS) != flags)) { - NOTICE("dbi %zu refs to the re-created subDB `%*s` for txn %" PRIaTXN + NOTICE("dbi %zu refs to the re-created table `%*s` for txn %" PRIaTXN " with different flags (present 0x%X != wanna 0x%X)", dbi, (int)kvx->name.iov_len, (const char *)kvx->name.iov_base, txn->txnid, db->flags & DB_PERSISTENT_FLAGS, flags); diff --git a/src/tools/chk.c b/src/tools/chk.c index 80e37a7c..75586632 100644 --- a/src/tools/chk.c +++ b/src/tools/chk.c @@ -55,7 +55,7 @@ MDBX_env *env; MDBX_txn *txn; unsigned verbose = 0; bool quiet; -MDBX_val only_subdb; +MDBX_val only_table; int stuck_meta = -1; MDBX_chk_context_t chk; bool turn_meta = false; @@ -95,7 +95,7 @@ static bool silently(enum MDBX_chk_severity severity) { chk.scope ? chk.scope->verbosity >> MDBX_chk_severity_prio_shift : verbose + (MDBX_chk_result >> MDBX_chk_severity_prio_shift); int prio = (severity >> MDBX_chk_severity_prio_shift); - if (chk.scope && chk.scope->stage == MDBX_chk_subdbs && verbose < 2) + if (chk.scope && chk.scope->stage == MDBX_chk_tables && verbose < 2) prio += 1; return quiet || cutoff < ((prio > 0) ? prio : 0); } @@ -270,14 +270,14 @@ static void scope_pop(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *scope, flush(); } -static MDBX_chk_user_subdb_cookie_t *subdb_filter(MDBX_chk_context_t *ctx, +static MDBX_chk_user_table_cookie_t *table_filter(MDBX_chk_context_t *ctx, const MDBX_val *name, MDBX_db_flags_t flags) { (void)ctx; (void)flags; - return (!only_subdb.iov_base || - (only_subdb.iov_len == name->iov_len && - memcmp(only_subdb.iov_base, name->iov_base, name->iov_len) == 0)) + return (!only_table.iov_base || + (only_table.iov_len == name->iov_len && + memcmp(only_table.iov_base, name->iov_base, name->iov_len) == 0)) ? 
(void *)(intptr_t)-1 : nullptr; } @@ -344,7 +344,7 @@ static void print_format(MDBX_chk_line_t *line, const char *fmt, va_list args) { static const MDBX_chk_callbacks_t cb = {.check_break = check_break, .scope_push = scope_push, .scope_pop = scope_pop, - .subdb_filter = subdb_filter, + .table_filter = table_filter, .stage_begin = stage_begin, .stage_end = stage_end, .print_begin = print_begin, @@ -357,7 +357,7 @@ static void usage(char *prog) { fprintf( stderr, "usage: %s " - "[-V] [-v] [-q] [-c] [-0|1|2] [-w] [-d] [-i] [-s subdb] [-u|U] dbpath\n" + "[-V] [-v] [-q] [-c] [-0|1|2] [-w] [-d] [-i] [-s table] [-u|U] dbpath\n" " -V\t\tprint version and exit\n" " -v\t\tmore verbose, could be repeated upto 9 times for extra details\n" " -q\t\tbe quiet\n" @@ -365,7 +365,7 @@ static void usage(char *prog) { " -w\t\twrite-mode checking\n" " -d\t\tdisable page-by-page traversal of B-tree\n" " -i\t\tignore wrong order errors (for custom comparators case)\n" - " -s subdb\tprocess a specific subdatabase only\n" + " -s table\tprocess a specific subdatabase only\n" " -u\t\twarmup database before checking\n" " -U\t\twarmup and try lock database pages in memory before checking\n" " -0|1|2\tforce using specific meta-page 0, or 2 for checking\n" @@ -380,7 +380,7 @@ static int conclude(MDBX_chk_context_t *ctx) { if (ctx->result.total_problems == 1 && ctx->result.problems_meta == 1 && (chk_flags & (MDBX_CHK_SKIP_BTREE_TRAVERSAL | MDBX_CHK_SKIP_KV_TRAVERSAL)) == 0 && - (env_flags & MDBX_RDONLY) == 0 && !only_subdb.iov_base && + (env_flags & MDBX_RDONLY) == 0 && !only_table.iov_base && stuck_meta < 0 && ctx->result.steady_txnid < ctx->result.recent_txnid) { const size_t step_lineno = print(MDBX_chk_resolution, @@ -399,7 +399,7 @@ static int conclude(MDBX_chk_context_t *ctx) { if (turn_meta && stuck_meta >= 0 && (chk_flags & (MDBX_CHK_SKIP_BTREE_TRAVERSAL | MDBX_CHK_SKIP_KV_TRAVERSAL)) == 0 && - !only_subdb.iov_base && + !only_table.iov_base && (env_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) 
== MDBX_EXCLUSIVE) { const bool successful_check = (err | ctx->result.total_problems | ctx->result.problems_meta) == 0; @@ -529,11 +529,11 @@ int main(int argc, char *argv[]) { chk_flags |= MDBX_CHK_SKIP_BTREE_TRAVERSAL; break; case 's': - if (only_subdb.iov_base && strcmp(only_subdb.iov_base, optarg)) + if (only_table.iov_base && strcmp(only_table.iov_base, optarg)) usage(prog); else { - only_subdb.iov_base = optarg; - only_subdb.iov_len = strlen(optarg); + only_table.iov_base = optarg; + only_table.iov_len = strlen(optarg); } break; case 'i': @@ -574,7 +574,7 @@ int main(int argc, char *argv[]) { "write-mode must be enabled to turn to the specified meta-page."); rc = EXIT_INTERRUPTED; } - if (only_subdb.iov_base || (chk_flags & (MDBX_CHK_SKIP_BTREE_TRAVERSAL | + if (only_table.iov_base || (chk_flags & (MDBX_CHK_SKIP_BTREE_TRAVERSAL | MDBX_CHK_SKIP_KV_TRAVERSAL))) { error_fmt( "whole database checking with b-tree traversal are required to turn " diff --git a/src/tools/drop.c b/src/tools/drop.c index 483073b4..22bab11a 100644 --- a/src/tools/drop.c +++ b/src/tools/drop.c @@ -46,7 +46,7 @@ static void usage(void) { " -V\t\tprint version and exit\n" " -q\t\tbe quiet\n" " -d\t\tdelete the specified database, don't just empty it\n" - " -s name\tdrop the specified named subDB\n" + " -s name\tdrop the specified named table\n" " \t\tby default empty the main DB\n", prog); exit(EXIT_FAILURE); diff --git a/src/tools/dump.c b/src/tools/dump.c index 2a5952b1..7dc57d36 100644 --- a/src/tools/dump.c +++ b/src/tools/dump.c @@ -215,16 +215,16 @@ static void usage(void) { fprintf( stderr, "usage: %s " - "[-V] [-q] [-f file] [-l] [-p] [-r] [-a|-s subdb] [-u|U] " + "[-V] [-q] [-f file] [-l] [-p] [-r] [-a|-s table] [-u|U] " "dbpath\n" " -V\t\tprint version and exit\n" " -q\t\tbe quiet\n" " -f\t\twrite to file instead of stdout\n" - " -l\t\tlist subDBs and exit\n" + " -l\t\tlist tables and exit\n" " -p\t\tuse printable characters\n" " -r\t\trescue mode (ignore errors to dump corrupted 
DB)\n" - " -a\t\tdump main DB and all subDBs\n" - " -s name\tdump only the specified named subDB\n" + " -a\t\tdump main DB and all tables\n" + " -s name\tdump only the specified named table\n" " -u\t\twarmup database before dumping\n" " -U\t\twarmup and try lock database pages in memory before dumping\n" " \t\tby default dump only the main DB\n", diff --git a/src/tools/load.c b/src/tools/load.c index 77a81864..b0a5364f 100644 --- a/src/tools/load.c +++ b/src/tools/load.c @@ -477,10 +477,10 @@ static void usage(void) { " -a\t\tappend records in input order (required for custom " "comparators)\n" " -f file\tread from file instead of stdin\n" - " -s name\tload into specified named subDB\n" + " -s name\tload into specified named table\n" " -N\t\tdon't overwrite existing records when loading, just skip " "ones\n" - " -p\t\tpurge subDB before loading\n" + " -p\t\tpurge table before loading\n" " -T\t\tread plaintext\n" " -r\t\trescue mode (ignore errors to load corrupted DB dump)\n" " -n\t\tdon't use subdirectory for newly created database " diff --git a/src/tools/stat.c b/src/tools/stat.c index b0a47601..2306fb5a 100644 --- a/src/tools/stat.c +++ b/src/tools/stat.c @@ -47,15 +47,15 @@ static void print_stat(MDBX_stat *ms) { static void usage(const char *prog) { fprintf(stderr, - "usage: %s [-V] [-q] [-e] [-f[f[f]]] [-r[r]] [-a|-s name] dbpath\n" + "usage: %s [-V] [-q] [-e] [-f[f[f]]] [-r[r]] [-a|-s table] dbpath\n" " -V\t\tprint version and exit\n" " -q\t\tbe quiet\n" " -p\t\tshow statistics of page operations for current session\n" " -e\t\tshow whole DB info\n" " -f\t\tshow GC info\n" " -r\t\tshow readers\n" - " -a\t\tprint stat of main DB and all subDBs\n" - " -s name\tprint stat of only the specified named subDB\n" + " -a\t\tprint stat of main DB and all tables\n" + " -s table\tprint stat of only the specified named table\n" " \t\tby default print stat of only the main DB\n", prog); exit(EXIT_FAILURE); @@ -104,7 +104,7 @@ int main(int argc, char *argv[]) { 
MDBX_envinfo mei; prog = argv[0]; char *envname; - char *subname = nullptr; + char *table = nullptr; bool alldbs = false, envinfo = false, pgop = false; int freinfo = 0, rdrinfo = 0; @@ -143,7 +143,7 @@ int main(int argc, char *argv[]) { pgop = true; break; case 'a': - if (subname) + if (table) usage(prog); alldbs = true; break; @@ -161,7 +161,7 @@ int main(int argc, char *argv[]) { case 's': if (alldbs) usage(prog); - subname = optarg; + table = optarg; break; default: usage(prog); @@ -199,7 +199,7 @@ int main(int argc, char *argv[]) { return EXIT_FAILURE; } - if (alldbs || subname) { + if (alldbs || table) { rc = mdbx_env_set_maxdbs(env, 2); if (unlikely(rc != MDBX_SUCCESS)) { error("mdbx_env_set_maxdbs", rc); @@ -327,7 +327,7 @@ int main(int argc, char *argv[]) { } else printf(" No stale readers.\n"); } - if (!(subname || alldbs || freinfo)) + if (!(table || alldbs || freinfo)) goto txn_abort; } @@ -450,7 +450,7 @@ int main(int argc, char *argv[]) { printf(" GC: %" PRIaPGNO " pages\n", pages); } - rc = mdbx_dbi_open(txn, subname, MDBX_DB_ACCEDE, &dbi); + rc = mdbx_dbi_open(txn, table, MDBX_DB_ACCEDE, &dbi); if (unlikely(rc != MDBX_SUCCESS)) { error("mdbx_dbi_open", rc); goto txn_abort; @@ -462,7 +462,7 @@ int main(int argc, char *argv[]) { error("mdbx_dbi_stat", rc); goto txn_abort; } - printf("Status of %s\n", subname ? subname : "Main DB"); + printf("Status of %s\n", table ? 
table : "Main DB"); print_stat(&mst); if (alldbs) { @@ -476,16 +476,16 @@ int main(int argc, char *argv[]) { MDBX_val key; while (MDBX_SUCCESS == (rc = mdbx_cursor_get(cursor, &key, nullptr, MDBX_NEXT_NODUP))) { - MDBX_dbi subdbi; + MDBX_dbi xdbi; if (memchr(key.iov_base, '\0', key.iov_len)) continue; - subname = osal_malloc(key.iov_len + 1); - memcpy(subname, key.iov_base, key.iov_len); - subname[key.iov_len] = '\0'; - rc = mdbx_dbi_open(txn, subname, MDBX_DB_ACCEDE, &subdbi); + table = osal_malloc(key.iov_len + 1); + memcpy(table, key.iov_base, key.iov_len); + table[key.iov_len] = '\0'; + rc = mdbx_dbi_open(txn, table, MDBX_DB_ACCEDE, &xdbi); if (rc == MDBX_SUCCESS) - printf("Status of %s\n", subname); - osal_free(subname); + printf("Status of %s\n", table); + osal_free(table); if (unlikely(rc != MDBX_SUCCESS)) { if (rc == MDBX_INCOMPATIBLE) continue; @@ -493,14 +493,14 @@ int main(int argc, char *argv[]) { goto txn_abort; } - rc = mdbx_dbi_stat(txn, subdbi, &mst, sizeof(mst)); + rc = mdbx_dbi_stat(txn, xdbi, &mst, sizeof(mst)); if (unlikely(rc != MDBX_SUCCESS)) { error("mdbx_dbi_stat", rc); goto txn_abort; } print_stat(&mst); - rc = mdbx_dbi_close(env, subdbi); + rc = mdbx_dbi_close(env, xdbi); if (unlikely(rc != MDBX_SUCCESS)) { error("mdbx_dbi_close", rc); goto txn_abort; diff --git a/src/tree.c b/src/tree.c index a7357ccc..13932e48 100644 --- a/src/tree.c +++ b/src/tree.c @@ -49,15 +49,15 @@ void recalculate_merge_thresholds(MDBX_env *env) { : bytes / 4 /* 25 % */)); } -int tree_drop(MDBX_cursor *mc, const bool may_have_subDBs) { +int tree_drop(MDBX_cursor *mc, const bool may_have_tables) { MDBX_txn *txn = mc->txn; int rc = tree_search(mc, nullptr, Z_FIRST); if (likely(rc == MDBX_SUCCESS)) { - /* DUPSORT sub-DBs have no large-pages/subDBs. Omit scanning leaves. + /* DUPSORT sub-DBs have no large-pages/tables. Omit scanning leaves. * This also avoids any P_DUPFIX pages, which have no nodes. 
* Also if the DB doesn't have sub-DBs and has no large/overflow * pages, omit scanning leaves. */ - if (!(may_have_subDBs | mc->tree->large_pages)) + if (!(may_have_tables | mc->tree->large_pages)) cursor_pop(mc); rc = pnl_need(&txn->tw.retired_pages, (size_t)mc->tree->branch_pages + @@ -81,11 +81,11 @@ int tree_drop(MDBX_cursor *mc, const bool may_have_subDBs) { rc = page_retire_ex(mc, node_largedata_pgno(node), nullptr, 0); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; - if (!(may_have_subDBs | mc->tree->large_pages)) + if (!(may_have_tables | mc->tree->large_pages)) goto pop; } else if (node_flags(node) & N_SUBDATA) { if (unlikely((node_flags(node) & N_DUPDATA) == 0)) { - rc = /* disallowing implicit subDB deletion */ MDBX_INCOMPATIBLE; + rc = /* disallowing implicit table deletion */ MDBX_INCOMPATIBLE; goto bailout; } rc = cursor_dupsort_setup(mc, node, mp); diff --git a/src/txn.c b/src/txn.c index 0e32d893..f7b82787 100644 --- a/src/txn.c +++ b/src/txn.c @@ -685,7 +685,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { txn->dbs[FREE_DBI].root); if (txn->n_dbi > CORE_DBS) { - /* Update subDB root pointers */ + /* Update table root pointers */ cursor_couple_t cx; rc = cursor_init(&cx.outer, txn, MAIN_DBI); if (unlikely(rc != MDBX_SUCCESS)) diff --git a/src/walk.c b/src/walk.c index 431d812f..dc1c4599 100644 --- a/src/walk.c +++ b/src/walk.c @@ -105,7 +105,7 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_sdb_t *sdb, const pgno_t pgno, case N_SUBDATA /* sub-db */: { if (unlikely(node_data_size != sizeof(tree_t))) { ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid subDb node size", (unsigned)node_data_size); + "invalid table node size", (unsigned)node_data_size); assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } @@ -227,11 +227,11 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_sdb_t *sdb, const pgno_t pgno, } else { tree_t aligned_db; memcpy(&aligned_db, node_data(node), sizeof(aligned_db)); - walk_sdb_t 
subdb = {{node_key(node), node_ks(node)}, nullptr, nullptr}; - subdb.internal = &aligned_db; + walk_sdb_t table = {{node_key(node), node_ks(node)}, nullptr, nullptr}; + table.internal = &aligned_db; assert(err == MDBX_SUCCESS); ctx->deep += 1; - err = walk_sdb(ctx, &subdb); + err = walk_sdb(ctx, &table); ctx->deep -= 1; } break; diff --git a/src/walk.h b/src/walk.h index 7ac5a48e..8ef406c9 100644 --- a/src/walk.h +++ b/src/walk.h @@ -11,7 +11,7 @@ typedef struct walk_sdb { } walk_sdb_t; typedef int walk_func(const size_t pgno, const unsigned number, void *const ctx, - const int deep, const walk_sdb_t *subdb, + const int deep, const walk_sdb_t *table, const size_t page_size, const page_type_t page_type, const MDBX_error_t err, const size_t nentries, const size_t payload_bytes, const size_t header_bytes, From 7ed4a551f461d34dc2fbb05af39c2e027169ad5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 3 Aug 2024 14:23:18 +0300 Subject: [PATCH 246/443] =?UTF-8?q?mdbx:=20=D0=B2=D0=BD=D1=83=D1=82=D1=80?= =?UTF-8?q?=D0=B5=D0=BD=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=B5=D1=80=D0=B5=D0=B8?= =?UTF-8?q?=D0=BC=D0=B5=D0=BD=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20subDb?= =?UTF-8?q?=20=D0=B2=20=D1=82=D0=B0=D0=B1=D0=BB=D0=B8=D1=86=D1=8B.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/api-cursor.c | 6 +- src/api-txn.c | 10 +- src/bits.md | 6 +- src/chk.c | 308 ++++++++++++++++++++-------------------- src/cogs.h | 2 +- src/coherency.c | 16 +-- src/cold.c | 2 +- src/copy.c | 9 +- src/cursor.c | 75 +++++----- src/cursor.h | 2 +- src/dbi.c | 24 ++-- src/dxb.c | 6 +- src/layout-dxb.h | 16 +-- src/logging_and_debug.c | 6 +- src/meta.c | 2 +- src/misc.c | 2 +- src/node.c | 13 +- src/node.h | 6 +- src/page-get.c | 16 +-- src/page-search.c | 2 +- src/proto.h | 4 +- src/spill.c | 2 +- src/table.c | 10 +- src/tools/dump.c | 8 +- src/tree.c | 19 
++- src/txn.c | 12 +- src/walk.c | 50 +++---- src/walk.h | 6 +- 28 files changed, 317 insertions(+), 323 deletions(-) diff --git a/src/api-cursor.c b/src/api-cursor.c index c1e81aef..f5f90d0f 100644 --- a/src/api-cursor.c +++ b/src/api-cursor.c @@ -293,12 +293,12 @@ int mdbx_cursor_compare(const MDBX_cursor *l, const MDBX_cursor *r, if (is_pointed(&l->subcur->cursor)) { const page_t *mp = l->pg[l->top]; const node_t *node = page_node(mp, l->ki[l->top]); - assert(node_flags(node) & N_DUPDATA); + assert(node_flags(node) & N_DUP); } if (is_pointed(&r->subcur->cursor)) { const page_t *mp = r->pg[r->top]; const node_t *node = page_node(mp, r->ki[r->top]); - assert(node_flags(node) & N_DUPDATA); + assert(node_flags(node) & N_DUP); } #endif /* MDBX_DEBUG */ @@ -342,7 +342,7 @@ int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) { if (!inner_hollow(mc)) { const page_t *mp = mc->pg[mc->top]; const node_t *node = page_node(mp, mc->ki[mc->top]); - cASSERT(mc, node_flags(node) & N_DUPDATA); + cASSERT(mc, node_flags(node) & N_DUP); *countp = unlikely(mc->subcur->nested_tree.items > PTRDIFF_MAX) ? 
PTRDIFF_MAX : (size_t)mc->subcur->nested_tree.items; diff --git a/src/api-txn.c b/src/api-txn.c index 8d048f9a..054d1112 100644 --- a/src/api-txn.c +++ b/src/api-txn.c @@ -63,16 +63,16 @@ __cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, const tree_t *db = node_data(node); const unsigned flags = node_flags(node); switch (flags) { - case N_BIGDATA: + case N_BIG: case 0: /* single-value entry, deep = 0 */ *mask |= 1 << 0; break; - case N_DUPDATA: + case N_DUP: /* single sub-page, deep = 1 */ *mask |= 1 << 1; break; - case N_DUPDATA | N_SUBDATA: + case N_DUP | N_TREE: /* sub-tree */ *mask |= 1 << UNALIGNED_PEEK_16(db, tree_t, height); break; @@ -332,7 +332,7 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, /* LY: allows update (explicit overwrite) only for unique keys */ node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); - if (node_flags(node) & N_DUPDATA) { + if (node_flags(node) & N_DUP) { tASSERT(txn, inner_pointed(&cx.outer) && cx.outer.subcur->nested_tree.items > 1); rc = MDBX_EMULTIVAL; @@ -445,7 +445,7 @@ int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, if (flags & MDBX_CURRENT) { /* disallow update/delete for multi-values */ node_t *node = page_node(page, cx.outer.ki[cx.outer.top]); - if (node_flags(node) & N_DUPDATA) { + if (node_flags(node) & N_DUP) { tASSERT(txn, inner_pointed(&cx.outer) && cx.outer.subcur->nested_tree.items > 1); if (cx.outer.subcur->nested_tree.items > 1) { diff --git a/src/bits.md b/src/bits.md index b0712ee8..418f2089 100644 --- a/src/bits.md +++ b/src/bits.md @@ -1,8 +1,8 @@ N | MASK | ENV | TXN | DB | PUT | DBI | NODE | PAGE | MRESIZE | --|---------|-----------|--------------|----------|-----------|------------|---------|----------|---------| -0 |0000 0001|ALLOC_RSRV |TXN_FINISHED | | |DBI_DIRTY |N_BIGDATA|P_BRANCH | | -1 |0000 0002|ALLOC_UNIMP|TXN_ERROR |REVERSEKEY|F_SUBDATA |DBI_STALE |N_SUBDATA|P_LEAF | | -2 |0000 
0004|ALLOC_COLSC|TXN_DIRTY |DUPSORT | |DBI_FRESH |N_DUPDATA|P_LARGE | | +0 |0000 0001|ALLOC_RSRV |TXN_FINISHED | | |DBI_DIRTY |N_BIG |P_BRANCH | | +1 |0000 0002|ALLOC_UNIMP|TXN_ERROR |REVERSEKEY|N_TREE |DBI_STALE |N_TREE |P_LEAF | | +2 |0000 0004|ALLOC_COLSC|TXN_DIRTY |DUPSORT | |DBI_FRESH |N_DUP |P_LARGE | | 3 |0000 0008|ALLOC_SSCAN|TXN_SPILLS |INTEGERKEY| |DBI_CREAT | |P_META | | 4 |0000 0010|ALLOC_FIFO |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_BAD | | 5 |0000 0020| |TXN_PARKED |INTEGERDUP|NODUPDATA | | |P_DUPFIX | | diff --git a/src/chk.c b/src/chk.c index 2acda621..8af68b3f 100644 --- a/src/chk.c +++ b/src/chk.c @@ -488,15 +488,15 @@ __cold static void chk_dispose(MDBX_chk_internal_t *chk) { assert(chk->table[FREE_DBI] == &chk->table_gc); assert(chk->table[MAIN_DBI] == &chk->table_main); for (size_t i = 0; i < ARRAY_LENGTH(chk->table); ++i) { - MDBX_chk_table_t *const sdb = chk->table[i]; - if (sdb) { + MDBX_chk_table_t *const tbl = chk->table[i]; + if (tbl) { chk->table[i] = nullptr; - if (chk->cb->table_dispose && sdb->cookie) { - chk->cb->table_dispose(chk->usr, sdb); - sdb->cookie = nullptr; + if (chk->cb->table_dispose && tbl->cookie) { + chk->cb->table_dispose(chk->usr, tbl); + tbl->cookie = nullptr; } - if (sdb != &chk->table_gc && sdb != &chk->table_main) { - osal_free(sdb); + if (tbl != &chk->table_gc && tbl != &chk->table_main) { + osal_free(tbl); } } } @@ -639,8 +639,8 @@ histogram_print(MDBX_chk_scope_t *scope, MDBX_chk_line_t *line, //----------------------------------------------------------------------------- -__cold static int chk_get_sdb(MDBX_chk_scope_t *const scope, - const walk_sdb_t *in, MDBX_chk_table_t **out) { +__cold static int chk_get_tbl(MDBX_chk_scope_t *const scope, + const walk_tbl_t *in, MDBX_chk_table_t **out) { MDBX_chk_internal_t *const chk = scope->internal; if (chk->last_lookup && chk->last_lookup->name.iov_base == in->name.iov_base) { @@ -649,27 +649,27 @@ __cold static int chk_get_sdb(MDBX_chk_scope_t *const 
scope, } for (size_t i = 0; i < ARRAY_LENGTH(chk->table); ++i) { - MDBX_chk_table_t *sdb = chk->table[i]; - if (!sdb) { - sdb = osal_calloc(1, sizeof(MDBX_chk_table_t)); - if (unlikely(!sdb)) { + MDBX_chk_table_t *tbl = chk->table[i]; + if (!tbl) { + tbl = osal_calloc(1, sizeof(MDBX_chk_table_t)); + if (unlikely(!tbl)) { *out = nullptr; return chk_error_rc(scope, MDBX_ENOMEM, "alloc_table"); } - chk->table[i] = sdb; - sdb->flags = in->internal->flags; - sdb->id = -1; - sdb->name = in->name; + chk->table[i] = tbl; + tbl->flags = in->internal->flags; + tbl->id = -1; + tbl->name = in->name; } - if (sdb->name.iov_base == in->name.iov_base) { - if (sdb->id < 0) { - sdb->id = (int)i; - sdb->cookie = + if (tbl->name.iov_base == in->name.iov_base) { + if (tbl->id < 0) { + tbl->id = (int)i; + tbl->cookie = chk->cb->table_filter - ? chk->cb->table_filter(chk->usr, &sdb->name, sdb->flags) + ? chk->cb->table_filter(chk->usr, &tbl->name, tbl->flags) : (void *)(intptr_t)-1; } - *out = (chk->last_lookup = sdb); + *out = (chk->last_lookup = tbl); return MDBX_SUCCESS; } } @@ -741,7 +741,7 @@ __cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope, __cold static int chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, - const int deep, const walk_sdb_t *sdb_info, + const int deep, const walk_tbl_t *tbl_info, const size_t page_size, const page_type_t pagetype, const MDBX_error_t page_err, const size_t nentries, const size_t payload_bytes, const size_t header_bytes, @@ -751,8 +751,8 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, MDBX_chk_context_t *const usr = chk->usr; MDBX_env *const env = usr->env; - MDBX_chk_table_t *sdb; - int err = chk_get_sdb(scope, sdb_info, &sdb); + MDBX_chk_table_t *tbl; + int err = chk_get_tbl(scope, tbl_info, &tbl); if (unlikely(err)) return err; @@ -760,21 +760,21 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, chk_scope_issue(scope, "too deeply %u", deep); return 
MDBX_CORRUPTED /* avoid infinite loop/recursion */; } - histogram_acc(deep, &sdb->histogram.deep); + histogram_acc(deep, &tbl->histogram.deep); usr->result.processed_pages += npages; const size_t page_bytes = payload_bytes + header_bytes + unused_bytes; int height = deep + 1; - if (sdb->id >= CORE_DBS) + if (tbl->id >= CORE_DBS) height -= usr->txn->dbs[MAIN_DBI].height; - const tree_t *nested = sdb_info->nested; + const tree_t *nested = tbl_info->nested; if (nested) { - if (sdb->flags & MDBX_DUPSORT) - height -= sdb_info->internal->height; + if (tbl->flags & MDBX_DUPSORT) + height -= tbl_info->internal->height; else { chk_object_issue(scope, "nested tree", pgno, "unexpected", - "table %s flags 0x%x, deep %i", chk_v2a(chk, &sdb->name), - sdb->flags, deep); + "table %s flags 0x%x, deep %i", chk_v2a(chk, &tbl->name), + tbl->flags, deep); nested = nullptr; } } else @@ -787,82 +787,82 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, chk_object_issue(scope, "page", pgno, "unknown page-type", "type %u, deep %i", (unsigned)pagetype, deep); pagetype_caption = "unknown"; - sdb->pages.other += npages; + tbl->pages.other += npages; break; case page_broken: assert(page_err != MDBX_SUCCESS); pagetype_caption = "broken"; - sdb->pages.other += npages; + tbl->pages.other += npages; break; case page_sub_broken: assert(page_err != MDBX_SUCCESS); pagetype_caption = "broken-subpage"; - sdb->pages.other += npages; + tbl->pages.other += npages; break; case page_large: pagetype_caption = "large"; - histogram_acc(npages, &sdb->histogram.large_pages); - if (sdb->flags & MDBX_DUPSORT) + histogram_acc(npages, &tbl->histogram.large_pages); + if (tbl->flags & MDBX_DUPSORT) chk_object_issue(scope, "page", pgno, "unexpected", "type %u, table %s flags 0x%x, deep %i", - (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags, + (unsigned)pagetype, chk_v2a(chk, &tbl->name), tbl->flags, deep); break; case page_branch: branch = true; if (!nested) { pagetype_caption = 
"branch"; - sdb->pages.branch += 1; + tbl->pages.branch += 1; } else { pagetype_caption = "nested-branch"; - sdb->pages.nested_branch += 1; + tbl->pages.nested_branch += 1; } break; case page_dupfix_leaf: if (!nested) chk_object_issue(scope, "page", pgno, "unexpected", "type %u, table %s flags 0x%x, deep %i", - (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags, + (unsigned)pagetype, chk_v2a(chk, &tbl->name), tbl->flags, deep); /* fall through */ __fallthrough; case page_leaf: if (!nested) { pagetype_caption = "leaf"; - sdb->pages.leaf += 1; - if (height != sdb_info->internal->height) + tbl->pages.leaf += 1; + if (height != tbl_info->internal->height) chk_object_issue(scope, "page", pgno, "wrong tree height", "actual %i != %i table %s", height, - sdb_info->internal->height, chk_v2a(chk, &sdb->name)); + tbl_info->internal->height, chk_v2a(chk, &tbl->name)); } else { pagetype_caption = (pagetype == page_leaf) ? "nested-leaf" : "nested-leaf-dupfix"; - sdb->pages.nested_leaf += 1; + tbl->pages.nested_leaf += 1; if (chk->last_nested != nested) { - histogram_acc(height, &sdb->histogram.nested_tree); + histogram_acc(height, &tbl->histogram.nested_tree); chk->last_nested = nested; } if (height != nested->height) chk_object_issue(scope, "page", pgno, "wrong nested-tree height", "actual %i != %i dupsort-node %s", height, - nested->height, chk_v2a(chk, &sdb->name)); + nested->height, chk_v2a(chk, &tbl->name)); } break; case page_sub_dupfix_leaf: case page_sub_leaf: pagetype_caption = (pagetype == page_sub_leaf) ? 
"subleaf-dupsort" : "subleaf-dupfix"; - sdb->pages.nested_subleaf += 1; - if ((sdb->flags & MDBX_DUPSORT) == 0 || nested) + tbl->pages.nested_subleaf += 1; + if ((tbl->flags & MDBX_DUPSORT) == 0 || nested) chk_object_issue(scope, "page", pgno, "unexpected", "type %u, table %s flags 0x%x, deep %i", - (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags, + (unsigned)pagetype, chk_v2a(chk, &tbl->name), tbl->flags, deep); break; } if (npages) { - if (sdb->cookie) { + if (tbl->cookie) { MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra); if (npages == 1) chk_print(line, "%s-page %" PRIuSIZE, pagetype_caption, pgno); @@ -873,7 +873,7 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, " of %s: header %" PRIiPTR ", %s %" PRIiPTR ", payload %" PRIiPTR ", unused %" PRIiPTR ", deep %i", - chk_v2a(chk, &sdb->name), header_bytes, + chk_v2a(chk, &tbl->name), header_bytes, (pagetype == page_branch) ? "keys" : "entries", nentries, payload_bytes, unused_bytes, deep)); } @@ -886,18 +886,18 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, "%s-page: %" PRIuSIZE " > %" PRIuSIZE ", deep %i", pagetype_caption, spanpgno, usr->result.alloc_pages, deep); - sdb->pages.all += 1; + tbl->pages.all += 1; } else if (chk->pagemap[spanpgno]) { const MDBX_chk_table_t *const rival = chk->table[chk->pagemap[spanpgno] - 1]; chk_object_issue(scope, "page", spanpgno, - (branch && rival == sdb) ? "loop" : "already used", + (branch && rival == tbl) ? 
"loop" : "already used", "%s-page: by %s, deep %i", pagetype_caption, chk_v2a(chk, &rival->name), deep); already_used = true; } else { - chk->pagemap[spanpgno] = (int16_t)sdb->id + 1; - sdb->pages.all += 1; + chk->pagemap[spanpgno] = (int16_t)tbl->id + 1; + tbl->pages.all += 1; } } @@ -927,7 +927,7 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE " entries, deep %i", pagetype_caption, payload_bytes, nentries, deep); - sdb->pages.empty += 1; + tbl->pages.empty += 1; } if (npages) { @@ -938,9 +938,9 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, pagetype_caption, page_size, page_bytes, header_bytes, payload_bytes, unused_bytes, deep); if (page_size > page_bytes) - sdb->lost_bytes += page_size - page_bytes; + tbl->lost_bytes += page_size - page_bytes; } else { - sdb->payload_bytes += payload_bytes + header_bytes; + tbl->payload_bytes += payload_bytes + header_bytes; usr->result.total_payload_bytes += payload_bytes + header_bytes; } } @@ -982,17 +982,17 @@ __cold static int chk_tree(MDBX_chk_scope_t *const scope) { memset(&total, 0, sizeof(total)); total.pages.all = NUM_METAS; for (size_t i = 0; i < ARRAY_LENGTH(chk->table) && chk->table[i]; ++i) { - MDBX_chk_table_t *const sdb = chk->table[i]; - total.payload_bytes += sdb->payload_bytes; - total.lost_bytes += sdb->lost_bytes; - total.pages.all += sdb->pages.all; - total.pages.empty += sdb->pages.empty; - total.pages.other += sdb->pages.other; - total.pages.branch += sdb->pages.branch; - total.pages.leaf += sdb->pages.leaf; - total.pages.nested_branch += sdb->pages.nested_branch; - total.pages.nested_leaf += sdb->pages.nested_leaf; - total.pages.nested_subleaf += sdb->pages.nested_subleaf; + MDBX_chk_table_t *const tbl = chk->table[i]; + total.payload_bytes += tbl->payload_bytes; + total.lost_bytes += tbl->lost_bytes; + total.pages.all += tbl->pages.all; + total.pages.empty += tbl->pages.empty; + total.pages.other 
+= tbl->pages.other; + total.pages.branch += tbl->pages.branch; + total.pages.leaf += tbl->pages.leaf; + total.pages.nested_branch += tbl->pages.nested_branch; + total.pages.nested_leaf += tbl->pages.nested_leaf; + total.pages.nested_subleaf += tbl->pages.nested_subleaf; } assert(total.pages.all == usr->result.processed_pages); @@ -1008,69 +1008,69 @@ __cold static int chk_tree(MDBX_chk_scope_t *const scope) { err = chk_scope_restore(scope, err); if (scope->verbosity > MDBX_chk_info) { for (size_t i = 0; i < ARRAY_LENGTH(chk->table) && chk->table[i]; ++i) { - MDBX_chk_table_t *const sdb = chk->table[i]; + MDBX_chk_table_t *const tbl = chk->table[i]; MDBX_chk_scope_t *inner = - chk_scope_push(scope, 0, "tree %s:", chk_v2a(chk, &sdb->name)); - if (sdb->pages.all == 0) + chk_scope_push(scope, 0, "tree %s:", chk_v2a(chk, &tbl->name)); + if (tbl->pages.all == 0) chk_line_end( chk_print(chk_line_begin(inner, MDBX_chk_resolution), "empty")); else { MDBX_chk_line_t *line = chk_line_begin(inner, MDBX_chk_info); if (line) { line = chk_print(line, "page usage: subtotal %" PRIuSIZE, - sdb->pages.all); + tbl->pages.all); const size_t branch_pages = - sdb->pages.branch + sdb->pages.nested_branch; - const size_t leaf_pages = sdb->pages.leaf + sdb->pages.nested_leaf + - sdb->pages.nested_subleaf; - if (sdb->pages.other) - line = chk_print(line, ", other %" PRIuSIZE, sdb->pages.other); - if (sdb->pages.other == 0 || - (branch_pages | leaf_pages | sdb->histogram.large_pages.count) != + tbl->pages.branch + tbl->pages.nested_branch; + const size_t leaf_pages = tbl->pages.leaf + tbl->pages.nested_leaf + + tbl->pages.nested_subleaf; + if (tbl->pages.other) + line = chk_print(line, ", other %" PRIuSIZE, tbl->pages.other); + if (tbl->pages.other == 0 || + (branch_pages | leaf_pages | tbl->histogram.large_pages.count) != 0) { line = chk_print(line, ", branch %" PRIuSIZE ", leaf %" PRIuSIZE, branch_pages, leaf_pages); - if (sdb->histogram.large_pages.count || - (sdb->flags & MDBX_DUPSORT) 
== 0) { + if (tbl->histogram.large_pages.count || + (tbl->flags & MDBX_DUPSORT) == 0) { line = chk_print(line, ", large %" PRIuSIZE, - sdb->histogram.large_pages.count); - if (sdb->histogram.large_pages.amount | - sdb->histogram.large_pages.count) - line = histogram_print(inner, line, &sdb->histogram.large_pages, + tbl->histogram.large_pages.count); + if (tbl->histogram.large_pages.amount | + tbl->histogram.large_pages.count) + line = histogram_print(inner, line, &tbl->histogram.large_pages, " amount", "single", true); } } - line = histogram_dist(chk_line_feed(line), &sdb->histogram.deep, + line = histogram_dist(chk_line_feed(line), &tbl->histogram.deep, "tree deep density", "1", false); - if (sdb != &chk->table_gc && sdb->histogram.nested_tree.count) { + if (tbl != &chk->table_gc && tbl->histogram.nested_tree.count) { line = chk_print(chk_line_feed(line), "nested tree(s) %" PRIuSIZE, - sdb->histogram.nested_tree.count); - line = histogram_dist(line, &sdb->histogram.nested_tree, " density", + tbl->histogram.nested_tree.count); + line = histogram_dist(line, &tbl->histogram.nested_tree, " density", "1", false); line = chk_print(chk_line_feed(line), "nested tree(s) pages %" PRIuSIZE ": branch %" PRIuSIZE ", leaf %" PRIuSIZE ", subleaf %" PRIuSIZE, - sdb->pages.nested_branch + sdb->pages.nested_leaf, - sdb->pages.nested_branch, sdb->pages.nested_leaf, - sdb->pages.nested_subleaf); + tbl->pages.nested_branch + tbl->pages.nested_leaf, + tbl->pages.nested_branch, tbl->pages.nested_leaf, + tbl->pages.nested_subleaf); } - const size_t bytes = pgno2bytes(env, sdb->pages.all); + const size_t bytes = pgno2bytes(env, tbl->pages.all); line = chk_print( chk_line_feed(line), "page filling: subtotal %" PRIuSIZE " bytes (%.1f%%), payload %" PRIuSIZE " (%.1f%%), unused %" PRIuSIZE " (%.1f%%)", - bytes, bytes * 100.0 / total_page_bytes, sdb->payload_bytes, - sdb->payload_bytes * 100.0 / bytes, bytes - sdb->payload_bytes, - (bytes - sdb->payload_bytes) * 100.0 / bytes); - if 
(sdb->pages.empty) + bytes, bytes * 100.0 / total_page_bytes, tbl->payload_bytes, + tbl->payload_bytes * 100.0 / bytes, bytes - tbl->payload_bytes, + (bytes - tbl->payload_bytes) * 100.0 / bytes); + if (tbl->pages.empty) line = chk_print(line, ", %" PRIuSIZE " empty pages", - sdb->pages.empty); - if (sdb->lost_bytes) + tbl->pages.empty); + if (tbl->lost_bytes) line = - chk_print(line, ", %" PRIuSIZE " bytes lost", sdb->lost_bytes); + chk_print(line, ", %" PRIuSIZE " bytes lost", tbl->lost_bytes); chk_line_end(line); } } @@ -1098,23 +1098,23 @@ __cold static int chk_tree(MDBX_chk_scope_t *const scope) { } typedef int(chk_kv_visitor)(MDBX_chk_scope_t *const scope, - MDBX_chk_table_t *sdb, const size_t record_number, + MDBX_chk_table_t *tbl, const size_t record_number, const MDBX_val *key, const MDBX_val *data); __cold static int chk_handle_kv(MDBX_chk_scope_t *const scope, - MDBX_chk_table_t *sdb, + MDBX_chk_table_t *tbl, const size_t record_number, const MDBX_val *key, const MDBX_val *data) { MDBX_chk_internal_t *const chk = scope->internal; int err = MDBX_SUCCESS; - assert(sdb->cookie); + assert(tbl->cookie); if (chk->cb->table_handle_kv) - err = chk->cb->table_handle_kv(chk->usr, sdb, record_number, key, data); + err = chk->cb->table_handle_kv(chk->usr, tbl, record_number, key, data); return err ? 
err : chk_check_break(scope); } __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, - MDBX_chk_table_t *sdb, chk_kv_visitor *handler) { + MDBX_chk_table_t *tbl, chk_kv_visitor *handler) { MDBX_chk_internal_t *const chk = scope->internal; MDBX_chk_context_t *const usr = chk->usr; MDBX_env *const env = usr->env; @@ -1127,14 +1127,14 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, chk_line_end( chk_flush(chk_print(chk_line_begin(scope, MDBX_chk_error), "abort processing %s due to a previous error", - chk_v2a(chk, &sdb->name)))); + chk_v2a(chk, &tbl->name)))); err = MDBX_BAD_TXN; goto bailout; } if (0 > (int)dbi) { err = dbi_open( - txn, &sdb->name, MDBX_DB_ACCEDE, &dbi, + txn, &tbl->name, MDBX_DB_ACCEDE, &dbi, (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr, (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr); if (unlikely(err)) { @@ -1150,7 +1150,7 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, const tree_t *const db = txn->dbs + dbi; if (handler) { const char *key_mode = nullptr; - switch (sdb->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) { + switch (tbl->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) { case 0: key_mode = "usual"; break; @@ -1166,11 +1166,11 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, default: key_mode = "inconsistent"; chk_scope_issue(scope, "wrong key-mode (0x%x)", - sdb->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)); + tbl->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)); } const char *value_mode = nullptr; - switch (sdb->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | + switch (tbl->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | MDBX_INTEGERDUP)) { case 0: value_mode = "single"; @@ -1199,7 +1199,7 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, default: value_mode = "inconsistent"; chk_scope_issue(scope, "wrong value-mode (0x%x)", - sdb->flags & (MDBX_DUPSORT | 
MDBX_REVERSEDUP | + tbl->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | MDBX_INTEGERDUP)); } @@ -1207,7 +1207,7 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, line = chk_print(line, "key-value kind: %s-key => %s-value", key_mode, value_mode); line = chk_print(line, ", flags:"); - if (!sdb->flags) + if (!tbl->flags) line = chk_print(line, " none"); else { const uint8_t f[] = {MDBX_DUPSORT, @@ -1220,10 +1220,10 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, const char *const t[] = {"dupsort", "integerkey", "reversekey", "dupfix", "reversedup", "integerdup"}; for (size_t i = 0; f[i]; i++) - if (sdb->flags & f[i]) + if (tbl->flags & f[i]) line = chk_print(line, " %s", t[i]); } - chk_line_end(chk_print(line, " (0x%02X)", sdb->flags)); + chk_line_end(chk_print(line, " (0x%02X)", tbl->flags)); line = chk_print(chk_line_begin(scope, MDBX_chk_verbose), "entries %" PRIu64 ", sequence %" PRIu64, db->items, @@ -1241,14 +1241,14 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, db->large_pages)); if ((chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) { - const size_t branch_pages = sdb->pages.branch + sdb->pages.nested_branch; - const size_t leaf_pages = sdb->pages.leaf + sdb->pages.nested_leaf; + const size_t branch_pages = tbl->pages.branch + tbl->pages.nested_branch; + const size_t leaf_pages = tbl->pages.leaf + tbl->pages.nested_leaf; const size_t subtotal_pages = db->branch_pages + db->leaf_pages + db->large_pages; - if (subtotal_pages != sdb->pages.all) + if (subtotal_pages != tbl->pages.all) chk_scope_issue( scope, "%s pages mismatch (%" PRIuSIZE " != walked %" PRIuSIZE ")", - "subtotal", subtotal_pages, sdb->pages.all); + "subtotal", subtotal_pages, tbl->pages.all); if (db->branch_pages != branch_pages) chk_scope_issue( scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", @@ -1257,11 +1257,11 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, 
chk_scope_issue( scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", "all-leaf", db->leaf_pages, leaf_pages); - if (db->large_pages != sdb->histogram.large_pages.amount) + if (db->large_pages != tbl->histogram.large_pages.amount) chk_scope_issue( scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", "large/overlow", db->large_pages, - sdb->histogram.large_pages.amount); + tbl->histogram.large_pages.amount); } } @@ -1276,7 +1276,7 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, cursor->subcur->cursor.checking |= z_ignord | z_pagecheck; } - const size_t maxkeysize = mdbx_env_get_maxkeysize_ex(env, sdb->flags); + const size_t maxkeysize = mdbx_env_get_maxkeysize_ex(env, tbl->flags); MDBX_val prev_key = {nullptr, 0}, prev_data = {nullptr, 0}; MDBX_val key, data; err = mdbx_cursor_get(cursor, &key, &data, MDBX_FIRST); @@ -1291,7 +1291,7 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, "key length exceeds max-key-size", "%" PRIuPTR " > %" PRIuPTR, key.iov_len, maxkeysize); bad_key = true; - } else if ((sdb->flags & MDBX_INTEGERKEY) && key.iov_len != 8 && + } else if ((tbl->flags & MDBX_INTEGERKEY) && key.iov_len != 8 && key.iov_len != 4) { chk_object_issue(scope, "entry", record_count, "wrong key length", "%" PRIuPTR " != 4or8", key.iov_len); @@ -1299,7 +1299,7 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, } bool bad_data = false; - if ((sdb->flags & MDBX_INTEGERDUP) && data.iov_len != 8 && + if ((tbl->flags & MDBX_INTEGERDUP) && data.iov_len != 8 && data.iov_len != 4) { chk_object_issue(scope, "entry", record_count, "wrong data length", "%" PRIuPTR " != 4or8", data.iov_len); @@ -1307,7 +1307,7 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, } if (prev_key.iov_base) { - if (prev_data.iov_base && !bad_data && (sdb->flags & MDBX_DUPFIXED) && + if (prev_data.iov_base && !bad_data && (tbl->flags & MDBX_DUPFIXED) && prev_data.iov_len != 
data.iov_len) { chk_object_issue(scope, "entry", record_count, "different data length", "%" PRIuPTR " != %" PRIuPTR, prev_data.iov_len, @@ -1319,7 +1319,7 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, int cmp = mdbx_cmp(txn, dbi, &key, &prev_key); if (cmp == 0) { ++dups; - if ((sdb->flags & MDBX_DUPSORT) == 0) { + if ((tbl->flags & MDBX_DUPSORT) == 0) { chk_object_issue(scope, "entry", record_count, "duplicated entries", nullptr); if (prev_data.iov_base && data.iov_len == prev_data.iov_len && @@ -1342,34 +1342,32 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, } if (!bad_key) { - if (!prev_key.iov_base && (sdb->flags & MDBX_INTEGERKEY)) + if (!prev_key.iov_base && (tbl->flags & MDBX_INTEGERKEY)) chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "fixed key-size %" PRIuSIZE, key.iov_len)); prev_key = key; } if (!bad_data) { if (!prev_data.iov_base && - (sdb->flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED))) + (tbl->flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED))) chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "fixed data-size %" PRIuSIZE, data.iov_len)); prev_data = data; } record_count++; - histogram_acc(key.iov_len, &sdb->histogram.key_len); - histogram_acc(data.iov_len, &sdb->histogram.val_len); + histogram_acc(key.iov_len, &tbl->histogram.key_len); + histogram_acc(data.iov_len, &tbl->histogram.val_len); const node_t *const node = page_node(cursor->pg[cursor->top], cursor->ki[cursor->top]); - if (node_flags(node) == N_SUBDATA) { - if (dbi != MAIN_DBI || (sdb->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | + if (node_flags(node) == N_TREE) { + if (dbi != MAIN_DBI || (tbl->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP))) - chk_object_issue(scope, "entry", record_count, - "unexpected table", "node-flags 0x%x", - node_flags(node)); + chk_object_issue(scope, "entry", record_count, "unexpected table", + "node-flags 0x%x", node_flags(node)); else if (data.iov_len != sizeof(tree_t)) - 
chk_object_issue(scope, "entry", record_count, - "wrong table node size", + chk_object_issue(scope, "entry", record_count, "wrong table node size", "node-size %" PRIuSIZE " != %" PRIuSIZE, data.iov_len, sizeof(tree_t)); else if (scope->stage == MDBX_chk_maindb) @@ -1379,10 +1377,10 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, /* обработка table при втором проходе */ tree_t aligned_db; memcpy(&aligned_db, data.iov_base, sizeof(aligned_db)); - walk_sdb_t sdb_info = {.name = key}; - sdb_info.internal = &aligned_db; + walk_tbl_t tbl_info = {.name = key}; + tbl_info.internal = &aligned_db; MDBX_chk_table_t *table; - err = chk_get_sdb(scope, &sdb_info, &table); + err = chk_get_tbl(scope, &tbl_info, &table); if (unlikely(err)) goto bailout; if (table->cookie) { @@ -1403,7 +1401,7 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, "Skip processing %s...", chk_v2a(chk, &table->name)))); } } else if (handler) { - err = handler(scope, sdb, record_count, &key, &data); + err = handler(scope, tbl, record_count, &key, &data); if (unlikely(err)) goto bailout; } @@ -1420,22 +1418,22 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, bailout: if (cursor) { if (handler) { - if (sdb->histogram.key_len.count) { + if (tbl->histogram.key_len.count) { MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info); - line = histogram_dist(line, &sdb->histogram.key_len, + line = histogram_dist(line, &tbl->histogram.key_len, "key length density", "0/1", false); chk_line_feed(line); - line = histogram_dist(line, &sdb->histogram.val_len, + line = histogram_dist(line, &tbl->histogram.val_len, "value length density", "0/1", false); chk_line_end(line); } if (scope->stage == MDBX_chk_maindb) usr->result.table_total = sub_databases; if (chk->cb->table_conclude) - err = chk->cb->table_conclude(usr, sdb, cursor, err); + err = chk->cb->table_conclude(usr, tbl, cursor, err); MDBX_chk_line_t *line = chk_line_begin(scope, 
MDBX_chk_resolution); line = chk_print(line, "summary: %" PRIuSIZE " records,", record_count); - if (dups || (sdb->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | + if (dups || (tbl->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP))) line = chk_print(line, " %" PRIuSIZE " dups,", dups); if (sub_databases || dbi == MAIN_DBI) @@ -1444,8 +1442,8 @@ bailout: " %" PRIuSIZE " key's bytes," " %" PRIuSIZE " data's bytes," " %" PRIuSIZE " problem(s)", - sdb->histogram.key_len.amount, - sdb->histogram.val_len.amount, scope->subtotal_issues); + tbl->histogram.key_len.amount, + tbl->histogram.val_len.amount, scope->subtotal_issues); chk_line_end(chk_flush(line)); } @@ -1457,13 +1455,13 @@ bailout: } __cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, - MDBX_chk_table_t *sdb, + MDBX_chk_table_t *tbl, const size_t record_number, const MDBX_val *key, const MDBX_val *data) { MDBX_chk_internal_t *const chk = scope->internal; MDBX_chk_context_t *const usr = chk->usr; - assert(sdb == &chk->table_gc); - (void)sdb; + assert(tbl == &chk->table_gc); + (void)tbl; const char *bad = ""; pgno_t *iptr = data->iov_base; @@ -1546,7 +1544,7 @@ __cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, : pgno_sub(pgno, span))) ++span; } - if (sdb->cookie) { + if (tbl->cookie) { chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_details), "transaction %" PRIaTXN ", %" PRIuSIZE " pages, maxspan %" PRIuSIZE "%s", @@ -1559,7 +1557,7 @@ __cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, : pgno_sub(pgno, span)); ++span) ; - histogram_acc(span, &sdb->histogram.nested_tree); + histogram_acc(span, &tbl->histogram.nested_tree); MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra); if (line) { if (span > 1) diff --git a/src/cogs.h b/src/cogs.h index c9d39332..705900bc 100644 --- a/src/cogs.h +++ b/src/cogs.h @@ -207,7 +207,7 @@ flags_db2sub(uint16_t db_flags) { return sub_flags; } -static inline bool check_sdb_flags(unsigned flags) { +static inline 
bool check_table_flags(unsigned flags) { switch (flags & ~(MDBX_REVERSEKEY | MDBX_INTEGERKEY)) { default: NOTICE("invalid db-flags 0x%x", flags); diff --git a/src/coherency.c b/src/coherency.c index 41986ac1..7ae4da87 100644 --- a/src/coherency.c +++ b/src/coherency.c @@ -30,7 +30,7 @@ static bool coherency_check(const MDBX_env *env, const txnid_t txnid, unlikely(freedb_root_pgno >= last_pgno)) { if (report) WARNING( - "catch invalid %sdb root %" PRIaPGNO " for meta_txnid %" PRIaTXN + "catch invalid %s-db root %" PRIaPGNO " for meta_txnid %" PRIaTXN " %s", "free", freedb_root_pgno, txnid, (env->stuck_meta < 0) @@ -42,7 +42,7 @@ static bool coherency_check(const MDBX_env *env, const txnid_t txnid, unlikely(maindb_root_pgno >= last_pgno)) { if (report) WARNING( - "catch invalid %sdb root %" PRIaPGNO " for meta_txnid %" PRIaTXN + "catch invalid %s-db root %" PRIaPGNO " for meta_txnid %" PRIaTXN " %s", "main", maindb_root_pgno, txnid, (env->stuck_meta < 0) @@ -55,7 +55,7 @@ static bool coherency_check(const MDBX_env *env, const txnid_t txnid, likely(magic_and_version == MDBX_DATA_MAGIC)))) { if (report) WARNING( - "catch invalid %sdb.mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN + "catch invalid %s-db.mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN " %s", "free", freedb_mod_txnid, txnid, (env->stuck_meta < 0) @@ -68,7 +68,7 @@ static bool coherency_check(const MDBX_env *env, const txnid_t txnid, likely(magic_and_version == MDBX_DATA_MAGIC)))) { if (report) WARNING( - "catch invalid %sdb.mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN + "catch invalid %s-db.mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN " %s", "main", maindb_mod_txnid, txnid, (env->stuck_meta < 0) @@ -83,7 +83,7 @@ static bool coherency_check(const MDBX_env *env, const txnid_t txnid, if (unlikely(root_txnid != freedb_mod_txnid)) { if (report) WARNING("catch invalid root_page %" PRIaPGNO " mod_txnid %" PRIaTXN - " for %sdb.mod_txnid %" PRIaTXN " %s", + " for %s-db.mod_txnid %" PRIaTXN " %s", 
freedb_root_pgno, root_txnid, "free", freedb_mod_txnid, (env->stuck_meta < 0) ? "(workaround for incoherent flaw of " "unified page/buffer cache)" @@ -98,7 +98,7 @@ static bool coherency_check(const MDBX_env *env, const txnid_t txnid, if (unlikely(root_txnid != maindb_mod_txnid)) { if (report) WARNING("catch invalid root_page %" PRIaPGNO " mod_txnid %" PRIaTXN - " for %sdb.mod_txnid %" PRIaTXN " %s", + " for %s-db.mod_txnid %" PRIaTXN " %s", maindb_root_pgno, root_txnid, "main", maindb_mod_txnid, (env->stuck_meta < 0) ? "(workaround for incoherent flaw of " "unified page/buffer cache)" @@ -169,7 +169,7 @@ __hot int coherency_check_head(MDBX_txn *txn, const meta_ptr_t head, txn->dbs[FREE_DBI].flags &= DB_PERSISTENT_FLAGS; } tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY); - tASSERT(txn, check_sdb_flags(txn->dbs[MAIN_DBI].flags)); + tASSERT(txn, check_table_flags(txn->dbs[MAIN_DBI].flags)); return MDBX_SUCCESS; } @@ -182,7 +182,7 @@ int coherency_check_written(const MDBX_env *env, const txnid_t txnid, if (likely( coherency_check(env, head_txnid, &meta->trees.gc, meta, report))) { eASSERT(env, meta->trees.gc.flags == MDBX_INTEGERKEY); - eASSERT(env, check_sdb_flags(meta->trees.main.flags)); + eASSERT(env, check_table_flags(meta->trees.main.flags)); return MDBX_SUCCESS; } } else if (report) { diff --git a/src/cold.c b/src/cold.c index a37a04bb..837f89ce 100644 --- a/src/cold.c +++ b/src/cold.c @@ -193,7 +193,7 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { const page_t *mp = cx.outer.pg[cx.outer.top]; for (size_t i = 0; i < page_numkeys(mp); i++) { const node_t *node = page_node(mp, i); - if (node_flags(node) != N_SUBDATA) + if (node_flags(node) != N_TREE) continue; if (unlikely(node_ds(node) != sizeof(tree_t))) { ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, diff --git a/src/copy.c b/src/copy.c index 7802ae17..7aaaba73 100644 --- a/src/copy.c +++ b/src/copy.c @@ -189,11 +189,10 @@ __cold static int 
compacting_walk(ctx_t *ctx, MDBX_cursor *mc, page_t *mp = mc->pg[mc->top]; const size_t nkeys = page_numkeys(mp); if (is_leaf(mp)) { - if (!(mc->flags & - z_inner) /* may have nested N_SUBDATA or N_BIGDATA nodes */) { + if (!(mc->flags & z_inner) /* may have nested N_TREE or N_BIG nodes */) { for (size_t i = 0; i < nkeys; i++) { node_t *node = page_node(mp, i); - if (node_flags(node) == N_BIGDATA) { + if (node_flags(node) == N_BIG) { /* Need writable leaf */ if (mp != leaf) { mc->pg[mc->top] = leaf; @@ -213,7 +212,7 @@ __cold static int compacting_walk(ctx_t *ctx, MDBX_cursor *mc, npages); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; - } else if (node_flags(node) & N_SUBDATA) { + } else if (node_flags(node) & N_TREE) { if (!MDBX_DISABLE_VALIDATION && unlikely(node_ds(node) != sizeof(tree_t))) { ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, @@ -232,7 +231,7 @@ __cold static int compacting_walk(ctx_t *ctx, MDBX_cursor *mc, } tree_t *nested = nullptr; - if (node_flags(node) & N_DUPDATA) { + if (node_flags(node) & N_DUP) { rc = cursor_dupsort_setup(mc, node, mp); if (likely(rc == MDBX_SUCCESS)) { nested = &mc->subcur->nested_tree; diff --git a/src/cursor.c b/src/cursor.c index 8ef2cdb5..54ded76c 100644 --- a/src/cursor.c +++ b/src/cursor.c @@ -315,10 +315,10 @@ static __always_inline int couple_init(cursor_couple_t *couple, } if (unlikely(*dbi_state & DBI_STALE)) - return sdb_fetch(couple->outer.txn, cursor_dbi(&couple->outer)); + return tbl_fetch(couple->outer.txn, cursor_dbi(&couple->outer)); if (unlikely(kvx->clc.k.lmax == 0)) - return sdb_setup(txn->env, kvx, tree); + return tbl_setup(txn->env, kvx, tree); return MDBX_SUCCESS; } @@ -357,7 +357,7 @@ int cursor_dupsort_setup(MDBX_cursor *mc, const node_t *node, default: ERROR("invalid node flags %u", flags); goto bailout; - case N_DUPDATA | N_SUBDATA: + case N_DUP | N_TREE: if (!MDBX_DISABLE_VALIDATION && unlikely(node_ds(node) != sizeof(tree_t))) { ERROR("invalid nested-db record size (%zu, expect 
%zu)", node_ds(node), sizeof(tree_t)); @@ -373,7 +373,7 @@ int cursor_dupsort_setup(MDBX_cursor *mc, const node_t *node, } mx->cursor.top_and_flags = z_fresh_mark | z_inner; break; - case N_DUPDATA: + case N_DUP: if (!MDBX_DISABLE_VALIDATION && unlikely(node_ds(node) <= PAGEHDRSZ)) { ERROR("invalid nested-page size %zu", node_ds(node)); goto bailout; @@ -548,12 +548,12 @@ static __always_inline int cursor_bring(const bool inner, const bool tend2first, } const node_t *__restrict node = page_node(mp, ki); - if (!inner && (node_flags(node) & N_DUPDATA)) { + if (!inner && (node_flags(node) & N_DUP)) { int err = cursor_dupsort_setup(mc, node, mp); if (unlikely(err != MDBX_SUCCESS)) return err; MDBX_ANALYSIS_ASSUME(mc->subcur != nullptr); - if (node_flags(node) & N_SUBDATA) { + if (node_flags(node) & N_TREE) { err = tend2first ? inner_first(&mc->subcur->cursor, data) : inner_last(&mc->subcur->cursor, data); if (unlikely(err != MDBX_SUCCESS)) @@ -800,7 +800,7 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, if (mc->subcur) { node_t *node = page_node(mc->pg[mc->top], mc->ki[mc->top]); - if (node_flags(node) & N_DUPDATA) { + if (node_flags(node) & N_DUP) { cASSERT(mc, inner_pointed(mc)); /* Если за ключом более одного значения, либо если размер данных * отличается, то вместо обновления требуется удаление и @@ -1021,7 +1021,7 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, node_t *const node = page_node(mc->pg[mc->top], mc->ki[mc->top]); /* Large/Overflow page overwrites need special handling */ - if (unlikely(node_flags(node) & N_BIGDATA)) { + if (unlikely(node_flags(node) & N_BIG)) { const size_t dpages = (node_size(key, data) > env->leaf_nodemax) ? 
largechunk_npages(env, data->iov_len) : 0; @@ -1108,7 +1108,7 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, mp->pgno = mc->pg[mc->top]->pgno; /* Was a single item before, must convert now */ - if (!(node_flags(node) & N_DUPDATA)) { + if (!(node_flags(node) & N_DUP)) { /* does data match? */ if (flags & MDBX_APPENDDUP) { const int cmp = mc->clc->v.cmp(data, &old_data); @@ -1160,9 +1160,9 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, cASSERT(mc, (xdata.iov_len & 1) == 0); fp->upper = (uint16_t)(xdata.iov_len - PAGEHDRSZ); old_data.iov_len = xdata.iov_len; /* pretend olddata is fp */ - } else if (node_flags(node) & N_SUBDATA) { + } else if (node_flags(node) & N_TREE) { /* Data is on sub-DB, just store it */ - flags |= N_DUPDATA | N_SUBDATA; + flags |= N_DUP | N_TREE; goto dupsort_put; } else { /* Data is on sub-page */ @@ -1257,7 +1257,7 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, fp->txnid = mc->txn->front_txnid; fp->pgno = mp->pgno; mc->subcur->cursor.pg[0] = fp; - flags |= N_DUPDATA; + flags |= N_DUP; goto dupsort_put; } xdata.iov_len = old_data.iov_len + growth; @@ -1296,7 +1296,7 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, cASSERT(mc, env->ps > old_data.iov_len); growth = env->ps - (unsigned)old_data.iov_len; cASSERT(mc, (growth & 1) == 0); - flags |= N_DUPDATA | N_SUBDATA; + flags |= N_DUP | N_TREE; nested_dupdb.root = mp->pgno; nested_dupdb.sequence = 0; nested_dupdb.mod_txnid = mc->txn->txnid; @@ -1331,12 +1331,12 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, if (!insert_key) node_del(mc, 0); ref_data = &xdata; - flags |= N_DUPDATA; + flags |= N_DUP; goto insert_node; } - /* MDBX passes N_SUBDATA in 'flags' to write a DB record */ - if (unlikely((node_flags(node) ^ flags) & N_SUBDATA)) + /* MDBX passes N_TREE in 'flags' to write a DB record */ + if (unlikely((node_flags(node) ^ flags) & N_TREE)) 
return MDBX_INCOMPATIBLE; current: @@ -1388,8 +1388,7 @@ insert_node:; } else { /* There is room already in this leaf page. */ if (is_dupfix_leaf(mc->pg[mc->top])) { - cASSERT(mc, !(naf & (N_BIGDATA | N_SUBDATA | N_DUPDATA)) && - ref_data->iov_len == 0); + cASSERT(mc, !(naf & (N_BIG | N_TREE | N_DUP)) && ref_data->iov_len == 0); rc = node_add_dupfix(mc, mc->ki[mc->top], key); } else rc = node_add_leaf(mc, mc->ki[mc->top], key, ref_data, naf); @@ -1414,7 +1413,7 @@ insert_node:; * storing the user data in the keys field, so there are strict * size limits on dupdata. The actual data fields of the child * DB are all zero size. */ - if (flags & N_DUPDATA) { + if (flags & N_DUP) { MDBX_val empty; dupsort_put: empty.iov_len = 0; @@ -1452,7 +1451,7 @@ insert_node:; goto dupsort_error; mx->cursor.tree->items = 1; } - if (!(node_flags(node) & N_SUBDATA) || sub_root) { + if (!(node_flags(node) & N_TREE) || sub_root) { page_t *const mp = mc->pg[mc->top]; const intptr_t nkeys = page_numkeys(mp); const size_t dbi = cursor_dbi(mc); @@ -1486,7 +1485,7 @@ insert_node:; inner_flags |= (flags & MDBX_APPENDDUP) >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND; rc = cursor_put(&mc->subcur->cursor, data, &empty, inner_flags); - if (flags & N_SUBDATA) { + if (flags & N_TREE) { void *db = node_data(node); mc->subcur->nested_tree.mod_txnid = mc->txn->txnid; memcpy(db, &mc->subcur->nested_tree, sizeof(tree_t)); @@ -1613,12 +1612,12 @@ __hot int cursor_del(MDBX_cursor *mc, unsigned flags) { goto del_key; node_t *node = page_node(mp, mc->ki[mc->top]); - if (node_flags(node) & N_DUPDATA) { + if (node_flags(node) & N_DUP) { if (flags & (MDBX_ALLDUPS | /* for compatibility */ MDBX_NODUPDATA)) { /* will subtract the final entry later */ mc->tree->items -= mc->subcur->nested_tree.items - 1; } else { - if (!(node_flags(node) & N_SUBDATA)) { + if (!(node_flags(node) & N_TREE)) { page_t *sp = node_data(node); cASSERT(mc, is_subpage(sp)); sp->txnid = mp->txnid; @@ -1629,7 +1628,7 @@ __hot int 
cursor_del(MDBX_cursor *mc, unsigned flags) { return rc; /* If sub-DB still has entries, we're done */ if (mc->subcur->nested_tree.items) { - if (node_flags(node) & N_SUBDATA) { + if (node_flags(node) & N_TREE) { /* update table info */ mc->subcur->nested_tree.mod_txnid = mc->txn->txnid; memcpy(node_data(node), &mc->subcur->nested_tree, sizeof(tree_t)); @@ -1651,7 +1650,7 @@ __hot int cursor_del(MDBX_cursor *mc, unsigned flags) { } if (m2->ki[mc->top] != mc->ki[mc->top]) { inner = page_node(mp, m2->ki[mc->top]); - if (node_flags(inner) & N_SUBDATA) + if (node_flags(inner) & N_TREE) continue; } m2->subcur->cursor.pg[0] = node_data(inner); @@ -1665,7 +1664,7 @@ __hot int cursor_del(MDBX_cursor *mc, unsigned flags) { /* otherwise fall thru and delete the sub-DB */ } - if ((node_flags(node) & N_SUBDATA) && mc->subcur->cursor.tree->height) { + if ((node_flags(node) & N_TREE) && mc->subcur->cursor.tree->height) { /* add all the child DB's pages to the free list */ rc = tree_drop(&mc->subcur->cursor, false); if (unlikely(rc != MDBX_SUCCESS)) @@ -1674,13 +1673,13 @@ __hot int cursor_del(MDBX_cursor *mc, unsigned flags) { inner_gone(mc); } else { cASSERT(mc, !inner_pointed(mc)); - /* MDBX passes N_SUBDATA in 'flags' to delete a DB record */ - if (unlikely((node_flags(node) ^ flags) & N_SUBDATA)) + /* MDBX passes N_TREE in 'flags' to delete a DB record */ + if (unlikely((node_flags(node) ^ flags) & N_TREE)) return MDBX_INCOMPATIBLE; } /* add large/overflow pages to free list */ - if (node_flags(node) & N_BIGDATA) { + if (node_flags(node) & N_BIG) { pgr_t lp = page_get_large(mc, node_largedata_pgno(node), mp->txnid); if (unlikely((rc = lp.err) || (rc = page_retire(mc, lp.page)))) goto fail; @@ -1758,19 +1757,19 @@ del_key: /* уже переместились вправо */ m3->pg[top] != mp)) { node = page_node(m3->pg[m3->top], m3->ki[m3->top]); /* Если это dupsort-узел, то должен быть валидный вложенный курсор. 
*/ - if (node_flags(node) & N_DUPDATA) { + if (node_flags(node) & N_DUP) { /* Тут три варианта событий: - * 1) Вложенный курсор уже инициализирован, у узла есть флаг N_SUBDATA, + * 1) Вложенный курсор уже инициализирован, у узла есть флаг N_TREE, * соответственно дубликаты вынесены в отдельное дерево с корнем * в отдельной странице = ничего корректировать не требуется. - * 2) Вложенный курсор уже инициализирован, у узла нет флага N_SUBDATA, + * 2) Вложенный курсор уже инициализирован, у узла нет флага N_TREE, * соответственно дубликаты размещены на вложенной sub-странице. * 3) Курсор стоял на удалённом элементе, который имел одно значение, * а после удаления переместился на следующий элемент с дубликатами. * В этом случае вложенный курсор не инициализирован и тепеь его * нужно установить на первый дубликат. */ if (is_pointed(&m3->subcur->cursor)) { - if ((node_flags(node) & N_SUBDATA) == 0) { + if ((node_flags(node) & N_TREE) == 0) { cASSERT(m3, m3->subcur->cursor.top == 0 && m3->subcur->nested_tree.height == 1); m3->subcur->cursor.pg[0] = node_data(node); @@ -1779,7 +1778,7 @@ del_key: rc = cursor_dupsort_setup(m3, node, m3->pg[m3->top]); if (unlikely(rc != MDBX_SUCCESS)) goto fail; - if (node_flags(node) & N_SUBDATA) { + if (node_flags(node) & N_TREE) { rc = inner_first(&m3->subcur->cursor, nullptr); if (unlikely(rc != MDBX_SUCCESS)) goto fail; @@ -1999,13 +1998,13 @@ got_node: return ret; } - if (node_flags(node) & N_DUPDATA) { + if (node_flags(node) & N_DUP) { ret.err = cursor_dupsort_setup(mc, node, mp); if (unlikely(ret.err != MDBX_SUCCESS)) return ret; if (op >= MDBX_SET) { MDBX_ANALYSIS_ASSUME(mc->subcur != nullptr); - if (node_flags(node) & N_SUBDATA) { + if (node_flags(node) & N_TREE) { ret.err = inner_first(&mc->subcur->cursor, data); if (unlikely(ret.err != MDBX_SUCCESS)) return ret; @@ -2117,7 +2116,7 @@ __hot int cursor_ops(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, get_key_optional(node, key); if (!data) return MDBX_SUCCESS; - if 
(node_flags(node) & N_DUPDATA) { + if (node_flags(node) & N_DUP) { if (!MDBX_DISABLE_VALIDATION && unlikely(!mc->subcur)) return unexpected_dupsort(mc); mc = &mc->subcur->cursor; @@ -2248,7 +2247,7 @@ __hot int cursor_ops(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, else { node_t *node = page_node(mc->pg[mc->top], mc->ki[mc->top]); get_key_optional(node, key); - if ((node_flags(node) & N_DUPDATA) == 0) + if ((node_flags(node) & N_DUP) == 0) return node_read(mc, node, data, mc->pg[mc->top]); else if (MDBX_DISABLE_VALIDATION || likely(mc->subcur)) return ((op == MDBX_FIRST_DUP) ? inner_first diff --git a/src/cursor.h b/src/cursor.h index 05174726..9ecad9d6 100644 --- a/src/cursor.h +++ b/src/cursor.h @@ -391,7 +391,7 @@ MDBX_MAYBE_UNUSED static inline void cursor_inner_refresh(const MDBX_cursor *mc, const page_t *mp, unsigned ki) { cASSERT(mc, is_leaf(mp)); const node_t *node = page_node(mp, ki); - if ((node_flags(node) & (N_DUPDATA | N_SUBDATA)) == N_DUPDATA) + if ((node_flags(node) & (N_DUP | N_TREE)) == N_DUP) mc->subcur->cursor.pg[0] = node_data(node); } diff --git a/src/dbi.c b/src/dbi.c index bd2555bb..5e43234d 100644 --- a/src/dbi.c +++ b/src/dbi.c @@ -281,7 +281,7 @@ int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, else { eASSERT(env, env->dbs_flags[dbi] & DB_VALID); if (txn->dbi_state[dbi] & DBI_STALE) { - int err = sdb_fetch(txn, dbi); + int err = tbl_fetch(txn, dbi); if (unlikely(err == MDBX_SUCCESS)) return err; } @@ -306,7 +306,7 @@ int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, env->kvs[dbi].clc.v.cmp = datacmp ? 
datacmp : builtin_datacmp(user_flags); txn->dbs[dbi].flags = db_flags; txn->dbs[dbi].dupfix_size = 0; - if (unlikely(sdb_setup(env, &env->kvs[dbi], &txn->dbs[dbi]))) { + if (unlikely(tbl_setup(env, &env->kvs[dbi], &txn->dbs[dbi]))) { txn->dbi_state[dbi] = DBI_LINDO; txn->flags |= MDBX_TXN_ERROR; return MDBX_PROBLEM; @@ -379,7 +379,7 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, env->kvs[MAIN_DBI].clc.v.cmp = builtin_datacmp(main_flags); txn->dbs[MAIN_DBI].flags = main_flags; txn->dbs[MAIN_DBI].dupfix_size = 0; - int err = sdb_setup(env, &env->kvs[MAIN_DBI], &txn->dbs[MAIN_DBI]); + int err = tbl_setup(env, &env->kvs[MAIN_DBI], &txn->dbs[MAIN_DBI]); if (unlikely(err != MDBX_SUCCESS)) { txn->dbi_state[MAIN_DBI] = DBI_LINDO; txn->flags |= MDBX_TXN_ERROR; @@ -459,7 +459,7 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, /* make sure this is actually a table */ node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); - if (unlikely((node_flags(node) & (N_DUPDATA | N_SUBDATA)) != N_SUBDATA)) + if (unlikely((node_flags(node) & (N_DUP | N_TREE)) != N_TREE)) return MDBX_INCOMPATIBLE; if (!MDBX_DISABLE_VALIDATION && unlikely(body.iov_len != sizeof(tree_t))) { ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, @@ -489,8 +489,8 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, txn->dbs[slot].flags = user_flags & DB_PERSISTENT_FLAGS; cx.outer.next = txn->cursors[MAIN_DBI]; txn->cursors[MAIN_DBI] = &cx.outer; - rc = cursor_put_checklen(&cx.outer, &name, &body, - N_SUBDATA | MDBX_NOOVERWRITE); + rc = + cursor_put_checklen(&cx.outer, &name, &body, N_TREE | MDBX_NOOVERWRITE); txn->cursors[MAIN_DBI] = cx.outer.next; if (unlikely(rc != MDBX_SUCCESS)) goto bailout; @@ -541,7 +541,7 @@ int dbi_open(MDBX_txn *txn, const MDBX_val *const name, unsigned user_flags, *dbi = 0; if (user_flags != MDBX_ACCEDE && - unlikely(!check_sdb_flags(user_flags & ~MDBX_CREATE))) + 
unlikely(!check_table_flags(user_flags & ~MDBX_CREATE))) return MDBX_EINVAL; int rc = check_txn(txn, MDBX_TXN_BLOCKED); @@ -695,11 +695,11 @@ dbi_rename_locked(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val new_name) { MDBX_val data = {&txn->dbs[dbi], sizeof(tree_t)}; pair.err = cursor_put_checklen(&cx.outer, &new_name, &data, - N_SUBDATA | MDBX_NOOVERWRITE); + N_TREE | MDBX_NOOVERWRITE); if (likely(pair.err == MDBX_SUCCESS)) { pair.err = cursor_seek(&cx.outer, &old_name, nullptr, MDBX_SET).err; if (likely(pair.err == MDBX_SUCCESS)) - pair.err = cursor_del(&cx.outer, N_SUBDATA); + pair.err = cursor_del(&cx.outer, N_TREE); if (likely(pair.err == MDBX_SUCCESS)) { pair.defer = env->kvs[dbi].name.iov_base; env->kvs[dbi].name = new_name; @@ -813,7 +813,7 @@ __cold int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { if (likely(rc == MDBX_SUCCESS)) { cx.outer.next = txn->cursors[MAIN_DBI]; txn->cursors[MAIN_DBI] = &cx.outer; - rc = cursor_del(&cx.outer, N_SUBDATA); + rc = cursor_del(&cx.outer, N_TREE); txn->cursors[MAIN_DBI] = cx.outer.next; if (likely(rc == MDBX_SUCCESS)) { tASSERT(txn, txn->dbi_state[MAIN_DBI] & DBI_DIRTY); @@ -943,7 +943,7 @@ __cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, return MDBX_BAD_TXN; if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) { - rc = sdb_fetch((MDBX_txn *)txn, dbi); + rc = tbl_fetch((MDBX_txn *)txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; } @@ -997,7 +997,7 @@ __cold int mdbx_enumerate_tables(const MDBX_txn *txn, rc = outer_next(&cx.outer, nullptr, nullptr, MDBX_NEXT_NODUP)) { node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); - if (node_flags(node) != N_SUBDATA) + if (node_flags(node) != N_TREE) continue; if (unlikely(node_ds(node) != sizeof(tree_t))) { ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, diff --git a/src/dxb.c b/src/dxb.c index e15e0832..82a503b5 100644 --- a/src/dxb.c +++ b/src/dxb.c @@ -1134,7 +1134,7 @@ int dxb_sync_locked(MDBX_env *env, unsigned 
flags, meta_t *const pending, troika_t *const troika) { eASSERT(env, ((env->flags ^ flags) & MDBX_WRITEMAP) == 0); eASSERT(env, pending->trees.gc.flags == MDBX_INTEGERKEY); - eASSERT(env, check_sdb_flags(pending->trees.main.flags)); + eASSERT(env, check_table_flags(pending->trees.main.flags)); const meta_t *const meta0 = METAPAGE(env, 0); const meta_t *const meta1 = METAPAGE(env, 1); const meta_t *const meta2 = METAPAGE(env, 2); @@ -1433,7 +1433,7 @@ int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, target->trees.gc = pending->trees.gc; target->trees.main = pending->trees.main; eASSERT(env, target->trees.gc.flags == MDBX_INTEGERKEY); - eASSERT(env, check_sdb_flags(target->trees.main.flags)); + eASSERT(env, check_table_flags(target->trees.main.flags)); target->canary = pending->canary; memcpy(target->pages_retired, pending->pages_retired, 8); jitter4testing(true); @@ -1488,7 +1488,7 @@ int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, #endif /* MDBX_ENABLE_PGOP_STAT */ const meta_t undo_meta = *target; eASSERT(env, pending->trees.gc.flags == MDBX_INTEGERKEY); - eASSERT(env, check_sdb_flags(pending->trees.main.flags)); + eASSERT(env, check_table_flags(pending->trees.main.flags)); rc = osal_pwrite(env->fd4meta, pending, sizeof(meta_t), ptr_dist(target, env->dxb_mmap.base)); if (unlikely(rc != MDBX_SUCCESS)) { diff --git a/src/layout-dxb.h b/src/layout-dxb.h index aea506d2..74d09166 100644 --- a/src/layout-dxb.h +++ b/src/layout-dxb.h @@ -187,10 +187,10 @@ typedef enum page_type { * omit entries and pack sorted MDBX_DUPFIXED values after the page header. * * P_LARGE records occupy one or more contiguous pages where only the - * first has a page header. They hold the real data of N_BIGDATA nodes. + * first has a page header. They hold the real data of N_BIG nodes. * * P_SUBP sub-pages are small leaf "pages" with duplicate data. - * A node with flag N_DUPDATA but not N_SUBDATA contains a sub-page. 
+ * A node with flag N_DUP but not N_TREE contains a sub-page. * (Duplicate data can also go in tables, which use normal pages.) * * P_META pages contain meta_t, the start point of an MDBX snapshot. @@ -222,10 +222,10 @@ typedef struct page { * Used in pages of type P_BRANCH and P_LEAF without P_DUPFIX. * We guarantee 2-byte alignment for 'node_t's. * - * Leaf node flags describe node contents. N_BIGDATA says the node's + * Leaf node flags describe node contents. N_BIG says the node's * data part is the page number of an overflow page with actual data. - * N_DUPDATA and N_SUBDATA can be combined giving duplicate data in - * a sub-page/table, and named databases (just N_SUBDATA). */ + * N_DUP and N_TREE can be combined giving duplicate data in + * a sub-page/table, and named databases (just N_TREE). */ typedef struct node { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ union { @@ -254,9 +254,9 @@ typedef struct node { #define NODESIZE 8u typedef enum node_flags { - N_BIGDATA = 0x01 /* data put on large page */, - N_SUBDATA = 0x02 /* data is a table */, - N_DUPDATA = 0x04 /* data has duplicates */ + N_BIG = 0x01 /* data put on large page */, + N_TREE = 0x02 /* data is a b-tree */, + N_DUP = 0x04 /* data has duplicates */ } node_flags_t; #pragma pack(pop) diff --git a/src/logging_and_debug.c b/src/logging_and_debug.c index a3f52cf1..2ce08d05 100644 --- a/src/logging_and_debug.c +++ b/src/logging_and_debug.c @@ -135,9 +135,9 @@ __cold const char *pagetype_caption(const uint8_t type, char buf4unknown[16]) { __cold static const char *leafnode_type(node_t *n) { static const char *const tp[2][2] = {{"", ": DB"}, {": sub-page", ": sub-DB"}}; - return (node_flags(n) & N_BIGDATA) + return (node_flags(n) & N_BIG) ? ": large page" - : tp[!!(node_flags(n) & N_DUPDATA)][!!(node_flags(n) & N_SUBDATA)]; + : tp[!!(node_flags(n) & N_DUP)][!!(node_flags(n) & N_TREE)]; } /* Display all the keys in the page. 
*/ @@ -197,7 +197,7 @@ __cold void page_list(page_t *mp) { DKEY(&key)); total += nsize; } else { - if (node_flags(node) & N_BIGDATA) + if (node_flags(node) & N_BIG) nsize += sizeof(pgno_t); else nsize += node_ds(node); diff --git a/src/meta.c b/src/meta.c index 3f2bfcd6..b45d71c1 100644 --- a/src/meta.c +++ b/src/meta.c @@ -549,7 +549,7 @@ __cold int meta_validate(MDBX_env *env, meta_t *const meta, return MDBX_INCOMPATIBLE; } - if (unlikely(!check_sdb_flags(meta->trees.main.flags))) { + if (unlikely(!check_table_flags(meta->trees.main.flags))) { WARNING("meta[%u] has invalid %s flags 0x%x, skip it", meta_number, "MainDB", meta->trees.main.flags); return MDBX_INCOMPATIBLE; diff --git a/src/misc.c b/src/misc.c index 5e77e02a..06755121 100644 --- a/src/misc.c +++ b/src/misc.c @@ -42,7 +42,7 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, return rc; if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) { - rc = sdb_fetch(txn, dbi); + rc = tbl_fetch(txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; } diff --git a/src/node.c b/src/node.c index 525318d6..c8588fd0 100644 --- a/src/node.c +++ b/src/node.c @@ -103,7 +103,7 @@ __hot int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx, page_t *largepage = nullptr; size_t node_bytes; - if (unlikely(flags & N_BIGDATA)) { + if (unlikely(flags & N_BIG)) { /* Data already on large/overflow page. 
*/ STATIC_ASSERT(sizeof(pgno_t) % 2 == 0); node_bytes = @@ -116,7 +116,7 @@ __hot int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx, mc->tree->flags); return MDBX_PROBLEM; } - if (unlikely(flags & (N_DUPDATA | N_SUBDATA))) { + if (unlikely(flags & (N_DUP | N_TREE))) { ERROR("Unexpected target %s flags 0x%x for large data-item", "node", flags); return MDBX_PROBLEM; @@ -130,7 +130,7 @@ __hot int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx, DEBUG("allocated %u large/overflow page(s) %" PRIaPGNO "for %" PRIuPTR " data bytes", largepage->pages, largepage->pgno, data->iov_len); - flags |= N_BIGDATA; + flags |= N_BIG; node_bytes = node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t); cASSERT(mc, node_bytes == leaf_size(mc->txn->env, key, data)); @@ -166,7 +166,7 @@ __hot int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx, void *nodedata = node_data(node); if (likely(largepage == nullptr)) { - if (unlikely(flags & N_BIGDATA)) { + if (unlikely(flags & N_BIG)) { memcpy(nodedata, data->iov_base, sizeof(pgno_t)); return MDBX_SUCCESS; } @@ -208,8 +208,7 @@ __hot void node_del(MDBX_cursor *mc, size_t ksize) { cASSERT(mc, !is_branch(mp) || hole || node_ks(node) == 0); size_t hole_size = NODESIZE + node_ks(node); if (is_leaf(mp)) - hole_size += - (node_flags(node) & N_BIGDATA) ? sizeof(pgno_t) : node_ds(node); + hole_size += (node_flags(node) & N_BIG) ? 
sizeof(pgno_t) : node_ds(node); hole_size = EVEN_CEIL(hole_size); const indx_t hole_offset = mp->entries[hole]; @@ -239,7 +238,7 @@ __hot void node_del(MDBX_cursor *mc, size_t ksize) { __noinline int node_read_bigdata(MDBX_cursor *mc, const node_t *node, MDBX_val *data, const page_t *mp) { - cASSERT(mc, node_flags(node) == N_BIGDATA && data->iov_len == node_ds(node)); + cASSERT(mc, node_flags(node) == N_BIG && data->iov_len == node_ds(node)); pgr_t lp = page_get_large(mc, node_largedata_pgno(node), mp->txnid); if (unlikely((lp.err != MDBX_SUCCESS))) { diff --git a/src/node.h b/src/node.h index b5437f18..5411aeed 100644 --- a/src/node.h +++ b/src/node.h @@ -6,7 +6,7 @@ #include "essentials.h" /* valid flags for mdbx_node_add() */ -#define NODE_ADD_FLAGS (N_DUPDATA | N_SUBDATA | MDBX_RESERVE | MDBX_APPEND) +#define NODE_ADD_FLAGS (N_DUP | N_TREE | MDBX_RESERVE | MDBX_APPEND) /* Get the page number pointed to by a branch node */ MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t @@ -81,7 +81,7 @@ node_size(const MDBX_val *key, const MDBX_val *value) { MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t node_largedata_pgno(const node_t *const __restrict node) { - assert(node_flags(node) & N_BIGDATA); + assert(node_flags(node) & N_BIG); return peek_pgno(node_data(node)); } @@ -96,7 +96,7 @@ static inline int __must_check_result node_read(MDBX_cursor *mc, const page_t *mp) { data->iov_len = node_ds(node); data->iov_base = node_data(node); - if (likely(node_flags(node) != N_BIGDATA)) + if (likely(node_flags(node) != N_BIG)) return MDBX_SUCCESS; return node_read_bigdata(mc, node, data, mp); } diff --git a/src/page-get.c b/src/page-get.c index 13828e1e..57f2e177 100644 --- a/src/page-get.c +++ b/src/page-get.c @@ -244,17 +244,17 @@ __cold int page_check(const MDBX_cursor *const mc, const page_t *const mp) { rc = bad_page(mp, "invalid node[%zu] flags (%u)\n", i, node_flags(node)); break; - case N_BIGDATA /* data on large-page */: + case N_BIG /* data on large-page */: case 0 /* 
usual */: - case N_SUBDATA /* sub-db */: - case N_SUBDATA | N_DUPDATA /* dupsorted sub-tree */: - case N_DUPDATA /* short sub-page */: + case N_TREE /* sub-db */: + case N_TREE | N_DUP /* dupsorted sub-tree */: + case N_DUP /* short sub-page */: break; } const size_t dsize = node_ds(node); const char *const data = node_data(node); - if (node_flags(node) & N_BIGDATA) { + if (node_flags(node) & N_BIG) { if (unlikely(end_of_page < data + sizeof(pgno_t))) { rc = bad_page( mp, "node-%s(%zu of %zu, %zu bytes) beyond (%zu) page-end\n", @@ -311,20 +311,20 @@ __cold int page_check(const MDBX_cursor *const mc, const page_t *const mp) { continue; } break; - case N_SUBDATA /* sub-db */: + case N_TREE /* sub-db */: if (unlikely(dsize != sizeof(tree_t))) { rc = bad_page(mp, "invalid sub-db record size (%zu)\n", dsize); continue; } break; - case N_SUBDATA | N_DUPDATA /* dupsorted sub-tree */: + case N_TREE | N_DUP /* dupsorted sub-tree */: if (unlikely(dsize != sizeof(tree_t))) { rc = bad_page(mp, "invalid nested-db record size (%zu, expect %zu)\n", dsize, sizeof(tree_t)); continue; } break; - case N_DUPDATA /* short sub-page */: + case N_DUP /* short sub-page */: if (unlikely(dsize <= PAGEHDRSZ)) { rc = bad_page(mp, "invalid nested/sub-page record size (%zu)\n", dsize); diff --git a/src/page-search.c b/src/page-search.c index db985b8d..127e9ba0 100644 --- a/src/page-search.c +++ b/src/page-search.c @@ -39,7 +39,7 @@ __hot int tree_search(MDBX_cursor *mc, const MDBX_val *key, int flags) { const size_t dbi = cursor_dbi(mc); if (unlikely(*cursor_dbi_state(mc) & DBI_STALE)) { - err = sdb_fetch(mc->txn, dbi); + err = tbl_fetch(mc->txn, dbi); if (unlikely(err != MDBX_SUCCESS)) goto bailout; } diff --git a/src/proto.h b/src/proto.h index 898bf512..531196c0 100644 --- a/src/proto.h +++ b/src/proto.h @@ -104,8 +104,8 @@ MDBX_INTERNAL void recalculate_merge_thresholds(MDBX_env *env); MDBX_INTERNAL void recalculate_subpage_thresholds(MDBX_env *env); /* table.c */ -MDBX_INTERNAL int 
__must_check_result sdb_fetch(MDBX_txn *txn, size_t dbi); -MDBX_INTERNAL int __must_check_result sdb_setup(const MDBX_env *env, +MDBX_INTERNAL int __must_check_result tbl_fetch(MDBX_txn *txn, size_t dbi); +MDBX_INTERNAL int __must_check_result tbl_setup(const MDBX_env *env, kvx_t *const kvx, const tree_t *const db); diff --git a/src/spill.c b/src/spill.c index 3368acfb..0a02ad52 100644 --- a/src/spill.c +++ b/src/spill.c @@ -99,7 +99,7 @@ static size_t spill_cursor_keep(const MDBX_txn *const txn, tASSERT(txn, is_leaf(mp)); if (!mc->subcur || mc->ki[mc->top] >= page_numkeys(mp)) break; - if (!(node_flags(page_node(mp, mc->ki[mc->top])) & N_SUBDATA)) + if (!(node_flags(page_node(mp, mc->ki[mc->top])) & N_TREE)) break; mc = &mc->subcur->cursor; } diff --git a/src/table.c b/src/table.c index 1f4c6082..c37f72a5 100644 --- a/src/table.c +++ b/src/table.c @@ -3,8 +3,8 @@ #include "internals.h" -int sdb_setup(const MDBX_env *env, kvx_t *const kvx, const tree_t *const db) { - if (unlikely(!check_sdb_flags(db->flags))) { +int tbl_setup(const MDBX_env *env, kvx_t *const kvx, const tree_t *const db) { + if (unlikely(!check_table_flags(db->flags))) { ERROR("incompatible or invalid db.flags (0x%x) ", db->flags); return MDBX_INCOMPATIBLE; } @@ -31,7 +31,7 @@ int sdb_setup(const MDBX_env *env, kvx_t *const kvx, const tree_t *const db) { return MDBX_SUCCESS; } -int sdb_fetch(MDBX_txn *txn, size_t dbi) { +int tbl_fetch(MDBX_txn *txn, size_t dbi) { cursor_couple_t couple; int rc = cursor_init(&couple.outer, txn, MAIN_DBI); if (unlikely(rc != MDBX_SUCCESS)) @@ -54,7 +54,7 @@ int sdb_fetch(MDBX_txn *txn, size_t dbi) { rc = MDBX_NOTFOUND; goto bailout; } - if (unlikely((node_flags(nsr.node) & (N_DUPDATA | N_SUBDATA)) != N_SUBDATA)) { + if (unlikely((node_flags(nsr.node) & (N_DUP | N_TREE)) != N_TREE)) { NOTICE("dbi %zu refs to not a named table `%*s` for txn %" PRIaTXN " (%s)", dbi, (int)kvx->name.iov_len, (const char *)kvx->name.iov_base, txn->txnid, "wrong flags"); @@ -95,7 +95,7 @@ 
int sdb_fetch(MDBX_txn *txn, size_t dbi) { return MDBX_CORRUPTED; } #endif /* !MDBX_DISABLE_VALIDATION */ - rc = sdb_setup(txn->env, kvx, db); + rc = tbl_setup(txn->env, kvx, db); if (unlikely(rc != MDBX_SUCCESS)) return rc; diff --git a/src/tools/dump.c b/src/tools/dump.c index 7dc57d36..ce892216 100644 --- a/src/tools/dump.c +++ b/src/tools/dump.c @@ -100,7 +100,7 @@ static void error(const char *func, int rc) { } /* Dump in BDB-compatible format */ -static int dump_sdb(MDBX_txn *txn, MDBX_dbi dbi, char *name) { +static int dump_tbl(MDBX_txn *txn, MDBX_dbi dbi, char *name) { unsigned flags; int rc = mdbx_dbi_flags(txn, dbi, &flags); if (unlikely(rc != MDBX_SUCCESS)) { @@ -450,7 +450,7 @@ int main(int argc, char *argv[]) { if (list) { printf("%s\n", subname); } else { - err = dump_sdb(txn, sub_dbi, subname); + err = dump_tbl(txn, sub_dbi, subname); if (unlikely(err != MDBX_SUCCESS)) { if (!rescue) break; @@ -488,7 +488,7 @@ int main(int argc, char *argv[]) { cursor = nullptr; if (have_raw && (!count /* || rescue */)) - err = dump_sdb(txn, MAIN_DBI, nullptr); + err = dump_tbl(txn, MAIN_DBI, nullptr); else if (!count) { if (!quiet) fprintf(stderr, "%s: %s does not contain multiple databases\n", prog, @@ -496,7 +496,7 @@ int main(int argc, char *argv[]) { err = MDBX_NOTFOUND; } } else { - err = dump_sdb(txn, dbi, subname); + err = dump_tbl(txn, dbi, subname); } switch (err) { diff --git a/src/tree.c b/src/tree.c index 13932e48..0f430749 100644 --- a/src/tree.c +++ b/src/tree.c @@ -77,14 +77,14 @@ int tree_drop(MDBX_cursor *mc, const bool may_have_tables) { cASSERT(mc, mc->top + 1 == mc->tree->height); for (size_t i = 0; i < nkeys; i++) { node_t *node = page_node(mp, i); - if (node_flags(node) & N_BIGDATA) { + if (node_flags(node) & N_BIG) { rc = page_retire_ex(mc, node_largedata_pgno(node), nullptr, 0); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; if (!(may_have_tables | mc->tree->large_pages)) goto pop; - } else if (node_flags(node) & N_SUBDATA) { - if 
(unlikely((node_flags(node) & N_DUPDATA) == 0)) { + } else if (node_flags(node) & N_TREE) { + if (unlikely((node_flags(node) & N_DUP) == 0)) { rc = /* disallowing implicit table deletion */ MDBX_INCOMPATIBLE; goto bailout; } @@ -105,8 +105,7 @@ int tree_drop(MDBX_cursor *mc, const bool may_have_tables) { : P_BRANCH); for (size_t i = 0; i < nkeys; i++) { node_t *node = page_node(mp, i); - tASSERT(txn, (node_flags(node) & - (N_BIGDATA | N_SUBDATA | N_DUPDATA)) == 0); + tASSERT(txn, (node_flags(node) & (N_BIG | N_TREE | N_DUP)) == 0); const pgno_t pgno = node_pgno(node); rc = page_retire_ex(mc, pgno, nullptr, pagetype); if (unlikely(rc != MDBX_SUCCESS)) @@ -1240,8 +1239,8 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, node_t *node = ptr_disp(mp, tmp_ki_copy->entries[i] + PAGEHDRSZ); size = NODESIZE + node_ks(node) + sizeof(indx_t); if (is_leaf(mp)) - size += (node_flags(node) & N_BIGDATA) ? sizeof(pgno_t) - : node_ds(node); + size += + (node_flags(node) & N_BIG) ? sizeof(pgno_t) : node_ds(node); size = EVEN_CEIL(size); } @@ -1385,7 +1384,7 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, rc = node_add_leaf(mc, 0, newkey, newdata, naf); } break; case P_LEAF | P_DUPFIX: { - cASSERT(mc, (naf & (N_BIGDATA | N_SUBDATA | N_DUPDATA)) == 0); + cASSERT(mc, (naf & (N_BIG | N_TREE | N_DUP)) == 0); cASSERT(mc, newpgno == 0 || newpgno == P_INVALID); rc = node_add_dupfix(mc, 0, newkey); } break; @@ -1456,7 +1455,7 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, rc = node_add_leaf(mc, n, &rkey, rdata, flags); } break; /* case P_LEAF | P_DUPFIX: { - cASSERT(mc, (nflags & (N_BIGDATA | N_SUBDATA | N_DUPDATA)) == 0); + cASSERT(mc, (nflags & (N_BIG | N_TREE | N_DUP)) == 0); cASSERT(mc, gno == 0); rc = mdbx_node_add_dupfix(mc, n, &rkey); } break; */ @@ -1568,7 +1567,7 @@ done: rc = cursor_check_updating(mc); if (unlikely(naf & MDBX_RESERVE)) { node_t *node = page_node(mc->pg[mc->top], mc->ki[mc->top]); - if (!(node_flags(node) & 
N_BIGDATA)) + if (!(node_flags(node) & N_BIG)) newdata->iov_base = node_data(node); } #if MDBX_ENABLE_PGOP_STAT diff --git a/src/txn.c b/src/txn.c index f7b82787..845c0436 100644 --- a/src/txn.c +++ b/src/txn.c @@ -702,7 +702,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { /* Может быть mod_txnid > front после коммита вложенных тразакций */ db->mod_txnid = txn->txnid; MDBX_val data = {db, sizeof(tree_t)}; - rc = cursor_put(&cx.outer, &env->kvs[i].name, &data, N_SUBDATA); + rc = cursor_put(&cx.outer, &env->kvs[i].name, &data, N_TREE); if (unlikely(rc != MDBX_SUCCESS)) { txn->cursors[MAIN_DBI] = cx.outer.next; goto fail; @@ -1049,7 +1049,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { txn->txnid >= /* paranoia is appropriate here */ env->lck->cached_oldest.weak); tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY); - tASSERT(txn, check_sdb_flags(txn->dbs[MAIN_DBI].flags)); + tASSERT(txn, check_table_flags(txn->dbs[MAIN_DBI].flags)); } else { eASSERT(env, (flags & ~(txn_rw_begin_flags | MDBX_TXN_SPILLS | MDBX_WRITEMAP | MDBX_NOSTICKYTHREADS)) == 0); @@ -1107,7 +1107,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { } tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY); - tASSERT(txn, check_sdb_flags(txn->dbs[MAIN_DBI].flags)); + tASSERT(txn, check_table_flags(txn->dbs[MAIN_DBI].flags)); txn->flags = flags; txn->nested = nullptr; txn->tw.loose_pages = nullptr; @@ -1145,7 +1145,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { /* Setup db info */ tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY); - tASSERT(txn, check_sdb_flags(txn->dbs[MAIN_DBI].flags)); + tASSERT(txn, check_table_flags(txn->dbs[MAIN_DBI].flags)); VALGRIND_MAKE_MEM_UNDEFINED(txn->dbi_state, env->max_dbi); #if MDBX_ENABLE_DBI_SPARSE txn->n_dbi = CORE_DBS; @@ -1196,7 +1196,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { txn->dbs[MAIN_DBI].flags); env->dbs_flags[MAIN_DBI] = DB_POISON; atomic_store32(&env->dbi_seqs[MAIN_DBI], seq, 
mo_AcquireRelease); - rc = sdb_setup(env, &env->kvs[MAIN_DBI], &txn->dbs[MAIN_DBI]); + rc = tbl_setup(env, &env->kvs[MAIN_DBI], &txn->dbs[MAIN_DBI]); if (likely(rc == MDBX_SUCCESS)) { seq = dbi_seq_next(env, MAIN_DBI); env->dbs_flags[MAIN_DBI] = DB_VALID | txn->dbs[MAIN_DBI].flags; @@ -1229,7 +1229,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { } tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY); - tASSERT(txn, check_sdb_flags(txn->dbs[MAIN_DBI].flags)); + tASSERT(txn, check_table_flags(txn->dbs[MAIN_DBI].flags)); if (unlikely(env->flags & ENV_FATAL_ERROR)) { WARNING("%s", "environment had fatal error, must shutdown!"); rc = MDBX_PANIC; diff --git a/src/walk.c b/src/walk.c index dc1c4599..265e7da8 100644 --- a/src/walk.c +++ b/src/walk.c @@ -12,7 +12,7 @@ typedef struct walk_ctx { MDBX_cursor *cursor; } walk_ctx_t; -__cold static int walk_sdb(walk_ctx_t *ctx, walk_sdb_t *sdb); +__cold static int walk_tbl(walk_ctx_t *ctx, walk_tbl_t *tbl); static page_type_t walk_page_type(const page_t *mp) { if (mp) @@ -41,7 +41,7 @@ static page_type_t walk_subpage_type(const page_t *sp) { } /* Depth-first tree traversal. 
*/ -__cold static int walk_pgno(walk_ctx_t *ctx, walk_sdb_t *sdb, const pgno_t pgno, +__cold static int walk_pgno(walk_ctx_t *ctx, walk_tbl_t *tbl, const pgno_t pgno, txnid_t parent_txnid) { assert(pgno != P_INVALID); page_t *mp = nullptr; @@ -82,7 +82,7 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_sdb_t *sdb, const pgno_t pgno, align_bytes += (node_key_size + node_data_size) & 1; break; - case N_BIGDATA /* long data on the large/overflow page */: { + case N_BIG /* long data on the large/overflow page */: { const pgno_t large_pgno = node_largedata_pgno(node); const size_t over_payload = node_data_size; const size_t over_header = PAGEHDRSZ; @@ -94,7 +94,7 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_sdb_t *sdb, const pgno_t pgno, const size_t pagesize = pgno2bytes(ctx->txn->env, npages); const size_t over_unused = pagesize - over_payload - over_header; const int rc = ctx->visitor(large_pgno, npages, ctx->userctx, ctx->deep, - sdb, pagesize, page_large, err, 1, + tbl, pagesize, page_large, err, 1, over_payload, over_header, over_unused); if (unlikely(rc != MDBX_SUCCESS)) return (rc == MDBX_RESULT_TRUE) ? 
MDBX_SUCCESS : rc; @@ -102,7 +102,7 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_sdb_t *sdb, const pgno_t pgno, align_bytes += node_key_size & 1; } break; - case N_SUBDATA /* sub-db */: { + case N_TREE /* sub-db */: { if (unlikely(node_data_size != sizeof(tree_t))) { ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid table node size", (unsigned)node_data_size); @@ -113,7 +113,7 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_sdb_t *sdb, const pgno_t pgno, align_bytes += (node_key_size + node_data_size) & 1; } break; - case N_SUBDATA | N_DUPDATA /* dupsorted sub-tree */: + case N_TREE | N_DUP /* dupsorted sub-tree */: if (unlikely(node_data_size != sizeof(tree_t))) { ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid sub-tree node size", (unsigned)node_data_size); @@ -124,7 +124,7 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_sdb_t *sdb, const pgno_t pgno, align_bytes += (node_key_size + node_data_size) & 1; break; - case N_DUPDATA /* short sub-page */: { + case N_DUP /* short sub-page */: { if (unlikely(node_data_size <= PAGEHDRSZ || (node_data_size & 1))) { ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid sub-page node size", (unsigned)node_data_size); @@ -170,7 +170,7 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_sdb_t *sdb, const pgno_t pgno, } const int rc = - ctx->visitor(pgno, 0, ctx->userctx, ctx->deep + 1, sdb, + ctx->visitor(pgno, 0, ctx->userctx, ctx->deep + 1, tbl, node_data_size, subtype, err, nsubkeys, subpayload_size, subheader_size, subunused_size + subalign_bytes); if (unlikely(rc != MDBX_SUCCESS)) @@ -190,7 +190,7 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_sdb_t *sdb, const pgno_t pgno, } const int rc = ctx->visitor( - pgno, 1, ctx->userctx, ctx->deep, sdb, ctx->txn->env->ps, type, err, + pgno, 1, ctx->userctx, ctx->deep, tbl, ctx->txn->env->ps, type, err, nentries, payload_size, header_size, unused_size + align_bytes); if (unlikely(rc != MDBX_SUCCESS)) return (rc == 
MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc; @@ -203,7 +203,7 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_sdb_t *sdb, const pgno_t pgno, if (type == page_branch) { assert(err == MDBX_SUCCESS); ctx->deep += 1; - err = walk_pgno(ctx, sdb, node_pgno(node), mp->txnid); + err = walk_pgno(ctx, tbl, node_pgno(node), mp->txnid); ctx->deep -= 1; if (unlikely(err != MDBX_SUCCESS)) { if (err == MDBX_RESULT_TRUE) @@ -218,7 +218,7 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_sdb_t *sdb, const pgno_t pgno, default: continue; - case N_SUBDATA /* sub-db */: + case N_TREE /* sub-db */: if (unlikely(node_ds(node) != sizeof(tree_t))) { ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid sub-tree node size", (unsigned)node_ds(node)); @@ -227,16 +227,16 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_sdb_t *sdb, const pgno_t pgno, } else { tree_t aligned_db; memcpy(&aligned_db, node_data(node), sizeof(aligned_db)); - walk_sdb_t table = {{node_key(node), node_ks(node)}, nullptr, nullptr}; + walk_tbl_t table = {{node_key(node), node_ks(node)}, nullptr, nullptr}; table.internal = &aligned_db; assert(err == MDBX_SUCCESS); ctx->deep += 1; - err = walk_sdb(ctx, &table); + err = walk_tbl(ctx, &table); ctx->deep -= 1; } break; - case N_SUBDATA | N_DUPDATA /* dupsorted sub-tree */: + case N_TREE | N_DUP /* dupsorted sub-tree */: if (unlikely(node_ds(node) != sizeof(tree_t))) { ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid dupsort sub-tree node size", (unsigned)node_ds(node)); @@ -252,9 +252,9 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_sdb_t *sdb, const pgno_t pgno, &container_of(ctx->cursor, cursor_couple_t, outer)->inner); ctx->cursor = &ctx->cursor->subcur->cursor; ctx->deep += 1; - sdb->nested = &aligned_db; - err = walk_pgno(ctx, sdb, aligned_db.root, mp->txnid); - sdb->nested = nullptr; + tbl->nested = &aligned_db; + err = walk_pgno(ctx, tbl, aligned_db.root, mp->txnid); + tbl->nested = nullptr; ctx->deep -= 1; subcur_t 
*inner_xcursor = container_of(ctx->cursor, subcur_t, cursor); cursor_couple_t *couple = @@ -269,8 +269,8 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_sdb_t *sdb, const pgno_t pgno, return MDBX_SUCCESS; } -__cold static int walk_sdb(walk_ctx_t *ctx, walk_sdb_t *sdb) { - tree_t *const db = sdb->internal; +__cold static int walk_tbl(walk_ctx_t *ctx, walk_tbl_t *tbl) { + tree_t *const db = tbl->internal; if (unlikely(db->root == P_INVALID)) return MDBX_SUCCESS; /* empty db */ @@ -288,7 +288,7 @@ __cold static int walk_sdb(walk_ctx_t *ctx, walk_sdb_t *sdb) { couple.outer.next = ctx->cursor; couple.outer.top_and_flags = z_disable_tree_search_fastpath; ctx->cursor = &couple.outer; - rc = walk_pgno(ctx, sdb, db->root, + rc = walk_pgno(ctx, tbl, db->root, db->mod_txnid ? db->mod_txnid : ctx->txn->txnid); ctx->cursor = couple.outer.next; return rc; @@ -302,13 +302,13 @@ __cold int walk_pages(MDBX_txn *txn, walk_func *visitor, void *user, walk_ctx_t ctx = { .txn = txn, .userctx = user, .visitor = visitor, .options = options}; - walk_sdb_t sdb = {.name = {.iov_base = MDBX_CHK_GC}, + walk_tbl_t tbl = {.name = {.iov_base = MDBX_CHK_GC}, .internal = &txn->dbs[FREE_DBI]}; - rc = walk_sdb(&ctx, &sdb); + rc = walk_tbl(&ctx, &tbl); if (!MDBX_IS_ERROR(rc)) { - sdb.name.iov_base = MDBX_CHK_MAIN; - sdb.internal = &txn->dbs[MAIN_DBI]; - rc = walk_sdb(&ctx, &sdb); + tbl.name.iov_base = MDBX_CHK_MAIN; + tbl.internal = &txn->dbs[MAIN_DBI]; + rc = walk_tbl(&ctx, &tbl); } return rc; } diff --git a/src/walk.h b/src/walk.h index 8ef406c9..ef79c70b 100644 --- a/src/walk.h +++ b/src/walk.h @@ -5,13 +5,13 @@ #include "essentials.h" -typedef struct walk_sdb { +typedef struct walk_tbl { MDBX_val name; tree_t *internal, *nested; -} walk_sdb_t; +} walk_tbl_t; typedef int walk_func(const size_t pgno, const unsigned number, void *const ctx, - const int deep, const walk_sdb_t *table, + const int deep, const walk_tbl_t *table, const size_t page_size, const page_type_t page_type, const MDBX_error_t 
err, const size_t nentries, const size_t payload_bytes, const size_t header_bytes, From 7b09ecd9b7f4eb9af617540f381c1641701bb209 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 3 Aug 2024 14:50:22 +0300 Subject: [PATCH 247/443] =?UTF-8?q?mdbx-doc:=20=D0=B4=D0=BE=D0=BF=D0=BE?= =?UTF-8?q?=D0=BB=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D0=B8=D1=81?= =?UTF-8?q?=D0=B0=D0=BD=D0=B8=D1=8F=20`mdbx=5Fenv=5Fcopy()`=20=D0=B8=20?= =?UTF-8?q?=D1=80=D0=BE=D0=B4=D1=81=D1=82=D0=B2=D0=B5=D0=BD=D0=BD=D1=8B?= =?UTF-8?q?=D1=85=20=D1=84=D1=83=D0=BD=D0=BA=D1=86=D0=B8=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/mdbx.h b/mdbx.h index 90c50771..07d9ecf3 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2628,6 +2628,24 @@ LIBMDBX_API int mdbx_env_deleteW(const wchar_t *pathname, * - \ref MDBX_CP_FORCE_DYNAMIC_SIZE * Force to make resizable copy, i.e. dynamic size instead of fixed. * + * - \ref MDBX_CP_DONT_FLUSH + * Don't explicitly flush the written data to an output media to reduce + * the time of the operation and the duration of the transaction. + * + * - \ref MDBX_CP_THROTTLE_MVCC + * Use read transaction parking during copying MVCC-snapshot + * to avoid stopping recycling and overflowing the database. + * This allows the writing transaction to oust the read + * transaction used to copy the database if copying takes so long + * that it will interfere with the recycling old MVCC snapshots + * and may lead to an overflow of the database. + * However, if the reading transaction is ousted the copy will + * be aborted until successful completion. 
Thus, this option + * allows copying the database without interfering with write + * transactions and a threat of database overflow, but at the cost + * that copying will be aborted to prevent such conditions. + * \see mdbx_txn_park() + * * \returns A non-zero error value on failure and 0 on success. */ LIBMDBX_API int mdbx_env_copy(MDBX_env *env, const char *dest, MDBX_copy_flags_t flags); @@ -2666,6 +2684,24 @@ LIBMDBX_API int mdbx_env_copy(MDBX_env *env, const char *dest, * - \ref MDBX_CP_FORCE_DYNAMIC_SIZE * Force to make resizable copy, i.e. dynamic size instead of fixed. * + * - \ref MDBX_CP_DONT_FLUSH + * Don't explicitly flush the written data to an output media to reduce + * the time of the operation and the duration of the transaction. + * + * - \ref MDBX_CP_THROTTLE_MVCC + * Use read transaction parking during copying MVCC-snapshot + * to avoid stopping recycling and overflowing the database. + * This allows the writing transaction to oust the read + * transaction used to copy the database if copying takes so long + * that it will interfere with the recycling old MVCC snapshots + * and may lead to an overflow of the database. + * However, if the reading transaction is ousted the copy will + * be aborted until successful completion. Thus, this option + * allows copying the database without interfering with write + * transactions and a threat of database overflow, but at the cost + * that copying will be aborted to prevent such conditions. + * \see mdbx_txn_park() + * * \returns A non-zero error value on failure and 0 on success. 
*/ LIBMDBX_API int mdbx_txn_copy2pathname(MDBX_txn *txn, const char *dest, MDBX_copy_flags_t flags); From 7bff3b3df699ca761c79c7350808fc1fb7f1216d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 3 Aug 2024 15:08:30 +0300 Subject: [PATCH 248/443] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5?= =?UTF-8?q?=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20=D0=B8=D0=BC?= =?UTF-8?q?=D0=B5=D0=BD=20=D0=B8=20=D1=81=D1=81=D1=8B=D0=BB=D0=BE=D0=BA=20?= =?UTF-8?q?=D0=B2=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index ddf1f60b..aa686f08 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,7 +1,7 @@ ChangeLog ========= -English version [by Google](https://gitflic-ru.translate.goog/project/erthink/libmdbx/blob?file=ChangeLog.md&_x_tr_sl=ru&_x_tr_tl=en) +English version [by liar Google](https://gitflic-ru.translate.goog/project/erthink/libmdbx/blob?file=ChangeLog.md&_x_tr_sl=ru&_x_tr_tl=en) and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md). ## v0.13.1 (в процессе подготовки выпуска) @@ -44,7 +44,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic припаркованных транзакций является как дополнением, так и более простой в использовании альтернативой обратному вызову [Handle-Slow-Readers](https://libmdbx.dqdkfa.ru/group__c__err.html#ga2cb11b56414c282fe06dd942ae6cade6). - Для удобства функции `mdbx_txn_park()` и `mdbx_txn_unpack()` имеют + Для удобства функции `mdbx_txn_park()` и `mdbx_txn_unpark()` имеют дополнительные аргументы, позволяющие запросить автоматическую "распарковку" припаркованных и перезапуск вытесненных транзакций. 
@@ -109,19 +109,19 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Расширение и доработка C++ API: - - добавлен тип `mdbx::cursor::estimation_result`, а поведение методов - `cursor::estimate()` унифицировано с `cursor::move()`; + - добавлен тип `mdbx::cursor::estimate_result`, а поведение методов + `mdbx::cursor::estimate()` унифицировано с `mdbx::cursor::move()`; - для предотвращения незаметного неверного использования API, для инициализации - возвращаемых по ссылке срезов, вместо пустых срезов задействован `slice::invalid()`; + возвращаемых по ссылке срезов, вместо пустых срезов задействован `mdbx::slice::invalid()`; - добавлены дополнительные C++ операторы преобразования к типам C API; - для совместимости со старыми стандартами C++ и старыми версиями STL перенесены - в public классы `buffer::move_assign_alloc` и `buffer::copy_assign_alloc`; + в public классы `mdbx::buffer::move_assign_alloc` и `mdbx::buffer::copy_assign_alloc`; - добавлен тип `mdbx::default_buffer`; - - для срезов и буферов добавлены методы `hex_decode()`, `base64_decode()`, `base58_decode()`; + - для срезов и буферов добавлены методы `mdbx::buffer::hex_decode()`, `mdbx::buffer::base64_decode()`, `mdbx::buffer::base58_decode()`; - добавлен тип `mdbx::comparator` и функций `mdbx::default_comparator()`; - - добавлены статические методы `buffer::hex()`, `base64()`, `base58()`; + - добавлены статические методы `mdbx::buffer::hex()`, `mdbx::buffer::base64()`, `mdbx::buffer::base58()`; - для транзакций и курсоров добавлены методы `get_/set_context`; - - добавлен метод `cursor::clone()`; + - добавлен метод `mdbx::cursor::clone()`; - поддержка base58 переработана и приведена в соответствии с черновиком RFC, в текущем понимании теперь это одна из самых высокопроизводительных реализаций base58; - переработка `to_hex()` и `from_hex()`; @@ -335,7 +335,7 @@ Signed-off-by: Леонид Юрьев (Leonid Yuriev) - Из разрабатываемой версии перенесены не-нарушающие совместимости 
доработки C++ API: - - добавлен тип `mdbx::cursor::estimation_result`, а поведение методов + - добавлен тип `mdbx::cursor::estimate_result`, а поведение методов `cursor::estimate()` унифицировано с `cursor::move()`; - для предотвращения незаметного неверного использования API, для инициализации возвращаемых по ссылке срезов, вместо пустых срезов задействован `slice::invalid()`; From edfa526138dd34beadbcc4647c072094e4f6533a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 7 Aug 2024 17:38:53 +0300 Subject: [PATCH 249/443] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B2=D0=BE=D0=B7=D0=BC=D0=BE?= =?UTF-8?q?=D0=B6=D0=BD=D0=BE=D1=81=D1=82=D0=B8=20`SIGSEGV`=20=D0=B2=D0=BD?= =?UTF-8?q?=D1=83=D1=82=D1=80=D0=B8=20`coherency=5Fcheck()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Падение происходило в случае когда: - Некоторый процесс увеличивал размер БД с изменением геометрии (с увеличением предельного размера БД и её отображения в ОЗУ), затем задействовал страницу из добавленного сегмента в качестве корневой для FreeDB/GC и/или MainDB и фиксировал транзакцию. - Другой процесс, уже работавший с БД до изменения геометрии первым процессом, запускал транзакцию чтения. Падение происходило при проверке «когерентности» отображения страниц БД в ОЗУ, при проверке отметок модификации внутри корневых страниц, так как в этом случае они были вне границ текущего отображения БД в адресном пространстве этого процесса. Похоже что в ходе какого-то рефакторинга потерялась соответствующая проверка. Этот коммит добавляет её как временное решение, до переноса проверки «когерентности» после изменения размера отображения (добавлено в TODO). 
--- TODO.md | 1 + src/coherency.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/TODO.md b/TODO.md index 13deca20..da83def7 100644 --- a/TODO.md +++ b/TODO.md @@ -11,6 +11,7 @@ For the same reason ~~Github~~ is blacklisted forever. So currently most of the links are broken due to noted malicious ~~Github~~ sabotage. + - Внутри `txn_renew()` вынести проверку когерентности mmap за/после изменение размера. - [Migration guide from LMDB to MDBX](https://libmdbx.dqdkfa.ru/dead-github/issues/199). - [Support for RAW devices](https://libmdbx.dqdkfa.ru/dead-github/issues/124). - [Support MessagePack for Keys & Values](https://libmdbx.dqdkfa.ru/dead-github/issues/115). diff --git a/src/coherency.c b/src/coherency.c index 7ae4da87..9e55a894 100644 --- a/src/coherency.c +++ b/src/coherency.c @@ -76,7 +76,13 @@ static bool coherency_check(const MDBX_env *env, const txnid_t txnid, : "(wagering meta)"); ok = false; } - if (likely(freedb_root && freedb_mod_txnid)) { + + /* Проверяем отметки внутри корневых страниц только если сами страницы + * в пределах текущего отображения. Иначе возможны SIGSEGV до переноса + * вызова coherency_check_head() после dxb_resize() внутри txn_renew(). 
*/ + if (likely(freedb_root && freedb_mod_txnid && + (size_t)ptr_dist(env->dxb_mmap.base, freedb_root) < + env->dxb_mmap.limit)) { VALGRIND_MAKE_MEM_DEFINED(freedb_root, sizeof(freedb_root->txnid)); MDBX_ASAN_UNPOISON_MEMORY_REGION(freedb_root, sizeof(freedb_root->txnid)); const txnid_t root_txnid = freedb_root->txnid; @@ -91,7 +97,9 @@ static bool coherency_check(const MDBX_env *env, const txnid_t txnid, ok = false; } } - if (likely(maindb_root && maindb_mod_txnid)) { + if (likely(maindb_root && maindb_mod_txnid && + (size_t)ptr_dist(env->dxb_mmap.base, maindb_root) < + env->dxb_mmap.limit)) { VALGRIND_MAKE_MEM_DEFINED(maindb_root, sizeof(maindb_root->txnid)); MDBX_ASAN_UNPOISON_MEMORY_REGION(maindb_root, sizeof(maindb_root->txnid)); const txnid_t root_txnid = maindb_root->txnid; From 8cc5e8c262ee90c87fc6d72e5e16f156745921ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 8 Aug 2024 00:17:09 +0300 Subject: [PATCH 250/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index aa686f08..b46c4f78 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -141,6 +141,18 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Возвращение `MDBX_TXN_INVALID` (`INT32_MIN`) вместо `-1` из `mdbx_txn_flags()` при передаче невалидной транзакции. +Исправления: + + - Исправление упущенного `TXN_END_EOTDONE` при сбое старта читающей транзакции. + Упомянутый флажок отсутствовал в пути разрушения транзакции при ошибке + её запуска. 
Из-за чего делалась попытка разрушить курсоры, что приводило + к падению **отладочных сборок**, так как в них соответствующий массив + намеренно заполнен некорректными указателями. + + - Устранение возможности `SIGSEGV` внутри `coherency_check()` после + изменения геометрии другим процессом с увеличением верхнего размера БД + и увеличением БД больше предыдущего лимита. + Мелочи: - Обновление конфигурации Doxygen до 1.9.6. From aca692212f3b13454d379de998b06e512710351c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 7 Aug 2024 19:11:34 +0300 Subject: [PATCH 251/443] =?UTF-8?q?mdbx-testing:=20=D0=B8=D1=81=D0=BF?= =?UTF-8?q?=D1=80=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=83=D0=BF?= =?UTF-8?q?=D1=83=D1=89=D0=B5=D0=BD=D0=BD=D0=BE=D0=B9=20=D0=BE=D0=BF=D1=86?= =?UTF-8?q?=D0=B8=D0=B8=20`--extra`=20=D0=B2=20battery-tmux.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/battery-tmux.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/battery-tmux.sh b/test/battery-tmux.sh index 3fae3fc8..11319860 100755 --- a/test/battery-tmux.sh +++ b/test/battery-tmux.sh @@ -24,7 +24,7 @@ for ps in min 4k max; do fi done for n in 0 1 2 3 4 5 6 7; do - CMD="${TEST} --delay $((3 + n * 7)) --page-size ${ps} --from ${from} --dir ${PREFIX}page-${ps}.from-${from}.${n}-extra" + CMD="${TEST} --delay $((3 + n * 7)) --extra --page-size ${ps} --from ${from} --dir ${PREFIX}page-${ps}.from-${from}.${n}-extra" if [ $n -eq 0 ]; then tmux new-window -t mdbx:$((++W)) -n "page-${ps}.from-${from}-extra" -k -d "$CMD" tmux select-layout -E tiled From b054a69e72c01b3ac98b060b7426e2d9c7c8632f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 11 Aug 2024 00:00:22 +0300 Subject: [PATCH 252/443] 
=?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B8=20=D1=80=D0=B5?= =?UTF-8?q?=D1=84=D0=B0=D0=BA=D1=82=D0=BE=D1=80=D0=B8=D0=BD=D0=B3=20=D1=86?= =?UTF-8?q?=D0=B8=D0=BA=D0=BB=D0=B0=20=D1=87=D1=82=D0=B5=D0=BD=D0=B8=D1=8F?= =?UTF-8?q?=20=D0=BC=D0=B5=D1=82=D0=B0-=D1=81=D1=82=D1=80=D0=B0=D0=BD?= =?UTF-8?q?=D0=B8=D1=86=20=D0=BF=D1=80=D0=B8=20=D1=81=D1=82=D0=B0=D1=80?= =?UTF-8?q?=D1=82=D0=B5=20=D1=87=D0=B8=D1=82=D0=B0=D1=8E=D1=89=D0=B8=D1=85?= =?UTF-8?q?=20=D1=82=D1=80=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B9?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Сценарий достаточно запутанный/сложный. --- src/api-env.c | 2 +- src/coherency.c | 10 ++++--- src/proto.h | 2 +- src/txn.c | 75 ++++++++++++++++++++++++++++--------------------- 4 files changed, 51 insertions(+), 38 deletions(-) diff --git a/src/api-env.c b/src/api-env.c index 4aa7bd2e..48324f68 100644 --- a/src/api-env.c +++ b/src/api-env.c @@ -1274,7 +1274,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, uint64_t timestamp = 0; while ("workaround for " "https://libmdbx.dqdkfa.ru/dead-github/issues/269") { - rc = coherency_check_head(env->basal_txn, head, &timestamp); + rc = coherency_fetch_head(env->basal_txn, head, &timestamp); if (likely(rc == MDBX_SUCCESS)) break; if (unlikely(rc != MDBX_RESULT_TRUE)) diff --git a/src/coherency.c b/src/coherency.c index 9e55a894..7701271b 100644 --- a/src/coherency.c +++ b/src/coherency.c @@ -152,18 +152,20 @@ __cold int coherency_timeout(uint64_t *timestamp, intptr_t pgno, /* check with timeout as the workaround * for https://libmdbx.dqdkfa.ru/dead-github/issues/269 */ -__hot int coherency_check_head(MDBX_txn *txn, const meta_ptr_t head, +__hot int coherency_fetch_head(MDBX_txn *txn, const meta_ptr_t head, uint64_t *timestamp) { /* Copy the DB info and flags */ - txn->geo = head.ptr_v->geometry; + txn->txnid = head.txnid; + txn->geo =
head.ptr_c->geometry; memcpy(txn->dbs, &head.ptr_c->trees, sizeof(head.ptr_c->trees)); STATIC_ASSERT(sizeof(head.ptr_c->trees) == CORE_DBS * sizeof(tree_t)); VALGRIND_MAKE_MEM_UNDEFINED(txn->dbs + CORE_DBS, txn->env->max_dbi - CORE_DBS); - txn->canary = head.ptr_v->canary; + txn->canary = head.ptr_c->canary; if (unlikely(!coherency_check(txn->env, head.txnid, txn->dbs, head.ptr_v, - *timestamp == 0))) + *timestamp == 0) || + txn->txnid != meta_txnid(head.ptr_v))) return coherency_timeout(timestamp, -1, txn->env); if (unlikely(txn->dbs[FREE_DBI].flags != MDBX_INTEGERKEY)) { diff --git a/src/proto.h b/src/proto.h index 531196c0..80f9394b 100644 --- a/src/proto.h +++ b/src/proto.h @@ -113,7 +113,7 @@ MDBX_INTERNAL int __must_check_result tbl_setup(const MDBX_env *env, MDBX_INTERNAL bool coherency_check_meta(const MDBX_env *env, const volatile meta_t *meta, bool report); -MDBX_INTERNAL int coherency_check_head(MDBX_txn *txn, const meta_ptr_t head, +MDBX_INTERNAL int coherency_fetch_head(MDBX_txn *txn, const meta_ptr_t head, uint64_t *timestamp); MDBX_INTERNAL int coherency_check_written(const MDBX_env *env, const txnid_t txnid, diff --git a/src/txn.c b/src/txn.c index 845c0436..bd8a1a59 100644 --- a/src/txn.c +++ b/src/txn.c @@ -982,7 +982,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { likely(env->stuck_meta < 0) ? 
/* regular */ meta_recent(env, &troika) : /* recovery mode */ meta_ptr(env, env->stuck_meta); - if (likely(r)) { + if (likely(r != nullptr)) { safe64_reset(&r->txnid, true); atomic_store32(&r->snapshot_pages_used, head.ptr_v->geometry.first_unallocated, mo_Relaxed); @@ -1005,46 +1005,57 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { } jitter4testing(true); - /* Snap the state from current meta-head */ - txn->txnid = head.txnid; - if (likely(env->stuck_meta < 0) && - unlikely(meta_should_retry(env, &troika) || - head.txnid < atomic_load64(&env->lck->cached_oldest, - mo_AcquireRelease))) { - if (unlikely(++loop > 42)) { - ERROR("bailout waiting for valid snapshot (%s)", - "metapages are too volatile"); - rc = MDBX_PROBLEM; - txn->txnid = INVALID_TXNID; - if (likely(r)) - safe64_reset(&r->txnid, true); - goto bailout; + if (unlikely(meta_should_retry(env, &troika))) { + retry: + if (likely(++loop < 42)) { + timestamp = 0; + continue; } - timestamp = 0; - continue; + ERROR("bailout waiting for valid snapshot (%s)", + "meta-pages are too volatile"); + rc = MDBX_PROBLEM; + goto read_failed; } - rc = coherency_check_head(txn, head, &timestamp); + /* Snap the state from current meta-head */ + rc = coherency_fetch_head(txn, head, &timestamp); jitter4testing(false); - if (likely(rc == MDBX_SUCCESS)) - break; - - if (unlikely(rc != MDBX_RESULT_TRUE)) { - txn->txnid = INVALID_TXNID; - if (likely(r)) - safe64_reset(&r->txnid, true); - goto bailout; + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc == MDBX_RESULT_TRUE) + goto retry; + else + goto read_failed; } + + const uint64_t snap_oldest = + atomic_load64(&env->lck->cached_oldest, mo_AcquireRelease); + if (unlikely(txn->txnid < snap_oldest)) { + if (env->stuck_meta < 0) + goto retry; + ERROR("target meta-page %i is referenced to an obsolete MVCC-snapshot " + "%" PRIaTXN " < cached-oldest %" PRIaTXN, + env->stuck_meta, txn->txnid, snap_oldest); + rc = MDBX_MVCC_RETARDED; + goto read_failed; + } + + if (likely(r != nullptr) &&
unlikely(txn->txnid != atomic_load64(&r->txnid, mo_Relaxed))) + goto retry; + break; } if (unlikely(txn->txnid < MIN_TXNID || txn->txnid > MAX_TXNID)) { ERROR("%s", "environment corrupted by died writer, must shutdown!"); - if (likely(r)) - safe64_reset(&r->txnid, true); - txn->txnid = INVALID_TXNID; rc = MDBX_CORRUPTED; + read_failed: + txn->txnid = INVALID_TXNID; + if (likely(r != nullptr)) + safe64_reset(&r->txnid, true); goto bailout; } + + tASSERT(txn, rc == MDBX_SUCCESS); ENSURE(env, txn->txnid >= /* paranoia is appropriate here */ env->lck->cached_oldest.weak); @@ -1092,14 +1103,14 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { const meta_ptr_t head = meta_recent(env, &txn->tw.troika); uint64_t timestamp = 0; while ("workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269") { - rc = coherency_check_head(txn, head, &timestamp); + rc = coherency_fetch_head(txn, head, &timestamp); if (likely(rc == MDBX_SUCCESS)) break; if (unlikely(rc != MDBX_RESULT_TRUE)) goto bailout; } - eASSERT(env, meta_txnid(head.ptr_v) == head.txnid); - txn->txnid = safe64_txnid_next(head.txnid); + eASSERT(env, meta_txnid(head.ptr_v) == txn->txnid); + txn->txnid = safe64_txnid_next(txn->txnid); if (unlikely(txn->txnid > MAX_TXNID)) { rc = MDBX_TXN_FULL; ERROR("txnid overflow, raise %d", rc); From f477fa13e065623e3398f0aa0a8f66f7694ae680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 11 Aug 2024 00:23:08 +0300 Subject: [PATCH 253/443] =?UTF-8?q?mdbx-testing:=20=D0=B2=D0=BA=D0=BB?= =?UTF-8?q?=D1=8E=D1=87=D0=B5=D0=BD=D0=B8=D0=B5=20`ulimit=20-c=20unliminte?= =?UTF-8?q?d`=20=D0=B2=20=D1=81=D1=82=D0=BE=D1=85=D0=B0=D1=81=D1=82=D0=B8?= =?UTF-8?q?=D1=87=D0=B5=D1=81=D0=BA=D0=BE=D0=BC=20=D1=81=D0=BA=D1=80=D0=B8?= =?UTF-8?q?=D0=BF=D1=82=D0=B5.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/long_stochastic.sh | 1 + 1 file changed, 1
insertion(+) diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index 0e38a301..ece40bd5 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -264,6 +264,7 @@ case ${UNAME} in ;; esac +ulimit -c unlimited || echo "failed set unlimited core-dump size" >&2 rm -f ${TESTDB_DIR}/* ############################################################################### From 0c9f531c7223898e80c4fac6210334a6c820e29a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 11 Aug 2024 00:23:34 +0300 Subject: [PATCH 254/443] =?UTF-8?q?mdbx-testing:=20=D0=B4=D0=BE=D0=B1?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D1=86?= =?UTF-8?q?=D0=B8=D0=B8=20`--db-upto-gb`=20=D0=B2=20=D1=81=D1=82=D0=BE?= =?UTF-8?q?=D1=85=D0=B0=D1=81=D1=82=D0=B8=D1=87=D0=B5=D1=81=D0=BA=D0=B8?= =?UTF-8?q?=D0=B9=20=D1=81=D0=BA=D1=80=D0=B8=D0=BF=D1=82.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/long_stochastic.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index ece40bd5..4d951e4a 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -36,6 +36,7 @@ do echo "--loops NN Stop after the NN loops" echo "--dir PATH Specifies directory for test DB and other files (it will be cleared)" echo "--db-upto-mb NN Limits upper size of test DB to the NN megabytes" + echo "--db-upto-gb NN --''--''--''--''--''--''--''--''-- NN gigabytes" echo "--no-geometry-jitter Disable jitter for geometry upper-size" echo "--pagesize NN Use specified page size (256 is minimal and used by default)" echo "--dont-check-ram-size Don't check available RAM" @@ -118,6 +119,14 @@ do fi shift ;; + --db-upto-gb) + DB_UPTO_MB=$(($2 * 1024)) + if [ -z "$DB_UPTO_MB" -o "$DB_UPTO_MB" -lt 1 -o "$DB_UPTO_MB" -gt 4194304 ]; then + echo "Invalid value '$2' for --db-upto-gb 
option" + exit -2 + fi + shift + ;; --no-geometry-jitter) GEOMETRY_JITTER=no ;; From 7511f480a4f9ebc3d9f6a7c16f6e2ce66afe921c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 11 Aug 2024 09:42:32 +0300 Subject: [PATCH 255/443] =?UTF-8?q?mdbx-testing:=20=D1=83=D0=BC=D0=B5?= =?UTF-8?q?=D0=BD=D1=8C=D1=88=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BC=D0=BE=D1=89?= =?UTF-8?q?=D0=BD=D0=BE=D1=81=D1=82=D0=B8=20"tmux-=D0=B1=D0=B0=D1=82=D0=B0?= =?UTF-8?q?=D1=80=D0=B5=D0=B9=D0=BD=D0=BE=D0=B3=D0=BE"=20=D1=82=D0=B5?= =?UTF-8?q?=D1=81=D1=82=D0=B0=20=D0=B2=D0=B4=D0=B2=D0=BE=D0=B5=20(=D0=BF?= =?UTF-8?q?=D0=BE=D0=B4=20"=D0=B1=D0=BE=D0=BB=D0=B5=D0=B5=20=D1=82=D0=B8?= =?UTF-8?q?=D0=BF=D0=BE=D0=B2=D1=83=D1=8E"=20=D0=B2=D0=B8=D1=80=D1=82?= =?UTF-8?q?=D1=83=D0=B0=D0=BB=D0=BA=D1=83).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/battery-tmux.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/battery-tmux.sh b/test/battery-tmux.sh index 11319860..04b595d3 100755 --- a/test/battery-tmux.sh +++ b/test/battery-tmux.sh @@ -14,7 +14,7 @@ tmux -f ./test/tmux.conf new-session -d -s mdbx htop W=0 for ps in min 4k max; do for from in 1 30000; do - for n in 0 1 2 3 4 5 6 7; do + for n in 0 1 2 3; do CMD="${TEST} --delay $((n * 7)) --page-size ${ps} --from ${from} --dir ${PREFIX}page-${ps}.from-${from}.${n}" if [ $n -eq 0 ]; then tmux new-window -t mdbx:$((++W)) -n "page-${ps}.from-${from}" -k -d "$CMD" @@ -23,7 +23,7 @@ for ps in min 4k max; do tmux split-window -t mdbx:$W -l 20% -d $CMD fi done - for n in 0 1 2 3 4 5 6 7; do + for n in 0 1 2 3; do CMD="${TEST} --delay $((3 + n * 7)) --extra --page-size ${ps} --from ${from} --dir ${PREFIX}page-${ps}.from-${from}.${n}-extra" if [ $n -eq 0 ]; then tmux new-window -t mdbx:$((++W)) -n "page-${ps}.from-${from}-extra" -k -d "$CMD" From 77a35608f6139d4ea2d872371631e796dd65334d 
Mon Sep 17 00:00:00 2001 From: Alexey Sharov Date: Sun, 11 Aug 2024 06:49:01 +0000 Subject: [PATCH 256/443] =?UTF-8?q?mdbx-testing:=20=D1=83=D0=B2=D0=B5?= =?UTF-8?q?=D0=BB=D0=B8=D1=87=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=80=D0=B0=D0=B7?= =?UTF-8?q?=D0=BC=D0=B5=D1=80=D0=B0=20=D0=91=D0=94=20=D0=B4=D0=BE=2032?= =?UTF-8?q?=D0=93=D0=B1=20(=D0=B2=D0=B4=D0=B2=D0=BE=D0=B5)=20=D0=B2=20"tmu?= =?UTF-8?q?x-=D0=B1=D0=B0=D1=82=D0=B0=D1=80=D0=B5=D0=B9=D0=BD=D0=BE=D0=BC"?= =?UTF-8?q?=20=D1=82=D0=B5=D1=81=D1=82=D0=B5.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/battery-tmux.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/battery-tmux.sh b/test/battery-tmux.sh index 04b595d3..fba52064 100755 --- a/test/battery-tmux.sh +++ b/test/battery-tmux.sh @@ -3,7 +3,7 @@ # Леонид Юрьев aka Leonid Yuriev # SPDX-License-Identifier: Apache-2.0 -TEST="./test/long_stochastic.sh --skip-make" +TEST="./test/long_stochastic.sh --skip-make --db-upto-gb 32" PREFIX="/dev/shm/mdbxtest-" tmux kill-session -t mdbx From 2b5d7ed29a3ac90124ae40d02d763b67b47b22f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 19 Aug 2024 08:43:33 +0300 Subject: [PATCH 257/443] =?UTF-8?q?mdbx:=20=D0=B2=D1=8B=D0=B2=D0=BE=D0=B4?= =?UTF-8?q?=20=D0=BD=D0=BE=D0=BC=D0=B5=D1=80=D0=B0=20=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B8=20=D0=BF=D1=80=D0=B8?= =?UTF-8?q?=20=D1=81=D0=B8=D1=82=D1=83=D0=B0=D1=86=D0=B8=D0=B8=20=D0=B7?= =?UTF-8?q?=D0=B0=D1=86=D0=B8=D0=BA=D0=BB=D0=B8=D0=B2=D0=B0=D0=BD=D0=B8?= =?UTF-8?q?=D1=8F=20=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B8=20`gc=5Fupdate()`?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/gc-put.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gc-put.c b/src/gc-put.c index e5a036d9..a9388143 100644 --- 
a/src/gc-put.c +++ b/src/gc-put.c @@ -611,7 +611,8 @@ retry: MDBX_ENABLE_REFUND)); tASSERT(txn, dpl_check(txn)); if (unlikely(/* paranoia */ ctx->loop > ((MDBX_DEBUG > 0) ? 12 : 42))) { - ERROR("too more loops %u, bailout", ctx->loop); + ERROR("txn #%" PRIaTXN " too more loops %u, bailout", txn->txnid, + ctx->loop); rc = MDBX_PROBLEM; goto bailout; } From b4dcf148c527aa26ebe036ee71a714738e0cf525 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 22 Aug 2024 00:00:29 +0300 Subject: [PATCH 258/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20assert-=D0=BF=D1=80?= =?UTF-8?q?=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B8=20=D0=BF=D1=80=D0=B8=20?= =?UTF-8?q?=D0=BF=D0=BE=D0=BF=D1=8B=D1=82=D0=BA=D0=B5=20=D1=81=D0=BE=D0=B7?= =?UTF-8?q?=D0=B4=D0=B0=D0=BD=D0=B8=D1=8F=20=D1=82=D0=B0=D0=B1=D0=BB=D0=B8?= =?UTF-8?q?=D1=86=D1=8B=20=D1=81=20=D0=B4=D1=80=D1=83=D0=B3=D0=B8=D0=BC?= =?UTF-8?q?=D0=B8=20=D1=84=D0=BB=D0=B0=D0=B3=D0=B0=D0=BC=D0=B8/=D0=BE?= =?UTF-8?q?=D0=BF=D1=86=D0=B8=D1=8F=D0=BC=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Сообщение о проблеме https://t.me/libmdbx/6101 --- src/dbi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/dbi.c b/src/dbi.c index 5e43234d..bc0e9dcb 100644 --- a/src/dbi.c +++ b/src/dbi.c @@ -279,12 +279,14 @@ int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, } else if ((user_flags & MDBX_CREATE) == 0) return /* FIXME: return extended info */ MDBX_INCOMPATIBLE; else { - eASSERT(env, env->dbs_flags[dbi] & DB_VALID); if (txn->dbi_state[dbi] & DBI_STALE) { + eASSERT(env, env->dbs_flags[dbi] & DB_VALID); int err = tbl_fetch(txn, dbi); if (unlikely(err == MDBX_SUCCESS)) return err; } + eASSERT(env, ((env->dbs_flags[dbi] ^ txn->dbs[dbi].flags) & + DB_PERSISTENT_FLAGS) == 0); eASSERT(env, 
(txn->dbi_state[dbi] & (DBI_LINDO | DBI_VALID | DBI_STALE)) == (DBI_LINDO | DBI_VALID)); @@ -295,7 +297,7 @@ int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, if (unlikely(txn->cursors[dbi])) return MDBX_DANGLING_DBI; env->dbs_flags[dbi] = DB_POISON; - atomic_store32(&env->dbi_seqs[dbi], dbi_seq_next(env, MAIN_DBI), + atomic_store32(&env->dbi_seqs[dbi], dbi_seq_next(env, dbi), mo_AcquireRelease); const uint32_t seq = dbi_seq_next(env, dbi); From a17b190dc8429b595b2ff53fae68f51a6db38acd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 22 Aug 2024 00:03:08 +0300 Subject: [PATCH 259/443] =?UTF-8?q?mdbx:=20=D0=BE=D0=B1=D0=BD=D0=BE=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=BD=D1=8B=D0=B9=20clang-format=20(=D0=BA?= =?UTF-8?q?=D0=BE=D1=81=D0=BC=D0=B5=D1=82=D0=B8=D0=BA=D0=B0).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdbx.h b/mdbx.h index 07d9ecf3..9972e6b5 100644 --- a/mdbx.h +++ b/mdbx.h @@ -559,7 +559,7 @@ typedef mode_t mdbx_mode_t; } \ MDBX_NOSANITIZE_ENUM MDBX_CXX01_CONSTEXPR ENUM operator&(unsigned a, \ ENUM b) { \ - return ENUM(a &unsigned(b)); \ + return ENUM(a & unsigned(b)); \ } \ MDBX_NOSANITIZE_ENUM MDBX_CXX14_CONSTEXPR ENUM &operator&=(ENUM &a, \ ENUM b) { \ From 94936fd4c98ff367a495533ca1e1bdb7a93312cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 22 Aug 2024 00:04:56 +0300 Subject: [PATCH 260/443] =?UTF-8?q?mdbx-testing:=20=D0=B4=D0=BE=D0=B1?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=82=D0=B5=D1=81?= =?UTF-8?q?=D1=82=D0=B0=20=D0=BF=D0=B5=D1=80=D0=B5=D1=81=D0=BE=D0=B7=D0=B4?= =?UTF-8?q?=D0=B0=D0=BD=D0=B8=D1=8F=20=D1=82=D0=B0=D0=B1=D0=BB=D0=B8=D1=86?= 
=?UTF-8?q?=D1=8B=20=D1=81=20=D0=B4=D1=80=D1=83=D0=B3=D0=B8=D0=BC=D0=B8=20?= =?UTF-8?q?=D1=84=D0=BB=D0=B0=D0=B3=D0=B0=D0=BC=D0=B8/=D0=BE=D0=BF=D1=86?= =?UTF-8?q?=D0=B8=D1=8F=D0=BC=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 7 +++++++ test/extra/dbi.c++ | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 test/extra/dbi.c++ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c334601d..f945255d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -124,6 +124,13 @@ if(UNIX AND NOT SUBPROJECT) set_target_properties(test_extra_crunched_delete PROPERTIES CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) endif() + add_executable(test_extra_dbi extra/dbi.c++) + target_include_directories(test_extra_dbi PRIVATE "${PROJECT_SOURCE_DIR}") + target_link_libraries(test_extra_dbi ${TOOL_MDBX_LIB}) + if(MDBX_CXX_STANDARD) + set_target_properties(test_extra_dbi PROPERTIES + CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) + endif() endif() endif() diff --git a/test/extra/dbi.c++ b/test/extra/dbi.c++ new file mode 100644 index 00000000..86dd0c4d --- /dev/null +++ b/test/extra/dbi.c++ @@ -0,0 +1,41 @@ +#include "mdbx.h++" + +#include <iostream> + +static char log_buffer[1024]; + +static void logger_nofmt(MDBX_log_level_t loglevel, const char *function, + int line, const char *msg, unsigned length) noexcept { + (void)length; + (void)loglevel; + fprintf(stdout, "%s:%u %s", function, line, msg); +} + +int main(int argc, const char *argv[]) { + (void)argc; + (void)argv; + + mdbx_setup_debug_nofmt(MDBX_LOG_NOTICE, MDBX_DBG_ASSERT, logger_nofmt, + log_buffer, sizeof(log_buffer)); + + mdbx::path path = "test-dbi"; + mdbx::env::remove(path); + + mdbx::env::operate_parameters operateParameters(100, 10); + mdbx::env_managed::create_parameters createParameters; + { + mdbx::env_managed env2(path, createParameters, operateParameters); +
mdbx::txn_managed txn2 = env2.start_write(false); + /* mdbx::map_handle testHandle2 = */ txn2.create_map( + "fap1", mdbx::key_mode::reverse, mdbx::value_mode::single); + txn2.commit(); + } + mdbx::env_managed env(path, createParameters, operateParameters); + mdbx::txn_managed txn = env.start_write(false); + /* mdbx::map_handle testHandle = */ txn.create_map( + "fap1", mdbx::key_mode::usual, mdbx::value_mode::single); + txn.commit(); + + std::cout << "OK\n"; + return EXIT_SUCCESS; +} From 2c17c7b149f338791b179f0b28e04303c3d1973f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 22 Aug 2024 00:08:51 +0300 Subject: [PATCH 261/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index b46c4f78..9e2feee1 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -153,6 +153,8 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic изменения геометрии другим процессом с увеличением верхнего размера БД и увеличением БД больше предыдущего лимита. + - Исправление assert-проверки при попытке создания таблицы с другими флагами/опциями. + Мелочи: - Обновление конфигурации Doxygen до 1.9.6. 
From 5fc7a6b1077794789b97bb2a56f5a4eb541a0bc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 30 Aug 2024 00:01:07 +0300 Subject: [PATCH 262/443] =?UTF-8?q?mdbx:=20=D0=B2=D1=8B=D0=BF=D1=83=D1=81?= =?UTF-8?q?=D0=BA=200.13.1=20"=D0=A0=D0=94=D0=A1-1"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Новая версия со сменой лицензии, существенным расширением API, добавлением функционала и внутренними переработками. В том числе, с незначительным нарушением обратной совместимости API библиотеки. Список нововведений, доработок и изменений слишком велик для размещения здесь, но вся информация есть в файле [ChangeLog](https://libmdbx.dqdkfa.ru/md__change_log.html). ``` git diff' stat: 157 files changed, 41949 insertions(+), 33741 deletions(-) Signed-off-by: Леонид Юрьев (Leonid Yuriev) ``` --- ChangeLog.md | 2 +- src/man1/mdbx_chk.1 | 2 +- src/man1/mdbx_copy.1 | 2 +- src/man1/mdbx_drop.1 | 2 +- src/man1/mdbx_dump.1 | 2 +- src/man1/mdbx_load.1 | 2 +- src/man1/mdbx_stat.1 | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 9e2feee1..bb2b9085 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -4,7 +4,7 @@ ChangeLog English version [by liar Google](https://gitflic-ru.translate.goog/project/erthink/libmdbx/blob?file=ChangeLog.md&_x_tr_sl=ru&_x_tr_tl=en) and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md). -## v0.13.1 (в процессе подготовки выпуска) +## v0.13.1 "РДС-1" от 2024-08-29 Новая версия со сменой лицензии, существенным расширением API, добавлением функционала и внутренними переработками. В том числе, diff --git a/src/man1/mdbx_chk.1 b/src/man1/mdbx_chk.1 index 4ff7fc6a..b48a7984 100644 --- a/src/man1/mdbx_chk.1 +++ b/src/man1/mdbx_chk.1 @@ -1,6 +1,6 @@ .\" Copyright 2015-2024 Leonid Yuriev . 
.\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_CHK 1 "2024-03-21" "MDBX 0.13" +.TH MDBX_CHK 1 "2024-08-29" "MDBX 0.13" .SH NAME mdbx_chk \- MDBX checking tool .SH SYNOPSIS diff --git a/src/man1/mdbx_copy.1 b/src/man1/mdbx_copy.1 index 636bd754..16c0fced 100644 --- a/src/man1/mdbx_copy.1 +++ b/src/man1/mdbx_copy.1 @@ -2,7 +2,7 @@ .\" Copyright 2015,2016 Peter-Service R&D LLC . .\" Copyright 2012-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_COPY 1 "2024-03-21" "MDBX 0.13" +.TH MDBX_COPY 1 "2024-08-29" "MDBX 0.13" .SH NAME mdbx_copy \- MDBX environment copy tool .SH SYNOPSIS diff --git a/src/man1/mdbx_drop.1 b/src/man1/mdbx_drop.1 index 99f8d370..3bf5683b 100644 --- a/src/man1/mdbx_drop.1 +++ b/src/man1/mdbx_drop.1 @@ -1,7 +1,7 @@ .\" Copyright 2021-2024 Leonid Yuriev . .\" Copyright 2014-2021 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_DROP 1 "2024-03-21" "MDBX 0.13" +.TH MDBX_DROP 1 "2024-08-29" "MDBX 0.13" .SH NAME mdbx_drop \- MDBX database delete tool .SH SYNOPSIS diff --git a/src/man1/mdbx_dump.1 b/src/man1/mdbx_dump.1 index ecd9618b..96342054 100644 --- a/src/man1/mdbx_dump.1 +++ b/src/man1/mdbx_dump.1 @@ -2,7 +2,7 @@ .\" Copyright 2015,2016 Peter-Service R&D LLC . .\" Copyright 2014-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_DUMP 1 "2024-03-21" "MDBX 0.13" +.TH MDBX_DUMP 1 "2024-08-29" "MDBX 0.13" .SH NAME mdbx_dump \- MDBX environment export tool .SH SYNOPSIS diff --git a/src/man1/mdbx_load.1 b/src/man1/mdbx_load.1 index 6c2e16c7..5e790876 100644 --- a/src/man1/mdbx_load.1 +++ b/src/man1/mdbx_load.1 @@ -2,7 +2,7 @@ .\" Copyright 2015,2016 Peter-Service R&D LLC . .\" Copyright 2014-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. 
-.TH MDBX_LOAD 1 "2024-03-21" "MDBX 0.13" +.TH MDBX_LOAD 1 "2024-08-29" "MDBX 0.13" .SH NAME mdbx_load \- MDBX environment import tool .SH SYNOPSIS diff --git a/src/man1/mdbx_stat.1 b/src/man1/mdbx_stat.1 index 2b87f201..935a541f 100644 --- a/src/man1/mdbx_stat.1 +++ b/src/man1/mdbx_stat.1 @@ -2,7 +2,7 @@ .\" Copyright 2015,2016 Peter-Service R&D LLC . .\" Copyright 2012-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_STAT 1 "2024-03-21" "MDBX 0.13" +.TH MDBX_STAT 1 "2024-08-29" "MDBX 0.13" .SH NAME mdbx_stat \- MDBX environment status tool .SH SYNOPSIS From 450c1081fa1ddf9b9982cf2bb9eb72cc6bdcae0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 8 Sep 2024 11:36:10 +0300 Subject: [PATCH 263/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=83=D0=BF=D1=83=D1=89?= =?UTF-8?q?=D0=B5=D0=BD=D0=BD=D1=8B=D1=85=20inline-=D1=80=D0=B5=D0=B0?= =?UTF-8?q?=D0=BB=D0=B8=D0=B7=D0=B0=D1=86=D0=B8=D0=B9=20`mdbx::cursor::upp?= =?UTF-8?q?er=5Fbound()`=20=D0=B8=20`mdbx::cursor::upper=5Fbound=5Fmultiva?= =?UTF-8?q?lue()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index 9c4101f9..52d63a1e 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -7099,6 +7099,11 @@ inline cursor::move_result cursor::lower_bound(const slice &key, return move(key_lowerbound, key, throw_notfound); } +inline cursor::move_result cursor::upper_bound(const slice &key, + bool throw_notfound) { + return move(key_greater_than, key, throw_notfound); +} + inline cursor::move_result cursor::find_multivalue(const slice &key, const slice &value, bool throw_notfound) { @@ -7111,6 +7116,12 @@ inline cursor::move_result cursor::lower_bound_multivalue(const slice &key, 
return move(multi_exactkey_lowerboundvalue, key, value, throw_notfound); } +inline cursor::move_result cursor::upper_bound_multivalue(const slice &key, + const slice &value, + bool throw_notfound) { + return move(multi_exactkey_value_greater, key, value, throw_notfound); +} + inline bool cursor::seek(const slice &key) { return move(seek_key, const_cast(&key), nullptr, false); } From fcc4748f23b356e09826b9518e1fa25c77868440 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 8 Sep 2024 20:48:44 +0300 Subject: [PATCH 264/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D0=B5=D1=87?= =?UTF-8?q?=D0=B0=D1=82=D0=BA=D0=B8/=D0=BE=D1=80=D1=84=D0=BE=D0=B3=D1=80?= =?UTF-8?q?=D0=B0=D1=84=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TODO.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TODO.md b/TODO.md index da83def7..c14414e5 100644 --- a/TODO.md +++ b/TODO.md @@ -28,7 +28,7 @@ Done - [Large/Overflow pages accounting for dirty-room](https://libmdbx.dqdkfa.ru/dead-github/issues/192). - [Get rid of dirty-pages list in MDBX_WRITEMAP mode](https://libmdbx.dqdkfa.ru/dead-github/issues/193). -Canceled +Cancelled -------- - [Replace SRW-lock on Windows to allow shrink DB with `MDBX_NOSTICKYTHREADS` option](https://libmdbx.dqdkfa.ru/dead-github/issues/210). 
From c69f23ed70d53252e84f9b0eca8be49a6e039b0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 9 Sep 2024 00:09:17 +0300 Subject: [PATCH 265/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=BE-=D1=83=D0=BC=D0=BE?= =?UTF-8?q?=D0=BB=D1=87=D0=B0=D0=BD=D0=B8=D1=8E=20`MDBX=5FENABLE=5FBIGFOOT?= =?UTF-8?q?=3D1`=20=D0=B2=D0=BD=D0=B5=20=D0=B7=D0=B0=D0=B2=D0=B8=D1=81?= =?UTF-8?q?=D0=B8=D0=BC=D0=BE=D1=81=D1=82=D0=B8=20=D0=BE=D1=82=20=D1=80?= =?UTF-8?q?=D0=B0=D0=B7=D1=80=D1=8F=D0=B4=D0=BD=D0=BE=D1=81=D1=82=D0=B8=20?= =?UTF-8?q?=D0=BF=D0=BB=D0=B0=D1=82=D1=84=D0=BE=D1=80=D0=BC=D1=8B.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/options.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/options.h b/src/options.h index f3be513e..8ae89183 100644 --- a/src/options.h +++ b/src/options.h @@ -123,11 +123,7 @@ /** Enables chunking long list of retired pages during huge transactions commit * to avoid use sequences of pages. 
*/ #ifndef MDBX_ENABLE_BIGFOOT -#if MDBX_WORDBITS >= 64 || defined(DOXYGEN) #define MDBX_ENABLE_BIGFOOT 1 -#else -#define MDBX_ENABLE_BIGFOOT 0 -#endif #elif !(MDBX_ENABLE_BIGFOOT == 0 || MDBX_ENABLE_BIGFOOT == 1) #error MDBX_ENABLE_BIGFOOT must be defined as 0 or 1 #endif /* MDBX_ENABLE_BIGFOOT */ From b36e3702e57bc0702035b1c363c59345147b5351 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 9 Sep 2024 09:22:50 +0300 Subject: [PATCH 266/443] =?UTF-8?q?mdbx-doc:=20=D0=BF=D1=80=D0=BE=D0=B4?= =?UTF-8?q?=D0=BE=D0=BB=D0=B6=D0=B5=D0=BD=D0=B8=D0=B5=20s/subDb/=D1=82?= =?UTF-8?q?=D0=B0=D0=B1=D0=BB=D0=B8=D1=86=D0=B0/=20=D0=B2=20=D0=A1++=20API?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 51 ++++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 52d63a1e..fe92607a 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3486,8 +3486,8 @@ MDBX_CXX01_CONSTEXPR_ENUM bool is_msgpack(value_mode mode) noexcept { return mode == value_mode::msgpack; } -/// \brief A handle for an individual database (key-value spaces) in the -/// environment. +/// \brief A handle for an individual table (aka key-value space, maps or +/// sub-database) in the environment. /// \see txn::open_map() \see txn::create_map() /// \see txn::clear_map() \see txn::drop_map() /// \see txn::get_handle_info() \see txn::get_map_stat() @@ -3537,8 +3537,9 @@ enum put_mode { /// instances, but does not destroys the represented underlying object from the /// own class destructor. /// -/// An environment supports multiple key-value tables (aka key-value -/// maps, spaces or sub-databases), all residing in the same shared-memory map. 
+/// An environment supports multiple key-value tables (aka key-value maps, +/// spaces or sub-databases), all residing in the same shared-memory mapped +/// file. class LIBMDBX_API_TYPE env { friend class txn; @@ -3699,7 +3700,7 @@ public: /// \brief Operate parameters. struct LIBMDBX_API_TYPE operate_parameters { - /// \brief The maximum number of named databases for the environment. + /// \brief The maximum number of named tables/maps for the environment. /// Zero means default value. unsigned max_maps{0}; /// \brief The maximum number of threads/reader slots for the environment. @@ -3774,24 +3775,24 @@ public: /// \brief Returns the maximal database size in bytes for specified page /// size. static inline size_t dbsize_max(intptr_t pagesize); - /// \brief Returns the minimal key size in bytes for specified database + /// \brief Returns the minimal key size in bytes for specified table /// flags. static inline size_t key_min(MDBX_db_flags_t flags) noexcept; /// \brief Returns the minimal key size in bytes for specified keys mode. static inline size_t key_min(key_mode mode) noexcept; /// \brief Returns the maximal key size in bytes for specified page size and - /// database flags. + /// table flags. static inline size_t key_max(intptr_t pagesize, MDBX_db_flags_t flags); /// \brief Returns the maximal key size in bytes for specified page size and /// keys mode. static inline size_t key_max(intptr_t pagesize, key_mode mode); /// \brief Returns the maximal key size in bytes for given environment and - /// database flags. + /// table flags. static inline size_t key_max(const env &, MDBX_db_flags_t flags); /// \brief Returns the maximal key size in bytes for given environment and /// keys mode. static inline size_t key_max(const env &, key_mode mode); - /// \brief Returns the minimal values size in bytes for specified database + /// \brief Returns the minimal values size in bytes for specified table /// flags. 
static inline size_t value_min(MDBX_db_flags_t flags) noexcept; /// \brief Returns the minimal values size in bytes for specified values @@ -3799,41 +3800,41 @@ public: static inline size_t value_min(value_mode) noexcept; /// \brief Returns the maximal value size in bytes for specified page size - /// and database flags. + /// and table flags. static inline size_t value_max(intptr_t pagesize, MDBX_db_flags_t flags); /// \brief Returns the maximal value size in bytes for specified page size /// and values mode. static inline size_t value_max(intptr_t pagesize, value_mode); /// \brief Returns the maximal value size in bytes for given environment and - /// database flags. + /// table flags. static inline size_t value_max(const env &, MDBX_db_flags_t flags); /// \brief Returns the maximal value size in bytes for specified page size /// and values mode. static inline size_t value_max(const env &, value_mode); /// \brief Returns maximal size of key-value pair to fit in a single page - /// for specified size and database flags. + /// for specified size and table flags. static inline size_t pairsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags); /// \brief Returns maximal size of key-value pair to fit in a single page /// for specified page size and values mode. static inline size_t pairsize4page_max(intptr_t pagesize, value_mode); /// \brief Returns maximal size of key-value pair to fit in a single page - /// for given environment and database flags. + /// for given environment and table flags. static inline size_t pairsize4page_max(const env &, MDBX_db_flags_t flags); /// \brief Returns maximal size of key-value pair to fit in a single page /// for specified page size and values mode. static inline size_t pairsize4page_max(const env &, value_mode); /// \brief Returns maximal data size in bytes to fit in a leaf-page or - /// single large/overflow-page for specified size and database flags. + /// single large/overflow-page for specified size and table flags. 
static inline size_t valsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags); /// \brief Returns maximal data size in bytes to fit in a leaf-page or /// single large/overflow-page for specified page size and values mode. static inline size_t valsize4page_max(intptr_t pagesize, value_mode); /// \brief Returns maximal data size in bytes to fit in a leaf-page or - /// single large/overflow-page for given environment and database flags. + /// single large/overflow-page for given environment and table flags. static inline size_t valsize4page_max(const env &, MDBX_db_flags_t flags); /// \brief Returns maximal data size in bytes to fit in a leaf-page or /// single large/overflow-page for specified page size and values mode. @@ -3960,7 +3961,7 @@ public: /// \see extra_runtime_option::max_readers inline unsigned max_readers() const; - /// \brief Returns the maximum number of named databases for the environment. + /// \brief Returns the maximum number of named tables for the environment. /// \see extra_runtime_option::max_maps inline unsigned max_maps() const; @@ -4104,7 +4105,7 @@ public: /// \brief Close a key-value map (aka table) handle. Normally /// unnecessary. /// - /// Closing a database handle is not necessary, but lets \ref txn::open_map() + /// Closing a table handle is not necessary, but lets \ref txn::open_map() /// reuse the handle value. Usually it's better to set a bigger /// \ref env::operate_parameters::max_maps, unless that value would be /// large. @@ -4115,8 +4116,8 @@ public: /// of libmdbx (\ref MithrilDB) will solve this issue. /// /// Handles should only be closed if no other threads are going to reference - /// the database handle or one of its cursors any further. Do not close a - /// handle if an existing transaction has modified its database. Doing so can + /// the table handle or one of its cursors any further. Do not close a + /// handle if an existing transaction has modified its table. 
Doing so can /// cause misbehavior from database corruption to errors like /// \ref MDBX_BAD_DBI (since the DB name is gone). inline void close_map(const map_handle &); @@ -4205,8 +4206,8 @@ public: /// object from the own class destructor, but disallows copying and assignment /// for instances. /// -/// An environment supports multiple key-value databases (aka key-value spaces -/// or tables), all residing in the same shared-memory map. +/// An environment supports multiple key-value tables (aka key-value spaces +/// or maps), all residing in the same shared-memory mapped file. class LIBMDBX_API_TYPE env_managed : public env { using inherited = env; /// delegated constructor for RAII @@ -4262,7 +4263,7 @@ public: /// \brief Explicitly closes the environment and release the memory map. /// - /// Only a single thread may call this function. All transactions, databases, + /// Only a single thread may call this function. All transactions, tables, /// and cursors must already be closed before calling this function. Attempts /// to use any such handles after calling this function will cause a /// `SIGSEGV`. The environment handle will be freed and must not be used again @@ -4522,7 +4523,7 @@ public: /// \brief Returns statistics for a table. inline map_stat get_map_stat(map_handle map) const; /// \brief Returns depth (bitmask) information of nested dupsort (multi-value) - /// B+trees for given database. + /// B+trees for given table. inline uint32_t get_tree_deepmask(map_handle map) const; /// \brief Returns information about key-value map (aka table) handle. inline map_handle::info get_handle_info(map_handle map) const; @@ -4571,11 +4572,11 @@ public: /// multimap (aka table). inline slice get(map_handle map, slice key, size_t &values_count, const slice &value_at_absence) const; - /// \brief Get value for equal or great key from a database. + /// \brief Get value for equal or great key from a table. 
/// \return Bundle of key-value pair and boolean flag, /// which will be `true` if the exact key was found and `false` otherwise. inline pair_result get_equal_or_great(map_handle map, const slice &key) const; - /// \brief Get value for equal or great key from a database. + /// \brief Get value for equal or great key from a table. /// \return Bundle of key-value pair and boolean flag, /// which will be `true` if the exact key was found and `false` otherwise. inline pair_result get_equal_or_great(map_handle map, const slice &key, From 81807f16b2d182064a9a4e4054dc39fb7876b842 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 10 Sep 2024 08:41:23 +0300 Subject: [PATCH 267/443] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5?= =?UTF-8?q?=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20README.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index d82599b6..ecc95b0a 100644 --- a/README.md +++ b/README.md @@ -220,7 +220,8 @@ Thus syncing data to disk might be a bottleneck for write intensive workload. but read transactions prevents recycling an old retired/freed pages, since it read ones. Thus altering of data during a parallel long-lived read operation will increase the process work set, may exhaust entire free database space, the database can grow quickly, and result in performance degradation. -Try to avoid long running read transactions. +Try to avoid long running read transactions, otherwise use [transaction parking](https://libmdbx.dqdkfa.ru/group__c__transactions.html#ga2c2c97730ff35cadcedfbd891ac9b12f) +and/or [Handle-Slow-Readers callback](https://libmdbx.dqdkfa.ru/group__c__err.html#ga2cb11b56414c282fe06dd942ae6cade6). 5. 
_libmdbx_ is extraordinarily fast and provides minimal overhead for data access, so you should reconsider using brute force techniques and double check your code. @@ -278,45 +279,43 @@ the user's point of view. 5. The same database format for 32- and 64-bit builds. > _libmdbx_ database format depends only on the [endianness](https://en.wikipedia.org/wiki/Endianness) but not on the [bitness](https://en.wiktionary.org/wiki/bitness). -6. LIFO policy for Garbage Collection recycling. This can significantly increase write performance due write-back disk cache up to several times in a best case scenario. +6. The "Big Foot" feature that solves specific performance issues with huge transactions and extra-large page-number-lists. + +7. LIFO policy for Garbage Collection recycling. This can significantly increase write performance due write-back disk cache up to several times in a best case scenario. > LIFO means that for reuse will be taken the latest becomes unused pages. > Therefore the loop of database pages circulation becomes as short as possible. > In other words, the set of pages, that are (over)written in memory and on disk during a series of write transactions, will be as small as possible. > Thus creates ideal conditions for the battery-backed or flash-backed disk cache efficiency. -7. Fast estimation of range query result volume, i.e. how many items can +8. Parking of read transactions with ousting and auto-restart, [Handle-Slow-Readers callback](https://libmdbx.dqdkfa.ru/group__c__err.html#ga2cb11b56414c282fe06dd942ae6cade6) to resolve issues due to long-lived read transactions. + +9. Fast estimation of range query result volume, i.e. how many items can be found between a `KEY1` and a `KEY2`. This is a prerequisite for build and/or optimize query execution plans. > _libmdbx_ performs a rough estimate based on common B-tree pages of the paths from root to corresponding keys. -8. Database integrity check API both with standalone `mdbx_chk` utility. +10.
Database integrity check API both with standalone `mdbx_chk` utility. -9. Support for opening databases in the exclusive mode, including on a network share. - -10. Zero-length for keys and values. - -11. Ability to determine whether the particular data is on a dirty page -or not, that allows to avoid copy-out before updates. +11. Support for opening databases in the exclusive mode, including on a network share. 12. Extended information of whole-database, tables/sub-databases, transactions, readers enumeration. > _libmdbx_ provides a lot of information, including dirty and leftover pages > for a write transaction, reading lag and holdover space for read transactions. -13. Extended update and delete operations. - > _libmdbx_ allows one _at once_ with getting previous value - > and addressing the particular item from multi-value with the same key. +13. Support of Zero-length for keys and values. 14. Useful runtime options for tuning engine to application's requirements and use cases specific. 15. Automated steady sync-to-disk upon several thresholds and/or timeout via cheap polling. -16. Sequence generation and three persistent 64-bit markers. +16. Ability to determine whether the particular data is on a dirty page +or not, that allows to avoid copy-out before updates. -17. Handle-Slow-Readers callback to resolve a database full/overflow issues due to long-lived read transaction(s). - -18. Ability to determine whether the cursor is pointed to a key-value -pair, to the first, to the last, or not set to anything. +17. Extended update and delete operations. + > _libmdbx_ allows one _at once_ with getting previous value + > and addressing the particular item from multi-value with the same key. +18. Sequence generation and three persistent 64-bit vector-clock like markers. 
## Other fixes and specifics From 2e14404837a30b790bc152fbd07820c7790d374a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 10 Sep 2024 08:49:16 +0300 Subject: [PATCH 268/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index bb2b9085..a9a67ea0 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -4,6 +4,24 @@ ChangeLog English version [by liar Google](https://gitflic-ru.translate.goog/project/erthink/libmdbx/blob?file=ChangeLog.md&_x_tr_sl=ru&_x_tr_tl=en) and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md). + +## v0.13.2 в процессе + +Поддерживающий выпуск с исправлением обнаруженных ошибок и устранением недочетов. + +Исправления: + + - Добавлены упущенные inline-реализации `mdbx::cursor::upper_bound()` и `mdbx::cursor::upper_bound_multivalue()`. + - Корректировка описания С++ API для использования термина "таблица" вместо "sub-database". + +Мелочи: + + - Теперь `MDBX_ENABLE_BIGFOOT` включена по-умолчанию вне зависимости от разрядности платформы. + - Дополнение README и исправление опечаток/орфографии. 
+ +-------------------------------------------------------------------------------- + + ## v0.13.1 "РДС-1" от 2024-08-29 Новая версия со сменой лицензии, существенным расширением API, From 74f7d134559cca79a11437f7cab390490f7d0c8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 13 Sep 2024 11:07:38 +0300 Subject: [PATCH 269/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D1=82=D0=BB=D0=B0=D0=B4?= =?UTF-8?q?=D0=BE=D1=87=D0=BD=D0=BE=D0=B3=D0=BE=20=D0=BB=D0=BE=D0=B3=D0=B8?= =?UTF-8?q?=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=B2=D0=BD=D1=83?= =?UTF-8?q?=D1=82=D1=80=D0=B8=20`dxb=5Fresize()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dxb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dxb.c b/src/dxb.c index 82a503b5..b57d0e0a 100644 --- a/src/dxb.c +++ b/src/dxb.c @@ -157,10 +157,10 @@ __cold int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, const void *const prev_map = env->dxb_mmap.base; #endif /* MDBX_ENABLE_MADVISE || ENABLE_MEMCHECK */ - VERBOSE("resize/%d datafile/mapping: " + VERBOSE("resize(env-flags 0x%x, mode %d) datafile/mapping: " "present %" PRIuPTR " -> %" PRIuPTR ", " "limit %" PRIuPTR " -> %" PRIuPTR, - mode, prev_size, size_bytes, prev_limit, limit_bytes); + env->flags, mode, prev_size, size_bytes, prev_limit, limit_bytes); eASSERT(env, limit_bytes >= size_bytes); eASSERT(env, bytes2pgno(env, size_bytes) >= size_pgno); From 14a55ee244224e884941ec33f0459661e3a768a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 14 Sep 2024 20:27:10 +0300 Subject: [PATCH 270/443] =?UTF-8?q?mdbx++:=20=D0=BF=D0=BE=D0=B4=D0=B0?= 
=?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=B0=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=BE=D0=B8=D0=B4=D0=B0=D0=BB=D1=8C=D0=BD=D0=BE=D0=B3?= =?UTF-8?q?=D0=BE=20=D0=BF=D1=80=D0=B5=D0=B4=D1=83=D0=BF=D1=80=D0=B5=D0=B6?= =?UTF-8?q?=D0=B4=D0=B5=D0=BD=D0=B8=D1=8F=20MSVC.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdbx.h++ b/mdbx.h++ index fe92607a..91ac0b70 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -1736,7 +1736,7 @@ private: return capacity_bytes < sizeof(bin); } - enum : byte { lastbyte_inplace_signature = byte(~0u) }; + enum : byte { lastbyte_inplace_signature = byte(~byte(0)) }; enum : size_t { inplace_signature_limit = size_t(lastbyte_inplace_signature) From 202cdbc4be5634536a2acab549b8dd5c7bf5c654 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 14 Sep 2024 20:29:16 +0300 Subject: [PATCH 271/443] =?UTF-8?q?mdbx-testing:=20=D0=BF=D0=BE=D0=B4?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=B0=D1=80?= =?UTF-8?q?=D0=B0=D0=BD=D0=BE=D0=B8=D0=B4=D0=B0=D0=BB=D1=8C=D0=BD=D1=8B?= =?UTF-8?q?=D1=85=20=D0=BF=D1=80=D0=B5=D0=B4=D1=83=D0=BF=D1=80=D0=B5=D0=B6?= =?UTF-8?q?=D0=B4=D0=B5=D0=BD=D0=B8=D0=B9=20MSVC=20=D0=B2=20extra-=D1=82?= =?UTF-8?q?=D0=B5=D1=81=D1=82=D0=B0=D1=85.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/extra/crunched_delete.c++ | 24 +++++++++++++----------- test/extra/hex_base64_base58.c++ | 2 +- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/test/extra/crunched_delete.c++ b/test/extra/crunched_delete.c++ index edb91d5b..5b4a420e 100644 --- a/test/extra/crunched_delete.c++ +++ b/test/extra/crunched_delete.c++ @@ -64,7 +64,7 @@ static mdbx::slice mk(mdbx::default_buffer &buf, unsigned min, unsigned max) { unsigned len = (min < max) ? 
min + prng_fast(seed) % (max - min) : min; buf.clear_and_reserve(len); for (unsigned i = 0; i < len; ++i) - buf.append_byte(prng_fast(seed)); + buf.append_byte(mdbx::byte(prng_fast(seed))); return buf.slice(); } @@ -112,28 +112,30 @@ static void chunched_delete(mdbx::txn txn, const acase &thecase, { auto cursor = txn.open_cursor(map); while (true) { - const unsigned all = cursor.txn().get_map_stat(cursor.map()).ms_entries; + const auto all = cursor.txn().get_map_stat(cursor.map()).ms_entries; // printf("== seek random of %u\n", all); const char *last_op; bool last_r; - if ((last_op = "MDBX_GET_BOTH", - last_r = cursor.find_multivalue(mk_key(k, thecase), - mk_val(v, thecase), false)) || + if (true == ((last_op = "MDBX_GET_BOTH"), + (last_r = cursor.find_multivalue( + mk_key(k, thecase), mk_val(v, thecase), false))) || rnd() % 3 == 0 || - (last_op = "MDBX_SET_RANGE", - last_r = cursor.lower_bound(mk_key(k, thecase), false))) { + true == ((last_op = "MDBX_SET_RANGE"), + (last_r = cursor.lower_bound(mk_key(k, thecase), false)))) { int i = int(rnd() % 7) - 3; // if (i) // printf(" %s -> %s\n", last_op, last_r ? 
"true" : "false"); // printf("== shift multi %i\n", i); try { - while (i < 0 && (last_op = "MDBX_PREV_DUP", - last_r = cursor.to_current_prev_multi(false))) + while (i < 0 && + true == ((last_op = "MDBX_PREV_DUP"), + (last_r = cursor.to_current_prev_multi(false)))) ++i; - while (i > 0 && (last_op = "MDBX_NEXT_DUP", - last_r = cursor.to_current_next_multi(false))) + while (i > 0 && + true == ((last_op = "MDBX_NEXT_DUP"), + (last_r = cursor.to_current_next_multi(false)))) --i; } catch (const mdbx::no_data &) { printf("cursor_del() -> exception, last %s %s\n", last_op, diff --git a/test/extra/hex_base64_base58.c++ b/test/extra/hex_base64_base58.c++ index 652c9e33..096c5821 100644 --- a/test/extra/hex_base64_base58.c++ +++ b/test/extra/hex_base64_base58.c++ @@ -17,7 +17,7 @@ static buffer random(size_t length) { buffer result(length); #if defined(__cpp_lib_span) && __cpp_lib_span >= 202002L for (auto &i : result.bytes()) - i = prng(); + i = mdbx::byte(prng()); #else for (auto p = result.byte_ptr(); p < result.end_byte_ptr(); ++p) *p = mdbx::byte(prng()); From 29d0a9681897e53ab1eeefc70f1b76f61f60ed2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 16 Sep 2024 22:51:19 +0300 Subject: [PATCH 272/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=83=D1=81=D0=BB=D0=BE?= =?UTF-8?q?=D0=B2=D0=B8=D1=8F=20=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B8=20`as?= =?UTF-8?q?sert()`=20=D0=B2=20=D0=BF=D1=83=D1=82=D0=B8=20=D0=BE=D0=B1?= =?UTF-8?q?=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D0=BA=D0=B8=20`MDBX=5FGET/NEXT/P?= =?UTF-8?q?REV=5FMULTIPLE`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/cursor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cursor.c b/src/cursor.c index 54ded76c..196e1dd9 100644 --- a/src/cursor.c +++ b/src/cursor.c @@ -2191,7 +2191,7 @@ 
__hot int cursor_ops(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, return rc; else { fetch_multiple: - cASSERT(mc, is_filled(mc) && !inner_filled(mc)); + cASSERT(mc, is_filled(mc) && inner_filled(mc)); MDBX_cursor *mx = &mc->subcur->cursor; data->iov_len = page_numkeys(mx->pg[mx->top]) * mx->tree->dupfix_size; data->iov_base = page_data(mx->pg[mx->top]); From f7385527216079946b6c6ccf869238cf2e1dffe9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 18 Sep 2024 07:46:19 +0300 Subject: [PATCH 273/443] =?UTF-8?q?mdbx:=20=D0=B2=D0=BE=D0=B7=D0=B2=D1=80?= =?UTF-8?q?=D0=B0=D1=89=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BA=D0=BB=D1=8E=D1=87?= =?UTF-8?q?=D0=B0=20=D0=BF=D1=80=D0=B8=20`MDBX=5FGET=5FMULTIPLE`=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20=D0=B5=D0=B4=D0=B8=D0=BD=D0=BE=D0=BE=D0=B1=D1=80?= =?UTF-8?q?=D0=B0=D0=B7=D0=B8=D1=8F=20C++=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/cursor.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/cursor.c b/src/cursor.c index 196e1dd9..2d2e0fa4 100644 --- a/src/cursor.c +++ b/src/cursor.c @@ -2176,9 +2176,16 @@ __hot int cursor_ops(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, rc = cursor_seek(mc, key, data, MDBX_SET).err; if (unlikely(rc != MDBX_SUCCESS)) return rc; + } else { + if (unlikely(is_eof(mc) || !inner_filled(mc))) + return MDBX_ENODATA; + cASSERT(mc, is_filled(mc)); + if (key) { + const page_t *mp = mc->pg[mc->top]; + const node_t *node = page_node(mp, mc->ki[mc->top]); + *key = get_key(node); + } } - if (unlikely(is_eof(mc) || !inner_filled(mc))) - return MDBX_ENODATA; goto fetch_multiple; case MDBX_NEXT_MULTIPLE: From 07309427fd3b42f93965e44fe9e58040baaaba82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 18 Sep 2024 
07:41:11 +0300 Subject: [PATCH 274/443] =?UTF-8?q?mdbx++:=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D0=B8=D0=BC=D0=B5=D0=BD=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20?= =?UTF-8?q?`mdbx::txn::put=5Fmultiple=5Fsamelength()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 34 +++++++++++++------------ test/extra/dupfix_multiple.c++ | 45 ++++++++++++++++++++++------------ 2 files changed, 48 insertions(+), 31 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 91ac0b70..8ccbc9d4 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4669,25 +4669,26 @@ public: return append(map, kv.key, kv.value, multivalue_order_preserved); } - size_t put_multiple(map_handle map, const slice &key, - const size_t value_length, const void *values_array, - size_t values_count, put_mode mode, - bool allow_partial = false); + size_t put_multiple_samelength(map_handle map, const slice &key, + const size_t value_length, + const void *values_array, size_t values_count, + put_mode mode, bool allow_partial = false); template - size_t put_multiple(map_handle map, const slice &key, - const VALUE *values_array, size_t values_count, - put_mode mode, bool allow_partial = false) { + size_t put_multiple_samelength(map_handle map, const slice &key, + const VALUE *values_array, size_t values_count, + put_mode mode, bool allow_partial = false) { static_assert(::std::is_standard_layout::value && !::std::is_pointer::value && !::std::is_array::value, "Must be a standard layout type!"); - return put_multiple(map, key, sizeof(VALUE), values_array, values_count, - mode, allow_partial); + return put_multiple_samelength(map, key, sizeof(VALUE), values_array, + values_count, mode, allow_partial); } template - void put_multiple(map_handle map, const slice &key, - const ::std::vector &vector, put_mode mode) { - put_multiple(map, key, vector.data(), vector.size(), mode); + void put_multiple_samelength(map_handle map, const slice &key, + const ::std::vector &vector, + put_mode 
mode) { + put_multiple_samelength(map, key, vector.data(), vector.size(), mode); } inline ptrdiff_t estimate(map_handle map, const pair &from, @@ -6913,10 +6914,11 @@ inline void txn::append(map_handle map, const slice &key, const slice &value, : MDBX_APPEND)); } -inline size_t txn::put_multiple(map_handle map, const slice &key, - const size_t value_length, - const void *values_array, size_t values_count, - put_mode mode, bool allow_partial) { +inline size_t txn::put_multiple_samelength(map_handle map, const slice &key, + const size_t value_length, + const void *values_array, + size_t values_count, put_mode mode, + bool allow_partial) { MDBX_val args[2] = {{const_cast(values_array), value_length}, {nullptr, values_count}}; const int err = ::mdbx_put(handle_, map.dbi, const_cast(&key), args, diff --git a/test/extra/dupfix_multiple.c++ b/test/extra/dupfix_multiple.c++ index 5a70be88..77b9e0d9 100644 --- a/test/extra/dupfix_multiple.c++ +++ b/test/extra/dupfix_multiple.c++ @@ -50,13 +50,20 @@ int main(int argc, const char *argv[]) { const uint64_t array[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 42, 17, 99, 0, 33, 333}; txn = env.start_write(); - txn.put_multiple(map, buffer::key_from_u64(13), array + 3, 4, mdbx::upsert); - txn.put_multiple(map, buffer::key_from_u64(10), array + 0, 1, mdbx::upsert); - txn.put_multiple(map, buffer::key_from_u64(12), array + 2, 3, mdbx::upsert); - txn.put_multiple(map, buffer::key_from_u64(15), array + 5, 6, mdbx::upsert); - txn.put_multiple(map, buffer::key_from_u64(14), array + 4, 5, mdbx::upsert); - txn.put_multiple(map, buffer::key_from_u64(11), array + 1, 2, mdbx::upsert); - txn.put_multiple(map, buffer::key_from_u64(16), array + 6, 7, mdbx::upsert); + txn.put_multiple_samelength(map, buffer::key_from_u64(13), array + 3, 4, + mdbx::upsert); + txn.put_multiple_samelength(map, buffer::key_from_u64(10), array + 0, 1, + mdbx::upsert); + txn.put_multiple_samelength(map, buffer::key_from_u64(12), array + 2, 3, + mdbx::upsert); + 
txn.put_multiple_samelength(map, buffer::key_from_u64(15), array + 5, 6, + mdbx::upsert); + txn.put_multiple_samelength(map, buffer::key_from_u64(14), array + 4, 5, + mdbx::upsert); + txn.put_multiple_samelength(map, buffer::key_from_u64(11), array + 1, 2, + mdbx::upsert); + txn.put_multiple_samelength(map, buffer::key_from_u64(16), array + 6, 7, + mdbx::upsert); txn.commit(); txn = env.start_read(); @@ -119,21 +126,29 @@ int main(int argc, const char *argv[]) { txn.abort(); txn = env.start_write(); - txn.put_multiple(map, buffer::key_from_u64(7), array + 3, 4, mdbx::update); + txn.put_multiple_samelength(map, buffer::key_from_u64(7), array + 3, 4, + mdbx::update); txn.upsert(map, buffer::key_from_u64(10), buffer::key_from_u64(14)); - txn.put_multiple(map, buffer::key_from_u64(11), array + 4, 5, mdbx::upsert); - txn.put_multiple(map, buffer::key_from_u64(12), array + 0, 1, mdbx::update); + txn.put_multiple_samelength(map, buffer::key_from_u64(11), array + 4, 5, + mdbx::upsert); + txn.put_multiple_samelength(map, buffer::key_from_u64(12), array + 0, 1, + mdbx::update); txn.update(map, buffer::key_from_u64(13), buffer::key_from_u64(18)); - txn.put_multiple(map, buffer::key_from_u64(14), array + 2, 3, mdbx::update); + txn.put_multiple_samelength(map, buffer::key_from_u64(14), array + 2, 3, + mdbx::update); txn.update(map, buffer::key_from_u64(15), buffer::key_from_u64(13)); - txn.put_multiple(map, buffer::key_from_u64(16), array + 6, 9, mdbx::update); + txn.put_multiple_samelength(map, buffer::key_from_u64(16), array + 6, 9, + mdbx::update); txn.update(map, buffer::key_from_u64(21), buffer::key_from_u64(17)); txn.update(map, buffer::key_from_u64(22), buffer::key_from_u64(15)); - txn.put_multiple(map, buffer::key_from_u64(23), array + 1, 2, mdbx::update); + txn.put_multiple_samelength(map, buffer::key_from_u64(23), array + 1, 2, + mdbx::update); txn.update(map, buffer::key_from_u64(24), buffer::key_from_u64(16)); - txn.put_multiple(map, buffer::key_from_u64(25), array 
+ 5, 6, mdbx::update); + txn.put_multiple_samelength(map, buffer::key_from_u64(25), array + 5, 6, + mdbx::update); txn.upsert(map, buffer::key_from_u64(26), buffer::key_from_u64(12)); - txn.put_multiple(map, buffer::key_from_u64(27), array + 12, 3, mdbx::update); + txn.put_multiple_samelength(map, buffer::key_from_u64(27), array + 12, 3, + mdbx::update); txn.commit(); txn = env.start_read(); From ec41ec1561e1afa0e3696925d14050ee30de5c19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 18 Sep 2024 07:43:19 +0300 Subject: [PATCH 275/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx::cursor::get=5Fmul?= =?UTF-8?q?tiple=5Fsamelength()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index 8ccbc9d4..98b878ff 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4868,6 +4868,10 @@ public: pair_exact = pair_equal, pair_greater_or_equal = MDBX_TO_PAIR_GREATER_OR_EQUAL, pair_greater_than = MDBX_TO_PAIR_GREATER_THAN, + + batch_samelength = MDBX_GET_MULTIPLE, + batch_samelength_next = MDBX_NEXT_MULTIPLE, + batch_samelength_previous = MDBX_PREV_MULTIPLE }; struct move_result : public pair_result { @@ -5133,6 +5137,23 @@ public: const slice &value, bool throw_notfound = false); + inline move_result get_multiple_samelength(const slice &key, + bool throw_notfound = true) { + return move(batch_samelength, key, throw_notfound); + } + + inline move_result get_multiple_samelength(bool throw_notfound = false) { + return move(batch_samelength, throw_notfound); + } + + inline move_result next_multiple_samelength(bool throw_notfound = false) { + return move(batch_samelength_next, throw_notfound); + } + + inline move_result previous_multiple_samelength(bool throw_notfound = false) { + 
return move(batch_samelength_previous, throw_notfound); + } + inline bool eof() const; inline bool on_first() const; inline bool on_last() const; From c9645239781451059fa758affa5f4e37c6328b80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 18 Sep 2024 07:43:33 +0300 Subject: [PATCH 276/443] =?UTF-8?q?mdbx-testing:=20=D0=B4=D0=BE=D0=B1?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`get=5Fmultiple=5F?= =?UTF-8?q?samelength()`=20=D0=B2=20`extra/dupfix=5Fmultiple`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/extra/dupfix_multiple.c++ | 61 ++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/test/extra/dupfix_multiple.c++ b/test/extra/dupfix_multiple.c++ index 77b9e0d9..2686feea 100644 --- a/test/extra/dupfix_multiple.c++ +++ b/test/extra/dupfix_multiple.c++ @@ -223,6 +223,67 @@ int main(int argc, const char *argv[]) { std::cerr << "Fail\n"; return EXIT_FAILURE; } + txn.abort(); + + //---------------------------------------------------------------------------- + + // let dir = tempdir().unwrap(); + // let db = Database::open(&dir).unwrap(); + + // let txn = db.begin_rw_txn().unwrap(); + // let table = txn + // .create_table(None, TableFlags::DUP_SORT | TableFlags::DUP_FIXED) + // .unwrap(); + // for (k, v) in [ + // (b"key1", b"val1"), + // (b"key1", b"val2"), + // (b"key1", b"val3"), + // (b"key2", b"val1"), + // (b"key2", b"val2"), + // (b"key2", b"val3"), + // ] { + // txn.put(&table, k, v, WriteFlags::empty()).unwrap(); + // } + + // let mut cursor = txn.cursor(&table).unwrap(); + // assert_eq!(cursor.first().unwrap(), Some((*b"key1", *b"val1"))); + // assert_eq!(cursor.get_multiple().unwrap(), Some(*b"val1val2val3")); + // assert_eq!(cursor.next_multiple::<(), ()>().unwrap(), None); + + txn = env.start_write(); + txn.clear_map(map); + map = 
txn.create_map(nullptr, mdbx::key_mode::usual, + mdbx::value_mode::multi_samelength); + txn.upsert(map, mdbx::slice("key1"), mdbx::slice("val1")); + txn.upsert(map, mdbx::pair("key1", "val2")); + txn.upsert(map, mdbx::pair("key1", "val3")); + txn.upsert(map, mdbx::slice("key2"), mdbx::slice("val1")); + txn.upsert(map, mdbx::pair("key2", "val2")); + txn.upsert(map, mdbx::pair("key2", "val3")); + + // cursor.close(); + cursor = txn.open_cursor(map); + const auto t1 = cursor.to_first(); + if (!t1 || t1.key != "key1" || t1.value != "val1") { + std::cerr << "Fail-t1\n"; + return EXIT_FAILURE; + } + const auto t2 = cursor.get_multiple_samelength(); + if (!t2 || t2.key != "key1" || t2.value != "val1val2val3") { + std::cerr << "Fail-t2\n"; + return EXIT_FAILURE; + } + // const auto t3 = cursor.get_multiple_samelength("key2"); + // if (!t3 || t3.key != "key2" || t3.value != "val1val2val3") { + // std::cerr << "Fail-t3\n"; + // return EXIT_FAILURE; + // } + const auto t4 = cursor.next_multiple_samelength(); + if (t4) { + std::cerr << "Fail-t4\n"; + return EXIT_FAILURE; + } + std::cout << "OK\n"; return EXIT_SUCCESS; } From c96714423d00bf14c8efd2317d44bf9027300cb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 18 Sep 2024 07:59:24 +0300 Subject: [PATCH 277/443] =?UTF-8?q?mdbx-cmake:=20=D0=B8=D1=81=D0=BF=D0=BE?= =?UTF-8?q?=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20`WIN32?= =?UTF-8?q?`=20=D0=B2=D0=BC=D0=B5=D1=81=D1=82=D0=BE=20`${CMAKE=5FSYSTEM=5F?= =?UTF-8?q?NAME}`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 8 ++++---- test/CMakeLists.txt | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a378bfc..cbb1a48e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -495,7 +495,7 @@ if(NOT DEFINED MDBX_C_STANDARD) endif() endif() 
-if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND EXISTS "${MDBX_SOURCE_DIR}/ntdll.def") +if(WIN32 AND EXISTS "${MDBX_SOURCE_DIR}/ntdll.def") if(MSVC) if(NOT MSVC_LIB_EXE) # Find lib.exe @@ -592,7 +592,7 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin" OR IOS) add_mdbx_option(MDBX_OSX_SPEED_INSTEADOF_DURABILITY "Disable use fcntl(F_FULLFSYNC) in favor of speed" OFF) mark_as_advanced(MDBX_OSX_SPEED_INSTEADOF_DURABILITY) endif() -if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") +if(WIN32) if(MDBX_NTDLL_EXTRA_IMPLIB) add_mdbx_option(MDBX_WITHOUT_MSVC_CRT "Avoid dependence from MSVC CRT and use ntdll.dll instead" OFF) endif() @@ -825,7 +825,7 @@ macro(libmdbx_setup_libs TARGET MODE) else() target_link_libraries(${TARGET} ${MODE} Threads::Threads) endif() - if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + if(WIN32) target_link_libraries(${TARGET} ${MODE} ntdll user32 kernel32 advapi32 ole32) if(MDBX_NTDLL_EXTRA_IMPLIB AND MDBX_WITHOUT_MSVC_CRT) target_link_libraries(${TARGET} ${MODE} ntdll_extra) @@ -913,7 +913,7 @@ endif() # build mdbx-tools if(MDBX_BUILD_TOOLS) set(WINGETOPT_SRC "") - if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + if(WIN32) set(WINGETOPT_SRC ${MDBX_SOURCE_DIR}/tools/wingetopt.c ${MDBX_SOURCE_DIR}/tools/wingetopt.h) endif() diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f945255d..c5537f87 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -53,7 +53,7 @@ target_setup_options(mdbx_test) if(NOT MDBX_BUILD_CXX) target_compile_definitions(mdbx_test PRIVATE MDBX_BUILD_CXX=1) - if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + if(WIN32) target_compile_definitions(mdbx_test PRIVATE MDBX_WITHOUT_MSVC_CRT=0) endif() endif() @@ -71,7 +71,7 @@ if(CMAKE_VERSION VERSION_LESS 3.1) else() target_link_libraries(mdbx_test ${TOOL_MDBX_LIB} ${LIB_MATH} Threads::Threads) endif() -if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") +if(WIN32) target_link_libraries(mdbx_test winmm.lib) endif() From 42ca4edec8c348b090c1cd156735016f2c15f6b0 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 18 Sep 2024 21:23:36 +0300 Subject: [PATCH 278/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index a9a67ea0..2d316b90 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -13,11 +13,19 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Добавлены упущенные inline-реализации `mdbx::cursor::upper_bound()` и `mdbx::cursor::upper_bound_multivalue()`. - Корректировка описания С++ API для использования термина "таблица" вместо "sub-database". + - Исправление условия внутри `assert()` в пути обработки `MDBX_GET/NEXT/PREV_MULTIPLE`. + +Новое: + - Добавление `mdbx::cursor::get_multiple_samelength()` и переименование `mdbx::txn::put_multiple_samelength()`. + - Возвращение ключа при `MDBX_GET_MULTIPLE` для единообразия C++ API. Мелочи: - Теперь `MDBX_ENABLE_BIGFOOT` включена по-умолчанию вне зависимости от разрядности платформы. - Дополнение README и исправление опечаток/орфографии. + - Использование `WIN32` вместо `${CMAKE_SYSTEM_NAME}`. + - Подавление параноидальных предупреждений MSVC в extra-тестах. + - Дополнение отладочного логирования внутри `dxb_resize()`. 
-------------------------------------------------------------------------------- From 9fa76a56fcb548f539094d2168e5cb3406d4e7e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 28 Sep 2024 08:22:14 +0300 Subject: [PATCH 279/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`#ifdef`=20=D0=B4=D0=BB=D1=8F?= =?UTF-8?q?=20iPhone.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/osal.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/osal.c b/src/osal.c index d99cd630..ecb4219c 100644 --- a/src/osal.c +++ b/src/osal.c @@ -3509,7 +3509,9 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, #include #endif /* FreeBSD */ -#if __GLIBC_PREREQ(2, 25) || defined(__FreeBSD__) || defined(__NetBSD__) || \ +#ifdef __IPHONE_OS_VERSION_MIN_REQUIRED +#include +#elif __GLIBC_PREREQ(2, 25) || defined(__FreeBSD__) || defined(__NetBSD__) || \ defined(__BSD__) || defined(__bsdi__) || defined(__DragonFly__) || \ defined(__APPLE__) || __has_include() #include From 0178d5b5c8547ba4e6c6d57c6beb47b5ab9e24a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 28 Sep 2024 22:11:09 +0300 Subject: [PATCH 280/443] =?UTF-8?q?mdbx-testing:=20=D1=83=D0=BC=D0=B5?= =?UTF-8?q?=D0=BD=D1=8C=D1=88=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BA=D0=BE=D0=BB?= =?UTF-8?q?-=D0=B2=D0=B0=20=D0=B8=D1=82=D0=B5=D1=80=D0=B0=D1=86=D0=B8?= =?UTF-8?q?=D0=B9=20`extra/crunched=5Fdelete`=20=D0=B4=D0=BB=D1=8F=20Windo?= =?UTF-8?q?ws.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit До этих изменений тесты на CI могли длиться несколько часов и завершаться по таймауту, что неприемлемо. 
--- test/extra/crunched_delete.c++ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/extra/crunched_delete.c++ b/test/extra/crunched_delete.c++ index 5b4a420e..2158f33a 100644 --- a/test/extra/crunched_delete.c++ +++ b/test/extra/crunched_delete.c++ @@ -5,7 +5,7 @@ #include #include -#if MDBX_DEBUG || !defined(NDEBUG) || defined(__APPLE__) +#if MDBX_DEBUG || !defined(NDEBUG) || defined(__APPLE__) || defined(_WIN32) #define NN 1024 #else #define NN 16384 From 54dfc1f16de825896b0325ecfe50fe4c028d26b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 18 Sep 2024 08:01:14 +0300 Subject: [PATCH 281/443] =?UTF-8?q?mdbx-testing:=20=D1=83=D0=BD=D0=B8?= =?UTF-8?q?=D1=84=D0=B8=D0=BA=D0=B0=D1=86=D0=B8=D1=8F=20extra-=D1=82=D0=B5?= =?UTF-8?q?=D1=81=D1=82=D0=BE=D0=B2=20=D0=B8=20=D0=B8=D0=BD=D1=82=D0=B5?= =?UTF-8?q?=D0=B3=D1=80=D0=B0=D1=86=D0=B8=D1=8F=20=D0=B2=20ctest.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 34 ++++++++++++++++------------ test/extra/crunched_delete.c++ | 6 ++--- test/extra/dbi.c++ | 8 +++---- test/extra/doubtless_positioning.c++ | 7 +++--- test/extra/dupfix_multiple.c++ | 8 +++---- test/extra/hex_base64_base58.c++ | 1 - test/extra/maindb_ordinal.c++ | 10 ++++---- 7 files changed, 36 insertions(+), 38 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c5537f87..77004853 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -75,19 +75,20 @@ if(WIN32) target_link_libraries(mdbx_test winmm.lib) endif() -if(UNIX AND NOT SUBPROJECT) - add_executable(test_extra_pcrf extra/pcrf/pcrf_test.c) - target_include_directories(test_extra_pcrf PRIVATE "${PROJECT_SOURCE_DIR}") - target_link_libraries(test_extra_pcrf ${TOOL_MDBX_LIB}) +if(NOT SUBPROJECT) + if(UNIX) + add_executable(test_extra_pcrf extra/pcrf/pcrf_test.c) + 
target_include_directories(test_extra_pcrf PRIVATE "${PROJECT_SOURCE_DIR}") + target_link_libraries(test_extra_pcrf ${TOOL_MDBX_LIB}) - add_executable(test_extra_upsert_alldups extra/upsert_alldups.c) - target_include_directories(test_extra_upsert_alldups PRIVATE "${PROJECT_SOURCE_DIR}") - target_link_libraries(test_extra_upsert_alldups ${TOOL_MDBX_LIB}) - - add_executable(test_extra_dupfix_addodd extra/dupfix_addodd.c) - target_include_directories(test_extra_dupfix_addodd PRIVATE "${PROJECT_SOURCE_DIR}") - target_link_libraries(test_extra_dupfix_addodd ${TOOL_MDBX_LIB}) + add_executable(test_extra_upsert_alldups extra/upsert_alldups.c) + target_include_directories(test_extra_upsert_alldups PRIVATE "${PROJECT_SOURCE_DIR}") + target_link_libraries(test_extra_upsert_alldups ${TOOL_MDBX_LIB}) + add_executable(test_extra_dupfix_addodd extra/dupfix_addodd.c) + target_include_directories(test_extra_dupfix_addodd PRIVATE "${PROJECT_SOURCE_DIR}") + target_link_libraries(test_extra_dupfix_addodd ${TOOL_MDBX_LIB}) + endif() if(MDBX_BUILD_CXX) add_executable(test_extra_maindb_ordinal extra/maindb_ordinal.c++) target_include_directories(test_extra_maindb_ordinal PRIVATE "${PROJECT_SOURCE_DIR}") @@ -210,10 +211,12 @@ else() REQUIRED_FILES uniq_nested.db-copy) endif() - if(UNIX AND NOT SUBPROJECT) - add_test(NAME extra_upsert_alldups COMMAND test_extra_upsert_alldups) - add_test(NAME extra_dupfix_addodd COMMAND test_extra_dupfix_addodd) - if(MDBX_BUILD_CXX) + if(NOT SUBPROJECT) + if(UNIX) + add_test(NAME extra_upsert_alldups COMMAND test_extra_upsert_alldups) + add_test(NAME extra_dupfix_addodd COMMAND test_extra_dupfix_addodd) + endif() + if(MDBX_BUILD_CXX AND NOT (WIN32 AND MDBX_BUILD_SHARED_LIBRARY)) add_test(NAME extra_maindb_ordinal COMMAND test_extra_maindb_ordinal) add_test(NAME extra_dupfix_multiple COMMAND test_extra_dupfix_multiple) add_test(NAME extra_hex_base64_base58 COMMAND test_extra_hex_base64_base58) @@ -222,6 +225,7 @@ else() 
set_tests_properties(extra_doubtless_positioning PROPERTIES TIMEOUT 10800) endif() add_test(NAME extra_crunched_delete COMMAND test_extra_crunched_delete) + add_test(NAME extra_dbi COMMAND test_extra_dbi) endif() endif() diff --git a/test/extra/crunched_delete.c++ b/test/extra/crunched_delete.c++ index 2158f33a..89952ddf 100644 --- a/test/extra/crunched_delete.c++ +++ b/test/extra/crunched_delete.c++ @@ -377,8 +377,8 @@ int main(int argc, const char *argv[]) { mdbx_setup_debug_nofmt(MDBX_LOG_NOTICE, MDBX_DBG_ASSERT, logger_nofmt, log_buffer, sizeof(log_buffer)); - const char *filename = "test-crunched-del"; - mdbx::env::remove(filename); + mdbx::path db_filename = "test-crunched-del"; + mdbx::env::remove(db_filename); std::vector testset; // Там ключи разной длины - от 1 до 64 байт. @@ -394,7 +394,7 @@ int main(int argc, const char *argv[]) { testset.emplace_back(8, 8, 1, 5, 10); testset.emplace_back(8, 8, 32, 36, 9); - mdbx::env_managed env(filename, mdbx::env_managed::create_parameters(), + mdbx::env_managed env(db_filename, mdbx::env_managed::create_parameters(), mdbx::env::operate_parameters(42)); if (!simple(env) || !next_prev_current(env) || !outofrange_prev(env)) return EXIT_FAILURE; diff --git a/test/extra/dbi.c++ b/test/extra/dbi.c++ index 86dd0c4d..2a12be6a 100644 --- a/test/extra/dbi.c++ +++ b/test/extra/dbi.c++ @@ -18,19 +18,19 @@ int main(int argc, const char *argv[]) { mdbx_setup_debug_nofmt(MDBX_LOG_NOTICE, MDBX_DBG_ASSERT, logger_nofmt, log_buffer, sizeof(log_buffer)); - mdbx::path path = "test-dbi"; - mdbx::env::remove(path); + mdbx::path db_filename = "test-dbi"; + mdbx::env::remove(db_filename); mdbx::env::operate_parameters operateParameters(100, 10); mdbx::env_managed::create_parameters createParameters; { - mdbx::env_managed env2(path, createParameters, operateParameters); + mdbx::env_managed env2(db_filename, createParameters, operateParameters); mdbx::txn_managed txn2 = env2.start_write(false); /* mdbx::map_handle testHandle2 = */ 
txn2.create_map( "fap1", mdbx::key_mode::reverse, mdbx::value_mode::single); txn2.commit(); } - mdbx::env_managed env(path, createParameters, operateParameters); + mdbx::env_managed env(db_filename, createParameters, operateParameters); mdbx::txn_managed txn = env.start_write(false); /* mdbx::map_handle testHandle = */ txn.create_map( "fap1", mdbx::key_mode::usual, mdbx::value_mode::single); diff --git a/test/extra/doubtless_positioning.c++ b/test/extra/doubtless_positioning.c++ index e1f070b1..4cf710cd 100644 --- a/test/extra/doubtless_positioning.c++ +++ b/test/extra/doubtless_positioning.c++ @@ -6,7 +6,6 @@ #include #include #include -#include static ::std::ostream &operator<<(::std::ostream &out, const mdbx::cursor::move_operation op) { @@ -233,9 +232,9 @@ int main(int argc, const char *argv[]) { (void)argc; (void)argv; - unlink("." MDBX_DATANAME); - unlink("." MDBX_LOCKNAME); - mdbx::env_managed env(".", mdbx::env_managed::create_parameters(), + mdbx::path db_filename = "test-posi"; + mdbx::env_managed::remove(db_filename); + mdbx::env_managed env(db_filename, mdbx::env_managed::create_parameters(), mdbx::env::operate_parameters(3)); auto txn = env.start_write(); diff --git a/test/extra/dupfix_multiple.c++ b/test/extra/dupfix_multiple.c++ index 2686feea..3e1d323e 100644 --- a/test/extra/dupfix_multiple.c++ +++ b/test/extra/dupfix_multiple.c++ @@ -4,16 +4,14 @@ #include "mdbx.h++" #include #include -#include int main(int argc, const char *argv[]) { (void)argc; (void)argv; - unlink("." MDBX_DATANAME); - unlink("." 
MDBX_LOCKNAME); - - mdbx::env_managed env(".", mdbx::env_managed::create_parameters(), + mdbx::path db_filename = "test-dupfix-multiple"; + mdbx::env_managed::remove(db_filename); + mdbx::env_managed env(db_filename, mdbx::env_managed::create_parameters(), mdbx::env::operate_parameters()); using buffer = diff --git a/test/extra/hex_base64_base58.c++ b/test/extra/hex_base64_base58.c++ index 096c5821..f2419ebd 100644 --- a/test/extra/hex_base64_base58.c++ +++ b/test/extra/hex_base64_base58.c++ @@ -4,7 +4,6 @@ #include "mdbx.h++" #include #include -#include #include #include diff --git a/test/extra/maindb_ordinal.c++ b/test/extra/maindb_ordinal.c++ index b38c04ef..dc3fd597 100644 --- a/test/extra/maindb_ordinal.c++ +++ b/test/extra/maindb_ordinal.c++ @@ -3,16 +3,14 @@ #include "mdbx.h++" #include -#include int main(int argc, const char *argv[]) { (void)argc; (void)argv; - unlink("." MDBX_DATANAME); - unlink("." MDBX_LOCKNAME); - - mdbx::env_managed env(".", mdbx::env_managed::create_parameters(), + mdbx::path db_filename = "test-dupfix-multiple"; + mdbx::env_managed::remove(db_filename); + mdbx::env_managed env(db_filename, mdbx::env_managed::create_parameters(), mdbx::env::operate_parameters()); using buffer = @@ -23,7 +21,7 @@ int main(int argc, const char *argv[]) { #if 0 /* workaround */ txn.commit(); env.close(); - env = mdbx::env_managed(".", mdbx::env_managed::create_parameters(), + env = mdbx::env_managed(db_filename, mdbx::env_managed::create_parameters(), mdbx::env::operate_parameters()); txn = env.start_write(); #endif From b11998de01baeeca7979953f00e7e0aa055df874 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 7 Oct 2024 09:09:35 +0300 Subject: [PATCH 282/443] =?UTF-8?q?mdbx-cmake:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20dll-=D0=BA=D0=BE=D1=81?= 
=?UTF-8?q?=D1=82=D1=8B=D0=BB=D1=8F=20=D0=B4=D0=BB=D1=8F=20Windows=20?= =?UTF-8?q?=D0=B4=D0=BB=D1=8F=20=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D1=8B=20?= =?UTF-8?q?=D0=B8=D1=81=D0=BA=D0=BB=D1=8E=D1=87=D0=B5=D0=BD=D0=B8=D0=B9=20?= =?UTF-8?q?=D0=B2=20=D1=82=D0=B5=D1=81=D1=82=D0=B0=D1=85=20=D0=BD=D0=B0=20?= =?UTF-8?q?C++.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 13 ++++ test/CMakeLists.txt | 186 +++++++++++++++++++++++++++++--------------- 2 files changed, 135 insertions(+), 64 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cbb1a48e..c7eb016d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -906,8 +906,21 @@ if(MDBX_BUILD_SHARED_LIBRARY AND MDBX_LINK_TOOLS_NONSTATIC) set(CMAKE_INSTALL_RPATH "\$ORIGIN/../lib") endif() endif() + + if(WIN32) + # Windows don't have RPATH feature, + # therefore we should prepare PATH or copy DLL(s) + set(TOOL_MDBX_DLLCRUTCH "Crutch for ${CMAKE_SYSTEM_NAME}") + if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_VERSION VERSION_LESS 3.0) + # will use LOCATION property to compose DLLPATH + cmake_policy(SET CMP0026 OLD) + endif() + else() + set(TOOL_MDBX_DLLCRUTCH FALSE) + endif() else() set(TOOL_MDBX_LIB mdbx-static) + set(TOOL_MDBX_DLLCRUTCH FALSE) endif() # build mdbx-tools diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 77004853..3296f5a5 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -75,63 +75,124 @@ if(WIN32) target_link_libraries(mdbx_test winmm.lib) endif() +function(add_extra_test name) + set(options DISABLED) + set(oneValueArgs TIMEOUT) + set(multiValueArgs SOURCE LIBRARY DEPEND DLLPATH) + cmake_parse_arguments(params "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(params_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Unknown keywords given to add_extra_test(): \"${params_UNPARSED_ARGUMENTS}\".") + endif() + + macro(oops) + message(FATAL_ERROR "add_extra_test(): Opps, " ${ARGV}) + endmacro() + + if(NOT 
params_SOURCE) + set(params_SOURCE extra/${name}.c++) + endif() + + set(target "test_extra_${name}") + add_executable(${target} ${params_SOURCE}) + target_include_directories(${target} PRIVATE "${PROJECT_SOURCE_DIR}") + target_link_libraries(${target} ${TOOL_MDBX_LIB}) + set_target_properties(${target} PROPERTIES + SKIP_BUILD_RPATH FALSE + BUILD_WITH_INSTALL_RPATH FALSE) + + if(MDBX_BUILD_CXX AND MDBX_CXX_STANDARD) + set_target_properties(${target} PROPERTIES + CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) + endif() + + if(params_DEPEND) + add_dependencies(${target} ${params_DEPEND}) + endif() + + if(TOOL_MDBX_DLLCRUTCH) + string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPERCASE) + foreach(dep IN LISTS params_LIBRARY) + get_target_property(type ${dep} TYPE) + if(type STREQUAL SHARED_LIBRARY) + # Windows don't have RPATH feature, + # therefore we should prepare PATH or copy DLL(s)... + if(CMAKE_CONFIGURATION_TYPES) + # Could not provide static ENVIRONMENT property with configuration-depended path + set(dir FALSE) + else(CMAKE_CONFIGURATION_TYPES) + get_target_property(filename ${dep} IMPORTED_LOCATION_${CMAKE_BUILD_TYPE_UPPERCASE}) + if(NOT filename) + get_target_property(filename ${dep} IMPORTED_LOCATION) + endif() + get_target_property(filename ${dep} LOCATION_${CMAKE_BUILD_TYPE_UPPERCASE}) + if(NOT filename) + get_target_property(filename ${dep} LOCATION) + endif() + if(filename) + get_filename_component(dir ${filename} DIRECTORY) + else(filename) + get_target_property(dir ${dep} LIBRARY_OUTPUT_DIRECTORY_${CMAKE_BUILD_TYPE_UPPERCASE}) + if(NOT dir) + get_target_property(dir ${dep} RUNTIME_OUTPUT_DIRECTORY_${CMAKE_BUILD_TYPE_UPPERCASE}) + endif() + if(NOT dir) + get_target_property(dir ${dep} LIBRARY_OUTPUT_DIRECTORY) + endif() + if(NOT dir) + get_target_property(dir ${dep} RUNTIME_OUTPUT_DIRECTORY) + endif() + endif(filename) + endif(CMAKE_CONFIGURATION_TYPES) + if(dir) + list(APPEND params_DLLPATH ${dir}) + else(dir) + # Path is 
configuration-depended or not available, should copy dll + add_custom_command(TARGET ${target} POST_BUILD + COMMAND if exist "$" + ${CMAKE_COMMAND} -E copy_if_different "$" "$") + add_custom_command(TARGET ${target} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different "$" "$" + COMMENT "${TOOL_MDBX_DLLCRUTCH}: Copy shared library ${dep} for test ${target}") + endif(dir) + endif() + endforeach(dep) + endif(TOOL_MDBX_DLLCRUTCH) + + if(NOT params_DISABLED AND NOT (CMAKE_CROSSCOMPILING AND NOT CMAKE_CROSSCOMPILING_EMULATOR)) + add_test(extra_${name} ${MDBX_OUTPUT_DIR}/${target}) + if(params_TIMEOUT) + if(MEMORYCHECK_COMMAND OR CMAKE_MEMORYCHECK_COMMAND OR ENABLE_MEMCHECK) + # FIXME: unless there are any other ideas how to fix the + # timeouts problem when testing under Valgrind. + math(EXPR params_TIMEOUT "${params_TIMEOUT} * 42") + endif() + set_tests_properties(extra_${name} PROPERTIES TIMEOUT ${params_TIMEOUT}) + endif() + if(params_DLLPATH) + # Compose DLL's path in the ENVIRONMENT property + if(WIN32) + set(params_DLLPATH_ENV "${params_DLLPATH};$ENV{PATH}") + else() + set(params_DLLPATH_ENV "${params_DLLPATH}:$ENV{PATH}") + string(REPLACE ":" ";" params_DLLPATH_ENV "${params_DLLPATH_ENV}") + endif() + list(REMOVE_DUPLICATES params_DLLPATH_ENV) + if(WIN32) + string(REPLACE ";" "\\;" params_DLLPATH_ENV "${params_DLLPATH_ENV}") + else() + string(REPLACE ";" ":" params_DLLPATH_ENV "${params_DLLPATH_ENV}") + endif() + set_tests_properties(extra_${name} PROPERTIES ENVIRONMENT "PATH=${params_DLLPATH_ENV}") + endif() + endif() +endfunction(add_extra_test) + if(NOT SUBPROJECT) if(UNIX) add_executable(test_extra_pcrf extra/pcrf/pcrf_test.c) target_include_directories(test_extra_pcrf PRIVATE "${PROJECT_SOURCE_DIR}") target_link_libraries(test_extra_pcrf ${TOOL_MDBX_LIB}) - - add_executable(test_extra_upsert_alldups extra/upsert_alldups.c) - target_include_directories(test_extra_upsert_alldups PRIVATE "${PROJECT_SOURCE_DIR}") - 
target_link_libraries(test_extra_upsert_alldups ${TOOL_MDBX_LIB}) - - add_executable(test_extra_dupfix_addodd extra/dupfix_addodd.c) - target_include_directories(test_extra_dupfix_addodd PRIVATE "${PROJECT_SOURCE_DIR}") - target_link_libraries(test_extra_dupfix_addodd ${TOOL_MDBX_LIB}) - endif() - if(MDBX_BUILD_CXX) - add_executable(test_extra_maindb_ordinal extra/maindb_ordinal.c++) - target_include_directories(test_extra_maindb_ordinal PRIVATE "${PROJECT_SOURCE_DIR}") - target_link_libraries(test_extra_maindb_ordinal ${TOOL_MDBX_LIB}) - if(MDBX_CXX_STANDARD) - set_target_properties(test_extra_maindb_ordinal PROPERTIES - CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) - endif() - add_executable(test_extra_dupfix_multiple extra/dupfix_multiple.c++) - target_include_directories(test_extra_dupfix_multiple PRIVATE "${PROJECT_SOURCE_DIR}") - target_link_libraries(test_extra_dupfix_multiple ${TOOL_MDBX_LIB}) - if(MDBX_CXX_STANDARD) - set_target_properties(test_extra_dupfix_multiple PROPERTIES - CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) - endif() - add_executable(test_extra_hex_base64_base58 extra/hex_base64_base58.c++) - target_include_directories(test_extra_hex_base64_base58 PRIVATE "${PROJECT_SOURCE_DIR}") - target_link_libraries(test_extra_hex_base64_base58 ${TOOL_MDBX_LIB}) - if(MDBX_CXX_STANDARD) - set_target_properties(test_extra_hex_base64_base58 PROPERTIES - CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) - endif() - add_executable(test_extra_doubtless_positioning extra/doubtless_positioning.c++) - target_include_directories(test_extra_doubtless_positioning PRIVATE "${PROJECT_SOURCE_DIR}") - target_link_libraries(test_extra_doubtless_positioning ${TOOL_MDBX_LIB}) - if(MDBX_CXX_STANDARD) - set_target_properties(test_extra_doubtless_positioning PROPERTIES - CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) - endif() - add_executable(test_extra_crunched_delete extra/crunched_delete.c++) - 
target_include_directories(test_extra_crunched_delete PRIVATE "${PROJECT_SOURCE_DIR}") - target_link_libraries(test_extra_crunched_delete ${TOOL_MDBX_LIB}) - if(MDBX_CXX_STANDARD) - set_target_properties(test_extra_crunched_delete PROPERTIES - CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) - endif() - add_executable(test_extra_dbi extra/dbi.c++) - target_include_directories(test_extra_dbi PRIVATE "${PROJECT_SOURCE_DIR}") - target_link_libraries(test_extra_dbi ${TOOL_MDBX_LIB}) - if(MDBX_CXX_STANDARD) - set_target_properties(test_extra_dbi PROPERTIES - CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) - endif() endif() endif() @@ -213,19 +274,16 @@ else() if(NOT SUBPROJECT) if(UNIX) - add_test(NAME extra_upsert_alldups COMMAND test_extra_upsert_alldups) - add_test(NAME extra_dupfix_addodd COMMAND test_extra_dupfix_addodd) + add_extra_test(upsert_alldups SOURCE extra/upsert_alldups.c) + add_extra_test(dupfix_addodd SOURCE extra/dupfix_addodd.c) endif() - if(MDBX_BUILD_CXX AND NOT (WIN32 AND MDBX_BUILD_SHARED_LIBRARY)) - add_test(NAME extra_maindb_ordinal COMMAND test_extra_maindb_ordinal) - add_test(NAME extra_dupfix_multiple COMMAND test_extra_dupfix_multiple) - add_test(NAME extra_hex_base64_base58 COMMAND test_extra_hex_base64_base58) - add_test(NAME extra_doubtless_positioning COMMAND test_extra_doubtless_positioning) - if (ENABLE_MEMCHECK) - set_tests_properties(extra_doubtless_positioning PROPERTIES TIMEOUT 10800) - endif() - add_test(NAME extra_crunched_delete COMMAND test_extra_crunched_delete) - add_test(NAME extra_dbi COMMAND test_extra_dbi) + if(MDBX_BUILD_CXX) + add_extra_test(maindb_ordinal) + add_extra_test(dupfix_multiple) + add_extra_test(hex_base64_base58) + add_extra_test(doubtless_positioning TIMEOUT 10800) + add_extra_test(crunched_delete TIMEOUT 10800) + add_extra_test(dbi) endif() endif() From ce579bcb8e15770c58948217700d01f66dd7df26 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 18 Sep 2024 12:03:28 +0300 Subject: [PATCH 283/443] =?UTF-8?q?mdbx-testing:=20=D0=B4=D0=BE=D0=B1?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`extra/open`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 1 + test/extra/open.c++ | 88 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 test/extra/open.c++ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3296f5a5..697f27fd 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -284,6 +284,7 @@ else() add_extra_test(doubtless_positioning TIMEOUT 10800) add_extra_test(crunched_delete TIMEOUT 10800) add_extra_test(dbi) + add_extra_test(open) endif() endif() diff --git a/test/extra/open.c++ b/test/extra/open.c++ new file mode 100644 index 00000000..d475182c --- /dev/null +++ b/test/extra/open.c++ @@ -0,0 +1,88 @@ +#include "mdbx.h++" + +#include + +#if !defined(__cpp_lib_latch) && __cpp_lib_latch < 201907L + +int main(int argc, const char *argv[]) { + (void)argc; + (void)argv; + std::cout << "FAKE-OK (since no C++20 std::thread and/or std::latch\n"; + return EXIT_SUCCESS; +} + +#else + +#include +#include + +static char log_buffer[1024]; + +static void logger_nofmt(MDBX_log_level_t loglevel, const char *function, + int line, const char *msg, unsigned length) noexcept { + (void)length; + (void)loglevel; + fprintf(stdout, "%s:%u %s", function, line, msg); +} + +int main(int argc, const char *argv[]) { + (void)argc; + (void)argv; + + mdbx_setup_debug_nofmt(MDBX_LOG_VERBOSE, MDBX_DBG_ASSERT, logger_nofmt, + log_buffer, sizeof(log_buffer)); + + mdbx::path path = "test-open"; + mdbx::env::remove(path); + + { + mdbx::env::operate_parameters operateParameters2(100, 10); + mdbx::env_managed::create_parameters createParameters2; + 
createParameters2.geometry.make_fixed(42 * mdbx::env::geometry::MiB); + mdbx::env_managed env2(path, createParameters2, operateParameters2); + mdbx::txn_managed txn2 = env2.start_write(false); + /* mdbx::map_handle testHandle2 = */ txn2.create_map( + "fap1", mdbx::key_mode::reverse, mdbx::value_mode::single); + txn2.commit(); + } + + mdbx::env::operate_parameters operateParameters(100, 10); + mdbx::env_managed::create_parameters createParameters; + createParameters.geometry.make_dynamic(21 * mdbx::env::geometry::MiB, + 84 * mdbx::env::geometry::MiB); + mdbx::env_managed env(path, createParameters, operateParameters); + mdbx::txn_managed txn = env.start_write(false); + /* mdbx::map_handle testHandle = */ txn.create_map( + "fap1", mdbx::key_mode::usual, mdbx::value_mode::single); + txn.commit(); + + std::latch starter(1); + + std::thread t1([&]() { + starter.wait(); + // mdbx::env_managed env(path, createParameters, operateParameters); + mdbx::txn_managed txn = env.start_write(false); + /* mdbx::map_handle testHandle = */ txn.create_map( + "fap1", mdbx::key_mode::usual, mdbx::value_mode::single); + txn.commit(); + }); + + std::thread t2([&]() { + starter.wait(); + // mdbx::env_managed env(path, createParameters, operateParameters); + mdbx::txn_managed txn = env.start_write(false); + /* mdbx::map_handle testHandle = */ txn.create_map( + "fap1", mdbx::key_mode::usual, mdbx::value_mode::single); + txn.commit(); + }); + + starter.count_down(); + + t1.join(); + t2.join(); + + std::cout << "OK\n"; + return EXIT_SUCCESS; +} + +#endif /* __cpp_lib_latch */ From 486fb3c36def3a1431c720a09ad838e53fa792b4 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Mon, 7 Oct 2024 23:03:44 +0300 Subject: [PATCH 284/443] =?UTF-8?q?mdbx-testing:=20=D0=B8=D1=81=D0=BF?= =?UTF-8?q?=D1=80=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BC=D0=B0?= =?UTF-8?q?=D0=BA=D1=81=D0=B8=D0=BC=D0=B0=D0=BB=D1=8C=D0=BD=D0=BE=D0=B9=20?= 
=?UTF-8?q?=D0=B4=D0=BB=D0=B8=D0=BD=D1=8B=20=D0=B7=D0=BD=D0=B0=D1=87=D0=B5?= =?UTF-8?q?=D0=BD=D0=B8=D0=B9=20=D0=B2=20`extra/crunched-delete`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/extra/crunched_delete.c++ | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/test/extra/crunched_delete.c++ b/test/extra/crunched_delete.c++ index 89952ddf..e47a0c35 100644 --- a/test/extra/crunched_delete.c++ +++ b/test/extra/crunched_delete.c++ @@ -380,11 +380,18 @@ int main(int argc, const char *argv[]) { mdbx::path db_filename = "test-crunched-del"; mdbx::env::remove(db_filename); + mdbx::env_managed env(db_filename, mdbx::env_managed::create_parameters(), + mdbx::env::operate_parameters(42)); + if (!simple(env) || !next_prev_current(env) || !outofrange_prev(env)) + return EXIT_FAILURE; + std::vector testset; // Там ключи разной длины - от 1 до 64 байт. // Значения разной длины от 100 до 1000 байт. testset.emplace_back(/* keylen_min */ 1, /* keylen_max */ 64, - /* datalen_min */ 100, /* datalen_max */ 4000, + /* datalen_min */ 100, /* datalen_max */ + mdbx_env_get_valsize4page_max( + env, MDBX_db_flags_t(mdbx::value_mode::multi)), /* dups_log2 */ 6); // В одной таблице DupSort: path -> version_u64+data // path - это префикс в дереве. Самые частые длины: 1-5 байт и 32-36 байт. 
@@ -394,11 +401,6 @@ int main(int argc, const char *argv[]) { testset.emplace_back(8, 8, 1, 5, 10); testset.emplace_back(8, 8, 32, 36, 9); - mdbx::env_managed env(db_filename, mdbx::env_managed::create_parameters(), - mdbx::env::operate_parameters(42)); - if (!simple(env) || !next_prev_current(env) || !outofrange_prev(env)) - return EXIT_FAILURE; - auto txn = env.start_write(); for (unsigned i = 0; i < testset.size(); ++i) create_and_fill(txn, testset[i], i); From bf58ec59f55f057cb234a5a80d28dd0ec3f8dca8 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Mon, 7 Oct 2024 23:35:24 +0300 Subject: [PATCH 285/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D1=83=D1=89?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=204-=D0=B1=D0=B0=D0=B9=D1=82=D0=BE?= =?UTF-8?q?=D0=B2=D0=BE=D0=B3=D0=BE=20=D0=B2=D1=8B=D1=80=D0=B0=D0=B2=D0=BD?= =?UTF-8?q?=D0=B8=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=B4=D0=B0=D0=BD=D0=BD?= =?UTF-8?q?=D1=8B=D1=85=20`MDBX=5FMULTIPLE`=20=D0=B4=D0=BB=D1=8F=2032-?= =?UTF-8?q?=D0=B1=D0=B8=D1=82=D0=BD=D1=8B=D1=85=20=D1=81=D0=B1=D0=BE=D1=80?= =?UTF-8?q?=D0=BE=D0=BA.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit На 32-битных платформах элементы массивов 64-битных типов могут быть выравнены на 4-байтовую границу. Из-за этого `mdbx_put(MDBX_MULTIPLE)` могла возвращать ошибку `MDBX_BAD_VALSIZE`, считая что переданные пользователем данные не выровнены. 
--- src/cursor.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/cursor.c b/src/cursor.c index 2d2e0fa4..0d4e1ec5 100644 --- a/src/cursor.c +++ b/src/cursor.c @@ -1569,12 +1569,19 @@ __hot int cursor_put_checklen(MDBX_cursor *mc, const MDBX_val *key, if (mc->tree->flags & MDBX_INTEGERDUP) { if (data->iov_len == 8) { if (unlikely(7 & (uintptr_t)data->iov_base)) { - if (unlikely(flags & MDBX_MULTIPLE)) - return MDBX_BAD_VALSIZE; - /* copy instead of return error to avoid break compatibility */ - aligned_data.iov_base = bcopy_8(&aligned_databytes, data->iov_base); - aligned_data.iov_len = data->iov_len; - data = &aligned_data; + if (unlikely(flags & MDBX_MULTIPLE)) { + /* LY: использование alignof(uint64_t) тут не подходил из-за ошибок + * MSVC и некоторых других компиляторов, когда для элементов + * массивов/векторов обеспечивает выравнивание только на 4-х байтовых + * границу и одновременно alignof(uint64_t) == 8. */ + if (MDBX_WORDBITS > 32 || (3 & (uintptr_t)data->iov_base) != 0) + return MDBX_BAD_VALSIZE; + } else { + /* copy instead of return error to avoid break compatibility */ + aligned_data.iov_base = bcopy_8(&aligned_databytes, data->iov_base); + aligned_data.iov_len = data->iov_len; + data = &aligned_data; + } } } else if (data->iov_len == 4) { if (unlikely(3 & (uintptr_t)data->iov_base)) { From ecf862a4f6ab715ff539611e9d48562b4527677b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 8 Oct 2024 00:33:33 +0300 Subject: [PATCH 286/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20`osal=5Fjitter()`=20=D0=B4=D0=BB?= =?UTF-8?q?=D1=8F=20=D1=83=D0=BC=D0=B5=D0=BD=D1=8C=D1=88=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D1=8F=20=D0=B7=D0=B0=D0=B4=D0=B5=D1=80=D0=B6=D0=B5=D0=BA=20?= =?UTF-8?q?=D0=B2=20=D1=82=D0=B5=D1=81=D1=82=D0=B0=D1=85=20=D0=BF=D0=BE?= =?UTF-8?q?=D0=B4=20Windows.?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/osal.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/osal.c b/src/osal.c index ecb4219c..2df93db4 100644 --- a/src/osal.c +++ b/src/osal.c @@ -2761,7 +2761,9 @@ __cold MDBX_INTERNAL void osal_jitter(bool tiny) { for (;;) { #if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ defined(__x86_64__) - const unsigned salt = 277u * (unsigned)__rdtsc(); + unsigned salt = 5296013u * (unsigned)__rdtsc(); + salt ^= salt >> 11; + salt *= 25810541u; #elif (defined(_WIN32) || defined(_WIN64)) && MDBX_WITHOUT_MSVC_CRT static ULONG state; const unsigned salt = (unsigned)RtlRandomEx(&state); @@ -2769,13 +2771,26 @@ __cold MDBX_INTERNAL void osal_jitter(bool tiny) { const unsigned salt = rand(); #endif - const unsigned coin = salt % (tiny ? 29u : 43u); + const int coin = salt % (tiny ? 29u : 43u); if (coin < 43 / 3) break; #if defined(_WIN32) || defined(_WIN64) - SwitchToThread(); - if (coin > 43 * 2 / 3) - Sleep(1); + if (coin < 43 * 2 / 3) + SwitchToThread(); + else { + static HANDLE timer; + if (!timer) + timer = CreateWaitableTimer(NULL, TRUE, NULL); + + LARGE_INTEGER ft; + ft.QuadPart = + coin * (int64_t)-10; // Convert to 100 nanosecond interval, + // negative value indicates relative time. 
+ SetWaitableTimer(timer, &ft, 0, NULL, NULL, 0); + WaitForSingleObject(timer, INFINITE); + // CloseHandle(timer); + break; + } #else sched_yield(); if (coin > 43 * 2 / 3) From ca2dbf0933b1b8135a1c8ba05707d0980ced9069 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 8 Oct 2024 12:24:30 +0300 Subject: [PATCH 287/443] =?UTF-8?q?mdbx-testing:=20=D1=83=D0=BC=D0=B5?= =?UTF-8?q?=D0=BD=D1=8C=D1=88=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BA=D0=BE=D0=BB?= =?UTF-8?q?-=D0=B2=D0=B0=20=D0=B8=D1=82=D0=B5=D1=80=D0=B0=D1=86=D0=B8?= =?UTF-8?q?=D0=B9=20`extra/crunched-delete`=20=D0=B4=D0=BB=D1=8F=20CI.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 4 ++++ test/extra/crunched_delete.c++ | 2 ++ 2 files changed, 6 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index c7eb016d..605f2248 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -656,6 +656,10 @@ else() set(MDBX_ENABLE_TESTS FALSE) endif() +if(CI) + add_definitions(-DMDBX_CI="${CI}") +endif() + ################################################################################ ################################################################################ diff --git a/test/extra/crunched_delete.c++ b/test/extra/crunched_delete.c++ index e47a0c35..2d14f423 100644 --- a/test/extra/crunched_delete.c++ +++ b/test/extra/crunched_delete.c++ @@ -7,6 +7,8 @@ #if MDBX_DEBUG || !defined(NDEBUG) || defined(__APPLE__) || defined(_WIN32) #define NN 1024 +#elif defined(MDBX_CI) +#define NN 4096 #else #define NN 16384 #endif From 57848b1d2d61406fb63326448b7723b811a210f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 8 Oct 2024 18:06:11 +0300 Subject: [PATCH 288/443] =?UTF-8?q?mdbx-testing:=20=D0=B4=D0=BE=D0=B1?= 
=?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BB=D0=BE=D0=B3?= =?UTF-8?q?=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=A1++=20?= =?UTF-8?q?=D0=B8=D1=81=D0=BA=D0=BB=D1=8E=D1=87=D0=B5=D0=BD=D0=B8=D0=B9=20?= =?UTF-8?q?=D0=B2=20`extra/dupfix=5Fmultiple`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/extra/dupfix_multiple.c++ | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/test/extra/dupfix_multiple.c++ b/test/extra/dupfix_multiple.c++ index 3e1d323e..1b0ffc45 100644 --- a/test/extra/dupfix_multiple.c++ +++ b/test/extra/dupfix_multiple.c++ @@ -5,10 +5,7 @@ #include #include -int main(int argc, const char *argv[]) { - (void)argc; - (void)argv; - +int doit() { mdbx::path db_filename = "test-dupfix-multiple"; mdbx::env_managed::remove(db_filename); mdbx::env_managed env(db_filename, mdbx::env_managed::create_parameters(), @@ -285,3 +282,15 @@ int main(int argc, const char *argv[]) { std::cout << "OK\n"; return EXIT_SUCCESS; } + +int main(int argc, const char *argv[]) { + (void)argc; + (void)argv; + + try { + return doit(); + } catch (const std::exception &ex) { + std::cerr << "Exception: " << ex.what() << "\n"; + return EXIT_FAILURE; + } +} From d40e4db13acfa1dac38efb4fa88fd0b98c4944e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 8 Oct 2024 18:14:15 +0300 Subject: [PATCH 289/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ChangeLog.md b/ChangeLog.md index 2d316b90..2deef601 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -14,10 +14,11 @@ and [by 
Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Добавлены упущенные inline-реализации `mdbx::cursor::upper_bound()` и `mdbx::cursor::upper_bound_multivalue()`. - Корректировка описания С++ API для использования термина "таблица" вместо "sub-database". - Исправление условия внутри `assert()` в пути обработки `MDBX_GET/NEXT/PREV_MULTIPLE`. + - Допущение 4-байтового выравнивания данных `MDBX_MULTIPLE` для 32-битных сборок. Новое: - Добавление `mdbx::cursor::get_multiple_samelength()` и переименование `mdbx::txn::put_multiple_samelength()`. - - Возвращение ключа при `MDBX_GET_MULTIPLE` для единообразия C++ API. + - Возвращение ключа при выполнении операции `MDBX_GET_MULTIPLE` для единообразия C++ API. Мелочи: @@ -26,6 +27,11 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Использование `WIN32` вместо `${CMAKE_SYSTEM_NAME}`. - Подавление параноидальных предупреждений MSVC в extra-тестах. - Дополнение отладочного логирования внутри `dxb_resize()`. + - Добавление в сценарии CMake/CTest копирования dll под Windows для работы исключений в тестах на C++. + - Добавление С++ теста `extra/open`. + - Доработка `osal_jitter()` для уменьшения задержек в тестах под Windows. + - Исправление максимальной длины значений в тесте `extra/crunched-delete`. + - Добавление логирования С++ исключений в `extra/dupfix_multiple`. 
-------------------------------------------------------------------------------- From d2b74e4da5d04fe970689994a36d0678205e53ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 8 Oct 2024 23:08:52 +0300 Subject: [PATCH 290/443] =?UTF-8?q?mdbx-cmake:=20=D0=B2=D0=BA=D0=BB=D1=8E?= =?UTF-8?q?=D1=87=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=81=D1=82=D0=B0=D0=BD=D0=B4?= =?UTF-8?q?=D0=B0=D1=80=D1=82=D0=B0=20`C23`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 8 ++++++-- cmake/compiler.cmake | 10 +++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 605f2248..59dcada5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -450,7 +450,6 @@ if(MDBX_MANAGE_BUILD_FLAGS) setup_compile_flags() endif() -list(FIND CMAKE_C_COMPILE_FEATURES c_std_11 HAS_C11) list(FIND CMAKE_CXX_COMPILE_FEATURES cxx_std_11 HAS_CXX11) list(FIND CMAKE_CXX_COMPILE_FEATURES cxx_std_14 HAS_CXX14) list(FIND CMAKE_CXX_COMPILE_FEATURES cxx_std_17 HAS_CXX17) @@ -481,6 +480,9 @@ if(NOT DEFINED MDBX_CXX_STANDARD) set(MDBX_CXX_STANDARD 98) endif() endif() + +list(FIND CMAKE_C_COMPILE_FEATURES c_std_11 HAS_C11) +list(FIND CMAKE_C_COMPILE_FEATURES c_std_23 HAS_C23) if(NOT DEFINED MDBX_C_STANDARD) # MSVC >= 19.28 (Microsoft Visual Studio 16.8) is mad! # It unable process Windows SDK headers in the C11 mode! 
@@ -488,6 +490,8 @@ if(NOT DEFINED MDBX_C_STANDARD) set(MDBX_C_STANDARD 99) set(C_FALLBACK_11 OFF) set(C_FALLBACK_GNU11 OFF) + elseif(NOT HAS_C23 LESS 0) + set(MDBX_C_STANDARD 23) elseif(HAS_C11 LESS 0 AND NOT C_FALLBACK_GNU11 AND NOT C_FALLBACK_11) set(MDBX_C_STANDARD 99) else() @@ -800,7 +804,7 @@ macro(target_setup_options TARGET) set_target_properties(${TARGET} PROPERTIES INTERPROCEDURAL_OPTIMIZATION $) endif() - if(NOT C_FALLBACK_GNU11 AND NOT C_FALLBACK_11) + if(NOT MDBX_C_STANDARD EQUAL 11 OR (NOT C_FALLBACK_GNU11 AND NOT C_FALLBACK_11)) set_target_properties(${TARGET} PROPERTIES C_STANDARD ${MDBX_C_STANDARD} C_STANDARD_REQUIRED ON) endif() diff --git a/cmake/compiler.cmake b/cmake/compiler.cmake index bd50b9d7..a6c7f618 100644 --- a/cmake/compiler.cmake +++ b/cmake/compiler.cmake @@ -384,9 +384,13 @@ endif() if(CMAKE_CXX_COMPILER_LOADED) list(FIND CMAKE_CXX_COMPILE_FEATURES cxx_std_11 HAS_CXX11) if(HAS_CXX11 LESS 0) - check_cxx_compiler_flag("-std=gnu++11" CXX_FALLBACK_GNU11) - if(NOT CXX_FALLBACK_GNU11) - check_cxx_compiler_flag("-std=c++11" CXX_FALLBACK_11) + if (MSVC) + check_cxx_compiler_flag("/std:c++11" CXX_FALLBACK_11) + else() + check_cxx_compiler_flag("-std=gnu++11" CXX_FALLBACK_GNU11) + if(NOT CXX_FALLBACK_GNU11) + check_cxx_compiler_flag("-std=c++11" CXX_FALLBACK_11) + endif() endif() endif() endif() From 22233b0991ca3b9c58aab6d1b9aeb359c3134665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 9 Oct 2024 10:22:16 +0300 Subject: [PATCH 291/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=B5=D1=80=D0=B5=D0=BC?= =?UTF-8?q?=D0=B5=D1=89=D0=B5=D0=BD=D0=B8=D0=B5=20`MDBX=5FNORETURN`=20?= =?UTF-8?q?=D0=B2=20=D0=BF=D1=80=D0=BE=D1=82=D0=BE=D1=82=D0=B8=D0=BF=D0=B0?= =?UTF-8?q?=D1=85=20assert-failed=20=20=D0=B4=D0=BB=D1=8F=20=D0=BD=D0=BE?= =?UTF-8?q?=D0=B2=D0=BE=D0=B3=D0=BE=20clang.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- src/osal.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/osal.c b/src/osal.c index 2df93db4..3aa30349 100644 --- a/src/osal.c +++ b/src/osal.c @@ -146,13 +146,14 @@ __extern_C void __assert2(const char *file, int line, const char *function, __assert2(file, line, function, assertion) #elif defined(__UCLIBC__) -__extern_C void __assert(const char *, const char *, unsigned, const char *) +MDBX_NORETURN __extern_C void __assert(const char *, const char *, unsigned, + const char *) #ifdef __THROW __THROW #else __nothrow #endif /* __THROW */ - MDBX_NORETURN; + ; #define __assert_fail(assertion, file, line, function) \ __assert(assertion, file, line, function) @@ -160,14 +161,15 @@ __extern_C void __assert(const char *, const char *, unsigned, const char *) /* workaround for avoid musl libc wrong prototype */ ( \ defined(__GLIBC__) || defined(__GNU_LIBRARY__)) /* Prototype should match libc runtime. ISO POSIX (2003) & LSB 1.x-3.x */ -__extern_C void __assert_fail(const char *assertion, const char *file, - unsigned line, const char *function) +MDBX_NORETURN __extern_C void __assert_fail(const char *assertion, + const char *file, unsigned line, + const char *function) #ifdef __THROW __THROW #else __nothrow #endif /* __THROW */ - MDBX_NORETURN; + ; #elif defined(__APPLE__) || defined(__MACH__) __extern_C void __assert_rtn(const char *function, const char *file, int line, @@ -185,8 +187,9 @@ __extern_C void __assert_rtn(const char *function, const char *file, int line, #define __assert_fail(assertion, file, line, function) \ __assert_rtn(function, file, line, assertion) #elif defined(__sun) || defined(__SVR4) || defined(__svr4__) -__extern_C void __assert_c99(const char *assection, const char *file, int line, - const char *function) MDBX_NORETURN; +MDBX_NORETURN __extern_C void __assert_c99(const char *assection, + const char *file, int line, + const char *function); #define 
__assert_fail(assertion, file, line, function) \ __assert_c99(assertion, file, line, function) #elif defined(__OpenBSD__) From bfce1cd24d182e47c08adc52d2666aed7972f7f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 10 Oct 2024 06:16:49 +0300 Subject: [PATCH 292/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20`=5F=5Fhas?= =?UTF-8?q?=5Fc=5Fattribute()`=20=D0=B8=20`=5F=5Fhas=5Fcxx=5Fattribute()`,?= =?UTF-8?q?=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5?= =?UTF-8?q?=20`=5F=5Fhas=5FC23=5For=5FCXX=5Fattribute()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 85 +++++++++++++++++++++++++++---------------------- mdbx.h++ | 12 +++---- test/config.h++ | 2 +- 3 files changed, 54 insertions(+), 45 deletions(-) diff --git a/mdbx.h b/mdbx.h index 9972e6b5..4fdace51 100644 --- a/mdbx.h +++ b/mdbx.h @@ -189,10 +189,33 @@ typedef mode_t mdbx_mode_t; #define __has_attribute(x) (0) #endif /* __has_attribute */ +#ifndef __has_c_attribute +#define __has_c_attribute(x) (0) +#endif /* __has_c_attribute */ + #ifndef __has_cpp_attribute #define __has_cpp_attribute(x) 0 #endif /* __has_cpp_attribute */ +#ifndef __has_CXX_attribute +#if defined(__cplusplus) && \ + (!defined(_MSC_VER) || defined(__clang__) || _MSC_VER >= 1942) +#define __has_CXX_attribute(x) __has_cpp_attribute(x) +#else +#define __has_CXX_attribute(x) 0 +#endif +#endif /* __has_CXX_attribute */ + +#ifndef __has_C23_or_CXX_attribute +#if defined(__cplusplus) +#define __has_C23_or_CXX_attribute(x) __has_CXX_attribute(x) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ > 202311L +#define __has_C23_or_CXX_attribute(x) __has_c_attribute(x) +#else +#define __has_C23_or_CXX_attribute(x) 0 +#endif +#endif /* __has_C23_or_CXX_attribute */ + #ifndef __has_feature #define 
__has_feature(x) (0) #endif /* __has_feature */ @@ -213,15 +236,12 @@ typedef mode_t mdbx_mode_t; * These functions should be declared with the attribute pure. */ #if defined(DOXYGEN) #define MDBX_PURE_FUNCTION [[gnu::pure]] -#elif (defined(__GNUC__) || __has_attribute(__pure__)) && \ - (!defined(__clang__) /* https://bugs.llvm.org/show_bug.cgi?id=43275 */ \ - || !defined(__cplusplus) || !__has_feature(cxx_exceptions)) -#define MDBX_PURE_FUNCTION __attribute__((__pure__)) -#elif defined(_MSC_VER) && !defined(__clang__) && _MSC_VER >= 1920 -#define MDBX_PURE_FUNCTION -#elif defined(__cplusplus) && __has_cpp_attribute(gnu::pure) && \ - (!defined(__clang__) || !__has_feature(cxx_exceptions)) +#elif __has_C23_or_CXX_attribute(gnu::pure) #define MDBX_PURE_FUNCTION [[gnu::pure]] +#elif (defined(__GNUC__) || __has_attribute(__pure__)) && \ + (!defined(__clang__) /* https://bugs.llvm.org/show_bug.cgi?id=43275 */ || \ + !defined(__cplusplus) || !__has_feature(cxx_exceptions)) +#define MDBX_PURE_FUNCTION __attribute__((__pure__)) #else #define MDBX_PURE_FUNCTION #endif /* MDBX_PURE_FUNCTION */ @@ -231,22 +251,16 @@ typedef mode_t mdbx_mode_t; * that is compatible to CLANG and proposed [[pure]]. 
*/ #if defined(DOXYGEN) #define MDBX_NOTHROW_PURE_FUNCTION [[gnu::pure, gnu::nothrow]] -#elif defined(__GNUC__) || \ - (__has_attribute(__pure__) && __has_attribute(__nothrow__)) -#define MDBX_NOTHROW_PURE_FUNCTION __attribute__((__pure__, __nothrow__)) -#elif defined(_MSC_VER) && !defined(__clang__) && _MSC_VER >= 1920 -#if __has_cpp_attribute(pure) -#define MDBX_NOTHROW_PURE_FUNCTION [[pure]] -#else -#define MDBX_NOTHROW_PURE_FUNCTION -#endif -#elif defined(__cplusplus) && __has_cpp_attribute(gnu::pure) -#if __has_cpp_attribute(gnu::nothrow) +#elif __has_C23_or_CXX_attribute(gnu::pure) +#if __has_C23_or_CXX_attribute(gnu::nothrow) #define MDBX_NOTHROW_PURE_FUNCTION [[gnu::pure, gnu::nothrow]] #else #define MDBX_NOTHROW_PURE_FUNCTION [[gnu::pure]] #endif -#elif defined(__cplusplus) && __has_cpp_attribute(pure) +#elif defined(__GNUC__) || \ + (__has_attribute(__pure__) && __has_attribute(__nothrow__)) +#define MDBX_NOTHROW_PURE_FUNCTION __attribute__((__pure__, __nothrow__)) +#elif __has_CXX_attribute(pure) #define MDBX_NOTHROW_PURE_FUNCTION [[pure]] #else #define MDBX_NOTHROW_PURE_FUNCTION @@ -264,15 +278,12 @@ typedef mode_t mdbx_mode_t; * It does not make sense for a const function to return void. 
*/ #if defined(DOXYGEN) #define MDBX_CONST_FUNCTION [[gnu::const]] -#elif (defined(__GNUC__) || __has_attribute(__pure__)) && \ - (!defined(__clang__) /* https://bugs.llvm.org/show_bug.cgi?id=43275 */ \ - || !defined(__cplusplus) || !__has_feature(cxx_exceptions)) -#define MDBX_CONST_FUNCTION __attribute__((__const__)) -#elif defined(_MSC_VER) && !defined(__clang__) && _MSC_VER >= 1920 -#define MDBX_CONST_FUNCTION MDBX_PURE_FUNCTION -#elif defined(__cplusplus) && __has_cpp_attribute(gnu::const) && \ - (!defined(__clang__) || !__has_feature(cxx_exceptions)) +#elif __has_C23_or_CXX_attribute(gnu::const) #define MDBX_CONST_FUNCTION [[gnu::const]] +#elif (defined(__GNUC__) || __has_attribute(__const__)) && \ + (!defined(__clang__) /* https://bugs.llvm.org/show_bug.cgi?id=43275 */ || \ + !defined(__cplusplus) || !__has_feature(cxx_exceptions)) +#define MDBX_CONST_FUNCTION __attribute__((__const__)) #else #define MDBX_CONST_FUNCTION MDBX_PURE_FUNCTION #endif /* MDBX_CONST_FUNCTION */ @@ -282,18 +293,16 @@ typedef mode_t mdbx_mode_t; * that is compatible to CLANG and future [[const]]. 
*/ #if defined(DOXYGEN) #define MDBX_NOTHROW_CONST_FUNCTION [[gnu::const, gnu::nothrow]] +#elif __has_C23_or_CXX_attribute(gnu::const) +#if __has_C23_or_CXX_attribute(gnu::nothrow) +#define MDBX_NOTHROW_CONST_FUNCTION [[gnu::const, gnu::nothrow]] +#else +#define MDBX_NOTHROW_CONST_FUNCTION [[gnu::const]] +#endif #elif defined(__GNUC__) || \ (__has_attribute(__const__) && __has_attribute(__nothrow__)) #define MDBX_NOTHROW_CONST_FUNCTION __attribute__((__const__, __nothrow__)) -#elif defined(_MSC_VER) && !defined(__clang__) && _MSC_VER >= 1920 -#define MDBX_NOTHROW_CONST_FUNCTION MDBX_NOTHROW_PURE_FUNCTION -#elif defined(__cplusplus) && __has_cpp_attribute(gnu::const) -#if __has_cpp_attribute(gnu::nothrow) -#define MDBX_NOTHROW_PURE_FUNCTION [[gnu::const, gnu::nothrow]] -#else -#define MDBX_NOTHROW_PURE_FUNCTION [[gnu::const]] -#endif -#elif defined(__cplusplus) && __has_cpp_attribute(const) +#elif __has_CXX_attribute(const) #define MDBX_NOTHROW_CONST_FUNCTION [[const]] #else #define MDBX_NOTHROW_CONST_FUNCTION MDBX_NOTHROW_PURE_FUNCTION @@ -3848,7 +3857,7 @@ mdbx_env_get_maxvalsize_ex(const MDBX_env *env, MDBX_db_flags_t flags); /** \deprecated Please use \ref mdbx_env_get_maxkeysize_ex() * and/or \ref mdbx_env_get_maxvalsize_ex() * \ingroup c_statinfo */ -MDBX_DEPRECATED MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int +MDBX_NOTHROW_PURE_FUNCTION MDBX_DEPRECATED LIBMDBX_API int mdbx_env_get_maxkeysize(const MDBX_env *env); /** \brief Returns maximal size of key-value pair to fit in a single page diff --git a/mdbx.h++ b/mdbx.h++ index 98b878ff..75761c10 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -924,7 +924,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { /// \param [in] ignore_spaces If `true` function will skips spaces surrounding /// (before, between and after) a encoded bytes. However, spaces should not /// break a pair of characters encoding a single byte. 
- inline MDBX_NOTHROW_PURE_FUNCTION bool + MDBX_NOTHROW_PURE_FUNCTION inline bool is_hex(bool ignore_spaces = false) const noexcept; /// \brief Checks whether the content of the slice is a @@ -932,7 +932,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { /// \param [in] ignore_spaces If `true` function will skips spaces surrounding /// (before, between and after) a encoded bytes. However, spaces should not /// break a code group of characters. - inline MDBX_NOTHROW_PURE_FUNCTION bool + MDBX_NOTHROW_PURE_FUNCTION inline bool is_base58(bool ignore_spaces = false) const noexcept; /// \brief Checks whether the content of the slice is a @@ -940,7 +940,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { /// \param [in] ignore_spaces If `true` function will skips spaces surrounding /// (before, between and after) a encoded bytes. However, spaces should not /// break a code group of characters. - inline MDBX_NOTHROW_PURE_FUNCTION bool + MDBX_NOTHROW_PURE_FUNCTION inline bool is_base64(bool ignore_spaces = false) const noexcept; inline void swap(slice &other) noexcept; @@ -5876,17 +5876,17 @@ slice::base64_decode(bool ignore_spaces, const ALLOCATOR &allocator) const { .as_buffer(allocator); } -inline MDBX_NOTHROW_PURE_FUNCTION bool +MDBX_NOTHROW_PURE_FUNCTION inline bool slice::is_hex(bool ignore_spaces) const noexcept { return !from_hex(*this, ignore_spaces).is_erroneous(); } -inline MDBX_NOTHROW_PURE_FUNCTION bool +MDBX_NOTHROW_PURE_FUNCTION inline bool slice::is_base58(bool ignore_spaces) const noexcept { return !from_base58(*this, ignore_spaces).is_erroneous(); } -inline MDBX_NOTHROW_PURE_FUNCTION bool +MDBX_NOTHROW_PURE_FUNCTION inline bool slice::is_base64(bool ignore_spaces) const noexcept { return !from_base64(*this, ignore_spaces).is_erroneous(); } diff --git a/test/config.h++ b/test/config.h++ index 12bc1b66..45ab7cf4 100644 --- a/test/config.h++ +++ b/test/config.h++ @@ -282,7 +282,7 @@ struct actor_params_pod { // FIXME: TODO return 0; } - static 
MDBX_PURE_FUNCTION uint64_t serial_mask(unsigned bits) { + MDBX_PURE_FUNCTION static uint64_t serial_mask(unsigned bits) { assert(bits > 0 && bits <= 64); return (~(uint64_t)0u) >> (64 - bits); } From 06dd50580c8e884723874eadf6fc970b4c1407f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 18 Oct 2024 18:38:36 +0300 Subject: [PATCH 293/443] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5?= =?UTF-8?q?=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20API-=D0=BC?= =?UTF-8?q?=D0=B0=D0=BA=D1=80=D0=BE=D1=81=D0=BE=D0=B2=20=D0=B4=D0=BB=D1=8F?= =?UTF-8?q?=20Doxygen.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mdbx.h b/mdbx.h index 4fdace51..2a6a29d9 100644 --- a/mdbx.h +++ b/mdbx.h @@ -621,7 +621,7 @@ extern "C" { #define MDBX_VERSION_MINOR 13 #ifndef LIBMDBX_API -#if defined(LIBMDBX_EXPORTS) +#if defined(LIBMDBX_EXPORTS) || defined(DOXYGEN) #define LIBMDBX_API __dll_export #elif defined(LIBMDBX_IMPORTS) #define LIBMDBX_API __dll_import @@ -631,7 +631,7 @@ extern "C" { #endif /* LIBMDBX_API */ #ifdef __cplusplus -#if defined(__clang__) || __has_attribute(type_visibility) +#if defined(__clang__) || __has_attribute(type_visibility) || defined(DOXYGEN) #define LIBMDBX_API_TYPE LIBMDBX_API __attribute__((type_visibility("default"))) #else #define LIBMDBX_API_TYPE LIBMDBX_API From b43eed2c2b62a9cb064107ae4cc5d39c47ae9a13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 22 Oct 2024 22:24:08 +0300 Subject: [PATCH 294/443] =?UTF-8?q?mdbx++:=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D0=B8=D0=BC=D0=B5=D0=BD=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20?= =?UTF-8?q?=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B5=D0=BD=D0=BD=D0=B8=D1=85=20?= 
=?UTF-8?q?=D0=BC=D0=B5=D1=82=D0=BE=D0=B4=D0=BE=D0=B2=20`mdbx::buffer<>::s?= =?UTF-8?q?ilo::bin::inplace=5Flastbyte()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 75761c10..fbf8c28b 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -1743,10 +1743,10 @@ private: << (sizeof(size_t /* allocated::capacity_bytes_ */) - 1) * CHAR_BIT }; - constexpr byte lastbyte() const noexcept { + constexpr byte inplace_lastbyte() const noexcept { return inplace_[sizeof(bin) - 1]; } - MDBX_CXX17_CONSTEXPR byte &lastbyte() noexcept { + MDBX_CXX17_CONSTEXPR byte &inplace_lastbyte() noexcept { return inplace_[sizeof(bin) - 1]; } @@ -1758,7 +1758,7 @@ private: (std::numeric_limits::max() >> CHAR_BIT) == inplace_signature_limit, "WTF?"); - return lastbyte() == lastbyte_inplace_signature; + return inplace_lastbyte() == lastbyte_inplace_signature; } constexpr bool is_allocated() const noexcept { return !is_inplace(); } @@ -1772,7 +1772,7 @@ private: if (::std::is_trivial::value) /* workaround for "uninitialized" warning from some compilers */ memset(&allocated_.ptr_, 0, sizeof(allocated_.ptr_)); - lastbyte() = lastbyte_inplace_signature; + inplace_lastbyte() = lastbyte_inplace_signature; MDBX_CONSTEXPR_ASSERT(is_inplace() && address() == inplace_ && is_suitable_for_inplace(capacity())); return address(); From 964ee00116dedc218e456ada11efd81c22feb429 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 23 Oct 2024 11:26:09 +0300 Subject: [PATCH 295/443] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D1=81=D1=82=D1=8B?= =?UTF-8?q?=D0=BB=D1=8C=20=D0=B4=D0=BB=D1=8F=20=D0=BD=D0=B5=D0=BA=D0=BE?= =?UTF-8?q?=D1=80=D1=80=D0=B5=D0=BA=D1=82=D0=BD=D0=BE=D0=B9=20=D0=BE=D0=B1?= =?UTF-8?q?=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D0=BA=D0=B8=20`[[gnu::pure]]`=20?= 
=?UTF-8?q?=D0=B2=20Apple=20Clang.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/mdbx.h b/mdbx.h index 2a6a29d9..e3915991 100644 --- a/mdbx.h +++ b/mdbx.h @@ -218,10 +218,14 @@ typedef mode_t mdbx_mode_t; #ifndef __has_feature #define __has_feature(x) (0) +#define __has_exceptions_disabled (0) +#else +#define __has_exceptions_disabled \ + (__has_feature(cxx_noexcept) && !__has_feature(cxx_exceptions)) #endif /* __has_feature */ #ifndef __has_extension -#define __has_extension(x) (0) +#define __has_extension(x) __has_feature(x) #endif /* __has_extension */ #ifndef __has_builtin @@ -236,11 +240,13 @@ typedef mode_t mdbx_mode_t; * These functions should be declared with the attribute pure. */ #if defined(DOXYGEN) #define MDBX_PURE_FUNCTION [[gnu::pure]] -#elif __has_C23_or_CXX_attribute(gnu::pure) +#elif __has_C23_or_CXX_attribute(gnu::pure) && \ + (!defined(__apple_build_version__) || !defined(__clang_major__) || \ + __clang_major__ > 17) #define MDBX_PURE_FUNCTION [[gnu::pure]] #elif (defined(__GNUC__) || __has_attribute(__pure__)) && \ (!defined(__clang__) /* https://bugs.llvm.org/show_bug.cgi?id=43275 */ || \ - !defined(__cplusplus) || !__has_feature(cxx_exceptions)) + !defined(__cplusplus) || __has_exceptions_disabled) #define MDBX_PURE_FUNCTION __attribute__((__pure__)) #else #define MDBX_PURE_FUNCTION @@ -278,11 +284,13 @@ typedef mode_t mdbx_mode_t; * It does not make sense for a const function to return void. 
*/ #if defined(DOXYGEN) #define MDBX_CONST_FUNCTION [[gnu::const]] -#elif __has_C23_or_CXX_attribute(gnu::const) +#elif __has_C23_or_CXX_attribute(gnu::const) && \ + (!defined(__apple_build_version__) || !defined(__clang_major__) || \ + __clang_major__ > 17) #define MDBX_CONST_FUNCTION [[gnu::const]] #elif (defined(__GNUC__) || __has_attribute(__const__)) && \ (!defined(__clang__) /* https://bugs.llvm.org/show_bug.cgi?id=43275 */ || \ - !defined(__cplusplus) || !__has_feature(cxx_exceptions)) + !defined(__cplusplus) || __has_exceptions_disabled) #define MDBX_CONST_FUNCTION __attribute__((__const__)) #else #define MDBX_CONST_FUNCTION MDBX_PURE_FUNCTION From 7232d7b5fcba92c91b28d9e85c9f9bf721d3ba36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 23 Oct 2024 13:22:53 +0300 Subject: [PATCH 296/443] =?UTF-8?q?mdbx:=20=D1=83=D1=82=D0=BE=D1=87=D0=BD?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D0=B8=D1=81=D0=B0=D0=BD?= =?UTF-8?q?=D0=B8=D1=8F=20`mdbx=5Fdbi=5Fclose()`=20=D0=B4=D0=BB=D1=8F=20?= =?UTF-8?q?=D1=81=D0=BB=D1=83=D1=87=D0=B0=D1=8F=20=D1=85=D0=B5=D0=BD=D0=B4?= =?UTF-8?q?=D0=BB=D0=BE=D0=B2=20=D0=B8=D0=B7=D0=BC=D0=B5=D0=BD=D0=B5=D0=BD?= =?UTF-8?q?=D0=BD=D1=8B=D1=85=20=D1=82=D0=B0=D0=B1=D0=BB=D0=B8=D1=86.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/mdbx.h b/mdbx.h index e3915991..4f7d11dd 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2013,7 +2013,7 @@ typedef enum MDBX_error { MDBX_DUPLICATED_CLK = -30413, /** Some cursors and/or other resources should be closed before table or - * corresponding DBI-handle could be (re)used */ + * corresponding DBI-handle could be (re)used and/or closed. 
*/ MDBX_DANGLING_DBI = -30412, /** The parked read transaction was outed for the sake of @@ -4904,9 +4904,12 @@ LIBMDBX_INLINE_API(int, mdbx_dbi_flags, * \ref mdbx_env_set_maxdbs(), unless that value would be large. * * \note Use with care. - * This call is synchronized via mutex with \ref mdbx_dbi_close(), but NOT with - * other transactions running by other threads. The "next" version of libmdbx - * (\ref MithrilDB) will solve this issue. + * This call is synchronized via mutex with \ref mdbx_dbi_open(), but NOT with + * any transaction(s) running by other thread(s). + * So the `mdbx_dbi_close()` MUST NOT be called in-parallel/concurrently + * with any transactions using the closing dbi-handle, nor while another thread + * commits/aborts write transaction(s). The "next" version of libmdbx (\ref + * MithrilDB) will solve this issue. * * Handles should only be closed if no other threads are going to reference * the table handle or one of its cursors any further. Do not close a handle From 3049bb87b5b14d83b16d121c186ce8fb3f21383e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 23 Oct 2024 13:25:06 +0300 Subject: [PATCH 297/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20`mdbx=5Fclose=5Fdbi()`=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20=D0=B2=D0=BE=D0=B7=D0=B2=D1=80=D0=B0=D1=82=D0=B0?= =?UTF-8?q?=20`MDBX=5FDANGLING=5FDBI`=20=D0=BF=D1=80=D0=B8=20=D0=BF=D0=BE?= =?UTF-8?q?=D0=BF=D1=8B=D1=82=D0=BA=D0=B5=20=D0=B7=D0=B0=D0=BA=D1=80=D1=8B?= =?UTF-8?q?=D1=82=D1=8C=20dbi-=D1=85=D0=B5=D0=BD=D0=B4=D0=BB=20=D0=B8?= =?UTF-8?q?=D0=B7=D0=BC=D0=B5=D0=BD=D0=B5=D0=BD=D0=BD=D0=BE=D0=B9=20=D0=B2?= =?UTF-8?q?=20=D1=82=D1=80=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B8?= =?UTF-8?q?=20=D1=82=D0=B0=D0=B1=D0=BB=D0=B8=D1=86=D1=8B.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dbi.c | 47
++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/src/dbi.c b/src/dbi.c index bc0e9dcb..aa60fe8d 100644 --- a/src/dbi.c +++ b/src/dbi.c @@ -859,8 +859,53 @@ int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { return MDBX_BAD_DBI; rc = osal_fastmutex_acquire(&env->dbi_lock); - if (likely(rc == MDBX_SUCCESS)) + if (likely(rc == MDBX_SUCCESS && dbi < env->n_dbi)) { + retry: + if (env->basal_txn && (env->dbs_flags[dbi] & DB_VALID) && + (env->basal_txn->flags & MDBX_TXN_FINISHED) == 0) { + /* LY: Опасный код, так как env->txn может быть изменено в другом потоке. + * К сожалению тут нет надежного решения и может быть падение при неверном + * использовании API (вызове mdbx_dbi_close конкурентно с завершением + * пишущей транзакции). + * + * Для минимизации вероятности падения сначала проверяем dbi-флаги + * в basal_txn, а уже после в env->txn. Таким образом, падение может быть + * только при коллизии с завершением вложенной транзакции. + * + * Альтернативно можно попробовать выполнять обновление/put записи в + * mainDb соответствующей таблице закрываемого хендла. Семантически это + * верный путь, но проблема в текущем API, в котором исторически dbi-хендл + * живет и закрывается вне транзакции. Причем проблема не только в том, + * что нет указателя на текущую пишущую транзакцию, а в том что + * пользователь точно не ожидает что закрытие хендла приведет к + * скрытой/непрозрачной активности внутри транзакции потенциально + * выполняемой в другом потоке. Другими словами, проблема может быть + * только при неверном использовании API и если пользователь это + * допускает, то точно не будет ожидать скрытых действий внутри + * транзакции, и поэтому этот путь потенциально более опасен. 
*/ + const MDBX_txn *const hazard = env->txn; + osal_compiler_barrier(); + if ((dbi_state(env->basal_txn, dbi) & + (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO) { + bailout_dirty_dbi: + osal_fastmutex_release(&env->dbi_lock); + return MDBX_DANGLING_DBI; + } + osal_memory_barrier(); + if (unlikely(hazard != env->txn)) + goto retry; + if (hazard != env->basal_txn && hazard && + (hazard->flags & MDBX_TXN_FINISHED) == 0 && + hazard->signature == txn_signature && + (dbi_state(hazard, dbi) & (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > + DBI_LINDO) + goto bailout_dirty_dbi; + osal_compiler_barrier(); + if (unlikely(hazard != env->txn)) + goto retry; + } rc = defer_and_release(env, dbi_close_locked(env, dbi)); + } return rc; } From 80708f9591a43b056fb0094b2ef02f5c259c7b26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 23 Oct 2024 19:12:31 +0300 Subject: [PATCH 298/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20T-=D0=BC=D0=B0=D0=BA=D1=80?= =?UTF-8?q?=D0=BE=D1=81=D0=BE=D0=B2=20=D0=B4=D0=BB=D1=8F=20=D0=BF=D0=B0?= =?UTF-8?q?=D1=80=D0=BD=D1=8B=D1=85=20`char`/`wchar=5Ft`=20=D1=84=D1=83?= =?UTF-8?q?=D0=BD=D0=BA=D1=86=D0=B8=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/mdbx.h b/mdbx.h index 4f7d11dd..2ae43a68 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2559,6 +2559,11 @@ LIBMDBX_API int mdbx_env_open(MDBX_env *env, const char *pathname, * \see mdbx_env_open() */ LIBMDBX_API int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, MDBX_env_flags_t flags, mdbx_mode_t mode); +#define mdbx_env_openT(env, pathname, flags, mode) \ + mdbx_env_openW(env, pathname, flags, mode) +#else +#define mdbx_env_openT(env, pathname, flags, mode) \ + mdbx_env_open(env, pathname, flags, mode) #endif /* 
Windows */ /** \brief Deletion modes for \ref mdbx_env_delete(). @@ -2609,6 +2614,9 @@ LIBMDBX_API int mdbx_env_delete(const char *pathname, * \see mdbx_env_delete() */ LIBMDBX_API int mdbx_env_deleteW(const wchar_t *pathname, MDBX_env_delete_mode_t mode); +#define mdbx_env_deleteT(pathname, mode) mdbx_env_deleteW(pathname, mode) +#else +#define mdbx_env_deleteT(pathname, mode) mdbx_env_delete(pathname, mode) #endif /* Windows */ /** \brief Copy an MDBX environment to the specified path, with options. @@ -2730,6 +2738,7 @@ LIBMDBX_API int mdbx_txn_copy2pathname(MDBX_txn *txn, const char *dest, * \see mdbx_env_copy() */ LIBMDBX_API int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest, MDBX_copy_flags_t flags); +#define mdbx_env_copyT(env, dest, flags) mdbx_env_copyW(env, dest, flags) /** \copydoc mdbx_txn_copy2pathname() * \ingroup c_extra @@ -2737,6 +2746,12 @@ LIBMDBX_API int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest, * \see mdbx_txn_copy2pathname() */ LIBMDBX_API int mdbx_txn_copy2pathnameW(MDBX_txn *txn, const wchar_t *dest, MDBX_copy_flags_t flags); +#define mdbx_txn_copy2pathnameT(txn, dest, flags) \ + mdbx_txn_copy2pathnameW(txn, dest, flags) +#else +#define mdbx_env_copyT(env, dest, flags) mdbx_env_copy(env, dest, flags) +#define mdbx_txn_copy2pathnameT(txn, dest, flags) \ + mdbx_txn_copy2pathname(txn, dest, flags) #endif /* Windows */ /** \brief Copy an environment to the specified file descriptor, with @@ -3403,6 +3418,9 @@ LIBMDBX_API int mdbx_env_get_path(const MDBX_env *env, const char **dest); * \note Available only on Windows. * \see mdbx_env_get_path() */ LIBMDBX_API int mdbx_env_get_pathW(const MDBX_env *env, const wchar_t **dest); +#define mdbx_env_get_pathT(env, dest) mdbx_env_get_pathW(env, dest) +#else +#define mdbx_env_get_pathT(env, dest) mdbx_env_get_path(env, dest) #endif /* Windows */ /** \brief Return the file descriptor for the given environment.
@@ -6426,6 +6444,11 @@ LIBMDBX_API int mdbx_env_open_for_recoveryW(MDBX_env *env, const wchar_t *pathname, unsigned target_meta, bool writeable); +#define mdbx_env_open_for_recoveryT(env, pathname, target_mets, writeable) \ + mdbx_env_open_for_recoveryW(env, pathname, target_mets, writeable) +#else +#define mdbx_env_open_for_recoveryT(env, pathname, target_mets, writeable) \ + mdbx_env_open_for_recovery(env, pathname, target_mets, writeable) #endif /* Windows */ /** \brief Turn database to the specified meta-page. @@ -6475,6 +6498,11 @@ LIBMDBX_API int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *info, * \see mdbx_preopen_snapinfo() */ LIBMDBX_API int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *info, size_t bytes); +#define mdbx_preopen_snapinfoT(pathname, info, bytes) \ + mdbx_preopen_snapinfoW(pathname, info, bytes) +#else +#define mdbx_preopen_snapinfoT(pathname, info, bytes) \ + mdbx_preopen_snapinfo(pathname, info, bytes) #endif /* Windows */ /** \brief Флаги/опции для проверки целостности базы данных. 
From 5dc1f36b67f2d8c8003f4ff0abd27097e82f5bba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 23 Oct 2024 20:35:21 +0300 Subject: [PATCH 299/443] =?UTF-8?q?mdbx++:=20=D1=81=D0=BC=D0=B5=D0=BD?= =?UTF-8?q?=D0=B0=20=D0=B1=D0=B0=D0=B7=D0=BE=D0=B2=D0=BE=D0=B3=D0=BE=20?= =?UTF-8?q?=D1=82=D0=B8=D0=BF=D0=B0=20=D0=BD=D0=B0=20`intptr=5Ft`=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20=D1=80=D0=B0=D0=B7=D0=BC=D0=B5=D1=80=D0=BD=D1=8B?= =?UTF-8?q?=D1=85=20=D0=BA=D0=BE=D0=BD=D1=81=D1=82=D0=B0=D0=BD=D1=82=20`md?= =?UTF-8?q?bx::env::geometry`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index fbf8c28b..bd139ade 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3570,22 +3570,26 @@ public: /// create_parameters &, const operate_parameters &, bool accede) struct LIBMDBX_API_TYPE geometry { - enum : int64_t { + enum : intptr_t { default_value = -1, ///< Means "keep current or use default" minimal_value = 0, ///< Means "minimal acceptable" maximal_value = INTPTR_MAX, ///< Means "maximal acceptable" - kB = 1000, ///< \f$10^{3}\f$ bytes - MB = kB * 1000, ///< \f$10^{6}\f$ bytes - GB = MB * 1000, ///< \f$10^{9}\f$ bytes - TB = GB * 1000, ///< \f$10^{12}\f$ bytes - PB = TB * 1000, ///< \f$10^{15}\f$ bytes - EB = PB * 1000, ///< \f$10^{18}\f$ bytes - KiB = 1024, ///< \f$2^{10}\f$ bytes - MiB = KiB << 10, ///< \f$2^{20}\f$ bytes - GiB = MiB << 10, ///< \f$2^{30}\f$ bytes - TiB = GiB << 10, ///< \f$2^{40}\f$ bytes - PiB = TiB << 10, ///< \f$2^{50}\f$ bytes - EiB = PiB << 10, ///< \f$2^{60}\f$ bytes + kB = 1000, ///< \f$10^{3}\f$ bytes (0x03E8) + MB = kB * 1000, ///< \f$10^{6}\f$ bytes (0x000F_4240) + GB = MB * 1000, ///< \f$10^{9}\f$ bytes (0x3B9A_CA00) +#if INTPTR_MAX > 0x7fffFFFFl + TB = GB * 1000, ///< \f$10^{12}\f$ 
bytes (0x0000_00E8_D4A5_1000) + PB = TB * 1000, ///< \f$10^{15}\f$ bytes (0x0003_8D7E_A4C6_8000) + EB = PB * 1000, ///< \f$10^{18}\f$ bytes (0x0DE0_B6B3_A764_0000) +#endif /* 64-bit intptr_t */ + KiB = 1024, ///< \f$2^{10}\f$ bytes (0x0400) + MiB = KiB << 10, ///< \f$2^{20}\f$ bytes (0x0010_0000) + GiB = MiB << 10, ///< \f$2^{30}\f$ bytes (0x4000_0000) +#if INTPTR_MAX > 0x7fffFFFFl + TiB = GiB << 10, ///< \f$2^{40}\f$ bytes (0x0000_0100_0000_0000) + PiB = TiB << 10, ///< \f$2^{50}\f$ bytes (0x0004_0000_0000_0000) + EiB = PiB << 10, ///< \f$2^{60}\f$ bytes (0x1000_0000_0000_0000) +#endif /* 64-bit intptr_t */ }; /// \brief Tagged type for output to std::ostream From 03077773cb2e12442d7afff29d7f9703aa287fb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 24 Oct 2024 01:17:08 +0300 Subject: [PATCH 300/443] =?UTF-8?q?mdbx-testing:=20=D0=B4=D0=BE=D0=B1?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`extra/early=5Fclo?= =?UTF-8?q?se=5Fdbi`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 1 + test/extra/early_close_dbi.c++ | 105 +++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 test/extra/early_close_dbi.c++ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 697f27fd..21bb7318 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -278,6 +278,7 @@ else() add_extra_test(dupfix_addodd SOURCE extra/dupfix_addodd.c) endif() if(MDBX_BUILD_CXX) + add_extra_test(early_close_dbi) add_extra_test(maindb_ordinal) add_extra_test(dupfix_multiple) add_extra_test(hex_base64_base58) diff --git a/test/extra/early_close_dbi.c++ b/test/extra/early_close_dbi.c++ new file mode 100644 index 00000000..d9723107 --- /dev/null +++ b/test/extra/early_close_dbi.c++ @@ -0,0 +1,105 @@ +#include "mdbx.h++" +#include + +static const char *const testkey = 
"testkey"; +static uint64_t testval = 11; + +int main(int argc, char *argv[]) { + (void)argc; + (void)argv; + + mdbx::path db_filename = "test-early_close_dbi"; + mdbx::env_managed::remove(db_filename); + + MDBX_env *environment; + MDBX_MAYBE_UNUSED int err = mdbx_env_create(&environment); + assert(err == MDBX_SUCCESS); + + err = mdbx_env_set_option(environment, MDBX_opt_max_db, 2); + assert(err == MDBX_SUCCESS); + err = mdbx_env_set_option(environment, MDBX_opt_max_readers, 2); + assert(err == MDBX_SUCCESS); + // status = mdbx_env_set_option(environment, MDBX_opt_prefault_write_enable, + // 1); assert(err == MDBX_SUCCESS); + + intptr_t lowerbound(0), size(0), upperbound(mdbx::env::geometry::GiB / 2); + intptr_t step(128 * mdbx::env::geometry::MiB), + shrink(256 * mdbx::env::geometry::MiB), pagesize(-1); + err = mdbx_env_set_geometry(environment, lowerbound, size, upperbound, step, + shrink, pagesize); + assert(err == MDBX_SUCCESS); + + MDBX_env_flags_t flags(MDBX_NOSUBDIR | MDBX_WRITEMAP | MDBX_LIFORECLAIM | + MDBX_NORDAHEAD); + err = mdbx_env_openT(environment, db_filename.c_str(), flags, 0644); + assert(err == MDBX_SUCCESS); + + // --- + + MDBX_txn *transaction; + err = mdbx_txn_begin(environment, nullptr, MDBX_TXN_READWRITE, &transaction); + assert(err == MDBX_SUCCESS); + + MDBX_dbi textindex; + err = mdbx_dbi_open(transaction, "testdb", MDBX_DB_DEFAULTS, &textindex); + assert(err == MDBX_NOTFOUND); + err = mdbx_dbi_open(transaction, "testdb", MDBX_CREATE, &textindex); + assert(err == MDBX_SUCCESS); + + MDBX_val mdbxkey{(void *)testkey, std::strlen(testkey)}, mdbxval{}; + err = mdbx_get(transaction, textindex, &mdbxkey, &mdbxval); + assert(err == MDBX_NOTFOUND); + + unsigned dbi_flags, dbi_state; + err = mdbx_dbi_flags_ex(transaction, textindex, &dbi_flags, &dbi_state); + assert(err == MDBX_SUCCESS); + assert((dbi_state & (MDBX_DBI_CREAT | MDBX_DBI_DIRTY)) != 0); + err = mdbx_dbi_close(environment, textindex); + assert(err == MDBX_DANGLING_DBI); + + err = 
mdbx_txn_commit(transaction); + assert(err == MDBX_SUCCESS); + + // --- + + err = mdbx_txn_begin(environment, nullptr, MDBX_TXN_READWRITE, &transaction); + assert(err == MDBX_SUCCESS); + + MDBX_val mdbxput{&testval, sizeof(uint64_t)}; + err = mdbx_put(transaction, textindex, &mdbxkey, &mdbxput, MDBX_NOOVERWRITE); + assert(err == MDBX_SUCCESS); + err = mdbx_get(transaction, textindex, &mdbxkey, &mdbxval); + assert(err == MDBX_SUCCESS); + assert(testval == *reinterpret_cast<uint64_t *>(mdbxval.iov_base)); + + err = mdbx_put(transaction, textindex, &mdbxkey, &mdbxput, MDBX_NOOVERWRITE); + assert(err == MDBX_KEYEXIST); + err = mdbx_get(transaction, textindex, &mdbxkey, &mdbxval); + assert(err == MDBX_SUCCESS); + assert(testval == *reinterpret_cast<uint64_t *>(mdbxval.iov_base)); + + err = mdbx_dbi_flags_ex(transaction, textindex, &dbi_flags, &dbi_state); + assert(err == MDBX_SUCCESS); + assert((dbi_state & MDBX_DBI_DIRTY) != 0); + err = mdbx_dbi_close(environment, textindex); + assert(err == MDBX_DANGLING_DBI); + err = mdbx_txn_commit(transaction); + assert(err == MDBX_SUCCESS); + + // --- + + err = mdbx_txn_begin(environment, nullptr, MDBX_TXN_RDONLY, &transaction); + assert(err == MDBX_SUCCESS); + err = mdbx_get(transaction, textindex, &mdbxkey, &mdbxval); + assert(err == MDBX_SUCCESS); + assert(testval == *reinterpret_cast<uint64_t *>(mdbxval.iov_base)); + + err = mdbx_dbi_close(environment, textindex); + assert(err == MDBX_SUCCESS); + err = mdbx_txn_commit(transaction); + assert(err == MDBX_SUCCESS); + err = mdbx_env_close_ex(environment, true); + assert(err == MDBX_SUCCESS); + + return 0; +} From 3d6e1964221dd83e811692170ce6be6888863374 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 25 Oct 2024 12:31:31 +0300 Subject: [PATCH 301/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D1=82=D1=80=D1=8B?=
=?UTF-8?q?=D1=82=D0=B8=D1=8F=20=D1=82=D0=B0=D0=B1=D0=BB=D0=B8=D1=86=D1=8B?= =?UTF-8?q?=20=D1=81=20=D0=BF=D1=83=D1=81=D1=82=D1=8B=D0=BC/=D0=BD=D1=83?= =?UTF-8?q?=D0=BB=D0=B5=D0=B2=D1=8B=D0=BC=20=D0=B8=D0=BC=D0=B5=D0=BD=D0=B5?= =?UTF-8?q?=D0=BC=20=D0=B8=20=D1=83=D1=81=D1=82=D1=80=D0=B0=D0=BD=D0=B5?= =?UTF-8?q?=D0=BD=D0=B8=D0=B5=20`SIGSEGV`=20=D0=BF=D1=80=D0=B8=20=D0=B5?= =?UTF-8?q?=D1=91=20=D0=B7=D0=B0=D0=BA=D1=80=D1=8B=D1=82=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dbi.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/dbi.c b/src/dbi.c index aa60fe8d..fb6577c3 100644 --- a/src/dbi.c +++ b/src/dbi.c @@ -472,14 +472,11 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, } /* Done here so we cannot fail after creating a new DB */ - void *clone = nullptr; - if (name.iov_len) { - clone = osal_malloc(dbi_namelen(name)); - if (unlikely(!clone)) - return MDBX_ENOMEM; - name.iov_base = memcpy(clone, name.iov_base, name.iov_len); - } else - name.iov_base = ""; + defer_free_item_t *const clone = osal_malloc(dbi_namelen(name)); + if (unlikely(!clone)) + return MDBX_ENOMEM; + memcpy(clone, name.iov_base, name.iov_len); + name.iov_base = clone; uint8_t dbi_state = DBI_LINDO | DBI_VALID | DBI_FRESH; if (unlikely(rc)) { From b8f9b8659cfff2f4ae6f2e23393bd0685e1224e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 26 Oct 2024 23:23:17 +0300 Subject: [PATCH 302/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index 2deef601..922766cb 100644 --- a/ChangeLog.md +++ 
b/ChangeLog.md @@ -11,14 +11,30 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic Исправления: + - Доработка `mdbx_dbi_close()` для возврата ошибки `MDBX_DANGLING_DBI` + при попытке закрыть dbi-дескриптор таблицы, созданной и/или измененной в + ещё выполняющейся транзакции. Такое преждевременное закрытие дескриптора + является неверным использованием API и нарушением контракта/предусловий + сформулированных в описании `mdbx_dbi_close()`. Однако, вместо возврата + ошибки выполнялось некорректное закрытие дескриптора, что могло + приводить к созданию таблицы с пустым именем, утечке страниц БД и/или + нарушению структуры b-tree (неверной ссылкой на корень таблицы). + + - Исправление открытия таблицы с пустым/нулевым именем и устранение + `SIGSEGV` при закрытии её дескриптора. + - Добавлены упущенные inline-реализации `mdbx::cursor::upper_bound()` и `mdbx::cursor::upper_bound_multivalue()`. - Корректировка описания С++ API для использования термина "таблица" вместо "sub-database". - Исправление условия внутри `assert()` в пути обработки `MDBX_GET/NEXT/PREV_MULTIPLE`. - Допущение 4-байтового выравнивания данных `MDBX_MULTIPLE` для 32-битных сборок. + - Добавлен костыль для устранения проблем из-за некорректной обработки `[[gnu::pure]]` в Apple Clang. Новое: - Добавление `mdbx::cursor::get_multiple_samelength()` и переименование `mdbx::txn::put_multiple_samelength()`. - Возвращение ключа при выполнении операции `MDBX_GET_MULTIPLE` для единообразия C++ API. + - Смена базового типа на `intptr_t` для размерных констант `mdbx::env::geometry`. + - Включение стандарта `C23` в CMake-скриптах сборки. + - Добавление T-макросов для парных `char`/`wchar_t` функций. Мелочи: @@ -32,6 +48,9 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Доработка `osal_jitter()` для уменьшения задержек в тестах под Windows. - Исправление максимальной длины значений в тесте `extra/crunched-delete`.
- Добавление логирования С++ исключений в `extra/dupfix_multiple`. + - Корректировка API-макросов для Doxygen. + - Уточнение описания `mdbx_dbi_close()` для случая хендлов измененных таблиц. + - Добавление теста `extra/early_close_dbi`. -------------------------------------------------------------------------------- From 90642bffaba4b2570e3eb8948dca9a135e9742ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 27 Oct 2024 22:59:20 +0300 Subject: [PATCH 303/443] =?UTF-8?q?mdbx:=20=D0=B8=D0=BC=D0=BF=D0=BE=D1=80?= =?UTF-8?q?=D1=82=20=D0=BE=D0=B1=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D1=8F=20=D0=BF=D0=B0=D1=82=D1=87=D0=B0=20=D0=B4=D0=BB=D1=8F=20?= =?UTF-8?q?=D1=81=D1=82=D0=B0=D1=80=D1=8B=D1=85=20=D0=B2=D0=B5=D1=80=D1=81?= =?UTF-8?q?=D0=B8=D0=B9=20buildroot=20=D0=B8=D0=B7=20=D0=B2=D0=B5=D1=82?= =?UTF-8?q?=D0=BA=D0=B8=20`stable`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...libmdbx-new-package-library-database.patch | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/packages/buildroot/0001-package-libmdbx-new-package-library-database.patch b/packages/buildroot/0001-package-libmdbx-new-package-library-database.patch index 40b50c7e..98ffaa79 100644 --- a/packages/buildroot/0001-package-libmdbx-new-package-library-database.patch +++ b/packages/buildroot/0001-package-libmdbx-new-package-library-database.patch @@ -1,8 +1,8 @@ -From 22732cf074188912caea6da6795c36098efb7eb5 Mon Sep 17 00:00:00 2001 +From 40efe497b511c322470aa9c084fe4c1759788c57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= -Date: Mon, 23 Oct 2023 18:07:13 +0300 -Subject: [PATCH] package/libmdbx: new package (library/database). 
+Date: Sun, 27 Oct 2024 22:34:19 +0300 +Subject: [PATCH 1/1] package/libmdbx: new package (library/database). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit @@ -15,8 +15,9 @@ This patch adds libmdbx: in terms of reliability, features and performance. - more information at https://gitflic.ru/project/erthink/libmdbx -The v0.12.10 "SEM" is stable release of _libmdbx_ branch with new superior features, -in memory of the Hero of Russia Guard Major Dmitry Semenov with the call sign "SEM". +The 0.12.12 "Dollezhal" is stable release of _libmdbx_ branch in memory +of the Soviet energy scientist Nikolai Antonovich Dollezhal on the 125th +anniversary of his birth. The complete ChangeLog: https://gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md @@ -111,18 +112,18 @@ index 0000000000..a9a4ac45c5 + !BR2_TOOLCHAIN_GCC_AT_LEAST_4_4 diff --git a/package/libmdbx/libmdbx.hash b/package/libmdbx/libmdbx.hash new file mode 100644 -index 0000000000..1dbdef7c69 +index 0000000000..222e7caf5c --- /dev/null +++ b/package/libmdbx/libmdbx.hash @@ -0,0 +1,5 @@ +# Hashes from: https://libmdbx.dqdkfa.ru/release/SHA256SUMS -+sha256 4637e06768a9a8fc7577e6e458e045ad1f9a8baee74996a3c88bc0ad64cbcf67 libmdbx-amalgamated-0.12.10.tar.xz ++sha256 19c0eb33e1ed43ca2a94dceb06dd31946432d16f30a9751d3701c67efa22eb1a libmdbx-amalgamated-0.12.12.tar.xz + +# Locally calculated +sha256 310fe25c858a9515fc8c8d7d1f24a67c9496f84a91e0a0e41ea9975b1371e569 LICENSE diff --git a/package/libmdbx/libmdbx.mk b/package/libmdbx/libmdbx.mk new file mode 100644 -index 0000000000..36df45b64f +index 0000000000..da4a53913f --- /dev/null +++ b/package/libmdbx/libmdbx.mk @@ -0,0 +1,42 @@ @@ -132,7 +133,7 @@ index 0000000000..36df45b64f +# +################################################################################ + -+LIBMDBX_VERSION = 0.12.10 ++LIBMDBX_VERSION = 0.12.12 +LIBMDBX_SOURCE = libmdbx-amalgamated-$(LIBMDBX_VERSION).tar.xz +LIBMDBX_SITE = 
https://libmdbx.dqdkfa.ru/release +LIBMDBX_SUPPORTS_IN_SOURCE_BUILD = NO @@ -169,5 +170,5 @@ index 0000000000..36df45b64f + +$(eval $(cmake-package)) -- -2.43.2 +2.47.0 From dc6f29a0465b2d9496298c4fd09119585e10c50b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 27 Oct 2024 23:00:34 +0300 Subject: [PATCH 304/443] =?UTF-8?q?mdbx:=20=D0=B8=D0=BC=D0=BF=D0=BE=D1=80?= =?UTF-8?q?=D1=82=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB=D0=BD=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D0=B9=20ChangeLog=20=D0=B8=D0=B7=20=D0=B2=D0=B5=D1=82=D0=BA?= =?UTF-8?q?=D0=B8=20`stable`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/ChangeLog.md b/ChangeLog.md index 922766cb..a05af74e 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -30,6 +30,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Добавлен костыль для устранения проблем из-за некорректной обработки `[[gnu::pure]]` в Apple Clang. Новое: + - Добавление `mdbx::cursor::get_multiple_samelength()` и переименование `mdbx::txn::put_multiple_samelength()`. - Возвращение ключа при выполнении операции `MDBX_GET_MULTIPLE` для единообразия C++ API. - Смена базового типа на `intptr_t` для размерных констант `mdbx::env::geometry`. @@ -52,6 +53,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Уточнение описания `mdbx_dbi_close()` для случая хендлов измененных таблиц. - Добавление теста `extra/early_close_dbi`. 
+ -------------------------------------------------------------------------------- @@ -221,6 +223,48 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic ******************************************************************************** +## v0.12.12 "Доллежаль" от 2024-10-27 + +Поддерживающий выпуск с исправлением обнаруженных ошибок и устранением недочетов, +в память о советском ученом-энергетике Николае Антоновиче Доллежале в день 125-летия со дня его рождения. + +Это последний выпуск куста стабильных версий 0.12.x, спустя более двух +лет после выпуска 0.12.1. Последующие выпуски 0.12.x будут формироваться +только в случае существенных проблем/ошибок, вероятность чего близка к +нулю. Для всех проектов находящихся в стадии активной разработки +рекомендуется использовать ветку `master`. + +``` +git diff' stat: x commits, y files changed, z insertions(+), zz deletions(-) +Signed-off-by: Леонид Юрьев (Leonid Yuriev) +``` + +Значимые исправления: + + - Исправление упущенного `TXN_END_EOTDONE` при сбое старта читающей транзакции. + Упомянутый флажок отсутствовал в пути разрушения транзакции при ошибке + её запуска. Из-за чего делалась попытка разрушить курсоры, что приводило + к падению **отладочных сборок**, так как в них соответствующий массив + намеренно заполнен некорректными указателями. + + - Устранение возможности `SIGSEGV` внутри `coherency_check()` после + изменения геометрии другим процессом с увеличением верхнего размера БД + и увеличением БД больше предыдущего лимита. + + - Доработка `mdbx_close_dbi()` для возврата ошибки при попытке закрыть + dbi-дескриптор таблицы, созданной и/или измененной в ещё выполняющейся + транзакции. Такое преждевременное закрытие дескриптора является неверным + использованием API и нарушением контракта/предусловий сформулированных + в описании `mdbx_close_dbi()`.
Однако, вместо возврата ошибки + выполнялось некорректное закрытие дескриптора, что могло приводить к + созданию таблицы с пустым именем, утечке страниц БД и/или нарушению + структуры b-tree (неверной ссылкой на корень таблицы). + Добавлен соответствующий тест `extra/early_close_dbi`. + + +-------------------------------------------------------------------------------- + + ## v0.12.11 "Лиза и Соня" от 2024-07-23 Поддерживающий выпуск с исправлением обнаруженных ошибок и устранением недочетов, @@ -234,7 +278,6 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic и наведение ATACAMS невозможно без использования орбитальной группировки военных спутников США. - ``` git diff' stat: 29 commits, 14 files changed, 379 insertions(+), 151 deletions(-) Signed-off-by: Леонид Юрьев (Leonid Yuriev) From 8571eac81bff1c08bfe5ff2020951dac5f27cee5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 28 Oct 2024 08:55:37 +0300 Subject: [PATCH 305/443] =?UTF-8?q?mdbx-cmake:=20=D0=BA=D0=BE=D1=80=D1=80?= =?UTF-8?q?=D0=B5=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20=D0=BF?= =?UTF-8?q?=D1=80=D0=BE=D0=B1=D1=8B=20OpenMP.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cmake/compiler.cmake | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cmake/compiler.cmake b/cmake/compiler.cmake index a6c7f618..e05df1a6 100644 --- a/cmake/compiler.cmake +++ b/cmake/compiler.cmake @@ -368,13 +368,15 @@ else() set(CMAKE_REQUIRED_FLAGS "-fopenmp -Werror") if(CMAKE_CXX_COMPILER_LOADED) check_cxx_source_compiles("int main(void) { - #pragma omp parallel - return 0; + #pragma omp for + for(int i = 0, j = 0; i != 42; i = 1 + i * 12345) j += i % 43; + return j; }" HAVE_OPENMP) else() check_c_source_compiles("int main(void) { - #pragma omp parallel - return 0; + #pragma omp for + for(int i = 0, j =
0; i != 42; i = 1 + i * 12345) j += i % 43; + return j; }" HAVE_OPENMP) endif() set(CMAKE_REQUIRED_FLAGS "") From de36d94acab21e6bd3c0a211b8c6cf24836dc908 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 4 Nov 2024 20:42:39 +0300 Subject: [PATCH 306/443] =?UTF-8?q?mdbx:=20=D0=BF=D1=80=D0=B5=D0=B4=D0=BE?= =?UTF-8?q?=D1=82=D0=B2=D1=80=D0=B0=D1=89=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B2?= =?UTF-8?q?=D0=BA=D0=BB=D1=8E=D1=87=D0=B5=D0=BD=D0=B8=D1=8F=20=D0=BE=D1=82?= =?UTF-8?q?=D0=BB=D0=B0=D0=B4=D0=BA=D0=B8=20=D1=82=D0=BE=D0=BB=D1=8C=D0=BA?= =?UTF-8?q?=D0=BE=20=D0=B8=D0=B7-=D0=B7=D0=B0=20=D0=B0=D0=BA=D1=82=D0=B8?= =?UTF-8?q?=D0=B2=D0=B0=D1=86=D0=B8=D0=B8=20assert-=D0=BF=D1=80=D0=BE?= =?UTF-8?q?=D0=B2=D0=B5=D1=80=D0=BE=D0=BA.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/preface.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/preface.h b/src/preface.h index 83576d6c..8bffbf6e 100644 --- a/src/preface.h +++ b/src/preface.h @@ -7,6 +7,10 @@ #if (defined(MDBX_DEBUG) && MDBX_DEBUG > 0) || \ (defined(MDBX_FORCE_ASSERTIONS) && MDBX_FORCE_ASSERTIONS) #undef NDEBUG +#ifndef MDBX_DEBUG +/* Чтобы избежать включения отладки только из-за включения assert-проверок */ +#define MDBX_DEBUG 0 +#endif #endif /*----------------------------------------------------------------------------*/ From 2c919c0efed65dd46f3fa6ae2b4d1f2c91a15545 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 5 Nov 2024 15:19:58 +0300 Subject: [PATCH 307/443] =?UTF-8?q?mdbx-testing:=20=D0=BF=D1=80=D0=B5?= =?UTF-8?q?=D0=B4=D0=BE=D1=82=D0=B2=D1=80=D0=B0=D1=89=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D0=B5=20=D0=BF=D0=BE=D1=82=D0=B5=D1=80=D0=B8=20=D0=BB=D0=BE?= =?UTF-8?q?=D0=B3=D0=BE=D0=B2=20=D0=B8=D0=B7-=D0=B7=D0=B0=20=D0=BE=D1=82?= 
=?UTF-8?q?=D1=81=D1=82=D1=80=D0=B5=D0=BB=D0=B0=20gzip/lz4=20=D0=B2=20?= =?UTF-8?q?=D1=81=D0=BE=D1=81=D1=82=D0=B0=D0=B2=D0=B5=20=D0=B3=D1=80=D1=83?= =?UTF-8?q?=D0=BF=D0=BF=D1=8B=20=D0=BF=D1=80=D0=BE=D1=86=D0=B5=D1=81=D1=81?= =?UTF-8?q?=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/long_stochastic.sh | 52 +++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index 4d951e4a..eb7293e5 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -365,9 +365,15 @@ fi ############################################################################### # 5. run stochastic iterations +if which setsid >/dev/null 2>/dev/null; then + SETSID=$(which setsid) +else + SETSID="" +fi + if which lz4 >/dev/null; then function logger { - lz4 > ${TESTDB_DIR}/long.log.lz4 + ${SETSID} lz4 -z -c > ${TESTDB_DIR}/long.log.lz4 || echo "FAILED 'lz4 -z -c > ${TESTDB_DIR}/long.log.lz4'" >&2 } function taillog { if [ -s ${TESTDB_DIR}/long.log.lz4 ]; then @@ -379,7 +385,7 @@ if which lz4 >/dev/null; then } elif which gzip >/dev/null; then function logger { - gzip > ${TESTDB_DIR}/long.log.gz + ${SETSID} gzip -c -k > ${TESTDB_DIR}/long.log.gz || echo "FAILED 'gzip -c -k > ${TESTDB_DIR}/long.log.gz'" >&2 } function taillog { if [ -s ${TESTDB_DIR}/long.log.gz ]; then @@ -391,7 +397,7 @@ elif which gzip >/dev/null; then } else function logger { - cat > ${TESTDB_DIR}/long.log + cat > ${TESTDB_DIR}/long.log || echo "FAILED 'cat > ${TESTDB_DIR}/long.log'" >&2 } function taillog { if [ -s ${TESTDB_DIR}/long.log ]; then @@ -421,21 +427,37 @@ function bits2options { join , ${list[@]} } +LFD=0 +trap "echo 'SIGPIPE(ignored)'" SIGPIPE + function failed { + set +euo pipefail echo "FAILED" >&2 + if [ ${LFD} -ne 0 ]; then + sleep 0.05 + echo "@@@ END-OF-LOG/FAILED" >&${LFD} + sleep 0.05 + exec {LFD}>&- + LFD=0 + fi if [ ${TAILLOG} -gt 0 ]; then 
taillog fi exit 1 } -function check_deep { - if [ "$case" = "basic" -o "$case" = "--hill" ]; then - tee >(logger) | grep -e reach -e achieve - else - logger +function on_exit { + set +euo pipefail + if [ ${LFD} -ne 0 ]; then + sleep 0.05 + echo "@@@ END-OF-LOG/EXIT" >&${LFD} + sleep 0.05 + exec {LFD}>&- + LFD=0 fi + echo "--- EXIT" >&2 } +trap on_exit EXIT function probe { echo "----------------------------------------------- $(date)" @@ -444,11 +466,21 @@ function probe { for case in $LIST do echo "Run ./mdbx_test ${speculum} --random-writemap=no --ignore-dbfull --repeat=11 --pathname=${TESTDB_DIR}/long.db --cleanup-after=no --geometry-jitter=${GEOMETRY_JITTER} $@ $case" - ${MONITOR} ./mdbx_test ${speculum} --random-writemap=no --ignore-dbfull --repeat=11 --pathname=${TESTDB_DIR}/long.db --cleanup-after=no --geometry-jitter=${GEOMETRY_JITTER} "$@" $case | check_deep \ + if [ "$case" = "basic" -o "$case" = "--hill" ]; then + exec {LFD}> >(tee -p -i >(logger) | grep -e reach -e achieve) + else + exec {LFD}> >(logger) + fi + ${MONITOR} ./mdbx_test ${speculum} --random-writemap=no --ignore-dbfull --repeat=11 --pathname=${TESTDB_DIR}/long.db --cleanup-after=no --geometry-jitter=${GEOMETRY_JITTER} "$@" $case >&${LFD} \ && ${MONITOR} ./mdbx_chk ${TESTDB_DIR}/long.db | tee ${TESTDB_DIR}/long-chk.log \ && ([ ! 
-e ${TESTDB_DIR}/long.db-copy ] || ${MONITOR} ./mdbx_chk ${TESTDB_DIR}/long.db-copy | tee ${TESTDB_DIR}/long-chk-copy.log) \ || failed - done + if [ ${LFD} -ne 0 ]; then + echo "@@@ END-OF-LOG/ITERATION" >&${LFD} + exec {LFD}>&- + LFD=0 + fi + done } #------------------------------------------------------------------------------ From 6067ba5f9d026d6c41298d51c434c5255435e7df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 5 Nov 2024 19:25:10 +0300 Subject: [PATCH 308/443] =?UTF-8?q?mdbx-testing:=20=D0=BC=D0=B8=D0=BD?= =?UTF-8?q?=D0=B8=D0=BC=D0=B0=D0=BB=D1=8C=D0=BD=D0=B0=D1=8F=20=D0=BE=D0=B1?= =?UTF-8?q?=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D0=BA=D0=B0=20`SIGINT`/`SIGTERM`?= =?UTF-8?q?/`SIGHUP`/`SIGQUIT`=20=D0=B4=D0=BB=D1=8F=20=D0=BF=D1=80=D0=BE?= =?UTF-8?q?=D0=B7=D1=80=D0=B0=D1=87=D0=BD=D0=BE=D1=81=D1=82=D0=B8=20=D0=BF?= =?UTF-8?q?=D1=80=D0=B5=D1=80=D1=8B=D0=B2=D0=B0=D0=BD=D0=B8=D0=B9=20=D0=B2?= =?UTF-8?q?=20=D0=BB=D0=BE=D0=B3=D0=B0=D1=85.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/osal-unix.c++ | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/test/osal-unix.c++ b/test/osal-unix.c++ index df340c1c..2c099d85 100644 --- a/test/osal-unix.c++ +++ b/test/osal-unix.c++ @@ -342,6 +342,12 @@ static void handler_SIGCHLD(int signum) { ++sigalarm_head; } +static std::atomic_int sigbreak; +static void handler_SIGBREAK(int signum) { + (void)signum; + ++sigbreak; +} + int osal_delay(unsigned seconds) { return sleep(seconds) ? 
errno : 0; } int osal_actor_start(const actor_config &config, mdbx_pid_t &pid) { @@ -349,6 +355,17 @@ int osal_actor_start(const actor_config &config, mdbx_pid_t &pid) { if (children.empty()) { struct sigaction act; memset(&act, 0, sizeof(act)); + act.sa_handler = handler_SIGBREAK; + sigaction(SIGTERM, &act, nullptr); + sigaction(SIGHUP, &act, nullptr); + sigaction(SIGINT, &act, nullptr); + sigaction(SIGQUIT, &act, nullptr); +#ifdef SIGXCPU + sigaction(SIGXCPU, &act, nullptr); +#endif +#ifdef SIGXFSZ + sigaction(SIGXFSZ, &act, nullptr); +#endif act.sa_handler = handler_SIGCHLD; sigaction(SIGCHLD, &act, nullptr); sigaction(SIGALRM, &act, nullptr); @@ -500,7 +517,7 @@ int osal_actor_poll(mdbx_pid_t &pid, unsigned timeout) { sigalarm_tail = sigalarm_head /* reset timeout flag */; int options = WNOHANG; - if (timeout) { + if (timeout && !sigbreak) { alarm((timeout > INT_MAX) ? INT_MAX : timeout); options = 0; } @@ -573,7 +590,7 @@ int osal_actor_poll(mdbx_pid_t &pid, unsigned timeout) { if (err != EINTR) return err; } - return 0 /* timeout */; + return sigbreak ? 
EINTR : 0 /* timeout */; } void osal_yield(void) { From 157ede4e42db379ce4236770e4ad1d23fd019da3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 6 Nov 2024 10:28:23 +0300 Subject: [PATCH 309/443] =?UTF-8?q?mdbx-testing:=20=D0=B4=D0=BE=D0=B1?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D1=86?= =?UTF-8?q?=D0=B8=D0=B8=20`--report-depth`=20=D0=B4=D0=BB=D1=8F=20=D1=81?= =?UTF-8?q?=D0=BE=D0=BA=D1=80=D0=B0=D1=89=D0=B5=D0=BD=D0=B8=D1=8F=20=D0=BD?= =?UTF-8?q?=D0=B0=D0=BA=D0=BB=D0=B0=D0=B4=D0=BD=D1=8B=D1=85=20=D1=80=D0=B0?= =?UTF-8?q?=D1=81=D1=85=D0=BE=D0=B4=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/long_stochastic.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index eb7293e5..d35b32fc 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -18,6 +18,7 @@ DONT_CHECK_RAM=no EXTRA=no TAILLOG=0 DELAY=0 +REPORT_DEPTH=no while [ -n "$1" ] do @@ -43,6 +44,7 @@ do echo "--extra Iterate extra modes/flags" echo "--taillog Dump tail of test log on failure" echo "--delay NN Delay NN seconds before run test" + echo "--report-depth Report tree depth (tee+grep log)" echo "--help Print this usage help and exit" exit -2 ;; @@ -173,6 +175,9 @@ do DELAY=$(($2)) shift ;; + --report-depth) + REPORT_DEPTH=yes + ;; *) echo "Unknown option '$1'" exit -2 @@ -466,7 +471,7 @@ function probe { for case in $LIST do echo "Run ./mdbx_test ${speculum} --random-writemap=no --ignore-dbfull --repeat=11 --pathname=${TESTDB_DIR}/long.db --cleanup-after=no --geometry-jitter=${GEOMETRY_JITTER} $@ $case" - if [ "$case" = "basic" -o "$case" = "--hill" ]; then + if [[ ${REPORT_DEPTH} = "yes" && ($case = "basic" || $case = "--hill") ]]; then exec {LFD}> >(tee -p -i >(logger) | grep -e reach -e achieve) else exec {LFD}> 
>(logger) From 2669f285f9b917d3af6992bc0129b631ad342e19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 6 Nov 2024 11:19:32 +0300 Subject: [PATCH 310/443] =?UTF-8?q?mdbx-testing:=20=D0=BF=D1=80=D0=BE?= =?UTF-8?q?=D0=B2=D0=B5=D1=80=D0=BA=D0=B0=20=D0=BF=D0=BE=D0=B4=D0=B4=D0=B5?= =?UTF-8?q?=D1=80=D0=B6=D0=BA=D0=B8=20`tee=20-p`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/long_stochastic.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index d35b32fc..876f784e 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -472,7 +472,10 @@ function probe { do echo "Run ./mdbx_test ${speculum} --random-writemap=no --ignore-dbfull --repeat=11 --pathname=${TESTDB_DIR}/long.db --cleanup-after=no --geometry-jitter=${GEOMETRY_JITTER} $@ $case" if [[ ${REPORT_DEPTH} = "yes" && ($case = "basic" || $case = "--hill") ]]; then - exec {LFD}> >(tee -p -i >(logger) | grep -e reach -e achieve) + if [ -z "${TEE4PIPE:-}" ]; then + TEE4PIPE=$(tee --help | grep -q ' -p' && echo "tee -i -p" || echo "tee -i") + fi + exec {LFD}> >(${TEE4PIPE} >(logger) | grep -e reach -e achieve) else exec {LFD}> >(logger) fi From af41bcf11e716f17478b3ece64a72f718f00eb3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 6 Nov 2024 11:34:46 +0300 Subject: [PATCH 311/443] =?UTF-8?q?mdbx-testing:=20=D0=B8=D1=81=D0=BF?= =?UTF-8?q?=D1=80=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF?= =?UTF-8?q?=D0=B5=D1=87=D0=B0=D1=82=D0=BA=D0=B8=20=D0=B4=D0=BB=D1=8F=20`/u?= =?UTF-8?q?sr/bin/banner`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/long_stochastic.sh | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index 876f784e..92776b80 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -10,7 +10,7 @@ MONITOR= LOOPS= SKIP_MAKE=no GEOMETRY_JITTER=yes -BANNER="$(which banner 2>/dev/null | echo echo)" +BANNER="$(which banner 2>/dev/null || echo echo)" UNAME="$(uname -s 2>/dev/null || echo Unknown)" DB_UPTO_MB=17408 PAGESIZE=min From 029f14280b06ab5d086f177a0547332cec417c29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 6 Nov 2024 13:12:08 +0300 Subject: [PATCH 312/443] =?UTF-8?q?mdbx-testing:=20=D1=83=D0=B4=D0=B0?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`/usr/bin/time`=20=D1=82=D0=B0?= =?UTF-8?q?=D0=BA=20=D0=BA=D0=B0=D0=BA=20`rusage()`=20=D0=B5=D1=81=D1=82?= =?UTF-8?q?=D1=8C=20=D0=B2=20=D0=BA=D0=BE=D0=B4=D0=B5=20=D1=82=D0=B5=D1=81?= =?UTF-8?q?=D1=82=D0=B0.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/long_stochastic.sh | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index 92776b80..2deec0c8 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -188,12 +188,6 @@ done set -euo pipefail if [ -z "$MONITOR" ]; then - if which time >/dev/null 2>/dev/null; then - MONITOR=$(which time) - if $MONITOR -o /dev/stdout true >/dev/null 2>/dev/null; then - MONITOR="$MONITOR -o /dev/stdout" - fi - fi export MALLOC_CHECK_=7 MALLOC_PERTURB_=42 fi From acb15790b4f3d553458999a04488e7525f1e86f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 6 Nov 2024 13:42:33 +0300 Subject: [PATCH 313/443] =?UTF-8?q?mdbx-testing:=20=D0=BA=D0=BE=D1=81?= =?UTF-8?q?=D0=BC=D0=B5=D1=82=D0=B8=D0=BA=D0=B0=20=D0=B2=20`long=5Fstochas?= 
=?UTF-8?q?tic.sh`=20=D0=B4=D0=BB=D1=8F=20=D1=83=D0=BC=D0=B5=D0=BD=D1=8C?= =?UTF-8?q?=D1=88=D0=B5=D0=BD=D0=B8=D1=8F=20=D0=BE=D0=B1=D1=8A=D0=B5=D0=BC?= =?UTF-8?q?=D0=B0=20=D0=B2=D1=8B=D0=B2=D0=BE=D0=B4=D0=B0=20=D0=B2=20=D0=BA?= =?UTF-8?q?=D0=BE=D0=BD=D1=81=D0=BE=D0=BB=D1=8C.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/long_stochastic.sh | 66 ++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index 2deec0c8..76e3c709 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -460,11 +460,11 @@ trap on_exit EXIT function probe { echo "----------------------------------------------- $(date)" - echo "${caption}" + echo "PROBE №${caption}" rm -f ${TESTDB_DIR}/* || failed for case in $LIST do - echo "Run ./mdbx_test ${speculum} --random-writemap=no --ignore-dbfull --repeat=11 --pathname=${TESTDB_DIR}/long.db --cleanup-after=no --geometry-jitter=${GEOMETRY_JITTER} $@ $case" + echo "${speculum} --random-writemap=no --ignore-dbfull --repeat=11 --pathname=${TESTDB_DIR}/long.db --cleanup-after=no --geometry-jitter=${GEOMETRY_JITTER} $@ $case" if [[ ${REPORT_DEPTH} = "yes" && ($case = "basic" || $case = "--hill") ]]; then if [ -z "${TEE4PIPE:-}" ]; then TEE4PIPE=$(tee --help | grep -q ' -p' && echo "tee -i -p" || echo "tee -i") @@ -474,8 +474,8 @@ function probe { exec {LFD}> >(logger) fi ${MONITOR} ./mdbx_test ${speculum} --random-writemap=no --ignore-dbfull --repeat=11 --pathname=${TESTDB_DIR}/long.db --cleanup-after=no --geometry-jitter=${GEOMETRY_JITTER} "$@" $case >&${LFD} \ - && ${MONITOR} ./mdbx_chk ${TESTDB_DIR}/long.db | tee ${TESTDB_DIR}/long-chk.log \ - && ([ ! -e ${TESTDB_DIR}/long.db-copy ] || ${MONITOR} ./mdbx_chk ${TESTDB_DIR}/long.db-copy | tee ${TESTDB_DIR}/long-chk-copy.log) \ + && ${MONITOR} ./mdbx_chk -q ${TESTDB_DIR}/long.db | tee ${TESTDB_DIR}/long-chk.log \ + && ([ ! 
-e ${TESTDB_DIR}/long.db-copy ] || ${MONITOR} ./mdbx_chk -q ${TESTDB_DIR}/long.db-copy | tee ${TESTDB_DIR}/long-chk-copy.log) \ || failed if [ ${LFD} -ne 0 ]; then echo "@@@ END-OF-LOG/ITERATION" >&${LFD} @@ -509,102 +509,102 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 seed=$(($(date +%s) + RANDOM)) split=30 - caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" 
probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} split=24 - caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups 
--keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} split=16 - caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max 
\ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe 
#$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} if [ "$EXTRA" != "no" ]; then split=10 - caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) w/o-dups, 
split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} fi split=4 - caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of 
${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + caption="$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M 
--table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} done # options From 6c56ed97bbd8ca46abac61886a113ba31e5f1291 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 6 Nov 2024 17:10:50 +0300 Subject: [PATCH 314/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0/=D0=B8=D1=81=D0=BF=D1=80=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B8=20=D0=BF=D0=BE=D1=81?= =?UTF-8?q?=D1=82=D0=BE=D1=8F=D0=BD=D0=BD=D0=B0=D1=8F=20=D0=B0=D0=BA=D1=82?= =?UTF-8?q?=D0=B8=D0=B2=D0=B0=D1=86=D0=B8=D1=8F=20=D0=BA=D0=BE=D1=80=D1=80?= =?UTF-8?q?=D0=B5=D0=BA=D1=82=D0=B8=D1=80=D1=83=D1=8E=D1=89=D0=B5=D0=B9=20?= =?UTF-8?q?=D0=BE=D0=B1=D1=80=D0=B0=D1=82=D0=BD=D0=BE=D0=B9=20=D1=81=D0=B2?= =?UTF-8?q?=D1=8F=D0=B7=D0=B8=20=D0=BF=D1=80=D0=B8=20=D0=BE=D0=B1=D0=BD?= =?UTF-8?q?=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B8=20GC.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit При обновлении GC, с помещением/возвратом страниц, возникает рекурсивная зависимость, так как страницы, необходимые для CoW-модификации GC и размещения списков возвращаемых страниц, берутся/выделяются из этих-же списков и/или из GC. Эта рекуррентная зависимость разрешается путём подготовки необходимого запаса страниц и двух-стадийным заполнением списков, с повторением всего цикла при изменении ситуации/расклада, плюс применение некоторых эвристик и поправок. Кроме корректной работы, принципиально важным тут является минимизация количества повторов/рестартов процесса, в том числе исключение возможности бесконечного зацикливания. Существующая реализация многократно/итеративно дорабатывалась. Поэтому она неплохо обкатана и стабильна, но одновременно сложна и запутана. 
Тем не менее, до последнего момента для текущей реализации были известны условия/сценарии, в которых сходимость итеративного процесса обновления GC нарушалась и при фиксации транзакции возвращалась ошибка MDBX_PROBLEM. Эти условия/сценарии очень специфичны и далеки от реальных практических случаев, поэтому этот недостаток не мешал использованию библиотеки. Этим коммитом добавляется и активируется еще один механизм нацеленный на улучшение сходимости и минимизацию повторов/рестартов. Суть механизма в формировании и учета поправки, которая на следующем цикле позволит учесть все переходные процессы/затраты вне зависимости от их природы, и этим обеспечить моментальную сходимость. В текущем понимании, описанный выше недостаток полностью устраняется/исправляется этим коммитом. --- src/gc-put.c | 80 ++++++++++------------------------------------------ src/gc.h | 12 ++------ 2 files changed, 17 insertions(+), 75 deletions(-) diff --git a/src/gc-put.c b/src/gc-put.c index a9388143..be740cbf 100644 --- a/src/gc-put.c +++ b/src/gc-put.c @@ -454,6 +454,7 @@ static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot) * txn->env->maxgc_large1page) { if (unlikely(ctx->rid <= MIN_TXNID)) { + ctx->dense = true; if (unlikely(MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) <= ctx->reused_slot)) { NOTICE("** restart: reserve depleted (reused_gc_slot %zu >= " @@ -480,10 +481,10 @@ static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, goto return_error; } const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base); - if (unlikely(gc_first <= MIN_TXNID)) { - DEBUG("%s: no free GC's id(s) less than %" PRIaTXN - " (going dense-mode)", - dbg_prefix(ctx), ctx->rid); + if (unlikely(gc_first <= INITIAL_TXNID)) { + NOTICE("%s: no free GC's id(s) less than %" PRIaTXN + " (going dense-mode)", + dbg_prefix(ctx), ctx->rid); ctx->dense = true; goto return_restart; } @@ -590,19 +591,11 @@ int gc_update(MDBX_txn *txn, gcu_t *ctx) { 
txn->cursors[FREE_DBI] = &ctx->cursor; int rc; - // tASSERT(txn, MDBX_PNL_GETSIZE(txn->tw.retired_pages) || - // ctx->cleaned_slot < - // (txn->tw.gc.reclaimed ? - // MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0) - // || ctx->cleaned_id < txn->tw.gc.last_reclaimed); - /* txn->tw.relist[] can grow and shrink during this call. * txn->tw.gc.last_reclaimed and txn->tw.retired_pages[] can only grow. * But page numbers cannot disappear from txn->tw.retired_pages[]. */ -#if MDBX_ENABLE_GC_EXPERIMENTAL retry_clean_adj: ctx->reserve_adj = 0; -#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ retry: ctx->loop += ctx->prev_first_unallocated == txn->geo.first_unallocated; TRACE(">> restart, loop %u", ctx->loop); @@ -629,7 +622,8 @@ retry: ctx->reserved = 0; ctx->cleaned_slot = 0; ctx->reused_slot = 0; - ctx->amount = ctx->fill_idx = ~0u; + ctx->amount = 0; + ctx->fill_idx = ~0u; ctx->cleaned_id = 0; ctx->rid = txn->tw.gc.last_reclaimed; while (true) { @@ -746,9 +740,7 @@ retry: env->maxgc_large1page / 2)) { TRACE("%s: reclaimed-list changed %zu -> %zu, retry", dbg_prefix(ctx), ctx->amount, MDBX_PNL_GETSIZE(txn->tw.relist)); -#if MDBX_ENABLE_GC_EXPERIMENTAL ctx->reserve_adj += ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.relist); -#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ goto retry; } ctx->amount = MDBX_PNL_GETSIZE(txn->tw.relist); @@ -772,7 +764,6 @@ retry: if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } -#if MDBX_ENABLE_GC_EXPERIMENTAL const size_t left = ctx->amount - ctx->reserved - ctx->reserve_adj; TRACE("%s: amount %zu, reserved %zd, reserve_adj %zu, left %zd, " "lifo-reclaimed-slots %zu, " @@ -780,15 +771,6 @@ retry: dbg_prefix(ctx), ctx->amount, ctx->reserved, ctx->reserve_adj, left, txn->tw.gc.reclaimed ? 
MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0, ctx->reused_slot); -#else - const size_t left = ctx->amount - ctx->reserved; - TRACE("%s: amount %zu, reserved %zd, left %zd, " - "lifo-reclaimed-slots %zu, " - "reused-gc-slots %zu", - dbg_prefix(ctx), ctx->amount, ctx->reserved, left, - txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0, - ctx->reused_slot); -#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ if (0 >= (intptr_t)left) break; @@ -911,9 +893,7 @@ retry: TRACE("%s", " >> filling"); /* Fill in the reserved records */ -#if MDBX_ENABLE_GC_EXPERIMENTAL size_t excess_slots = 0; -#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ ctx->fill_idx = txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot @@ -924,15 +904,17 @@ retry: tASSERT(txn, dpl_check(txn)); if (ctx->amount) { MDBX_val key, data; - key.iov_len = data.iov_len = 0; /* avoid MSVC warning */ + key.iov_len = data.iov_len = 0; key.iov_base = data.iov_base = nullptr; size_t left = ctx->amount, excess = 0; if (txn->tw.gc.reclaimed == nullptr) { tASSERT(txn, is_lifo(txn) == 0); rc = outer_first(&ctx->cursor, &key, &data); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc != MDBX_NOTFOUND) + goto bailout; + } } else { tASSERT(txn, is_lifo(txn) != 0); } @@ -943,36 +925,29 @@ retry: MDBX_PNL_GETSIZE(txn->tw.relist)); if (txn->tw.gc.reclaimed == nullptr) { tASSERT(txn, is_lifo(txn) == 0); - fill_gc_id = unaligned_peek_u64(4, key.iov_base); + fill_gc_id = + key.iov_base ? unaligned_peek_u64(4, key.iov_base) : MIN_TXNID; if (ctx->fill_idx == 0 || fill_gc_id > txn->tw.gc.last_reclaimed) { -#if MDBX_ENABLE_GC_EXPERIMENTAL if (!left) break; -#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ NOTICE("** restart: reserve depleted (fill_idx %zu, fill_id %" PRIaTXN " > last_reclaimed %" PRIaTXN ", left %zu", ctx->fill_idx, fill_gc_id, txn->tw.gc.last_reclaimed, left); -#if MDBX_ENABLE_GC_EXPERIMENTAL ctx->reserve_adj = (ctx->reserve_adj > left) ? 
ctx->reserve_adj - left : 0; -#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ goto retry; } ctx->fill_idx -= 1; } else { tASSERT(txn, is_lifo(txn) != 0); if (ctx->fill_idx >= MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)) { -#if MDBX_ENABLE_GC_EXPERIMENTAL if (!left) break; -#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ NOTICE("** restart: reserve depleted (fill_idx %zu >= " "gc.reclaimed %zu, left %zu", ctx->fill_idx, MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed), left); -#if MDBX_ENABLE_GC_EXPERIMENTAL ctx->reserve_adj = (ctx->reserve_adj > left) ? ctx->reserve_adj - left : 0; -#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ goto retry; } ctx->fill_idx += 1; @@ -1001,12 +976,10 @@ retry: excess += delta; TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, dbg_prefix(ctx), chunk, left, fill_gc_id); -#if MDBX_ENABLE_GC_EXPERIMENTAL if (!left) { excess_slots += 1; goto next; } -#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ if ((ctx->loop < 5 && delta > (ctx->loop / 2)) || delta > env->maxgc_large1page) data.iov_len = (left + 1) * sizeof(pgno_t); @@ -1022,10 +995,8 @@ retry: NOTICE("** restart: reclaimed-list changed (%zu -> %zu, loose +%zu)", ctx->amount, MDBX_PNL_GETSIZE(txn->tw.relist), txn->tw.loose_count); -#if MDBX_ENABLE_GC_EXPERIMENTAL if (ctx->loop < 5 || (ctx->loop > 10 && (ctx->loop & 1))) goto retry_clean_adj; -#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ goto retry; } @@ -1061,23 +1032,16 @@ retry: goto bailout; } -#if MDBX_ENABLE_GC_EXPERIMENTAL next: -#else - if (left == 0) - break; -#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ if (txn->tw.gc.reclaimed == nullptr) { tASSERT(txn, is_lifo(txn) == 0); rc = outer_next(&ctx->cursor, &key, &data, MDBX_NEXT); if (unlikely(rc != MDBX_SUCCESS)) { -#if MDBX_ENABLE_GC_EXPERIMENTAL if (rc == MDBX_NOTFOUND && !left) { rc = MDBX_SUCCESS; break; } -#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ goto bailout; } } else { @@ -1086,14 +1050,12 @@ retry: } if (excess) { -#if MDBX_ENABLE_GC_EXPERIMENTAL size_t n = excess, adj = excess; while (n >= env->maxgc_large1page) adj -= n 
/= env->maxgc_large1page; ctx->reserve_adj += adj; TRACE("%s: extra %zu reserved space, adj +%zu (%zu)", dbg_prefix(ctx), excess, adj, ctx->reserve_adj); -#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ } } @@ -1105,27 +1067,15 @@ retry: goto retry; } -#if MDBX_ENABLE_GC_EXPERIMENTAL if (unlikely(excess_slots)) { const bool will_retry = ctx->loop < 5 || excess_slots > 1; NOTICE("** %s: reserve excess (excess-slots %zu, filled-slot %zu, adj %zu, " - "loop %zu)", + "loop %u)", will_retry ? "restart" : "ignore", excess_slots, ctx->fill_idx, ctx->reserve_adj, ctx->loop); if (will_retry) goto retry; } -#else - if (unlikely(ctx->fill_idx != (txn->tw.gc.reclaimed - ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - : 0))) { - const bool will_retry = ctx->loop < 9; - NOTICE("** %s: reserve excess (filled-idx %zu, loop %u)", - will_retry ? "restart" : "ignore", ctx->fill_idx, ctx->loop); - if (will_retry) - goto retry; - } -#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ tASSERT(txn, txn->tw.gc.reclaimed == nullptr || ctx->cleaned_slot == MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)); diff --git a/src/gc.h b/src/gc.h index 41e787ba..a8a68a24 100644 --- a/src/gc.h +++ b/src/gc.h @@ -5,19 +5,11 @@ #include "essentials.h" -#ifndef MDBX_ENABLE_GC_EXPERIMENTAL -#define MDBX_ENABLE_GC_EXPERIMENTAL 0 -#elif !(MDBX_ENABLE_GC_EXPERIMENTAL == 0 || MDBX_ENABLE_GC_EXPERIMENTAL == 1) -#error MDBX_ENABLE_GC_EXPERIMENTAL must be defined as 0 or 1 -#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ - typedef struct gc_update_context { unsigned loop; pgno_t prev_first_unallocated; bool dense; -#if MDBX_ENABLE_GC_EXPERIMENTAL - intptr_t reserve_adj; -#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */ + size_t reserve_adj; size_t retired_stored; size_t amount, reserved, cleaned_slot, reused_slot, fill_idx; txnid_t cleaned_id, rid; @@ -32,7 +24,7 @@ typedef struct gc_update_context { static inline int gc_update_init(MDBX_txn *txn, gcu_t *ctx) { memset(ctx, 0, offsetof(gcu_t, cursor)); - ctx->dense = txn->txnid < MIN_TXNID; + ctx->dense = 
txn->txnid <= MIN_TXNID; #if MDBX_ENABLE_BIGFOOT ctx->bigfoot = txn->txnid; #endif /* MDBX_ENABLE_BIGFOOT */ From 00be608af95ef851e9747322efa5f8cd180eb49a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 6 Nov 2024 21:17:39 +0300 Subject: [PATCH 315/443] =?UTF-8?q?mdbx-testing:=20=D0=BF=D1=80=D0=BE?= =?UTF-8?q?=D0=B2=D0=B5=D1=80=D0=BA=D0=B0=20=D0=B2=D0=B5=D1=80=D1=81=D0=B8?= =?UTF-8?q?=D0=B8=20bash=20>=3D=204.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/long_stochastic.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index 76e3c709..7d895c2d 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -3,6 +3,12 @@ # Леонид Юрьев aka Leonid Yuriev # SPDX-License-Identifier: Apache-2.0 +if [ "${BASH_VERSION}" \< "4.3" ]; then + echo "Bash-shell 4.3 or later is REQUIRED." 
>&2 + echo "Present Bash-shell version is '${BASH_VERSION}' (BASH_VERSION)" >&2 + exit +fi + LIST=basic FROM=1 UPTO=9999999 From c0e5108d712a2b90de8aa6df302a06cccec8481c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 6 Nov 2024 22:48:36 +0300 Subject: [PATCH 316/443] =?UTF-8?q?mdbx-testing:=20=D1=80=D0=B5=D0=B0?= =?UTF-8?q?=D0=BB=D0=B8=D0=B7=D0=B0=D1=86=D0=B8=D1=8F/=D0=BF=D0=B5=D1=80?= =?UTF-8?q?=D0=B5=D0=BD=D0=BE=D1=81=20=D0=BF=D0=BE=D0=B4=D0=B4=D0=B5=D1=80?= =?UTF-8?q?=D0=B6=D0=BA=D0=B8=20=D1=81=D1=86=D0=B5=D0=BD=D0=B0=D1=80=D0=B8?= =?UTF-8?q?=D1=8F=20=D0=BC=D0=B5=D0=BB=D0=BA=D0=B8=D1=85=20=D1=82=D1=80?= =?UTF-8?q?=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B9=20=D0=B2=20lon?= =?UTF-8?q?g=5Fstochastic=20=D1=81=D0=BA=D1=80=D0=B8=D0=BF=D1=82.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/long_stochastic.sh | 99 ++++++--- test/stochastic_small.sh | 461 --------------------------------------- 2 files changed, 72 insertions(+), 488 deletions(-) delete mode 100755 test/stochastic_small.sh diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index 7d895c2d..0fb64ca2 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -25,6 +25,9 @@ EXTRA=no TAILLOG=0 DELAY=0 REPORT_DEPTH=no +REPEAT=11 +ROUNDS=1 +SMALL=no while [ -n "$1" ] do @@ -40,6 +43,8 @@ do echo "--skip-make Don't (re)build libmdbx and test's executable" echo "--from NN Start iterating from the NN ops per test case" echo "--upto NN Don't run tests with more than NN ops per test case" + echo "--repeat NN Repeat each testcase NN times within test run" + echo "--rounds NN Cycle each n-ops/wbatch case NN times" echo "--loops NN Stop after the NN loops" echo "--dir PATH Specifies directory for test DB and other files (it will be cleared)" echo "--db-upto-mb NN Limits upper size of test DB to the NN megabytes" @@ -51,6 +56,7 
@@ do echo "--taillog Dump tail of test log on failure" echo "--delay NN Delay NN seconds before run test" echo "--report-depth Report tree depth (tee+grep log)" + echo "--small Small transactions/batch/nops pattern" echo "--help Print this usage help and exit" exit -2 ;; @@ -103,6 +109,22 @@ do fi shift ;; + --repeat|--reps|--rep) + REPEAT=$(($2)) + if [ -z "$REPEAT" -o "$REPEAT" -lt 1 -o "$REPEAT" -gt 99 ]; then + echo "Invalid value '$REPEAT' for --repeat option" + exit -2 + fi + shift + ;; + --rounds) + ROUNDS=$(($2)) + if [ -z "$ROUNDS" -o "$ROUNDS" -lt 1 -o "$ROUNDS" -gt 99 ]; then + echo "Invalid value '$ROUNDS' for --rounds option" + exit -2 + fi + shift + ;; --loops) LOOPS=$(($2)) if [ -z "$LOOPS" -o "$LOOPS" -lt 1 -o "$LOOPS" -gt 99 ]; then @@ -184,6 +206,9 @@ do --report-depth) REPORT_DEPTH=yes ;; + --small) + SMALL=yes + ;; *) echo "Unknown option '$1'" exit -2 @@ -470,7 +495,7 @@ function probe { rm -f ${TESTDB_DIR}/* || failed for case in $LIST do - echo "${speculum} --random-writemap=no --ignore-dbfull --repeat=11 --pathname=${TESTDB_DIR}/long.db --cleanup-after=no --geometry-jitter=${GEOMETRY_JITTER} $@ $case" + echo "${speculum} --random-writemap=no --ignore-dbfull --repeat=${REPEAT} --pathname=${TESTDB_DIR}/long.db --cleanup-after=no --geometry-jitter=${GEOMETRY_JITTER} $@ $case" if [[ ${REPORT_DEPTH} = "yes" && ($case = "basic" || $case = "--hill") ]]; then if [ -z "${TEE4PIPE:-}" ]; then TEE4PIPE=$(tee --help | grep -q ' -p' && echo "tee -i -p" || echo "tee -i") @@ -479,7 +504,7 @@ function probe { else exec {LFD}> >(logger) fi - ${MONITOR} ./mdbx_test ${speculum} --random-writemap=no --ignore-dbfull --repeat=11 --pathname=${TESTDB_DIR}/long.db --cleanup-after=no --geometry-jitter=${GEOMETRY_JITTER} "$@" $case >&${LFD} \ + ${MONITOR} ./mdbx_test ${speculum} --random-writemap=no --ignore-dbfull --repeat=${REPEAT} --pathname=${TESTDB_DIR}/long.db --cleanup-after=no --geometry-jitter=${GEOMETRY_JITTER} "$@" $case >&${LFD} \ && ${MONITOR} ./mdbx_chk 
-q ${TESTDB_DIR}/long.db | tee ${TESTDB_DIR}/long-chk.log \ && ([ ! -e ${TESTDB_DIR}/long.db-copy ] || ${MONITOR} ./mdbx_chk -q ${TESTDB_DIR}/long.db-copy | tee ${TESTDB_DIR}/long-chk-copy.log) \ || failed @@ -491,25 +516,14 @@ function probe { done } -#------------------------------------------------------------------------------ - -if [ "$DELAY" != "0" ]; then - sleep $DELAY -fi - -count=0 -loop=0 -cases='?' -for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10000000 33333333 100000000 333333333 1000000000; do - if [ $nops -lt $FROM ]; then continue; fi - if [ $nops -gt $UPTO ]; then echo "The '--upto $UPTO' limit reached"; break; fi - if [ -n "$LOOPS" ] && [ $loop -ge "$LOOPS" ]; then echo "The '--loops $LOOPS' limit reached"; break; fi - echo "=======================================================================" - wbatch=$((nops / 7 + 1)) - speculum=$([ $nops -le 1000 ] && echo '--speculum' || true) - while true; do +function pass { + for ((round=1; round <= ROUNDS; ++round)); do echo "=======================================================================" - ${BANNER} "$nops / $wbatch" + if [[ $ROUNDS > 1 ]]; then + ${BANNER} "$nops / $wbatch / round $round of $ROUNDS" + else + ${BANNER} "$nops / $wbatch" + fi subcase=0 for ((bits=2**${#options[@]}; --bits >= 0; )); do seed=$(($(date +%s) + RANDOM)) @@ -548,7 +562,6 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} - split=16 caption="$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max 
--datalen.min=min --datalen.max=1111 \ @@ -614,12 +627,44 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} done # options - loop=$((loop + 1)) - if [ -n "$LOOPS" ] && [ $loop -ge "$LOOPS" ]; then break; fi cases="${subcase}" - wbatch=$(((wbatch > 7) ? wbatch / 7 : 1)) - if [ $wbatch -eq 1 -o $((nops / wbatch)) -gt 1000 ]; then break; fi - done # batch (write-ops per txn) -done # n-ops + done +} + +#------------------------------------------------------------------------------ + +if [ "$DELAY" != "0" ]; then + sleep $DELAY +fi + +count=0 +loop=0 +cases='?' +if [[ $SMALL != "yes" ]]; then + for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10000000 33333333 100000000 333333333 1000000000; do + if [ $nops -lt $FROM ]; then continue; fi + if [ $nops -gt $UPTO ]; then echo "The '--upto $UPTO' limit reached"; break; fi + if [ -n "$LOOPS" ] && [ $loop -ge "$LOOPS" ]; then echo "The '--loops $LOOPS' limit reached"; break; fi + echo "=======================================================================" + wbatch=$((nops / 7 + 1)) + speculum=$([ $nops -le 1000 ] && echo '--speculum' || true) + while true; do + pass + loop=$((loop + 1)) + if [ -n "$LOOPS" ] && [ $loop -ge "$LOOPS" ]; then break; fi + wbatch=$(((wbatch > 7) ? 
wbatch / 7 : 1)) + if [ $wbatch -eq 1 -o $((nops / wbatch)) -gt 1000 ]; then break; fi + done # batch (write-ops per txn) + done # n-ops +else + for ((wbatch=FROM; wbatch<=UPTO; ++wbatch)); do + if [ -n "$LOOPS" ] && [ $loop -ge "$LOOPS" ]; then echo "The '--loops $LOOPS' limit reached"; break; fi + echo "=======================================================================" + speculum=$([ $wbatch -le 1000 ] && echo '--speculum' || true) + nops=$((wbatch / 7 + 1)) + pass + loop=$((loop + 1)) + done # wbatch +fi echo "=== ALL DONE ====================== $(date)" diff --git a/test/stochastic_small.sh b/test/stochastic_small.sh deleted file mode 100755 index ffdd212e..00000000 --- a/test/stochastic_small.sh +++ /dev/null @@ -1,461 +0,0 @@ -#!/usr/bin/env bash - -# Леонид Юрьев aka Leonid Yuriev -# SPDX-License-Identifier: Apache-2.0 - -LIST=--hill -FROM=1 -UPTO=9999999 -MONITOR= -LOOPS= -SKIP_MAKE=no -GEOMETRY_JITTER=yes -BANNER="$(which banner 2>/dev/null | echo echo)" -UNAME="$(uname -s 2>/dev/null || echo Unknown)" -DB_UPTO_MB=17408 -PAGESIZE=min -DONT_CHECK_RAM=no - -while [ -n "$1" ] -do - case "$1" in - --help) - echo "--multi Engage multi-process test scenario (default)" - echo "--single Execute series of single-process tests (for QEMU, etc)" - echo "--nested Execute only 'nested' testcase" - echo "--hill Execute only 'hill' testcase" - echo "--append Execute only 'append' testcase" - echo "--ttl Execute only 'ttl' testcase" - echo "--with-valgrind Run tests under Valgrind's memcheck tool" - echo "--skip-make Don't (re)build libmdbx and test's executable" - echo "--from NN Start iterating from the NN ops per test case" - echo "--upto NN Don't run tests with more than NN ops per test case" - echo "--loops NN Stop after the NN loops" - echo "--dir PATH Specifies directory for test DB and other files (it will be cleared)" - echo "--db-upto-mb NN Limits upper size of test DB to the NN megabytes" - echo "--no-geometry-jitter Disable jitter for geometry upper-size" 
- echo "--pagesize NN Use specified page size (256 is minimal and used by default) " - echo "--dont-check-ram-size Don't check available RAM " - echo "--help Print this usage help and exit" - exit -2 - ;; - --multi) - LIST=basic - ;; - --single) - LIST="--nested --hill --append --ttl --copy" - ;; - --nested) - LIST="--nested" - ;; - --hill) - LIST="--hill" - ;; - --append) - LIST="--append" - ;; - --ttl) - LIST="--ttl" - ;; - --with-valgrind) - echo " NOTE: Valgrind could produce some false-positive warnings" - echo " in multi-process environment with shared memory." - echo " For instance, when the process 'A' explicitly marks a memory" - echo " region as 'undefined', the process 'B' fill it," - echo " and after this process 'A' read such region, etc." - MONITOR="valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt" - rm -f valgrind-*.log - ;; - --skip-make) - SKIP_MAKE=yes - ;; - --from) - FROM=$(($2)) - if [ -z "$FROM" -o "$FROM" -lt 1 ]; then - echo "Invalid value '$FROM' for --from option" - exit -2 - fi - shift - ;; - --upto) - UPTO=$(($2)) - if [ -z "$UPTO" -o "$UPTO" -lt 1 ]; then - echo "Invalid value '$UPTO' for --upto option" - exit -2 - fi - shift - ;; - --loops) - LOOPS=$(($2)) - if [ -z "$LOOPS" -o "$LOOPS" -lt 1 -o "$LOOPS" -gt 99 ]; then - echo "Invalid value '$LOOPS' for --loops option" - exit -2 - fi - shift - ;; - --dir) - TESTDB_DIR="$2" - if [ -z "$TESTDB_DIR" ]; then - echo "Invalid value '$TESTDB_DIR' for --dir option" - exit -2 - fi - shift - ;; - --db-upto-mb) - DB_UPTO_MB=$(($2)) - if [ -z "$DB_UPTO_MB" -o "$DB_UPTO_MB" -lt 1 -o "$DB_UPTO_MB" -gt 4194304 ]; then - echo "Invalid value '$DB_UPTO_MB' for --db-upto-mb option" - exit -2 - fi - shift - ;; - --no-geometry-jitter) - GEOMETRY_JITTER=no - ;; - --pagesize|--page-size) - case "$2" in - min|max|256|512|1024|2048|4096|8192|16386|32768|65536) - PAGESIZE=$2 - ;; - 
1|1k|1K|k|K) - PAGESIZE=$((1024*1)) - ;; - 2|2k|2K) - PAGESIZE=$((1024*2)) - ;; - 4|4k|4K) - PAGESIZE=$((1024*4)) - ;; - 8|8k|8K) - PAGESIZE=$((1024*8)) - ;; - 16|16k|16K) - PAGESIZE=$((1024*16)) - ;; - 32|32k|32K) - PAGESIZE=$((1024*32)) - ;; - 64|64k|64K) - PAGESIZE=$((1024*64)) - ;; - *) - echo "Invalig page size '$2'" - exit -2 - ;; - esac - shift - ;; - --dont-check-ram-size) - DONT_CHECK_RAM=yes - ;; - *) - echo "Unknown option '$1'" - exit -2 - ;; - esac - shift -done - -set -euo pipefail -if [ -z "$MONITOR" ]; then - if which time >/dev/null 2>/dev/null; then - MONITOR=$(which time) - if $MONITOR -o /dev/stdout true >/dev/null 2>/dev/null; then - MONITOR="$MONITOR -o /dev/stdout" - fi - fi - export MALLOC_CHECK_=7 MALLOC_PERTURB_=42 -fi - -if ! which $([ "$SKIP_MAKE" == "no" ] && echo make cc c++) tee >/dev/null; then - echo "Please install the following prerequisites: make cc c++ tee banner" >&2 - exit 1 -fi - -############################################################################### -# 1. 
clean data from prev runs and examine available RAM - -WANNA_MOUNT=0 -case ${UNAME} in - Linux) - MAKE=make - if [ -z "${TESTDB_DIR:-}" ]; then - for old_test_dir in $(ls -d /dev/shm/mdbx-test.[0-9]* 2>/dev/null); do - rm -rf $old_test_dir - done - TESTDB_DIR="/dev/shm/mdbx-test.$$" - fi - mkdir -p $TESTDB_DIR && rm -f $TESTDB_DIR/* - - if LC_ALL=C free | grep -q -i available; then - ram_avail_mb=$(($(LC_ALL=C free | grep -i Mem: | tr -s '[:blank:]' ' ' | cut -d ' ' -f 7) / 1024)) - else - ram_avail_mb=$(($(LC_ALL=C free | grep -i Mem: | tr -s '[:blank:]' ' ' | cut -d ' ' -f 4) / 1024)) - fi - ;; - - FreeBSD) - MAKE=gmake - if [ -z "${TESTDB_DIR:-}" ]; then - for old_test_dir in $(ls -d /tmp/mdbx-test.[0-9]* 2>/dev/null); do - umount $old_test_dir && rm -r $old_test_dir - done - TESTDB_DIR="/tmp/mdbx-test.$$" - rm -rf $TESTDB_DIR && mkdir -p $TESTDB_DIR - WANNA_MOUNT=1 - else - mkdir -p $TESTDB_DIR && rm -f $TESTDB_DIR/* - fi - ram_avail_mb=$(($(LC_ALL=C vmstat -s | grep -ie '[0-9] pages free$' | cut -d p -f 1) * ($(LC_ALL=C vmstat -s | grep -ie '[0-9] bytes per page$' | cut -d b -f 1) / 1024) / 1024)) - ;; - - Darwin) - MAKE=make - if [ -z "${TESTDB_DIR:-}" ]; then - for vol in $(ls -d /Volumes/mdx[0-9]*[0-9]tst 2>/dev/null); do - disk=$(mount | grep $vol | cut -d ' ' -f 1) - echo "umount: volume $vol disk $disk" - hdiutil unmount $vol -force - hdiutil detach $disk - done - TESTDB_DIR="/Volumes/mdx$$tst" - WANNA_MOUNT=1 - else - mkdir -p $TESTDB_DIR && rm -f $TESTDB_DIR/* - fi - pagesize=$(($(LC_ALL=C vm_stat | grep -o 'page size of [0-9]\+ bytes' | cut -d' ' -f 4) / 1024)) - freepages=$(LC_ALL=C vm_stat | grep '^Pages free:' | grep -o '[0-9]\+\.$' | cut -d'.' 
-f 1) - ram_avail_mb=$((pagesize * freepages / 1024)) - echo "pagesize ${pagesize}K, freepages ${freepages}, ram_avail_mb ${ram_avail_mb}" - ;; - - MSYS*|MINGW*) - if [ -z "${TESTDB_DIR:-}" ]; then - for old_test_dir in $(ls -d /tmp/mdbx-test.[0-9]* 2>/dev/null); do - rm -rf $old_test_dir - done - TESTDB_DIR="/tmp/mdbx-test.$$" - fi - mkdir -p $TESTDB_DIR && rm -f $TESTDB_DIR/* - - echo "FIXME: Fake support for ${UNAME}" - ram_avail_mb=32768 - ;; - - *) - echo "FIXME: ${UNAME} not supported by this script" - exit 2 - ;; -esac - -rm -f ${TESTDB_DIR}/* - -############################################################################### -# 2. estimate reasonable RAM space for test-db - -echo "=== ${ram_avail_mb}M RAM available" -if [ $DONT_CHECK_RAM = yes ]; then - db_size_mb=$DB_UPTO_MB - ram_reserve4logs_mb=64 -else - ram_reserve4logs_mb=1234 - if [ $ram_avail_mb -lt $ram_reserve4logs_mb ]; then - echo "=== At least ${ram_reserve4logs_mb}Mb RAM required" - exit 3 - fi - -# -# В режимах отличных от MDBX_WRITEMAP изменения до записи в файл -# будут накапливаться в памяти, что может потребовать свободной -# памяти размером с БД. Кроме этого, в тест входит сценарий -# создания копия БД на ходу. Поэтому БД не может быть больше 1/3 -# от доступной памяти. Однако, следует учесть что malloc() будет -# не сразу возвращать выделенную память системе, а также -# предусмотреть места для логов. -# -# In non-MDBX_WRITEMAP modes, updates (dirty pages) will -# accumulate in memory before writing to the disk, which may -# require a free memory up to the size of a whole database. In -# addition, the test includes a script create a copy of the -# database on the go. Therefore, the database cannot be more 1/3 -# of available memory. Moreover, should be taken into account -# that malloc() will not return the allocated memory to the -# system immediately, as well some space is required for logs. 
-# - db_size_mb=$(((ram_avail_mb - ram_reserve4logs_mb) / 4)) - if [ $db_size_mb -gt $DB_UPTO_MB ]; then - db_size_mb=$DB_UPTO_MB - fi -fi -echo "=== use ${db_size_mb}M for DB" - -############################################################################### -# 3. Create test-directory in ramfs/tmpfs, i.e. create/format/mount if required -case ${UNAME} in - Linux) - ulimit -c unlimited - if [ "$(cat /proc/sys/kernel/core_pattern)" != "core.%p" ]; then - echo "core.%p > /proc/sys/kernel/core_pattern" >&2 - if [ $(id -u) -ne 0 -a -n "$(which sudo 2>/dev/null)" ]; then - echo "core.%p" | sudo tee /proc/sys/kernel/core_pattern || true - else - (echo "core.%p" > /proc/sys/kernel/core_pattern) || true - fi - fi - ;; - - FreeBSD) - if [[ WANNA_MOUNT ]]; then - mount -t tmpfs tmpfs $TESTDB_DIR - fi - ;; - - Darwin) - if [[ WANNA_MOUNT ]]; then - ramdisk_size_mb=$((42 + db_size_mb * 2 + ram_reserve4logs_mb)) - number_of_sectors=$((ramdisk_size_mb * 2048)) - ramdev=$(hdiutil attach -nomount ram://${number_of_sectors}) - diskutil erasevolume ExFAT "mdx$$tst" ${ramdev} - fi - ;; - - MSYS*|MINGW*) - echo "FIXME: Fake support for ${UNAME}" - ;; - - *) - echo "FIXME: ${UNAME} not supported by this script" - exit 2 - ;; -esac - -############################################################################### -# 4. build the test executables - -if [ "$SKIP_MAKE" != "yes" ]; then - ${MAKE} -j$(which nproc >/dev/null 2>/dev/null && nproc || echo 2) build-test -fi - -############################################################################### -# 5. 
run stochastic iterations - -if which lz4 >/dev/null; then - function logger { - lz4 > ${TESTDB_DIR}/long.log.lz4 - } -elif which gzip >/dev/null; then - function logger { - gzip > ${TESTDB_DIR}/long.log.gz - } -else - function logger { - cat > ${TESTDB_DIR}/long.log - } -fi - -syncmodes=("" ,+nosync-safe ,+nosync-utterly) -options=(writemap lifo nostickythreads perturb) - -function join { local IFS="$1"; shift; echo "$*"; } - -function bits2options { - local bits=$1 - local i - local list=() - for ((i = 0; i < ${#options[@]}; ++i)); do - list[$i]=$( (( (bits & (1 << i)) != 0 )) && echo -n '+' || echo -n '-'; echo ${options[$i]}) - done - join , ${list[@]} -} - -function failed { - echo "FAILED" >&2 - exit 1 -} - -function check_deep { - if [ "$case" = "basic" -o "$case" = "--hill" ]; then - tee >(logger) | grep -e reach -e achieve - else - logger - fi -} - -function probe { - echo "----------------------------------------------- $(date)" - echo "${caption}" - rm -f ${TESTDB_DIR}/* || failed - for case in $LIST - do - echo "Run ./mdbx_test ${speculum} --random-writemap=no --ignore-dbfull --repeat=11 --pathname=${TESTDB_DIR}/long.db --cleanup-after=no --geometry-jitter=${GEOMETRY_JITTER} $@ $case" - ${MONITOR} ./mdbx_test ${speculum} --random-writemap=no --ignore-dbfull --repeat=11 --pathname=${TESTDB_DIR}/long.db --cleanup-after=no --geometry-jitter=${GEOMETRY_JITTER} "$@" $case | check_deep \ - && ${MONITOR} ./mdbx_chk ${TESTDB_DIR}/long.db | tee ${TESTDB_DIR}/long-chk.log \ - && ([ ! -e ${TESTDB_DIR}/long.db-copy ] || ${MONITOR} ./mdbx_chk ${TESTDB_DIR}/long.db-copy | tee ${TESTDB_DIR}/long-chk-copy.log) \ - || failed - done -} - -#------------------------------------------------------------------------------ - -count=0 -loop=0 -cases='?' 
-for ((wbatch=FROM; wbatch<=UPTO; ++wbatch)); do - if [ -n "$LOOPS" ] && [ $loop -ge "$LOOPS" ]; then echo "The '--loops $LOOPS' limit reached"; break; fi - echo "=======================================================================" - speculum=$([ $wbatch -le 1000 ] && echo '--speculum' || true) - nops=$((wbatch/7 + 1)) - for ((rep=1; rep < 11; ++rep)); do - echo "=======================================================================" - ${BANNER} "$nops / $wbatch, repeat $rep" - subcase=0 - for ((bits=2**${#options[@]}; --bits >= 0; )); do - seed=$(($(date +%s) + RANDOM)) - - split=30 - caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=4K --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \ - --keygen.seed=${seed} - - split=24 - caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=4K --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \ - --keygen.seed=${seed} - - split=16 - caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=4K --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \ - --keygen.seed=${seed} - caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=4K --size-upper-upto=${db_size_mb}M 
--table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \ - --keygen.seed=${seed} - caption="Probe #$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=4K --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \ - --keygen.seed=${seed} - - split=4 - caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=4K --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \ - --keygen.seed=${seed} - caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=4K --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \ - --keygen.seed=${seed} - caption="Probe #$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=4K --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \ - --keygen.seed=${seed} - done # options - cases="${subcase}" - done # repeats - loop=$((loop + 1)) - if [ -n "$LOOPS" ] && [ $loop -ge "$LOOPS" ]; then break; fi -done # wbatch - -echo "=== ALL DONE 
====================== $(date)" From ca8e9fe7b19431070e3357099830c065c66220cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 6 Nov 2024 22:55:54 +0300 Subject: [PATCH 317/443] =?UTF-8?q?mdbx-testing:=20=D0=BF=D0=B5=D1=80?= =?UTF-8?q?=D0=B5=D0=B8=D0=BC=D0=B5=D0=BD=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8?= =?UTF-8?q?=D0=B5=20=D1=81=D0=BA=D1=80=D0=B8=D0=BF=D1=82=D0=B0=20`stochast?= =?UTF-8?q?ic.sh`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 4 ++-- GNUmakefile | 16 ++++++++-------- README.md | 6 +++--- test/battery-tmux.sh | 2 +- test/{long_stochastic.sh => stochastic.sh} | 0 5 files changed, 14 insertions(+), 14 deletions(-) rename test/{long_stochastic.sh => stochastic.sh} (100%) diff --git a/ChangeLog.md b/ChangeLog.md index a05af74e..d98a95c0 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -756,7 +756,7 @@ Signed-off-by: Леонид Юрьев (Leonid Yuriev) все они были несущественные, либо ложные. - Устранено ложное предупреждение GCC при сборке для SH4. - Добавлена поддержка ASAN (Address Sanitizer) при сборке посредством MSVC. - - Расширен набор перебираемых режимов в скрипте `test/long_stochastic.sh`, + - Расширен набор перебираемых режимов в скрипте `test/stochastic.sh`, добавлена опция `--extra`. - В C++ API добавлена поддержка расширенных опций времени выполнения `mdbx::extra_runtime_option`, аналогично `enum MDBX_option_t` из C API. @@ -1066,7 +1066,7 @@ Signed-off-by: Леонид Юрьев (Leonid Yuriev) - Уменьшение в 42 раза значения по-умолчанию для `me_options.dp_limit` в отладочных сборках. - Добавление платформы `gcc-riscv64-linux-gnu` в список для цели `cross-gcc`. - - Небольшие правки скрипта `long_stochastic.sh` для работы в Windows. + - Небольшие правки скрипта `stochastic.sh` для работы в Windows. - Удаление ненужного вызова `LockFileEx()` внутри `mdbx_env_copy()`. 
- Добавлено описание использования файловых дескрипторов в различных режимах. - Добавлено использование `_CrtDbgReport()` в отладочных сборках. diff --git a/GNUmakefile b/GNUmakefile index 2b404988..5c5a20fd 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -425,23 +425,23 @@ smoke-fault: build-test ; ./mdbx_chk -vvnw $(TEST_DB) && ([ ! -e $(TEST_DB)-copy ] || ./mdbx_chk -vvn $(TEST_DB)-copy) test: build-test - @echo ' RUNNING `test/long_stochastic.sh --loops 2`...' - $(QUIET)test/long_stochastic.sh --dont-check-ram-size --loops 2 --db-upto-mb 256 --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) + @echo ' RUNNING `test/stochastic.sh --loops 2`...' + $(QUIET)test/stochastic.sh --dont-check-ram-size --loops 2 --db-upto-mb 256 --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) long-test: test-long test-long: build-test - @echo ' RUNNING `test/long_stochastic.sh --loops 42`...' - $(QUIET)test/long_stochastic.sh --loops 42 --db-upto-mb 1024 --extra --skip-make --taillog + @echo ' RUNNING `test/stochastic.sh --loops 42`...' + $(QUIET)test/stochastic.sh --loops 42 --db-upto-mb 1024 --extra --skip-make --taillog test-singleprocess: build-test - @echo ' RUNNING `test/long_stochastic.sh --single --loops 2`...' - $(QUIET)test/long_stochastic.sh --dont-check-ram-size --single --loops 2 --db-upto-mb 256 --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) + @echo ' RUNNING `test/stochastic.sh --single --loops 2`...' + $(QUIET)test/stochastic.sh --dont-check-ram-size --single --loops 2 --db-upto-mb 256 --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) test-valgrind: test-memcheck test-memcheck: CFLAGS_EXTRA=-Ofast -DENABLE_MEMCHECK test-memcheck: build-test - @echo ' RUNNING `test/long_stochastic.sh --with-valgrind --loops 2`...' - $(QUIET)test/long_stochastic.sh --with-valgrind --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false) + @echo ' RUNNING `test/stochastic.sh --with-valgrind --loops 2`...' 
+ $(QUIET)test/stochastic.sh --with-valgrind --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false) memcheck: smoke-memcheck smoke-memcheck: VALGRIND=valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt diff --git a/README.md b/README.md index ecc95b0a..1fbf8e65 100644 --- a/README.md +++ b/README.md @@ -472,7 +472,7 @@ Therefore, only basic information is provided: - The `Makefile` provide several self-described targets for testing: `smoke`, `test`, `check`, `memcheck`, `test-valgrind`, `test-asan`, `test-leak`, `test-ubsan`, `cross-gcc`, `cross-qemu`, `gcc-analyzer`, `smoke-fault`, `smoke-singleprocess`, `test-singleprocess`, 'long-test'. Please run `make --help` if doubt. - - In addition to the `mdbx_test` utility, there is the script [`long_stochastic.sh`](https://gitflic.ru/project/erthink/libmdbx/blob/master/test/long_stochastic.sh), + - In addition to the `mdbx_test` utility, there is the script [`stochastic.sh`](https://gitflic.ru/project/erthink/libmdbx/blob/master/test/stochastic.sh), which calls `mdbx_test` by going through set of modes and options, with gradually increasing the number of operations and the size of transactions. This script is used for mostly of all automatic testing, including `Makefile` targets and Continuous Integration. - Brief information of available command-line options is available by `--help`. @@ -583,7 +583,7 @@ during configure by CMake. An example of running a basic test script can be found in the [CI-script](appveyor.yml) for [AppVeyor](https://www.appveyor.com/). 
To -run the [long stochastic test scenario](test/long_stochastic.sh), +run the [long stochastic test scenario](test/stochastic.sh), [bash](https://en.wikipedia.org/wiki/Bash_(Unix_shell)) is required, and such testing is recommended with placing the test data on the [RAM-disk](https://en.wikipedia.org/wiki/RAM_drive). @@ -603,7 +603,7 @@ directory with source code, and run `make check` to execute the base tests. If something goes wrong, it is recommended to install [Homebrew](https://brew.sh/) and try again. -To run the [long stochastic test scenario](test/long_stochastic.sh), you +To run the [long stochastic test scenario](test/stochastic.sh), you will need to install the current (not outdated) version of [Bash](https://en.wikipedia.org/wiki/Bash_(Unix_shell)). To do this, we recommend that you install [Homebrew](https://brew.sh/) and then execute diff --git a/test/battery-tmux.sh b/test/battery-tmux.sh index fba52064..65c3178e 100755 --- a/test/battery-tmux.sh +++ b/test/battery-tmux.sh @@ -3,7 +3,7 @@ # Леонид Юрьев aka Leonid Yuriev # SPDX-License-Identifier: Apache-2.0 -TEST="./test/long_stochastic.sh --skip-make --db-upto-gb 32" +TEST="./test/stochastic.sh --skip-make --db-upto-gb 32" PREFIX="/dev/shm/mdbxtest-" tmux kill-session -t mdbx diff --git a/test/long_stochastic.sh b/test/stochastic.sh similarity index 100% rename from test/long_stochastic.sh rename to test/stochastic.sh From 10a93f4b9f485e8f71592f4ec9016a69faa209c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 7 Nov 2024 11:40:16 +0300 Subject: [PATCH 318/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/ChangeLog.md 
b/ChangeLog.md index d98a95c0..639a106b 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -11,7 +11,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic Исправления: - - Доработка `mdbx_close_dbi()` для возврата ошибки `MDBX_DANGLING_DBI` + - Функция `mdbx_close_dbi()` доработана для возврата ошибки `MDBX_DANGLING_DBI` при попытке закрыть dbi-дескриптор таблицы, созданной и/или измененной в ещё выполняющейся транзакции. Такое преждевременное закрытие дескриптора является неверным использованием API и нарушением контракта/предусловий @@ -20,22 +20,31 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic приводить к созданию таблицы с пустым именем, утечки страниц БД и/или нарушению структуры b-tree (неверной ссылкой на корень таблицы). - - Исправление открытия таблицы с пустым/нулевым именем и устранение - `SIGSEGV` при закрытии её дескриптора. + - Исправлено открытие таблицы с пустым/нулевым именем, в том числе устранена + возможность `SIGSEGV` при закрытии её дескриптора. - Добавлены упущенные inline-реализации `mdbx::cursor::upper_bound()` и `mdbx::cursor::upper_bound_multivalue()`. - - Корректировка описания С++ API для использования термина "таблица" вместо "sub-database". - - Исправление условия внутри `assert()` в пути обработки `MDBX_GET/NEXT/PREV_MULTIPLE`. - - Допущение 4-байтового выравнивания данных `MDBX_MULTIPLE` для 32-битных сборок. + - Продолжена корректировка описания С++ API для использования термина "таблица" вместо "sub-database". + - Исправлено проверяемое условие внутри `assert()` в пути обработки `MDBX_GET/NEXT/PREV_MULTIPLE`. + - На 32-битных платформах разрешено использовать 4-байтное выравнивание при получении 64-битных значений посредством `MDBX_MULTIPLE`. - Добавлен костыль для устранения проблем из-за некорректной обработки `[[gnu::pure]]` в Apple Clang. Новое: - - Добавление `mdbx::cursor::get_multiple_samelength()` и переименование `mdbx::txn::put_multiple_samelength()`.
- - Возвращение ключа при выполнении операции `MDBX_GET_MULTIPLE` для единообразия C++ API. - - Смена базового типа на `intptr_t` для размерных констант `mdbx::env::geometry`. - - Включение стандарта `C23` в CMake-скриптах сборки. - - Добавление T-макросов для парных `char`/`wchar_t` функций. + - Ускорено обновление GC при возврате/помещении списков страниц в + сложных сценариях. Был доработан и активирован ранее отключенный + экспериментальный режим корректирующей обратной связи. Этим + принципиально улучшилась сходимость (сократилось количество повторных + попыток), а также устранен дефект приводящий к "зацикливанию" при + фиксации транзакций (с возвратом ошибки `MDBX_PROBLEM`) в редких + специфических условиях. + Подробности см. в описании коммита [`6c56ed97bbd8ca46abac61886a113ba31e5f1291`](https://gitflic.ru/project/erthink/libmdbx/commit/6c56ed97bbd8ca46abac61886a113ba31e5f1291). + + - Добавлен метод `mdbx::cursor::get_multiple_samelength()` и переименован `mdbx::txn::put_multiple_samelength()`. + - Для единообразия C++ API при выполнении операции `MDBX_GET_MULTIPLE` теперь также возвращается значение самого ключа. + - Для размерных констант `mdbx::env::geometry` базовый тип изменен с беззнакового `size_t` на знаковый `intptr_t`. + - Включен стандарт `C23` в CMake-скриптах сборки. + - Добавлены T-макросы для парных `char`/`wchar_t` функций. Мелочи: @@ -52,6 +61,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Корректировка API-макросов для Doxygen. - Уточнение описания `mdbx_dbi_close()` для случая хендлов измененных таблиц. - Добавление теста `extra/early_close_dbi`. + - Доработка скрипта стохастического теста и его переименование в `stochastic.sh`. 
-------------------------------------------------------------------------------- From 4cc1c7d8de39bc5cb895be23066314bc38f3b04e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 9 Nov 2024 22:09:37 +0300 Subject: [PATCH 319/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20`MDBX=5FDEPRECATED=5FENUM`=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20=D1=81=D1=82=D0=B0=D1=80=D1=8B=D1=85=20=D0=BA?= =?UTF-8?q?=D0=BE=D0=BC=D0=BF=D0=B8=D0=BB=D1=8F=D1=82=D0=BE=D1=80=D0=BE?= =?UTF-8?q?=D0=B2=20=D0=BF=D1=80=D0=B8=20=D0=B2=D0=BA=D0=BB=D1=8E=D1=87?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B8=20=D0=A1++11.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/mdbx.h b/mdbx.h index 2ae43a68..4b0ebbb9 100644 --- a/mdbx.h +++ b/mdbx.h @@ -323,14 +323,16 @@ typedef mode_t mdbx_mode_t; #ifdef __deprecated #define MDBX_DEPRECATED __deprecated #elif defined(DOXYGEN) || \ - (defined(__cplusplus) && __cplusplus >= 201403L && \ - __has_cpp_attribute(deprecated) && \ - __has_cpp_attribute(deprecated) >= 201309L) || \ - (!defined(__cplusplus) && defined(__STDC_VERSION__) && \ - __STDC_VERSION__ >= 202304L) + ((!defined(__GNUC__) || defined(__clang__) || __GNUC__ > 5) && \ + ((defined(__cplusplus) && __cplusplus >= 201403L && \ + __has_cpp_attribute(deprecated) && \ + __has_cpp_attribute(deprecated) >= 201309L) || \ + (!defined(__cplusplus) && defined(__STDC_VERSION__) && \ + __STDC_VERSION__ >= 202304L))) #define MDBX_DEPRECATED [[deprecated]] #elif (defined(__GNUC__) && __GNUC__ > 5) || \ - (__has_attribute(__deprecated__) && !defined(__GNUC__)) + (__has_attribute(__deprecated__) && \ + (!defined(__GNUC__) || defined(__clang__) || __GNUC__ > 5)) #define MDBX_DEPRECATED __attribute__((__deprecated__)) #elif defined(_MSC_VER) 
#define MDBX_DEPRECATED __declspec(deprecated) @@ -340,7 +342,10 @@ typedef mode_t mdbx_mode_t; #endif /* MDBX_DEPRECATED */ #ifndef MDBX_DEPRECATED_ENUM -#if !defined(DOXYGEN) && (!defined(_MSC_VER) || _MSC_VER >= 1930) +#if !defined(DOXYGEN) && \ + (!defined(_MSC_VER) || (defined(__cplusplus) && __cplusplus >= 201403L && \ + __has_cpp_attribute(deprecated) && \ + __has_cpp_attribute(deprecated) >= 201309L)) #define MDBX_DEPRECATED_ENUM MDBX_DEPRECATED #else #define MDBX_DEPRECATED_ENUM /* avoid madness MSVC */ From bd7b272bca4932a91eb8287ea4c8f50652bf2da0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 9 Nov 2024 22:45:14 +0300 Subject: [PATCH 320/443] =?UTF-8?q?mdbx-tests:=20=D0=BA=D0=BE=D1=80=D1=80?= =?UTF-8?q?=D0=B5=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20=D0=B8?= =?UTF-8?q?=D1=81=D0=BF=D0=BE=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD?= =?UTF-8?q?=D0=B8=D1=8F=20`mdbx::default=5Fbuffer`=20=D0=B4=D0=BB=D1=8F=20?= =?UTF-8?q?=D1=81=D0=BE=D0=B2=D0=BC=D0=B5=D1=81=D1=82=D0=B8=D0=BC=D0=BE?= =?UTF-8?q?=D1=81=D1=82=D0=B8=20=D1=81=20C++11.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/extra/crunched_delete.c++ | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/extra/crunched_delete.c++ b/test/extra/crunched_delete.c++ index 2d14f423..d11b5528 100644 --- a/test/extra/crunched_delete.c++ +++ b/test/extra/crunched_delete.c++ @@ -92,7 +92,7 @@ static mdbx::map_handle create_and_fill(mdbx::txn txn, const acase &thecase, : mdbx::value_mode::multi); if (txn.get_map_stat(map).ms_entries < NN) { - mdbx::buffer k, v; + mdbx::default_buffer k, v; for (auto i = 0u; i < NN; i++) { mk_key(k, thecase); for (auto ii = thecase.dupmax_log2 @@ -108,7 +108,7 @@ static mdbx::map_handle create_and_fill(mdbx::txn txn, const acase &thecase, static void chunched_delete(mdbx::txn txn, const acase 
&thecase, const unsigned n) { // printf(">> %s, case #%i\n", __FUNCTION__, n); - mdbx::buffer k, v; + mdbx::default_buffer k, v; auto map = txn.open_map_accede(name(n)); { From 9b9d6c6d65ba0aacd42e155b433a0d0c51ad7c6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 9 Nov 2024 23:02:56 +0300 Subject: [PATCH 321/443] =?UTF-8?q?mdbx-cmake:=20=D0=BE=D1=87=D0=B8=D1=81?= =?UTF-8?q?=D1=82=D0=BA=D0=B0=20=D1=83=D1=81=D0=BB=D0=BE=D0=B2=D0=B8=D0=B9?= =?UTF-8?q?=20=D0=B2=D0=BA=D0=BB=D1=8E=D1=87=D0=B5=D0=BD=D0=B8=D1=8F=20?= =?UTF-8?q?=D1=81=D1=82=D0=B0=D0=BD=D0=B4=D0=B0=D1=80=D1=82=D0=BE=D0=B2=20?= =?UTF-8?q?C=20=D0=B8=20C++.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 8 +++++--- cmake/compiler.cmake | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 59dcada5..c2ce48e0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -804,13 +804,15 @@ macro(target_setup_options TARGET) set_target_properties(${TARGET} PROPERTIES INTERPROCEDURAL_OPTIMIZATION $) endif() - if(NOT MDBX_C_STANDARD EQUAL 11 OR (NOT C_FALLBACK_GNU11 AND NOT C_FALLBACK_11)) + if(NOT C_FALLBACK_GNU11 AND NOT C_FALLBACK_11) set_target_properties(${TARGET} PROPERTIES C_STANDARD ${MDBX_C_STANDARD} C_STANDARD_REQUIRED ON) endif() if(MDBX_BUILD_CXX) - set_target_properties(${TARGET} PROPERTIES - CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) + if(NOT CXX_FALLBACK_GNU11 AND NOT CXX_FALLBACK_11) + set_target_properties(${TARGET} PROPERTIES + CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) + endif() if(MSVC AND NOT MSVC_VERSION LESS 1910) target_compile_options(${TARGET} INTERFACE "/Zc:__cplusplus") endif() diff --git a/cmake/compiler.cmake b/cmake/compiler.cmake index e05df1a6..f9c23c20 100644 --- a/cmake/compiler.cmake +++ b/cmake/compiler.cmake @@ -386,7 +386,7 
@@ endif() if(CMAKE_CXX_COMPILER_LOADED) list(FIND CMAKE_CXX_COMPILE_FEATURES cxx_std_11 HAS_CXX11) if(HAS_CXX11 LESS 0) - if (MSVC) + if(MSVC) check_cxx_compiler_flag("/std:c++11" CXX_FALLBACK_11) else() check_cxx_compiler_flag("-std=gnu++11" CXX_FALLBACK_GNU11) @@ -401,7 +401,7 @@ endif() if(CMAKE_C_COMPILER_LOADED) list(FIND CMAKE_C_COMPILE_FEATURES c_std_11 HAS_C11) if(HAS_C11 LESS 0) - if (MSVC) + if(MSVC) check_c_compiler_flag("/std:c11" C_FALLBACK_11) else() check_c_compiler_flag("-std=gnu11" C_FALLBACK_GNU11) From 9da743515c811596a132ee9119b3c40a46ecfd14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 9 Nov 2024 23:13:35 +0300 Subject: [PATCH 322/443] =?UTF-8?q?mdbx-cmake:=20=D0=B2=D0=BA=D0=BB=D1=8E?= =?UTF-8?q?=D1=87=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=82=D0=B5=D1=81=D1=82=D0=BE?= =?UTF-8?q?=D0=B2=20=D0=B8=D1=81=D0=BF=D0=BE=D0=BB=D1=8C=D0=B7=D1=83=D1=8E?= =?UTF-8?q?=D1=89=D0=B8=D1=85=20`mdbx::path`=20=D0=BD=D0=B0=20Windows=20?= =?UTF-8?q?=D1=82=D0=BE=D0=BB=D1=8C=D0=BA=D0=BE=20=D0=B4=D0=BB=D1=8F=20C++?= =?UTF-8?q?17=20=D0=B8=20=D0=B2=D1=8B=D1=88=D0=B5.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 21bb7318..dd4400d2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -278,14 +278,16 @@ else() add_extra_test(dupfix_addodd SOURCE extra/dupfix_addodd.c) endif() if(MDBX_BUILD_CXX) - add_extra_test(early_close_dbi) - add_extra_test(maindb_ordinal) - add_extra_test(dupfix_multiple) + if(NOT WIN32 OR NOT MDBX_CXX_STANDARD LESS 17) + add_extra_test(early_close_dbi) + add_extra_test(maindb_ordinal) + add_extra_test(dupfix_multiple) + add_extra_test(doubtless_positioning TIMEOUT 10800) + add_extra_test(crunched_delete TIMEOUT 10800) + add_extra_test(dbi) 
+ add_extra_test(open) + endif() add_extra_test(hex_base64_base58) - add_extra_test(doubtless_positioning TIMEOUT 10800) - add_extra_test(crunched_delete TIMEOUT 10800) - add_extra_test(dbi) - add_extra_test(open) endif() endif() From 6f41276dbc8fdd2e061fcf8420581035ed5f8e74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 10 Nov 2024 01:21:10 +0300 Subject: [PATCH 323/443] =?UTF-8?q?mdbx++:=20=D0=BF=D0=BE=D0=B4=D0=B4?= =?UTF-8?q?=D0=B5=D1=80=D0=B6=D0=BA=D0=B0=20=D0=B2=D0=BB=D0=BE=D0=B6=D0=B5?= =?UTF-8?q?=D0=BD=D0=BD=D1=8B=D1=85=20=D0=BF=D0=B8=D1=88=D1=83=D1=89=D0=B8?= =?UTF-8?q?=D1=85=20=D1=82=D1=80=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8?= =?UTF-8?q?=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index bd139ade..09d825a0 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3652,9 +3652,10 @@ public: /// \brief Operation mode. enum mode { - readonly, ///< \copydoc MDBX_RDONLY - write_file_io, // don't available on OpenBSD - write_mapped_io ///< \copydoc MDBX_WRITEMAP + readonly, ///< \copydoc MDBX_RDONLY + write_file_io, // don't available on OpenBSD + write_mapped_io, ///< \copydoc MDBX_WRITEMAP + nested_transactions = write_file_io }; /// \brief Durability level. @@ -4197,6 +4198,9 @@ public: /// \brief Creates but not start read transaction. inline txn_managed prepare_read() const; + /// \brief Starts write (read-write) transaction. + inline txn_managed start_write(txn &parent); + /// \brief Starts write (read-write) transaction. 
inline txn_managed start_write(bool dont_wait = false); @@ -6404,6 +6408,14 @@ inline txn_managed env::start_write(bool dont_wait) { return txn_managed(ptr); } +inline txn_managed env::start_write(txn &parent) { + ::MDBX_txn *ptr; + error::success_or_throw( + ::mdbx_txn_begin(handle_, parent, MDBX_TXN_READWRITE, &ptr)); + assert(ptr != nullptr); + return txn_managed(ptr); +} + inline txn_managed env::try_start_write() { return start_write(true); } //------------------------------------------------------------------------------ From 6893a79c7071a3856d98b6e36badd72e780f9d84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 10 Nov 2024 01:24:53 +0300 Subject: [PATCH 324/443] =?UTF-8?q?mdbx-testing:=20=D0=B4=D0=BE=D0=B1?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`extra/cursor=5Fcl?= =?UTF-8?q?osing`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 1 + test/extra/cursor_closing.c++ | 59 +++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 test/extra/cursor_closing.c++ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index dd4400d2..d4322f61 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -279,6 +279,7 @@ else() endif() if(MDBX_BUILD_CXX) if(NOT WIN32 OR NOT MDBX_CXX_STANDARD LESS 17) + add_extra_test(cursor_closing) add_extra_test(early_close_dbi) add_extra_test(maindb_ordinal) add_extra_test(dupfix_multiple) diff --git a/test/extra/cursor_closing.c++ b/test/extra/cursor_closing.c++ new file mode 100644 index 00000000..7b6967ef --- /dev/null +++ b/test/extra/cursor_closing.c++ @@ -0,0 +1,59 @@ +#include "mdbx.h++" + +#include + +static void logger_nofmt(MDBX_log_level_t loglevel, const char *function, + int line, const char *msg, unsigned length) noexcept { + (void)length; + (void)loglevel; + std::cout << function << ":" << 
line << " " << msg; +} + +static char log_buffer[1024]; + +int main(int argc, const char *argv[]) { + (void)argc; + (void)argv; + + mdbx_setup_debug_nofmt(MDBX_LOG_NOTICE, MDBX_DBG_ASSERT, logger_nofmt, + log_buffer, sizeof(log_buffer)); + + mdbx::path db_filename = "test-cursor-closing"; + mdbx::env::remove(db_filename); + + mdbx::env_managed env( + db_filename, mdbx::env_managed::create_parameters(), + mdbx::env::operate_parameters(42, 0, mdbx::env::nested_transactions)); + + { + auto txn = env.start_write(); + auto table = txn.create_map("dummy", mdbx::key_mode::usual, + mdbx::value_mode::single); + auto cursor_1 = txn.open_cursor(table); + auto cursor_2 = cursor_1.clone(); + + auto nested = env.start_write(txn); + auto nested_cursor_1 = nested.open_cursor(table); + auto nested_cursor_2 = nested_cursor_1.clone(); + auto nested_cursor_3 = cursor_1.clone(); + + auto deep = env.start_write(nested); + auto deep_cursor_1 = deep.open_cursor(table); + auto deep_cursor_2 = nested_cursor_1.clone(); + auto deep_cursor_3 = cursor_1.clone(); + deep_cursor_1.close(); + deep.commit(); + deep_cursor_2.close(); + + nested_cursor_1.close(); + nested.abort(); + nested_cursor_2.close(); + + cursor_1.close(); + txn.commit(); + cursor_2.close(); + } + + std::cout << "OK\n"; + return EXIT_SUCCESS; +} From 7aa5d9ab979d7783a7b5edad36f973c45a75546b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 10 Nov 2024 20:17:47 +0300 Subject: [PATCH 325/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D1=80=D0=B0?= =?UTF-8?q?=D0=B1=D0=BE=D1=82=D0=BA=D0=B0=20=D0=B8=D1=81=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20`std::experi?= =?UTF-8?q?mental::filesystem`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 43 +++++++++++++++++++++++++++++++++++-------- src/mdbx.c++ | 8 ++++++-- 2 files changed, 41 
insertions(+), 10 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 09d825a0..eb370d9f 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -78,11 +78,34 @@ #include #endif -#if defined(__cpp_lib_filesystem) && __cpp_lib_filesystem >= 201703L -#include +#ifndef MDBX_USING_CXX_EXPERIMETAL_FILESYSTEM +#ifdef INCLUDE_STD_FILESYSTEM_EXPERIMENTAL +#define MDBX_USING_CXX_EXPERIMETAL_FILESYSTEM 1 +#elif defined(__cpp_lib_filesystem) && __cpp_lib_filesystem >= 201703L && \ + __cplusplus >= 201703L +#define MDBX_USING_CXX_EXPERIMETAL_FILESYSTEM 0 +#elif (!defined(_MSC_VER) || __cplusplus >= 201403L || \ + (defined(_MSC_VER) && \ + defined(_SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING) && \ + __cplusplus >= 201403L)) +#if defined(__cpp_lib_experimental_filesystem) && \ + __cpp_lib_experimental_filesystem >= 201406L +#define MDBX_USING_CXX_EXPERIMETAL_FILESYSTEM 1 #elif defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L && \ __has_include() +#define MDBX_USING_CXX_EXPERIMETAL_FILESYSTEM 1 +#else +#define MDBX_USING_CXX_EXPERIMETAL_FILESYSTEM 0 +#endif +#else +#define MDBX_USING_CXX_EXPERIMETAL_FILESYSTEM 0 +#endif +#endif /* MDBX_USING_CXX_EXPERIMETAL_FILESYSTEM */ + +#if MDBX_USING_CXX_EXPERIMETAL_FILESYSTEM #include +#elif defined(__cpp_lib_filesystem) && __cpp_lib_filesystem >= 201703L +#include #endif #if defined(__cpp_lib_span) && __cpp_lib_span >= 202002L @@ -388,13 +411,21 @@ template using string = ::std::basic_string, ALLOCATOR>; using filehandle = ::mdbx_filehandle_t; -#if defined(DOXYGEN) || \ +#if MDBX_USING_CXX_EXPERIMETAL_FILESYSTEM +#ifdef _MSC_VER +namespace filesystem = ::std::experimental::filesystem::v1; +#else +namespace filesystem = ::std::experimental::filesystem; +#endif +#define MDBX_STD_FILESYSTEM_PATH ::mdbx::filesystem::path +#elif defined(DOXYGEN) || \ (defined(__cpp_lib_filesystem) && __cpp_lib_filesystem >= 201703L && \ defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L && \ (!defined(__MAC_OS_X_VERSION_MIN_REQUIRED) 
|| \ __MAC_OS_X_VERSION_MIN_REQUIRED >= 101500) && \ (!defined(__IPHONE_OS_VERSION_MIN_REQUIRED) || \ - __IPHONE_OS_VERSION_MIN_REQUIRED >= 130100)) + __IPHONE_OS_VERSION_MIN_REQUIRED >= 130100)) && \ + (!defined(_MSC_VER) || __cplusplus >= 201703L) namespace filesystem = ::std::filesystem; /// \brief Defined if `mdbx::filesystem::path` is available. /// \details If defined, it is always `mdbx::filesystem::path`, @@ -403,10 +434,6 @@ namespace filesystem = ::std::filesystem; /// Nonetheless `MDBX_STD_FILESYSTEM_PATH` not defined if the `::mdbx::path` /// is fallbacked to c `std::string` or `std::wstring`. #define MDBX_STD_FILESYSTEM_PATH ::mdbx::filesystem::path -#elif defined(__cpp_lib_experimental_filesystem) && \ - __cpp_lib_experimental_filesystem >= 201406L -namespace filesystem = ::std::experimental::filesystem; -#define MDBX_STD_FILESYSTEM_PATH ::mdbx::filesystem::path #endif /* MDBX_STD_FILESYSTEM_PATH */ #ifdef MDBX_STD_FILESYSTEM_PATH diff --git a/src/mdbx.c++ b/src/mdbx.c++ index f33e6617..7c83aa8b 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -11,10 +11,14 @@ #endif /* MDBX_BUILD_CXX*/ /* Workaround for MSVC' header `extern "C"` vs `std::` redefinition bug */ -#if defined(_MSC_VER) && defined(__SANITIZE_ADDRESS__) && \ - !defined(_DISABLE_VECTOR_ANNOTATION) +#if defined(_MSC_VER) +#if defined(__SANITIZE_ADDRESS__) && !defined(_DISABLE_VECTOR_ANNOTATION) #define _DISABLE_VECTOR_ANNOTATION #endif /* _DISABLE_VECTOR_ANNOTATION */ +#ifndef _SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING +#define _SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING +#endif /* #define _SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING */ +#endif /* _MSC_VER */ #include "../mdbx.h++" From 871bb7f56c66d870749392716ed88b8628beefed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 10 Nov 2024 21:17:34 +0300 Subject: [PATCH 326/443] 
=?UTF-8?q?mdbx:=20=D0=BE=D0=B1=D0=BC=D0=B5=D0=BD?= =?UTF-8?q?=20=D0=BF=D0=BE=D1=80=D1=8F=D0=B4=D0=BA=D0=B0=20=D0=B0=D1=82?= =?UTF-8?q?=D1=80=D0=B8=D0=B1=D1=83=D1=82=D0=BE=D0=B2=20`pure`|`const`/`ma?= =?UTF-8?q?ybe=5Funused`=20=D0=B2=20=D0=BE=D0=BF=D1=80=D0=B5=D0=B4=D0=B5?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B8=20=D1=84=D1=83=D0=BD=D0=BA=D1=86?= =?UTF-8?q?=D0=B8=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dbi.h | 2 +- src/essentials.h | 6 +++--- src/unaligned.h | 2 +- src/utils.h | 10 +++++----- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/dbi.h b/src/dbi.h index 401c1b59..4c66c664 100644 --- a/src/dbi.h +++ b/src/dbi.h @@ -5,7 +5,7 @@ #include "essentials.h" -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL size_t +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED MDBX_INTERNAL size_t dbi_bitmap_ctz_fallback(const MDBX_txn *txn, intptr_t bmi); #if MDBX_ENABLE_DBI_SPARSE diff --git a/src/essentials.h b/src/essentials.h index 9ac71df5..e6f42305 100644 --- a/src/essentials.h +++ b/src/essentials.h @@ -115,20 +115,20 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor; /*----------------------------------------------------------------------------*/ -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static inline pgno_t +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline pgno_t int64pgno(int64_t i64) { if (likely(i64 >= (int64_t)MIN_PAGENO && i64 <= (int64_t)MAX_PAGENO + 1)) return (pgno_t)i64; return (i64 < (int64_t)MIN_PAGENO) ? 
MIN_PAGENO : MAX_PAGENO; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static inline pgno_t +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline pgno_t pgno_add(size_t base, size_t augend) { assert(base <= MAX_PAGENO + 1 && augend < MAX_PAGENO); return int64pgno((int64_t)base + (int64_t)augend); } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static inline pgno_t +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline pgno_t pgno_sub(size_t base, size_t subtrahend) { assert(base >= MIN_PAGENO && base <= MAX_PAGENO + 1 && subtrahend < MAX_PAGENO); diff --git a/src/unaligned.h b/src/unaligned.h index 0dcbb3f2..722e084a 100644 --- a/src/unaligned.h +++ b/src/unaligned.h @@ -6,7 +6,7 @@ /*------------------------------------------------------------------------------ * Unaligned access */ -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static inline size_t +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline size_t field_alignment(size_t alignment_baseline, size_t field_offset) { size_t merge = alignment_baseline | (size_t)field_offset; return merge & -(int)merge; diff --git a/src/utils.h b/src/utils.h index ec65379a..9f51099a 100644 --- a/src/utils.h +++ b/src/utils.h @@ -41,7 +41,7 @@ ASAN_UNPOISON_MEMORY_REGION(addr, size); \ } while (0) -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static inline size_t +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline size_t branchless_abs(intptr_t value) { assert(value > INT_MIN); const size_t expanded_sign = @@ -49,23 +49,23 @@ branchless_abs(intptr_t value) { return ((size_t)value + expanded_sign) ^ expanded_sign; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static inline bool +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline bool is_powerof2(size_t x) { return (x & (x - 1)) == 0; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static inline size_t +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline size_t floor_powerof2(size_t value, size_t granularity) { 
assert(is_powerof2(granularity)); return value & ~(granularity - 1); } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static inline size_t +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline size_t ceil_powerof2(size_t value, size_t granularity) { return floor_powerof2(value + granularity - 1, granularity); } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL unsigned +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED MDBX_INTERNAL unsigned log2n_powerof2(size_t value_uintptr); MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL uint64_t rrxmrrxmsx_0(uint64_t v); From ab4bf2d7f09326b8d6be6cf065b35c588c996e20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 12 Nov 2024 12:25:51 +0300 Subject: [PATCH 327/443] =?UTF-8?q?mdbx-cmake:=20=D1=8D=D0=BA=D1=81=D0=BF?= =?UTF-8?q?=D0=BE=D1=80=D1=82/=D0=B8=D0=BC=D0=BF=D0=BE=D1=80=D1=82=20?= =?UTF-8?q?=D0=B8=D0=BD=D1=84=D0=BE=D1=80=D0=BC=D0=B0=D1=86=D0=B8=D0=B8=20?= =?UTF-8?q?=D0=BE=20=D0=B2=D0=B5=D1=80=D1=81=D0=B8=D0=B8=20=D0=B2=20`VERSI?= =?UTF-8?q?ON.json`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 2 +- cmake/utils.cmake | 311 +++++++++++++++++++++++++++++----------------- 2 files changed, 197 insertions(+), 116 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c2ce48e0..7a0adb35 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -677,7 +677,7 @@ if(MDBX_BUILD_CXX) endif() # Get version -fetch_version(MDBX "${CMAKE_CURRENT_SOURCE_DIR}" FALSE) +fetch_version(MDBX "${CMAKE_CURRENT_SOURCE_DIR}" FALSE "${CMAKE_CURRENT_BINARY_DIR}") message(STATUS "libmdbx version is ${MDBX_VERSION}") # sources list diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 164ce8eb..0348d96d 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -73,201 +73,282 @@ macro(set_source_files_compile_flags) unset(_lang) 
endmacro(set_source_files_compile_flags) -macro(fetch_version name source_root_directory parent_scope) - set(${name}_VERSION "") - set(${name}_GIT_DESCRIBE "") - set(${name}_GIT_TIMESTAMP "") - set(${name}_GIT_TREE "") - set(${name}_GIT_COMMIT "") - set(${name}_GIT_REVISION 0) - set(${name}_GIT_VERSION "") - if(GIT AND EXISTS "${source_root_directory}/.git") - execute_process(COMMAND ${GIT} show --no-patch --format=%cI HEAD - OUTPUT_VARIABLE ${name}_GIT_TIMESTAMP +macro(fetch_version name source_root_directory parent_scope build_directory_for_json_output) + set(_version_4dot "") + set(_git_describe "") + set(_git_timestamp "") + set(_git_tree "") + set(_git_commit "") + set(_git_revision 0) + set(_git_version "") + set(_version_from "") + set(_git_root FALSE) + + find_program(GIT git) + if(GIT) + execute_process(COMMAND ${GIT} rev-parse --show-toplevel + OUTPUT_VARIABLE _git_root + ERROR_VARIABLE _git_root_error OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE rc) - if(rc OR "${name}_GIT_TIMESTAMP" STREQUAL "%cI") + RESULT_VARIABLE _rc) + if(_rc OR _git_root STREQUAL "") + if(EXISTS "${source_root_directory}/.git") + message(ERROR "`git rev-parse --show-toplevel` failed '${_git_root_error}'") + else() + message(VERBOSE "`git rev-parse --show-toplevel` failed '${_git_root_error}'") + endif() + else() + set(_source_root "${source_root_directory}") + if(NOT CMAKE_VERSION VERSION_LESS 3.20) + cmake_path(NORMAL_PATH _git_root) + cmake_path(NORMAL_PATH _source_root) + endif() + if(_source_root STREQUAL _git_root AND EXISTS "${_git_root}/VERSION.json") + message(FATAL_ERROR "Несколько источников информации о версии, допустим только один из: репозиторий git, либо файл VERSION.json") + endif() + endif() + endif() + + if(EXISTS "${source_root_directory}/VERSION.json") + set(_version_from "${source_root_directory}/VERSION.json") + + if(CMAKE_VERSION VERSION_LESS 3.19) + message(FATAL_ERROR "Требуется CMake версии >= 3.19 для чтения 
VERSION.json") + endif() + file(STRINGS "${_version_from}" _versioninfo_json NEWLINE_CONSUME LIMIT_COUNT 9 LIMIT_INPUT 999 ENCODING UTF-8) + string(JSON _git_describe GET ${_versioninfo_json} git_describe) + string(JSON _git_timestamp GET "${_versioninfo_json}" "git_timestamp") + string(JSON _git_tree GET "${_versioninfo_json}" "git_tree") + string(JSON _git_commit GET "${_versioninfo_json}" "git_commit") + string(JSON _version_4dot GET "${_versioninfo_json}" "version_4dot") + unset(_json_object) + string(REPLACE "." ";" _version_list "${_version_4dot}") + + if(NOT _version_4dot) + message(ERROR "Unable to retrieve ${name} version from \"${_version_from}\" file.") + set(_version_list ${_git_version}) + string(REPLACE ";" "." _version_4dot "${_git_version}") + else() + string(REPLACE "." ";" _version_list ${_version_4dot}) + endif() + + elseif(_git_root AND _source_root STREQUAL _git_root) + set(_version_from git) + + execute_process(COMMAND ${GIT} show --no-patch --format=%cI HEAD + OUTPUT_VARIABLE _git_timestamp + OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY ${source_root_directory} + RESULT_VARIABLE _rc) + if(_rc OR _git_timestamp STREQUAL "%cI") execute_process(COMMAND ${GIT} show --no-patch --format=%ci HEAD - OUTPUT_VARIABLE ${name}_GIT_TIMESTAMP + OUTPUT_VARIABLE _git_timestamp OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE rc) - if(rc OR "${name}_GIT_TIMESTAMP" STREQUAL "%ci") + RESULT_VARIABLE _rc) + if(_rc OR _git_timestamp STREQUAL "%ci") message(FATAL_ERROR "Please install latest version of git (`show --no-patch --format=%cI HEAD` failed)") endif() endif() execute_process(COMMAND ${GIT} show --no-patch --format=%T HEAD - OUTPUT_VARIABLE ${name}_GIT_TREE + OUTPUT_VARIABLE _git_tree OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE rc) - if(rc OR "${name}_GIT_TREE" STREQUAL "") + RESULT_VARIABLE _rc) + if(_rc OR _git_tree STREQUAL "") message(FATAL_ERROR 
"Please install latest version of git (`show --no-patch --format=%T HEAD` failed)") endif() execute_process(COMMAND ${GIT} show --no-patch --format=%H HEAD - OUTPUT_VARIABLE ${name}_GIT_COMMIT + OUTPUT_VARIABLE _git_commit OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE rc) - if(rc OR "${name}_GIT_COMMIT" STREQUAL "") + RESULT_VARIABLE _rc) + if(_rc OR _git_commit STREQUAL "") message(FATAL_ERROR "Please install latest version of git (`show --no-patch --format=%H HEAD` failed)") endif() - execute_process(COMMAND ${GIT} rev-list --tags --count - OUTPUT_VARIABLE tag_count + execute_process(COMMAND ${GIT} status --untracked-files=no --porcelain + OUTPUT_VARIABLE _git_status OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE rc) - if(rc) + RESULT_VARIABLE _rc) + if(_rc) + message(FATAL_ERROR "Please install latest version of git (`status --untracked-files=no --porcelain` failed)") + endif() + if(NOT _git_status STREQUAL "") + set(_git_commit "${_git_commit}-dirty") + endif() + unset(_git_status) + + execute_process(COMMAND ${GIT} rev-list --tags --count + OUTPUT_VARIABLE _tag_count + OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY ${source_root_directory} + RESULT_VARIABLE _rc) + if(_rc) message(FATAL_ERROR "Please install latest version of git (`git rev-list --tags --count` failed)") endif() - if(tag_count EQUAL 0) + if(_tag_count EQUAL 0) execute_process(COMMAND ${GIT} rev-list --all --count - OUTPUT_VARIABLE whole_count + OUTPUT_VARIABLE _whole_count OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE rc) - if(rc) + RESULT_VARIABLE _rc) + if(_rc) message(FATAL_ERROR "Please install latest version of git (`git rev-list --all --count` failed)") endif() - if(whole_count GREATER 42) - message(FATAL_ERROR "Please fetch tags (no any tags for ${whole_count} commits)") + if(_whole_count GREATER 42) + message(FATAL_ERROR "Please fetch tags 
(no any tags for ${_whole_count} commits)") endif() - set(${name}_GIT_VERSION "0;0;0") + set(_git_version "0;0;0") execute_process(COMMAND ${GIT} rev-list --count --all --no-merges - OUTPUT_VARIABLE ${name}_GIT_REVISION + OUTPUT_VARIABLE _git_revision OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE rc) - if(rc OR "${name}_GIT_REVISION" STREQUAL "") + RESULT_VARIABLE _rc) + if(_rc OR _git_revision STREQUAL "") message(FATAL_ERROR "Please install latest version of git (`rev-list --count --all --no-merges` failed)") endif() - else(tag_count EQUAL 0) + else(_tag_count EQUAL 0) execute_process(COMMAND ${GIT} describe --tags --long --dirty=-dirty "--match=v[0-9]*" - OUTPUT_VARIABLE ${name}_GIT_DESCRIBE + OUTPUT_VARIABLE _git_describe OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE rc) - if(rc OR "${name}_GIT_DESCRIBE" STREQUAL "") + RESULT_VARIABLE _rc) + if(_rc OR _git_describe STREQUAL "") + execute_process(COMMAND ${GIT} rev-list --all --count + OUTPUT_VARIABLE _whole_count + OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY ${source_root_directory} + RESULT_VARIABLE _rc) + if(_rc) + message(FATAL_ERROR "Please install latest version of git (`git rev-list --all --count` failed)") + endif() if(_whole_count GREATER 42) message(FATAL_ERROR "Please fetch tags (`describe --tags --long --dirty --match=v[0-9]*` failed)") else() execute_process(COMMAND ${GIT} describe --all --long --dirty=-dirty - OUTPUT_VARIABLE ${name}_GIT_DESCRIBE + OUTPUT_VARIABLE _git_describe OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE rc) - if(rc OR "${name}_GIT_DESCRIBE" STREQUAL "") + RESULT_VARIABLE _rc) + if(_rc OR _git_describe STREQUAL "") message(FATAL_ERROR "Please install latest version of git (`git rev-list --tags --count` and/or `git rev-list --all --count` failed)") endif() endif() endif() execute_process(COMMAND ${GIT} describe --tags --abbrev=0 
"--match=v[0-9]*" - OUTPUT_VARIABLE last_release_tag + OUTPUT_VARIABLE _last_release_tag OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE rc) - if(rc) + RESULT_VARIABLE _rc) + if(_rc) message(FATAL_ERROR "Please install latest version of git (`describe --tags --abbrev=0 --match=v[0-9]*` failed)") endif() - if (last_release_tag) - set(git_revlist_arg "${last_release_tag}..HEAD") + if (_last_release_tag) + set(_git_revlist_arg "${_last_release_tag}..HEAD") else() execute_process(COMMAND ${GIT} tag --sort=-version:refname - OUTPUT_VARIABLE tag_list + OUTPUT_VARIABLE _tag_list OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE rc) - if(rc) + RESULT_VARIABLE _rc) + if(_rc) message(FATAL_ERROR "Please install latest version of git (`tag --sort=-version:refname` failed)") endif() - string(REGEX REPLACE "\n" ";" tag_list "${tag_list}") - set(git_revlist_arg "HEAD") - foreach(tag IN LISTS tag_list) - if(NOT last_release_tag) - string(REGEX MATCH "^v[0-9]+(\.[0-9]+)+" last_release_tag "${tag}") - set(git_revlist_arg "${tag}..HEAD") + string(REGEX REPLACE "\n" ";" _tag_list "${_tag_list}") + set(_git_revlist_arg "HEAD") + foreach(_tag IN LISTS _tag_list) + if(NOT _last_release_tag) + string(REGEX MATCH "^v[0-9]+(\.[0-9]+)+" _last_release_tag "${_tag}") + set(_git_revlist_arg "${_tag}..HEAD") endif() - endforeach(tag) + endforeach(_tag) endif() - execute_process(COMMAND ${GIT} rev-list --count "${git_revlist_arg}" - OUTPUT_VARIABLE ${name}_GIT_REVISION + execute_process(COMMAND ${GIT} rev-list --count "${_git_revlist_arg}" + OUTPUT_VARIABLE _git_revision OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE rc) - if(rc OR "${name}_GIT_REVISION" STREQUAL "") - message(FATAL_ERROR "Please install latest version of git (`rev-list --count ${git_revlist_arg}` failed)") + RESULT_VARIABLE _rc) + if(_rc OR _git_revision STREQUAL "") + message(FATAL_ERROR 
"Please install latest version of git (`rev-list --count ${_git_revlist_arg}` failed)") endif() - string(REGEX MATCH "^(v)?([0-9]+)\\.([0-9]+)\\.([0-9]+)(.*)?" git_version_valid "${${name}_GIT_DESCRIBE}") - if(git_version_valid) - string(REGEX REPLACE "^(v)?([0-9]+)\\.([0-9]+)\\.([0-9]+)(.*)?" "\\2;\\3;\\4" ${name}_GIT_VERSION ${${name}_GIT_DESCRIBE}) + string(REGEX MATCH "^(v)?([0-9]+)\\.([0-9]+)\\.([0-9]+)(.*)?" _git_version_valid "${_git_describe}") + if(_git_version_valid) + string(REGEX REPLACE "^(v)?([0-9]+)\\.([0-9]+)\\.([0-9]+)(.*)?" "\\2;\\3;\\4" _git_version ${_git_describe}) else() - string(REGEX MATCH "^(v)?([0-9]+)\\.([0-9]+)(.*)?" git_version_valid "${${name}_GIT_DESCRIBE}") - if(git_version_valid) - string(REGEX REPLACE "^(v)?([0-9]+)\\.([0-9]+)(.*)?" "\\2;\\3;0" ${name}_GIT_VERSION ${${name}_GIT_DESCRIBE}) + string(REGEX MATCH "^(v)?([0-9]+)\\.([0-9]+)(.*)?" _git_version_valid "${_git_describe}") + if(_git_version_valid) + string(REGEX REPLACE "^(v)?([0-9]+)\\.([0-9]+)(.*)?" 
"\\2;\\3;0" _git_version ${_git_describe}) else() - message(AUTHOR_WARNING "Bad ${name} version \"${${name}_GIT_DESCRIBE}\"; falling back to 0.0.0 (have you made an initial release?)") - set(${name}_GIT_VERSION "0;0;0") + message(AUTHOR_WARNING "Bad ${name} version \"${_git_describe}\"; falling back to 0.0.0 (have you made an initial release?)") + set(_git_version "0;0;0") endif() endif() - endif(tag_count EQUAL 0) - endif() + endif(_tag_count EQUAL 0) - if(NOT ${name}_GIT_VERSION OR NOT ${name}_GIT_TIMESTAMP OR ${name}_GIT_REVISION STREQUAL "") - if(GIT AND EXISTS "${source_root_directory}/.git") - message(WARNING "Unable to retrieve ${name} version from git.") - endif() - set(${name}_GIT_VERSION "0;0;0;0") - set(${name}_GIT_TIMESTAMP "") - set(${name}_GIT_REVISION 0) - - # Try to get version from VERSION file - set(version_file "${source_root_directory}/VERSION.txt") - if(NOT EXISTS "${version_file}") - set(version_file "${source_root_directory}/VERSION") - endif() - if(EXISTS "${version_file}") - file(STRINGS "${version_file}" ${name}_VERSION LIMIT_COUNT 1 LIMIT_INPUT 42) - endif() - - if(NOT ${name}_VERSION) - message(WARNING "Unable to retrieve ${name} version from \"${version_file}\" file.") - set(${name}_VERSION_LIST ${${name}_GIT_VERSION}) - string(REPLACE ";" "." ${name}_VERSION "${${name}_GIT_VERSION}") - else() - string(REPLACE "." ";" ${name}_VERSION_LIST ${${name}_VERSION}) - endif() + list(APPEND _git_version "${_git_revision}") + set(_version_list "${_git_version}") + string(REPLACE ";" "." _version_4dot "${_version_list}") + elseif(GIT) + message(FATAL_ERROR "Нет источника информации о версии (${source_root_directory}), требуется один из: репозиторий git, либо VERSION.json") else() - list(APPEND ${name}_GIT_VERSION ${${name}_GIT_REVISION}) - set(${name}_VERSION_LIST ${${name}_GIT_VERSION}) - string(REPLACE ";" "." 
${name}_VERSION "${${name}_GIT_VERSION}") + message(FATAL_ERROR "Требуется git для получения информации о версии") endif() - list(GET ${name}_VERSION_LIST 0 "${name}_VERSION_MAJOR") - list(GET ${name}_VERSION_LIST 1 "${name}_VERSION_MINOR") - list(GET ${name}_VERSION_LIST 2 "${name}_VERSION_RELEASE") - list(GET ${name}_VERSION_LIST 3 "${name}_VERSION_REVISION") + list(LENGTH _version_list _version_list_length) + list(GET _version_list 0 _version_major) + list(GET _version_list 1 _version_minor) + list(GET _version_list 2 _version_release) + list(GET _version_list 3 _version_revision) + + if(NOT _git_describe OR NOT _git_timestamp OR NOT _git_tree OR NOT _git_commit OR _git_revision STREQUAL "" OR NOT _version_list_length EQUAL 4 OR _version_major STREQUAL "" OR _version_minor STREQUAL "" OR _version_release STREQUAL "" OR _version_revision STREQUAL "") + message(ERROR "Unable to retrieve ${name} version from ${_version_from}.") + else() + list(APPEND _git_version "${_git_revision}") + endif() if(${parent_scope}) - set(${name}_VERSION_MAJOR "${${name}_VERSION_MAJOR}" PARENT_SCOPE) - set(${name}_VERSION_MINOR "${${name}_VERSION_MINOR}" PARENT_SCOPE) - set(${name}_VERSION_RELEASE "${${name}_VERSION_RELEASE}" PARENT_SCOPE) - set(${name}_VERSION_REVISION "${${name}_VERSION_REVISION}" PARENT_SCOPE) - set(${name}_VERSION "${${name}_VERSION}" PARENT_SCOPE) + set(${name}_VERSION_MAJOR "${_version_major}" PARENT_SCOPE) + set(${name}_VERSION_MINOR "${_version_minor}" PARENT_SCOPE) + set(${name}_VERSION_RELEASE "${_version_release}" PARENT_SCOPE) + set(${name}_VERSION_REVISION "${_version_revision}" PARENT_SCOPE) + set(${name}_VERSION "${_version_4dot}" PARENT_SCOPE) - set(${name}_GIT_DESCRIBE "${${name}_GIT_DESCRIBE}" PARENT_SCOPE) - set(${name}_GIT_TIMESTAMP "${${name}_GIT_TIMESTAMP}" PARENT_SCOPE) - set(${name}_GIT_TREE "${${name}_GIT_TREE}" PARENT_SCOPE) - set(${name}_GIT_COMMIT "${${name}_GIT_COMMIT}" PARENT_SCOPE) - set(${name}_GIT_REVISION "${${name}_GIT_REVISION}" 
PARENT_SCOPE) - set(${name}_GIT_VERSION "${${name}_GIT_VERSION}" PARENT_SCOPE) + set(${name}_GIT_DESCRIBE "${_git_describe}" PARENT_SCOPE) + set(${name}_GIT_TIMESTAMP "${_git_timestamp}" PARENT_SCOPE) + set(${name}_GIT_TREE "${_git_tree}" PARENT_SCOPE) + set(${name}_GIT_COMMIT "${_git_commit}" PARENT_SCOPE) + set(${name}_GIT_REVISION "${_git_revision}" PARENT_SCOPE) + else() + set(${name}_VERSION_MAJOR "${_version_major}") + set(${name}_VERSION_MINOR "${_version_minor}") + set(${name}_VERSION_RELEASE "${_version_release}") + set(${name}_VERSION_REVISION "${_version_revision}") + set(${name}_VERSION "${_version_4dot}") + + set(${name}_GIT_DESCRIBE "${_git_describe}") + set(${name}_GIT_TIMESTAMP "${_git_timestamp}") + set(${name}_GIT_TREE "${_git_tree}") + set(${name}_GIT_COMMIT "${_git_commit}") + set(${name}_GIT_REVISION "${_git_revision}") + endif() + + if(_version_from STREQUAL "git") + string(CONFIGURE "{ + \"git_describe\" : \"@_git_describe@\", + \"git_timestamp\" : \"@_git_timestamp@\", + \"git_tree\" : \"@_git_tree@\", + \"git_commit\" : \"@_git_commit@\", + \"version_4dot\" : \"@_version_4dot@\"\n}" _versioninfo_json @ONLY ESCAPE_QUOTES) + file(WRITE "${build_directory_for_json_output}/VERSION.json" "${_versioninfo_json}") endif() endmacro(fetch_version) From f550c654764d71bd5b16802b6c380fe5a0160e9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 12 Nov 2024 17:49:21 +0300 Subject: [PATCH 328/443] =?UTF-8?q?mdbx-make:=20=D0=B8=D1=81=D0=BF=D0=BE?= =?UTF-8?q?=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20`VERSI?= =?UTF-8?q?ON.json`=20=D1=81=20=D0=BF=D0=BE=D0=BB=D0=BD=D0=BE=D0=B9=20?= =?UTF-8?q?=D0=B8=D0=BD=D1=84=D0=BE=D1=80=D0=BC=D0=B0=D1=86=D0=B8=D0=B5?= =?UTF-8?q?=D0=B9=20=D0=B2=D0=BC=D0=B5=D1=81=D1=82=D0=BE=20=D0=BE=D0=B4?= =?UTF-8?q?=D0=BD=D0=BE=D1=81=D1=82=D1=80=D0=BE=D1=87=D0=BD=D0=BE=D0=B3?= 
=?UTF-8?q?=D0=BE=20=D1=82=D0=B5=D0=BA=D1=81=D1=82=D0=BE=D0=B2=D0=BE=D0=B3?= =?UTF-8?q?=D0=BE=20=D1=84=D0=B0=D0=B9=D0=BB=D0=B0.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 3 +-- GNUmakefile | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a0adb35..581a6a80 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -167,7 +167,7 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND message(SEND_ERROR "Git command-line tool not found") endif() set(MDBX_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src") -elseif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/VERSION.txt" AND +elseif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/VERSION.json" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/NOTICE" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.c" AND @@ -1180,7 +1180,6 @@ if (NOT SUBPROJECT) set(CPACK_PACKAGE_VERSION_COMMIT ${MDBX_VERSION_REVISION}) set(PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.${CPACK_PACKAGE_VERSION_COMMIT}") message(STATUS "libmdbx package version is ${PACKAGE_VERSION}") - file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/VERSION.txt" "${MDBX_VERSION_MAJOR}.${MDBX_VERSION_MINOR}.${MDBX_VERSION_RELEASE}.${MDBX_VERSION_REVISION}") endif() cmake_policy(POP) diff --git a/GNUmakefile b/GNUmakefile index 5c5a20fd..09c69a0b 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -353,7 +353,7 @@ define uname2titer esac endef -DIST_EXTRA := LICENSE NOTICE README.md CMakeLists.txt GNUmakefile Makefile ChangeLog.md VERSION.txt config.h.in ntdll.def \ +DIST_EXTRA := LICENSE NOTICE README.md CMakeLists.txt GNUmakefile Makefile ChangeLog.md VERSION.json config.h.in ntdll.def \ $(addprefix man1/, $(MANPAGES)) cmake/compiler.cmake cmake/profile.cmake cmake/utils.cmake DIST_SRC := mdbx.h mdbx.h++ mdbx.c mdbx.c++ $(addsuffix .c, $(MDBX_TOOLS)) @@ -733,11 +733,11 @@ dist/$(1): $(1) 
src/version.c $(lastword $(MAKEFILE_LIST)) $(QUIET)mkdir -p $$(dir $$@) && sed -e '/^#> dist-cutoff-begin/,/^#< dist-cutoff-end/d' $$< >$$@ endef -$(foreach file,mdbx.h mdbx.h++ $(filter-out man1/% VERSION.txt %.in ntdll.def,$(DIST_EXTRA)),$(eval $(call dist-extra-rule,$(file)))) +$(foreach file,mdbx.h mdbx.h++ $(filter-out man1/% VERSION.json %.in ntdll.def,$(DIST_EXTRA)),$(eval $(call dist-extra-rule,$(file)))) -dist/VERSION.txt: src/version.c +dist/VERSION.json: src/version.c @echo ' MAKE $@' - $(QUIET)mkdir -p dist/ && echo "$(MDBX_GIT_VERSION).$(MDBX_GIT_REVISION)" >$@ + $(QUIET)mkdir -p dist/ && echo "{ \"git_describe\": \"$(MDBX_GIT_DESCRIBE)\", \"git_timestamp\": \"$(MDBX_GIT_TIMESTAMP)\", \"git_tree\": \"$(shell git show --no-patch --format=%T HEAD 2>&1)\", \"git_commit\": \"$(shell git show --no-patch --format=%H HEAD 2>&1)\", \"version_4dot\": \"$(MDBX_GIT_VERSION).$(MDBX_GIT_REVISION)\" }" >$@ dist/ntdll.def: src/ntdll.def @echo ' COPY $@' From e27537dd9d6462ba3783b0521f5fe373393e6a43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 12 Nov 2024 20:43:32 +0300 Subject: [PATCH 329/443] =?UTF-8?q?mdbx-make:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D1=80=D0=B5=D1=84?= =?UTF-8?q?=D0=B8=D0=BA=D1=81=D0=B0=20`@`=20=D0=BA=20=D0=B8=D0=BC=D0=B5?= =?UTF-8?q?=D0=BD=D0=B0=D0=BC=20=D1=81=D0=BB=D1=83=D0=B6=D0=B5=D0=B1=D0=BD?= =?UTF-8?q?=D1=8B=D1=85/=D0=B2=D1=80=D0=B5=D0=BC=D0=B5=D0=BD=D0=BD=D1=8B?= =?UTF-8?q?=D1=85=20=D1=84=D0=B0=D0=B9=D0=BB=D0=BE=D0=B2/=D0=BA=D0=B0?= =?UTF-8?q?=D1=82=D0=B0=D0=BB=D0=BE=D0=B3=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GNUmakefile | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index 09c69a0b..8eabcf19 100644 --- a/GNUmakefile +++ 
b/GNUmakefile @@ -255,19 +255,19 @@ strip: all clean: @echo ' REMOVE ...' $(QUIET)rm -rf $(MDBX_TOOLS) mdbx_test @* *.[ao] *.[ls]o *.$(SO_SUFFIX) *.dSYM *~ tmp.db/* \ - *.gcov *.log *.err src/*.o test/*.o mdbx_example dist \ - config.h src/config.h src/version.c *.tar* buildflags.tag \ + *.gcov *.log *.err src/*.o test/*.o mdbx_example dist @dist-check \ + config.h src/config.h src/version.c *.tar* @buildflags.tag @dist-checked.tag \ mdbx_*.static mdbx_*.static-lto MDBX_BUILD_FLAGS =$(strip MDBX_BUILD_CXX=$(MDBX_BUILD_CXX) $(MDBX_BUILD_OPTIONS) $(call select_by,MDBX_BUILD_CXX,$(CXXFLAGS) $(LDFLAGS) $(LIB_STDCXXFS) $(LIBS),$(CFLAGS) $(LDFLAGS) $(LIBS))) check_buildflags_tag: - $(QUIET)if [ "$(MDBX_BUILD_FLAGS)" != "$$(cat buildflags.tag 2>&1)" ]; then \ + $(QUIET)if [ "$(MDBX_BUILD_FLAGS)" != "$$(cat @buildflags.tag 2>&1)" ]; then \ echo -n " CLEAN for build with specified flags..." && \ $(MAKE) IOARENA=false CXXSTD= -s clean >/dev/null && echo " Ok" && \ - echo '$(MDBX_BUILD_FLAGS)' > buildflags.tag; \ + echo '$(MDBX_BUILD_FLAGS)' > @buildflags.tag; \ fi -buildflags.tag: check_buildflags_tag +@buildflags.tag: check_buildflags_tag lib-static libmdbx.a: mdbx-static.o $(call select_by,MDBX_BUILD_CXX,mdbx++-static.o) @echo ' AR $@' @@ -285,10 +285,10 @@ ifeq ($(wildcard mdbx.c),mdbx.c) # Amalgamated source code, i.e. 
distributed after `make dist` MAN_SRCDIR := man1/ -config.h: buildflags.tag mdbx.c $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE +config.h: @buildflags.tag mdbx.c $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE @echo ' MAKE $@' $(QUIET)(echo '#define MDBX_BUILD_TIMESTAMP "$(MDBX_BUILD_TIMESTAMP)"' \ - && echo "#define MDBX_BUILD_FLAGS \"$$(cat buildflags.tag)\"" \ + && echo "#define MDBX_BUILD_FLAGS \"$$(cat @buildflags.tag)\"" \ && echo '#define MDBX_BUILD_COMPILER "$(shell (LC_ALL=C $(CC) --version || echo 'Please use GCC or CLANG compatible compiler') | head -1)"' \ && echo '#define MDBX_BUILD_TARGET "$(shell set -o pipefail; (LC_ALL=C $(CC) -v 2>&1 | grep -i '^Target:' | cut -d ' ' -f 2- || (LC_ALL=C $(CC) --version | grep -qi e2k && echo E2K) || echo 'Please use GCC or CLANG compatible compiler') | head -1)"' \ && echo '#define MDBX_BUILD_CXX $(call select_by,MDBX_BUILD_CXX,1,0)' \ @@ -531,10 +531,10 @@ src/version.c: src/version.c.in $(lastword $(MAKEFILE_LIST)) $(git_DIR)/HEAD $(g -e "s|\$${MDBX_VERSION_REVISION}|$(MDBX_GIT_REVISION)|" \ src/version.c.in >$@ -src/config.h: buildflags.tag src/version.c $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE +src/config.h: @buildflags.tag src/version.c $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE @echo ' MAKE $@' $(QUIET)(echo '#define MDBX_BUILD_TIMESTAMP "$(MDBX_BUILD_TIMESTAMP)"' \ - && echo "#define MDBX_BUILD_FLAGS \"$$(cat buildflags.tag)\"" \ + && echo "#define MDBX_BUILD_FLAGS \"$$(cat @buildflags.tag)\"" \ && echo '#define MDBX_BUILD_COMPILER "$(shell (LC_ALL=C $(CC) --version || echo 'Please use GCC or CLANG compatible compiler') | head -1)"' \ && echo '#define MDBX_BUILD_TARGET "$(shell set -o pipefail; (LC_ALL=C $(CC) -v 2>&1 | grep -i '^Target:' | cut -d ' ' -f 2- || (LC_ALL=C $(CC) --version | grep -qi e2k && echo E2K) || echo 'Please use GCC or CLANG compatible compiler') | head -1)"' \ && echo '#define MDBX_BUILD_SOURCERY $(MDBX_BUILD_SOURCERY)' \ @@ -600,7 +600,7 @@ mdbx++-static.o: src/config.h src/mdbx.c++ 
mdbx.h mdbx.h++ $(lastword $(MAKEFILE @echo ' CC $@' $(QUIET)$(CXX) $(CXXFLAGS) $(MDBX_BUILD_OPTIONS) '-DMDBX_CONFIG_H="config.h"' -ULIBMDBX_EXPORTS -c src/mdbx.c++ -o $@ -dist: tags dist-checked.tag libmdbx-sources-$(MDBX_VERSION_IDENT).tar.gz $(lastword $(MAKEFILE_LIST)) +dist: tags @dist-checked.tag libmdbx-sources-$(MDBX_VERSION_IDENT).tar.gz $(lastword $(MAKEFILE_LIST)) @echo ' AMALGAMATION is done' tags: @@ -619,32 +619,32 @@ release-assets: libmdbx-amalgamated-$(MDBX_GIT_VERSION).zpaq \ || (echo 'ERROR: Is not a valid release because not in the clean state with a suitable annotated tag!!!' >&2 && false)) \ && echo ' RELEASE ASSETS are done' -dist-checked.tag: $(addprefix dist/, $(DIST_SRC) $(DIST_EXTRA)) +@dist-checked.tag: $(addprefix dist/, $(DIST_SRC) $(DIST_EXTRA)) @echo -n ' VERIFY amalgamated sources...' $(QUIET)rm -rf $@ dist/@tmp-essentials.inc dist/@tmp-internals.inc \ && if grep -R "define xMDBX_ALLOY" dist | grep -q MDBX_BUILD_SOURCERY; then echo "sed output is WRONG!" >&2; exit 2; fi \ - && rm -rf dist-check && cp -r -p dist dist-check && ($(MAKE) IOARENA=false CXXSTD=$(CXXSTD) -C dist-check >dist-check.log 2>dist-check.err || (cat dist-check.err && exit 1)) \ - && touch $@ || (echo " FAILED! See dist-check.log and dist-check.err" >&2; exit 2) && echo " Ok" + && rm -rf @dist-check && cp -r -p dist @dist-check && ($(MAKE) IOARENA=false CXXSTD=$(CXXSTD) -C @dist-check >@dist-check.log 2>@dist-check.err || (cat @dist-check.err && exit 1)) \ + && touch $@ || (echo " FAILED! 
See @dist-check.log and @dist-check.err" >&2; exit 2) && echo " Ok" -%.tar.gz: dist-checked.tag +%.tar.gz: @dist-checked.tag @echo ' CREATE $@' $(QUIET)$(TAR) -c $(shell LC_ALL=C $(TAR) --help | grep -q -- '--owner' && echo '--owner=0 --group=0') -f - -C dist $(DIST_SRC) $(DIST_EXTRA) | gzip -c -9 >$@ -%.tar.xz: dist-checked.tag +%.tar.xz: @dist-checked.tag @echo ' CREATE $@' $(QUIET)$(TAR) -c $(shell LC_ALL=C $(TAR) --help | grep -q -- '--owner' && echo '--owner=0 --group=0') -f - -C dist $(DIST_SRC) $(DIST_EXTRA) | xz -9 -z >$@ -%.tar.bz2: dist-checked.tag +%.tar.bz2: @dist-checked.tag @echo ' CREATE $@' $(QUIET)$(TAR) -c $(shell LC_ALL=C $(TAR) --help | grep -q -- '--owner' && echo '--owner=0 --group=0') -f - -C dist $(DIST_SRC) $(DIST_EXTRA) | bzip2 -9 -z >$@ -%.zip: dist-checked.tag +%.zip: @dist-checked.tag @echo ' CREATE $@' - $(QUIET)rm -rf $@ && (cd dist && $(ZIP) -9 ../$@ $(DIST_SRC) $(DIST_EXTRA)) &>zip.log + $(QUIET)rm -rf $@ && (cd dist && $(ZIP) -9 ../$@ $(DIST_SRC) $(DIST_EXTRA)) &>@zip.log -%.zpaq: dist-checked.tag +%.zpaq: @dist-checked.tag @echo ' CREATE $@' - $(QUIET)rm -rf $@ && (cd dist && zpaq a ../$@ $(DIST_SRC) $(DIST_EXTRA) -m59) &>zpaq.log + $(QUIET)rm -rf $@ && (cd dist && zpaq a ../$@ $(DIST_SRC) $(DIST_EXTRA) -m59) &>@zpaq.log dist/@tmp-essentials.inc: src/version.c $(ALLOY_DEPS) $(lastword $(MAKEFILE_LIST)) @echo ' ALLOYING...' 
@@ -741,11 +741,11 @@ dist/VERSION.json: src/version.c dist/ntdll.def: src/ntdll.def @echo ' COPY $@' - $(QUIET)mkdir -p dist/cmake/ && cp $< $@ + $(QUIET)mkdir -p dist/ && cp $< $@ dist/config.h.in: src/config.h.in @echo ' COPY $@' - $(QUIET)mkdir -p dist/cmake/ && cp $< $@ + $(QUIET)mkdir -p dist/ && cp $< $@ dist/man1/mdbx_%.1: src/man1/mdbx_%.1 @echo ' COPY $@' From b5def26565fbb180392f6ef4609dd613715d08f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 12 Nov 2024 20:44:08 +0300 Subject: [PATCH 330/443] =?UTF-8?q?mdbx-make:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=86=D0=B5=D0=BB=D0=B5?= =?UTF-8?q?=D0=B9=20`cmake-build`=20=D0=B8=20`ninja`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GNUmakefile | 17 +++++++++++++---- Makefile | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index 8eabcf19..7c2b0291 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -51,6 +51,8 @@ CC ?= gcc CXX ?= g++ CFLAGS_EXTRA ?= LD ?= ld +CMAKE ?= cmake +CMAKE_OPT ?= # build options MDBX_BUILD_OPTIONS ?=-DNDEBUG=1 @@ -127,7 +129,7 @@ MANPAGES := mdbx_stat.1 mdbx_copy.1 mdbx_dump.1 mdbx_load.1 mdbx_chk.1 mdbx_dr TIP := // TIP: .PHONY: all help options lib libs tools clean install uninstall check_buildflags_tag tools-static -.PHONY: install-strip install-no-strip strip libmdbx mdbx show-options lib-static lib-shared +.PHONY: install-strip install-no-strip strip libmdbx mdbx show-options lib-static lib-shared cmake-build ninja boolean = $(if $(findstring $(strip $($1)),YES Yes yes y ON On on 1 true True TRUE),1,$(if $(findstring $(strip $($1)),NO No no n OFF Off off 0 false False FALSE),,$(error Wrong value `$($1)` of $1 for YES/NO option))) select_by = $(if $(call boolean,$(1)),$(2),$(3)) @@ -161,6 +163,7 @@ help: @echo " make clean " @echo " make 
install " @echo " make uninstall " + @echo " make cmake-build | ninja - build by CMake & Ninja" @echo "" @echo " make strip - strip debug symbols from binaries" @echo " make install-no-strip - install explicitly without strip" @@ -257,7 +260,7 @@ clean: $(QUIET)rm -rf $(MDBX_TOOLS) mdbx_test @* *.[ao] *.[ls]o *.$(SO_SUFFIX) *.dSYM *~ tmp.db/* \ *.gcov *.log *.err src/*.o test/*.o mdbx_example dist @dist-check \ config.h src/config.h src/version.c *.tar* @buildflags.tag @dist-checked.tag \ - mdbx_*.static mdbx_*.static-lto + mdbx_*.static mdbx_*.static-lto CMakeFiles MDBX_BUILD_FLAGS =$(strip MDBX_BUILD_CXX=$(MDBX_BUILD_CXX) $(MDBX_BUILD_OPTIONS) $(call select_by,MDBX_BUILD_CXX,$(CXXFLAGS) $(LDFLAGS) $(LIB_STDCXXFS) $(LIBS),$(CFLAGS) $(LDFLAGS) $(LIBS))) check_buildflags_tag: @@ -277,6 +280,11 @@ lib-shared libmdbx.$(SO_SUFFIX): mdbx-dylib.o $(call select_by,MDBX_BUILD_CXX,md @echo ' LD $@' $(QUIET)$(call select_by,MDBX_BUILD_CXX,$(CXX) $(CXXFLAGS),$(CC) $(CFLAGS)) $^ -pthread -shared $(LDFLAGS) $(call select_by,MDBX_BUILD_CXX,$(LIB_STDCXXFS)) $(LIBS) -o $@ +ninja: cmake-build +cmake-build: + @echo "-G Ninja . && cmake --build ." + $(QUIET)mkdir @cmake-ninja-build && $(CMAKE) $(CMAKE_OPT) -G Ninja -S . -B @cmake-ninja-build && $(CMAKE) --build @cmake-ninja-build + #> dist-cutoff-begin ifeq ($(wildcard mdbx.c),mdbx.c) #< dist-cutoff-end @@ -392,7 +400,8 @@ MDBX_DIST_DIR = libmdbx-$(MDBX_VERSION_NODOT) MDBX_SMOKE_EXTRA ?= check: DESTDIR = $(shell pwd)/@check-install -check: test dist install +check: CMAKE_OPT = -Werror=dev +check: smoke-assertion ninja dist install test smoke-assertion: MDBX_BUILD_OPTIONS:=$(strip $(MDBX_BUILD_OPTIONS) -DMDBX_FORCE_ASSERTIONS=1 -UNDEBUG -DMDBX_DEBUG=0) smoke-assertion: smoke @@ -623,7 +632,7 @@ release-assets: libmdbx-amalgamated-$(MDBX_GIT_VERSION).zpaq \ @echo -n ' VERIFY amalgamated sources...' 
$(QUIET)rm -rf $@ dist/@tmp-essentials.inc dist/@tmp-internals.inc \ && if grep -R "define xMDBX_ALLOY" dist | grep -q MDBX_BUILD_SOURCERY; then echo "sed output is WRONG!" >&2; exit 2; fi \ - && rm -rf @dist-check && cp -r -p dist @dist-check && ($(MAKE) IOARENA=false CXXSTD=$(CXXSTD) -C @dist-check >@dist-check.log 2>@dist-check.err || (cat @dist-check.err && exit 1)) \ + && rm -rf @dist-check && cp -r -p dist @dist-check && ($(MAKE) -j IOARENA=false CXXSTD=$(CXXSTD) -C @dist-check all ninja >@dist-check.log 2>@dist-check.err || (cat @dist-check.err && exit 1)) \ && touch $@ || (echo " FAILED! See @dist-check.log and @dist-check.err" >&2; exit 2) && echo " Ok" %.tar.gz: @dist-checked.tag diff --git a/Makefile b/Makefile index 78ba3483..8a176ceb 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ # This is thunk-Makefile for calling GNU Make 3.80 or above -all help options \ +all help options cmake-build ninja \ clean install install-no-strip install-strip strip tools uninstall \ bench bench-clean bench-couple bench-quartet bench-triplet re-bench \ lib libs lib-static lib-shared tools-static \ From e2b4245abe9e8a653c9e3dec07fb6327f47de3fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 12 Nov 2024 21:13:57 +0300 Subject: [PATCH 331/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20`.gitignore`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 72 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index 7c21a018..f88be7bf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,47 +1,69 @@ +*[~#] +@* *.[ao] +*.autosave *.bak -*.exe +build.ninja +cmake-build-* +CMakeCache.txt +CMakeFiles/ +cmake_install.cmake +CMakeLists.txt.user +core +CTestTestfile.cmake 
+DartConfiguration.tcl +dist/ +*.dll +docs/Doxyfile +docs/html/ +*.dSYM +*.dylib *.err +*.exe *.gcda *.gcno *.gcov -*.lo -*.orig -*.rej -*.so -*.dll -*.dylib -*.dSYM -*[~#] .idea -.vs/ -.vscode/ -cmake-build-* -@* -core -mdbx_example libmdbx.creator.user -CMakeLists.txt.user +*.lo mdbx_chk mdbx_copy mdbx_drop mdbx_dump +mdbx_example mdbx_load mdbx_stat mdbx_test +.ninja_deps +.ninja_log +*.orig +*.rej +*.so +src/config.h +src/version.c +*.tar* +test/cmake_install.cmake +test/CTestTestfile.cmake +test_extra_crunched_delete +test_extra_cursor_closing +test_extra_dbi +test_extra_doubtless_positioning +test_extra_dupfix_addodd +test_extra_dupfix_multiple +test_extra_early_close_dbi +test_extra_hex_base64_base58 +test_extra_maindb_ordinal +test_extra_open +test_extra_pcrf +test_extra_upsert_alldups +Testing/ test.log test/tmp.db test/tmp.db-lck tmp.db tmp.db-lck valgrind.* -src/version.c -src/config.h -dist/ -dist-check/ -dist-checked.tag -*.tar* +version.c +.vs/ +.vscode/ *.zip -docs/Doxyfile -docs/html/ -buildflags.tag From 5862a4b5423a5d895a0edea7de08f7d88e1bb2b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 12 Nov 2024 22:36:50 +0300 Subject: [PATCH 332/443] =?UTF-8?q?mdbx-cmake:=20=D0=B8=D1=81=D0=BF=D1=80?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=83=D0=BF=D1=83?= =?UTF-8?q?=D1=89=D0=B5=D0=BD=D0=BD=D1=8B=D1=85=20`POST=5FBUILD`=20=D0=B2?= =?UTF-8?q?=20`add=5Fcustom=5Fcommand()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 581a6a80..d575c3db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -429,12 +429,12 @@ else() if(NOT "${PROJECT_BINARY_DIR}" STREQUAL "${PROJECT_SOURCE_DIR}") add_custom_target(distclean) - add_custom_command(TARGET distclean + 
add_custom_command(TARGET distclean POST_BUILD COMMAND ${CMAKE_COMMAND} -E remove_directory "${PROJECT_BINARY_DIR}" COMMENT "Removing the build directory and its content") elseif(IS_DIRECTORY .git AND GIT) add_custom_target(distclean) - add_custom_command(TARGET distclean + add_custom_command(TARGET distclean POST_BUILD WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMAND ${GIT} submodule foreach --recursive git clean -f -X -d COMMAND ${GIT} clean -f -X -d From a2984c604de7ff7d0f48a55b21e9671c80022304 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 13 Nov 2024 15:03:16 +0300 Subject: [PATCH 333/443] =?UTF-8?q?mdbx-cmake:=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D1=84=D0=BE=D1=80=D0=BC=D0=B0=D1=82=D0=B8=D1=80=D0=BE=D0=B2?= =?UTF-8?q?=D0=B0=D0=BD=D0=B8=D0=B5=20cmake-=D1=81=D0=BA=D1=80=D0=B8=D0=BF?= =?UTF-8?q?=D1=82=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 890 ++++++++++++++++++++++++++----------------- cmake/compiler.cmake | 614 +++++++++++++++++++---------- cmake/profile.cmake | 30 +- cmake/utils.cmake | 253 ++++++++---- test/CMakeLists.txt | 306 +++++++++------ 5 files changed, 1344 insertions(+), 749 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d575c3db..793b8a60 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,27 +1,27 @@ -## Copyright (c) 2020-2024 Леонид Юрьев aka Leonid Yuriev -## SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2020-2024 Леонид Юрьев aka Leonid Yuriev +# SPDX-License-Identifier: Apache-2.0 -## -## libmdbx = { Revised and extended descendant of Symas LMDB. } -## Please see README.md at https://gitflic.ru/project/erthink/libmdbx -## -## Libmdbx is superior to LMDB in terms of features and reliability, -## not inferior in performance. 
libmdbx works on Linux, FreeBSD, MacOS X -## and other systems compliant with POSIX.1-2008, but also support Windows -## as a complementary platform. -## -## The next version is under active non-public development and will be -## released as MithrilDB and libmithrildb for libraries & packages. -## Admittedly mythical Mithril is resembling silver but being stronger and -## lighter than steel. Therefore MithrilDB is rightly relevant name. -## -## MithrilDB will be radically different from libmdbx by the new database -## format and API based on C++17, as well as the Apache 2.0 License. -## The goal of this revolution is to provide a clearer and robust API, -## add more features and new valuable properties of database. -## -## The Future will (be) Positive. Всё будет хорошо. -## +# +# libmdbx = { Revised and extended descendant of Symas LMDB. } Please see +# README.md at https://gitflic.ru/project/erthink/libmdbx +# +# Libmdbx is superior to LMDB in terms of features and reliability, not inferior +# in performance. libmdbx works on Linux, FreeBSD, MacOS X and other systems +# compliant with POSIX.1-2008, but also support Windows as a complementary +# platform. +# +# The next version is under active non-public development and will be released +# as MithrilDB and libmithrildb for libraries & packages. Admittedly mythical +# Mithril is resembling silver but being stronger and lighter than steel. +# Therefore MithrilDB is rightly relevant name. +# +# MithrilDB will be radically different from libmdbx by the new database format +# and API based on C++17, as well as the Apache 2.0 License. The goal of this +# revolution is to provide a clearer and robust API, add more features and new +# valuable properties of database. +# +# The Future will (be) Positive. Всё будет хорошо. 
+# if(CMAKE_VERSION VERSION_LESS 3.8.2) cmake_minimum_required(VERSION 3.0.2) @@ -57,142 +57,147 @@ else() set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_AVAILABLE FALSE) endif() -if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/COPYRIGHT" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/NOTICE" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/README.md" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.h++" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/CMakeLists.txt" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/alloy.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-cursor.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-env.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-extra.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-key-transform.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-txn.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/atomics-ops.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/atomics-types.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/audit.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/chk.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cogs.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cogs.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/coherency.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cold.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/config.h.in" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/copy.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cursor.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cursor.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dbi.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dbi.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/debug_begin.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/debug_end.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dpl.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dpl.h" AND - EXISTS 
"${CMAKE_CURRENT_SOURCE_DIR}/src/dxb.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/env-opts.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/env.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/essentials.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/gc-get.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/gc-put.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/gc.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/global.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/internals.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/layout-dxb.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/layout-lck.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/lck-posix.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/lck-windows.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/lck.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/lck.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/logging_and_debug.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/logging_and_debug.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1/mdbx_chk.1" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1/mdbx_copy.1" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1/mdbx_drop.1" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1/mdbx_dump.1" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1/mdbx_load.1" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1/mdbx_stat.1" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/mdbx.c++" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/meta.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/meta.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/misc.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/mvcc-readers.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/node.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/node.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/ntdll.def" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/options.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/osal.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/osal.h" AND - EXISTS 
"${CMAKE_CURRENT_SOURCE_DIR}/src/page-get.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-iov.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-iov.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-ops.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-ops.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-search.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/pnl.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/pnl.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/preface.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/proto.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/range-estimate.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/refund.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/sort.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/spill.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/spill.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/table.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tls.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tls.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/chk.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/copy.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/drop.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/dump.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/load.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/stat.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/wingetopt.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/wingetopt.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tree.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/txl.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/txl.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/txn.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/unaligned.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/utils.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/utils.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/version.c.in" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/walk.c" AND - EXISTS 
"${CMAKE_CURRENT_SOURCE_DIR}/src/walk.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/windows-import.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/windows-import.h") +if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/COPYRIGHT" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/NOTICE" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/README.md" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.h++" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/CMakeLists.txt" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/alloy.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-cursor.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-env.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-extra.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-key-transform.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-txn.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/atomics-ops.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/atomics-types.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/audit.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/chk.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cogs.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cogs.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/coherency.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cold.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/config.h.in" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/copy.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cursor.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cursor.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dbi.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dbi.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/debug_begin.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/debug_end.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dpl.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dpl.h" + AND EXISTS 
"${CMAKE_CURRENT_SOURCE_DIR}/src/dxb.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/env-opts.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/env.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/essentials.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/gc-get.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/gc-put.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/gc.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/global.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/internals.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/layout-dxb.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/layout-lck.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/lck-posix.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/lck-windows.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/lck.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/lck.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/logging_and_debug.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/logging_and_debug.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1/mdbx_chk.1" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1/mdbx_copy.1" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1/mdbx_drop.1" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1/mdbx_dump.1" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1/mdbx_load.1" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/man1/mdbx_stat.1" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/mdbx.c++" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/meta.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/meta.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/misc.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/mvcc-readers.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/node.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/node.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/ntdll.def" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/options.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/osal.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/osal.h" + AND EXISTS 
"${CMAKE_CURRENT_SOURCE_DIR}/src/page-get.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-iov.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-iov.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-ops.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-ops.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-search.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/pnl.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/pnl.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/preface.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/proto.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/range-estimate.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/refund.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/sort.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/spill.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/spill.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/table.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tls.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tls.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/chk.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/copy.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/drop.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/dump.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/load.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/stat.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/wingetopt.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/wingetopt.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tree.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/txl.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/txl.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/txn.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/unaligned.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/utils.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/utils.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/version.c.in" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/walk.c" + AND EXISTS 
"${CMAKE_CURRENT_SOURCE_DIR}/src/walk.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/windows-import.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/windows-import.h") set(MDBX_AMALGAMATED_SOURCE FALSE) find_program(GIT git) if(NOT GIT) message(SEND_ERROR "Git command-line tool not found") endif() set(MDBX_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src") -elseif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/VERSION.json" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/NOTICE" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.c++" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.h" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.h++" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_chk.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_copy.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_dump.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_load.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_stat.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_drop.c" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/ntdll.def" AND - EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/config.h.in") +elseif( + EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/VERSION.json" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/NOTICE" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.c++" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.h++" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_chk.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_copy.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_dump.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_load.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_stat.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx_drop.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/ntdll.def" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/config.h.in") set(MDBX_AMALGAMATED_SOURCE TRUE) 
set(MDBX_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") else() - message(FATAL_ERROR "\n" - "The set of libmdbx source code files is incomplete! " - "Instead just follow the https://libmdbx.dqdkfa.ru/usage.html " - "PLEASE, AVOID USING ANY OTHER TECHNIQUES.") + message( + FATAL_ERROR + "\nThe set of libmdbx source code files is incomplete! " + "Instead just follow the https://libmdbx.dqdkfa.ru/usage.html " + "PLEASE, AVOID USING ANY OTHER TECHNIQUES.") endif() if(DEFINED PROJECT_NAME) - option(MDBX_FORCE_BUILD_AS_MAIN_PROJECT "Force libmdbx to full control build options even it added as a subdirectory to your project." OFF) + option( + MDBX_FORCE_BUILD_AS_MAIN_PROJECT + "Force libmdbx to full control build options even it added as a subdirectory to your project." + OFF) endif() if(DEFINED PROJECT_NAME AND NOT MDBX_FORCE_BUILD_AS_MAIN_PROJECT) @@ -215,15 +220,20 @@ if(NOT MDBX_AMALGAMATED_SOURCE) include(CTest) option(MDBX_ENABLE_TESTS "Build libmdbx tests." ${BUILD_TESTING}) elseif(DEFINED MDBX_ENABLE_TESTS AND MDBX_ENABLE_TESTS) - message(WARNING "MDBX_ENABLE_TESTS=${MDBX_ENABLE_TESTS}: But amalgamated source code don't includes tests.") + message( + WARNING + "MDBX_ENABLE_TESTS=${MDBX_ENABLE_TESTS}: But amalgamated source code don't includes tests." + ) set(MDBX_ENABLE_TESTS OFF) endif() # Try to find a C++ compiler unless sure that this is unnecessary. -if (NOT CMAKE_CXX_COMPILER_LOADED) +if(NOT CMAKE_CXX_COMPILER_LOADED) include(CheckLanguage) - if(NOT DEFINED MDBX_BUILD_CXX OR MDBX_BUILD_CXX - OR (NOT MDBX_AMALGAMATED_SOURCE AND (NOT DEFINED MDBX_ENABLE_TESTS OR MDBX_ENABLE_TESTS))) + if(NOT DEFINED MDBX_BUILD_CXX + OR MDBX_BUILD_CXX + OR (NOT MDBX_AMALGAMATED_SOURCE AND (NOT DEFINED MDBX_ENABLE_TESTS + OR MDBX_ENABLE_TESTS))) check_language(CXX) if(CMAKE_CXX_COMPILER) enable_language(CXX) @@ -235,9 +245,12 @@ endif() # Set default build type to Release. This is to ease a User's life. 
if(NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE Release CACHE STRING - "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." - FORCE) + set(CMAKE_BUILD_TYPE + Release + CACHE + STRING + "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." + FORCE) endif() string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPERCASE) @@ -279,8 +292,9 @@ include(FindPackageMessage) include(GNUInstallDirs) if(CMAKE_C_COMPILER_ID STREQUAL "MSVC" AND MSVC_VERSION LESS 1900) - message(SEND_ERROR "MSVC compiler ${MSVC_VERSION} is too old for building MDBX." - " At least 'Microsoft Visual Studio 2015' is required.") + message( + SEND_ERROR "MSVC compiler ${MSVC_VERSION} is too old for building MDBX." + " At least 'Microsoft Visual Studio 2015' is required.") endif() if(NOT DEFINED THREADS_PREFER_PTHREAD_FLAG) @@ -293,9 +307,11 @@ include(cmake/compiler.cmake) include(cmake/profile.cmake) # Workaround for `-pthread` toolchain/cmake bug -if(NOT APPLE AND NOT MSVC - AND CMAKE_USE_PTHREADS_INIT AND NOT CMAKE_THREAD_LIBS_INIT - AND (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG)) +if(NOT APPLE + AND NOT MSVC + AND CMAKE_USE_PTHREADS_INIT + AND NOT CMAKE_THREAD_LIBS_INIT + AND (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG)) check_compiler_flag("-pthread" CC_HAS_PTHREAD) if(CC_HAS_PTHREAD AND NOT CMAKE_EXE_LINKER_FLAGS MATCHES "-pthread") message(STATUS "Force add -pthread for linker flags to avoid troubles") @@ -305,12 +321,12 @@ if(NOT APPLE AND NOT MSVC endif() endif() -CHECK_FUNCTION_EXISTS(pow NOT_NEED_LIBM) +check_function_exists(pow NOT_NEED_LIBM) if(NOT_NEED_LIBM) set(LIB_MATH "") else() set(CMAKE_REQUIRED_LIBRARIES m) - CHECK_FUNCTION_EXISTS(pow HAVE_LIBM) + check_function_exists(pow HAVE_LIBM) if(HAVE_LIBM) set(LIB_MATH m) else() @@ -323,52 +339,94 @@ if(SUBPROJECT) option(BUILD_SHARED_LIBS "Build shared libraries (DLLs)" OFF) endif() if(NOT DEFINED CMAKE_POSITION_INDEPENDENT_CODE) - 
option(CMAKE_POSITION_INDEPENDENT_CODE "Generate position independent (PIC)" ON) + option(CMAKE_POSITION_INDEPENDENT_CODE + "Generate position independent (PIC)" ON) endif() set(MDBX_MANAGE_BUILD_FLAGS_DEFAULT OFF) else() option(BUILD_SHARED_LIBS "Build shared libraries (DLLs)" ON) - option(CMAKE_POSITION_INDEPENDENT_CODE "Generate position independent (PIC)" ON) - if (CC_HAS_ARCH_NATIVE) - option(BUILD_FOR_NATIVE_CPU "Generate code for the compiling machine CPU" OFF) + option(CMAKE_POSITION_INDEPENDENT_CODE "Generate position independent (PIC)" + ON) + if(CC_HAS_ARCH_NATIVE) + option(BUILD_FOR_NATIVE_CPU "Generate code for the compiling machine CPU" + OFF) endif() if(CMAKE_INTERPROCEDURAL_OPTIMIZATION_AVAILABLE - OR GCC_LTO_AVAILABLE OR MSVC_LTO_AVAILABLE OR CLANG_LTO_AVAILABLE) - if((CMAKE_CONFIGURATION_TYPES OR NOT CMAKE_BUILD_TYPE_UPPERCASE STREQUAL "DEBUG") AND - ((MSVC_LTO_AVAILABLE AND NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 19) OR - (GCC_LTO_AVAILABLE AND NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 7) OR - (CLANG_LTO_AVAILABLE AND NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 5))) + OR GCC_LTO_AVAILABLE + OR MSVC_LTO_AVAILABLE + OR CLANG_LTO_AVAILABLE) + if((CMAKE_CONFIGURATION_TYPES OR NOT CMAKE_BUILD_TYPE_UPPERCASE STREQUAL + "DEBUG") + AND ((MSVC_LTO_AVAILABLE AND NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 19 + ) + OR (GCC_LTO_AVAILABLE AND NOT CMAKE_C_COMPILER_VERSION VERSION_LESS + 7) + OR (CLANG_LTO_AVAILABLE AND NOT CMAKE_C_COMPILER_VERSION + VERSION_LESS 5) + )) set(INTERPROCEDURAL_OPTIMIZATION_DEFAULT ON) else() set(INTERPROCEDURAL_OPTIMIZATION_DEFAULT OFF) endif() - option(INTERPROCEDURAL_OPTIMIZATION "Enable interprocedural/LTO optimization." ${INTERPROCEDURAL_OPTIMIZATION_DEFAULT}) + option(INTERPROCEDURAL_OPTIMIZATION + "Enable interprocedural/LTO optimization." 
+ ${INTERPROCEDURAL_OPTIMIZATION_DEFAULT}) endif() if(INTERPROCEDURAL_OPTIMIZATION) if(GCC_LTO_AVAILABLE) set(LTO_ENABLED TRUE) - set(CMAKE_AR ${CMAKE_GCC_AR} CACHE PATH "Path to ar program with LTO-plugin" FORCE) - set(CMAKE_C_COMPILER_AR ${CMAKE_AR} CACHE PATH "Path to ar program with LTO-plugin" FORCE) - set(CMAKE_CXX_COMPILER_AR ${CMAKE_AR} CACHE PATH "Path to ar program with LTO-plugin" FORCE) - set(CMAKE_NM ${CMAKE_GCC_NM} CACHE PATH "Path to nm program with LTO-plugin" FORCE) - set(CMAKE_RANLIB ${CMAKE_GCC_RANLIB} CACHE PATH "Path to ranlib program with LTO-plugin" FORCE) - set(CMAKE_C_COMPILER_RANLIB ${CMAKE_RANLIB} CACHE PATH "Path to ranlib program with LTO-plugin" FORCE) - set(CMAKE_CXX_COMPILER_RANLIB ${CMAKE_RANLIB} CACHE PATH "Path to ranlib program with LTO-plugin" FORCE) + set(CMAKE_AR + ${CMAKE_GCC_AR} + CACHE PATH "Path to ar program with LTO-plugin" FORCE) + set(CMAKE_C_COMPILER_AR + ${CMAKE_AR} + CACHE PATH "Path to ar program with LTO-plugin" FORCE) + set(CMAKE_CXX_COMPILER_AR + ${CMAKE_AR} + CACHE PATH "Path to ar program with LTO-plugin" FORCE) + set(CMAKE_NM + ${CMAKE_GCC_NM} + CACHE PATH "Path to nm program with LTO-plugin" FORCE) + set(CMAKE_RANLIB + ${CMAKE_GCC_RANLIB} + CACHE PATH "Path to ranlib program with LTO-plugin" FORCE) + set(CMAKE_C_COMPILER_RANLIB + ${CMAKE_RANLIB} + CACHE PATH "Path to ranlib program with LTO-plugin" FORCE) + set(CMAKE_CXX_COMPILER_RANLIB + ${CMAKE_RANLIB} + CACHE PATH "Path to ranlib program with LTO-plugin" FORCE) message(STATUS "MDBX indulge Link-Time Optimization by GCC") elseif(CLANG_LTO_AVAILABLE) set(LTO_ENABLED TRUE) if(CMAKE_CLANG_LD) - set(CMAKE_LINKER ${CMAKE_CLANG_LD} CACHE PATH "Path to lld or ld program with LTO-plugin" FORCE) + set(CMAKE_LINKER + ${CMAKE_CLANG_LD} + CACHE PATH "Path to lld or ld program with LTO-plugin" FORCE) endif() - set(CMAKE_AR ${CMAKE_CLANG_AR} CACHE PATH "Path to ar program with LTO-plugin" FORCE) - set(CMAKE_C_COMPILER_AR ${CMAKE_AR} CACHE PATH "Path to ar program with 
LTO-plugin" FORCE) - set(CMAKE_CXX_COMPILER_AR ${CMAKE_AR} CACHE PATH "Path to ar program with LTO-plugin" FORCE) - set(CMAKE_NM ${CMAKE_CLANG_NM} CACHE PATH "Path to nm program with LTO-plugin" FORCE) - set(CMAKE_RANLIB ${CMAKE_CLANG_RANLIB} CACHE PATH "Path to ranlib program with LTO-plugin" FORCE) - set(CMAKE_C_COMPILER_RANLIB ${CMAKE_RANLIB} CACHE PATH "Path to ranlib program with LTO-plugin" FORCE) - set(CMAKE_CXX_COMPILER_RANLIB ${CMAKE_RANLIB} CACHE PATH "Path to ranlib program with LTO-plugin" FORCE) + set(CMAKE_AR + ${CMAKE_CLANG_AR} + CACHE PATH "Path to ar program with LTO-plugin" FORCE) + set(CMAKE_C_COMPILER_AR + ${CMAKE_AR} + CACHE PATH "Path to ar program with LTO-plugin" FORCE) + set(CMAKE_CXX_COMPILER_AR + ${CMAKE_AR} + CACHE PATH "Path to ar program with LTO-plugin" FORCE) + set(CMAKE_NM + ${CMAKE_CLANG_NM} + CACHE PATH "Path to nm program with LTO-plugin" FORCE) + set(CMAKE_RANLIB + ${CMAKE_CLANG_RANLIB} + CACHE PATH "Path to ranlib program with LTO-plugin" FORCE) + set(CMAKE_C_COMPILER_RANLIB + ${CMAKE_RANLIB} + CACHE PATH "Path to ranlib program with LTO-plugin" FORCE) + set(CMAKE_CXX_COMPILER_RANLIB + ${CMAKE_RANLIB} + CACHE PATH "Path to ranlib program with LTO-plugin" FORCE) message(STATUS "MDBX indulge Link-Time Optimization by CLANG") elseif(MSVC_LTO_AVAILABLE) set(LTO_ENABLED TRUE) @@ -388,40 +446,45 @@ else() if(NOT MDBX_AMALGAMATED_SOURCE) find_program(VALGRIND valgrind) if(VALGRIND) - # LY: cmake is ugly and nasty. - # - therefore memcheck-options should be defined before including ctest; - # - otherwise ctest may ignore it. + # (LY) cmake is ugly and nasty. Therefore memcheck-options should be + # defined before including ctest. Otherwise ctest may ignore it. 
set(MEMORYCHECK_SUPPRESSIONS_FILE - "${CMAKE_CURRENT_SOURCE_DIR}/test/valgrind_suppress.txt" - CACHE FILEPATH "Suppressions file for Valgrind" FORCE) + "${CMAKE_CURRENT_SOURCE_DIR}/test/valgrind_suppress.txt" + CACHE FILEPATH "Suppressions file for Valgrind" FORCE) set(MEMORYCHECK_COMMAND_OPTIONS - "--trace-children=yes --leak-check=full --track-origins=yes --track-origins=yes --error-exitcode=42 --error-markers=@ --errors-for-leak-kinds=definite --fair-sched=yes --suppressions=${MEMORYCHECK_SUPPRESSIONS_FILE}" - CACHE STRING "Valgrind options" FORCE) - set(VALGRIND_COMMAND_OPTIONS "${MEMORYCHECK_COMMAND_OPTIONS}" CACHE STRING "Valgrind options" FORCE) + "--trace-children=yes --leak-check=full --track-origins=yes --track-origins=yes --error-exitcode=42 --error-markers=@ --errors-for-leak-kinds=definite --fair-sched=yes --suppressions=${MEMORYCHECK_SUPPRESSIONS_FILE}" + CACHE STRING "Valgrind options" FORCE) + set(VALGRIND_COMMAND_OPTIONS + "${MEMORYCHECK_COMMAND_OPTIONS}" + CACHE STRING "Valgrind options" FORCE) endif() # Enable 'make tags' target. find_program(CTAGS ctags) if(CTAGS) - add_custom_target(tags COMMAND ${CTAGS} -R -f tags + add_custom_target( + tags + COMMAND ${CTAGS} -R -f tags WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) add_custom_target(ctags DEPENDS tags) endif(CTAGS) if(UNIX) - find_program(CLANG_FORMAT - NAMES clang-format-13 clang-format) + find_program(CLANG_FORMAT NAMES clang-format-13 clang-format) if(CLANG_FORMAT) - execute_process(COMMAND ${CLANG_FORMAT} "--version" OUTPUT_VARIABLE clang_format_version_info) - string(REGEX MATCH "version ([0-9]+)\\.([0-9]+)\\.([0-9]+)(.*)?" clang_format_version_info CLANG_FORMAT_VERSION) - if(clang_format_version_info AND NOT CLANG_FORMAT_VERSION VERSION_LESS 13.0) + execute_process(COMMAND ${CLANG_FORMAT} "--version" + OUTPUT_VARIABLE clang_format_version_info) + string(REGEX MATCH "version ([0-9]+)\\.([0-9]+)\\.([0-9]+)(.*)?" 
+ clang_format_version_info CLANG_FORMAT_VERSION) + if(clang_format_version_info AND NOT CLANG_FORMAT_VERSION VERSION_LESS + 13.0) # Enable 'make reformat' target. - add_custom_target(reformat + add_custom_target( + reformat VERBATIM COMMAND - git ls-files | - grep -E \\.\(c|cxx|cc|cpp|h|hxx|hpp\)\(\\.in\)?\$ | - xargs ${CLANG_FORMAT} -i --style=file + git ls-files | grep -E \\.\(c|cxx|cc|cpp|h|hxx|hpp\)\(\\.in\)?\$ | + xargs ${CLANG_FORMAT} -i --style=file WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}) endif() endif() @@ -429,12 +492,16 @@ else() if(NOT "${PROJECT_BINARY_DIR}" STREQUAL "${PROJECT_SOURCE_DIR}") add_custom_target(distclean) - add_custom_command(TARGET distclean POST_BUILD + add_custom_command( + TARGET distclean + POST_BUILD COMMAND ${CMAKE_COMMAND} -E remove_directory "${PROJECT_BINARY_DIR}" COMMENT "Removing the build directory and its content") elseif(IS_DIRECTORY .git AND GIT) add_custom_target(distclean) - add_custom_command(TARGET distclean POST_BUILD + add_custom_command( + TARGET distclean + POST_BUILD WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMAND ${GIT} submodule foreach --recursive git clean -f -X -d COMMAND ${GIT} clean -f -X -d @@ -445,7 +512,9 @@ else() set(MDBX_MANAGE_BUILD_FLAGS_DEFAULT ON) endif(SUBPROJECT) -option(MDBX_MANAGE_BUILD_FLAGS "Allow libmdbx to configure/manage/override its own build flags" ${MDBX_MANAGE_BUILD_FLAGS_DEFAULT}) +option(MDBX_MANAGE_BUILD_FLAGS + "Allow libmdbx to configure/manage/override its own build flags" + ${MDBX_MANAGE_BUILD_FLAGS_DEFAULT}) if(MDBX_MANAGE_BUILD_FLAGS) setup_compile_flags() endif() @@ -462,13 +531,16 @@ if(NOT DEFINED MDBX_CXX_STANDARD) if(DEFINED CMAKE_CXX_STANDARD) set(MDBX_CXX_STANDARD ${CMAKE_CXX_STANDARD}) elseif(NOT HAS_CXX23 LESS 0 - AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12)) + AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION + VERSION_LESS 12)) set(MDBX_CXX_STANDARD 23) elseif(NOT HAS_CXX20 LESS 0 - AND NOT (CMAKE_COMPILER_IS_CLANG 
AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10)) + AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION + VERSION_LESS 10)) set(MDBX_CXX_STANDARD 20) elseif(NOT HAS_CXX17 LESS 0 - AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5)) + AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION + VERSION_LESS 5)) set(MDBX_CXX_STANDARD 17) elseif(NOT HAS_CXX14 LESS 0) set(MDBX_CXX_STANDARD 14) @@ -484,15 +556,20 @@ endif() list(FIND CMAKE_C_COMPILE_FEATURES c_std_11 HAS_C11) list(FIND CMAKE_C_COMPILE_FEATURES c_std_23 HAS_C23) if(NOT DEFINED MDBX_C_STANDARD) - # MSVC >= 19.28 (Microsoft Visual Studio 16.8) is mad! - # It unable process Windows SDK headers in the C11 mode! - if(MSVC AND MSVC_VERSION GREATER 1927 AND NOT MSVC_VERSION GREATER 1929) + # MSVC >= 19.28 (Microsoft Visual Studio 16.8) is mad! It unable process + # Windows SDK headers in the C11 mode! + if(MSVC + AND MSVC_VERSION GREATER 1927 + AND NOT MSVC_VERSION GREATER 1929) set(MDBX_C_STANDARD 99) set(C_FALLBACK_11 OFF) set(C_FALLBACK_GNU11 OFF) elseif(NOT HAS_C23 LESS 0) set(MDBX_C_STANDARD 23) - elseif(HAS_C11 LESS 0 AND NOT C_FALLBACK_GNU11 AND NOT C_FALLBACK_11) + elseif( + HAS_C11 LESS 0 + AND NOT C_FALLBACK_GNU11 + AND NOT C_FALLBACK_11) set(MDBX_C_STANDARD 99) else() set(MDBX_C_STANDARD 11) @@ -509,11 +586,16 @@ if(WIN32 AND EXISTS "${MDBX_SOURCE_DIR}/ntdll.def") endif() if(MSVC_LIB_EXE) message(STATUS "Found MSVC's lib tool: ${MSVC_LIB_EXE}") - set(MDBX_NTDLL_EXTRA_IMPLIB "${CMAKE_CURRENT_BINARY_DIR}/mdbx_ntdll_extra.lib") - add_custom_command(OUTPUT "${MDBX_NTDLL_EXTRA_IMPLIB}" + set(MDBX_NTDLL_EXTRA_IMPLIB + "${CMAKE_CURRENT_BINARY_DIR}/mdbx_ntdll_extra.lib") + add_custom_command( + OUTPUT "${MDBX_NTDLL_EXTRA_IMPLIB}" COMMENT "Create extra-import-library for ntdll.dll" MAIN_DEPENDENCY "${MDBX_SOURCE_DIR}/ntdll.def" - COMMAND ${MSVC_LIB_EXE} /def:"${MDBX_SOURCE_DIR}/ntdll.def" /out:"${MDBX_NTDLL_EXTRA_IMPLIB}" ${INITIAL_CMAKE_STATIC_LINKER_FLAGS}) + 
COMMAND + ${MSVC_LIB_EXE} /def:"${MDBX_SOURCE_DIR}/ntdll.def" + /out:"${MDBX_NTDLL_EXTRA_IMPLIB}" + ${INITIAL_CMAKE_STATIC_LINKER_FLAGS}) else() message(WARNING "MSVC's lib tool not found") endif() @@ -526,32 +608,35 @@ if(WIN32 AND EXISTS "${MDBX_SOURCE_DIR}/ntdll.def") endif() if(DLLTOOL) message(STATUS "Found dlltool: ${DLLTOOL}") - set(MDBX_NTDLL_EXTRA_IMPLIB "${CMAKE_CURRENT_BINARY_DIR}/mdbx_ntdll_extra.a") - add_custom_command(OUTPUT "${MDBX_NTDLL_EXTRA_IMPLIB}" + set(MDBX_NTDLL_EXTRA_IMPLIB + "${CMAKE_CURRENT_BINARY_DIR}/mdbx_ntdll_extra.a") + add_custom_command( + OUTPUT "${MDBX_NTDLL_EXTRA_IMPLIB}" COMMENT "Create extra-import-library for ntdll.dll" MAIN_DEPENDENCY "${MDBX_SOURCE_DIR}/ntdll.def" - COMMAND ${DLLTOOL} -d "${MDBX_SOURCE_DIR}/ntdll.def" -l "${MDBX_NTDLL_EXTRA_IMPLIB}") + COMMAND ${DLLTOOL} -d "${MDBX_SOURCE_DIR}/ntdll.def" -l + "${MDBX_NTDLL_EXTRA_IMPLIB}") else() message(WARNING "dlltool not found") endif() endif() if(MDBX_NTDLL_EXTRA_IMPLIB) - # LY: Sometimes CMake requires a nightmarish magic for simple things. - # 1) create a target out of the library compilation result + # Sometimes CMake requires a nightmarish magic for simple things. 
+ # + # (1) create a target out of the library compilation result add_custom_target(ntdll_extra_target DEPENDS "${MDBX_NTDLL_EXTRA_IMPLIB}") - # 2) create an library target out of the library compilation result + # (2) create an library target out of the library compilation result add_library(ntdll_extra STATIC IMPORTED GLOBAL) add_dependencies(ntdll_extra ntdll_extra_target) - # 3) specify where the library is (and where to find the headers) - set_target_properties(ntdll_extra - PROPERTIES - IMPORTED_LOCATION "${MDBX_NTDLL_EXTRA_IMPLIB}") + # (3) specify where the library is (and where to find the headers) + set_target_properties(ntdll_extra PROPERTIES IMPORTED_LOCATION + "${MDBX_NTDLL_EXTRA_IMPLIB}") endif() endif() -################################################################################ -################################################################################ +# ############################################################################## +# ~~~ # # #### ##### ##### # #### # # #### # # # # # # # # # ## # # @@ -560,8 +645,11 @@ endif() # # # # # # # # # ## # # # #### # # # #### # # #### # +# ~~~ +# ############################################################################## -set(MDBX_BUILD_OPTIONS ENABLE_UBSAN ENABLE_ASAN ENABLE_MEMCHECK ENABLE_GPROF ENABLE_GCOV) +set(MDBX_BUILD_OPTIONS ENABLE_UBSAN ENABLE_ASAN ENABLE_MEMCHECK ENABLE_GPROF + ENABLE_GCOV) macro(add_mdbx_option NAME DESCRIPTION DEFAULT) list(APPEND MDBX_BUILD_OPTIONS ${NAME}) if(NOT ${DEFAULT} STREQUAL "AUTO") @@ -581,48 +669,81 @@ else() set(MDBX_BUILD_TOOLS_DEFAULT ON) endif() -add_mdbx_option(MDBX_INSTALL_STATIC "Build and install libmdbx for static linking" OFF) -add_mdbx_option(MDBX_BUILD_SHARED_LIBRARY "Build libmdbx as shared library (DLL)" ${BUILD_SHARED_LIBS}) -add_mdbx_option(MDBX_BUILD_TOOLS "Build MDBX tools (mdbx_chk/stat/dump/load/copy)" ${MDBX_BUILD_TOOLS_DEFAULT}) -CMAKE_DEPENDENT_OPTION(MDBX_INSTALL_MANPAGES "Install man-pages for MDBX tools 
(mdbx_chk/stat/dump/load/copy)" ON MDBX_BUILD_TOOLS OFF) -add_mdbx_option(MDBX_TXN_CHECKOWNER "Checking transaction matches the calling thread inside libmdbx's API" ON) -add_mdbx_option(MDBX_ENV_CHECKPID "Paranoid checking PID inside libmdbx's API" AUTO) +add_mdbx_option(MDBX_INSTALL_STATIC + "Build and install libmdbx for static linking" OFF) +add_mdbx_option(MDBX_BUILD_SHARED_LIBRARY + "Build libmdbx as shared library (DLL)" ${BUILD_SHARED_LIBS}) +add_mdbx_option( + MDBX_BUILD_TOOLS "Build MDBX tools (mdbx_chk/stat/dump/load/copy)" + ${MDBX_BUILD_TOOLS_DEFAULT}) +cmake_dependent_option( + MDBX_INSTALL_MANPAGES + "Install man-pages for MDBX tools (mdbx_chk/stat/dump/load/copy)" ON + MDBX_BUILD_TOOLS OFF) +add_mdbx_option( + MDBX_TXN_CHECKOWNER + "Checking transaction matches the calling thread inside libmdbx's API" ON) +add_mdbx_option(MDBX_ENV_CHECKPID "Paranoid checking PID inside libmdbx's API" + AUTO) mark_as_advanced(MDBX_ENV_CHECKPID) if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - add_mdbx_option(MDBX_DISABLE_GNU_SOURCE "Don't use GNU/Linux libc extensions" OFF) + add_mdbx_option(MDBX_DISABLE_GNU_SOURCE "Don't use GNU/Linux libc extensions" + OFF) mark_as_advanced(MDBX_DISABLE_GNU_SOURCE) endif() if(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin" OR IOS) - add_mdbx_option(MDBX_OSX_SPEED_INSTEADOF_DURABILITY "Disable use fcntl(F_FULLFSYNC) in favor of speed" OFF) + add_mdbx_option(MDBX_OSX_SPEED_INSTEADOF_DURABILITY + "Disable use fcntl(F_FULLFSYNC) in favor of speed" OFF) mark_as_advanced(MDBX_OSX_SPEED_INSTEADOF_DURABILITY) endif() if(WIN32) if(MDBX_NTDLL_EXTRA_IMPLIB) - add_mdbx_option(MDBX_WITHOUT_MSVC_CRT "Avoid dependence from MSVC CRT and use ntdll.dll instead" OFF) + add_mdbx_option( + MDBX_WITHOUT_MSVC_CRT + "Avoid dependence from MSVC CRT and use ntdll.dll instead" OFF) endif() set(MDBX_AVOID_MSYNC_DEFAULT ON) else() - add_mdbx_option(MDBX_USE_OFDLOCKS "Use Open file description locks (aka OFD locks, non-POSIX)" AUTO) + add_mdbx_option( + MDBX_USE_OFDLOCKS + 
"Use Open file description locks (aka OFD locks, non-POSIX)" AUTO) mark_as_advanced(MDBX_USE_OFDLOCKS) set(MDBX_AVOID_MSYNC_DEFAULT OFF) endif() -add_mdbx_option(MDBX_AVOID_MSYNC "Controls dirty pages tracking, spilling and persisting in MDBX_WRITEMAP mode" ${MDBX_AVOID_MSYNC_DEFAULT}) -add_mdbx_option(MDBX_LOCKING "Locking method (Windows=-1, SysV=5, POSIX=1988, POSIX=2001, POSIX=2008, Futexes=1995)" AUTO) +add_mdbx_option( + MDBX_AVOID_MSYNC + "Controls dirty pages tracking, spilling and persisting in MDBX_WRITEMAP mode" + ${MDBX_AVOID_MSYNC_DEFAULT}) +add_mdbx_option( + MDBX_LOCKING + "Locking method (Windows=-1, SysV=5, POSIX=1988, POSIX=2001, POSIX=2008, Futexes=1995)" + AUTO) mark_as_advanced(MDBX_LOCKING) -add_mdbx_option(MDBX_TRUST_RTC "Does a system have battery-backed Real-Time Clock or just a fake" AUTO) +add_mdbx_option( + MDBX_TRUST_RTC + "Does a system have battery-backed Real-Time Clock or just a fake" AUTO) mark_as_advanced(MDBX_TRUST_RTC) add_mdbx_option(MDBX_FORCE_ASSERTIONS "Force enable assertion checking" OFF) -add_mdbx_option(MDBX_DISABLE_VALIDATION "Disable some checks to reduce an overhead and detection probability of database corruption to a values closer to the LMDB" OFF) +add_mdbx_option( + MDBX_DISABLE_VALIDATION + "Disable some checks to reduce an overhead and detection probability of database corruption to a values closer to the LMDB" + OFF) mark_as_advanced(MDBX_DISABLE_VALIDATION) -add_mdbx_option(MDBX_ENABLE_REFUND "Zerocost auto-compactification during write-transactions" ON) -add_mdbx_option(MDBX_ENABLE_MADVISE "Using POSIX' madvise() and/or similar hints" ON) -if (CMAKE_TARGET_BITNESS GREATER 32) +add_mdbx_option(MDBX_ENABLE_REFUND + "Zerocost auto-compactification during write-transactions" ON) +add_mdbx_option(MDBX_ENABLE_MADVISE + "Using POSIX' madvise() and/or similar hints" ON) +if(CMAKE_TARGET_BITNESS GREATER 32) set(MDBX_BIGFOOT_DEFAULT ON) else() set(MDBX_BIGFOOT_DEFAULT OFF) endif() -add_mdbx_option(MDBX_ENABLE_BIGFOOT 
"Chunking long list of retired pages during huge transactions commit to avoid use sequences of pages" ${MDBX_BIGFOOT_DEFAULT}) -add_mdbx_option(MDBX_ENABLE_PGOP_STAT "Gathering statistics for page operations" ON) +add_mdbx_option( + MDBX_ENABLE_BIGFOOT + "Chunking long list of retired pages during huge transactions commit to avoid use sequences of pages" + ${MDBX_BIGFOOT_DEFAULT}) +add_mdbx_option(MDBX_ENABLE_PGOP_STAT + "Gathering statistics for page operations" ON) add_mdbx_option(MDBX_ENABLE_PROFGC "Profiling of GC search and updates" OFF) mark_as_advanced(MDBX_ENABLE_PROFGC) add_mdbx_option(MDBX_ENABLE_DBI_SPARSE "FIXME" ON) @@ -634,23 +755,30 @@ if(NOT MDBX_AMALGAMATED_SOURCE) else() set(MDBX_ALLOY_BUILD_DEFAULT ON) endif() - add_mdbx_option(MDBX_ALLOY_BUILD "Build MDBX library through single/alloyed object file" ${MDBX_ALLOY_BUILD_DEFAULT}) + add_mdbx_option( + MDBX_ALLOY_BUILD "Build MDBX library through single/alloyed object file" + ${MDBX_ALLOY_BUILD_DEFAULT}) endif() if((MDBX_BUILD_TOOLS OR MDBX_ENABLE_TESTS) AND MDBX_BUILD_SHARED_LIBRARY) - add_mdbx_option(MDBX_LINK_TOOLS_NONSTATIC "Link MDBX tools with non-static libmdbx" OFF) + add_mdbx_option(MDBX_LINK_TOOLS_NONSTATIC + "Link MDBX tools with non-static libmdbx" OFF) else() unset(MDBX_LINK_TOOLS_NONSTATIC CACHE) endif() -if(CMAKE_CXX_COMPILER_LOADED AND MDBX_CXX_STANDARD LESS 83 AND NOT MDBX_CXX_STANDARD LESS 11) +if(CMAKE_CXX_COMPILER_LOADED + AND MDBX_CXX_STANDARD LESS 83 + AND NOT MDBX_CXX_STANDARD LESS 11) if(NOT MDBX_AMALGAMATED_SOURCE) option(MDBX_ENABLE_TESTS "Build MDBX tests" ${BUILD_TESTING}) endif() if(NOT MDBX_WITHOUT_MSVC_CRT - AND NOT (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8) - AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.9) - AND NOT (MSVC AND MSVC_VERSION LESS 1900)) + AND NOT (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION + VERSION_LESS 4.8) + AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION + 
VERSION_LESS 3.9) + AND NOT (MSVC AND MSVC_VERSION LESS 1900)) option(MDBX_BUILD_CXX "Build C++ portion" ON) else() set(MDBX_BUILD_CXX FALSE) @@ -664,11 +792,13 @@ if(CI) add_definitions(-DMDBX_CI="${CI}") endif() -################################################################################ -################################################################################ +# ############################################################################## if(MDBX_BUILD_CXX AND NOT CMAKE_CXX_COMPILER_LOADED) - message(FATAL_ERROR "MDBX_BUILD_CXX=${MDBX_BUILD_CXX}: The C++ compiler is required to build the C++API.") + message( + FATAL_ERROR + "MDBX_BUILD_CXX=${MDBX_BUILD_CXX}: The C++ compiler is required to build the C++API." + ) endif() if(MDBX_BUILD_CXX) @@ -677,7 +807,8 @@ if(MDBX_BUILD_CXX) endif() # Get version -fetch_version(MDBX "${CMAKE_CURRENT_SOURCE_DIR}" FALSE "${CMAKE_CURRENT_BINARY_DIR}") +fetch_version(MDBX "${CMAKE_CURRENT_SOURCE_DIR}" FALSE + "${CMAKE_CURRENT_BINARY_DIR}") message(STATUS "libmdbx version is ${MDBX_VERSION}") # sources list @@ -688,7 +819,7 @@ if(MDBX_AMALGAMATED_SOURCE) else() # generate version file configure_file("${MDBX_SOURCE_DIR}/version.c.in" - "${CMAKE_CURRENT_BINARY_DIR}/version.c" ESCAPE_QUOTES) + "${CMAKE_CURRENT_BINARY_DIR}/version.c" ESCAPE_QUOTES) file(SHA256 "${CMAKE_CURRENT_BINARY_DIR}/version.c" MDBX_SOURCERY_DIGEST) string(MAKE_C_IDENTIFIER "${MDBX_GIT_DESCRIBE}" MDBX_SOURCERY_SUFFIX) set(MDBX_BUILD_SOURCERY "${MDBX_SOURCERY_DIGEST}_${MDBX_SOURCERY_SUFFIX}") @@ -697,7 +828,9 @@ else() list(APPEND LIBMDBX_SOURCES "${MDBX_SOURCE_DIR}/alloy.c") include_directories("${MDBX_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}") else() - list(APPEND LIBMDBX_SOURCES + list( + APPEND + LIBMDBX_SOURCES "${MDBX_SOURCE_DIR}/api-cursor.c" "${MDBX_SOURCE_DIR}/api-env.c" "${MDBX_SOURCE_DIR}/api-extra.c" @@ -769,56 +902,60 @@ else() "${MDBX_SOURCE_DIR}/utils.h" "${MDBX_SOURCE_DIR}/walk.c" "${MDBX_SOURCE_DIR}/walk.h" - 
"${CMAKE_CURRENT_BINARY_DIR}/version.c" - ) + "${CMAKE_CURRENT_BINARY_DIR}/version.c") if(NOT MSVC) list(APPEND LIBMDBX_SOURCES "${MDBX_SOURCE_DIR}/lck-posix.c") endif() if(NOT APPLE) - list(APPEND LIBMDBX_SOURCES - "${MDBX_SOURCE_DIR}/windows-import.h" - "${MDBX_SOURCE_DIR}/windows-import.c" - "${MDBX_SOURCE_DIR}/lck-windows.c" - ) + list(APPEND LIBMDBX_SOURCES "${MDBX_SOURCE_DIR}/windows-import.h" + "${MDBX_SOURCE_DIR}/windows-import.c" + "${MDBX_SOURCE_DIR}/lck-windows.c") endif() include_directories("${MDBX_SOURCE_DIR}") endif() endif(MDBX_AMALGAMATED_SOURCE) if(MDBX_BUILD_CXX) - message(STATUS "Use C${MDBX_C_STANDARD} and C++${MDBX_CXX_STANDARD} for libmdbx") + message( + STATUS "Use C${MDBX_C_STANDARD} and C++${MDBX_CXX_STANDARD} for libmdbx") list(APPEND LIBMDBX_PUBLIC_HEADERS mdbx.h++) list(APPEND LIBMDBX_SOURCES "${MDBX_SOURCE_DIR}/mdbx.c++" mdbx.h++) else() - message(STATUS "Use C${MDBX_C_STANDARD} for libmdbx but C++ portion is disabled") + message( + STATUS "Use C${MDBX_C_STANDARD} for libmdbx but C++ portion is disabled") endif() if(SUBPROJECT AND MSVC) if(MSVC_VERSION LESS 1900) - message(FATAL_ERROR "At least \"Microsoft C/C++ Compiler\" version 19.0.24234.1 (Visual Studio 2015 Update 3) is required.") + message( + FATAL_ERROR + "At least \"Microsoft C/C++ Compiler\" version 19.0.24234.1 (Visual Studio 2015 Update 3) is required." 
+ ) endif() add_compile_options("/utf-8") endif() macro(target_setup_options TARGET) if(DEFINED INTERPROCEDURAL_OPTIMIZATION) - set_target_properties(${TARGET} PROPERTIES - INTERPROCEDURAL_OPTIMIZATION $<BOOL:${INTERPROCEDURAL_OPTIMIZATION}>) + set_target_properties( + ${TARGET} PROPERTIES INTERPROCEDURAL_OPTIMIZATION + $<BOOL:${INTERPROCEDURAL_OPTIMIZATION}>) endif() if(NOT C_FALLBACK_GNU11 AND NOT C_FALLBACK_11) - set_target_properties(${TARGET} PROPERTIES - C_STANDARD ${MDBX_C_STANDARD} C_STANDARD_REQUIRED ON) + set_target_properties(${TARGET} PROPERTIES C_STANDARD ${MDBX_C_STANDARD} + C_STANDARD_REQUIRED ON) endif() if(MDBX_BUILD_CXX) if(NOT CXX_FALLBACK_GNU11 AND NOT CXX_FALLBACK_11) - set_target_properties(${TARGET} PROPERTIES - CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) + set_target_properties( + ${TARGET} PROPERTIES CXX_STANDARD ${MDBX_CXX_STANDARD} + CXX_STANDARD_REQUIRED ON) endif() if(MSVC AND NOT MSVC_VERSION LESS 1910) target_compile_options(${TARGET} INTERFACE "/Zc:__cplusplus") endif() endif() - if(CC_HAS_FASTMATH - AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10)) + if(CC_HAS_FASTMATH AND NOT (CMAKE_COMPILER_IS_CLANG + AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10)) target_compile_options(${TARGET} PRIVATE "-ffast-math") endif() if(CC_HAS_VISIBILITY) @@ -836,18 +973,27 @@ macro(libmdbx_setup_libs TARGET MODE) target_link_libraries(${TARGET} ${MODE} Threads::Threads) endif() if(WIN32) - target_link_libraries(${TARGET} ${MODE} ntdll user32 kernel32 advapi32 ole32) + target_link_libraries( + ${TARGET} + ${MODE} + ntdll + user32 + kernel32 + advapi32 + ole32) if(MDBX_NTDLL_EXTRA_IMPLIB AND MDBX_WITHOUT_MSVC_CRT) target_link_libraries(${TARGET} ${MODE} ntdll_extra) endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "SunOS" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Solaris") + elseif(${CMAKE_SYSTEM_NAME} STREQUAL "SunOS" OR ${CMAKE_SYSTEM_NAME} STREQUAL + "Solaris") target_link_libraries(${TARGET} ${MODE} kstat) elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Android") target_link_libraries(${TARGET}
${MODE} log) endif() if(LIBCXX_FILESYSTEM AND MDBX_BUILD_CXX) - if(CMAKE_COMPILER_IS_ELBRUSCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 1.25.23 - AND NOT CMAKE_VERSION VERSION_LESS 3.13) + if(CMAKE_COMPILER_IS_ELBRUSCXX + AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 1.25.23 + AND NOT CMAKE_VERSION VERSION_LESS 3.13) target_link_options(${TARGET} PUBLIC "-Wl,--allow-multiple-definition") endif() target_link_libraries(${TARGET} PUBLIC ${LIBCXX_FILESYSTEM}) @@ -860,7 +1006,8 @@ if(MDBX_INSTALL_STATIC) else() add_library(mdbx-static STATIC EXCLUDE_FROM_ALL ${LIBMDBX_SOURCES}) endif() -set_target_properties(mdbx-static PROPERTIES PUBLIC_HEADER "${LIBMDBX_PUBLIC_HEADERS}") +set_target_properties(mdbx-static PROPERTIES PUBLIC_HEADER + "${LIBMDBX_PUBLIC_HEADERS}") target_compile_definitions(mdbx-static PRIVATE MDBX_BUILD_SHARED_LIBRARY=0) target_setup_options(mdbx-static) libmdbx_setup_libs(mdbx-static INTERFACE) @@ -871,20 +1018,26 @@ else() endif() target_include_directories(mdbx-static INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}") -################################################################################ +# ############################################################################## # build shared library if(MDBX_BUILD_SHARED_LIBRARY) add_library(mdbx SHARED ${LIBMDBX_SOURCES}) - set_target_properties(mdbx PROPERTIES PUBLIC_HEADER "${LIBMDBX_PUBLIC_HEADERS}") - target_compile_definitions(mdbx PRIVATE LIBMDBX_EXPORTS MDBX_BUILD_SHARED_LIBRARY=1 INTERFACE LIBMDBX_IMPORTS) + set_target_properties(mdbx PROPERTIES PUBLIC_HEADER + "${LIBMDBX_PUBLIC_HEADERS}") + target_compile_definitions( + mdbx + PRIVATE LIBMDBX_EXPORTS MDBX_BUILD_SHARED_LIBRARY=1 + INTERFACE LIBMDBX_IMPORTS) target_setup_options(mdbx) libmdbx_setup_libs(mdbx PRIVATE) if(MSVC) if(MDBX_NTDLL_EXTRA_IMPLIB AND MDBX_WITHOUT_MSVC_CRT) set_property(TARGET mdbx PROPERTY LINKER_FLAGS "/NODEFAULTLIB") else() - set_property(TARGET mdbx PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>DLL") + set_property( +
TARGET mdbx PROPERTY MSVC_RUNTIME_LIBRARY + "MultiThreaded$<$<CONFIG:Debug>:Debug>DLL") endif() endif() if(CC_HAS_VISIBILITY AND (LTO_ENABLED OR INTERPROCEDURAL_OPTIMIZATION)) @@ -900,15 +1053,18 @@ if(MDBX_BUILD_SHARED_LIBRARY AND MDBX_LINK_TOOLS_NONSTATIC) # use, i.e. don't skip the full RPATH for the build tree set(CMAKE_SKIP_BUILD_RPATH FALSE) - # when building, don't use the install RPATH already (but later on when installing) + # when building, don't use the install RPATH already (but later on when + # installing) set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) - # add the automatically determined parts of the RPATH - # which point to directories outside the build tree to the install RPATH + # add the automatically determined parts of the RPATH which point to + # directories outside the build tree to the install RPATH set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) - # the RPATH to be used when installing, but only if it's not a system directory - list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${CMAKE_INSTALL_PREFIX}/lib" isSystemDir) + # the RPATH to be used when installing, but only if it's not a system + # directory + list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES + "${CMAKE_INSTALL_PREFIX}/lib" isSystemDir) if(isSystemDir EQUAL -1) if(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") set(CMAKE_INSTALL_RPATH "@executable_path/../lib") @@ -918,8 +1074,8 @@ if(MDBX_BUILD_SHARED_LIBRARY AND MDBX_LINK_TOOLS_NONSTATIC) endif() if(WIN32) - # Windows don't have RPATH feature, - # therefore we should prepare PATH or copy DLL(s) + # Windows don't have RPATH feature, therefore we should prepare PATH or copy + # DLL(s) set(TOOL_MDBX_DLLCRUTCH "Crutch for ${CMAKE_SYSTEM_NAME}") if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_VERSION VERSION_LESS 3.0) # will use LOCATION property to compose DLLPATH @@ -937,18 +1093,21 @@ endif() if(MDBX_BUILD_TOOLS) set(WINGETOPT_SRC "") if(WIN32) - set(WINGETOPT_SRC ${MDBX_SOURCE_DIR}/tools/wingetopt.c ${MDBX_SOURCE_DIR}/tools/wingetopt.h) + set(WINGETOPT_SRC
${MDBX_SOURCE_DIR}/tools/wingetopt.c + ${MDBX_SOURCE_DIR}/tools/wingetopt.h) endif() foreach(TOOL chk copy stat dump load drop) if(MDBX_AMALGAMATED_SOURCE) add_executable(mdbx_${TOOL} mdbx.h ${MDBX_SOURCE_DIR}/mdbx_${TOOL}.c) else() - add_executable(mdbx_${TOOL} mdbx.h ${MDBX_SOURCE_DIR}/tools/${TOOL}.c ${WINGETOPT_SRC}) + add_executable(mdbx_${TOOL} mdbx.h ${MDBX_SOURCE_DIR}/tools/${TOOL}.c + ${WINGETOPT_SRC}) endif() if(NOT C_FALLBACK_GNU11 AND NOT C_FALLBACK_11) - set_target_properties(mdbx_${TOOL} PROPERTIES - C_STANDARD ${MDBX_C_STANDARD} C_STANDARD_REQUIRED ON) + set_target_properties( + mdbx_${TOOL} PROPERTIES C_STANDARD ${MDBX_C_STANDARD} + C_STANDARD_REQUIRED ON) endif() target_setup_options(mdbx_${TOOL}) target_link_libraries(mdbx_${TOOL} ${TOOL_MDBX_LIB}) @@ -959,7 +1118,7 @@ if(MDBX_BUILD_TOOLS) endif() endif() -################################################################################ +# ############################################################################## # mdbx-shared-lib installation if(NOT DEFINED MDBX_DLL_INSTALL_DESTINATION) @@ -971,19 +1130,28 @@ if(NOT DEFINED MDBX_DLL_INSTALL_DESTINATION) endif() if(MDBX_BUILD_SHARED_LIBRARY) if(CMAKE_VERSION VERSION_LESS 3.12) - install(TARGETS mdbx EXPORT libmdbx + install( + TARGETS mdbx + EXPORT libmdbx LIBRARY DESTINATION ${MDBX_DLL_INSTALL_DESTINATION} COMPONENT runtime ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT devel PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} COMPONENT devel - INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} COMPONENT devel) + INCLUDES + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + COMPONENT devel) else() - install(TARGETS mdbx EXPORT libmdbx - LIBRARY DESTINATION ${MDBX_DLL_INSTALL_DESTINATION} COMPONENT runtime - NAMELINK_COMPONENT devel + install( + TARGETS mdbx + EXPORT libmdbx + LIBRARY DESTINATION ${MDBX_DLL_INSTALL_DESTINATION} + COMPONENT runtime + NAMELINK_COMPONENT devel OBJECTS DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT devel ARCHIVE 
DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT devel PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} COMPONENT devel - INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} COMPONENT devel) + INCLUDES + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + COMPONENT devel) endif() endif(MDBX_BUILD_SHARED_LIBRARY) @@ -992,29 +1160,20 @@ if(MDBX_BUILD_TOOLS) if(NOT DEFINED MDBX_TOOLS_INSTALL_DESTINATION) set(MDBX_TOOLS_INSTALL_DESTINATION ${CMAKE_INSTALL_BINDIR}) endif() - install( - TARGETS - mdbx_chk - mdbx_stat - mdbx_copy - mdbx_dump - mdbx_load - mdbx_drop - RUNTIME - DESTINATION ${MDBX_TOOLS_INSTALL_DESTINATION} - COMPONENT runtime) + install(TARGETS mdbx_chk mdbx_stat mdbx_copy mdbx_dump mdbx_load mdbx_drop + RUNTIME DESTINATION ${MDBX_TOOLS_INSTALL_DESTINATION} + COMPONENT runtime) if(MDBX_INSTALL_MANPAGES) if(NOT DEFINED MDBX_MAN_INSTALL_DESTINATION) set(MDBX_MAN_INSTALL_DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) endif() install( - FILES - "${MDBX_SOURCE_DIR}/man1/mdbx_chk.1" - "${MDBX_SOURCE_DIR}/man1/mdbx_stat.1" - "${MDBX_SOURCE_DIR}/man1/mdbx_copy.1" - "${MDBX_SOURCE_DIR}/man1/mdbx_dump.1" - "${MDBX_SOURCE_DIR}/man1/mdbx_load.1" - "${MDBX_SOURCE_DIR}/man1/mdbx_drop.1" + FILES "${MDBX_SOURCE_DIR}/man1/mdbx_chk.1" + "${MDBX_SOURCE_DIR}/man1/mdbx_stat.1" + "${MDBX_SOURCE_DIR}/man1/mdbx_copy.1" + "${MDBX_SOURCE_DIR}/man1/mdbx_dump.1" + "${MDBX_SOURCE_DIR}/man1/mdbx_load.1" + "${MDBX_SOURCE_DIR}/man1/mdbx_drop.1" DESTINATION ${MDBX_MAN_INSTALL_DESTINATION} COMPONENT doc) endif() @@ -1023,24 +1182,33 @@ endif(MDBX_BUILD_TOOLS) # mdbx-static-lib installation if(MDBX_INSTALL_STATIC) if(CMAKE_VERSION VERSION_LESS 3.12) - install(TARGETS mdbx-static EXPORT libmdbx + install( + TARGETS mdbx-static + EXPORT libmdbx LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT devel OBJECTS DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT devel ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT devel PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} COMPONENT devel - INCLUDES 
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} COMPONENT devel) + INCLUDES + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + COMPONENT devel) else() - install(TARGETS mdbx-static EXPORT libmdbx - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT devel - NAMELINK_COMPONENT devel + install( + TARGETS mdbx-static + EXPORT libmdbx + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + COMPONENT devel + NAMELINK_COMPONENT devel OBJECTS DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT devel ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT devel PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} COMPONENT devel - INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} COMPONENT devel) + INCLUDES + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + COMPONENT devel) endif() endif(MDBX_INSTALL_STATIC) -################################################################################ +# ############################################################################## # collect options & build info if(NOT DEFINED MDBX_BUILD_TIMESTAMP) @@ -1055,7 +1223,8 @@ endif() if(NOT CMAKE_CONFIGURATION_TYPES) list(APPEND MDBX_BUILD_FLAGS ${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPERCASE}}) if(MDBX_BUILD_CXX) - list(APPEND MDBX_BUILD_FLAGS ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPERCASE}}) + list(APPEND MDBX_BUILD_FLAGS + ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPERCASE}}) endif() endif() @@ -1082,20 +1251,25 @@ list(REMOVE_DUPLICATES MDBX_BUILD_FLAGS) string(REPLACE ";" " " MDBX_BUILD_FLAGS "${MDBX_BUILD_FLAGS}") if(CMAKE_CONFIGURATION_TYPES) # add dynamic part via per-configuration define - message(STATUS "MDBX Compile Flags: ${MDBX_BUILD_FLAGS} ") - add_definitions(-DMDBX_BUILD_FLAGS_CONFIG="$<$<CONFIG:Debug>:${CMAKE_C_FLAGS_DEBUG} ${CMAKE_C_DEFINES_DEBUG}>$<$<CONFIG:Release>:${CMAKE_C_FLAGS_RELEASE} ${CMAKE_C_DEFINES_RELEASE}>$<$<CONFIG:RelWithDebInfo>:${CMAKE_C_FLAGS_RELWITHDEBINFO} ${CMAKE_C_DEFINES_RELWITHDEBINFO}>$<$<CONFIG:MinSizeRel>:${CMAKE_C_FLAGS_MINSIZEREL} ${CMAKE_C_DEFINES_MINSIZEREL}>") + message( + STATUS + "MDBX Compile Flags: ${MDBX_BUILD_FLAGS} ") + add_definitions( +
-DMDBX_BUILD_FLAGS_CONFIG="$<$<CONFIG:Debug>:${CMAKE_C_FLAGS_DEBUG} ${CMAKE_C_DEFINES_DEBUG}>$<$<CONFIG:Release>:${CMAKE_C_FLAGS_RELEASE} ${CMAKE_C_DEFINES_RELEASE}>$<$<CONFIG:RelWithDebInfo>:${CMAKE_C_FLAGS_RELWITHDEBINFO} ${CMAKE_C_DEFINES_RELWITHDEBINFO}>$<$<CONFIG:MinSizeRel>:${CMAKE_C_FLAGS_MINSIZEREL} ${CMAKE_C_DEFINES_MINSIZEREL}>" + ) else() message(STATUS "MDBX Compile Flags: ${MDBX_BUILD_FLAGS}") endif() # get compiler info -execute_process(COMMAND sh -c "${CMAKE_C_COMPILER} --version | head -1" +execute_process( + COMMAND sh -c "${CMAKE_C_COMPILER} --version | head -1" OUTPUT_VARIABLE MDBX_BUILD_COMPILER - OUTPUT_STRIP_TRAILING_WHITESPACE - ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET RESULT_VARIABLE rc) if(rc OR NOT MDBX_BUILD_COMPILER) - string(STRIP "${CMAKE_C_COMPILER_ID}-${CMAKE_C_COMPILER_VERSION}" MDBX_BUILD_COMPILER) + string(STRIP "${CMAKE_C_COMPILER_ID}-${CMAKE_C_COMPILER_VERSION}" + MDBX_BUILD_COMPILER) endif() # make a build-target triplet @@ -1104,7 +1278,8 @@ if(CMAKE_C_COMPILER_TARGET) else() if(CMAKE_C_COMPILER_ARCHITECTURE_ID) string(STRIP "${CMAKE_C_COMPILER_ARCHITECTURE_ID}" MDBX_BUILD_TARGET) - elseif(CMAKE_GENERATOR_PLATFORM AND NOT CMAKE_GENERATOR_PLATFORM STREQUAL CMAKE_SYSTEM_NAME) + elseif(CMAKE_GENERATOR_PLATFORM AND NOT CMAKE_GENERATOR_PLATFORM STREQUAL + CMAKE_SYSTEM_NAME) string(STRIP "${CMAKE_GENERATOR_PLATFORM}" MDBX_BUILD_TARGET) elseif(CMAKE_SYSTEM_ARCH) string(STRIP "${CMAKE_SYSTEM_ARCH}" MDBX_BUILD_TARGET) @@ -1116,14 +1291,19 @@ else() set(MDBX_BUILD_TARGET "unknown") endif() if(CMAKE_C_COMPILER_ABI - AND NOT (CMAKE_C_COMPILER_ABI MATCHES ".*${MDBX_BUILD_TARGET}.*" OR MDBX_BUILD_TARGET MATCHES ".*${CMAKE_C_COMPILER_ABI}.*")) - string(CONCAT MDBX_BUILD_TARGET "${MDBX_BUILD_TARGET}-${CMAKE_C_COMPILER_ABI}") + AND NOT (CMAKE_C_COMPILER_ABI MATCHES ".*${MDBX_BUILD_TARGET}.*" + OR MDBX_BUILD_TARGET MATCHES ".*${CMAKE_C_COMPILER_ABI}.*")) + string(CONCAT MDBX_BUILD_TARGET + "${MDBX_BUILD_TARGET}-${CMAKE_C_COMPILER_ABI}") endif() if(CMAKE_C_PLATFORM_ID - AND NOT (CMAKE_SYSTEM_NAME -
AND (CMAKE_C_PLATFORM_ID MATCHES ".*${CMAKE_SYSTEM_NAME}.*" OR CMAKE_SYSTEM_NAME MATCHES ".*${CMAKE_C_PLATFORM_ID}.*")) - AND NOT (CMAKE_C_PLATFORM_ID MATCHES ".*${CMAKE_C_PLATFORM_ID}.*" OR MDBX_BUILD_TARGET MATCHES ".*${CMAKE_C_PLATFORM_ID}.*")) - string(CONCAT MDBX_BUILD_TARGET "${MDBX_BUILD_TARGET}-${CMAKE_C_COMPILER_ABI}") + AND NOT (CMAKE_SYSTEM_NAME + AND (CMAKE_C_PLATFORM_ID MATCHES ".*${CMAKE_SYSTEM_NAME}.*" + OR CMAKE_SYSTEM_NAME MATCHES ".*${CMAKE_C_PLATFORM_ID}.*")) + AND NOT (CMAKE_C_PLATFORM_ID MATCHES ".*${CMAKE_C_PLATFORM_ID}.*" + OR MDBX_BUILD_TARGET MATCHES ".*${CMAKE_C_PLATFORM_ID}.*")) + string(CONCAT MDBX_BUILD_TARGET + "${MDBX_BUILD_TARGET}-${CMAKE_C_COMPILER_ABI}") endif() if(CMAKE_SYSTEM_NAME) string(CONCAT MDBX_BUILD_TARGET "${MDBX_BUILD_TARGET}-${CMAKE_SYSTEM_NAME}") @@ -1140,7 +1320,8 @@ else() endif() # options -set(options VERSION C_COMPILER CXX_COMPILER MDBX_BUILD_TARGET MDBX_BUILD_TYPE ${MDBX_BUILD_OPTIONS}) +set(options VERSION C_COMPILER CXX_COMPILER MDBX_BUILD_TARGET MDBX_BUILD_TYPE + ${MDBX_BUILD_OPTIONS}) foreach(item IN LISTS options) if(DEFINED ${item}) set(value "${${item}}") @@ -1158,27 +1339,32 @@ endforeach(item) # provide config.h for library build info configure_file("${MDBX_SOURCE_DIR}/config.h.in" - "${CMAKE_CURRENT_BINARY_DIR}/config.h" ESCAPE_QUOTES) + "${CMAKE_CURRENT_BINARY_DIR}/config.h" ESCAPE_QUOTES) add_definitions(-DMDBX_CONFIG_H="${CMAKE_CURRENT_BINARY_DIR}/config.h") -################################################################################ +# ############################################################################## if(NOT MDBX_AMALGAMATED_SOURCE AND MDBX_ENABLE_TESTS) if(NOT CMAKE_CXX_COMPILER_LOADED) - message(FATAL_ERROR "MDBX_ENABLE_TESTS=${MDBX_ENABLE_TESTS}: The C++ compiler is required to build the tests.") + message( + FATAL_ERROR + "MDBX_ENABLE_TESTS=${MDBX_ENABLE_TESTS}: The C++ compiler is required to build the tests." 
+ ) endif() add_subdirectory(test) endif() -################################################################################ +# ############################################################################## -if (NOT SUBPROJECT) +if(NOT SUBPROJECT) set(PACKAGE "libmdbx") set(CPACK_PACKAGE_VERSION_MAJOR ${MDBX_VERSION_MAJOR}) set(CPACK_PACKAGE_VERSION_MINOR ${MDBX_VERSION_MINOR}) set(CPACK_PACKAGE_VERSION_PATCH ${MDBX_VERSION_RELEASE}) set(CPACK_PACKAGE_VERSION_COMMIT ${MDBX_VERSION_REVISION}) - set(PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.${CPACK_PACKAGE_VERSION_COMMIT}") + set(PACKAGE_VERSION + "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.${CPACK_PACKAGE_VERSION_COMMIT}" + ) message(STATUS "libmdbx package version is ${PACKAGE_VERSION}") endif() diff --git a/cmake/compiler.cmake b/cmake/compiler.cmake index f9c23c20..0ee15952 100644 --- a/cmake/compiler.cmake +++ b/cmake/compiler.cmake @@ -1,5 +1,5 @@ -## Copyright (c) 2010-2024 Леонид Юрьев aka Leonid Yuriev -## SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2010-2024 Леонид Юрьев aka Leonid Yuriev +# SPDX-License-Identifier: Apache-2.0 if(CMAKE_VERSION VERSION_LESS 3.8.2) cmake_minimum_required(VERSION 3.0.2) @@ -32,8 +32,11 @@ if(NOT CMAKE_VERSION VERSION_LESS 3.9) endif() if(CMAKE_VERSION MATCHES ".*MSVC.*" AND CMAKE_VERSION VERSION_LESS 3.16) - message(FATAL_ERROR "CMake from MSVC kit is unfit! " - "Please use MSVC2019 with modern CMake the original CMake from https://cmake.org/download/") + message( + FATAL_ERROR + "CMake from MSVC kit is unfit! 
" + "Please use MSVC2019 with modern CMake the original CMake from https://cmake.org/download/" + ) endif() if(NOT (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED)) @@ -55,11 +58,12 @@ include(CheckLibraryExists) include(CheckIncludeFiles) # Check if the same compile family is used for both C and CXX -if(CMAKE_C_COMPILER_LOADED AND CMAKE_CXX_COMPILER_LOADED AND - NOT (CMAKE_C_COMPILER_ID STREQUAL CMAKE_CXX_COMPILER_ID)) +if(CMAKE_C_COMPILER_LOADED + AND CMAKE_CXX_COMPILER_LOADED + AND NOT (CMAKE_C_COMPILER_ID STREQUAL CMAKE_CXX_COMPILER_ID)) message(WARNING "CMAKE_C_COMPILER_ID (${CMAKE_C_COMPILER_ID}) is different " - "from CMAKE_CXX_COMPILER_ID (${CMAKE_CXX_COMPILER_ID}). " - "The final binary may be unusable.") + "from CMAKE_CXX_COMPILER_ID (${CMAKE_CXX_COMPILER_ID}). " + "The final binary may be unusable.") endif() if(CMAKE_CXX_COMPILER_LOADED) @@ -76,27 +80,32 @@ macro(check_compiler_flag flag variable) endif() endmacro(check_compiler_flag) -# We support building with Clang and gcc. First check -# what we're using for build. -if(CMAKE_C_COMPILER_LOADED AND CMAKE_C_COMPILER_ID MATCHES ".*[Cc][Ll][Aa][Nn][Gg].*") - set(CMAKE_COMPILER_IS_CLANG ON) - set(CMAKE_COMPILER_IS_GNUCC OFF) +# We support building with Clang and gcc. First check what we're using for +# build. 
+if(CMAKE_C_COMPILER_LOADED AND CMAKE_C_COMPILER_ID MATCHES + ".*[Cc][Ll][Aa][Nn][Gg].*") + set(CMAKE_COMPILER_IS_CLANG ON) + set(CMAKE_COMPILER_IS_GNUCC OFF) endif() -if(CMAKE_CXX_COMPILER_LOADED AND CMAKE_CXX_COMPILER_ID MATCHES ".*[Cc][Ll][Aa][Nn][Gg].*") - set(CMAKE_COMPILER_IS_CLANG ON) +if(CMAKE_CXX_COMPILER_LOADED AND CMAKE_CXX_COMPILER_ID MATCHES + ".*[Cc][Ll][Aa][Nn][Gg].*") + set(CMAKE_COMPILER_IS_CLANG ON) set(CMAKE_COMPILER_IS_GNUCXX OFF) endif() if(CMAKE_C_COMPILER_LOADED) # Check for Elbrus lcc - execute_process(COMMAND ${CMAKE_C_COMPILER} --version + execute_process( + COMMAND ${CMAKE_C_COMPILER} --version OUTPUT_VARIABLE tmp_lcc_probe_version - RESULT_VARIABLE tmp_lcc_probe_result ERROR_QUIET) + RESULT_VARIABLE tmp_lcc_probe_result + ERROR_QUIET) if(tmp_lcc_probe_result EQUAL 0) string(FIND "${tmp_lcc_probe_version}" "lcc:" tmp_lcc_marker) string(FIND "${tmp_lcc_probe_version}" ":e2k-" tmp_e2k_marker) if(tmp_lcc_marker GREATER -1 AND tmp_e2k_marker GREATER tmp_lcc_marker) - execute_process(COMMAND ${CMAKE_C_COMPILER} -print-version + execute_process( + COMMAND ${CMAKE_C_COMPILER} -print-version OUTPUT_VARIABLE CMAKE_C_COMPILER_VERSION RESULT_VARIABLE tmp_lcc_probe_result OUTPUT_STRIP_TRAILING_WHITESPACE) @@ -115,20 +124,24 @@ endif() if(CMAKE_CXX_COMPILER_LOADED) # Check for Elbrus l++ - execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version + execute_process( + COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE tmp_lxx_probe_version - RESULT_VARIABLE tmp_lxx_probe_result ERROR_QUIET) + RESULT_VARIABLE tmp_lxx_probe_result + ERROR_QUIET) if(tmp_lxx_probe_result EQUAL 0) string(FIND "${tmp_lxx_probe_version}" "lcc:" tmp_lcc_marker) string(FIND "${tmp_lxx_probe_version}" ":e2k-" tmp_e2k_marker) if(tmp_lcc_marker GREATER -1 AND tmp_e2k_marker GREATER tmp_lcc_marker) - execute_process(COMMAND ${CMAKE_CXX_COMPILER} -print-version + execute_process( + COMMAND ${CMAKE_CXX_COMPILER} -print-version OUTPUT_VARIABLE CMAKE_CXX_COMPILER_VERSION 
RESULT_VARIABLE tmp_lxx_probe_result OUTPUT_STRIP_TRAILING_WHITESPACE) set(CMAKE_COMPILER_IS_ELBRUSCXX ON) set(CMAKE_CXX_COMPILER_ID "Elbrus") - message(STATUS "Detected Elbrus C++ compiler ${CMAKE_CXX_COMPILER_VERSION}") + message( + STATUS "Detected Elbrus C++ compiler ${CMAKE_CXX_COMPILER_VERSION}") else() set(CMAKE_COMPILER_IS_ELBRUSCXX OFF) endif() @@ -139,20 +152,20 @@ if(CMAKE_CXX_COMPILER_LOADED) unset(tmp_lxx_probe_result) endif() -# Hard coding the compiler version is ugly from cmake POV, but -# at least gives user a friendly error message. The most critical -# demand for C++ compiler is support of C++11 lambdas, added -# only in version 4.5 https://gcc.gnu.org/projects/cxx0x.html +# Hard coding the compiler version is ugly from cmake POV, but at least gives +# user a friendly error message. The most critical demand for C++ compiler is +# support of C++11 lambdas, added only in version 4.5 +# https://gcc.gnu.org/projects/cxx0x.html if(CMAKE_COMPILER_IS_GNUCC) - if(CMAKE_C_COMPILER_VERSION VERSION_LESS 4.5 - AND NOT CMAKE_COMPILER_IS_ELBRUSC) + if(CMAKE_C_COMPILER_VERSION VERSION_LESS 4.5 AND NOT + CMAKE_COMPILER_IS_ELBRUSC) message(FATAL_ERROR " Your GCC version is ${CMAKE_C_COMPILER_VERSION}, please update") endif() endif() if(CMAKE_COMPILER_IS_GNUCXX) if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.5 - AND NOT CMAKE_COMPILER_IS_ELBRUSCXX) + AND NOT CMAKE_COMPILER_IS_ELBRUSCXX) message(FATAL_ERROR " Your G++ version is ${CMAKE_CXX_COMPILER_VERSION}, please update") endif() @@ -162,7 +175,8 @@ if(CMAKE_CL_64) set(MSVC64 1) endif() if(WIN32 AND CMAKE_COMPILER_IS_GNU${CMAKE_PRIMARY_LANG}) - execute_process(COMMAND ${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER} -dumpmachine + execute_process( + COMMAND ${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER} -dumpmachine OUTPUT_VARIABLE __GCC_TARGET_MACHINE OUTPUT_STRIP_TRAILING_WHITESPACE) if(__GCC_TARGET_MACHINE MATCHES "amd64|x86_64|AMD64") @@ -172,9 +186,12 @@ if(WIN32 AND CMAKE_COMPILER_IS_GNU${CMAKE_PRIMARY_LANG}) endif() if(NOT 
DEFINED IOS) - if(APPLE AND (CMAKE_SYSTEM_NAME STREQUAL "iOS" - OR DEFINED CMAKE_IOS_DEVELOPER_ROOT - OR DEFINED IOS_PLATFORM OR DEFINED IOS_ARCH)) + if(APPLE + AND (CMAKE_SYSTEM_NAME STREQUAL "iOS" + OR DEFINED CMAKE_IOS_DEVELOPER_ROOT + OR DEFINED IOS_PLATFORM + OR DEFINED IOS_ARCH + )) set(IOS TRUE) else() set(IOS FALSE) @@ -182,9 +199,9 @@ if(NOT DEFINED IOS) endif() if(NOT DEFINED CMAKE_TARGET_BITNESS) - if (CMAKE_SIZEOF_VOID_P LESS 4) + if(CMAKE_SIZEOF_VOID_P LESS 4) set(CMAKE_TARGET_BITNESS 16) - elseif (CMAKE_SIZEOF_VOID_P LESS 8) + elseif(CMAKE_SIZEOF_VOID_P LESS 8) set(CMAKE_TARGET_BITNESS 32) else() set(CMAKE_TARGET_BITNESS 64) @@ -193,10 +210,12 @@ endif() if(NOT CMAKE_SYSTEM_ARCH) if(CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_ARCHITECTURE_ID) - string(TOLOWER "${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_ARCHITECTURE_ID}" CMAKE_SYSTEM_ARCH) + string(TOLOWER "${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_ARCHITECTURE_ID}" + CMAKE_SYSTEM_ARCH) if(CMAKE_SYSTEM_ARCH STREQUAL "x86") set(X86_32 TRUE) - elseif(CMAKE_SYSTEM_ARCH STREQUAL "x86_64" OR CMAKE_SYSTEM_ARCH STREQUAL "x64") + elseif(CMAKE_SYSTEM_ARCH STREQUAL "x86_64" OR CMAKE_SYSTEM_ARCH STREQUAL + "x64") set(X86_64 TRUE) set(CMAKE_SYSTEM_ARCH "x86_64") elseif(CMAKE_SYSTEM_ARCH MATCHES "^(aarch.*|arm.*)") @@ -225,12 +244,18 @@ if(NOT CMAKE_SYSTEM_ARCH) set(MIPS32 TRUE) endif() endif() - elseif(CMAKE_COMPILER_IS_ELBRUSC OR CMAKE_COMPILER_IS_ELBRUSCXX - OR CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_ID STREQUAL "LCC" - OR CMAKE_SYSTEM_PROCESSOR MATCHES "e2k.*|E2K.*|elbrus.*|ELBRUS.*") + elseif( + CMAKE_COMPILER_IS_ELBRUSC + OR CMAKE_COMPILER_IS_ELBRUSCXX + OR CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_ID STREQUAL "LCC" + OR CMAKE_SYSTEM_PROCESSOR MATCHES "e2k.*|E2K.*|elbrus.*|ELBRUS.*") set(E2K TRUE) set(CMAKE_SYSTEM_ARCH "Elbrus") - elseif(MSVC64 OR MINGW64 OR MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING)) + elseif( + MSVC64 + OR MINGW64 + OR MINGW + OR (MSVC AND NOT CMAKE_CROSSCOMPILING)) if(CMAKE_TARGET_BITNESS EQUAL 64) set(X86_64 
TRUE) set(CMAKE_SYSTEM_ARCH "x86_64") @@ -238,7 +263,8 @@ if(NOT CMAKE_SYSTEM_ARCH) set(X86_32 TRUE) set(CMAKE_SYSTEM_ARCH "x86") endif() - elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|[xXiI]86_64.*|AMD64.*|[iI][3-6]86.*|[xXiI]86.*") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES + "amd64.*|[xXiI]86_64.*|AMD64.*|[iI][3-6]86.*|[xXiI]86.*") if(CMAKE_TARGET_BITNESS EQUAL 64) set(X86_64 TRUE) set(CMAKE_SYSTEM_ARCH "x86_64") @@ -310,22 +336,29 @@ if(NOT DEFINED CMAKE_HOST_CAN_RUN_EXECUTABLES_BUILT_FOR_TARGET) set(CMAKE_HOST_CAN_RUN_EXECUTABLES_BUILT_FOR_TARGET TRUE) elseif(CMAKE_CROSSCOMPILING AND NOT CMAKE_CROSSCOMPILING_EMULATOR) set(CMAKE_HOST_CAN_RUN_EXECUTABLES_BUILT_FOR_TARGET FALSE) - elseif(CMAKE_SYSTEM_NAME STREQUAL CMAKE_HOST_SYSTEM_NAME - AND ((CMAKE_SYSTEM_PROCESSOR STREQUAL CMAKE_HOST_PROCESSOR) - OR (CMAKE_SYSTEM_ARCH STREQUAL CMAKE_HOST_ARCH) - OR (WIN32 AND CMAKE_HOST_WIN32 AND X86_32 AND CMAKE_HOST_ARCH STREQUAL "x86_64"))) + elseif( + CMAKE_SYSTEM_NAME STREQUAL CMAKE_HOST_SYSTEM_NAME + AND ((CMAKE_SYSTEM_PROCESSOR STREQUAL CMAKE_HOST_PROCESSOR) + OR (CMAKE_SYSTEM_ARCH STREQUAL CMAKE_HOST_ARCH) + OR (WIN32 + AND CMAKE_HOST_WIN32 + AND X86_32 + AND CMAKE_HOST_ARCH STREQUAL "x86_64" + ) + )) set(CMAKE_HOST_CAN_RUN_EXECUTABLES_BUILT_FOR_TARGET TRUE) - message(STATUS - "Assume СAN RUN A BUILT EXECUTABLES," - " since host (${CMAKE_HOST_SYSTEM_NAME}-${CMAKE_HOST_ARCH})" - " match target (${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_ARCH})") + message(STATUS "Assume СAN RUN A BUILT EXECUTABLES," + " since host (${CMAKE_HOST_SYSTEM_NAME}-${CMAKE_HOST_ARCH})" + " match target (${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_ARCH})") else() if(CMAKE_C_COMPILER_LOADED) include(CheckCSourceRuns) - check_c_source_runs("int main(void) { return 0; }" CMAKE_HOST_CAN_RUN_EXECUTABLES_BUILT_FOR_TARGET) + check_c_source_runs("int main(void) { return 0; }" + CMAKE_HOST_CAN_RUN_EXECUTABLES_BUILT_FOR_TARGET) elseif(CMAKE_CXX_COMPILER_LOADED) include(CheckCXXSourceRuns) - check_cxx_source_runs("int 
main(void) { return 0; }" CMAKE_HOST_CAN_RUN_EXECUTABLES_BUILT_FOR_TARGET) + check_cxx_source_runs("int main(void) { return 0; }" + CMAKE_HOST_CAN_RUN_EXECUTABLES_BUILT_FOR_TARGET) endif() if(NOT CMAKE_HOST_CAN_RUN_EXECUTABLES_BUILT_FOR_TARGET) message(STATUS "Force CMAKE_CROSSCOMPILING to TRUE") @@ -340,14 +373,15 @@ if(MSVC) check_compiler_flag("/fsanitize=undefined" CC_HAS_UBSAN) else() # - # GCC started to warn for unused result starting from 4.2, and - # this is when it introduced -Wno-unused-result - # GCC can also be built on top of llvm runtime (on mac). + # GCC started to warn for unused result starting from 4.2, and this is when it + # introduced -Wno-unused-result GCC can also be built on top of llvm runtime + # (on mac). check_compiler_flag("-Wno-unknown-pragmas" CC_HAS_WNO_UNKNOWN_PRAGMAS) check_compiler_flag("-Wextra" CC_HAS_WEXTRA) check_compiler_flag("-Werror" CC_HAS_WERROR) check_compiler_flag("-fexceptions" CC_HAS_FEXCEPTIONS) - check_compiler_flag("-fno-semantic-interposition" CC_HAS_FNO_SEMANTIC_INTERPOSITION) + check_compiler_flag("-fno-semantic-interposition" + CC_HAS_FNO_SEMANTIC_INTERPOSITION) if(CMAKE_CXX_COMPILER_LOADED) check_cxx_compiler_flag("-fcxx-exceptions" CC_HAS_FCXX_EXCEPTIONS) endif() @@ -367,17 +401,21 @@ else() # Check for an omp support set(CMAKE_REQUIRED_FLAGS "-fopenmp -Werror") if(CMAKE_CXX_COMPILER_LOADED) - check_cxx_source_compiles("int main(void) { + check_cxx_source_compiles( + "int main(void) { #pragma omp for for(int i = 0, j = 0; i != 42; i = 1 + i * 12345) j += i % 43; return j; - }" HAVE_OPENMP) + }" + HAVE_OPENMP) else() - check_c_source_compiles("int main(void) { + check_c_source_compiles( + "int main(void) { #pragma omp for for(int i = 0, j = 0; i != 42; i = 1 + i * 12345) j += i % 43; return j; - }" HAVE_OPENMP) + }" + HAVE_OPENMP) endif() set(CMAKE_REQUIRED_FLAGS "") endif() @@ -413,41 +451,66 @@ if(CMAKE_C_COMPILER_LOADED) endif() # Check for LTO support by GCC -if(CMAKE_COMPILER_IS_GNU${CMAKE_PRIMARY_LANG} 
AND NOT CMAKE_COMPILER_IS_ELBRUSC AND NOT CMAKE_COMPILER_IS_ELBRUSCXX) +if(CMAKE_COMPILER_IS_GNU${CMAKE_PRIMARY_LANG} + AND NOT CMAKE_COMPILER_IS_ELBRUSC + AND NOT CMAKE_COMPILER_IS_ELBRUSCXX) unset(gcc_collect) unset(gcc_lto_wrapper) if(NOT CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_VERSION VERSION_LESS 4.7) - execute_process(COMMAND ${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER} -v - OUTPUT_VARIABLE gcc_info_v ERROR_VARIABLE gcc_info_v) + execute_process( + COMMAND ${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER} -v + OUTPUT_VARIABLE gcc_info_v + ERROR_VARIABLE gcc_info_v) - string(REGEX MATCH "^(.+\nCOLLECT_GCC=)([^ \n]+)(\n.+)$" gcc_collect_valid ${gcc_info_v}) + string(REGEX MATCH "^(.+\nCOLLECT_GCC=)([^ \n]+)(\n.+)$" gcc_collect_valid + ${gcc_info_v}) if(gcc_collect_valid) - string(REGEX REPLACE "^(.+\nCOLLECT_GCC=)([^ \n]+)(\n.+)$" "\\2" gcc_collect ${gcc_info_v}) + string(REGEX REPLACE "^(.+\nCOLLECT_GCC=)([^ \n]+)(\n.+)$" "\\2" + gcc_collect ${gcc_info_v}) endif() - string(REGEX MATCH "^(.+\nCOLLECT_LTO_WRAPPER=)([^ \n]+/lto-wrapper)(\n.+)$" gcc_lto_wrapper_valid ${gcc_info_v}) + string(REGEX MATCH + "^(.+\nCOLLECT_LTO_WRAPPER=)([^ \n]+/lto-wrapper)(\n.+)$" + gcc_lto_wrapper_valid ${gcc_info_v}) if(gcc_lto_wrapper_valid) - string(REGEX REPLACE "^(.+\nCOLLECT_LTO_WRAPPER=)([^ \n]+/lto-wrapper)(\n.+)$" "\\2" gcc_lto_wrapper ${gcc_info_v}) + string(REGEX + REPLACE "^(.+\nCOLLECT_LTO_WRAPPER=)([^ \n]+/lto-wrapper)(\n.+)$" + "\\2" gcc_lto_wrapper ${gcc_info_v}) endif() set(gcc_suffix "") if(gcc_collect_valid AND gcc_collect) - string(REGEX MATCH "^(.*(cc|\\+\\+))(-.+)$" gcc_suffix_valid ${gcc_collect}) + string(REGEX MATCH "^(.*(cc|\\+\\+))(-.+)$" gcc_suffix_valid + ${gcc_collect}) if(gcc_suffix_valid) - string(REGEX REPLACE "^(.*(cc|\\+\\+))(-.+)$" "\\3" gcc_suffix ${gcc_collect}) + string(REGEX REPLACE "^(.*(cc|\\+\\+))(-.+)$" "\\3" gcc_suffix + ${gcc_collect}) endif() endif() - get_filename_component(gcc_dir ${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER} DIRECTORY) + 
get_filename_component(gcc_dir ${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER} + DIRECTORY) if(NOT CMAKE_GCC_AR) - find_program(CMAKE_GCC_AR NAMES "gcc${gcc_suffix}-ar" "gcc-ar${gcc_suffix}" PATHS "${gcc_dir}" NO_DEFAULT_PATH) + find_program( + CMAKE_GCC_AR + NAMES "gcc${gcc_suffix}-ar" "gcc-ar${gcc_suffix}" + PATHS "${gcc_dir}" + NO_DEFAULT_PATH) endif() if(NOT CMAKE_GCC_NM) - find_program(CMAKE_GCC_NM NAMES "gcc${gcc_suffix}-nm" "gcc-nm${gcc_suffix}" PATHS "${gcc_dir}" NO_DEFAULT_PATH) + find_program( + CMAKE_GCC_NM + NAMES "gcc${gcc_suffix}-nm" "gcc-nm${gcc_suffix}" + PATHS "${gcc_dir}" + NO_DEFAULT_PATH) endif() if(NOT CMAKE_GCC_RANLIB) - find_program(CMAKE_GCC_RANLIB NAMES "gcc${gcc_suffix}-ranlib" "gcc-ranlib${gcc_suffix}" PATHS "${gcc_dir}" NO_DEFAULT_PATH) + find_program( + CMAKE_GCC_RANLIB + NAMES "gcc${gcc_suffix}-ranlib" "gcc-ranlib${gcc_suffix}" + PATHS "${gcc_dir}" + NO_DEFAULT_PATH) endif() unset(gcc_dir) @@ -459,8 +522,14 @@ if(CMAKE_COMPILER_IS_GNU${CMAKE_PRIMARY_LANG} AND NOT CMAKE_COMPILER_IS_ELBRUSC unset(gcc_info_v) endif() - if(CMAKE_GCC_AR AND CMAKE_GCC_NM AND CMAKE_GCC_RANLIB AND gcc_lto_wrapper) - message(STATUS "Found GCC's LTO toolset: ${gcc_lto_wrapper}, ${CMAKE_GCC_AR}, ${CMAKE_GCC_RANLIB}") + if(CMAKE_GCC_AR + AND CMAKE_GCC_NM + AND CMAKE_GCC_RANLIB + AND gcc_lto_wrapper) + message( + STATUS + "Found GCC's LTO toolset: ${gcc_lto_wrapper}, ${CMAKE_GCC_AR}, ${CMAKE_GCC_RANLIB}" + ) set(GCC_LTO_CFLAGS "-flto -fno-fat-lto-objects -fuse-linker-plugin") set(GCC_LTO_AVAILABLE TRUE) message(STATUS "Link-Time Optimization by GCC is available") @@ -485,17 +554,22 @@ endif() # Check for LTO support by CLANG if(CMAKE_COMPILER_IS_CLANG) if(NOT CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_VERSION VERSION_LESS 3.5) - execute_process(COMMAND ${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER} -print-search-dirs - OUTPUT_VARIABLE clang_search_dirs RESULT_VARIABLE clang_probe_result ERROR_QUIET) + execute_process( + COMMAND ${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER} -print-search-dirs 
+ OUTPUT_VARIABLE clang_search_dirs + RESULT_VARIABLE clang_probe_result + ERROR_QUIET) unset(clang_bindirs) unset(clang_bindirs_x) unset(clang_libdirs) unset(clang_libdirs_x) if(clang_probe_result EQUAL 0) - string(REGEX MATCH "(^|\n.*)(.*programs: =)([^\n]+)((\n.*)|$)" regexp_valid ${clang_search_dirs}) + string(REGEX MATCH "(^|\n.*)(.*programs: =)([^\n]+)((\n.*)|$)" + regexp_valid ${clang_search_dirs}) if(regexp_valid) - string(REGEX REPLACE "(^|\n.*)(.*programs: =)([^\n]+)((\n.*)|$)" "\\3" list ${clang_search_dirs}) + string(REGEX REPLACE "(^|\n.*)(.*programs: =)([^\n]+)((\n.*)|$)" "\\3" + list ${clang_search_dirs}) string(REPLACE ":" ";" list "${list}") foreach(dir IN LISTS list) get_filename_component(dir "${dir}" REALPATH) @@ -508,9 +582,11 @@ if(CMAKE_COMPILER_IS_CLANG) list(APPEND clang_bindirs "${clang_bindirs_x}") list(REMOVE_DUPLICATES clang_bindirs) endif() - string(REGEX MATCH "(^|\n.*)(.*libraries: =)([^\n]+)((\n.*)|$)" regexp_valid ${clang_search_dirs}) + string(REGEX MATCH "(^|\n.*)(.*libraries: =)([^\n]+)((\n.*)|$)" + regexp_valid ${clang_search_dirs}) if(regexp_valid) - string(REGEX REPLACE "(^|\n.*)(.*libraries: =)([^\n]+)((\n.*)|$)" "\\3" list ${clang_search_dirs}) + string(REGEX REPLACE "(^|\n.*)(.*libraries: =)([^\n]+)((\n.*)|$)" "\\3" + list ${clang_search_dirs}) string(REPLACE ":" ";" list "${list}") foreach(dir IN LISTS list) get_filename_component(dir "${dir}" REALPATH) @@ -524,57 +600,97 @@ if(CMAKE_COMPILER_IS_CLANG) list(REMOVE_DUPLICATES clang_libdirs) endif() else() - get_filename_component(clang_bindirs ${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER} DIRECTORY) + get_filename_component(clang_bindirs + ${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER} DIRECTORY) if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") set(clang_libdirs ${clang_bindirs}) else() - get_filename_component(clang_libdirs "${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER}/../lib" REALPATH) + get_filename_component( + clang_libdirs "${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER}/../lib" + REALPATH) 
endif() endif() if(clang_bindirs AND clang_libdirs) - message(STATUS "Found CLANG/LLVM directories: ${clang_bindirs}, ${clang_libdirs}") + message( + STATUS + "Found CLANG/LLVM directories: ${clang_bindirs}, ${clang_libdirs}") else() message(STATUS "Could NOT find CLANG/LLVM directories (bin and/or lib).") endif() if(NOT CMAKE_CLANG_LD AND clang_bindirs) - find_program(CMAKE_CLANG_LD NAMES lld-link ld.lld "ld${CMAKE_TARGET_BITNESS}.lld" lld llvm-link llvm-ld PATHS ${clang_bindirs} NO_DEFAULT_PATH) + find_program( + CMAKE_CLANG_LD + NAMES lld-link ld.lld "ld${CMAKE_TARGET_BITNESS}.lld" lld llvm-link + llvm-ld + PATHS ${clang_bindirs} + NO_DEFAULT_PATH) endif() if(NOT CMAKE_CLANG_AR AND clang_bindirs) - find_program(CMAKE_CLANG_AR NAMES llvm-ar ar PATHS ${clang_bindirs} NO_DEFAULT_PATH) + find_program( + CMAKE_CLANG_AR + NAMES llvm-ar ar + PATHS ${clang_bindirs} + NO_DEFAULT_PATH) endif() if(NOT CMAKE_CLANG_NM AND clang_bindirs) - find_program(CMAKE_CLANG_NM NAMES llvm-nm nm PATHS ${clang_bindirs} NO_DEFAULT_PATH) + find_program( + CMAKE_CLANG_NM + NAMES llvm-nm nm + PATHS ${clang_bindirs} + NO_DEFAULT_PATH) endif() if(NOT CMAKE_CLANG_RANLIB AND clang_bindirs) - find_program(CMAKE_CLANG_RANLIB NAMES llvm-ranlib ranlib PATHS ${clang_bindirs} NO_DEFAULT_PATH) + find_program( + CMAKE_CLANG_RANLIB + NAMES llvm-ranlib ranlib + PATHS ${clang_bindirs} + NO_DEFAULT_PATH) endif() set(clang_lto_plugin_name "LLVMgold${CMAKE_SHARED_LIBRARY_SUFFIX}") if(NOT CMAKE_LD_GOLD AND clang_bindirs) - find_program(CMAKE_LD_GOLD NAMES ld.gold PATHS ${clang_bindirs}) + find_program( + CMAKE_LD_GOLD + NAMES ld.gold + PATHS ${clang_bindirs}) endif() if(NOT CLANG_LTO_PLUGIN AND clang_libdirs) - find_file(CLANG_LTO_PLUGIN ${clang_lto_plugin_name} PATHS ${clang_libdirs} NO_DEFAULT_PATH) + find_file( + CLANG_LTO_PLUGIN ${clang_lto_plugin_name} + PATHS ${clang_libdirs} + NO_DEFAULT_PATH) endif() if(CLANG_LTO_PLUGIN) message(STATUS "Found CLANG/LLVM's plugin for LTO: ${CLANG_LTO_PLUGIN}") else() - 
message(STATUS "Could NOT find CLANG/LLVM's plugin (${clang_lto_plugin_name}) for LTO.") + message( + STATUS + "Could NOT find CLANG/LLVM's plugin (${clang_lto_plugin_name}) for LTO." + ) endif() if(CMAKE_CLANG_LD) message(STATUS "Found CLANG/LLVM's linker for LTO: ${CMAKE_CLANG_LD}") else() - message(STATUS "Could NOT find CLANG/LLVM's linker (lld, llvm-ld, llvm-link) for LTO.") + message( + STATUS + "Could NOT find CLANG/LLVM's linker (lld, llvm-ld, llvm-link) for LTO." + ) endif() - if(CMAKE_CLANG_AR AND CMAKE_CLANG_RANLIB AND CMAKE_CLANG_NM) - message(STATUS "Found CLANG/LLVM's binutils for LTO: ${CMAKE_CLANG_AR}, ${CMAKE_CLANG_RANLIB}, ${CMAKE_CLANG_NM}") + if(CMAKE_CLANG_AR + AND CMAKE_CLANG_RANLIB + AND CMAKE_CLANG_NM) + message( + STATUS + "Found CLANG/LLVM's binutils for LTO: ${CMAKE_CLANG_AR}, ${CMAKE_CLANG_RANLIB}, ${CMAKE_CLANG_NM}" + ) else() - message(STATUS "Could NOT find CLANG/LLVM's binutils (ar, ranlib, nm) for LTO.") + message( + STATUS "Could NOT find CLANG/LLVM's binutils (ar, ranlib, nm) for LTO.") endif() unset(clang_lto_plugin_name) @@ -584,20 +700,26 @@ if(CMAKE_COMPILER_IS_CLANG) unset(clang_search_dirs) endif() - if(CMAKE_CLANG_AR AND CMAKE_CLANG_NM AND CMAKE_CLANG_RANLIB - AND ((CLANG_LTO_PLUGIN AND CMAKE_LD_GOLD) - OR (CMAKE_CLANG_LD - AND NOT (CMAKE_HOST_SYSTEM_NAME STREQUAL "Linux" - AND CMAKE_SYSTEM_NAME STREQUAL "Linux")) - OR APPLE)) + if(CMAKE_CLANG_AR + AND CMAKE_CLANG_NM + AND CMAKE_CLANG_RANLIB + AND ((CLANG_LTO_PLUGIN AND CMAKE_LD_GOLD) + OR (CMAKE_CLANG_LD AND NOT (CMAKE_HOST_SYSTEM_NAME STREQUAL "Linux" + AND CMAKE_SYSTEM_NAME STREQUAL "Linux")) + OR APPLE + )) if(ANDROID AND CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_VERSION VERSION_LESS 12) set(CLANG_LTO_AVAILABLE FALSE) - message(STATUS "Link-Time Optimization by CLANG/LLVM is available but unusable due https://reviews.llvm.org/D79919") + message( + STATUS + "Link-Time Optimization by CLANG/LLVM is available but unusable due https://reviews.llvm.org/D79919" + ) else() 
set(CLANG_LTO_AVAILABLE TRUE) message(STATUS "Link-Time Optimization by CLANG/LLVM is available") endif() - elseif(CMAKE_TOOLCHAIN_FILE AND NOT CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_VERSION VERSION_LESS 7.0) + elseif(CMAKE_TOOLCHAIN_FILE + AND NOT CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_VERSION VERSION_LESS 7.0) set(CLANG_LTO_AVAILABLE TRUE) if(NOT CMAKE_CLANG_LD) set(CMAKE_CLANG_LD ${CMAKE_LINKER}) @@ -611,7 +733,10 @@ if(CMAKE_COMPILER_IS_CLANG) if(NOT CMAKE_CLANG_RANLIB) set(CMAKE_CLANG_RANLIB ${CMAKE_RANLIB}) endif() - message(STATUS "Assume Link-Time Optimization by CLANG/LLVM is available via ${CMAKE_TOOLCHAIN_FILE}") + message( + STATUS + "Assume Link-Time Optimization by CLANG/LLVM is available via ${CMAKE_TOOLCHAIN_FILE}" + ) else() set(CLANG_LTO_AVAILABLE FALSE) message(STATUS "Link-Time Optimization by CLANG/LLVM is NOT available") @@ -619,17 +744,22 @@ if(CMAKE_COMPILER_IS_CLANG) endif() # Perform build type specific configuration. -option(ENABLE_BACKTRACE "Enable output of fiber backtrace information in 'show +option( + ENABLE_BACKTRACE + "Enable output of fiber backtrace information in 'show fiber' administrative command. Only works on x86 architectures, if compiled with gcc. If GNU binutils and binutils-dev libraries are installed, backtrace is output with resolved function (symbol) names. Otherwise only frame - addresses are printed." OFF) + addresses are printed." 
+ OFF) set(HAVE_BFD FALSE) if(ENABLE_BACKTRACE) if(NOT (X86_32 OR X86_64) OR NOT CMAKE_COMPILER_IS_GNU${CMAKE_PRIMARY_LANG}) # We only know this option to work with gcc - message(FATAL_ERROR "ENABLE_BACKTRACE option is set but the system + message( + FATAL_ERROR + "ENABLE_BACKTRACE option is set but the system is not x86 based (${CMAKE_SYSTEM_PROCESSOR}) or the compiler is not GNU GCC (${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER}).") endif() @@ -642,15 +772,19 @@ if(ENABLE_BACKTRACE) if(IBERTY_LIBRARY) check_library_exists(${IBERTY_LIBRARY} cplus_demangle "" HAVE_IBERTY_LIB) endif() - set(CMAKE_REQUIRED_DEFINITIONS -DPACKAGE=${PACKAGE} -DPACKAGE_VERSION=${PACKAGE_VERSION}) + set(CMAKE_REQUIRED_DEFINITIONS -DPACKAGE=${PACKAGE} + -DPACKAGE_VERSION=${PACKAGE_VERSION}) check_include_files(bfd.h HAVE_BFD_H) set(CMAKE_REQUIRED_DEFINITIONS) find_package(ZLIB) - if(HAVE_BFD_LIB AND HAVE_BFD_H AND HAVE_IBERTY_LIB AND ZLIB_FOUND) + if(HAVE_BFD_LIB + AND HAVE_BFD_H + AND HAVE_IBERTY_LIB + AND ZLIB_FOUND) set(HAVE_BFD ON) set(BFD_LIBRARIES ${BFD_LIBRARY} ${IBERTY_LIBRARY} ${ZLIB_LIBRARIES}) find_package_message(BFD_LIBRARIES "Found libbfd and dependencies" - ${BFD_LIBRARIES}) + ${BFD_LIBRARIES}) if(TARGET_OS_FREEBSD AND NOT TARGET_OS_DEBIAN_FREEBSD) set(BFD_LIBRARIES ${BFD_LIBRARIES} iconv) endif() @@ -661,16 +795,30 @@ macro(setup_compile_flags) # save initial C/CXX flags if(NOT INITIAL_CMAKE_FLAGS_SAVED) if(CMAKE_CXX_COMPILER_LOADED) - set(INITIAL_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} CACHE STRING "Initial CMake's flags" FORCE) + set(INITIAL_CMAKE_CXX_FLAGS + ${CMAKE_CXX_FLAGS} + CACHE STRING "Initial CMake's flags" FORCE) endif() if(CMAKE_C_COMPILER_LOADED) - set(INITIAL_CMAKE_C_FLAGS ${CMAKE_C_FLAGS} CACHE STRING "Initial CMake's flags" FORCE) + set(INITIAL_CMAKE_C_FLAGS + ${CMAKE_C_FLAGS} + CACHE STRING "Initial CMake's flags" FORCE) endif() - set(INITIAL_CMAKE_EXE_LINKER_FLAGS ${CMAKE_EXE_LINKER_FLAGS} CACHE STRING "Initial CMake's flags" FORCE) - 
set(INITIAL_CMAKE_SHARED_LINKER_FLAGS ${CMAKE_SHARED_LINKER_FLAGS} CACHE STRING "Initial CMake's flags" FORCE) - set(INITIAL_CMAKE_STATIC_LINKER_FLAGS ${CMAKE_STATIC_LINKER_FLAGS} CACHE STRING "Initial CMake's flags" FORCE) - set(INITIAL_CMAKE_MODULE_LINKER_FLAGS ${CMAKE_MODULE_LINKER_FLAGS} CACHE STRING "Initial CMake's flags" FORCE) - set(INITIAL_CMAKE_FLAGS_SAVED TRUE CACHE INTERNAL "State of initial CMake's flags" FORCE) + set(INITIAL_CMAKE_EXE_LINKER_FLAGS + ${CMAKE_EXE_LINKER_FLAGS} + CACHE STRING "Initial CMake's flags" FORCE) + set(INITIAL_CMAKE_SHARED_LINKER_FLAGS + ${CMAKE_SHARED_LINKER_FLAGS} + CACHE STRING "Initial CMake's flags" FORCE) + set(INITIAL_CMAKE_STATIC_LINKER_FLAGS + ${CMAKE_STATIC_LINKER_FLAGS} + CACHE STRING "Initial CMake's flags" FORCE) + set(INITIAL_CMAKE_MODULE_LINKER_FLAGS + ${CMAKE_MODULE_LINKER_FLAGS} + CACHE STRING "Initial CMake's flags" FORCE) + set(INITIAL_CMAKE_FLAGS_SAVED + TRUE + CACHE INTERNAL "State of initial CMake's flags" FORCE) endif() # reset C/CXX flags @@ -711,14 +859,16 @@ macro(setup_compile_flags) add_compile_flags("C;CXX" "-fno-semantic-interposition") endif() if(MSVC) - # checks for /EHa or /clr options exists, - # i.e. is enabled structured async WinNT exceptions - string(REGEX MATCH "^(.* )*[-/]EHc*a( .*)*$" msvc_async_eh_enabled "${CXX_FLAGS}" "${C_FLAGS}") - string(REGEX MATCH "^(.* )*[-/]clr( .*)*$" msvc_clr_enabled "${CXX_FLAGS}" "${C_FLAGS}") + # checks for /EHa or /clr options exists, i.e. is enabled structured async + # WinNT exceptions + string(REGEX MATCH "^(.* )*[-/]EHc*a( .*)*$" msvc_async_eh_enabled + "${CXX_FLAGS}" "${C_FLAGS}") + string(REGEX MATCH "^(.* )*[-/]clr( .*)*$" msvc_clr_enabled "${CXX_FLAGS}" + "${C_FLAGS}") # remote any /EH? 
options string(REGEX REPLACE "( *[-/]-*EH[csa]+ *)+" "" CXX_FLAGS "${CXX_FLAGS}") string(REGEX REPLACE "( *[-/]-*EH[csa]+ *)+" "" C_FLAGS "${C_FLAGS}") - if (msvc_clr_enabled STREQUAL "") + if(msvc_clr_enabled STREQUAL "") if(NOT msvc_async_eh_enabled STREQUAL "") add_compile_flags("C;CXX" "/EHa") else() @@ -727,8 +877,9 @@ macro(setup_compile_flags) endif() endif(MSVC) - if(CC_HAS_WNO_ATTRIBUTES AND CMAKE_COMPILER_IS_GNU${CMAKE_PRIMARY_LANG} - AND CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_VERSION VERSION_LESS 9) + if(CC_HAS_WNO_ATTRIBUTES + AND CMAKE_COMPILER_IS_GNU${CMAKE_PRIMARY_LANG} + AND CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_VERSION VERSION_LESS 9) # GCC < 9.x generates false-positive warnings for optimization attributes add_compile_flags("C;CXX" "-Wno-attributes") if(LTO_ENABLED) @@ -737,21 +888,20 @@ macro(setup_compile_flags) endif() # In C a global variable without a storage specifier (static/extern) and - # without an initialiser is called a ’tentative definition’. The - # language permits multiple tentative definitions in the single - # translation unit; i.e. int foo; int foo; is perfectly ok. GNU - # toolchain goes even further, allowing multiple tentative definitions - # in *different* translation units. Internally, variables introduced via - # tentative definitions are implemented as ‘common’ symbols. Linker - # permits multiple definitions if they are common symbols, and it picks - # one arbitrarily for inclusion in the binary being linked. + # without an initialiser is called a ’tentative definition’. The language + # permits multiple tentative definitions in the single translation unit; i.e. + # int foo; int foo; is perfectly ok. GNU toolchain goes even further, allowing + # multiple tentative definitions in *different* translation units. Internally, + # variables introduced via tentative definitions are implemented as ‘common’ + # symbols. 
Linker permits multiple definitions if they are common symbols, and + # it picks one arbitrarily for inclusion in the binary being linked. # - # -fno-common forces GNU toolchain to behave in a more - # standard-conformant way in respect to tentative definitions and it - # prevents common symbols generation. Since we are a cross-platform - # project it really makes sense. There are toolchains that don’t - # implement GNU style handling of the tentative definitions and there - # are platforms lacking proper support for common symbols (osx). + # -fno-common forces GNU toolchain to behave in a more standard-conformant way + # in respect to tentative definitions and it prevents common symbols + # generation. Since we are a cross-platform project it really makes sense. + # There are toolchains that don’t implement GNU style handling of the + # tentative definitions and there are platforms lacking proper support for + # common symbols (osx). if(CC_HAS_FNO_COMMON) add_compile_flags("C;CXX" "-fno-common") endif() @@ -770,10 +920,9 @@ macro(setup_compile_flags) add_compile_flags("C;CXX" "/Gy") endif() - # We must set -fno-omit-frame-pointer here, since we rely - # on frame pointer when getting a backtrace, and it must - # be used consistently across all object files. - # The same reasoning applies to -fno-stack-protector switch. + # We must set -fno-omit-frame-pointer here, since we rely on frame pointer + # when getting a backtrace, and it must be used consistently across all object + # files. The same reasoning applies to -fno-stack-protector switch. 
if(ENABLE_BACKTRACE) if(CC_HAS_FNO_OMIT_FRAME_POINTER) add_compile_flags("C;CXX" "-fno-omit-frame-pointer") @@ -782,7 +931,10 @@ macro(setup_compile_flags) if(MSVC) if(MSVC_VERSION LESS 1900) - message(FATAL_ERROR "At least \"Microsoft C/C++ Compiler\" version 19.0.24234.1 (Visual Studio 2015 Update 3) is required.") + message( + FATAL_ERROR + "At least \"Microsoft C/C++ Compiler\" version 19.0.24234.1 (Visual Studio 2015 Update 3) is required." + ) endif() if(NOT MSVC_VERSION LESS 1910) add_compile_flags("CXX" "/Zc:__cplusplus") @@ -803,9 +955,12 @@ macro(setup_compile_flags) add_definitions("-D__STDC_CONSTANT_MACROS=1") add_definitions("-D_HAS_EXCEPTIONS=1") - # Only add -Werror if it's a debug build, done by developers. - # Release builds should not cause extra trouble. - if(CC_HAS_WERROR AND (CI OR CMAKE_CONFIGURATION_TYPES OR CMAKE_BUILD_TYPE STREQUAL "Debug")) + # Only add -Werror if it's a debug build, done by developers. Release builds + # should not cause extra trouble. + if(CC_HAS_WERROR + AND (CI + OR CMAKE_CONFIGURATION_TYPES + OR CMAKE_BUILD_TYPE STREQUAL "Debug")) if(MSVC) add_compile_flags("C;CXX" "/WX") elseif(CMAKE_COMPILER_IS_CLANG) @@ -821,17 +976,17 @@ macro(setup_compile_flags) endif() endif() - if(CMAKE_COMPILER_IS_GNU${CMAKE_PRIMARY_LANG} - AND CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_VERSION VERSION_LESS 5) + AND CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_VERSION VERSION_LESS 5) # G++ bug. http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31488 add_compile_flags("CXX" "-Wno-invalid-offsetof") endif() if(MINGW) - # Disable junk MINGW's warnings that issued due to incompatibilities - # and shortcomings of MINGW, - # since the code is checked by builds with GCC, CLANG and MSVC. 
- add_compile_flags("C;CXX" "-Wno-format-extra-args" "-Wno-format" "-Wno-cast-function-type" "-Wno-implicit-fallthrough") + # Disable junk MINGW's warnings that issued due to incompatibilities and + # shortcomings of MINGW, since the code is checked by builds with GCC, CLANG + # and MSVC. + add_compile_flags("C;CXX" "-Wno-format-extra-args" "-Wno-format" + "-Wno-cast-function-type" "-Wno-implicit-fallthrough") endif() if(ENABLE_ASAN) @@ -845,7 +1000,8 @@ macro(setup_compile_flags) if(ENABLE_UBSAN) if(NOT MSVC) - add_compile_flags("C;CXX" "-fsanitize=undefined" "-fsanitize-undefined-trap-on-error") + add_compile_flags("C;CXX" "-fsanitize=undefined" + "-fsanitize-undefined-trap-on-error") else() add_compile_flags("C;CXX" "/fsanitize=undefined") endif() @@ -854,13 +1010,17 @@ macro(setup_compile_flags) if(ENABLE_GCOV) if(NOT HAVE_GCOV) - message(FATAL_ERROR "ENABLE_GCOV option requested but gcov library is not found") + message( + FATAL_ERROR "ENABLE_GCOV option requested but gcov library is not found" + ) endif() add_compile_flags("C;CXX" "-fprofile-arcs" "-ftest-coverage") set(EXE_LINKER_FLAGS "${EXE_LINKER_FLAGS} -fprofile-arcs -ftest-coverage") - set(SHARED_LINKER_FLAGS "${SHARED_LINKER_FLAGS} -fprofile-arcs -ftest-coverage") - set(MODULE_LINKER_FLAGS "${MODULE_LINKER_FLAGS} -fprofile-arcs -ftest-coverage") + set(SHARED_LINKER_FLAGS + "${SHARED_LINKER_FLAGS} -fprofile-arcs -ftest-coverage") + set(MODULE_LINKER_FLAGS + "${MODULE_LINKER_FLAGS} -fprofile-arcs -ftest-coverage") # add_library(gcov SHARED IMPORTED) endif() @@ -870,12 +1030,16 @@ macro(setup_compile_flags) if(CMAKE_COMPILER_IS_GNUCC AND LTO_ENABLED) add_compile_flags("C;CXX" ${GCC_LTO_CFLAGS}) - set(EXE_LINKER_FLAGS "${EXE_LINKER_FLAGS} ${GCC_LTO_CFLAGS} -fverbose-asm -fwhole-program") - set(SHARED_LINKER_FLAGS "${SHARED_LINKER_FLAGS} ${GCC_LTO_CFLAGS} -fverbose-asm") - set(MODULE_LINKER_FLAGS "${MODULE_LINKER_FLAGS} ${GCC_LTO_CFLAGS} -fverbose-asm") + set(EXE_LINKER_FLAGS + "${EXE_LINKER_FLAGS} 
${GCC_LTO_CFLAGS} -fverbose-asm -fwhole-program") + set(SHARED_LINKER_FLAGS + "${SHARED_LINKER_FLAGS} ${GCC_LTO_CFLAGS} -fverbose-asm") + set(MODULE_LINKER_FLAGS + "${MODULE_LINKER_FLAGS} ${GCC_LTO_CFLAGS} -fverbose-asm") if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5) # Pass the same optimization flags to the linker - set(compile_flags "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPERCASE}}") + set(compile_flags + "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPERCASE}}") set(EXE_LINKER_FLAGS "${EXE_LINKER_FLAGS} ${compile_flags}") set(SHARED_LINKER_FLAGS "${SHARED_LINKER_FLAGS} ${compile_flags}") set(MODULE_LINKER_FLAGS "${MODULE_LINKER_FLAGS} ${compile_flags}") @@ -885,32 +1049,53 @@ macro(setup_compile_flags) endif() endif() - if(MSVC AND NOT CMAKE_COMPILER_IS_CLANG AND LTO_ENABLED) + if(MSVC + AND NOT CMAKE_COMPILER_IS_CLANG + AND LTO_ENABLED) add_compile_flags("C;CXX" "/GL") foreach(linkmode IN ITEMS EXE SHARED STATIC MODULE) set(${linkmode}_LINKER_FLAGS "${${linkmode}_LINKER_FLAGS} /LTCG") - string(REGEX REPLACE "^(.*)(/INCREMENTAL)(:YES)?(:NO)?( ?.*)$" "\\1\\2:NO\\5" ${linkmode}_LINKER_FLAGS "${${linkmode}_LINKER_FLAGS}") + string(REGEX + REPLACE "^(.*)(/INCREMENTAL)(:YES)?(:NO)?( ?.*)$" "\\1\\2:NO\\5" + ${linkmode}_LINKER_FLAGS "${${linkmode}_LINKER_FLAGS}") string(STRIP "${${linkmode}_LINKER_FLAGS}" ${linkmode}_LINKER_FLAGS) - foreach(config IN LISTS CMAKE_CONFIGURATION_TYPES ITEMS Release MinSizeRel RelWithDebInfo Debug) + foreach( + config IN + LISTS CMAKE_CONFIGURATION_TYPES + ITEMS Release MinSizeRel RelWithDebInfo Debug) string(TOUPPER "${config}" config_uppercase) if(DEFINED "CMAKE_${linkmode}_LINKER_FLAGS_${config_uppercase}") - string(REGEX REPLACE "^(.*)(/INCREMENTAL)(:YES)?(:NO)?( ?.*)$" "\\1\\2:NO\\5" altered_flags "${CMAKE_${linkmode}_LINKER_FLAGS_${config_uppercase}}") + string( + REGEX + REPLACE "^(.*)(/INCREMENTAL)(:YES)?(:NO)?( ?.*)$" "\\1\\2:NO\\5" + altered_flags + "${CMAKE_${linkmode}_LINKER_FLAGS_${config_uppercase}}") 
string(STRIP "${altered_flags}" altered_flags) - if(NOT "${altered_flags}" STREQUAL "${CMAKE_${linkmode}_LINKER_FLAGS_${config_uppercase}}") - set(CMAKE_${linkmode}_LINKER_FLAGS_${config_uppercase} "${altered_flags}" CACHE STRING "Altered: '/INCREMENTAL' removed for LTO" FORCE) + if(NOT "${altered_flags}" STREQUAL + "${CMAKE_${linkmode}_LINKER_FLAGS_${config_uppercase}}") + set(CMAKE_${linkmode}_LINKER_FLAGS_${config_uppercase} + "${altered_flags}" + CACHE STRING "Altered: '/INCREMENTAL' removed for LTO" FORCE) endif() endif() endforeach(config) endforeach(linkmode) unset(linkmode) - foreach(config IN LISTS CMAKE_CONFIGURATION_TYPES ITEMS Release MinSizeRel RelWithDebInfo) + foreach( + config IN + LISTS CMAKE_CONFIGURATION_TYPES + ITEMS Release MinSizeRel RelWithDebInfo) foreach(lang IN ITEMS C CXX) string(TOUPPER "${config}" config_uppercase) if(DEFINED "CMAKE_${lang}_FLAGS_${config_uppercase}") - string(REPLACE "/O2" "/Ox" altered_flags "${CMAKE_${lang}_FLAGS_${config_uppercase}}") - if(NOT "${altered_flags}" STREQUAL "${CMAKE_${lang}_FLAGS_${config_uppercase}}") - set(CMAKE_${lang}_FLAGS_${config_uppercase} "${altered_flags}" CACHE STRING "Altered: '/O2' replaced by '/Ox' for LTO" FORCE) + string(REPLACE "/O2" "/Ox" altered_flags + "${CMAKE_${lang}_FLAGS_${config_uppercase}}") + if(NOT "${altered_flags}" STREQUAL + "${CMAKE_${lang}_FLAGS_${config_uppercase}}") + set(CMAKE_${lang}_FLAGS_${config_uppercase} + "${altered_flags}" + CACHE STRING "Altered: '/O2' replaced by '/Ox' for LTO" FORCE) endif() endif() unset(config_uppercase) @@ -935,25 +1120,46 @@ macro(setup_compile_flags) endif() add_compile_flags("C;CXX" ${CLANG_LTO_FLAG}) if(NOT MSVC) - set(EXE_LINKER_FLAGS "${EXE_LINKER_FLAGS} ${CLANG_LTO_FLAG} -fverbose-asm -fwhole-program") - set(SHARED_LINKER_FLAGS "${SHARED_LINKER_FLAGS} ${CLANG_LTO_FLAG} -fverbose-asm") - set(MODULE_LINKER_FLAGS "${MODULE_LINKER_FLAGS} ${CLANG_LTO_FLAG} -fverbose-asm") + set(EXE_LINKER_FLAGS + "${EXE_LINKER_FLAGS} ${CLANG_LTO_FLAG} 
-fverbose-asm -fwhole-program") + set(SHARED_LINKER_FLAGS + "${SHARED_LINKER_FLAGS} ${CLANG_LTO_FLAG} -fverbose-asm") + set(MODULE_LINKER_FLAGS + "${MODULE_LINKER_FLAGS} ${CLANG_LTO_FLAG} -fverbose-asm") endif() endif() # push C/CXX flags into the cache if(CMAKE_CXX_COMPILER_LOADED) - set(CMAKE_CXX_FLAGS ${CXX_FLAGS} CACHE STRING "Flags used by the C++ compiler during all build types" FORCE) + set(CMAKE_CXX_FLAGS + ${CXX_FLAGS} + CACHE STRING "Flags used by the C++ compiler during all build types" + FORCE) unset(CXX_FLAGS) endif() if(CMAKE_C_COMPILER_LOADED) - set(CMAKE_C_FLAGS ${C_FLAGS} CACHE STRING "Flags used by the C compiler during all build types" FORCE) + set(CMAKE_C_FLAGS + ${C_FLAGS} + CACHE STRING "Flags used by the C compiler during all build types" + FORCE) unset(C_FLAGS) endif() - set(CMAKE_EXE_LINKER_FLAGS ${EXE_LINKER_FLAGS} CACHE STRING "Flags used by the linker" FORCE) - set(CMAKE_SHARED_LINKER_FLAGS ${SHARED_LINKER_FLAGS} CACHE STRING "Flags used by the linker during the creation of dll's" FORCE) - set(CMAKE_STATIC_LINKER_FLAGS ${STATIC_LINKER_FLAGS} CACHE STRING "Flags used by the linker during the creation of static libraries" FORCE) - set(CMAKE_MODULE_LINKER_FLAGS ${MODULE_LINKER_FLAGS} CACHE STRING "Flags used by the linker during the creation of modules" FORCE) + set(CMAKE_EXE_LINKER_FLAGS + ${EXE_LINKER_FLAGS} + CACHE STRING "Flags used by the linker" FORCE) + set(CMAKE_SHARED_LINKER_FLAGS + ${SHARED_LINKER_FLAGS} + CACHE STRING "Flags used by the linker during the creation of dll's" + FORCE) + set(CMAKE_STATIC_LINKER_FLAGS + ${STATIC_LINKER_FLAGS} + CACHE STRING + "Flags used by the linker during the creation of static libraries" + FORCE) + set(CMAKE_MODULE_LINKER_FLAGS + ${MODULE_LINKER_FLAGS} + CACHE STRING "Flags used by the linker during the creation of modules" + FORCE) unset(EXE_LINKER_FLAGS) unset(SHARED_LINKER_FLAGS) unset(STATIC_LINKER_FLAGS) @@ -963,7 +1169,9 @@ endmacro(setup_compile_flags) macro(probe_libcxx_filesystem) 
if(CMAKE_CXX_COMPILER_LOADED AND NOT DEFINED LIBCXX_FILESYSTEM) list(FIND CMAKE_CXX_COMPILE_FEATURES cxx_std_11 HAS_CXX11) - if(NOT HAS_CXX11 LESS 0 OR CXX_FALLBACK_GNU11 OR CXX_FALLBACK_11) + if(NOT HAS_CXX11 LESS 0 + OR CXX_FALLBACK_GNU11 + OR CXX_FALLBACK_11) include(CMakePushCheckState) include(CheckCXXSourceCompiles) cmake_push_check_state() @@ -976,7 +1184,8 @@ macro(probe_libcxx_filesystem) list(FIND CMAKE_CXX_COMPILE_FEATURES cxx_std_14 HAS_CXX14) list(FIND CMAKE_CXX_COMPILE_FEATURES cxx_std_17 HAS_CXX17) if(NOT HAS_CXX17 LESS 0 - AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5)) + AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION + VERSION_LESS 5)) set(CMAKE_CXX_STANDARD 17) elseif(NOT HAS_CXX14 LESS 0) set(CMAKE_CXX_STANDARD 14) @@ -989,16 +1198,20 @@ macro(probe_libcxx_filesystem) endif() set(stdfs_probe_clear_cxx_standard ON) endif() - if(CMAKE_COMPILER_IS_ELBRUSCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 1.25.23) + if(CMAKE_COMPILER_IS_ELBRUSCXX AND CMAKE_CXX_COMPILER_VERSION + VERSION_LESS 1.25.23) if(CMAKE_VERSION VERSION_LESS 3.14) - set(stdfs_probe_flags ${stdfs_probe_flags} "-Wl,--allow-multiple-definition") + set(stdfs_probe_flags ${stdfs_probe_flags} + "-Wl,--allow-multiple-definition") else() - set(CMAKE_REQUIRED_LINK_OPTIONS ${stdfs_probe_save_link_options} "-Wl,--allow-multiple-definition") + set(CMAKE_REQUIRED_LINK_OPTIONS ${stdfs_probe_save_link_options} + "-Wl,--allow-multiple-definition") endif() endif() set(CMAKE_REQUIRED_FLAGS ${stdfs_probe_flags}) - set(stdfs_probe_code [[ + set(stdfs_probe_code + [[ #if defined(__SIZEOF_INT128__) && !defined(__GLIBCXX_TYPE_INT_N_0) && defined(__clang__) && __clang_major__ < 4 #define __GLIBCXX_BITSIZE_INT_N_0 128 #define __GLIBCXX_TYPE_INT_N_0 __int128 @@ -1048,25 +1261,40 @@ macro(probe_libcxx_filesystem) check_cxx_source_compiles("${stdfs_probe_code}" LIBCXX_FILESYSTEM_none) if(LIBCXX_FILESYSTEM_none) - message(STATUS "No linking with additional 
library needed for std::filesystem") + message( + STATUS "No linking with additional library needed for std::filesystem" + ) else() set(CMAKE_REQUIRED_LIBRARIES ${stdfs_probe_save_libraries} "stdc++fs") - check_cxx_source_compiles("${stdfs_probe_code}" LIBCXX_FILESYSTEM_stdcxxfs) + check_cxx_source_compiles("${stdfs_probe_code}" + LIBCXX_FILESYSTEM_stdcxxfs) if(LIBCXX_FILESYSTEM_stdcxxfs) set(LIBCXX_FILESYSTEM "stdc++fs") - message(STATUS "Linking with ${LIBCXX_FILESYSTEM} is required for std::filesystem") + message( + STATUS + "Linking with ${LIBCXX_FILESYSTEM} is required for std::filesystem" + ) else() set(CMAKE_REQUIRED_LIBRARIES ${stdfs_probe_save_libraries} "c++fs") - check_cxx_source_compiles("${stdfs_probe_code}" LIBCXX_FILESYSTEM_cxxfs) + check_cxx_source_compiles("${stdfs_probe_code}" + LIBCXX_FILESYSTEM_cxxfs) if(LIBCXX_FILESYSTEM_cxxfs) set(LIBCXX_FILESYSTEM "c++fs") - message(STATUS "Linking with ${LIBCXX_FILESYSTEM} is required for std::filesystem") + message( + STATUS + "Linking with ${LIBCXX_FILESYSTEM} is required for std::filesystem" + ) else() - set(CMAKE_REQUIRED_LIBRARIES ${stdfs_probe_save_libraries} "c++experimental") - check_cxx_source_compiles("${stdfs_probe_code}" LIBCXX_FILESYSTEM_cxxexperimental) + set(CMAKE_REQUIRED_LIBRARIES ${stdfs_probe_save_libraries} + "c++experimental") + check_cxx_source_compiles("${stdfs_probe_code}" + LIBCXX_FILESYSTEM_cxxexperimental) if(LIBCXX_FILESYSTEM_cxxexperimental) set(LIBCXX_FILESYSTEM "c++experimental") - message(STATUS "Linking with ${LIBCXX_FILESYSTEM} is required for std::filesystem") + message( + STATUS + "Linking with ${LIBCXX_FILESYSTEM} is required for std::filesystem" + ) else() message(STATUS "No support for std::filesystem") endif() diff --git a/cmake/profile.cmake b/cmake/profile.cmake index 9331a0bb..d325724e 100644 --- a/cmake/profile.cmake +++ b/cmake/profile.cmake @@ -1,5 +1,5 @@ -## Copyright (c) 2012-2024 Леонид Юрьев aka Leonid Yuriev -## SPDX-License-Identifier: Apache-2.0 +# 
Copyright (c) 2012-2024 Леонид Юрьев aka Leonid Yuriev +# SPDX-License-Identifier: Apache-2.0 if(CMAKE_VERSION VERSION_LESS 3.8.2) cmake_minimum_required(VERSION 3.0.2) @@ -14,7 +14,7 @@ cmake_policy(VERSION ${CMAKE_MINIMUM_REQUIRED_VERSION}) unset(MEMCHECK_OPTION_NAME) if(NOT DEFINED ENABLE_MEMCHECK) - if (DEFINED MDBX_USE_VALGRIND) + if(DEFINED MDBX_USE_VALGRIND) set(MEMCHECK_OPTION_NAME "MDBX_USE_VALGRIND") elseif(DEFINED ENABLE_VALGRIND) set(MEMCHECK_OPTION_NAME "ENABLE_VALGRIND") @@ -23,7 +23,7 @@ if(NOT DEFINED ENABLE_MEMCHECK) endif() if(MEMCHECK_OPTION_NAME STREQUAL "ENABLE_MEMCHECK") option(ENABLE_MEMCHECK - "Enable integration with valgrind, a memory analyzing tool" OFF) + "Enable integration with valgrind, a memory analyzing tool" OFF) elseif(${MEMCHECK_OPTION_NAME}) set(ENABLE_MEMCHECK ON) else() @@ -34,17 +34,20 @@ endif() include(CheckLibraryExists) check_library_exists(gcov __gcov_flush "" HAVE_GCOV) -option(ENABLE_GCOV - "Enable integration with gcov, a code coverage program" OFF) +option(ENABLE_GCOV "Enable integration with gcov, a code coverage program" OFF) option(ENABLE_GPROF - "Enable integration with gprof, a performance analyzing tool" OFF) + "Enable integration with gprof, a performance analyzing tool" OFF) -option(ENABLE_ASAN - "Enable AddressSanitizer, a fast memory error detector based on compiler instrumentation" OFF) +option( + ENABLE_ASAN + "Enable AddressSanitizer, a fast memory error detector based on compiler instrumentation" + OFF) -option(ENABLE_UBSAN - "Enable UndefinedBehaviorSanitizer, a fast undefined behavior detector based on compiler instrumentation" OFF) +option( + ENABLE_UBSAN + "Enable UndefinedBehaviorSanitizer, a fast undefined behavior detector based on compiler instrumentation" + OFF) if(ENABLE_MEMCHECK) if(CMAKE_CXX_COMPILER_LOADED) @@ -55,7 +58,10 @@ if(ENABLE_MEMCHECK) check_include_file(valgrind/memcheck.h HAVE_VALGRIND_MEMCHECK_H) endif() if(NOT HAVE_VALGRIND_MEMCHECK_H) - message(FATAL_ERROR 
"${MEMCHECK_OPTION_NAME} option is set but valgrind/memcheck.h is not found") + message( + FATAL_ERROR + "${MEMCHECK_OPTION_NAME} option is set but valgrind/memcheck.h is not found" + ) endif() endif() diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 0348d96d..3a5fddde 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -1,5 +1,5 @@ -## Copyright (c) 2012-2024 Леонид Юрьев aka Leonid Yuriev -## SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2012-2024 Леонид Юрьев aka Leonid Yuriev +# SPDX-License-Identifier: Apache-2.0 if(CMAKE_VERSION VERSION_LESS 3.8.2) cmake_minimum_required(VERSION 3.0.2) @@ -27,8 +27,10 @@ macro(add_compile_flags languages) endmacro(add_compile_flags) macro(remove_flag varname flag) - string(REGEX REPLACE "^(.*)( ${flag} )(.*)$" "\\1 \\3" ${varname} ${${varname}}) - string(REGEX REPLACE "^((.+ )*)(${flag})(( .+)*)$" "\\1\\4" ${varname} ${${varname}}) + string(REGEX REPLACE "^(.*)( ${flag} )(.*)$" "\\1 \\3" ${varname} + ${${varname}}) + string(REGEX REPLACE "^((.+ )*)(${flag})(( .+)*)$" "\\1\\4" ${varname} + ${${varname}}) endmacro(remove_flag) macro(remove_compile_flag languages flag) @@ -49,9 +51,9 @@ macro(set_source_files_compile_flags) set(_lang "") if("${_file_ext}" STREQUAL ".m") set(_lang OBJC) - # CMake believes that Objective C is a flavor of C++, not C, - # and uses g++ compiler for .m files. - # LANGUAGE property forces CMake to use CC for ${file} + # CMake believes that Objective C is a flavor of C++, not C, and uses g++ + # compiler for .m files. 
LANGUAGE property forces CMake to use CC for + # ${file} set_source_files_properties(${file} PROPERTIES LANGUAGE C) elseif("${_file_ext}" STREQUAL ".mm") set(_lang OBJCXX) @@ -65,15 +67,15 @@ macro(set_source_files_compile_flags) set(_flags "${_flags} ${CMAKE_${_lang}_FLAGS}") endif() # message(STATUS "Set (${file} ${_flags}") - set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS - "${_flags}") + set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS "${_flags}") endif() endforeach() unset(_file_ext) unset(_lang) endmacro(set_source_files_compile_flags) -macro(fetch_version name source_root_directory parent_scope build_directory_for_json_output) +macro(fetch_version name source_root_directory parent_scope + build_directory_for_json_output) set(_version_4dot "") set(_git_describe "") set(_git_timestamp "") @@ -86,7 +88,8 @@ macro(fetch_version name source_root_directory parent_scope build_directory_for_ find_program(GIT git) if(GIT) - execute_process(COMMAND ${GIT} rev-parse --show-toplevel + execute_process( + COMMAND ${GIT} rev-parse --show-toplevel OUTPUT_VARIABLE _git_root ERROR_VARIABLE _git_root_error OUTPUT_STRIP_TRAILING_WHITESPACE @@ -94,9 +97,11 @@ macro(fetch_version name source_root_directory parent_scope build_directory_for_ RESULT_VARIABLE _rc) if(_rc OR _git_root STREQUAL "") if(EXISTS "${source_root_directory}/.git") - message(ERROR "`git rev-parse --show-toplevel` failed '${_git_root_error}'") + message(ERROR + "`git rev-parse --show-toplevel` failed '${_git_root_error}'") else() - message(VERBOSE "`git rev-parse --show-toplevel` failed '${_git_root_error}'") + message(VERBOSE + "`git rev-parse --show-toplevel` failed '${_git_root_error}'") endif() else() set(_source_root "${source_root_directory}") @@ -105,7 +110,10 @@ macro(fetch_version name source_root_directory parent_scope build_directory_for_ cmake_path(NORMAL_PATH _source_root) endif() if(_source_root STREQUAL _git_root AND EXISTS "${_git_root}/VERSION.json") - 
message(FATAL_ERROR "Несколько источников информации о версии, допустим только один из: репозиторий git, либо файл VERSION.json") + message( + FATAL_ERROR + "Несколько источников информации о версии, допустим только один из: репозиторий git, либо файл VERSION.json" + ) endif() endif() endif() @@ -114,9 +122,14 @@ macro(fetch_version name source_root_directory parent_scope build_directory_for_ set(_version_from "${source_root_directory}/VERSION.json") if(CMAKE_VERSION VERSION_LESS 3.19) - message(FATAL_ERROR "Требуется CMake версии >= 3.19 для чтения VERSION.json") + message( + FATAL_ERROR "Требуется CMake версии >= 3.19 для чтения VERSION.json") endif() - file(STRINGS "${_version_from}" _versioninfo_json NEWLINE_CONSUME LIMIT_COUNT 9 LIMIT_INPUT 999 ENCODING UTF-8) + file( + STRINGS "${_version_from}" _versioninfo_json NEWLINE_CONSUME + LIMIT_COUNT 9 + LIMIT_INPUT 999 + ENCODING UTF-8) string(JSON _git_describe GET ${_versioninfo_json} git_describe) string(JSON _git_timestamp GET "${_versioninfo_json}" "git_timestamp") string(JSON _git_tree GET "${_versioninfo_json}" "git_tree") @@ -126,7 +139,9 @@ macro(fetch_version name source_root_directory parent_scope build_directory_for_ string(REPLACE "." ";" _version_list "${_version_4dot}") if(NOT _version_4dot) - message(ERROR "Unable to retrieve ${name} version from \"${_version_from}\" file.") + message( + ERROR + "Unable to retrieve ${name} version from \"${_version_from}\" file.") set(_version_list ${_git_version}) string(REPLACE ";" "." 
_version_4dot "${_git_version}") else() @@ -136,158 +151,221 @@ macro(fetch_version name source_root_directory parent_scope build_directory_for_ elseif(_git_root AND _source_root STREQUAL _git_root) set(_version_from git) - execute_process(COMMAND ${GIT} show --no-patch --format=%cI HEAD + execute_process( + COMMAND ${GIT} show --no-patch --format=%cI HEAD OUTPUT_VARIABLE _git_timestamp OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc OR _git_timestamp STREQUAL "%cI") - execute_process(COMMAND ${GIT} show --no-patch --format=%ci HEAD + execute_process( + COMMAND ${GIT} show --no-patch --format=%ci HEAD OUTPUT_VARIABLE _git_timestamp OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc OR _git_timestamp STREQUAL "%ci") - message(FATAL_ERROR "Please install latest version of git (`show --no-patch --format=%cI HEAD` failed)") + message( + FATAL_ERROR + "Please install latest version of git (`show --no-patch --format=%cI HEAD` failed)" + ) endif() endif() - execute_process(COMMAND ${GIT} show --no-patch --format=%T HEAD + execute_process( + COMMAND ${GIT} show --no-patch --format=%T HEAD OUTPUT_VARIABLE _git_tree OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc OR _git_tree STREQUAL "") - message(FATAL_ERROR "Please install latest version of git (`show --no-patch --format=%T HEAD` failed)") + message( + FATAL_ERROR + "Please install latest version of git (`show --no-patch --format=%T HEAD` failed)" + ) endif() - execute_process(COMMAND ${GIT} show --no-patch --format=%H HEAD + execute_process( + COMMAND ${GIT} show --no-patch --format=%H HEAD OUTPUT_VARIABLE _git_commit OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc OR _git_commit STREQUAL "") - message(FATAL_ERROR "Please install latest version of git (`show --no-patch --format=%H HEAD` failed)") + 
message( + FATAL_ERROR + "Please install latest version of git (`show --no-patch --format=%H HEAD` failed)" + ) endif() - execute_process(COMMAND ${GIT} status --untracked-files=no --porcelain + execute_process( + COMMAND ${GIT} status --untracked-files=no --porcelain OUTPUT_VARIABLE _git_status OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc) - message(FATAL_ERROR "Please install latest version of git (`status --untracked-files=no --porcelain` failed)") + message( + FATAL_ERROR + "Please install latest version of git (`status --untracked-files=no --porcelain` failed)" + ) endif() if(NOT _git_status STREQUAL "") set(_git_commit "${_git_commit}-dirty") endif() unset(_git_status) - execute_process(COMMAND ${GIT} rev-list --tags --count + execute_process( + COMMAND ${GIT} rev-list --tags --count OUTPUT_VARIABLE _tag_count OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc) - message(FATAL_ERROR "Please install latest version of git (`git rev-list --tags --count` failed)") + message( + FATAL_ERROR + "Please install latest version of git (`git rev-list --tags --count` failed)" + ) endif() if(_tag_count EQUAL 0) - execute_process(COMMAND ${GIT} rev-list --all --count + execute_process( + COMMAND ${GIT} rev-list --all --count OUTPUT_VARIABLE _whole_count OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc) - message(FATAL_ERROR "Please install latest version of git (`git rev-list --all --count` failed)") + message( + FATAL_ERROR + "Please install latest version of git (`git rev-list --all --count` failed)" + ) endif() if(_whole_count GREATER 42) - message(FATAL_ERROR "Please fetch tags (no any tags for ${_whole_count} commits)") + message( + FATAL_ERROR + "Please fetch tags (no any tags for ${_whole_count} commits)") endif() set(_git_version "0;0;0") - execute_process(COMMAND ${GIT} rev-list --count --all 
--no-merges + execute_process( + COMMAND ${GIT} rev-list --count --all --no-merges OUTPUT_VARIABLE _git_revision OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc OR _git_revision STREQUAL "") - message(FATAL_ERROR "Please install latest version of git (`rev-list --count --all --no-merges` failed)") + message( + FATAL_ERROR + "Please install latest version of git (`rev-list --count --all --no-merges` failed)" + ) endif() else(_tag_count EQUAL 0) - execute_process(COMMAND ${GIT} describe --tags --long --dirty=-dirty "--match=v[0-9]*" + execute_process( + COMMAND ${GIT} describe --tags --long --dirty=-dirty "--match=v[0-9]*" OUTPUT_VARIABLE _git_describe OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc OR _git_describe STREQUAL "") - execute_process(COMMAND ${GIT} rev-list --all --count + execute_process( + COMMAND ${GIT} rev-list --all --count OUTPUT_VARIABLE _whole_count OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE _rc) - if(_rc) - message(FATAL_ERROR "Please install latest version of git (`git rev-list --all --count` failed)") + RESULT_VARIABLE _rc) + if(_rc) + message( + FATAL_ERROR + "Please install latest version of git (`git rev-list --all --count` failed)" + ) endif() if(_whole_count GREATER 42) - message(FATAL_ERROR "Please fetch tags (`describe --tags --long --dirty --match=v[0-9]*` failed)") + message( + FATAL_ERROR + "Please fetch tags (`describe --tags --long --dirty --match=v[0-9]*` failed)" + ) else() - execute_process(COMMAND ${GIT} describe --all --long --dirty=-dirty + execute_process( + COMMAND ${GIT} describe --all --long --dirty=-dirty OUTPUT_VARIABLE _git_describe OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc OR _git_describe STREQUAL "") - message(FATAL_ERROR "Please install latest version of git (`git rev-list --tags --count` 
and/or `git rev-list --all --count` failed)") + message( + FATAL_ERROR + "Please install latest version of git (`git rev-list --tags --count` and/or `git rev-list --all --count` failed)" + ) endif() endif() endif() - execute_process(COMMAND ${GIT} describe --tags --abbrev=0 "--match=v[0-9]*" + execute_process( + COMMAND ${GIT} describe --tags --abbrev=0 "--match=v[0-9]*" OUTPUT_VARIABLE _last_release_tag OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc) - message(FATAL_ERROR "Please install latest version of git (`describe --tags --abbrev=0 --match=v[0-9]*` failed)") + message( + FATAL_ERROR + "Please install latest version of git (`describe --tags --abbrev=0 --match=v[0-9]*` failed)" + ) endif() - if (_last_release_tag) + if(_last_release_tag) set(_git_revlist_arg "${_last_release_tag}..HEAD") else() - execute_process(COMMAND ${GIT} tag --sort=-version:refname + execute_process( + COMMAND ${GIT} tag --sort=-version:refname OUTPUT_VARIABLE _tag_list OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc) - message(FATAL_ERROR "Please install latest version of git (`tag --sort=-version:refname` failed)") + message( + FATAL_ERROR + "Please install latest version of git (`tag --sort=-version:refname` failed)" + ) endif() string(REGEX REPLACE "\n" ";" _tag_list "${_tag_list}") set(_git_revlist_arg "HEAD") foreach(_tag IN LISTS _tag_list) if(NOT _last_release_tag) - string(REGEX MATCH "^v[0-9]+(\.[0-9]+)+" _last_release_tag "${_tag}") + string(REGEX MATCH "^v[0-9]+(\.[0-9]+)+" _last_release_tag + "${_tag}") set(_git_revlist_arg "${_tag}..HEAD") endif() endforeach(_tag) endif() - execute_process(COMMAND ${GIT} rev-list --count "${_git_revlist_arg}" + execute_process( + COMMAND ${GIT} rev-list --count "${_git_revlist_arg}" OUTPUT_VARIABLE _git_revision OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc OR 
_git_revision STREQUAL "") - message(FATAL_ERROR "Please install latest version of git (`rev-list --count ${_git_revlist_arg}` failed)") + message( + FATAL_ERROR + "Please install latest version of git (`rev-list --count ${_git_revlist_arg}` failed)" + ) endif() - string(REGEX MATCH "^(v)?([0-9]+)\\.([0-9]+)\\.([0-9]+)(.*)?" _git_version_valid "${_git_describe}") + string(REGEX MATCH "^(v)?([0-9]+)\\.([0-9]+)\\.([0-9]+)(.*)?" + _git_version_valid "${_git_describe}") if(_git_version_valid) - string(REGEX REPLACE "^(v)?([0-9]+)\\.([0-9]+)\\.([0-9]+)(.*)?" "\\2;\\3;\\4" _git_version ${_git_describe}) + string(REGEX REPLACE "^(v)?([0-9]+)\\.([0-9]+)\\.([0-9]+)(.*)?" + "\\2;\\3;\\4" _git_version ${_git_describe}) else() - string(REGEX MATCH "^(v)?([0-9]+)\\.([0-9]+)(.*)?" _git_version_valid "${_git_describe}") + string(REGEX MATCH "^(v)?([0-9]+)\\.([0-9]+)(.*)?" _git_version_valid + "${_git_describe}") if(_git_version_valid) - string(REGEX REPLACE "^(v)?([0-9]+)\\.([0-9]+)(.*)?" "\\2;\\3;0" _git_version ${_git_describe}) + string(REGEX REPLACE "^(v)?([0-9]+)\\.([0-9]+)(.*)?" "\\2;\\3;0" + _git_version ${_git_describe}) else() - message(AUTHOR_WARNING "Bad ${name} version \"${_git_describe}\"; falling back to 0.0.0 (have you made an initial release?)") + message( + AUTHOR_WARNING + "Bad ${name} version \"${_git_describe}\"; falling back to 0.0.0 (have you made an initial release?)" + ) set(_git_version "0;0;0") endif() endif() @@ -298,7 +376,10 @@ macro(fetch_version name source_root_directory parent_scope build_directory_for_ string(REPLACE ";" "." 
_version_4dot "${_version_list}") elseif(GIT) - message(FATAL_ERROR "Нет источника информации о версии (${source_root_directory}), требуется один из: репозиторий git, либо VERSION.json") + message( + FATAL_ERROR + "Нет источника информации о версии (${source_root_directory}), требуется один из: репозиторий git, либо VERSION.json" + ) else() message(FATAL_ERROR "Требуется git для получения информации о версии") endif() @@ -309,24 +390,53 @@ macro(fetch_version name source_root_directory parent_scope build_directory_for_ list(GET _version_list 2 _version_release) list(GET _version_list 3 _version_revision) - if(NOT _git_describe OR NOT _git_timestamp OR NOT _git_tree OR NOT _git_commit OR _git_revision STREQUAL "" OR NOT _version_list_length EQUAL 4 OR _version_major STREQUAL "" OR _version_minor STREQUAL "" OR _version_release STREQUAL "" OR _version_revision STREQUAL "") + if(NOT _git_describe + OR NOT _git_timestamp + OR NOT _git_tree + OR NOT _git_commit + OR _git_revision STREQUAL "" + OR NOT _version_list_length EQUAL 4 + OR _version_major STREQUAL "" + OR _version_minor STREQUAL "" + OR _version_release STREQUAL "" + OR _version_revision STREQUAL "") message(ERROR "Unable to retrieve ${name} version from ${_version_from}.") else() list(APPEND _git_version "${_git_revision}") endif() if(${parent_scope}) - set(${name}_VERSION_MAJOR "${_version_major}" PARENT_SCOPE) - set(${name}_VERSION_MINOR "${_version_minor}" PARENT_SCOPE) - set(${name}_VERSION_RELEASE "${_version_release}" PARENT_SCOPE) - set(${name}_VERSION_REVISION "${_version_revision}" PARENT_SCOPE) - set(${name}_VERSION "${_version_4dot}" PARENT_SCOPE) + set(${name}_VERSION_MAJOR + "${_version_major}" + PARENT_SCOPE) + set(${name}_VERSION_MINOR + "${_version_minor}" + PARENT_SCOPE) + set(${name}_VERSION_RELEASE + "${_version_release}" + PARENT_SCOPE) + set(${name}_VERSION_REVISION + "${_version_revision}" + PARENT_SCOPE) + set(${name}_VERSION + "${_version_4dot}" + PARENT_SCOPE) - 
set(${name}_GIT_DESCRIBE "${_git_describe}" PARENT_SCOPE) - set(${name}_GIT_TIMESTAMP "${_git_timestamp}" PARENT_SCOPE) - set(${name}_GIT_TREE "${_git_tree}" PARENT_SCOPE) - set(${name}_GIT_COMMIT "${_git_commit}" PARENT_SCOPE) - set(${name}_GIT_REVISION "${_git_revision}" PARENT_SCOPE) + set(${name}_GIT_DESCRIBE + "${_git_describe}" + PARENT_SCOPE) + set(${name}_GIT_TIMESTAMP + "${_git_timestamp}" + PARENT_SCOPE) + set(${name}_GIT_TREE + "${_git_tree}" + PARENT_SCOPE) + set(${name}_GIT_COMMIT + "${_git_commit}" + PARENT_SCOPE) + set(${name}_GIT_REVISION + "${_git_revision}" + PARENT_SCOPE) else() set(${name}_VERSION_MAJOR "${_version_major}") set(${name}_VERSION_MINOR "${_version_minor}") @@ -342,13 +452,18 @@ macro(fetch_version name source_root_directory parent_scope build_directory_for_ endif() if(_version_from STREQUAL "git") - string(CONFIGURE "{ + string( + CONFIGURE + "{ \"git_describe\" : \"@_git_describe@\", \"git_timestamp\" : \"@_git_timestamp@\", \"git_tree\" : \"@_git_tree@\", \"git_commit\" : \"@_git_commit@\", - \"version_4dot\" : \"@_version_4dot@\"\n}" _versioninfo_json @ONLY ESCAPE_QUOTES) - file(WRITE "${build_directory_for_json_output}/VERSION.json" "${_versioninfo_json}") + \"version_4dot\" : \"@_version_4dot@\"\n}" + _versioninfo_json + @ONLY ESCAPE_QUOTES) + file(WRITE "${build_directory_for_json_output}/VERSION.json" + "${_versioninfo_json}") endif() endmacro(fetch_version) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d4322f61..6784b96b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,38 +1,37 @@ -## Copyright (c) 2020-2024 Леонид Юрьев aka Leonid Yuriev -## SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2020-2024 Леонид Юрьев aka Leonid Yuriev +# SPDX-License-Identifier: Apache-2.0 enable_language(CXX) include(../cmake/compiler.cmake) set(LIBMDBX_TEST_SOURCES - base.h++ - cases.c++ - chrono.c++ - chrono.h++ - config.c++ - config.h++ - copy.c++ - dead.c++ - hill.c++ - jitter.c++ - keygen.c++ - 
keygen.h++ - log.c++ - log.h++ - main.c++ - osal.h++ - osal-unix.c++ - osal-windows.c++ - test.c++ - test.h++ - try.c++ - utils.c++ - utils.h++ - append.c++ - ttl.c++ - nested.c++ - fork.c++ - ) + base.h++ + cases.c++ + chrono.c++ + chrono.h++ + config.c++ + config.h++ + copy.c++ + dead.c++ + hill.c++ + jitter.c++ + keygen.c++ + keygen.h++ + log.c++ + log.h++ + main.c++ + osal.h++ + osal-unix.c++ + osal-windows.c++ + test.c++ + test.h++ + try.c++ + utils.c++ + utils.h++ + append.c++ + ttl.c++ + nested.c++ + fork.c++) if(NOT MDBX_BUILD_CXX) probe_libcxx_filesystem() @@ -43,12 +42,13 @@ add_executable(mdbx_test ${LIBMDBX_TEST_SOURCES}) target_compile_definitions(mdbx_test PRIVATE MDBX_BUILD_TEST=1 MDBX_BUILD_CXX=1) if(MDBX_CXX_STANDARD) - set_target_properties(mdbx_test PROPERTIES - CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) + set_target_properties(mdbx_test PROPERTIES CXX_STANDARD ${MDBX_CXX_STANDARD} + CXX_STANDARD_REQUIRED ON) endif() -set_target_properties(mdbx_test PROPERTIES - INTERPROCEDURAL_OPTIMIZATION $) +set_target_properties( + mdbx_test PROPERTIES INTERPROCEDURAL_OPTIMIZATION + $) target_setup_options(mdbx_test) if(NOT MDBX_BUILD_CXX) @@ -59,15 +59,17 @@ if(NOT MDBX_BUILD_CXX) endif() if(NOT MDBX_BUILD_CXX AND LIBCXX_FILESYSTEM) - if(CMAKE_COMPILER_IS_ELBRUSCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 1.25.23 - AND NOT CMAKE_VERSION VERSION_LESS 3.13) + if(CMAKE_COMPILER_IS_ELBRUSCXX + AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 1.25.23 + AND NOT CMAKE_VERSION VERSION_LESS 3.13) target_link_options(mdbx_test PRIVATE "-Wl,--allow-multiple-definition") endif() target_link_libraries(mdbx_test ${LIBCXX_FILESYSTEM}) endif() if(CMAKE_VERSION VERSION_LESS 3.1) - target_link_libraries(mdbx_test ${TOOL_MDBX_LIB} ${LIB_MATH} ${CMAKE_THREAD_LIBS_INIT}) + target_link_libraries(mdbx_test ${TOOL_MDBX_LIB} ${LIB_MATH} + ${CMAKE_THREAD_LIBS_INIT}) else() target_link_libraries(mdbx_test ${TOOL_MDBX_LIB} ${LIB_MATH} Threads::Threads) endif() @@ -79,10 
+81,14 @@ function(add_extra_test name) set(options DISABLED) set(oneValueArgs TIMEOUT) set(multiValueArgs SOURCE LIBRARY DEPEND DLLPATH) - cmake_parse_arguments(params "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(params "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) if(params_UNPARSED_ARGUMENTS) - message(FATAL_ERROR "Unknown keywords given to add_extra_test(): \"${params_UNPARSED_ARGUMENTS}\".") + message( + FATAL_ERROR + "Unknown keywords given to add_extra_test(): \"${params_UNPARSED_ARGUMENTS}\"." + ) endif() macro(oops) @@ -97,13 +103,12 @@ function(add_extra_test name) add_executable(${target} ${params_SOURCE}) target_include_directories(${target} PRIVATE "${PROJECT_SOURCE_DIR}") target_link_libraries(${target} ${TOOL_MDBX_LIB}) - set_target_properties(${target} PROPERTIES - SKIP_BUILD_RPATH FALSE - BUILD_WITH_INSTALL_RPATH FALSE) + set_target_properties(${target} PROPERTIES SKIP_BUILD_RPATH FALSE + BUILD_WITH_INSTALL_RPATH FALSE) if(MDBX_BUILD_CXX AND MDBX_CXX_STANDARD) - set_target_properties(${target} PROPERTIES - CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) + set_target_properties(${target} PROPERTIES CXX_STANDARD ${MDBX_CXX_STANDARD} + CXX_STANDARD_REQUIRED ON) endif() if(params_DEPEND) @@ -115,26 +120,32 @@ function(add_extra_test name) foreach(dep IN LISTS params_LIBRARY) get_target_property(type ${dep} TYPE) if(type STREQUAL SHARED_LIBRARY) - # Windows don't have RPATH feature, - # therefore we should prepare PATH or copy DLL(s)... + # Windows don't have RPATH feature, therefore we should prepare PATH or + # copy DLL(s)... 
if(CMAKE_CONFIGURATION_TYPES) - # Could not provide static ENVIRONMENT property with configuration-depended path + # Could not provide static ENVIRONMENT property with + # configuration-depended path set(dir FALSE) else(CMAKE_CONFIGURATION_TYPES) - get_target_property(filename ${dep} IMPORTED_LOCATION_${CMAKE_BUILD_TYPE_UPPERCASE}) + get_target_property(filename ${dep} + IMPORTED_LOCATION_${CMAKE_BUILD_TYPE_UPPERCASE}) if(NOT filename) get_target_property(filename ${dep} IMPORTED_LOCATION) endif() - get_target_property(filename ${dep} LOCATION_${CMAKE_BUILD_TYPE_UPPERCASE}) + get_target_property(filename ${dep} + LOCATION_${CMAKE_BUILD_TYPE_UPPERCASE}) if(NOT filename) get_target_property(filename ${dep} LOCATION) endif() if(filename) get_filename_component(dir ${filename} DIRECTORY) else(filename) - get_target_property(dir ${dep} LIBRARY_OUTPUT_DIRECTORY_${CMAKE_BUILD_TYPE_UPPERCASE}) + get_target_property( + dir ${dep} LIBRARY_OUTPUT_DIRECTORY_${CMAKE_BUILD_TYPE_UPPERCASE}) if(NOT dir) - get_target_property(dir ${dep} RUNTIME_OUTPUT_DIRECTORY_${CMAKE_BUILD_TYPE_UPPERCASE}) + get_target_property( + dir ${dep} + RUNTIME_OUTPUT_DIRECTORY_${CMAKE_BUILD_TYPE_UPPERCASE}) endif() if(NOT dir) get_target_property(dir ${dep} LIBRARY_OUTPUT_DIRECTORY) @@ -148,23 +159,35 @@ function(add_extra_test name) list(APPEND params_DLLPATH ${dir}) else(dir) # Path is configuration-depended or not available, should copy dll - add_custom_command(TARGET ${target} POST_BUILD - COMMAND if exist "$" - ${CMAKE_COMMAND} -E copy_if_different "$" "$") - add_custom_command(TARGET ${target} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different "$" "$" - COMMENT "${TOOL_MDBX_DLLCRUTCH}: Copy shared library ${dep} for test ${target}") + add_custom_command( + TARGET ${target} + POST_BUILD + COMMAND + if exist "$" ${CMAKE_COMMAND} -E + copy_if_different "$" + "$") + add_custom_command( + TARGET ${target} + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "$" "$" + COMMENT + 
"${TOOL_MDBX_DLLCRUTCH}: Copy shared library ${dep} for test ${target}" + ) endif(dir) endif() endforeach(dep) endif(TOOL_MDBX_DLLCRUTCH) - if(NOT params_DISABLED AND NOT (CMAKE_CROSSCOMPILING AND NOT CMAKE_CROSSCOMPILING_EMULATOR)) + if(NOT params_DISABLED AND NOT (CMAKE_CROSSCOMPILING + AND NOT CMAKE_CROSSCOMPILING_EMULATOR)) add_test(extra_${name} ${MDBX_OUTPUT_DIR}/${target}) if(params_TIMEOUT) - if(MEMORYCHECK_COMMAND OR CMAKE_MEMORYCHECK_COMMAND OR ENABLE_MEMCHECK) - # FIXME: unless there are any other ideas how to fix the - # timeouts problem when testing under Valgrind. + if(MEMORYCHECK_COMMAND + OR CMAKE_MEMORYCHECK_COMMAND + OR ENABLE_MEMCHECK) + # FIXME: unless there are any other ideas how to fix the timeouts + # problem when testing under Valgrind. math(EXPR params_TIMEOUT "${params_TIMEOUT} * 42") endif() set_tests_properties(extra_${name} PROPERTIES TIMEOUT ${params_TIMEOUT}) @@ -183,7 +206,8 @@ function(add_extra_test name) else() string(REPLACE ";" ":" params_DLLPATH_ENV "${params_DLLPATH_ENV}") endif() - set_tests_properties(extra_${name} PROPERTIES ENVIRONMENT "PATH=${params_DLLPATH_ENV}") + set_tests_properties(extra_${name} + PROPERTIES ENVIRONMENT "PATH=${params_DLLPATH_ENV}") endif() endif() endfunction(add_extra_test) @@ -196,80 +220,116 @@ if(NOT SUBPROJECT) endif() endif() -################################################################################ +# ############################################################################## -if (CMAKE_CROSSCOMPILING AND NOT CMAKE_CROSSCOMPILING_EMULATOR) +if(CMAKE_CROSSCOMPILING AND NOT CMAKE_CROSSCOMPILING_EMULATOR) message(WARNING "No emulator to run cross-compiled tests") - add_test(NAME fake_since_no_crosscompiling_emulator COMMAND ${CMAKE_COMMAND} -E - echo "No emulator to run cross-compiled tests") + add_test(NAME fake_since_no_crosscompiling_emulator + COMMAND ${CMAKE_COMMAND} -E echo + "No emulator to run cross-compiled tests") else() - string(RANDOM LENGTH 9 ALPHABET "1234567890" 
test_seed) - message(STATUS "The ${test_seed} will be used for seeding tests. Re-run cmake to re-seed it.") + string( + RANDOM + LENGTH 9 + ALPHABET "1234567890" test_seed) + message( + STATUS + "The ${test_seed} will be used for seeding tests. Re-run cmake to re-seed it." + ) - add_test(NAME smoke COMMAND ${MDBX_OUTPUT_DIR}/mdbx_test - --loglevel=verbose - --prng-seed=${test_seed} - --progress --console=no --pathname=smoke.db --dont-cleanup-after basic) - set_tests_properties(smoke PROPERTIES - TIMEOUT 600 - RUN_SERIAL OFF) + add_test( + NAME smoke + COMMAND + ${MDBX_OUTPUT_DIR}/mdbx_test --loglevel=verbose --prng-seed=${test_seed} + --progress --console=no --pathname=smoke.db --dont-cleanup-after basic) + set_tests_properties(smoke PROPERTIES TIMEOUT 600 RUN_SERIAL OFF) if(MDBX_BUILD_TOOLS) add_test(NAME smoke_chk COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvv smoke.db) - set_tests_properties(smoke_chk PROPERTIES - DEPENDS smoke - TIMEOUT 60 - FAIL_REGULAR_EXPRESSION "cooperative mode" - REQUIRED_FILES smoke.db) - add_test(NAME smoke_chk_copy COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvv smoke.db-copy) - set_tests_properties(smoke_chk_copy PROPERTIES - DEPENDS smoke - TIMEOUT 60 - FAIL_REGULAR_EXPRESSION "cooperative mode" - REQUIRED_FILES smoke.db-copy) + set_tests_properties( + smoke_chk + PROPERTIES DEPENDS + smoke + TIMEOUT + 60 + FAIL_REGULAR_EXPRESSION + "cooperative mode" + REQUIRED_FILES + smoke.db) + add_test(NAME smoke_chk_copy COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvv + smoke.db-copy) + set_tests_properties( + smoke_chk_copy + PROPERTIES DEPENDS + smoke + TIMEOUT + 60 + FAIL_REGULAR_EXPRESSION + "cooperative mode" + REQUIRED_FILES + smoke.db-copy) endif() - add_test(NAME dupsort_writemap COMMAND ${MDBX_OUTPUT_DIR}/mdbx_test - --loglevel=notice - --prng-seed=${test_seed} - --table=+data.fixed --keygen.split=29 --datalen=rnd --progress --console=no - --repeat=2 --pathname=dupsort_writemap.db --dont-cleanup-after basic) - set_tests_properties(dupsort_writemap 
PROPERTIES - TIMEOUT 3600 - RUN_SERIAL OFF) + add_test( + NAME dupsort_writemap + COMMAND + ${MDBX_OUTPUT_DIR}/mdbx_test --loglevel=notice --prng-seed=${test_seed} + --table=+data.fixed --keygen.split=29 --datalen=rnd --progress + --console=no --repeat=2 --pathname=dupsort_writemap.db + --dont-cleanup-after basic) + set_tests_properties(dupsort_writemap PROPERTIES TIMEOUT 3600 RUN_SERIAL OFF) if(MDBX_BUILD_TOOLS) - add_test(NAME dupsort_writemap_chk COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvvwc dupsort_writemap.db) - set_tests_properties(dupsort_writemap_chk PROPERTIES - DEPENDS dupsort_writemap - TIMEOUT 60 - REQUIRED_FILES dupsort_writemap.db) - add_test(NAME dupsort_writemap_chk_copy COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvvc dupsort_writemap.db-copy) - set_tests_properties(dupsort_writemap_chk_copy PROPERTIES - DEPENDS dupsort_writemap - TIMEOUT 60 - FAIL_REGULAR_EXPRESSION "monopolistic mode" - REQUIRED_FILES dupsort_writemap.db-copy) + add_test(NAME dupsort_writemap_chk COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk + -nvvwc dupsort_writemap.db) + set_tests_properties( + dupsort_writemap_chk PROPERTIES DEPENDS dupsort_writemap TIMEOUT 60 + REQUIRED_FILES dupsort_writemap.db) + add_test(NAME dupsort_writemap_chk_copy + COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvvc dupsort_writemap.db-copy) + set_tests_properties( + dupsort_writemap_chk_copy + PROPERTIES DEPENDS + dupsort_writemap + TIMEOUT + 60 + FAIL_REGULAR_EXPRESSION + "monopolistic mode" + REQUIRED_FILES + dupsort_writemap.db-copy) endif() - add_test(NAME uniq_nested COMMAND ${MDBX_OUTPUT_DIR}/mdbx_test - --loglevel=notice - --mode=-writemap,-nosync-safe,-lifo --progress --console=no --repeat=2 --pathname=uniq_nested.db --dont-cleanup-after basic) - set_tests_properties(uniq_nested PROPERTIES - TIMEOUT 1800 - RUN_SERIAL OFF) + add_test( + NAME uniq_nested + COMMAND + ${MDBX_OUTPUT_DIR}/mdbx_test --loglevel=notice + --mode=-writemap,-nosync-safe,-lifo --progress --console=no --repeat=2 + --pathname=uniq_nested.db 
--dont-cleanup-after basic) + set_tests_properties(uniq_nested PROPERTIES TIMEOUT 1800 RUN_SERIAL OFF) if(MDBX_BUILD_TOOLS) - add_test(NAME uniq_nested_chk COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvvw uniq_nested.db) - set_tests_properties(uniq_nested_chk PROPERTIES - DEPENDS uniq_nested - TIMEOUT 60 - FAIL_REGULAR_EXPRESSION "cooperative mode" - REQUIRED_FILES uniq_nested.db) - add_test(NAME uniq_nested_chk_copy COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvv uniq_nested.db-copy) - set_tests_properties(uniq_nested_chk_copy PROPERTIES - DEPENDS uniq_nested - TIMEOUT 60 - FAIL_REGULAR_EXPRESSION "cooperative mode" - REQUIRED_FILES uniq_nested.db-copy) + add_test(NAME uniq_nested_chk COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvvw + uniq_nested.db) + set_tests_properties( + uniq_nested_chk + PROPERTIES DEPENDS + uniq_nested + TIMEOUT + 60 + FAIL_REGULAR_EXPRESSION + "cooperative mode" + REQUIRED_FILES + uniq_nested.db) + add_test(NAME uniq_nested_chk_copy COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvv + uniq_nested.db-copy) + set_tests_properties( + uniq_nested_chk_copy + PROPERTIES DEPENDS + uniq_nested + TIMEOUT + 60 + FAIL_REGULAR_EXPRESSION + "cooperative mode" + REQUIRED_FILES + uniq_nested.db-copy) endif() if(NOT SUBPROJECT) From 0306ba8136022d5948c1065fe49d5b9cd48b9a6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 13 Nov 2024 19:16:26 +0300 Subject: [PATCH 334/443] =?UTF-8?q?mdbx-dist:=20=D0=BE=D1=82=D0=BA=D0=BB?= =?UTF-8?q?=D1=8E=D1=87=D0=B5=D0=BD=D0=B8=D0=B5=20`clang-format`=20=D0=B2?= =?UTF-8?q?=20=D0=B0=D0=BC=D0=B0=D0=BB=D1=8C=D0=B3=D0=B0=D0=BC=D0=B8=D1=80?= =?UTF-8?q?=D0=BE=D0=B2=D0=B0=D0=BD=D0=BD=D0=BE=D0=BC=20=D0=B8=D1=81=D1=85?= =?UTF-8?q?=D0=BE=D0=B4=D0=BD=D0=BE=D0=BC=20=D0=BA=D0=BE=D0=B4=D0=B5.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GNUmakefile | 16 ++++++++++------ src/tools/chk.c | 2 ++ 2 files 
changed, 12 insertions(+), 6 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index 7c2b0291..7c1f0cae 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -362,7 +362,7 @@ define uname2titer endef DIST_EXTRA := LICENSE NOTICE README.md CMakeLists.txt GNUmakefile Makefile ChangeLog.md VERSION.json config.h.in ntdll.def \ - $(addprefix man1/, $(MANPAGES)) cmake/compiler.cmake cmake/profile.cmake cmake/utils.cmake + $(addprefix man1/, $(MANPAGES)) cmake/compiler.cmake cmake/profile.cmake cmake/utils.cmake .clang-format-ignore DIST_SRC := mdbx.h mdbx.h++ mdbx.c mdbx.c++ $(addsuffix .c, $(MDBX_TOOLS)) TEST_DB ?= $(shell [ -d /dev/shm ] && echo /dev/shm || echo /tmp)/mdbx-test.db @@ -711,7 +711,7 @@ dist/mdbx.c: dist/@tmp-internals.inc $(lastword $(MAKEFILE_LIST)) -e '/#include "debug_begin.h"/r src/debug_begin.h' \ -e '/#include "debug_end.h"/r src/debug_end.h' \ ) | sed -e '/#include "/d;/#pragma once/d' -e 's|@INCLUDE|#include|' \ - -e '/ clang-format o/d;/ \*INDENT-O/d' >$@ + -e '/ clang-format o/d;/ \*INDENT-O/d' -e '3i /* clang-format off */' | cat -s >$@ dist/mdbx.c++: dist/@tmp-essentials.inc src/mdbx.c++ $(lastword $(MAKEFILE_LIST)) @echo ' MAKE $@' @@ -719,7 +719,7 @@ dist/mdbx.c++: dist/@tmp-essentials.inc src/mdbx.c++ $(lastword $(MAKEFILE_LIST) -e '/#define xMDBX_ALLOY/d' \ -e '/#include "/d;/#pragma once/d' \ -e 's|@INCLUDE|#include|;s|"mdbx.h"|"mdbx.h++"|' \ - -e '/ clang-format o/d;/ \*INDENT-O/d' >$@ + -e '/ clang-format o/d;/ \*INDENT-O/d' -e '3i /* clang-format off */' | cat -s >$@ define dist-tool-rule dist/mdbx_$(1).c: src/tools/$(1).c src/tools/wingetopt.h src/tools/wingetopt.c \ @@ -731,7 +731,7 @@ dist/mdbx_$(1).c: src/tools/$(1).c src/tools/wingetopt.h src/tools/wingetopt.c \ -e '/ clang-format o/d' -e '/ \*INDENT-O/d' \ src/tools/$(1).c \ | sed -e '/#include "/d;/#pragma once/d;/#define xMDBX_ALLOY/d' -e 's|@INCLUDE|#include|' \ - -e '/ clang-format o/d;/ \*INDENT-O/d' >$$@ + -e '/ clang-format o/d;/ \*INDENT-O/d' -e '9i /* clang-format off */' | 
cat -s >$$@ endef $(foreach file,$(TOOLS),$(eval $(call dist-tool-rule,$(file)))) @@ -739,15 +739,19 @@ $(foreach file,$(TOOLS),$(eval $(call dist-tool-rule,$(file)))) define dist-extra-rule dist/$(1): $(1) src/version.c $(lastword $(MAKEFILE_LIST)) @echo ' REFINE $$@' - $(QUIET)mkdir -p $$(dir $$@) && sed -e '/^#> dist-cutoff-begin/,/^#< dist-cutoff-end/d' $$< >$$@ + $(QUIET)mkdir -p $$(dir $$@) && sed -e '/^#> dist-cutoff-begin/,/^#< dist-cutoff-end/d' $$< | cat -s >$$@ endef -$(foreach file,mdbx.h mdbx.h++ $(filter-out man1/% VERSION.json %.in ntdll.def,$(DIST_EXTRA)),$(eval $(call dist-extra-rule,$(file)))) +$(foreach file,mdbx.h mdbx.h++ $(filter-out man1/% VERSION.json .clang-format-ignore %.in ntdll.def,$(DIST_EXTRA)),$(eval $(call dist-extra-rule,$(file)))) dist/VERSION.json: src/version.c @echo ' MAKE $@' $(QUIET)mkdir -p dist/ && echo "{ \"git_describe\": \"$(MDBX_GIT_DESCRIBE)\", \"git_timestamp\": \"$(MDBX_GIT_TIMESTAMP)\", \"git_tree\": \"$(shell git show --no-patch --format=%T HEAD 2>&1)\", \"git_commit\": \"$(shell git show --no-patch --format=%H HEAD 2>&1)\", \"version_4dot\": \"$(MDBX_GIT_VERSION).$(MDBX_GIT_REVISION)\" }" >$@ +dist/.clang-format-ignore: $(lastword $(MAKEFILE_LIST)) + @echo ' MAKE $@' + $(QUIET)echo "$(filter-out %.h %h++,$(DIST_SRC))" | tr ' ' \\n > $@ + dist/ntdll.def: src/ntdll.def @echo ' COPY $@' $(QUIET)mkdir -p dist/ && cp $< $@ diff --git a/src/tools/chk.c b/src/tools/chk.c index 75586632..69b5de01 100644 --- a/src/tools/chk.c +++ b/src/tools/chk.c @@ -1,5 +1,7 @@ /// \copyright SPDX-License-Identifier: Apache-2.0 /// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 +/// + /// /// mdbx_chk.c - memory-mapped database check tool /// From 47f96b6afa7df33e20e970390ab79280000a70d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 15 Nov 2024 15:58:48 +0300 Subject: [PATCH 335/443] 
=?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index 639a106b..aca30385 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -28,6 +28,8 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Исправлено проверяемое условие внутри `assert()` в пути обработки `MDBX_GET/NEXT/PREV_MULTIPLE`. - На 32-битных платформах разрешено использовть 4-байтное выравнивание при получении 64-битных значений посредством `MDBX_MULTIPLE`. - Добавлен костыль для устранения проблем из-за некорректной обработки `[[gnu::pure]]` в Apple Clang. + - Поправлено определение `MDBX_DEPRECATED_ENUM` для старых компиляторов при включении С++11. + - Доработано использование `std::experimental::filesystem`. Новое: @@ -45,6 +47,8 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Для размерных констант `mdbx::env::geometry` базовый тип изменен с беззнакового `size_t` на знаковый `intptr_t`. - Включен стандарт `C23` в CMake-скриптах сборки. - Добавлены T-макросы для парных `char`/`wchar_t` функций. + - Поддержка вложенных пишущих транзакций в C++ API. + - Экспорт информации о версии в `VERSION.json`. Мелочи: @@ -62,6 +66,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Уточнение описания `mdbx_dbi_close()` для случая хендлов измененных таблиц. - Добавление теста `extra/early_close_dbi`. - Доработка скрипта стохастического теста и его переименование в `stochastic.sh`. + - Доработка тестов для совместимости с режимами сборки до С++17. 
-------------------------------------------------------------------------------- From f32d3f260fbf76533c909197efb14765d6f18c52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 16 Nov 2024 11:11:26 +0300 Subject: [PATCH 336/443] =?UTF-8?q?mdbx:=20=D0=B1=D0=B5=D0=B7=D1=83=D1=81?= =?UTF-8?q?=D0=BB=D0=BE=D0=B2=D0=BD=D0=BE=D0=B5=20=D0=BF=D1=80=D0=B5=D1=80?= =?UTF-8?q?=D1=8B=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20=D1=82=D1=80=D0=B0=D0=BD?= =?UTF-8?q?=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B8=20=D0=BF=D1=80=D0=B8=20?= =?UTF-8?q?=D0=BE=D0=BF=D1=86=D0=B8=D0=B8=20`MDBX=5FCP=5FDISPOSE=5FTXN`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/copy.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/copy.c b/src/copy.c index 7aaaba73..fd3cdc2e 100644 --- a/src/copy.c +++ b/src/copy.c @@ -838,10 +838,8 @@ __cold static int copy2pathname(MDBX_txn *txn, const pathchar_t *dest_path, __cold int mdbx_txn_copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, MDBX_copy_flags_t flags) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - rc = copy2fd(txn, fd, flags); + if (likely(rc == MDBX_SUCCESS)) + rc = copy2fd(txn, fd, flags); if (flags & MDBX_CP_DISPOSE_TXN) mdbx_txn_abort(txn); return rc; @@ -882,10 +880,8 @@ __cold int mdbx_txn_copy2pathnameW(MDBX_txn *txn, const wchar_t *dest_path, MDBX_copy_flags_t flags) { #endif /* Windows */ int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - rc = copy2pathname(txn, dest_path, flags); + if (likely(rc == MDBX_SUCCESS)) + rc = copy2pathname(txn, dest_path, flags); if (flags & MDBX_CP_DISPOSE_TXN) mdbx_txn_abort(txn); return rc; From f5b1e36b9ea2bbe0caaca47a829233460a69be8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= 
=?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 16 Nov 2024 11:21:27 +0300 Subject: [PATCH 337/443] =?UTF-8?q?mdbx-testing:=20=D1=83=D1=81=D1=82?= =?UTF-8?q?=D1=80=D0=B0=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BB=D0=B8=D1=88?= =?UTF-8?q?=D0=BD=D0=B5=D0=B9=20=D1=83=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2?= =?UTF-8?q?=D0=BA=D0=B8=20prng=20=D0=BF=D1=80=D0=B8=20=D0=B7=D0=B0=D0=B2?= =?UTF-8?q?=D0=B5=D1=80=D1=88=D0=B5=D0=BD=D0=B8=D0=B8=20=D1=86=D0=B8=D0=BA?= =?UTF-8?q?=D0=BB=D0=B0=20=D1=82=D0=B5=D1=81=D1=82=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/test.c++ | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/test.c++ b/test/test.c++ index 079f50c7..9c827fc5 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -815,6 +815,10 @@ static bool execute_thunk(const actor_config *const_config, std::unique_ptr test(registry::create_actor(config, pid)); size_t iter = 0; do { + if (iter) { + prng_seed(config.params.prng_seed += INT32_C(0xA4F4D37B)); + log_verbose("turn PRNG to %u", config.params.prng_seed); + } iter++; if (!test->setup()) { log_notice("test setup failed"); @@ -837,8 +841,6 @@ static bool execute_thunk(const actor_config *const_config, size_t(config.params.nrepeat)); else log_verbose("test successfully (iteration %zi)", iter); - prng_seed(config.params.prng_seed += INT32_C(0xA4F4D37B)); - log_verbose("turn PRNG to %u", config.params.prng_seed); } } while (config.params.nrepeat == 0 || iter < config.params.nrepeat); @@ -856,7 +858,7 @@ bool test_execute(const actor_config &config) { return execute_thunk(&config, osal_getpid()); #ifdef _MSC_VER } __except (seh_filter(GetExceptionInformation(), stderr)) { - fprintf(stderr, "Exception \n"); + fprintf(stderr, "Exception\n"); return false; } #endif From 12442bd1f4e238ca12365bfd39f8430dadeeab66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= 
=?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 16 Nov 2024 11:10:29 +0300 Subject: [PATCH 338/443] =?UTF-8?q?mdbx-testing:=20=D0=BA=D0=BE=D1=80?= =?UTF-8?q?=D1=80=D0=B5=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20?= =?UTF-8?q?=D0=BA=D0=BE=D0=BD=D1=82=D1=80=D0=BE=D0=BB=D1=8F=20=D1=80=D0=B5?= =?UTF-8?q?=D0=B7=D1=83=D0=BB=D1=8C=D1=82=D0=B0=D1=82=D0=B0=20=D0=B2=20cop?= =?UTF-8?q?y-=D1=81=D1=86=D0=B5=D0=BD=D0=B0=D1=80=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/copy.c++ | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/test/copy.c++ b/test/copy.c++ index 188fd1b1..44676122 100644 --- a/test/copy.c++ +++ b/test/copy.c++ @@ -43,13 +43,16 @@ void testcase_copy::copy_db(const bool with_compaction) { txn_begin(ro); err = mdbx_txn_copy2pathname(txn_guard.get(), copy_pathname.c_str(), flags); - if (unlikely(err != MDBX_SUCCESS && (!throttle || err != MDBX_OUSTED) && - (!enable_renew && err != MDBX_MVCC_RETARDED))) + txn_end(err != MDBX_SUCCESS || flipcoin()); + if (unlikely( + err != MDBX_SUCCESS && !(throttle && err == MDBX_OUSTED) && + !(!enable_renew && err == MDBX_MVCC_RETARDED) && + !(err == MDBX_EINVAL && !ro && + (flags & (MDBX_CP_THROTTLE_MVCC | MDBX_CP_RENEW_TXN)) != 0))) failure_perror(with_compaction ? 
"mdbx_txn_copy2pathname(MDBX_CP_COMPACT)" : "mdbx_txn_copy2pathname(MDBX_CP_ASIS)", err); - txn_end(err != MDBX_SUCCESS || flipcoin()); } while (err != MDBX_SUCCESS); } } From c13efb791f001686c69c91d40af45596ff727201 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 16 Nov 2024 11:20:32 +0300 Subject: [PATCH 339/443] =?UTF-8?q?mdbx-testing:=20=D0=BB=D0=BE=D0=B3?= =?UTF-8?q?=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20=D1=84=D0=BB?= =?UTF-8?q?=D0=B0=D0=B3=D0=BE=D0=B2/=D0=BE=D0=BF=D1=86=D0=B8=D0=B9=20?= =?UTF-8?q?=D0=B2=20copy-=D1=81=D1=86=D0=B5=D0=BD=D0=B0=D1=80=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/copy.c++ | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/copy.c++ b/test/copy.c++ index 44676122..e21a1318 100644 --- a/test/copy.c++ +++ b/test/copy.c++ @@ -23,6 +23,8 @@ void testcase_copy::copy_db(const bool with_compaction) { if (flipcoin()) { err = mdbx_env_copy(db_guard.get(), copy_pathname.c_str(), with_compaction ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS); + log_verbose("mdbx_env_copy(%s), err %d", with_compaction ? "true" : "false", + err); if (unlikely(err != MDBX_SUCCESS)) failure_perror(with_compaction ? 
"mdbx_env_copy(MDBX_CP_COMPACT)" : "mdbx_env_copy(MDBX_CP_ASIS)", @@ -43,6 +45,7 @@ void testcase_copy::copy_db(const bool with_compaction) { txn_begin(ro); err = mdbx_txn_copy2pathname(txn_guard.get(), copy_pathname.c_str(), flags); + log_verbose("mdbx_txn_copy2pathname(flags=0x%X), err %d", flags, err); txn_end(err != MDBX_SUCCESS || flipcoin()); if (unlikely( err != MDBX_SUCCESS && !(throttle && err == MDBX_OUSTED) && From 92dec0bca9b6f16fa4157c86b969a5097f418971 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 16 Nov 2024 23:43:26 +0300 Subject: [PATCH 340/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=83=D1=82=D0=B5=D1=87?= =?UTF-8?q?=D0=BA=D0=B8=20=D0=BF=D0=B0=D0=BC=D1=8F=D1=82=D0=B8=20=D0=B8?= =?UTF-8?q?=D0=B7-=D0=B7=D0=B0=20=D1=80=D0=B5=D0=B3=D1=80=D0=B5=D1=81?= =?UTF-8?q?=D1=81=D0=B0=20=D0=B2=20`txn=5Fend()`=20=D0=BF=D1=80=D0=B8=20?= =?UTF-8?q?=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B8=20?= =?UTF-8?q?=D0=BF=D0=B0=D1=80=D0=BA=D0=BE=D0=B2=D0=BA=D0=B8=20=D1=82=D1=80?= =?UTF-8?q?=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Если читающая транзакция была припаркована и затем вытеснена, то при её завершении ресурсы не освобождались. 
--- src/proto.h | 7 +++---- src/txn.c | 8 ++++---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/proto.h b/src/proto.h index 80f9394b..28562eb2 100644 --- a/src/proto.h +++ b/src/proto.h @@ -61,20 +61,19 @@ MDBX_INTERNAL int txn_unpark(MDBX_txn *txn); MDBX_INTERNAL int txn_check_badbits_parked(const MDBX_txn *txn, int bad_bits); #define TXN_END_NAMES \ - {"committed", "empty-commit", "abort", "reset", \ - "reset-tmp", "fail-begin", "fail-beginchild", "ousted"} + {"committed", "empty-commit", "abort", "reset", \ + "fail-begin", "fail-beginchild", "ousted", nullptr} enum { /* txn_end operation number, for logging */ TXN_END_COMMITTED, TXN_END_PURE_COMMIT, TXN_END_ABORT, TXN_END_RESET, - TXN_END_RESET_TMP, TXN_END_FAIL_BEGIN, TXN_END_FAIL_BEGINCHILD, TXN_END_OUSTED, - TXN_END_OPMASK = 0x0F /* mask for txn_end() operation number */, + TXN_END_OPMASK = 0x07 /* mask for txn_end() operation number */, TXN_END_UPDATE = 0x10 /* update env state (DBIs) */, TXN_END_FREE = 0x20 /* free txn unless it is env.basal_txn */, TXN_END_EOTDONE = 0x40 /* txn's cursors already closed */, diff --git a/src/txn.c b/src/txn.c index bd8a1a59..724fe2b1 100644 --- a/src/txn.c +++ b/src/txn.c @@ -1350,11 +1350,11 @@ int txn_end(MDBX_txn *txn, unsigned mode) { MDBX_env *env = txn->env; static const char *const names[] = TXN_END_NAMES; - DEBUG("%s txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO + DEBUG("%s txn %" PRIaTXN "%c-0x%X %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, names[mode & TXN_END_OPMASK], txn->txnid, - (txn->flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, (void *)env, - txn->dbs[MAIN_DBI].root, txn->dbs[FREE_DBI].root); + (txn->flags & MDBX_TXN_RDONLY) ? 
'r' : 'w', txn->flags, (void *)txn, + (void *)env, txn->dbs[MAIN_DBI].root, txn->dbs[FREE_DBI].root); if (!(mode & TXN_END_EOTDONE)) /* !(already closed cursors) */ done_cursors(txn, false); @@ -1374,7 +1374,7 @@ int txn_end(MDBX_txn *txn, unsigned mode) { } else { if ((mode & TXN_END_OPMASK) != TXN_END_OUSTED && safe64_read(&slot->tid) == MDBX_TID_TXN_OUSTED) - mode = (mode & TXN_END_OPMASK) | TXN_END_OUSTED; + mode = (mode & ~TXN_END_OPMASK) | TXN_END_OUSTED; do { safe64_reset(&slot->txnid, false); atomic_store64(&slot->tid, txn->owner, mo_AcquireRelease); From efaa46d7cd276e72c3b5709f3668675851caca13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 17 Nov 2024 10:24:03 +0300 Subject: [PATCH 341/443] =?UTF-8?q?mdbx:=20=D0=BF=D1=80=D0=B5=D0=B4=D0=BE?= =?UTF-8?q?=D1=82=D0=B2=D1=80=D0=B0=D1=89=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BD?= =?UTF-8?q?=D0=B5=D0=B7=D0=BD=D0=B0=D1=87=D0=B0=D1=89=D0=B8=D1=85,=20?= =?UTF-8?q?=D0=BD=D0=BE=20=D0=BC=D0=B5=D1=88=D0=B0=D1=8E=D1=89=D0=B8=D1=85?= =?UTF-8?q?=20=D0=BE=D1=82=D0=BB=D0=B0=D0=B4=D0=BA=D0=B5,=20=D0=BE=D1=88?= =?UTF-8?q?=D0=B8=D0=B1=D0=BE=D0=BA=20=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B8?= =?UTF-8?q?=20`copy2fd()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/copy.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/copy.c b/src/copy.c index fd3cdc2e..ad80a815 100644 --- a/src/copy.c +++ b/src/copy.c @@ -762,10 +762,12 @@ __cold static int copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, rc = mdbx_txn_unpark(txn, false); } - if (flags & MDBX_CP_THROTTLE_MVCC) - mdbx_txn_park(txn, true); - else if (flags & MDBX_CP_DISPOSE_TXN) - mdbx_txn_reset(txn); + if (txn->flags & MDBX_TXN_RDONLY) { + if (flags & MDBX_CP_THROTTLE_MVCC) + mdbx_txn_park(txn, true); + else if (flags & MDBX_CP_DISPOSE_TXN) + mdbx_txn_reset(txn); + } if (!dest_is_pipe) { if 
(likely(rc == MDBX_SUCCESS) && (flags & MDBX_CP_DONT_FLUSH) == 0) From ddea36c54a234d3c6bf5854a96519da0027819dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 17 Nov 2024 17:42:00 +0300 Subject: [PATCH 342/443] =?UTF-8?q?mdbx:=20=D0=BE=D1=81=D0=B2=D0=BE=D0=B1?= =?UTF-8?q?=D0=BE=D0=B6=D0=B4=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=B0=D0=BC?= =?UTF-8?q?=D1=8F=D1=82=D0=B8=20=D1=81=D0=B1=D1=80=D0=BE=D1=88=D0=B5=D0=BD?= =?UTF-8?q?=D0=BD=D1=8B=D1=85/=D0=BF=D1=80=D0=B5=D1=80=D0=B2=D0=B0=D0=BD?= =?UTF-8?q?=D0=BD=D1=8B=D1=85=20=D1=87=D0=B8=D1=82=D0=B0=D1=8E=D1=89=D0=B8?= =?UTF-8?q?=D1=85=20=D1=82=D1=80=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8?= =?UTF-8?q?=D0=B9=20=D0=BF=D0=B5=D1=80=D0=B5=D0=B4=D0=B0=D0=B2=D0=B0=D0=B5?= =?UTF-8?q?=D0=BC=D1=8B=D1=85=20=D0=B2=20`mdbx=5Ftxn=5Fcommit()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Исторически в API была слабость/неоднозначность в жизненном цикле читающих транзакций: - В простейших сценариях читающие транзакции запускались посредством mdbx_txn_begin() и завершались посредством mdbx_txn_abort(), либо mdbx_txn_commit(); - Для экономии накладных расходов были предусмотрены функции mdbx_txn_reset() и mdbx_txn_renew(), которые сбрасывали/прерывали читающую транзакцию без её освобождения/разрушения и затем перезапускали её. При этом транзакции сброшенные посредством mdbx_txn_reset() должны были быть либо перезапущены, либо освобождены посредством mdbx_txn_abort(); - Заминка возникала при вызове mdbx_txn_commit() для читающих транзакций сброшенных/прерванных посредством mdbx_txn_reset(). В таких ситуациях возвращалась ошибка MDBX_BAD_TXN, а транзакция не освобождалась. Такое поведение вносило лишнюю асимметрию в API и способствовало появлению ошибок утечки ресурсов, но поддерживалось для совместимости. 
Этот коммит изменяет историческое поведение с нарушением совместимости, но делает API более регулярным и уменьшает вероятность ошибок утечки ресурсов. Теперь mdbx_txn_commit() освобождает/разрушает читающие транзакции сброшенные/прерванные посредством mdbx_txn_reset() возвращая при этом MDBX_RESULT_TRUE вместо MDBX_SUCCESS, по аналогии обработки фиксации аварийных пишущих транзакций. --- src/txn.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/txn.c b/src/txn.c index 724fe2b1..f6ac98d5 100644 --- a/src/txn.c +++ b/src/txn.c @@ -460,20 +460,22 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { int rc = check_txn(txn, MDBX_TXN_FINISHED); if (unlikely(rc != MDBX_SUCCESS)) { + if (rc == MDBX_BAD_TXN && (txn->flags & MDBX_TXN_RDONLY)) { + rc = MDBX_RESULT_TRUE; + goto fail; + } + bailout: if (latency) memset(latency, 0, sizeof(*latency)); return rc; } MDBX_env *const env = txn->env; -#if MDBX_ENV_CHECKPID - if (unlikely(env->pid != osal_getpid())) { + if (MDBX_ENV_CHECKPID && unlikely(env->pid != osal_getpid())) { env->flags |= ENV_FATAL_ERROR; - if (latency) - memset(latency, 0, sizeof(*latency)); - return MDBX_PANIC; + rc = MDBX_PANIC; + goto bailout; } -#endif /* MDBX_ENV_CHECKPID */ if (unlikely(txn->flags & MDBX_TXN_ERROR)) { rc = MDBX_RESULT_TRUE; From 5815ff2ef7d93a4efb33f34956c64dbdbc8fc17f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 19 Nov 2024 01:24:40 +0300 Subject: [PATCH 343/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=B5=D1=80=D0=B5=D0=B4?= =?UTF-8?q?=D0=B5=D0=BB=D0=BA=D0=B0=20=D0=BA=D0=BE=D1=81=D1=82=D1=8B=D0=BB?= =?UTF-8?q?=D1=8F=20`namespace::attr`=20=D0=B4=D0=BB=D1=8F=20MSVC=20=D0=B8?= =?UTF-8?q?=20Apple.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 
deletions(-) diff --git a/mdbx.h b/mdbx.h index 4b0ebbb9..56a532c7 100644 --- a/mdbx.h +++ b/mdbx.h @@ -191,28 +191,31 @@ typedef mode_t mdbx_mode_t; #ifndef __has_c_attribute #define __has_c_attribute(x) (0) +#define __has_c_attribute_qualified(x) 0 +#elif !defined(__STDC_VERSION__) || __STDC_VERSION__ < 202311L +#define __has_c_attribute_qualified(x) 0 +#elif defined(_MSC_VER) +/* MSVC don't support `namespace::attr` syntax */ +#define __has_c_attribute_qualified(x) 0 +#else +#define __has_c_attribute_qualified(x) __has_c_attribute(x) #endif /* __has_c_attribute */ #ifndef __has_cpp_attribute #define __has_cpp_attribute(x) 0 -#endif /* __has_cpp_attribute */ - -#ifndef __has_CXX_attribute -#if defined(__cplusplus) && \ - (!defined(_MSC_VER) || defined(__clang__) || _MSC_VER >= 1942) -#define __has_CXX_attribute(x) __has_cpp_attribute(x) +#define __has_cpp_attribute_qualified(x) 0 +#elif defined(_MSC_VER) +/* MSVC don't support `namespace::attr` syntax */ +#define __has_cpp_attribute_qualified(x) 0 #else -#define __has_CXX_attribute(x) 0 -#endif -#endif /* __has_CXX_attribute */ +#define __has_cpp_attribute_qualified(x) __has_cpp_attribute(x) +#endif /* __has_cpp_attribute */ #ifndef __has_C23_or_CXX_attribute #if defined(__cplusplus) -#define __has_C23_or_CXX_attribute(x) __has_CXX_attribute(x) -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ > 202311L -#define __has_C23_or_CXX_attribute(x) __has_c_attribute(x) +#define __has_C23_or_CXX_attribute(x) __has_cpp_attribute_qualified(x) #else -#define __has_C23_or_CXX_attribute(x) 0 +#define __has_C23_or_CXX_attribute(x) __has_c_attribute_qualified(x) #endif #endif /* __has_C23_or_CXX_attribute */ @@ -240,9 +243,7 @@ typedef mode_t mdbx_mode_t; * These functions should be declared with the attribute pure. 
*/ #if defined(DOXYGEN) #define MDBX_PURE_FUNCTION [[gnu::pure]] -#elif __has_C23_or_CXX_attribute(gnu::pure) && \ - (!defined(__apple_build_version__) || !defined(__clang_major__) || \ - __clang_major__ > 17) +#elif __has_C23_or_CXX_attribute(gnu::pure) #define MDBX_PURE_FUNCTION [[gnu::pure]] #elif (defined(__GNUC__) || __has_attribute(__pure__)) && \ (!defined(__clang__) /* https://bugs.llvm.org/show_bug.cgi?id=43275 */ || \ @@ -266,7 +267,7 @@ typedef mode_t mdbx_mode_t; #elif defined(__GNUC__) || \ (__has_attribute(__pure__) && __has_attribute(__nothrow__)) #define MDBX_NOTHROW_PURE_FUNCTION __attribute__((__pure__, __nothrow__)) -#elif __has_CXX_attribute(pure) +#elif __has_cpp_attribute(pure) #define MDBX_NOTHROW_PURE_FUNCTION [[pure]] #else #define MDBX_NOTHROW_PURE_FUNCTION @@ -284,9 +285,7 @@ typedef mode_t mdbx_mode_t; * It does not make sense for a const function to return void. */ #if defined(DOXYGEN) #define MDBX_CONST_FUNCTION [[gnu::const]] -#elif __has_C23_or_CXX_attribute(gnu::const) && \ - (!defined(__apple_build_version__) || !defined(__clang_major__) || \ - __clang_major__ > 17) +#elif __has_C23_or_CXX_attribute(gnu::const) #define MDBX_CONST_FUNCTION [[gnu::const]] #elif (defined(__GNUC__) || __has_attribute(__const__)) && \ (!defined(__clang__) /* https://bugs.llvm.org/show_bug.cgi?id=43275 */ || \ @@ -310,7 +309,7 @@ typedef mode_t mdbx_mode_t; #elif defined(__GNUC__) || \ (__has_attribute(__const__) && __has_attribute(__nothrow__)) #define MDBX_NOTHROW_CONST_FUNCTION __attribute__((__const__, __nothrow__)) -#elif __has_CXX_attribute(const) +#elif __has_cpp_attribute_qualified(const) #define MDBX_NOTHROW_CONST_FUNCTION [[const]] #else #define MDBX_NOTHROW_CONST_FUNCTION MDBX_NOTHROW_PURE_FUNCTION From 2b71df417e28938727d4c94a2fc9ad981daf1042 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 19 Nov 2024 20:14:56 +0300 Subject: [PATCH 
344/443] =?UTF-8?q?mdbx-windows:=20=D0=B8=D1=81=D0=BF?= =?UTF-8?q?=D0=BE=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20?= =?UTF-8?q?ntdll=20=D0=B2=D0=BC=D0=B5=D1=81=D1=82=D0=BE=20CRT=20=D1=82?= =?UTF-8?q?=D0=BE=D0=BB=D1=8C=D0=BA=D0=BE=20=D0=BF=D1=80=D0=B8=20=D1=8F?= =?UTF-8?q?=D0=B2=D0=BD=D0=BE=D0=BC=20=D0=BE=D1=82=D0=BA=D0=BB=D1=8E=D1=87?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B8=20C++=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Изменение поведения по умолчанию, но без утраты контроля. Без изменения: Определение опции MDBX_WITHOUT_MSVC_CRT в значение 0 или 1 позволяет явно выбирать между использованием ntdll и CRT. При этом включение C++ API (MDBX_BUILD_CXX=1) требует использования CRT. Ранее: По умолчанию, когда не определены опции MDBX_WITHOUT_MSVC_CRT и MDBX_BUILD_CXX, делался выбор в пользу использования ntdll, вместо CRT. Теперь: Функции ntdll будут использоваться вместо CRT только если явно выключена поддержка C++ API (задано MDBX_BUILD_CXX=0). --- src/options.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.h b/src/options.h index 8ae89183..1ca2a8bc 100644 --- a/src/options.h +++ b/src/options.h @@ -195,7 +195,7 @@ /** Avoid dependence from MSVC CRT and use ntdll.dll instead.
*/ #ifndef MDBX_WITHOUT_MSVC_CRT -#if !defined(MDBX_BUILD_CXX) || !MDBX_BUILD_CXX +#if defined(MDBX_BUILD_CXX) && !MDBX_BUILD_CXX #define MDBX_WITHOUT_MSVC_CRT 1 #else #define MDBX_WITHOUT_MSVC_CRT 0 From 881d4d420736c28f0ef4131c7666be2794ea4e0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 19 Nov 2024 22:42:08 +0300 Subject: [PATCH 345/443] =?UTF-8?q?mdbx-build:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20build-metadata=20=D0=B8?= =?UTF-8?q?=20=D0=BE=D0=BF=D1=86=D0=B8=D0=B8=20=D1=81=D0=B1=D0=BE=D1=80?= =?UTF-8?q?=D0=BA=D0=B8=20`MDBX=5FBUILD=5FMETADATA`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 6 +++++- GNUmakefile | 5 +++++ mdbx.h | 3 +++ src/config.h.in | 3 +++ src/global.c | 1 + src/options.h | 4 ++++ 6 files changed, 21 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 793b8a60..86af4bed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -648,6 +648,10 @@ endif() # ~~~ # ############################################################################## +set(MDBX_BUILD_METADATA + "" + CACHE STRING "An extra/custom information provided during libmdbx build") + set(MDBX_BUILD_OPTIONS ENABLE_UBSAN ENABLE_ASAN ENABLE_MEMCHECK ENABLE_GPROF ENABLE_GCOV) macro(add_mdbx_option NAME DESCRIPTION DEFAULT) @@ -663,7 +667,7 @@ if(IOS) set(MDBX_BUILD_TOOLS_DEFAULT OFF) if(NOT_SUBPROJECT) cmake_policy(SET CMP0006 OLD) - set(CMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED "NO") + set(CMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED NO) endif() else() set(MDBX_BUILD_TOOLS_DEFAULT ON) diff --git a/GNUmakefile b/GNUmakefile index 7c1f0cae..1b208662 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -58,6 +58,7 @@ CMAKE_OPT ?= MDBX_BUILD_OPTIONS ?=-DNDEBUG=1 MDBX_BUILD_TIMESTAMP ?=$(shell date +%Y-%m-%dT%H:%M:%S%z) MDBX_BUILD_CXX ?= YES 
+MDBX_BUILD_METADATA ?= "" # probe and compose common compiler flags with variable expansion trick (seems this work two times per session for GNU Make 3.81) CFLAGS ?= $(strip $(eval CFLAGS := -std=gnu11 -O2 -g -Wall -Werror -Wextra -Wpedantic -ffunction-sections -fPIC -fvisibility=hidden -pthread -Wno-error=attributes $$(shell for opt in -fno-semantic-interposition -Wno-unused-command-line-argument -Wno-tautological-compare; do [ -z "$$$$($(CC) '-DMDBX_BUILD_FLAGS="probe"' $$$${opt} -c $(SRC_PROBE_C) -o /dev/null >/dev/null 2>&1 || echo failed)" ] && echo "$$$${opt} "; done)$(CFLAGS_EXTRA))$(CFLAGS)) @@ -203,6 +204,7 @@ show-options: @echo " MDBX_BUILD_OPTIONS = $(MDBX_BUILD_OPTIONS)" @echo " MDBX_BUILD_CXX = $(MDBX_BUILD_CXX)" @echo " MDBX_BUILD_TIMESTAMP = $(MDBX_BUILD_TIMESTAMP)" + @echo " MDBX_BUILD_METADATA = $(MDBX_BUILD_METADATA)" @echo '$(TIP) Use `make options` to listing available build options.' @echo $(call select_by,MDBX_BUILD_CXX," CXX =`which $(CXX)` | `$(CXX) --version | head -1`"," CC =`which $(CC)` | `$(CC) --version | head -1`") @echo $(call select_by,MDBX_BUILD_CXX," CXXFLAGS =$(CXXFLAGS)"," CFLAGS =$(CFLAGS)") @@ -230,6 +232,7 @@ options: @echo "" @echo " MDBX_BUILD_OPTIONS = $(MDBX_BUILD_OPTIONS)" @echo " MDBX_BUILD_TIMESTAMP = $(MDBX_BUILD_TIMESTAMP)" + @echo " MDBX_BUILD_METADATA = $(MDBX_BUILD_METADATA)" @echo "" @echo "## Assortment items for MDBX_BUILD_OPTIONS:" @echo "## Note that the defaults should already be correct for most platforms;" @@ -300,6 +303,7 @@ config.h: @buildflags.tag mdbx.c $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE && echo '#define MDBX_BUILD_COMPILER "$(shell (LC_ALL=C $(CC) --version || echo 'Please use GCC or CLANG compatible compiler') | head -1)"' \ && echo '#define MDBX_BUILD_TARGET "$(shell set -o pipefail; (LC_ALL=C $(CC) -v 2>&1 | grep -i '^Target:' | cut -d ' ' -f 2- || (LC_ALL=C $(CC) --version | grep -qi e2k && echo E2K) || echo 'Please use GCC or CLANG compatible compiler') | head -1)"' \ && echo '#define 
MDBX_BUILD_CXX $(call select_by,MDBX_BUILD_CXX,1,0)' \ + && echo '#define MDBX_BUILD_METADATA "$(MDBX_BUILD_METADATA)"' \ ) >$@ mdbx-dylib.o: config.h mdbx.c mdbx.h $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE @@ -548,6 +552,7 @@ src/config.h: @buildflags.tag src/version.c $(lastword $(MAKEFILE_LIST)) LICENSE && echo '#define MDBX_BUILD_TARGET "$(shell set -o pipefail; (LC_ALL=C $(CC) -v 2>&1 | grep -i '^Target:' | cut -d ' ' -f 2- || (LC_ALL=C $(CC) --version | grep -qi e2k && echo E2K) || echo 'Please use GCC or CLANG compatible compiler') | head -1)"' \ && echo '#define MDBX_BUILD_SOURCERY $(MDBX_BUILD_SOURCERY)' \ && echo '#define MDBX_BUILD_CXX $(call select_by,MDBX_BUILD_CXX,1,0)' \ + && echo '#define MDBX_BUILD_METADATA "$(MDBX_BUILD_METADATA)"' \ ) >$@ mdbx-dylib.o: src/config.h src/version.c src/alloy.c $(ALLOY_DEPS) $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE diff --git a/mdbx.h b/mdbx.h index 56a532c7..1c729eee 100644 --- a/mdbx.h +++ b/mdbx.h @@ -682,6 +682,9 @@ extern LIBMDBX_VERINFO_API const struct MDBX_build_info { const char *options; /**< mdbx-related options */ const char *compiler; /**< compiler */ const char *flags; /**< CFLAGS and CXXFLAGS */ + const char *metadata; /**< an extra/custom information provided via + the MDBX_BUILD_METADATA definition + during library build */ } /** \brief libmdbx build information */ mdbx_build; #if (defined(_WIN32) || defined(_WIN64)) && !MDBX_BUILD_SHARED_LIBRARY diff --git a/src/config.h.in b/src/config.h.in index bd5b7c46..bba24605 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -70,6 +70,9 @@ #ifndef MDBX_BUILD_FLAGS #cmakedefine MDBX_BUILD_FLAGS "@MDBX_BUILD_FLAGS@" #endif +#ifndef MDBX_BUILD_METADATA +#cmakedefine MDBX_BUILD_METADATA "@MDBX_BUILD_METADATA@" +#endif #cmakedefine MDBX_BUILD_SOURCERY @MDBX_BUILD_SOURCERY@ /* *INDENT-ON* */ diff --git a/src/global.c b/src/global.c index 05755826..54c686c0 100644 --- a/src/global.c +++ b/src/global.c @@ -441,6 +441,7 @@ __dll_export #warning "Build flags 
undefined. Please use correct build script" #endif // _MSC_VER #endif + , MDBX_BUILD_METADATA }; #ifdef __SANITIZE_ADDRESS__ diff --git a/src/options.h b/src/options.h index 1ca2a8bc..89f64967 100644 --- a/src/options.h +++ b/src/options.h @@ -513,6 +513,10 @@ #define MDBX_AUXILARY_IOV_MAX IOV_MAX #endif /* MDBX_AUXILARY_IOV_MAX */ +/* An extra/custom information provided during library build */ +#ifndef MDBX_BUILD_METADATA +#define MDBX_BUILD_METADATA "" +#endif /* MDBX_BUILD_METADATA */ /** @} end of build options */ /******************************************************************************* ******************************************************************************* From 1c9c49dd1a7a2176c8652523b3f241f10378e48a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 19 Nov 2024 23:37:37 +0300 Subject: [PATCH 346/443] =?UTF-8?q?mdbx-build:=20=D0=BF=D0=BE=D0=B4=D0=B4?= =?UTF-8?q?=D0=B5=D1=80=D0=B6=D0=BA=D0=B0=20=D0=BF=D0=B5=D1=80=D0=B5=D0=BC?= =?UTF-8?q?=D0=B5=D0=BD=D0=BD=D0=BE=D0=B9=20=D1=81=D1=80=D0=B5=D0=B4=D1=8B?= =?UTF-8?q?=20`SOURCE=5FDATE=5FEPOCH`=20=D0=B2=20=D0=BA=D0=B0=D1=87=D0=B5?= =?UTF-8?q?=D1=81=D1=82=D0=B2=D0=B5=20`MDBX=5FBUILD=5FTIMESTAMP`=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20=D0=B2=D0=BE=D1=81=D0=BF=D1=80=D0=BE=D0=B8=D0=B7?= =?UTF-8?q?=D0=B2=D0=BE=D0=B4=D0=B8=D0=BC=D0=BE=D1=81=D1=82=D0=B8=20=D1=81?= =?UTF-8?q?=D0=B1=D0=BE=D1=80=D0=BE=D0=BA.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 6 +++++- GNUmakefile | 6 +++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 86af4bed..d6e17d17 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1216,7 +1216,11 @@ endif(MDBX_INSTALL_STATIC) # collect options & build info if(NOT DEFINED MDBX_BUILD_TIMESTAMP) - string(TIMESTAMP MDBX_BUILD_TIMESTAMP UTC) + if(NOT
"$ENV{SOURCE_DATE_EPOCH}" STREQUAL "") + set(MDBX_BUILD_TIMESTAMP "$ENV{SOURCE_DATE_EPOCH}") + else() + string(TIMESTAMP MDBX_BUILD_TIMESTAMP UTC) + endif() endif() set(MDBX_BUILD_FLAGS ${CMAKE_C_FLAGS}) if(MDBX_BUILD_CXX) diff --git a/GNUmakefile b/GNUmakefile index 1b208662..69b810da 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -56,9 +56,9 @@ CMAKE_OPT ?= # build options MDBX_BUILD_OPTIONS ?=-DNDEBUG=1 -MDBX_BUILD_TIMESTAMP ?=$(shell date +%Y-%m-%dT%H:%M:%S%z) -MDBX_BUILD_CXX ?= YES -MDBX_BUILD_METADATA ?= "" +MDBX_BUILD_TIMESTAMP ?=$(if $(SOURCE_DATE_EPOCH),$(SOURCE_DATE_EPOCH),$(shell date +%Y-%m-%dT%H:%M:%S%z)) +MDBX_BUILD_CXX ?=YES +MDBX_BUILD_METADATA ?= # probe and compose common compiler flags with variable expansion trick (seems this work two times per session for GNU Make 3.81) CFLAGS ?= $(strip $(eval CFLAGS := -std=gnu11 -O2 -g -Wall -Werror -Wextra -Wpedantic -ffunction-sections -fPIC -fvisibility=hidden -pthread -Wno-error=attributes $$(shell for opt in -fno-semantic-interposition -Wno-unused-command-line-argument -Wno-tautological-compare; do [ -z "$$$$($(CC) '-DMDBX_BUILD_FLAGS="probe"' $$$${opt} -c $(SRC_PROBE_C) -o /dev/null >/dev/null 2>&1 || echo failed)" ] && echo "$$$${opt} "; done)$(CFLAGS_EXTRA))$(CFLAGS)) From 21943496449ead66667d22c0d8f3ddf145ae157f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 21 Nov 2024 19:50:03 +0300 Subject: [PATCH 347/443] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B7=D0=B0=D1=86=D0=B8=D0=BA?= =?UTF-8?q?=D0=BB=D0=B8=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=BE=D0=B1=D0=BD?= =?UTF-8?q?=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D1=8F=20GC=20=D0=BF=D1=80?= =?UTF-8?q?=D0=B8=20=D1=84=D0=B8=D0=BA=D1=81=D0=B0=D1=86=D0=B8=D0=B8=20?= =?UTF-8?q?=D1=82=D1=80=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit В продолжение 6c56ed97bbd8ca46abac61886a113ba31e5f1291, включая исправление регрессов. --- src/gc-put.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/gc-put.c b/src/gc-put.c index be740cbf..c8648830 100644 --- a/src/gc-put.c +++ b/src/gc-put.c @@ -170,7 +170,7 @@ static int gcu_loose(MDBX_txn *txn, gcu_t *ctx) { if (err == MDBX_SUCCESS) { TRACE("%s: retry since gc-slot for %zu loose-pages available", dbg_prefix(ctx), txn->tw.loose_count); - return MDBX_SUCCESS; + return MDBX_RESULT_TRUE; } /* Put loose page numbers in tw.retired_pages, @@ -538,9 +538,9 @@ static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, goto return_error; } const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base); - if (ctx->rid >= gc_first) + if (ctx->rid >= gc_first && gc_first) ctx->rid = gc_first - 1; - if (unlikely(ctx->rid == 0)) { + if (unlikely(ctx->rid <= MIN_TXNID)) { ERROR("%s", "** no GC tail-space to store (going dense-mode)"); ctx->dense = true; goto return_restart; @@ -597,7 +597,7 @@ int gc_update(MDBX_txn *txn, gcu_t *ctx) { retry_clean_adj: ctx->reserve_adj = 0; retry: - ctx->loop += ctx->prev_first_unallocated == txn->geo.first_unallocated; + ctx->loop += !(ctx->prev_first_unallocated > txn->geo.first_unallocated); TRACE(">> restart, loop %u", ctx->loop); tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - @@ -671,8 +671,13 @@ retry: while (txn->tw.gc.last_reclaimed && ctx->cleaned_id <= txn->tw.gc.last_reclaimed) { rc = outer_first(&ctx->cursor, &key, nullptr); - if (rc == MDBX_NOTFOUND) + if (rc == MDBX_NOTFOUND) { + ctx->cleaned_id = txn->tw.gc.last_reclaimed + 1; + ctx->rid = txn->tw.gc.last_reclaimed; + ctx->reserved = 0; + ctx->reused_slot = 0; break; + } if (unlikely(rc != MDBX_SUCCESS)) goto bailout; if (!MDBX_DISABLE_VALIDATION && @@ -729,10 +734,12 @@ retry: if (txn->tw.loose_pages) { /* put loose pages into the reclaimed- or retired-list */ rc = 
gcu_loose(txn, ctx); - if (unlikely(rc != MDBX_SUCCESS)) + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc == MDBX_RESULT_TRUE) + continue; goto bailout; - if (unlikely(txn->tw.loose_pages)) - continue; + } + tASSERT(txn, txn->tw.loose_pages == 0); } if (unlikely(ctx->reserved > MDBX_PNL_GETSIZE(txn->tw.relist)) && @@ -865,6 +872,8 @@ retry: goto bailout; } + tASSERT(txn, + reservation_gc_id >= MIN_TXNID && reservation_gc_id <= MAX_TXNID); key.iov_len = sizeof(reservation_gc_id); key.iov_base = (void *)&reservation_gc_id; data.iov_len = (chunk + 1) * sizeof(pgno_t); From 8369b8ff64ba5fece3d4b3c0293906b99c6f048a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 22 Nov 2024 13:20:24 +0300 Subject: [PATCH 348/443] =?UTF-8?q?mdbx-cmake:=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D0=BC=D0=B5=D1=89=D0=B5=D0=BD=D0=B8=D0=B5=20`add=5Fextra=5Ftes?= =?UTF-8?q?t()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 146 ++++++++++++++++++++++---------------------- 1 file changed, 73 insertions(+), 73 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6784b96b..6ec8219a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -4,79 +4,6 @@ enable_language(CXX) include(../cmake/compiler.cmake) -set(LIBMDBX_TEST_SOURCES - base.h++ - cases.c++ - chrono.c++ - chrono.h++ - config.c++ - config.h++ - copy.c++ - dead.c++ - hill.c++ - jitter.c++ - keygen.c++ - keygen.h++ - log.c++ - log.h++ - main.c++ - osal.h++ - osal-unix.c++ - osal-windows.c++ - test.c++ - test.h++ - try.c++ - utils.c++ - utils.h++ - append.c++ - ttl.c++ - nested.c++ - fork.c++) - -if(NOT MDBX_BUILD_CXX) - probe_libcxx_filesystem() - list(APPEND LIBMDBX_TEST_SOURCES "${MDBX_SOURCE_DIR}/mdbx.c++" ../mdbx.h++) -endif() - -add_executable(mdbx_test ${LIBMDBX_TEST_SOURCES}) -target_compile_definitions(mdbx_test PRIVATE 
MDBX_BUILD_TEST=1 MDBX_BUILD_CXX=1) - -if(MDBX_CXX_STANDARD) - set_target_properties(mdbx_test PROPERTIES CXX_STANDARD ${MDBX_CXX_STANDARD} - CXX_STANDARD_REQUIRED ON) -endif() - -set_target_properties( - mdbx_test PROPERTIES INTERPROCEDURAL_OPTIMIZATION - $) -target_setup_options(mdbx_test) - -if(NOT MDBX_BUILD_CXX) - target_compile_definitions(mdbx_test PRIVATE MDBX_BUILD_CXX=1) - if(WIN32) - target_compile_definitions(mdbx_test PRIVATE MDBX_WITHOUT_MSVC_CRT=0) - endif() -endif() - -if(NOT MDBX_BUILD_CXX AND LIBCXX_FILESYSTEM) - if(CMAKE_COMPILER_IS_ELBRUSCXX - AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 1.25.23 - AND NOT CMAKE_VERSION VERSION_LESS 3.13) - target_link_options(mdbx_test PRIVATE "-Wl,--allow-multiple-definition") - endif() - target_link_libraries(mdbx_test ${LIBCXX_FILESYSTEM}) -endif() - -if(CMAKE_VERSION VERSION_LESS 3.1) - target_link_libraries(mdbx_test ${TOOL_MDBX_LIB} ${LIB_MATH} - ${CMAKE_THREAD_LIBS_INIT}) -else() - target_link_libraries(mdbx_test ${TOOL_MDBX_LIB} ${LIB_MATH} Threads::Threads) -endif() -if(WIN32) - target_link_libraries(mdbx_test winmm.lib) -endif() - function(add_extra_test name) set(options DISABLED) set(oneValueArgs TIMEOUT) @@ -212,6 +139,79 @@ function(add_extra_test name) endif() endfunction(add_extra_test) +set(LIBMDBX_TEST_SOURCES + base.h++ + cases.c++ + chrono.c++ + chrono.h++ + config.c++ + config.h++ + copy.c++ + dead.c++ + hill.c++ + jitter.c++ + keygen.c++ + keygen.h++ + log.c++ + log.h++ + main.c++ + osal.h++ + osal-unix.c++ + osal-windows.c++ + test.c++ + test.h++ + try.c++ + utils.c++ + utils.h++ + append.c++ + ttl.c++ + nested.c++ + fork.c++) + +if(NOT MDBX_BUILD_CXX) + probe_libcxx_filesystem() + list(APPEND LIBMDBX_TEST_SOURCES "${MDBX_SOURCE_DIR}/mdbx.c++" ../mdbx.h++) +endif() + +add_executable(mdbx_test ${LIBMDBX_TEST_SOURCES}) +target_compile_definitions(mdbx_test PRIVATE MDBX_BUILD_TEST=1 MDBX_BUILD_CXX=1) + +if(MDBX_CXX_STANDARD) + set_target_properties(mdbx_test PROPERTIES CXX_STANDARD 
${MDBX_CXX_STANDARD} + CXX_STANDARD_REQUIRED ON) +endif() + +set_target_properties( + mdbx_test PROPERTIES INTERPROCEDURAL_OPTIMIZATION + $) +target_setup_options(mdbx_test) + +if(NOT MDBX_BUILD_CXX) + target_compile_definitions(mdbx_test PRIVATE MDBX_BUILD_CXX=1) + if(WIN32) + target_compile_definitions(mdbx_test PRIVATE MDBX_WITHOUT_MSVC_CRT=0) + endif() +endif() + +if(NOT MDBX_BUILD_CXX AND LIBCXX_FILESYSTEM) + if(CMAKE_COMPILER_IS_ELBRUSCXX + AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 1.25.23 + AND NOT CMAKE_VERSION VERSION_LESS 3.13) + target_link_options(mdbx_test PRIVATE "-Wl,--allow-multiple-definition") + endif() + target_link_libraries(mdbx_test ${LIBCXX_FILESYSTEM}) +endif() + +if(CMAKE_VERSION VERSION_LESS 3.1) + target_link_libraries(mdbx_test ${TOOL_MDBX_LIB} ${LIB_MATH} + ${CMAKE_THREAD_LIBS_INIT}) +else() + target_link_libraries(mdbx_test ${TOOL_MDBX_LIB} ${LIB_MATH} Threads::Threads) +endif() +if(WIN32) + target_link_libraries(mdbx_test winmm.lib) +endif() + if(NOT SUBPROJECT) if(UNIX) add_executable(test_extra_pcrf extra/pcrf/pcrf_test.c) From b6a851b3d651d4b57d2ffe1ed69d1030fbd10709 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 22 Nov 2024 13:25:55 +0300 Subject: [PATCH 349/443] =?UTF-8?q?mdbx-testing:=20=D0=B4=D0=BE=D0=B1?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`extra/probe.c++`?= =?UTF-8?q?=20=D0=BF=D1=80=D0=BE=D1=81=D1=82=D0=BE=20=D0=B4=D0=BB=D1=8F=20?= =?UTF-8?q?=D0=BF=D1=80=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B8=20=D0=BA=D0=BE?= =?UTF-8?q?=D0=BC=D0=BF=D0=B8=D0=BB=D0=B8=D1=80=D1=83=D0=B5=D0=BC=D0=BE?= =?UTF-8?q?=D1=81=D1=82=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 4 +++- test/extra/open.c++ | 2 +- test/extra/probe.c++ | 11 +++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 test/extra/probe.c++ diff 
--git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6ec8219a..7e884c55 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -168,7 +168,9 @@ set(LIBMDBX_TEST_SOURCES nested.c++ fork.c++) -if(NOT MDBX_BUILD_CXX) +if(MDBX_BUILD_CXX) + add_extra_test(probe DISABLED) +else() probe_libcxx_filesystem() list(APPEND LIBMDBX_TEST_SOURCES "${MDBX_SOURCE_DIR}/mdbx.c++" ../mdbx.h++) endif() diff --git a/test/extra/open.c++ b/test/extra/open.c++ index d475182c..bc955c83 100644 --- a/test/extra/open.c++ +++ b/test/extra/open.c++ @@ -7,7 +7,7 @@ int main(int argc, const char *argv[]) { (void)argc; (void)argv; - std::cout << "FAKE-OK (since no C++20 std::thread and/or std::latch\n"; + std::cout << "FAKE-OK (since no C++20 std::thread and/or std::latch)\n"; return EXIT_SUCCESS; } diff --git a/test/extra/probe.c++ b/test/extra/probe.c++ new file mode 100644 index 00000000..c80f5f87 --- /dev/null +++ b/test/extra/probe.c++ @@ -0,0 +1,11 @@ +#include + +#include "mdbx.h++" + +int main(int argc, const char *argv[]) { + (void)argc; + (void)argv; + std::cout + << "OK (but this is do-nothing test just for a check for compilation)\n"; + return EXIT_SUCCESS; +} From aa3b39d9ed8850b193db0d17c8cdf65440228da8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 22 Nov 2024 18:33:18 +0300 Subject: [PATCH 350/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=BE=D1=82=D0=B5?= =?UTF-8?q?=D0=BD=D1=86=D0=B8=D0=B0=D0=BB=D1=8C=D0=BD=D0=BE=D0=B3=D0=BE=20?= =?UTF-8?q?=D0=BF=D0=BE=D0=B2=D1=82=D0=BE=D1=80=D0=BD=D0=BE=D0=B3=D0=BE=20?= =?UTF-8?q?=D0=BE=D0=BF=D1=80=D0=B5=D0=B4=D0=B5=D0=BB=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D1=8F=20`=5F=5Fhas=5Fexceptions=5Fdisabled`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/mdbx.h b/mdbx.h index 1c729eee..89d659ba 100644 --- a/mdbx.h +++ b/mdbx.h @@ -222,7 +222,7 @@ typedef mode_t mdbx_mode_t; #ifndef __has_feature #define __has_feature(x) (0) #define __has_exceptions_disabled (0) -#else +#elif !defined(__has_exceptions_disabled) #define __has_exceptions_disabled \ (__has_feature(cxx_noexcept) && !__has_feature(cxx_exceptions)) #endif /* __has_feature */ From 652587b33fcae550560915ef46a05b6e832e6e77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 22 Nov 2024 20:15:29 +0300 Subject: [PATCH 351/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D1=80=D0=BE=D0=B2=D0=B5?= =?UTF-8?q?=D1=80=D0=BA=D0=B8=20=D0=B8=20=D0=B8=D1=81=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20`=5F=5Fdepre?= =?UTF-8?q?cated=5Fenum`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mdbx.h b/mdbx.h index 89d659ba..3022810d 100644 --- a/mdbx.h +++ b/mdbx.h @@ -341,7 +341,9 @@ typedef mode_t mdbx_mode_t; #endif /* MDBX_DEPRECATED */ #ifndef MDBX_DEPRECATED_ENUM -#if !defined(DOXYGEN) && \ +#ifdef __deprecated_enum +#define MDBX_DEPRECATED_ENUM __deprecated_enum +#elif defined(DOXYGEN) || \ (!defined(_MSC_VER) || (defined(__cplusplus) && __cplusplus >= 201403L && \ __has_cpp_attribute(deprecated) && \ __has_cpp_attribute(deprecated) >= 201309L)) From 3110c2206f3eda5a09b23fffe96dfab9aad40aea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 23 Nov 2024 01:12:55 +0300 Subject: [PATCH 352/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 63 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index aca30385..6f3b662d 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -9,28 +9,6 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic Поддерживающий выпуск с исправлением обнаруженных ошибок и устранением недочетов. -Исправления: - - - Функция `mdbx_close_dbi()` доработана для возврата ошибки `MDBX_DANGLING_DBI` - при попытке закрыть dbi-дескриптор таблицы, созданной и/или измененной в - ещё выполняющейся транзакции. Такое преждевременное закрытие дескриптора - является неверным использованием API и нарушением контракта/предусловий - сформулированных в описании `mdbx_close_dbi()`. Однако, вместо возврата - ошибки выполнялось некорректное закрытие дескриптора, что могло - приводить к созданию таблицы с пустым именем, утечки страниц БД и/или - нарушению структуры b-tree (неверной ссылкой на корень таблицы). - - - Исправлено открытие таблицы с пустым/нулевым именем, в том числе устранена - возможность `SIGSEGV` при закрытии её дескриптора. - - - Добавлены упущенные inline-реализации `mdbx::cursor::upper_bound()` и `mdbx::cursor::upper_bound_multivalue()`. - - Продолжена корректировка описания С++ API для использования термина "таблица" вместо "sub-database". - - Исправлено проверяемое условие внутри `assert()` в пути обработки `MDBX_GET/NEXT/PREV_MULTIPLE`. - - На 32-битных платформах разрешено использовть 4-байтное выравнивание при получении 64-битных значений посредством `MDBX_MULTIPLE`. - - Добавлен костыль для устранения проблем из-за некорректной обработки `[[gnu::pure]]` в Apple Clang. - - Поправлено определение `MDBX_DEPRECATED_ENUM` для старых компиляторов при включении С++11. - - Доработано использование `std::experimental::filesystem`. 
- Новое: - Ускорено обновление GC при возврате/помещении списков страниц в @@ -41,14 +19,47 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic фиксации транзакций (с возвратом ошибки `MDBX_PROBLEM`) в редких специфических условиях. Подробности см. в описании коммита [`6c56ed97bbd8ca46abac61886a113ba31e5f1291`](https://gitflic.ru/project/erthink/libmdbx/commit/6c56ed97bbd8ca46abac61886a113ba31e5f1291). - - - Добавлен метод `mdbx::cursor::get_multiple_samelength()` и переименован `mdbx::txn::put_multiple_samelength()`. - - Для единообразия C++ API при выполнении операции `MDBX_GET_MULTIPLE` теперь также возвращается значение самого ключа. - - Для размерных констант `mdbx::env::geometry` базовый тип изменен с беззнакового `size_t` на знаковый `intptr_t`. - Включен стандарт `C23` в CMake-скриптах сборки. - Добавлены T-макросы для парных `char`/`wchar_t` функций. - Поддержка вложенных пишущих транзакций в C++ API. - Экспорт информации о версии в `VERSION.json`. + - Добавлена поддержка переменной среды `SOURCE_DATE_EPOCH` для воспроизводимости сборок. + Прежний способ посредством `MDBX_BUILD_TIMESTAMP` также работает и имеет приоритет. + - Добавлена возможность указывать дополнительную информацию о сборке libmdbx через опцию `MDBX_BUILD_METADATA`. + Сейчас задаваемая информация просто включается внутрь библиотеки в качестве значения `mdbx_build.metadata`, + а в дальнейшем также будет использоваться при формировании пакетов и т.п. + +Изменение поведения: + + - Добавлен метод `mdbx::cursor::get_multiple_samelength()` и переименован `mdbx::txn::put_multiple_samelength()`. + - Для единообразия C++ API при выполнении операции `MDBX_GET_MULTIPLE` теперь также возвращается значение самого ключа. + - Для размерных констант `mdbx::env::geometry` базовый тип изменен с беззнакового `size_t` на знаковый `intptr_t`. + - Теперь выбор в пользу использования ntdll вместо CRT делается только при явном отключении C++ API. 
+ - Теперь выполняется освобождение памяти сброшенных/прерванных читающих транзакций передаваемых в `mdbx_txn_commit()`. + Соглашение по API требует чтобы такие транзакции освобождались посредством `mdbx_txn_abort()`, из-за чего + функция `mdbx_txn_commit()` возвращала ошибку в таких случаях, не разрушая сами транзакции. + Это приводило к утечкам памяти из-за ошибок в приложениях, что побудило изменить поведение. + - Использование макроса `__deprecated_enum` если он определен. + +Исправления: + + - Функция `mdbx_close_dbi()` доработана для возврата ошибки `MDBX_DANGLING_DBI` + при попытке закрыть dbi-дескриптор таблицы, созданной и/или измененной в + ещё выполняющейся транзакции. Такое преждевременное закрытие дескриптора + является неверным использованием API и нарушением контракта/предусловий + сформулированных в описании `mdbx_close_dbi()`. Однако, вместо возврата + ошибки выполнялось некорректное закрытие дескриптора, что могло + приводить к созданию таблицы с пустым именем, утечки страниц БД и/или + нарушению структуры b-tree (неверной ссылкой на корень таблицы). + - Исправлено открытие таблицы с пустым/нулевым именем, в том числе устранена + возможность `SIGSEGV` при закрытии её дескриптора. + - Добавлены упущенные inline-реализации `mdbx::cursor::upper_bound()` и `mdbx::cursor::upper_bound_multivalue()`. + - Продолжена корректировка описания С++ API для использования термина "таблица" вместо "sub-database". + - Исправлено проверяемое условие внутри `assert()` в пути обработки `MDBX_GET/NEXT/PREV_MULTIPLE`. + - На 32-битных платформах разрешено использовать 4-байтное выравнивание при получении 64-битных значений посредством `MDBX_MULTIPLE`. + - Добавлен костыль для устранения проблем из-за некорректной обработки `[[gnu::pure]]` в Apple Clang и MSVC. + - Поправлено определение `MDBX_DEPRECATED_ENUM` для старых компиляторов при включении С++11. + - Доработано использование `std::experimental::filesystem` для решения проблем со сборкой в старых компиляторах. 
Мелочи: From 6508bd5a97edb7b4dfa5c7032b3d6aae6fa93ff6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 24 Nov 2024 17:15:20 +0300 Subject: [PATCH 353/443] =?UTF-8?q?mdbx-cmake:=20=D0=B2=D0=BA=D0=BB=D1=8E?= =?UTF-8?q?=D1=87=D0=B5=D0=BD=D0=B8=D0=B5=20CMP0054.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index d6e17d17..93890aa5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,6 +57,8 @@ else() set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_AVAILABLE FALSE) endif() +cmake_policy(SET CMP0054 NEW) + if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/COPYRIGHT" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" From bcf0a1273f414bcc57c30656d4477bdb021b0143 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 24 Nov 2024 17:50:58 +0300 Subject: [PATCH 354/443] =?UTF-8?q?mdbx-make:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`.WAIT`=20=D0=B4=D0=BB?= =?UTF-8?q?=D1=8F=20=D1=83=D1=81=D1=82=D1=80=D0=B0=D0=BD=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D1=8F=20=D0=BA=D0=BE=D0=BB=D0=BB=D0=B8=D0=B7=D0=B8=D0=B9=20?= =?UTF-8?q?=D0=BF=D1=80=D0=B8=20=D1=80=D0=B0=D1=81=D0=BF=D0=B0=D1=80=D0=B0?= =?UTF-8?q?=D0=BB=D0=BB=D0=B5=D0=BB=D0=B8=D0=B2=D0=B0=D0=BD=D0=B8=D0=B8=20?= =?UTF-8?q?=D1=81=D0=B1=D0=BE=D1=80=D0=BA=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GNUmakefile | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index 69b810da..48aa5867 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -1,4 +1,4 @@ -# This makefile is for GNU Make 3.80 or above, and 
nowadays provided +# This makefile is for GNU Make 3.81 or above, and nowadays provided # just for compatibility and preservation of traditions. # # Please use CMake in case of any difficulties or @@ -16,6 +16,7 @@ ifneq ($(make_lt_3_81),0) $(error Please use GNU Make 3.81 or above) endif make_ge_4_1 := $(shell expr "$(MAKE_VERx3)" ">=" " 4 1") +make_ge_4_4 := $(shell expr "$(MAKE_VERx3)" ">=" " 4 4") SRC_PROBE_C := $(shell [ -f mdbx.c ] && echo mdbx.c || echo src/osal.c) SRC_PROBE_CXX := $(shell [ -f mdbx.c++ ] && echo mdbx.c++ || echo src/mdbx.c++) UNAME := $(shell uname -s 2>/dev/null || echo Unknown) @@ -81,6 +82,13 @@ LDFLAGS ?= $(eval LDFLAGS := $$(shell $$(uname2ldflags)))$(LDFLAGS) LIB_STDCXXFS ?= $(eval LIB_STDCXXFS := $$(shell echo '$$(cxx_filesystem_probe)' | cat mdbx.h++ - | sed $$$$'1s/\xef\xbb\xbf//' | $(CXX) -x c++ $(CXXFLAGS) -Wno-error - -Wl,--allow-multiple-definition -lstdc++fs $(LIBS) $(LDFLAGS) $(EXE_LDFLAGS) -o /dev/null 2>probe4lstdfs.err >/dev/null && echo '-Wl,--allow-multiple-definition -lstdc++fs'))$(LIB_STDCXXFS) endif +ifneq ($(make_ge_4_4),1) +.NOTPARALLEL: +WAIT = +else +WAIT = .WAIT +endif + ################################################################################ define uname2sosuffix @@ -296,7 +304,7 @@ ifeq ($(wildcard mdbx.c),mdbx.c) # Amalgamated source code, i.e. 
distributed after `make dist` MAN_SRCDIR := man1/ -config.h: @buildflags.tag mdbx.c $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE +config.h: @buildflags.tag $(WAIT) mdbx.c $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE @echo ' MAKE $@' $(QUIET)(echo '#define MDBX_BUILD_TIMESTAMP "$(MDBX_BUILD_TIMESTAMP)"' \ && echo "#define MDBX_BUILD_FLAGS \"$$(cat @buildflags.tag)\"" \ @@ -544,7 +552,7 @@ src/version.c: src/version.c.in $(lastword $(MAKEFILE_LIST)) $(git_DIR)/HEAD $(g -e "s|\$${MDBX_VERSION_REVISION}|$(MDBX_GIT_REVISION)|" \ src/version.c.in >$@ -src/config.h: @buildflags.tag src/version.c $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE +src/config.h: @buildflags.tag $(WAIT) src/version.c $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE @echo ' MAKE $@' $(QUIET)(echo '#define MDBX_BUILD_TIMESTAMP "$(MDBX_BUILD_TIMESTAMP)"' \ && echo "#define MDBX_BUILD_FLAGS \"$$(cat @buildflags.tag)\"" \ @@ -614,7 +622,7 @@ mdbx++-static.o: src/config.h src/mdbx.c++ mdbx.h mdbx.h++ $(lastword $(MAKEFILE @echo ' CC $@' $(QUIET)$(CXX) $(CXXFLAGS) $(MDBX_BUILD_OPTIONS) '-DMDBX_CONFIG_H="config.h"' -ULIBMDBX_EXPORTS -c src/mdbx.c++ -o $@ -dist: tags @dist-checked.tag libmdbx-sources-$(MDBX_VERSION_IDENT).tar.gz $(lastword $(MAKEFILE_LIST)) +dist: tags $(WAIT) @dist-checked.tag libmdbx-sources-$(MDBX_VERSION_IDENT).tar.gz $(lastword $(MAKEFILE_LIST)) @echo ' AMALGAMATION is done' tags: From 9daff17c82c74b9a7795a63681182af49f471695 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 24 Nov 2024 20:46:21 +0300 Subject: [PATCH 355/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=BE=D0=B4=D0=B4=D0=B5?= =?UTF-8?q?=D1=80=D0=B6=D0=BA=D0=B0=20Semantic=20Versioning.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Было `MAJOR.MINOR.RELEASE.REVISION` Теперь `MAJOR.MINOR.PATCH[.TWEAK][-PRERELEASE][+BUILDMETADATA]` https://semver.org/ - вместо квартета 
`MAJOR.MINOR.RELEASE.REVISION` триплет с опциональным четвертым членом `MAJOR.MINOR.PATCH[.TWEAK]` - `TWEAK` не входит в тег git, а формируется автоматически и соответствует кол-ву коммитов после тега git и опускается, если 0. - Поле `PRERELEASE` опционально и переносится в версию из тега git. - Поле `BUILDMETADATA` опционально, не входит в тег git, а добавляется во время сборки, если задана опцией `MDBX_BUILD_METADATA`. --- CMakeLists.txt | 27 +- GNUmakefile | 49 ++-- cmake/utils.cmake | 645 ++++++++++++++++++++++++++-------------------- docs/Doxyfile.in | 2 +- mdbx.h | 11 +- src/tools/chk.c | 4 +- src/tools/copy.c | 4 +- src/tools/drop.c | 4 +- src/tools/dump.c | 4 +- src/tools/load.c | 4 +- src/tools/stat.c | 4 +- src/version.c.in | 6 +- 12 files changed, 430 insertions(+), 334 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 93890aa5..c8c25a74 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -195,6 +195,15 @@ else() "PLEASE, AVOID USING ANY OTHER TECHNIQUES.") endif() +# Provide version +include(cmake/utils.cmake) +set(MDBX_BUILD_METADATA + "${MDBX_BUILD_METADATA}" + CACHE STRING "An extra/custom information provided during libmdbx build") +semver_provide(MDBX "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}" + "${MDBX_BUILD_METADATA}" FALSE) +message(STATUS "libmdbx version is ${MDBX_VERSION}") + if(DEFINED PROJECT_NAME) option( MDBX_FORCE_BUILD_AS_MAIN_PROJECT @@ -304,7 +313,6 @@ if(NOT DEFINED THREADS_PREFER_PTHREAD_FLAG) endif() find_package(Threads REQUIRED) -include(cmake/utils.cmake) include(cmake/compiler.cmake) include(cmake/profile.cmake) @@ -650,10 +658,6 @@ endif() # ~~~ # ############################################################################## -set(MDBX_BUILD_METADATA - "" - CACHE STRING "An extra/custom information provided during libmdbx build") - set(MDBX_BUILD_OPTIONS ENABLE_UBSAN ENABLE_ASAN ENABLE_MEMCHECK ENABLE_GPROF ENABLE_GCOV) macro(add_mdbx_option NAME DESCRIPTION DEFAULT) @@ -812,11 +816,6 @@
if(MDBX_BUILD_CXX) probe_libcxx_filesystem() endif() -# Get version -fetch_version(MDBX "${CMAKE_CURRENT_SOURCE_DIR}" FALSE - "${CMAKE_CURRENT_BINARY_DIR}") -message(STATUS "libmdbx version is ${MDBX_VERSION}") - # sources list set(LIBMDBX_PUBLIC_HEADERS mdbx.h) set(LIBMDBX_SOURCES mdbx.h "${CMAKE_CURRENT_BINARY_DIR}/config.h") @@ -1370,11 +1369,9 @@ if(NOT SUBPROJECT) set(PACKAGE "libmdbx") set(CPACK_PACKAGE_VERSION_MAJOR ${MDBX_VERSION_MAJOR}) set(CPACK_PACKAGE_VERSION_MINOR ${MDBX_VERSION_MINOR}) - set(CPACK_PACKAGE_VERSION_PATCH ${MDBX_VERSION_RELEASE}) - set(CPACK_PACKAGE_VERSION_COMMIT ${MDBX_VERSION_REVISION}) - set(PACKAGE_VERSION - "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.${CPACK_PACKAGE_VERSION_COMMIT}" - ) + set(CPACK_PACKAGE_VERSION_PATCH ${MDBX_VERSION_PATCH}) + set(CPACK_PACKAGE_VERSION_TWEAK ${MDBX_VERSION_TWEAK}) + set(PACKAGE_VERSION ${MDBX_VERSION}) message(STATUS "libmdbx package version is ${PACKAGE_VERSION}") endif() diff --git a/GNUmakefile b/GNUmakefile index 48aa5867..01cc0d77 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -398,11 +398,14 @@ reformat: MAN_SRCDIR := src/man1/ ALLOY_DEPS := $(shell git ls-files src/ | grep -e /tools -e /man -v) -git_DIR := $(shell if [ -d .git ]; then echo .git; elif [ -s .git -a -f .git ]; then grep '^gitdir: ' .git | cut -d ':' -f 2; else echo git_directory_is_absent; fi) -MDBX_GIT_VERSION = $(shell set -o pipefail; git describe --tags '--match=v[0-9]*' 2>&- | sed -n 's|^v*\([0-9]\{1,\}\.[0-9]\{1,\}\.[0-9]\{1,\}\)\(.*\)|\1|p' || echo 'Please fetch tags and/or use non-obsolete git version') -MDBX_GIT_REVISION = $(shell set -o pipefail; git rev-list `git describe --tags --abbrev=0`..HEAD --count 2>&- || echo 'Please fetch tags and/or use non-obsolete git version') -MDBX_GIT_TIMESTAMP = $(shell git show --no-patch --format=%cI HEAD 2>&- || echo 'Please install latest get version') -MDBX_GIT_DESCRIBE = $(shell git describe --tags --long --dirty=-dirty 
'--match=v[0-9]*' 2>&- || echo 'Please fetch tags and/or install non-obsolete git version') +MDBX_GIT_DIR := $(shell if [ -d .git ]; then echo .git; elif [ -s .git -a -f .git ]; then grep '^gitdir: ' .git | cut -d ':' -f 2; else echo git_directory_is_absent; fi) +MDBX_GIT_LASTVTAG := $(shell git describe --tags --dirty=-DIRTY --abbrev=0 '--match=v[0-9]*' 2>&- || echo 'Please fetch tags and/or install non-obsolete git version') +MDBX_GIT_3DOT := $(shell set -o pipefail; echo "$(MDBX_GIT_LASTVTAG)" | sed -n 's|^v*\([0-9]\{1,\}\.[0-9]\{1,\}\.[0-9]\{1,\}\)\(.*\)|\1|p' || echo 'Please fetch tags and/or use non-obsolete git version') +MDBX_GIT_TWEAK := $(shell set -o pipefail; git rev-list $(shell git describe --tags --abbrev=0 '--match=v[0-9]*')..HEAD --count 2>&- || echo 'Please fetch tags and/or use non-obsolete git version') +MDBX_GIT_TIMESTAMP := $(shell git show --no-patch --format=%cI HEAD 2>&- || echo 'Please install latest get version') +MDBX_GIT_DESCRIBE := $(shell git describe --tags --long --dirty '--match=v[0-9]*' 2>&- || echo 'Please fetch tags and/or install non-obsolete git version') +MDBX_GIT_PRERELEASE := $(shell echo "$(MDBX_GIT_LASTVTAG)" | sed -n 's|^v*\([0-9]\{1,\}\.[0-9]\{1,\}\.[0-9]\{1,\}\)\(.*\)-\([-.0-1a-zA-Z]\+\)|\3|p') +MDBX_VERSION_PURE = $(MDBX_GIT_3DOT)$(if $(filter-out 0,$(MDBX_GIT_TWEAK)),.$(MDBX_GIT_TWEAK),)$(if $(MDBX_GIT_PRERELEASE),-$(MDBX_GIT_PRERELEASE),) MDBX_VERSION_IDENT = $(shell set -o pipefail; echo -n '$(MDBX_GIT_DESCRIBE)' | tr -c -s '[a-zA-Z0-9.]' _) MDBX_VERSION_NODOT = $(subst .,_,$(MDBX_VERSION_IDENT)) MDBX_BUILD_SOURCERY = $(shell set -o pipefail; $(MAKE) IOARENA=false CXXSTD= -s src/version.c >/dev/null && (openssl dgst -r -sha256 src/version.c || sha256sum src/version.c || shasum -a 256 src/version.c) 2>/dev/null | cut -d ' ' -f 1 || (echo 'Please install openssl or sha256sum or shasum' >&2 && echo sha256sum_is_no_available))_$(MDBX_VERSION_NODOT) @@ -529,7 +532,7 @@ mdbx_test: $(TEST_OBJ) libmdbx.$(SO_SUFFIX) @echo ' 
LD $@' $(QUIET)$(CXX) $(CXXFLAGS) $(TEST_OBJ) -Wl,-rpath . -L . -l mdbx $(EXE_LDFLAGS) $(LIBS) -o $@ -$(git_DIR)/HEAD $(git_DIR)/index $(git_DIR)/refs/tags: +$(MDBX_GIT_DIR)/HEAD $(MDBX_GIT_DIR)/index $(MDBX_GIT_DIR)/refs/tags: @echo '*** ' >&2 @echo '*** Please don''t use tarballs nor zips which are automatically provided by Github !' >&2 @echo '*** These archives do not contain version information and thus are unfit to build libmdbx.' >&2 @@ -539,17 +542,19 @@ $(git_DIR)/HEAD $(git_DIR)/index $(git_DIR)/refs/tags: @echo '*** ' >&2 @false -src/version.c: src/version.c.in $(lastword $(MAKEFILE_LIST)) $(git_DIR)/HEAD $(git_DIR)/index $(git_DIR)/refs/tags LICENSE NOTICE +src/version.c: src/version.c.in $(lastword $(MAKEFILE_LIST)) $(MDBX_GIT_DIR)/HEAD $(MDBX_GIT_DIR)/index $(MDBX_GIT_DIR)/refs/tags LICENSE NOTICE @echo ' MAKE $@' $(QUIET)sed \ -e "s|@MDBX_GIT_TIMESTAMP@|$(MDBX_GIT_TIMESTAMP)|" \ -e "s|@MDBX_GIT_TREE@|$(shell git show --no-patch --format=%T HEAD || echo 'Please install latest get version')|" \ -e "s|@MDBX_GIT_COMMIT@|$(shell git show --no-patch --format=%H HEAD || echo 'Please install latest get version')|" \ -e "s|@MDBX_GIT_DESCRIBE@|$(MDBX_GIT_DESCRIBE)|" \ - -e "s|\$${MDBX_VERSION_MAJOR}|$(shell echo '$(MDBX_GIT_VERSION)' | cut -d . -f 1)|" \ - -e "s|\$${MDBX_VERSION_MINOR}|$(shell echo '$(MDBX_GIT_VERSION)' | cut -d . -f 2)|" \ - -e "s|\$${MDBX_VERSION_RELEASE}|$(shell echo '$(MDBX_GIT_VERSION)' | cut -d . -f 3)|" \ - -e "s|\$${MDBX_VERSION_REVISION}|$(MDBX_GIT_REVISION)|" \ + -e "s|\$${MDBX_VERSION_MAJOR}|$(shell echo '$(MDBX_GIT_3DOT)' | cut -d . -f 1)|" \ + -e "s|\$${MDBX_VERSION_MINOR}|$(shell echo '$(MDBX_GIT_3DOT)' | cut -d . -f 2)|" \ + -e "s|\$${MDBX_VERSION_PATCH}|$(shell echo '$(MDBX_GIT_3DOT)' | cut -d . 
-f 3)|" \ + -e "s|\$${MDBX_VERSION_TWEAK}|$(MDBX_GIT_TWEAK)|" \ + -e "s|\$${MDBX_VERSION_PRERELEASE}|$(MDBX_GIT_PRERELEASE)|" \ + -e "s|\$${MDBX_VERSION_PURE}|$(MDBX_VERSION_PURE)|" \ src/version.c.in >$@ src/config.h: @buildflags.tag $(WAIT) src/version.c $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE @@ -578,10 +583,12 @@ docs/Doxyfile: docs/Doxyfile.in src/version.c $(lastword $(MAKEFILE_LIST)) -e "s|@MDBX_GIT_TREE@|$(shell git show --no-patch --format=%T HEAD || echo 'Please install latest get version')|" \ -e "s|@MDBX_GIT_COMMIT@|$(shell git show --no-patch --format=%H HEAD || echo 'Please install latest get version')|" \ -e "s|@MDBX_GIT_DESCRIBE@|$(MDBX_GIT_DESCRIBE)|" \ - -e "s|\$${MDBX_VERSION_MAJOR}|$(shell echo '$(MDBX_GIT_VERSION)' | cut -d . -f 1)|" \ - -e "s|\$${MDBX_VERSION_MINOR}|$(shell echo '$(MDBX_GIT_VERSION)' | cut -d . -f 2)|" \ - -e "s|\$${MDBX_VERSION_RELEASE}|$(shell echo '$(MDBX_GIT_VERSION)' | cut -d . -f 3)|" \ - -e "s|\$${MDBX_VERSION_REVISION}|$(MDBX_GIT_REVISION)|" \ + -e "s|\$${MDBX_VERSION_MAJOR}|$(shell echo '$(MDBX_GIT_3DOT)' | cut -d . -f 1)|" \ + -e "s|\$${MDBX_VERSION_MINOR}|$(shell echo '$(MDBX_GIT_3DOT)' | cut -d . -f 2)|" \ + -e "s|\$${MDBX_VERSION_PATCH}|$(shell echo '$(MDBX_GIT_3DOT)' | cut -d . -f 3)|" \ + -e "s|\$${MDBX_VERSION_TWEAK}|$(MDBX_GIT_TWEAK)|" \ + -e "s|\$${MDBX_VERSION_PRERELEASE}|$(MDBX_GIT_PRERELEASE)|" \ + -e "s|\$${MDBX_VERSION_PURE}|$(MDBX_VERSION_PURE)|" \ docs/Doxyfile.in >$@ define md-extract-section @@ -629,11 +636,11 @@ tags: @echo ' FETCH git tags...' 
$(QUIET)git fetch --tags --force -release-assets: libmdbx-amalgamated-$(MDBX_GIT_VERSION).zpaq \ - libmdbx-amalgamated-$(MDBX_GIT_VERSION).tar.xz \ - libmdbx-amalgamated-$(MDBX_GIT_VERSION).tar.bz2 \ - libmdbx-amalgamated-$(MDBX_GIT_VERSION).tar.gz \ - libmdbx-amalgamated-$(subst .,_,$(MDBX_GIT_VERSION)).zip +release-assets: libmdbx-amalgamated-$(MDBX_GIT_3DOT).zpaq \ + libmdbx-amalgamated-$(MDBX_GIT_3DOT).tar.xz \ + libmdbx-amalgamated-$(MDBX_GIT_3DOT).tar.bz2 \ + libmdbx-amalgamated-$(MDBX_GIT_3DOT).tar.gz \ + libmdbx-amalgamated-$(subst .,_,$(MDBX_GIT_3DOT)).zip $(QUIET)([ \ "$$(set -o pipefail; git describe | sed -n '/^v[0-9]\{1,\}\.[0-9]\{1,\}\.[0-9]\{1,\}$$/p' || echo fail-left)" \ == \ @@ -759,7 +766,7 @@ $(foreach file,mdbx.h mdbx.h++ $(filter-out man1/% VERSION.json .clang-format-ig dist/VERSION.json: src/version.c @echo ' MAKE $@' - $(QUIET)mkdir -p dist/ && echo "{ \"git_describe\": \"$(MDBX_GIT_DESCRIBE)\", \"git_timestamp\": \"$(MDBX_GIT_TIMESTAMP)\", \"git_tree\": \"$(shell git show --no-patch --format=%T HEAD 2>&1)\", \"git_commit\": \"$(shell git show --no-patch --format=%H HEAD 2>&1)\", \"version_4dot\": \"$(MDBX_GIT_VERSION).$(MDBX_GIT_REVISION)\" }" >$@ + $(QUIET)mkdir -p dist/ && echo "{ \"git_describe\": \"$(MDBX_GIT_DESCRIBE)\", \"git_timestamp\": \"$(MDBX_GIT_TIMESTAMP)\", \"git_tree\": \"$(shell git show --no-patch --format=%T HEAD 2>&1)\", \"git_commit\": \"$(shell git show --no-patch --format=%H HEAD 2>&1)\", \"semver\": \"$(MDBX_VERSION_PURE)\" }" >$@ dist/.clang-format-ignore: $(lastword $(MAKEFILE_LIST)) @echo ' MAKE $@' diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 3a5fddde..31cbf436 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -74,15 +74,313 @@ macro(set_source_files_compile_flags) unset(_lang) endmacro(set_source_files_compile_flags) -macro(fetch_version name source_root_directory parent_scope - build_directory_for_json_output) - set(_version_4dot "") +macro(semver_parse str) + set(_semver_ok FALSE) + 
set(_semver_err "") + set(_semver_major 0) + set(_semver_minor 0) + set(_semver_patch 0) + set(_semver_tweak_withdot "") + set(_semver_tweak "") + set(_semver_extra "") + set(_semver_prerelease_withdash "") + set(_semver_prerelease "") + set(_semver_buildmetadata_withplus "") + set(_semver_buildmetadata "") + if("${str}" + MATCHES + "^v?(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)(\\.(0|[1-9][0-9]*))?([-+]-*[0-9a-zA-Z]+.*)?$" + ) + set(_semver_major ${CMAKE_MATCH_1}) + set(_semver_minor ${CMAKE_MATCH_2}) + set(_semver_patch ${CMAKE_MATCH_3}) + set(_semver_tweak_withdot ${CMAKE_MATCH_4}) + set(_semver_tweak ${CMAKE_MATCH_5}) + set(_semver_extra "${CMAKE_MATCH_6}") + if("${_semver_extra}" STREQUAL "") + set(_semver_ok TRUE) + elseif("${_semver_extra}" MATCHES "^([.-][a-zA-Z0-9-]+)*(\\+[^+]+)?$") + set(_semver_prerelease_withdash "${CMAKE_MATCH_1}") + if(NOT "${_semver_prerelease_withdash}" STREQUAL "") + string(SUBSTRING "${_semver_prerelease_withdash}" 1 -1 + _semver_prerelease) + endif() + set(_semver_buildmetadata_withplus "${CMAKE_MATCH_2}") + if(NOT "${_semver_buildmetadata_withplus}" STREQUAL "") + string(SUBSTRING "${_semver_buildmetadata_withplus}" 1 -1 + _semver_buildmetadata) + endif() + set(_semver_ok TRUE) + else() + set(_semver_err + "Поля prerelease и/или buildmetadata (строка `-foo+bar` в составе `0.0.0[.0][-foo][+bar]`) не соответствуют SemVer-спецификации" + ) + endif() + else() + set(_semver_err + "Версионная отметка в целом не соответствует шаблону `0.0.0[.0][-foo][+bar]` SemVer-спецификации" + ) + endif() +endmacro(semver_parse) + +function(_semver_parse_probe str expect) + semver_parse(${str}) + if(expect AND NOT _semver_ok) + message( + FATAL_ERROR + "semver_parse(${str}) expect SUCCESS, got ${_semver_ok}: ${_semver_err}" + ) + elseif(NOT expect AND _semver_ok) + message(FATAL_ERROR "semver_parse(${str}) expect FAIL, got ${_semver_ok}") + endif() +endfunction() + +function(semver_parse_selfcheck) + _semver_parse_probe("0.0.4" TRUE) + 
_semver_parse_probe("v1.2.3" TRUE) + _semver_parse_probe("10.20.30" TRUE) + _semver_parse_probe("10.20.30.42" TRUE) + _semver_parse_probe("1.1.2-prerelease+meta" TRUE) + _semver_parse_probe("1.1.2+meta" TRUE) + _semver_parse_probe("1.1.2+meta-valid" TRUE) + _semver_parse_probe("1.0.0-alpha" TRUE) + _semver_parse_probe("1.0.0-beta" TRUE) + _semver_parse_probe("1.0.0-alpha.beta" TRUE) + _semver_parse_probe("1.0.0-alpha.beta.1" TRUE) + _semver_parse_probe("1.0.0-alpha.1" TRUE) + _semver_parse_probe("1.0.0-alpha0.valid" TRUE) + _semver_parse_probe("1.0.0-alpha.0valid" TRUE) + _semver_parse_probe("1.0.0-alpha-a.b-c-somethinglong+build.1-aef.1-its-okay" + TRUE) + _semver_parse_probe("1.0.0-rc.1+build.1" TRUE) + _semver_parse_probe("2.0.0-rc.1+build.123" TRUE) + _semver_parse_probe("1.2.3-beta" TRUE) + _semver_parse_probe("10.2.3-DEV-SNAPSHOT" TRUE) + _semver_parse_probe("1.2.3-SNAPSHOT-123" TRUE) + _semver_parse_probe("1.0.0" TRUE) + _semver_parse_probe("2.0.0" TRUE) + _semver_parse_probe("1.1.7" TRUE) + _semver_parse_probe("2.0.0+build.1848" TRUE) + _semver_parse_probe("2.0.1-alpha.1227" TRUE) + _semver_parse_probe("1.0.0-alpha+beta" TRUE) + _semver_parse_probe("1.2.3----RC-SNAPSHOT.12.9.1--.12+788" TRUE) + _semver_parse_probe("1.2.3----R-S.12.9.1--.12+meta" TRUE) + _semver_parse_probe("1.2.3----RC-SNAPSHOT.12.9.1--.12" TRUE) + _semver_parse_probe("1.0.0+0.build.1-rc.10000aaa-kk-0.1" TRUE) + _semver_parse_probe( + "99999999999999999999999.999999999999999999.99999999999999999" TRUE) + _semver_parse_probe("v1.0.0-0A.is.legal" TRUE) + + _semver_parse_probe("1" FALSE) + _semver_parse_probe("1.2" FALSE) + # _semver_parse_probe("1.2.3-0123" FALSE) + # _semver_parse_probe("1.2.3-0123.0123" FALSE) + _semver_parse_probe("1.1.2+.123" FALSE) + _semver_parse_probe("+invalid" FALSE) + _semver_parse_probe("-invalid" FALSE) + _semver_parse_probe("-invalid+invalid" FALSE) + _semver_parse_probe("-invalid.01" FALSE) + _semver_parse_probe("alpha" FALSE) + _semver_parse_probe("alpha.beta" 
FALSE) + _semver_parse_probe("alpha.beta.1" FALSE) + _semver_parse_probe("alpha.1" FALSE) + _semver_parse_probe("alpha+beta" FALSE) + _semver_parse_probe("alpha_beta" FALSE) + _semver_parse_probe("alpha." FALSE) + _semver_parse_probe("alpha.." FALSE) + _semver_parse_probe("beta" FALSE) + _semver_parse_probe("1.0.0-alpha_beta" FALSE) + _semver_parse_probe("-alpha." FALSE) + _semver_parse_probe("1.0.0-alpha.." FALSE) + _semver_parse_probe("1.0.0-alpha..1" FALSE) + _semver_parse_probe("1.0.0-alpha...1" FALSE) + _semver_parse_probe("1.0.0-alpha....1" FALSE) + _semver_parse_probe("1.0.0-alpha.....1" FALSE) + _semver_parse_probe("1.0.0-alpha......1" FALSE) + _semver_parse_probe("1.0.0-alpha.......1" FALSE) + _semver_parse_probe("01.1.1" FALSE) + _semver_parse_probe("1.01.1" FALSE) + _semver_parse_probe("1.1.01" FALSE) + _semver_parse_probe("1.2" FALSE) + _semver_parse_probe("1.2.3.DEV" FALSE) + _semver_parse_probe("1.2-SNAPSHOT" FALSE) + _semver_parse_probe("1.2.31.2.3----RC-SNAPSHOT.12.09.1--..12+788" FALSE) + _semver_parse_probe("1.2-RC-SNAPSHOT" FALSE) + _semver_parse_probe("-1.0.3-gamma+b7718" FALSE) + _semver_parse_probe("+justmeta" FALSE) + _semver_parse_probe("9.8.7+meta+meta" FALSE) + _semver_parse_probe("9.8.7-whatever+meta+meta" FALSE) + _semver_parse_probe( + "99999999999999999999999.999999999999999999.99999999999999999----RC-SNAPSHOT.12.09.1--------------------------------..12" + FALSE) +endfunction() + +macro(git_get_versioninfo source_root_directory) + set(_git_describe "") + set(_git_timestamp "") + set(_git_tree "") + set(_git_commit "") + set(_git_last_vtag "") + set(_git_trailing_commits 0) + set(_git_is_dirty FALSE) + + execute_process( + COMMAND ${GIT} show --no-patch --format=%cI HEAD + OUTPUT_VARIABLE _git_timestamp + OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY ${source_root_directory} + RESULT_VARIABLE _rc) + if(_rc OR _git_timestamp STREQUAL "%cI") + execute_process( + COMMAND ${GIT} show --no-patch --format=%ci HEAD + OUTPUT_VARIABLE 
_git_timestamp + OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY ${source_root_directory} + RESULT_VARIABLE _rc) + if(_rc OR _git_timestamp STREQUAL "%ci") + message( + FATAL_ERROR + "Please install latest version of git (`show --no-patch --format=%cI HEAD` failed)" + ) + endif() + endif() + + execute_process( + COMMAND ${GIT} show --no-patch --format=%T HEAD + OUTPUT_VARIABLE _git_tree + OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY ${source_root_directory} + RESULT_VARIABLE _rc) + if(_rc OR _git_tree STREQUAL "") + message( + FATAL_ERROR + "Please install latest version of git (`show --no-patch --format=%T HEAD` failed)" + ) + endif() + + execute_process( + COMMAND ${GIT} show --no-patch --format=%H HEAD + OUTPUT_VARIABLE _git_commit + OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY ${source_root_directory} + RESULT_VARIABLE _rc) + if(_rc OR _git_commit STREQUAL "") + message( + FATAL_ERROR + "Please install latest version of git (`show --no-patch --format=%H HEAD` failed)" + ) + endif() + + execute_process( + COMMAND ${GIT} status --untracked-files=no --porcelain + OUTPUT_VARIABLE _git_status + OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY ${source_root_directory} + RESULT_VARIABLE _rc) + if(_rc) + message( + FATAL_ERROR + "Please install latest version of git (`status --untracked-files=no --porcelain` failed)" + ) + endif() + if(NOT _git_status STREQUAL "") + set(_git_commit "DIRTY-${_git_commit}") + set(_git_is_dirty TRUE) + endif() + unset(_git_status) + + execute_process( + COMMAND ${GIT} describe --tags --abbrev=0 "--match=v[0-9]*" + OUTPUT_VARIABLE _git_last_vtag + OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY ${source_root_directory} + RESULT_VARIABLE _rc) + if(_rc OR _git_last_vtag STREQUAL "") + execute_process( + COMMAND ${GIT} tag + OUTPUT_VARIABLE _git_tags_dump + OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY ${source_root_directory} + RESULT_VARIABLE _rc) + execute_process( + COMMAND ${GIT} rev-list --count 
--no-merges --remove-empty HEAD + OUTPUT_VARIABLE _git_whole_count + OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY ${source_root_directory} + RESULT_VARIABLE _rc) + if(_rc) + message( + FATAL_ERROR + "Please install latest version of git (`git rev-list --count --no-merges --remove-empty HEAD` failed)" + ) + endif() + if(_git_whole_count GREATER 42 AND _git_tags_dump STREQUAL "") + message( + FATAL_ERROR + "Please fetch tags (`describe --tags --abbrev=0 --match=v[0-9]*` failed)" + ) + else() + message( + NOTICE + "Falling back to version `0.0.0` (have you made an initial release?") + endif() + set(_git_last_vtag "0.0.0") + set(_git_trailing_commits ${_git_whole_count}) + execute_process( + COMMAND ${GIT} describe --tags --dirty --long --always + OUTPUT_VARIABLE _git_describe + OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY ${source_root_directory} + RESULT_VARIABLE _rc) + if(_rc OR _git_describe STREQUAL "") + execute_process( + COMMAND ${GIT} describe --tags --all --dirty --long --always + OUTPUT_VARIABLE _git_describe + OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY ${source_root_directory} + RESULT_VARIABLE _rc) + if(_rc OR _git_describe STREQUAL "") + message( + FATAL_ERROR + "Please install latest version of git (`describe --tags --all --long` failed)" + ) + endif() + endif() + else() + execute_process( + COMMAND ${GIT} describe --tags --dirty --long "--match=v[0-9]*" + OUTPUT_VARIABLE _git_describe + OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY ${source_root_directory} + RESULT_VARIABLE _rc) + if(_rc OR _git_describe STREQUAL "") + message( + FATAL_ERROR + "Please install latest version of git (`describe --tags --long --match=v[0-9]*`)" + ) + endif() + execute_process( + COMMAND ${GIT} rev-list --count "${_git_last_vtag}..HEAD" + OUTPUT_VARIABLE _git_trailing_commits + OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY ${source_root_directory} + RESULT_VARIABLE _rc) + if(_rc OR _git_trailing_commits STREQUAL "") + message( + 
FATAL_ERROR + "Please install latest version of git (`rev-list --count ${_git_last_vtag}..HEAD` failed)" + ) + endif() + endif() +endmacro(git_get_versioninfo) + +macro(semver_provide name source_root_directory build_directory_for_json_output + build_metadata parent_scope) + set(_semver "") set(_git_describe "") set(_git_timestamp "") set(_git_tree "") set(_git_commit "") - set(_git_revision 0) - set(_git_version "") set(_version_from "") set(_git_root FALSE) @@ -134,247 +432,29 @@ macro(fetch_version name source_root_directory parent_scope string(JSON _git_timestamp GET "${_versioninfo_json}" "git_timestamp") string(JSON _git_tree GET "${_versioninfo_json}" "git_tree") string(JSON _git_commit GET "${_versioninfo_json}" "git_commit") - string(JSON _version_4dot GET "${_versioninfo_json}" "version_4dot") + string(JSON _semver GET "${_versioninfo_json}" "semver") unset(_json_object) - string(REPLACE "." ";" _version_list "${_version_4dot}") - - if(NOT _version_4dot) + if(NOT _semver) message( - ERROR - "Unable to retrieve ${name} version from \"${_version_from}\" file.") - set(_version_list ${_git_version}) - string(REPLACE ";" "." _version_4dot "${_git_version}") - else() - string(REPLACE "." 
";" _version_list ${_version_4dot}) + FATAL_ERROR + "Unable to retrieve ${name} version from \"${_version_from}\" file.") + endif() + semver_parse("${_semver}") + if(NOT _semver_ok) + message( + FATAL_ERROR "SemVer `${_semver}` from ${_version_from}: ${_semver_err}") endif() - elseif(_git_root AND _source_root STREQUAL _git_root) set(_version_from git) - - execute_process( - COMMAND ${GIT} show --no-patch --format=%cI HEAD - OUTPUT_VARIABLE _git_timestamp - OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE _rc) - if(_rc OR _git_timestamp STREQUAL "%cI") - execute_process( - COMMAND ${GIT} show --no-patch --format=%ci HEAD - OUTPUT_VARIABLE _git_timestamp - OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE _rc) - if(_rc OR _git_timestamp STREQUAL "%ci") - message( - FATAL_ERROR - "Please install latest version of git (`show --no-patch --format=%cI HEAD` failed)" - ) - endif() + git_get_versioninfo(${source_root_directory}) + semver_parse(${_git_last_vtag}) + if(NOT _semver_ok) + message(FATAL_ERROR "Git tag `${_git_last_vtag}`: ${_semver_err}") endif() - - execute_process( - COMMAND ${GIT} show --no-patch --format=%T HEAD - OUTPUT_VARIABLE _git_tree - OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE _rc) - if(_rc OR _git_tree STREQUAL "") - message( - FATAL_ERROR - "Please install latest version of git (`show --no-patch --format=%T HEAD` failed)" - ) + if(_git_trailing_commits GREATER 0 AND "${_semver_tweak}" STREQUAL "") + set(_semver_tweak ${_git_trailing_commits}) endif() - execute_process( - COMMAND ${GIT} show --no-patch --format=%H HEAD - OUTPUT_VARIABLE _git_commit - OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE _rc) - if(_rc OR _git_commit STREQUAL "") - message( - FATAL_ERROR - "Please install latest version of git (`show --no-patch --format=%H HEAD` failed)" - ) - 
endif() - - execute_process( - COMMAND ${GIT} status --untracked-files=no --porcelain - OUTPUT_VARIABLE _git_status - OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE _rc) - if(_rc) - message( - FATAL_ERROR - "Please install latest version of git (`status --untracked-files=no --porcelain` failed)" - ) - endif() - if(NOT _git_status STREQUAL "") - set(_git_commit "${_git_commit}-dirty") - endif() - unset(_git_status) - - execute_process( - COMMAND ${GIT} rev-list --tags --count - OUTPUT_VARIABLE _tag_count - OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE _rc) - if(_rc) - message( - FATAL_ERROR - "Please install latest version of git (`git rev-list --tags --count` failed)" - ) - endif() - - if(_tag_count EQUAL 0) - execute_process( - COMMAND ${GIT} rev-list --all --count - OUTPUT_VARIABLE _whole_count - OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE _rc) - if(_rc) - message( - FATAL_ERROR - "Please install latest version of git (`git rev-list --all --count` failed)" - ) - endif() - if(_whole_count GREATER 42) - message( - FATAL_ERROR - "Please fetch tags (no any tags for ${_whole_count} commits)") - endif() - set(_git_version "0;0;0") - execute_process( - COMMAND ${GIT} rev-list --count --all --no-merges - OUTPUT_VARIABLE _git_revision - OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE _rc) - if(_rc OR _git_revision STREQUAL "") - message( - FATAL_ERROR - "Please install latest version of git (`rev-list --count --all --no-merges` failed)" - ) - endif() - else(_tag_count EQUAL 0) - execute_process( - COMMAND ${GIT} describe --tags --long --dirty=-dirty "--match=v[0-9]*" - OUTPUT_VARIABLE _git_describe - OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE _rc) - if(_rc OR _git_describe STREQUAL "") - execute_process( - COMMAND ${GIT} 
rev-list --all --count - OUTPUT_VARIABLE _whole_count - OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE _rc) - if(_rc) - message( - FATAL_ERROR - "Please install latest version of git (`git rev-list --all --count` failed)" - ) - endif() - if(_whole_count GREATER 42) - message( - FATAL_ERROR - "Please fetch tags (`describe --tags --long --dirty --match=v[0-9]*` failed)" - ) - else() - execute_process( - COMMAND ${GIT} describe --all --long --dirty=-dirty - OUTPUT_VARIABLE _git_describe - OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE _rc) - if(_rc OR _git_describe STREQUAL "") - message( - FATAL_ERROR - "Please install latest version of git (`git rev-list --tags --count` and/or `git rev-list --all --count` failed)" - ) - endif() - endif() - endif() - - execute_process( - COMMAND ${GIT} describe --tags --abbrev=0 "--match=v[0-9]*" - OUTPUT_VARIABLE _last_release_tag - OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE _rc) - if(_rc) - message( - FATAL_ERROR - "Please install latest version of git (`describe --tags --abbrev=0 --match=v[0-9]*` failed)" - ) - endif() - if(_last_release_tag) - set(_git_revlist_arg "${_last_release_tag}..HEAD") - else() - execute_process( - COMMAND ${GIT} tag --sort=-version:refname - OUTPUT_VARIABLE _tag_list - OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE _rc) - if(_rc) - message( - FATAL_ERROR - "Please install latest version of git (`tag --sort=-version:refname` failed)" - ) - endif() - string(REGEX REPLACE "\n" ";" _tag_list "${_tag_list}") - set(_git_revlist_arg "HEAD") - foreach(_tag IN LISTS _tag_list) - if(NOT _last_release_tag) - string(REGEX MATCH "^v[0-9]+(\.[0-9]+)+" _last_release_tag - "${_tag}") - set(_git_revlist_arg "${_tag}..HEAD") - endif() - endforeach(_tag) - endif() - execute_process( - COMMAND ${GIT} rev-list --count 
"${_git_revlist_arg}" - OUTPUT_VARIABLE _git_revision - OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${source_root_directory} - RESULT_VARIABLE _rc) - if(_rc OR _git_revision STREQUAL "") - message( - FATAL_ERROR - "Please install latest version of git (`rev-list --count ${_git_revlist_arg}` failed)" - ) - endif() - - string(REGEX MATCH "^(v)?([0-9]+)\\.([0-9]+)\\.([0-9]+)(.*)?" - _git_version_valid "${_git_describe}") - if(_git_version_valid) - string(REGEX REPLACE "^(v)?([0-9]+)\\.([0-9]+)\\.([0-9]+)(.*)?" - "\\2;\\3;\\4" _git_version ${_git_describe}) - else() - string(REGEX MATCH "^(v)?([0-9]+)\\.([0-9]+)(.*)?" _git_version_valid - "${_git_describe}") - if(_git_version_valid) - string(REGEX REPLACE "^(v)?([0-9]+)\\.([0-9]+)(.*)?" "\\2;\\3;0" - _git_version ${_git_describe}) - else() - message( - AUTHOR_WARNING - "Bad ${name} version \"${_git_describe}\"; falling back to 0.0.0 (have you made an initial release?)" - ) - set(_git_version "0;0;0") - endif() - endif() - endif(_tag_count EQUAL 0) - - list(APPEND _git_version "${_git_revision}") - set(_version_list "${_git_version}") - string(REPLACE ";" "." 
_version_4dot "${_version_list}") - elseif(GIT) message( FATAL_ERROR @@ -384,44 +464,68 @@ macro(fetch_version name source_root_directory parent_scope message(FATAL_ERROR "Требуется git для получения информации о версии") endif() - list(LENGTH _version_list _version_list_length) - list(GET _version_list 0 _version_major) - list(GET _version_list 1 _version_minor) - list(GET _version_list 2 _version_release) - list(GET _version_list 3 _version_revision) - if(NOT _git_describe OR NOT _git_timestamp OR NOT _git_tree OR NOT _git_commit - OR _git_revision STREQUAL "" - OR NOT _version_list_length EQUAL 4 - OR _version_major STREQUAL "" - OR _version_minor STREQUAL "" - OR _version_release STREQUAL "" - OR _version_revision STREQUAL "") + OR _semver_major STREQUAL "" + OR _semver_minor STREQUAL "" + OR _semver_patch STREQUAL "") message(ERROR "Unable to retrieve ${name} version from ${_version_from}.") - else() - list(APPEND _git_version "${_git_revision}") endif() + set(_semver "${_semver_major}.${_semver_minor}.${_semver_patch}") + if(_semver_tweak STREQUAL "") + set(_semver_tweak 0) + elseif(_semver_tweak GREATER 0) + string(APPEND _semver ".${_semver_tweak}") + endif() + if(NOT _semver_prerelease STREQUAL "") + string(APPEND _semver "-${_semver_prerelease}") + endif() + if(_git_is_dirty) + string(APPEND _semver "-DIRTY") + endif() + + set(_semver_complete "${_semver}") + if(NOT "${build_metadata}" STREQUAL "") + string(APPEND _semver_complete "+${build_metadata}") + endif() + + set(${name}_VERSION "${_semver_complete}") + set(${name}_VERSION_PURE "${_semver}") + set(${name}_VERSION_MAJOR ${_semver_major}) + set(${name}_VERSION_MINOR ${_semver_minor}) + set(${name}_VERSION_PATCH ${_semver_patch}) + set(${name}_VERSION_TWEAK "${_semver_tweak}") + set(${name}_VERSION_PRERELEASE "${_semver_prerelease}") + set(${name}_GIT_DESCRIBE "${_git_describe}") + set(${name}_GIT_TIMESTAMP "${_git_timestamp}") + set(${name}_GIT_TREE "${_git_tree}") + set(${name}_GIT_COMMIT 
"${_git_commit}") + if(${parent_scope}) + set(${name}_VERSION + "${_semver_complete}" + PARENT_SCOPE) + set(${name}_VERSION_PURE + "${_semver}" + PARENT_SCOPE) set(${name}_VERSION_MAJOR - "${_version_major}" + ${_semver_major} PARENT_SCOPE) set(${name}_VERSION_MINOR - "${_version_minor}" + ${_semver_minor} PARENT_SCOPE) - set(${name}_VERSION_RELEASE - "${_version_release}" + set(${name}_VERSION_PATCH + ${_semver_patch} PARENT_SCOPE) - set(${name}_VERSION_REVISION - "${_version_revision}" + set(${name}_VERSION_TWEAK + "${_semver_tweak}" PARENT_SCOPE) - set(${name}_VERSION - "${_version_4dot}" + set(${name}_VERSION_PRERELEASE + "${_semver_prerelease}" PARENT_SCOPE) - set(${name}_GIT_DESCRIBE "${_git_describe}" PARENT_SCOPE) @@ -434,21 +538,6 @@ macro(fetch_version name source_root_directory parent_scope set(${name}_GIT_COMMIT "${_git_commit}" PARENT_SCOPE) - set(${name}_GIT_REVISION - "${_git_revision}" - PARENT_SCOPE) - else() - set(${name}_VERSION_MAJOR "${_version_major}") - set(${name}_VERSION_MINOR "${_version_minor}") - set(${name}_VERSION_RELEASE "${_version_release}") - set(${name}_VERSION_REVISION "${_version_revision}") - set(${name}_VERSION "${_version_4dot}") - - set(${name}_GIT_DESCRIBE "${_git_describe}") - set(${name}_GIT_TIMESTAMP "${_git_timestamp}") - set(${name}_GIT_TREE "${_git_tree}") - set(${name}_GIT_COMMIT "${_git_commit}") - set(${name}_GIT_REVISION "${_git_revision}") endif() if(_version_from STREQUAL "git") @@ -459,12 +548,12 @@ macro(fetch_version name source_root_directory parent_scope \"git_timestamp\" : \"@_git_timestamp@\", \"git_tree\" : \"@_git_tree@\", \"git_commit\" : \"@_git_commit@\", - \"version_4dot\" : \"@_version_4dot@\"\n}" + \"semver\" : \"@_semver@\"\n}" _versioninfo_json @ONLY ESCAPE_QUOTES) file(WRITE "${build_directory_for_json_output}/VERSION.json" "${_versioninfo_json}") endif() -endmacro(fetch_version) +endmacro(semver_provide) cmake_policy(POP) diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index 
8158a457..888f5486 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -48,7 +48,7 @@ PROJECT_NAME = libmdbx # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = "${MDBX_VERSION_MAJOR}.${MDBX_VERSION_MINOR}.${MDBX_VERSION_RELEASE}.${MDBX_VERSION_REVISION} (@MDBX_GIT_TIMESTAMP@)" +PROJECT_NUMBER = "${MDBX_VERSION_MAJOR}.${MDBX_VERSION_MINOR}.${MDBX_VERSION_PATCH}.${MDBX_VERSION_TWEAK} (@MDBX_GIT_TIMESTAMP@)" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/mdbx.h b/mdbx.h index 3022810d..62600e63 100644 --- a/mdbx.h +++ b/mdbx.h @@ -660,12 +660,13 @@ extern "C" { #define LIBMDBX_VERINFO_API __dll_export #endif /* LIBMDBX_VERINFO_API */ -/** \brief libmdbx version information */ +/** \brief libmdbx version information, \see https://semver.org/ */ extern LIBMDBX_VERINFO_API const struct MDBX_version_info { - uint8_t major; /**< Major version number */ - uint8_t minor; /**< Minor version number */ - uint16_t release; /**< Release number of Major.Minor */ - uint32_t revision; /**< Revision number of Release */ + uint16_t major; /**< Major version number */ + uint16_t minor; /**< Minor version number */ + uint16_t patch; /**< Patch number */ + uint16_t tweak; /**< Tweak number */ + const char *semver_prerelease; /**< Semantic Versioning `pre-release` */ struct { const char *datetime; /**< committer date, strict ISO-8601 format */ const char *tree; /**< commit hash (hexadecimal digits) */ diff --git a/src/tools/chk.c b/src/tools/chk.c index 69b5de01..22e38460 100644 --- a/src/tools/chk.c +++ b/src/tools/chk.c @@ -475,8 +475,8 @@ int main(int argc, char *argv[]) { " - build: %s for %s by %s\n" " - flags: %s\n" " - options: %s\n", - mdbx_version.major, mdbx_version.minor, mdbx_version.release, - mdbx_version.revision, mdbx_version.git.describe, + mdbx_version.major, 
mdbx_version.minor, mdbx_version.patch, + mdbx_version.tweak, mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.commit, mdbx_version.git.tree, mdbx_sourcery_anchor, mdbx_build.datetime, mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, diff --git a/src/tools/copy.c b/src/tools/copy.c index aa6157f3..4441a982 100644 --- a/src/tools/copy.c +++ b/src/tools/copy.c @@ -91,8 +91,8 @@ int main(int argc, char *argv[]) { " - build: %s for %s by %s\n" " - flags: %s\n" " - options: %s\n", - mdbx_version.major, mdbx_version.minor, mdbx_version.release, - mdbx_version.revision, mdbx_version.git.describe, + mdbx_version.major, mdbx_version.minor, mdbx_version.patch, + mdbx_version.tweak, mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.commit, mdbx_version.git.tree, mdbx_sourcery_anchor, mdbx_build.datetime, mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, diff --git a/src/tools/drop.c b/src/tools/drop.c index 22bab11a..8de80cd9 100644 --- a/src/tools/drop.c +++ b/src/tools/drop.c @@ -86,8 +86,8 @@ int main(int argc, char *argv[]) { " - build: %s for %s by %s\n" " - flags: %s\n" " - options: %s\n", - mdbx_version.major, mdbx_version.minor, mdbx_version.release, - mdbx_version.revision, mdbx_version.git.describe, + mdbx_version.major, mdbx_version.minor, mdbx_version.patch, + mdbx_version.tweak, mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.commit, mdbx_version.git.tree, mdbx_sourcery_anchor, mdbx_build.datetime, mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, diff --git a/src/tools/dump.c b/src/tools/dump.c index ce892216..de93422f 100644 --- a/src/tools/dump.c +++ b/src/tools/dump.c @@ -274,8 +274,8 @@ int main(int argc, char *argv[]) { " - build: %s for %s by %s\n" " - flags: %s\n" " - options: %s\n", - mdbx_version.major, mdbx_version.minor, mdbx_version.release, - mdbx_version.revision, mdbx_version.git.describe, + mdbx_version.major, mdbx_version.minor, mdbx_version.patch, + 
mdbx_version.tweak, mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.commit, mdbx_version.git.tree, mdbx_sourcery_anchor, mdbx_build.datetime, mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, diff --git a/src/tools/load.c b/src/tools/load.c index b0a5364f..9182926f 100644 --- a/src/tools/load.c +++ b/src/tools/load.c @@ -530,8 +530,8 @@ int main(int argc, char *argv[]) { " - build: %s for %s by %s\n" " - flags: %s\n" " - options: %s\n", - mdbx_version.major, mdbx_version.minor, mdbx_version.release, - mdbx_version.revision, mdbx_version.git.describe, + mdbx_version.major, mdbx_version.minor, mdbx_version.patch, + mdbx_version.tweak, mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.commit, mdbx_version.git.tree, mdbx_sourcery_anchor, mdbx_build.datetime, mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, diff --git a/src/tools/stat.c b/src/tools/stat.c index 2306fb5a..f8808caa 100644 --- a/src/tools/stat.c +++ b/src/tools/stat.c @@ -129,8 +129,8 @@ int main(int argc, char *argv[]) { " - build: %s for %s by %s\n" " - flags: %s\n" " - options: %s\n", - mdbx_version.major, mdbx_version.minor, mdbx_version.release, - mdbx_version.revision, mdbx_version.git.describe, + mdbx_version.major, mdbx_version.minor, mdbx_version.patch, + mdbx_version.tweak, mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.commit, mdbx_version.git.tree, mdbx_sourcery_anchor, mdbx_build.datetime, mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, diff --git a/src/version.c.in b/src/version.c.in index 9686a9b7..19513c68 100644 --- a/src/version.c.in +++ b/src/version.c.in @@ -25,8 +25,10 @@ __dll_export const struct MDBX_version_info mdbx_version = { ${MDBX_VERSION_MAJOR}, ${MDBX_VERSION_MINOR}, - ${MDBX_VERSION_RELEASE}, - ${MDBX_VERSION_REVISION}, + ${MDBX_VERSION_PATCH}, + ${MDBX_VERSION_TWEAK}, + "@MDBX_VERSION_PRERELEASE@", /* pre-release suffix of SemVer + @MDBX_VERSION_PURE@ */ {"@MDBX_GIT_TIMESTAMP@", 
"@MDBX_GIT_TREE@", "@MDBX_GIT_COMMIT@", "@MDBX_GIT_DESCRIBE@"}, sourcery}; From 28bd805ed8e0ab0f9373639cd27493c5061b8974 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 25 Nov 2024 21:20:02 +0300 Subject: [PATCH 356/443] =?UTF-8?q?mdbx:=20=D0=B2=D0=BE=D0=B7=D0=BC=D0=BE?= =?UTF-8?q?=D0=B6=D0=BD=D0=BE=D1=81=D1=82=D1=8C=20=D0=BB=D0=BE=D0=B3=D0=B8?= =?UTF-8?q?=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=BE=D1=88=D0=B8?= =?UTF-8?q?=D0=B1=D0=BE=D0=BA=20=D0=B2=D0=BE=D0=B7=D0=B2=D1=80=D0=B0=D1=89?= =?UTF-8?q?=D0=B0=D0=B5=D0=BC=D1=8B=D1=85=20=D0=B8=D0=B7=20API=20(return?= =?UTF-8?q?=20LOG=5FIFERR).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Возможность полезная, но пожалуй еще нуждается в доработке и/или до-осмыслении. Основное неудобство в нестыковке с основным логированием. С одной стороны, сообщение об ошибках следует выводить с уровнем/severity MDBX_LOG_ERROR. Однако, это замусоривает и ломает тесты. Поэтому сейчас при возвращении ошибок из API сообщения логируются MDBX_LOG_ERROR, но производится это только при включении уровня логирования MDBX_LOG_DEBUG или более детальном. 
--- src/api-cursor.c | 207 ++++++++++++++++++++++------------------ src/api-env.c | 112 +++++++++++----------- src/api-extra.c | 24 ++--- src/api-txn.c | 104 ++++++++++---------- src/chk.c | 8 +- src/cold.c | 70 +++++++------- src/copy.c | 24 ++--- src/dbi.c | 68 ++++++------- src/env-opts.c | 64 ++++++------- src/logging_and_debug.c | 10 ++ src/logging_and_debug.h | 8 ++ src/misc.c | 14 +-- src/mvcc-readers.c | 16 ++-- src/osal.c | 26 ++--- src/range-estimate.c | 67 ++++++------- 15 files changed, 433 insertions(+), 389 deletions(-) diff --git a/src/api-cursor.c b/src/api-cursor.c index f5f90d0f..6bb89cfa 100644 --- a/src/api-cursor.c +++ b/src/api-cursor.c @@ -29,16 +29,16 @@ MDBX_cursor *mdbx_cursor_create(void *context) { int mdbx_cursor_renew(const MDBX_txn *txn, MDBX_cursor *mc) { return likely(mc) ? mdbx_cursor_bind(txn, mc, (kvx_t *)mc->clc - txn->env->kvs) - : MDBX_EINVAL; + : LOG_IFERR(MDBX_EINVAL); } int mdbx_cursor_reset(MDBX_cursor *mc) { if (unlikely(!mc)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_ready4dispose && mc->signature != cur_signature_live)) - return MDBX_EBADSIGN; + return LOG_IFERR(MDBX_EBADSIGN); cursor_couple_t *couple = (cursor_couple_t *)mc; couple->outer.top_and_flags = z_poor_mark; @@ -48,29 +48,29 @@ int mdbx_cursor_reset(MDBX_cursor *mc) { int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { if (unlikely(!mc)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_ready4dispose && mc->signature != cur_signature_live)) - return MDBX_EBADSIGN; + return LOG_IFERR(MDBX_EBADSIGN); int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); rc = dbi_check(txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(dbi == FREE_DBI && !(txn->flags & MDBX_TXN_RDONLY))) - return MDBX_EACCESS; + return LOG_IFERR(MDBX_EACCESS); if 
(unlikely(mc->backup)) /* Cursor from parent transaction */ { cASSERT(mc, mc->signature == cur_signature_live); if (unlikely(cursor_dbi(mc) != dbi || /* paranoia */ mc->signature != cur_signature_live || mc->txn != txn)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); cASSERT(mc, mc->tree == &txn->dbs[dbi]); cASSERT(mc, mc->clc == &txn->env->kvs[dbi].clc); @@ -79,7 +79,9 @@ int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { /* paranoia */ mc->signature == cur_signature_live && mc->txn == txn) ? MDBX_SUCCESS - : MDBX_EINVAL /* Disallow change DBI in nested transactions */; + : LOG_IFERR(MDBX_EINVAL) /* Disallow change DBI in nested + transactions */ + ; } if (mc->signature == cur_signature_live) { @@ -91,7 +93,7 @@ int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { rc = cursor_init(mc, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); mc->next = txn->cursors[dbi]; txn->cursors[dbi] = mc; @@ -100,14 +102,15 @@ int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { int mdbx_cursor_unbind(MDBX_cursor *mc) { if (unlikely(!mc)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return (mc->signature == cur_signature_ready4dispose) ? MDBX_SUCCESS - : MDBX_EBADSIGN; + return (mc->signature == cur_signature_ready4dispose) + ? MDBX_SUCCESS + : LOG_IFERR(MDBX_EBADSIGN); if (unlikely(mc->backup)) /* Cursor from parent transaction */ - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); eASSERT(nullptr, mc->txn && mc->txn->signature == txn_signature); cASSERT(mc, mc->signature == cur_signature_live); @@ -116,7 +119,7 @@ int mdbx_cursor_unbind(MDBX_cursor *mc) { ERROR("Wrong cursor's transaction %p 0x%x", __Wpedantic_format_voidptr(mc->txn), mc->txn ? 
mc->txn->signature : 0); - return MDBX_PROBLEM; + return LOG_IFERR(MDBX_PROBLEM); } if (mc->next != mc) { const size_t dbi = (kvx_t *)mc->clc - mc->txn->env->kvs; @@ -138,17 +141,17 @@ int mdbx_cursor_unbind(MDBX_cursor *mc) { int mdbx_cursor_open(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **ret) { if (unlikely(!ret)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); *ret = nullptr; MDBX_cursor *const mc = mdbx_cursor_create(nullptr); if (unlikely(!mc)) - return MDBX_ENOMEM; + return LOG_IFERR(MDBX_ENOMEM); int rc = mdbx_cursor_bind(txn, mc, dbi); if (unlikely(rc != MDBX_SUCCESS)) { mdbx_cursor_close(mc); - return rc; + return LOG_IFERR(rc); } *ret = mc; @@ -189,10 +192,11 @@ void mdbx_cursor_close(MDBX_cursor *mc) { int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest) { if (unlikely(!src)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(src->signature != cur_signature_live)) - return (src->signature == cur_signature_ready4dispose) ? MDBX_EINVAL - : MDBX_EBADSIGN; + return LOG_IFERR((src->signature == cur_signature_ready4dispose) + ? MDBX_EINVAL + : MDBX_EBADSIGN); int rc = mdbx_cursor_bind(src->txn, dest, cursor_dbi(src)); if (unlikely(rc != MDBX_SUCCESS)) @@ -241,6 +245,7 @@ int mdbx_txn_release_all_cursors(const MDBX_txn *txn, bool unbind) { } } else { eASSERT(nullptr, rc < 0); + LOG_IFERR(rc); } return rc; } @@ -325,18 +330,19 @@ int mdbx_cursor_compare(const MDBX_cursor *l, const MDBX_cursor *r, /* Return the count of duplicate data items for the current key */ int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) { if (unlikely(mc == nullptr)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL - : MDBX_EBADSIGN; + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) + ? 
MDBX_EINVAL + : MDBX_EBADSIGN); int rc = check_txn(mc->txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(countp == nullptr)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if ((*countp = is_filled(mc)) > 0) { if (!inner_hollow(mc)) { @@ -353,11 +359,12 @@ int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) { int mdbx_cursor_on_first(const MDBX_cursor *mc) { if (unlikely(mc == nullptr)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL - : MDBX_EBADSIGN; + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) + ? MDBX_EINVAL + : MDBX_EBADSIGN); for (intptr_t i = 0; i <= mc->top; ++i) { if (mc->ki[i]) @@ -369,11 +376,12 @@ int mdbx_cursor_on_first(const MDBX_cursor *mc) { int mdbx_cursor_on_first_dup(const MDBX_cursor *mc) { if (unlikely(mc == nullptr)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL - : MDBX_EBADSIGN; + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) + ? MDBX_EINVAL + : MDBX_EBADSIGN); if (is_filled(mc) && mc->subcur) { mc = &mc->subcur->cursor; @@ -388,11 +396,12 @@ int mdbx_cursor_on_first_dup(const MDBX_cursor *mc) { int mdbx_cursor_on_last(const MDBX_cursor *mc) { if (unlikely(mc == nullptr)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL - : MDBX_EBADSIGN; + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) + ? 
MDBX_EINVAL + : MDBX_EBADSIGN); for (intptr_t i = 0; i <= mc->top; ++i) { size_t nkeys = page_numkeys(mc->pg[i]); @@ -405,11 +414,12 @@ int mdbx_cursor_on_last(const MDBX_cursor *mc) { int mdbx_cursor_on_last_dup(const MDBX_cursor *mc) { if (unlikely(mc == nullptr)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL - : MDBX_EBADSIGN; + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) + ? MDBX_EINVAL + : MDBX_EBADSIGN); if (is_filled(mc) && mc->subcur) { mc = &mc->subcur->cursor; @@ -425,11 +435,12 @@ int mdbx_cursor_on_last_dup(const MDBX_cursor *mc) { int mdbx_cursor_eof(const MDBX_cursor *mc) { if (unlikely(mc == nullptr)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL - : MDBX_EBADSIGN; + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) + ? MDBX_EINVAL + : MDBX_EBADSIGN); return is_eof(mc) ? MDBX_RESULT_TRUE : MDBX_RESULT_FALSE; } @@ -437,20 +448,21 @@ int mdbx_cursor_eof(const MDBX_cursor *mc) { int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { if (unlikely(mc == nullptr)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL - : MDBX_EBADSIGN; + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) + ? 
MDBX_EINVAL + : MDBX_EBADSIGN); int rc = check_txn(mc->txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(cursor_dbi_changed(mc))) - return MDBX_BAD_DBI; + return LOG_IFERR(MDBX_BAD_DBI); - return cursor_ops(mc, key, data, op); + return LOG_IFERR(cursor_ops(mc, key, data, op)); } __hot static int scan_confinue(MDBX_cursor *mc, MDBX_predicate_func *predicate, @@ -520,33 +532,34 @@ int mdbx_cursor_scan(MDBX_cursor *mc, MDBX_predicate_func *predicate, void *context, MDBX_cursor_op start_op, MDBX_cursor_op turn_op, void *arg) { if (unlikely(!predicate)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); const unsigned valid_start_mask = 1 << MDBX_FIRST | 1 << MDBX_FIRST_DUP | 1 << MDBX_LAST | 1 << MDBX_LAST_DUP | 1 << MDBX_GET_CURRENT | 1 << MDBX_GET_MULTIPLE; if (unlikely(start_op > 30 || ((1 << start_op) & valid_start_mask) == 0)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); const unsigned valid_turn_mask = 1 << MDBX_NEXT | 1 << MDBX_NEXT_DUP | 1 << MDBX_NEXT_NODUP | 1 << MDBX_PREV | 1 << MDBX_PREV_DUP | 1 << MDBX_PREV_NODUP | 1 << MDBX_NEXT_MULTIPLE | 1 << MDBX_PREV_MULTIPLE; if (unlikely(turn_op > 30 || ((1 << turn_op) & valid_turn_mask) == 0)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); MDBX_val key = {nullptr, 0}, value = {nullptr, 0}; int rc = mdbx_cursor_get(mc, &key, &value, start_op); if (unlikely(rc != MDBX_SUCCESS)) - return rc; - return scan_confinue(mc, predicate, context, arg, &key, &value, turn_op); + return LOG_IFERR(rc); + return LOG_IFERR( + scan_confinue(mc, predicate, context, arg, &key, &value, turn_op)); } int mdbx_cursor_scan_from(MDBX_cursor *mc, MDBX_predicate_func *predicate, void *context, MDBX_cursor_op from_op, MDBX_val *key, MDBX_val *value, MDBX_cursor_op turn_op, void *arg) { if (unlikely(!predicate || !key)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); const unsigned valid_start_mask = 1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | 1 << 
MDBX_SET_KEY | @@ -554,18 +567,18 @@ int mdbx_cursor_scan_from(MDBX_cursor *mc, MDBX_predicate_func *predicate, 1 << MDBX_SET_UPPERBOUND; if (unlikely(from_op < MDBX_TO_KEY_LESSER_THAN && ((1 << from_op) & valid_start_mask) == 0)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); const unsigned valid_turn_mask = 1 << MDBX_NEXT | 1 << MDBX_NEXT_DUP | 1 << MDBX_NEXT_NODUP | 1 << MDBX_PREV | 1 << MDBX_PREV_DUP | 1 << MDBX_PREV_NODUP | 1 << MDBX_NEXT_MULTIPLE | 1 << MDBX_PREV_MULTIPLE; if (unlikely(turn_op > 30 || ((1 << turn_op) & valid_turn_mask) == 0)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); int rc = mdbx_cursor_get(mc, key, value, from_op); if (unlikely(MDBX_IS_ERROR(rc))) - return rc; + return LOG_IFERR(rc); cASSERT(mc, key != nullptr); MDBX_val stub; @@ -573,51 +586,53 @@ int mdbx_cursor_scan_from(MDBX_cursor *mc, MDBX_predicate_func *predicate, value = &stub; rc = cursor_ops(mc, key, value, MDBX_GET_CURRENT); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); } - return scan_confinue(mc, predicate, context, arg, key, value, turn_op); + return LOG_IFERR( + scan_confinue(mc, predicate, context, arg, key, value, turn_op)); } int mdbx_cursor_get_batch(MDBX_cursor *mc, size_t *count, MDBX_val *pairs, size_t limit, MDBX_cursor_op op) { if (unlikely(!count)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); *count = 0; if (unlikely(mc == nullptr || limit < 4 || limit > INTPTR_MAX - 2)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL - : MDBX_EBADSIGN; + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) + ? 
MDBX_EINVAL + : MDBX_EBADSIGN); int rc = check_txn(mc->txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(cursor_dbi_changed(mc))) - return MDBX_BAD_DBI; + return LOG_IFERR(MDBX_BAD_DBI); if (unlikely(mc->subcur)) - return MDBX_INCOMPATIBLE /* must be a non-dupsort table */; + return LOG_IFERR(MDBX_INCOMPATIBLE) /* must be a non-dupsort table */; switch (op) { case MDBX_NEXT: if (unlikely(is_eof(mc))) - return is_pointed(mc) ? MDBX_NOTFOUND : MDBX_ENODATA; + return LOG_IFERR(is_pointed(mc) ? MDBX_NOTFOUND : MDBX_ENODATA); break; case MDBX_FIRST: if (!is_filled(mc)) { rc = outer_first(mc, nullptr, nullptr); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); } break; default: DEBUG("unhandled/unimplemented cursor operation %u", op); - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); } const page_t *mp = mc->pg[mc->top]; @@ -662,18 +677,18 @@ int mdbx_cursor_get_batch(MDBX_cursor *mc, size_t *count, MDBX_val *pairs, bailout: *count = n; - return rc; + return LOG_IFERR(rc); } /*----------------------------------------------------------------------------*/ int mdbx_cursor_set_userctx(MDBX_cursor *mc, void *ctx) { if (unlikely(!mc)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_ready4dispose && mc->signature != cur_signature_live)) - return MDBX_EBADSIGN; + return LOG_IFERR(MDBX_EBADSIGN); cursor_couple_t *couple = container_of(mc, cursor_couple_t, outer); couple->userctx = ctx; @@ -714,80 +729,84 @@ MDBX_dbi mdbx_cursor_dbi(const MDBX_cursor *mc) { int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, MDBX_put_flags_t flags) { if (unlikely(mc == nullptr || key == nullptr || data == nullptr)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return (mc->signature == cur_signature_ready4dispose) ? 
MDBX_EINVAL - : MDBX_EBADSIGN; + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) + ? MDBX_EINVAL + : MDBX_EBADSIGN); int rc = check_txn_rw(mc->txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(cursor_dbi_changed(mc))) - return MDBX_BAD_DBI; + return LOG_IFERR(MDBX_BAD_DBI); cASSERT(mc, cursor_is_tracked(mc)); /* Check this first so counter will always be zero on any early failures. */ if (unlikely(flags & MDBX_MULTIPLE)) { if (unlikely(flags & MDBX_RESERVE)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(!(mc->tree->flags & MDBX_DUPFIXED))) - return MDBX_INCOMPATIBLE; + return LOG_IFERR(MDBX_INCOMPATIBLE); const size_t dcount = data[1].iov_len; if (unlikely(dcount < 2 || data->iov_len == 0)) - return MDBX_BAD_VALSIZE; + return LOG_IFERR(MDBX_BAD_VALSIZE); if (unlikely(mc->tree->dupfix_size != data->iov_len) && mc->tree->dupfix_size) - return MDBX_BAD_VALSIZE; + return LOG_IFERR(MDBX_BAD_VALSIZE); if (unlikely(dcount > MAX_MAPSIZE / 2 / (BRANCH_NODE_MAX(MDBX_MAX_PAGESIZE) - NODESIZE))) { /* checking for multiplication overflow */ if (unlikely(dcount > MAX_MAPSIZE / 2 / data->iov_len)) - return MDBX_TOO_LARGE; + return LOG_IFERR(MDBX_TOO_LARGE); } } if (flags & MDBX_RESERVE) { if (unlikely(mc->tree->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_INTEGERDUP | MDBX_DUPFIXED))) - return MDBX_INCOMPATIBLE; + return LOG_IFERR(MDBX_INCOMPATIBLE); data->iov_base = nullptr; } if (unlikely(mc->txn->flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) - return (mc->txn->flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS : MDBX_BAD_TXN; + return LOG_IFERR((mc->txn->flags & MDBX_TXN_RDONLY) ? 
MDBX_EACCESS + : MDBX_BAD_TXN); - return cursor_put_checklen(mc, key, data, flags); + return LOG_IFERR(cursor_put_checklen(mc, key, data, flags)); } int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) { if (unlikely(!mc)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL - : MDBX_EBADSIGN; + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) + ? MDBX_EINVAL + : MDBX_EBADSIGN); int rc = check_txn_rw(mc->txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(cursor_dbi_changed(mc))) - return MDBX_BAD_DBI; + return LOG_IFERR(MDBX_BAD_DBI); - return cursor_del(mc, flags); + return LOG_IFERR(cursor_del(mc, flags)); } __cold int mdbx_cursor_ignord(MDBX_cursor *mc) { if (unlikely(!mc)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return (mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL - : MDBX_EBADSIGN; + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) + ? 
MDBX_EINVAL + : MDBX_EBADSIGN); mc->checking |= z_ignord; if (mc->subcur) diff --git a/src/api-env.c b/src/api-env.c index 48324f68..be47d566 100644 --- a/src/api-env.c +++ b/src/api-env.c @@ -196,18 +196,18 @@ __cold static int env_handle_pathname(MDBX_env *env, const pathchar_t *pathname, __cold int mdbx_env_create(MDBX_env **penv) { if (unlikely(!penv)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); *penv = nullptr; #ifdef MDBX_HAVE_C11ATOMICS if (unlikely(!atomic_is_lock_free((const volatile uint32_t *)penv))) { ERROR("lock-free atomic ops for %u-bit types is required", 32); - return MDBX_INCOMPATIBLE; + return LOG_IFERR(MDBX_INCOMPATIBLE); } #if MDBX_64BIT_ATOMIC if (unlikely(!atomic_is_lock_free((const volatile uint64_t *)penv))) { ERROR("lock-free atomic ops for %u-bit types is required", 64); - return MDBX_INCOMPATIBLE; + return LOG_IFERR(MDBX_INCOMPATIBLE); } #endif /* MDBX_64BIT_ATOMIC */ #endif /* MDBX_HAVE_C11ATOMICS */ @@ -215,25 +215,25 @@ __cold int mdbx_env_create(MDBX_env **penv) { if (unlikely(!is_powerof2(globals.sys_pagesize) || globals.sys_pagesize < MDBX_MIN_PAGESIZE)) { ERROR("unsuitable system pagesize %u", globals.sys_pagesize); - return MDBX_INCOMPATIBLE; + return LOG_IFERR(MDBX_INCOMPATIBLE); } #if defined(__linux__) || defined(__gnu_linux__) if (unlikely(globals.linux_kernel_version < 0x04000000)) { - /* 2022-09-01: Прошло уже больше двух после окончания какой-либо поддержки - * самого "долгоиграющего" ядра 3.16.85 ветки 3.x */ + /* 2022-09-01: Прошло уже более двух лет после окончания какой-либо + * поддержки самого "долгоиграющего" ядра 3.16.85 ветки 3.x */ ERROR("too old linux kernel %u.%u.%u.%u, the >= 4.0.0 is required", globals.linux_kernel_version >> 24, (globals.linux_kernel_version >> 16) & 255, (globals.linux_kernel_version >> 8) & 255, globals.linux_kernel_version & 255); - return MDBX_INCOMPATIBLE; + return LOG_IFERR(MDBX_INCOMPATIBLE); } #endif /* Linux */ MDBX_env *env = osal_calloc(1, sizeof(MDBX_env)); if 
(unlikely(!env)) - return MDBX_ENOMEM; + return LOG_IFERR(MDBX_ENOMEM); env->max_readers = DEFAULT_READERS; env->max_dbi = env->n_dbi = CORE_DBS; @@ -278,18 +278,18 @@ __cold int mdbx_env_create(MDBX_env **penv) { bailout: osal_free(env); - return rc; + return LOG_IFERR(rc); } __cold int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target) { if (unlikely(target >= NUM_METAS)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); int rc = check_env(env, true); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely((env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_EXCLUSIVE)) - return MDBX_EPERM; + return LOG_IFERR(MDBX_EPERM); const meta_t *const target_meta = METAPAGE(env, target); txnid_t new_txnid = constmeta_txnid(target_meta); @@ -303,7 +303,7 @@ __cold int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target) { if (meta_validate(env, &meta, page, n, nullptr) != MDBX_SUCCESS) { int err = meta_override(env, n, 0, nullptr); if (unlikely(err != MDBX_SUCCESS)) - return err; + return LOG_IFERR(err); } else { txnid_t txnid = constmeta_txnid(&meta); if (new_txnid <= txnid) @@ -313,9 +313,9 @@ __cold int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target) { if (unlikely(new_txnid > MAX_TXNID)) { ERROR("txnid overflow, raise %d", MDBX_TXN_FULL); - return MDBX_TXN_FULL; + return LOG_IFERR(MDBX_TXN_FULL); } - return meta_override(env, target, new_txnid, target_meta); + return LOG_IFERR(meta_override(env, target, new_txnid, target_meta)); } __cold int mdbx_env_open_for_recovery(MDBX_env *env, const char *pathname, @@ -327,7 +327,7 @@ __cold int mdbx_env_open_for_recovery(MDBX_env *env, const char *pathname, rc = mdbx_env_open_for_recoveryW(env, pathnameW, target_meta, writeable); osal_free(pathnameW); } - return rc; + return LOG_IFERR(rc); } __cold int mdbx_env_open_for_recoveryW(MDBX_env *env, const wchar_t *pathname, @@ -335,12 +335,12 @@ __cold int mdbx_env_open_for_recoveryW(MDBX_env *env, const wchar_t *pathname, 
#endif /* Windows */ if (unlikely(target_meta >= NUM_METAS)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); int rc = check_env(env, false); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(env->dxb_mmap.base)) - return MDBX_EPERM; + return LOG_IFERR(MDBX_EPERM); env->stuck_meta = (int8_t)target_meta; return @@ -361,7 +361,7 @@ __cold int mdbx_env_delete(const char *pathname, MDBX_env_delete_mode_t mode) { rc = mdbx_env_deleteW(pathnameW, mode); osal_free(pathnameW); } - return rc; + return LOG_IFERR(rc); } __cold int mdbx_env_deleteW(const wchar_t *pathname, @@ -370,7 +370,7 @@ __cold int mdbx_env_deleteW(const wchar_t *pathname, switch (mode) { default: - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); case MDBX_ENV_JUST_DELETE: case MDBX_ENV_ENSURE_UNUSED: case MDBX_ENV_WAIT_FOR_UNUSED: @@ -442,7 +442,7 @@ __cold int mdbx_env_deleteW(const wchar_t *pathname, err = MDBX_SUCCESS; osal_free(dummy_env->pathname.buffer); - return (err == MDBX_SUCCESS) ? rc : err; + return LOG_IFERR((err == MDBX_SUCCESS) ? 
rc : err); } __cold int mdbx_env_open(MDBX_env *env, const char *pathname, @@ -457,7 +457,7 @@ __cold int mdbx_env_open(MDBX_env *env, const char *pathname, /* force to make cache of the multi-byte pathname representation */ mdbx_env_get_path(env, &pathname); } - return rc; + return LOG_IFERR(rc); } __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, @@ -466,14 +466,14 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, int rc = check_env(env, false); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(flags & ~ENV_USABLE_FLAGS)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(env->lazy_fd != INVALID_HANDLE_VALUE || (env->flags & ENV_ACTIVE) != 0 || env->dxb_mmap.base)) - return MDBX_EPERM; + return LOG_IFERR(MDBX_EPERM); /* Pickup previously mdbx_env_set_flags(), * but avoid MDBX_UTTERLY_NOSYNC by disjunction */ @@ -497,7 +497,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, debug_log(MDBX_LOG_ERROR, __func__, __LINE__, "System (i.e. 
OpenBSD) requires MDBX_WRITEMAP because " "of an internal flaw(s) in a file/buffer/page cache.\n"); - return 42 /* ENOPROTOOPT */; + return LOG_IFERR(42 /* ENOPROTOOPT */); } } #endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ @@ -590,7 +590,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, env->flags = saved_me_flags | ENV_FATAL_ERROR; } } - return rc; + return LOG_IFERR(rc); } /*----------------------------------------------------------------------------*/ @@ -598,13 +598,13 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, #if !(defined(_WIN32) || defined(_WIN64)) __cold int mdbx_env_resurrect_after_fork(MDBX_env *env) { if (unlikely(!env)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(env->signature.weak != env_signature)) - return MDBX_EBADSIGN; + return LOG_IFERR(MDBX_EBADSIGN); if (unlikely(env->flags & ENV_FATAL_ERROR)) - return MDBX_PANIC; + return LOG_IFERR(MDBX_PANIC); if (unlikely((env->flags & ENV_ACTIVE) == 0)) return MDBX_SUCCESS; @@ -614,7 +614,7 @@ __cold int mdbx_env_resurrect_after_fork(MDBX_env *env) { return MDBX_SUCCESS; if (!atomic_cas32(&env->signature, env_signature, ~env_signature)) - return MDBX_EBADSIGN; + return LOG_IFERR(MDBX_EBADSIGN); if (env->txn) txn_abort(env->basal_txn); @@ -628,7 +628,7 @@ __cold int mdbx_env_resurrect_after_fork(MDBX_env *env) { env->flags |= ENV_FATAL_ERROR; } } - return rc; + return LOG_IFERR(rc); } #endif /* Windows */ @@ -637,10 +637,10 @@ __cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) { int rc = MDBX_SUCCESS; if (unlikely(!env)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(env->signature.weak != env_signature)) - return MDBX_EBADSIGN; + return LOG_IFERR(MDBX_EBADSIGN); #if MDBX_ENV_CHECKPID || !(defined(_WIN32) || defined(_WIN64)) /* Check the PID even if MDBX_ENV_CHECKPID=0 on non-Windows @@ -654,12 +654,12 @@ __cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) { if (env->dxb_mmap.base && (env->flags & 
(MDBX_RDONLY | ENV_FATAL_ERROR)) == 0 && env->basal_txn) { if (env->basal_txn->owner && env->basal_txn->owner != osal_thread_self()) - return MDBX_BUSY; + return LOG_IFERR(MDBX_BUSY); } else dont_sync = true; if (!atomic_cas32(&env->signature, env_signature, 0)) - return MDBX_EBADSIGN; + return LOG_IFERR(MDBX_EBADSIGN); if (!dont_sync) { #if defined(_WIN32) || defined(_WIN64) @@ -712,7 +712,7 @@ __cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) { VALGRIND_DESTROY_MEMPOOL(env); osal_free(env); - return rc; + return LOG_IFERR(rc); } /*----------------------------------------------------------------------------*/ @@ -889,32 +889,32 @@ __cold int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, __cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *arg, size_t bytes) { if (unlikely((env == nullptr && txn == nullptr) || arg == nullptr)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); const size_t size_before_dxbid = offsetof(MDBX_envinfo, mi_dxbid); if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && bytes != size_before_pgop_stat && bytes != size_before_dxbid) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (txn) { int err = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); if (unlikely(err != MDBX_SUCCESS)) - return err; + return LOG_IFERR(err); } if (env) { int err = check_env(env, false); if (unlikely(err != MDBX_SUCCESS)) - return err; + return LOG_IFERR(err); if (txn && unlikely(txn->env != env)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); } else { env = txn->env; } troika_t troika; - return env_info(env, txn, arg, bytes, &troika); + return LOG_IFERR(env_info(env, txn, arg, bytes, &troika)); } __cold int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *out, @@ -926,21 +926,21 @@ __cold int 
mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *out, rc = mdbx_preopen_snapinfoW(pathnameW, out, bytes); osal_free(pathnameW); } - return rc; + return LOG_IFERR(rc); } __cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, size_t bytes) { #endif /* Windows */ if (unlikely(!out)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); const size_t size_before_dxbid = offsetof(MDBX_envinfo, mi_dxbid); if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && bytes != size_before_pgop_stat && bytes != size_before_dxbid) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); memset(out, 0, bytes); if (likely(bytes > size_before_bootid)) { @@ -954,7 +954,7 @@ __cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, if (unlikely(!is_powerof2(globals.sys_pagesize) || globals.sys_pagesize < MDBX_MIN_PAGESIZE)) { ERROR("unsuitable system pagesize %u", globals.sys_pagesize); - return MDBX_INCOMPATIBLE; + return LOG_IFERR(MDBX_INCOMPATIBLE); } out->mi_sys_pagesize = globals.sys_pagesize; env.flags = MDBX_RDONLY | MDBX_NORDAHEAD | MDBX_ACCEDE | MDBX_VALIDATION; @@ -1001,7 +1001,7 @@ __cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, bailout: env_close(&env, false); - return rc; + return LOG_IFERR(rc); } /*----------------------------------------------------------------------------*/ @@ -1012,7 +1012,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t shrink_threshold, intptr_t pagesize) { int rc = check_env(env, false); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); const bool txn0_owned = env->basal_txn && env_txn0_owned(env); const bool inside_txn = txn0_owned && env->txn; @@ -1029,12 +1029,12 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, if 
(env->dxb_mmap.base) { /* env already mapped */ if (unlikely(env->flags & MDBX_RDONLY)) - return MDBX_EACCESS; + return LOG_IFERR(MDBX_EACCESS); if (!txn0_owned) { int err = lck_txn_lock(env, false); if (unlikely(err != MDBX_SUCCESS)) - return err; + return LOG_IFERR(err); should_unlock = true; env->basal_txn->tw.troika = meta_tap(env); eASSERT(env, !env->txn && !env->basal_txn->nested); @@ -1076,7 +1076,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, } else { /* env NOT yet mapped */ if (unlikely(inside_txn)) - return MDBX_PANIC; + return LOG_IFERR(MDBX_PANIC); /* is requested some auto-value for pagesize ? */ if (pagesize >= INT_MAX /* maximal */) @@ -1393,13 +1393,13 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, bailout: if (should_unlock) lck_txn_unlock(env); - return rc; + return LOG_IFERR(rc); } __cold int mdbx_env_sync_ex(MDBX_env *env, bool force, bool nonblock) { int rc = check_env(env, true); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); - return env_sync(env, force, nonblock); + return LOG_IFERR(env_sync(env, force, nonblock)); } diff --git a/src/api-extra.c b/src/api-extra.c index 1a9b8b08..8c6a6301 100644 --- a/src/api-extra.c +++ b/src/api-extra.c @@ -10,10 +10,10 @@ __cold int mdbx_reader_list(const MDBX_env *env, MDBX_reader_list_func *func, void *ctx) { int rc = check_env(env, true); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!func)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); rc = MDBX_RESULT_TRUE; int serial = 0; @@ -74,13 +74,13 @@ __cold int mdbx_reader_list(const MDBX_env *env, MDBX_reader_list_func *func, } } - return rc; + return LOG_IFERR(rc); } __cold int mdbx_reader_check(MDBX_env *env, int *dead) { if (dead) *dead = 0; - return mvcc_cleanup_dead(env, false, dead); + return LOG_IFERR(mvcc_cleanup_dead(env, false, dead)); } /*------------------------------------------------------------------------------ @@ -89,28 
+89,28 @@ __cold int mdbx_reader_check(MDBX_env *env, int *dead) { int mdbx_txn_lock(MDBX_env *env, bool dont_wait) { int rc = check_env(env, true); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(env->flags & MDBX_RDONLY)) - return MDBX_EACCESS; + return LOG_IFERR(MDBX_EACCESS); if (unlikely(env->basal_txn->owner || (env->basal_txn->flags & MDBX_TXN_FINISHED) == 0)) - return MDBX_BUSY; + return LOG_IFERR(MDBX_BUSY); - return lck_txn_lock(env, dont_wait); + return LOG_IFERR(lck_txn_lock(env, dont_wait)); } int mdbx_txn_unlock(MDBX_env *env) { int rc = check_env(env, true); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(env->flags & MDBX_RDONLY)) - return MDBX_EACCESS; + return LOG_IFERR(MDBX_EACCESS); if (unlikely(env->basal_txn->owner != osal_thread_self())) - return MDBX_THREAD_MISMATCH; + return LOG_IFERR(MDBX_THREAD_MISMATCH); if (unlikely((env->basal_txn->flags & MDBX_TXN_FINISHED) == 0)) - return MDBX_BUSY; + return LOG_IFERR(MDBX_BUSY); lck_txn_unlock(env); return MDBX_SUCCESS; diff --git a/src/api-txn.c b/src/api-txn.c index 054d1112..ab5d4df4 100644 --- a/src/api-txn.c +++ b/src/api-txn.c @@ -11,7 +11,7 @@ int mdbx_txn_straggler(const MDBX_txn *txn, int *percent) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return (rc > 0) ? -rc : rc; + return LOG_IFERR((rc > 0) ? 
-rc : rc); MDBX_env *env = txn->env; if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0)) { @@ -42,15 +42,15 @@ __cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, uint32_t *mask) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!mask)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); cursor_couple_t cx; rc = cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if ((cx.outer.tree->flags & MDBX_DUPSORT) == 0) return MDBX_RESULT_TRUE; @@ -79,21 +79,21 @@ __cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, default: ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid node-size", flags); - return MDBX_CORRUPTED; + return LOG_IFERR(MDBX_CORRUPTED); } rc = outer_next(&cx.outer, &key, &data, MDBX_NEXT_NODUP); } - return (rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc; + return LOG_IFERR((rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc); } int mdbx_canary_get(const MDBX_txn *txn, MDBX_canary *canary) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(canary == nullptr)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); *canary = txn->canary; return MDBX_SUCCESS; @@ -106,37 +106,37 @@ int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!key || !data)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); cursor_couple_t cx; rc = cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); - return cursor_seek(&cx.outer, (MDBX_val *)key, data, MDBX_SET).err; + return LOG_IFERR(cursor_seek(&cx.outer, (MDBX_val *)key, data, MDBX_SET).err); } int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data) { 
int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!key || !data)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(txn->flags & MDBX_TXN_BLOCKED)) - return MDBX_BAD_TXN; + return LOG_IFERR(MDBX_BAD_TXN); cursor_couple_t cx; rc = cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); - return cursor_ops(&cx.outer, key, data, MDBX_SET_LOWERBOUND); + return LOG_IFERR(cursor_ops(&cx.outer, key, data, MDBX_SET_LOWERBOUND)); } int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, @@ -146,21 +146,21 @@ int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!key || !data)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); cursor_couple_t cx; rc = cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); rc = cursor_seek(&cx.outer, key, data, MDBX_SET_KEY).err; if (unlikely(rc != MDBX_SUCCESS)) { if (values_count) *values_count = 0; - return rc; + return LOG_IFERR(rc); } if (values_count) { @@ -180,7 +180,7 @@ int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, int mdbx_canary_put(MDBX_txn *txn, const MDBX_canary *canary) { int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (likely(canary)) { if (txn->canary.x == canary->x && txn->canary.y == canary->y && @@ -221,7 +221,7 @@ int mdbx_canary_put(MDBX_txn *txn, const MDBX_canary *canary) { int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); const MDBX_env *env = txn->env; const ptrdiff_t offset = ptr_dist(ptr, env->dxb_mmap.base); @@ -232,7 +232,7 @@ int mdbx_is_dirty(const MDBX_txn 
*txn, const void *ptr) { if (unlikely(page->pgno != pgno || (page->flags & P_ILL_BITS) != 0)) { /* The ptr pointed into middle of a large page, * not to the beginning of a data. */ - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); } return ((txn->flags & MDBX_TXN_RDONLY) || !is_modifable(txn, page)) ? MDBX_RESULT_FALSE @@ -243,7 +243,8 @@ int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr) { * распределенных страниц. Такое может случится если mdbx_is_dirty() * вызывается после операции, в ходе которой грязная страница была * возвращена в нераспределенное пространство. */ - return (txn->flags & MDBX_TXN_RDONLY) ? MDBX_EINVAL : MDBX_RESULT_TRUE; + return (txn->flags & MDBX_TXN_RDONLY) ? LOG_IFERR(MDBX_EINVAL) + : MDBX_RESULT_TRUE; } } @@ -253,29 +254,31 @@ int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr) { * * Для режима MDBX_WRITE_MAP режима страница однозначно "не грязная", * а для режимов без MDBX_WRITE_MAP однозначно "не чистая". */ - return (txn->flags & (MDBX_WRITEMAP | MDBX_TXN_RDONLY)) ? MDBX_EINVAL - : MDBX_RESULT_TRUE; + return (txn->flags & (MDBX_WRITEMAP | MDBX_TXN_RDONLY)) + ? LOG_IFERR(MDBX_EINVAL) + : MDBX_RESULT_TRUE; } int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, const MDBX_val *data) { int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!key)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(dbi <= FREE_DBI)) - return MDBX_BAD_DBI; + return LOG_IFERR(MDBX_BAD_DBI); if (unlikely(txn->flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) - return (txn->flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS : MDBX_BAD_TXN; + return LOG_IFERR((txn->flags & MDBX_TXN_RDONLY) ? 
MDBX_EACCESS + : MDBX_BAD_TXN); cursor_couple_t cx; rc = cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); MDBX_val proxy; MDBX_cursor_op op = MDBX_SET; @@ -288,39 +291,40 @@ int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, } rc = cursor_seek(&cx.outer, (MDBX_val *)key, (MDBX_val *)data, op).err; if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); cx.outer.next = txn->cursors[dbi]; txn->cursors[dbi] = &cx.outer; rc = cursor_del(&cx.outer, flags); txn->cursors[dbi] = cx.outer.next; - return rc; + return LOG_IFERR(rc); } int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, MDBX_put_flags_t flags) { int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!key || !data)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(dbi <= FREE_DBI)) - return MDBX_BAD_DBI; + return LOG_IFERR(MDBX_BAD_DBI); if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | MDBX_ALLDUPS | MDBX_RESERVE | MDBX_APPEND | MDBX_APPENDDUP | MDBX_CURRENT | MDBX_MULTIPLE))) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(txn->flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) - return (txn->flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS : MDBX_BAD_TXN; + return LOG_IFERR((txn->flags & MDBX_TXN_RDONLY) ? 
MDBX_EACCESS + : MDBX_BAD_TXN); cursor_couple_t cx; rc = cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); cx.outer.next = txn->cursors[dbi]; txn->cursors[dbi] = &cx.outer; @@ -348,7 +352,7 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, rc = cursor_put_checklen(&cx.outer, key, data, flags); txn->cursors[dbi] = cx.outer.next; - return rc; + return LOG_IFERR(rc); } //------------------------------------------------------------------------------ @@ -383,30 +387,30 @@ int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, void *preserver_context) { int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!key || !old_data || old_data == new_data)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(old_data->iov_base == nullptr && old_data->iov_len)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(new_data == nullptr && (flags & (MDBX_CURRENT | MDBX_RESERVE)) != MDBX_CURRENT)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(dbi <= FREE_DBI)) - return MDBX_BAD_DBI; + return LOG_IFERR(MDBX_BAD_DBI); if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | MDBX_RESERVE | MDBX_APPEND | MDBX_APPENDDUP | MDBX_CURRENT))) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); cursor_couple_t cx; rc = cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); cx.outer.next = txn->cursors[dbi]; txn->cursors[dbi] = &cx.outer; @@ -427,7 +431,7 @@ int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, } else { /* в old_data буфер для сохранения предыдущего значения */ if (unlikely(new_data && old_data->iov_base == new_data->iov_base)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); MDBX_val present_data; rc = cursor_seek(&cx.outer, &present_key, &present_data, 
MDBX_SET_KEY).err; if (unlikely(rc != MDBX_SUCCESS)) { @@ -485,7 +489,7 @@ int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, bailout: txn->cursors[dbi] = cx.outer.next; - return rc; + return LOG_IFERR(rc); } static int default_value_preserver(void *context, MDBX_val *target, diff --git a/src/chk.c b/src/chk.c index 8af68b3f..83d7d74c 100644 --- a/src/chk.c +++ b/src/chk.c @@ -2019,13 +2019,13 @@ __cold int mdbx_env_chk(MDBX_env *env, const struct MDBX_chk_callbacks *cb, unsigned timeout_seconds_16dot16) { int err, rc = check_env(env, false); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!cb || !ctx || ctx->internal)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); MDBX_chk_internal_t *const chk = osal_calloc(1, sizeof(MDBX_chk_internal_t)); if (unlikely(!chk)) - return MDBX_ENOMEM; + return LOG_IFERR(MDBX_ENOMEM); chk->cb = cb; chk->usr = ctx; @@ -2101,5 +2101,5 @@ __cold int mdbx_env_chk(MDBX_env *env, const struct MDBX_chk_callbacks *cb, err = chk_scope_begin(chk, 0, MDBX_chk_finalize, nullptr, nullptr, nullptr); rc = chk_scope_end(chk, err ? 
err : rc); chk_dispose(chk); - return rc; + return LOG_IFERR(rc); } diff --git a/src/cold.c b/src/cold.c index 837f89ce..db3b0b87 100644 --- a/src/cold.c +++ b/src/cold.c @@ -229,35 +229,35 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { __cold int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_stat *dest, size_t bytes) { if (unlikely(!dest)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid); if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (likely(txn)) { if (env && unlikely(txn->env != env)) - return MDBX_EINVAL; - return stat_acc(txn, dest, bytes); + return LOG_IFERR(MDBX_EINVAL); + return LOG_IFERR(stat_acc(txn, dest, bytes)); } int err = check_env(env, true); if (unlikely(err != MDBX_SUCCESS)) - return err; + return LOG_IFERR(err); if (env->txn && env_txn0_owned(env)) /* inside write-txn */ - return stat_acc(env->txn, dest, bytes); + return LOG_IFERR(stat_acc(env->txn, dest, bytes)); MDBX_txn *tmp_txn; err = mdbx_txn_begin((MDBX_env *)env, nullptr, MDBX_TXN_RDONLY, &tmp_txn); if (unlikely(err != MDBX_SUCCESS)) - return err; + return LOG_IFERR(err); const int rc = stat_acc(tmp_txn, dest, bytes); err = mdbx_txn_abort(tmp_txn); if (unlikely(err != MDBX_SUCCESS)) - return err; - return rc; + return LOG_IFERR(err); + return LOG_IFERR(rc); } /*----------------------------------------------------------------------------*/ @@ -271,23 +271,23 @@ __cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, MDBX_warmup_flags_t flags, unsigned timeout_seconds_16dot16) { if (unlikely(env == nullptr && txn == nullptr)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(flags > (MDBX_warmup_force | MDBX_warmup_oomsafe | MDBX_warmup_lock | MDBX_warmup_touchlimit | MDBX_warmup_release))) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if 
(txn) { int err = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); if (unlikely(err != MDBX_SUCCESS)) - return err; + return LOG_IFERR(err); } if (env) { int err = check_env(env, false); if (unlikely(err != MDBX_SUCCESS)) - return err; + return LOG_IFERR(err); if (txn && unlikely(txn->env != env)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); } else { env = txn->env; } @@ -504,7 +504,7 @@ __cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, #endif } - return rc; + return LOG_IFERR(rc); } /*----------------------------------------------------------------------------*/ @@ -512,10 +512,10 @@ __cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, __cold int mdbx_env_get_fd(const MDBX_env *env, mdbx_filehandle_t *arg) { int rc = check_env(env, true); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!arg)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); *arg = env->lazy_fd; return MDBX_SUCCESS; @@ -525,21 +525,21 @@ __cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, bool onoff) { int rc = check_env(env, false); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(flags & ((env->flags & ENV_ACTIVE) ? 
~ENV_CHANGEABLE_FLAGS : ~ENV_USABLE_FLAGS))) - return MDBX_EPERM; + return LOG_IFERR(MDBX_EPERM); if (unlikely(env->flags & MDBX_RDONLY)) - return MDBX_EACCESS; + return LOG_IFERR(MDBX_EACCESS); const bool lock_needed = (env->flags & ENV_ACTIVE) && !env_txn0_owned(env); bool should_unlock = false; if (lock_needed) { rc = lck_txn_lock(env, false); - if (unlikely(rc)) - return rc; + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); should_unlock = true; } @@ -556,10 +556,10 @@ __cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, __cold int mdbx_env_get_flags(const MDBX_env *env, unsigned *arg) { int rc = check_env(env, false); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!arg)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); *arg = env->flags & ENV_USABLE_FLAGS; return MDBX_SUCCESS; @@ -568,7 +568,7 @@ __cold int mdbx_env_get_flags(const MDBX_env *env, unsigned *arg) { __cold int mdbx_env_set_userctx(MDBX_env *env, void *ctx) { int rc = check_env(env, false); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); env->userctx = ctx; return MDBX_SUCCESS; @@ -581,21 +581,21 @@ __cold void *mdbx_env_get_userctx(const MDBX_env *env) { __cold int mdbx_env_set_assert(MDBX_env *env, MDBX_assert_func *func) { int rc = check_env(env, false); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); #if MDBX_DEBUG env->assert_func = func; return MDBX_SUCCESS; #else (void)func; - return MDBX_ENOSYS; + return LOG_IFERR(MDBX_ENOSYS); #endif } __cold int mdbx_env_set_hsr(MDBX_env *env, MDBX_hsr_func *hsr) { int rc = check_env(env, false); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); env->hsr_callback = hsr; return MDBX_SUCCESS; @@ -610,10 +610,10 @@ __cold MDBX_hsr_func *mdbx_env_get_hsr(const MDBX_env *env) { __cold int mdbx_env_get_pathW(const MDBX_env *env, const wchar_t **arg) { int rc = check_env(env, true); if (unlikely(rc != MDBX_SUCCESS)) - return 
rc; + return LOG_IFERR(rc); if (unlikely(!arg)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); *arg = env->pathname.specified; return MDBX_SUCCESS; @@ -623,10 +623,10 @@ __cold int mdbx_env_get_pathW(const MDBX_env *env, const wchar_t **arg) { __cold int mdbx_env_get_path(const MDBX_env *env, const char **arg) { int rc = check_env(env, true); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!arg)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); #if defined(_WIN32) || defined(_WIN64) if (!env->pathname_char) { @@ -643,17 +643,17 @@ __cold int mdbx_env_get_path(const MDBX_env *env, const char **arg) { rc = mb_len ? MDBX_SUCCESS : (int)GetLastError(); } if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); char *const mb_pathname = osal_malloc(mb_len); if (!mb_pathname) - return MDBX_ENOMEM; + return LOG_IFERR(MDBX_ENOMEM); if (mb_len != (size_t)WideCharToMultiByte( CP_THREAD_ACP, flags, env->pathname.specified, -1, mb_pathname, (int)mb_len, nullptr, nullptr)) { rc = (int)GetLastError(); osal_free(mb_pathname); - return rc; + return LOG_IFERR(rc); } if (env->pathname_char || InterlockedCompareExchangePointer((PVOID volatile *)&env->pathname_char, diff --git a/src/copy.c b/src/copy.c index ad80a815..c1b7ef7d 100644 --- a/src/copy.c +++ b/src/copy.c @@ -844,26 +844,26 @@ __cold int mdbx_txn_copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, rc = copy2fd(txn, fd, flags); if (flags & MDBX_CP_DISPOSE_TXN) mdbx_txn_abort(txn); - return rc; + return LOG_IFERR(rc); } __cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, MDBX_copy_flags_t flags) { if (unlikely(flags & (MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN))) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); int rc = check_env(env, true); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); MDBX_txn *txn = nullptr; rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return 
LOG_IFERR(rc); rc = copy2fd(txn, fd, flags | MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN); mdbx_txn_abort(txn); - return rc; + return LOG_IFERR(rc); } __cold int mdbx_txn_copy2pathname(MDBX_txn *txn, const char *dest_path, @@ -875,7 +875,7 @@ __cold int mdbx_txn_copy2pathname(MDBX_txn *txn, const char *dest_path, rc = mdbx_txn_copy2pathnameW(txn, dest_pathW, flags); osal_free(dest_pathW); } - return rc; + return LOG_IFERR(rc); } __cold int mdbx_txn_copy2pathnameW(MDBX_txn *txn, const wchar_t *dest_path, @@ -886,7 +886,7 @@ __cold int mdbx_txn_copy2pathnameW(MDBX_txn *txn, const wchar_t *dest_path, rc = copy2pathname(txn, dest_path, flags); if (flags & MDBX_CP_DISPOSE_TXN) mdbx_txn_abort(txn); - return rc; + return LOG_IFERR(rc); } __cold int mdbx_env_copy(MDBX_env *env, const char *dest_path, @@ -898,26 +898,26 @@ __cold int mdbx_env_copy(MDBX_env *env, const char *dest_path, rc = mdbx_env_copyW(env, dest_pathW, flags); osal_free(dest_pathW); } - return rc; + return LOG_IFERR(rc); } __cold int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest_path, MDBX_copy_flags_t flags) { #endif /* Windows */ if (unlikely(flags & (MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN))) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); int rc = check_env(env, true); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); MDBX_txn *txn = nullptr; rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); rc = copy2pathname(txn, dest_path, flags | MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN); mdbx_txn_abort(txn); - return rc; + return LOG_IFERR(rc); } diff --git a/src/dbi.c b/src/dbi.c index fb6577c3..109198d5 100644 --- a/src/dbi.c +++ b/src/dbi.c @@ -745,35 +745,35 @@ static defer_free_item_t *dbi_close_locked(MDBX_env *env, MDBX_dbi dbi) { int mdbx_dbi_open(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi) { - return dbi_open_cstr(txn, name, flags, dbi, nullptr, nullptr); + return 
LOG_IFERR(dbi_open_cstr(txn, name, flags, dbi, nullptr, nullptr)); } int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi) { - return dbi_open(txn, name, flags, dbi, nullptr, nullptr); + return LOG_IFERR(dbi_open(txn, name, flags, dbi, nullptr, nullptr)); } int mdbx_dbi_open_ex(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { - return dbi_open_cstr(txn, name, flags, dbi, keycmp, datacmp); + return LOG_IFERR(dbi_open_cstr(txn, name, flags, dbi, keycmp, datacmp)); } int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { - return dbi_open(txn, name, flags, dbi, keycmp, datacmp); + return LOG_IFERR(dbi_open(txn, name, flags, dbi, keycmp, datacmp)); } __cold int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); cursor_couple_t cx; rc = cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (txn->dbs[dbi].height) { cx.outer.next = txn->cursors[dbi]; @@ -782,7 +782,7 @@ __cold int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { dbi == MAIN_DBI || (cx.outer.tree->flags & MDBX_DUPSORT)); txn->cursors[dbi] = cx.outer.next; if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); } /* Invalidate the dropped DB's cursors */ @@ -820,12 +820,12 @@ __cold int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { txn->dbi_state[dbi] = DBI_LINDO | DBI_OLDEN; rc = osal_fastmutex_acquire(&env->dbi_lock); if (likely(rc == MDBX_SUCCESS)) - return defer_and_release(env, dbi_close_locked(env, dbi)); + return LOG_IFERR(defer_and_release(env, dbi_close_locked(env, dbi))); } } } txn->flags |= MDBX_TXN_ERROR; - return rc; + return LOG_IFERR(rc); } __cold int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const 
char *name_cstr) { @@ -838,22 +838,22 @@ __cold int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name_cstr) { thunk.iov_base = (void *)name_cstr; name = &thunk; } - return mdbx_dbi_rename2(txn, dbi, name); + return LOG_IFERR(mdbx_dbi_rename2(txn, dbi, name)); } int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { int rc = check_env(env, true); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(dbi < CORE_DBS)) - return (dbi == MAIN_DBI) ? MDBX_SUCCESS : MDBX_BAD_DBI; + return (dbi == MAIN_DBI) ? MDBX_SUCCESS : LOG_IFERR(MDBX_BAD_DBI); if (unlikely(dbi >= env->max_dbi)) - return MDBX_BAD_DBI; + return LOG_IFERR(MDBX_BAD_DBI); if (unlikely(dbi < CORE_DBS || dbi >= env->max_dbi)) - return MDBX_BAD_DBI; + return LOG_IFERR(MDBX_BAD_DBI); rc = osal_fastmutex_acquire(&env->dbi_lock); if (likely(rc == MDBX_SUCCESS && dbi < env->n_dbi)) { @@ -886,7 +886,7 @@ int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO) { bailout_dirty_dbi: osal_fastmutex_release(&env->dbi_lock); - return MDBX_DANGLING_DBI; + return LOG_IFERR(MDBX_DANGLING_DBI); } osal_memory_barrier(); if (unlikely(hazard != env->txn)) @@ -903,21 +903,21 @@ int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { } rc = defer_and_release(env, dbi_close_locked(env, dbi)); } - return rc; + return LOG_IFERR(rc); } int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, unsigned *state) { int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR - MDBX_TXN_PARKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!flags || !state)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); rc = dbi_check(txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); *flags = txn->dbs[dbi].flags & DB_PERSISTENT_FLAGS; *state = @@ -930,19 +930,19 @@ __cold int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *new_name) { int rc = check_txn_rw(txn, 
MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(new_name == MDBX_CHK_MAIN || new_name->iov_base == MDBX_CHK_MAIN || new_name == MDBX_CHK_GC || new_name->iov_base == MDBX_CHK_GC || new_name == MDBX_CHK_META || new_name->iov_base == MDBX_CHK_META)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(dbi < CORE_DBS)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); rc = dbi_check(txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); rc = osal_fastmutex_acquire(&txn->env->dbi_lock); if (likely(rc == MDBX_SUCCESS)) { @@ -952,7 +952,7 @@ __cold int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, defer_and_release(txn->env, pair.defer); rc = pair.err; } - return rc; + return LOG_IFERR(rc); } static void stat_get(const tree_t *db, MDBX_stat *st, size_t bytes) { @@ -970,26 +970,26 @@ __cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, size_t bytes) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!dest)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); rc = dbi_check(txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid); if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(txn->flags & MDBX_TXN_BLOCKED)) - return MDBX_BAD_TXN; + return LOG_IFERR(MDBX_BAD_TXN); if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) { rc = tbl_fetch((MDBX_txn *)txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); } dest->ms_psize = txn->env->ps; @@ -1024,16 +1024,16 @@ __cold const tree_t *dbi_dig(const MDBX_txn *txn, const size_t dbi, __cold int mdbx_enumerate_tables(const MDBX_txn *txn, MDBX_table_enum_func *func, void *ctx) { if (unlikely(!func)) - return MDBX_EINVAL; + 
return LOG_IFERR(MDBX_EINVAL); int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); cursor_couple_t cx; rc = cursor_init(&cx.outer, txn, MAIN_DBI); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); cx.outer.next = txn->cursors[MAIN_DBI]; txn->cursors[MAIN_DBI] = &cx.outer; @@ -1076,5 +1076,5 @@ __cold int mdbx_enumerate_tables(const MDBX_txn *txn, bailout: txn->cursors[MAIN_DBI] = cx.outer.next; - return rc; + return LOG_IFERR(rc); } diff --git a/src/env-opts.c b/src/env-opts.c index 659fb5fa..e22d1a6a 100644 --- a/src/env-opts.c +++ b/src/env-opts.c @@ -105,7 +105,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, uint64_t value) { int err = check_env(env, false); if (unlikely(err != MDBX_SUCCESS)) - return err; + return LOG_IFERR(err); const bool lock_needed = ((env->flags & ENV_ACTIVE) && env->basal_txn && !env_txn0_owned(env)); @@ -115,11 +115,11 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, if (value == /* default */ UINT64_MAX) value = MAX_WRITE; if (unlikely(env->flags & MDBX_RDONLY)) - return MDBX_EACCESS; + return LOG_IFERR(MDBX_EACCESS); if (unlikely(!(env->flags & ENV_ACTIVE))) - return MDBX_EPERM; + return LOG_IFERR(MDBX_EPERM); if (unlikely(value > SIZE_MAX - 65536)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); value = bytes2pgno(env, (size_t)value + env->ps - 1); if ((uint32_t)value != atomic_load32(&env->lck->autosync_threshold, mo_AcquireRelease) && @@ -138,11 +138,11 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, if (value == /* default */ UINT64_MAX) value = 2780315 /* 42.42424 секунды */; if (unlikely(env->flags & MDBX_RDONLY)) - return MDBX_EACCESS; + return LOG_IFERR(MDBX_EACCESS); if (unlikely(!(env->flags & ENV_ACTIVE))) - return MDBX_EPERM; + return LOG_IFERR(MDBX_EPERM); if (unlikely(value > UINT32_MAX)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); 
value = osal_16dot16_to_monotime((uint32_t)value); if (value != atomic_load64(&env->lck->autosync_period, mo_AcquireRelease) && atomic_store64(&env->lck->autosync_period, value, mo_Relaxed) @@ -159,9 +159,9 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, if (value == /* default */ UINT64_MAX) value = 42; if (unlikely(value > MDBX_MAX_DBI)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(env->dxb_mmap.base)) - return MDBX_EPERM; + return LOG_IFERR(MDBX_EPERM); env->max_dbi = (unsigned)value + CORE_DBS; break; @@ -169,9 +169,9 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, if (value == /* default */ UINT64_MAX) value = MDBX_READERS_LIMIT; if (unlikely(value < 1 || value > MDBX_READERS_LIMIT)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(env->dxb_mmap.base)) - return MDBX_EPERM; + return LOG_IFERR(MDBX_EPERM); env->max_readers = (unsigned)value; break; @@ -179,12 +179,12 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, if (value == /* default */ UINT64_MAX) value = INT_MAX; if (unlikely(value > INT_MAX)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (env->options.dp_reserve_limit != (unsigned)value) { if (lock_needed) { err = lck_txn_lock(env, false); if (unlikely(err != MDBX_SUCCESS)) - return err; + return LOG_IFERR(err); should_unlock = true; } env->options.dp_reserve_limit = (unsigned)value; @@ -206,7 +206,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, env->options.flags.non_auto.rp_augment_limit = 0; env->options.rp_augment_limit = default_rp_augment_limit(env); } else if (unlikely(value > PAGELIST_LIMIT)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); else { env->options.flags.non_auto.rp_augment_limit = 1; env->options.rp_augment_limit = (unsigned)value; @@ -217,13 +217,13 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, if (value == /* default */ 
UINT64_MAX) value = 0; if (unlikely(value > UINT32_MAX)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(env->flags & MDBX_RDONLY)) - return MDBX_EACCESS; + return LOG_IFERR(MDBX_EACCESS); value = osal_16dot16_to_monotime((uint32_t)value); if (value != env->options.gc_time_limit) { if (env->txn && lock_needed) - return MDBX_EPERM; + return LOG_IFERR(MDBX_EPERM); env->options.gc_time_limit = value; if (!env->options.flags.non_auto.rp_augment_limit) env->options.rp_augment_limit = default_rp_augment_limit(env); @@ -235,13 +235,13 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, if (value == /* default */ UINT64_MAX) value = PAGELIST_LIMIT; if (unlikely(value > PAGELIST_LIMIT || value < CURSOR_STACK_SIZE * 4)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(env->flags & MDBX_RDONLY)) - return MDBX_EACCESS; + return LOG_IFERR(MDBX_EACCESS); if (lock_needed) { err = lck_txn_lock(env, false); if (unlikely(err != MDBX_SUCCESS)) - return err; + return LOG_IFERR(err); should_unlock = true; } if (env->txn) @@ -269,21 +269,21 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, if (value == /* default */ UINT64_MAX) value = 8; if (unlikely(value > 255)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); env->options.spill_max_denominator = (uint8_t)value; break; case MDBX_opt_spill_min_denominator: if (value == /* default */ UINT64_MAX) value = 8; if (unlikely(value > 255)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); env->options.spill_min_denominator = (uint8_t)value; break; case MDBX_opt_spill_parent4child_denominator: if (value == /* default */ UINT64_MAX) value = 0; if (unlikely(value > 255)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); env->options.spill_parent4child_denominator = (uint8_t)value; break; @@ -291,7 +291,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, if (value == /* default */ UINT64_MAX) value = 64; if 
(unlikely(value > 255)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); env->options.dp_loose_limit = (uint8_t)value; break; @@ -299,7 +299,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, if (value == /* default */ UINT64_MAX) value = 65536 / 4 /* 25% */; if (unlikely(value < 8192 || value > 32768)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); env->options.merge_threshold_16dot16_percent = (unsigned)value; recalculate_merge_thresholds(env); break; @@ -392,33 +392,33 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, break; default: - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); } if (should_unlock) lck_txn_unlock(env); - return err; + return LOG_IFERR(err); } __cold int mdbx_env_get_option(const MDBX_env *env, const MDBX_option_t option, uint64_t *pvalue) { int err = check_env(env, false); if (unlikely(err != MDBX_SUCCESS)) - return err; + return LOG_IFERR(err); if (unlikely(!pvalue)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); switch (option) { case MDBX_opt_sync_bytes: if (unlikely(!(env->flags & ENV_ACTIVE))) - return MDBX_EPERM; + return LOG_IFERR(MDBX_EPERM); *pvalue = pgno2bytes( env, atomic_load32(&env->lck->autosync_threshold, mo_Relaxed)); break; case MDBX_opt_sync_period: if (unlikely(!(env->flags & ENV_ACTIVE))) - return MDBX_EPERM; + return LOG_IFERR(MDBX_EPERM); *pvalue = osal_monotime_to_16dot16( atomic_load64(&env->lck->autosync_period, mo_Relaxed)); break; @@ -501,7 +501,7 @@ __cold int mdbx_env_get_option(const MDBX_env *env, const MDBX_option_t option, break; default: - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); } return MDBX_SUCCESS; diff --git a/src/logging_and_debug.c b/src/logging_and_debug.c index 2ce08d05..0fc4098a 100644 --- a/src/logging_and_debug.c +++ b/src/logging_and_debug.c @@ -59,6 +59,16 @@ __cold void debug_log(int level, const char *function, int line, va_end(args); } +__cold int log_error(const int err, const char *func, 
unsigned line) { + assert(err != MDBX_SUCCESS); + if (unlikely(globals.loglevel >= MDBX_LOG_DEBUG)) { + char buf[256]; + debug_log(MDBX_LOG_ERROR, func, line, "error %d (%s)\n", err, + mdbx_strerror_r(err, buf, sizeof(buf))); + } + return err; +} + /* Dump a val in ascii or hexadecimal. */ __cold const char *mdbx_dump_val(const MDBX_val *val, char *const buf, const size_t bufsize) { diff --git a/src/logging_and_debug.h b/src/logging_and_debug.h index bfb45631..6b9ef145 100644 --- a/src/logging_and_debug.h +++ b/src/logging_and_debug.h @@ -158,3 +158,11 @@ MDBX_INTERNAL const char *pagetype_caption(const uint8_t type, #define DKEY_DEBUG(x) ("-") #define DVAL_DEBUG(x) ("-") #endif + +MDBX_INTERNAL int log_error(const int err, const char *func, unsigned line); + +static inline int log_if_error(int err, const char *func, unsigned line) { + return likely(err == MDBX_SUCCESS) ? err : log_error(err, func, line); +} + +#define LOG_IFERR(err) log_if_error((err), __func__, __LINE__) diff --git a/src/misc.c b/src/misc.c index 06755121..367bcdfb 100644 --- a/src/misc.c +++ b/src/misc.c @@ -10,7 +10,7 @@ __cold int mdbx_is_readahead_reasonable(size_t volume, intptr_t redundancy) { intptr_t pagesize, total_ram_pages; int err = mdbx_get_sysraminfo(&pagesize, &total_ram_pages, nullptr); if (unlikely(err != MDBX_SUCCESS)) - return err; + return LOG_IFERR(err); const int log2page = log2n_powerof2(pagesize); const intptr_t volume_pages = (volume + pagesize - 1) >> log2page; @@ -24,7 +24,7 @@ __cold int mdbx_is_readahead_reasonable(size_t volume, intptr_t redundancy) { intptr_t avail_ram_pages; err = mdbx_get_sysraminfo(nullptr, nullptr, &avail_ram_pages); if (unlikely(err != MDBX_SUCCESS)) - return err; + return LOG_IFERR(err); return (volume_pages + redundancy_pages >= avail_ram_pages) ? 
MDBX_RESULT_FALSE @@ -35,16 +35,16 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, uint64_t increment) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); rc = dbi_check(txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) { rc = tbl_fetch(txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); } tree_t *dbs = &txn->dbs[dbi]; @@ -95,10 +95,10 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, cursor_couple_t cx; rc = cursor_init(&cx.outer, txn, MAIN_DBI); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); rc = tree_search(&cx.outer, nullptr, Z_MODIFY | Z_ROOTONLY); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); } } dbs->sequence = new; diff --git a/src/mvcc-readers.c b/src/mvcc-readers.c index 4bfdfa5b..f342599f 100644 --- a/src/mvcc-readers.c +++ b/src/mvcc-readers.c @@ -536,14 +536,14 @@ __cold txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler) { __cold int mdbx_thread_register(const MDBX_env *env) { int rc = check_env(env, true); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!env->lck_mmap.lck)) - return (env->flags & MDBX_EXCLUSIVE) ? MDBX_EINVAL : MDBX_EPERM; + return LOG_IFERR((env->flags & MDBX_EXCLUSIVE) ? 
MDBX_EINVAL : MDBX_EPERM); if (unlikely((env->flags & ENV_TXKEY) == 0)) { eASSERT(env, env->flags & MDBX_NOSTICKYTHREADS); - return MDBX_EINVAL /* MDBX_NOSTICKYTHREADS mode */; + return LOG_IFERR(MDBX_EINVAL) /* MDBX_NOSTICKYTHREADS mode */; } eASSERT(env, (env->flags & (MDBX_NOSTICKYTHREADS | ENV_TXKEY)) == ENV_TXKEY); @@ -552,17 +552,17 @@ __cold int mdbx_thread_register(const MDBX_env *env) { eASSERT(env, r->pid.weak == env->pid); eASSERT(env, r->tid.weak == osal_thread_self()); if (unlikely(r->pid.weak != env->pid)) - return MDBX_BAD_RSLOT; + return LOG_IFERR(MDBX_BAD_RSLOT); return MDBX_RESULT_TRUE /* already registered */; } - return mvcc_bind_slot((MDBX_env *)env).err; + return LOG_IFERR(mvcc_bind_slot((MDBX_env *)env).err); } __cold int mdbx_thread_unregister(const MDBX_env *env) { int rc = check_env(env, true); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!env->lck_mmap.lck)) return MDBX_RESULT_TRUE; @@ -580,11 +580,11 @@ __cold int mdbx_thread_unregister(const MDBX_env *env) { eASSERT(env, r->pid.weak == env->pid); eASSERT(env, r->tid.weak == osal_thread_self()); if (unlikely(r->pid.weak != env->pid || r->tid.weak != osal_thread_self())) - return MDBX_BAD_RSLOT; + return LOG_IFERR(MDBX_BAD_RSLOT); eASSERT(env, r->txnid.weak >= SAFE64_INVALID_THRESHOLD); if (unlikely(r->txnid.weak < SAFE64_INVALID_THRESHOLD)) - return MDBX_BUSY /* transaction is still active */; + return LOG_IFERR(MDBX_BUSY) /* transaction is still active */; atomic_store32(&r->pid, 0, mo_Relaxed); atomic_store32(&env->lck->rdt_refresh_flag, true, mo_AcquireRelease); diff --git a/src/osal.c b/src/osal.c index 3aa30349..32c07c3d 100644 --- a/src/osal.c +++ b/src/osal.c @@ -3405,7 +3405,7 @@ __cold static bin128_t osal_bootid(void) { __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, intptr_t *avail_pages) { if (!page_size && !total_pages && !avail_pages) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (total_pages) 
*total_pages = -1; if (avail_pages) @@ -3415,7 +3415,7 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, if (page_size) *page_size = pagesize; if (unlikely(pagesize < MDBX_MIN_PAGESIZE || !is_powerof2(pagesize))) - return MDBX_INCOMPATIBLE; + return LOG_IFERR(MDBX_INCOMPATIBLE); MDBX_MAYBE_UNUSED const int log2page = log2n_powerof2(pagesize); assert(pagesize == (INT64_C(1) << log2page)); @@ -3426,7 +3426,7 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, memset(&info, 0, sizeof(info)); info.dwLength = sizeof(info); if (!GlobalMemoryStatusEx(&info)) - return (int)GetLastError(); + return LOG_IFERR((int)GetLastError()); #endif if (total_pages) { @@ -3435,11 +3435,11 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, #elif defined(_SC_PHYS_PAGES) const intptr_t total_ram_pages = sysconf(_SC_PHYS_PAGES); if (total_ram_pages == -1) - return errno; + return LOG_IFERR(errno); #elif defined(_SC_AIX_REALMEM) const intptr_t total_ram_Kb = sysconf(_SC_AIX_REALMEM); if (total_ram_Kb == -1) - return errno; + return LOG_IFERR(errno); const intptr_t total_ram_pages = (total_ram_Kb << 10) >> log2page; #elif defined(HW_USERMEM) || defined(HW_PHYSMEM64) || defined(HW_MEMSIZE) || \ defined(HW_PHYSMEM) @@ -3461,16 +3461,16 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, #endif mib, ARRAY_LENGTH(mib), &ram, &len, nullptr, 0) != 0) - return errno; + return LOG_IFERR(errno); if (len != sizeof(ram)) - return MDBX_ENOSYS; + return LOG_IFERR(MDBX_ENOSYS); const intptr_t total_ram_pages = (intptr_t)(ram >> log2page); #else #error "FIXME: Get User-accessible or physical RAM" #endif *total_pages = total_ram_pages; if (total_ram_pages < 1) - return MDBX_ENOSYS; + return LOG_IFERR(MDBX_ENOSYS); } if (avail_pages) { @@ -3479,7 +3479,7 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, #elif defined(_SC_AVPHYS_PAGES) const intptr_t avail_ram_pages = 
sysconf(_SC_AVPHYS_PAGES); if (avail_ram_pages == -1) - return errno; + return LOG_IFERR(errno); #elif defined(__MACH__) mach_msg_type_number_t count = HOST_VM_INFO_COUNT; vm_statistics_data_t vmstat; @@ -3488,7 +3488,7 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, (host_info_t)&vmstat, &count); mach_port_deallocate(mach_task_self(), mport); if (unlikely(kerr != KERN_SUCCESS)) - return MDBX_ENOSYS; + return LOG_IFERR(MDBX_ENOSYS); const intptr_t avail_ram_pages = vmstat.free_count; #elif defined(VM_TOTAL) || defined(VM_METER) struct vmtotal info; @@ -3506,16 +3506,16 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, #endif mib, ARRAY_LENGTH(mib), &info, &len, nullptr, 0) != 0) - return errno; + return LOG_IFERR(errno); if (len != sizeof(info)) - return MDBX_ENOSYS; + return LOG_IFERR(MDBX_ENOSYS); const intptr_t avail_ram_pages = info.t_free; #else #error "FIXME: Get Available RAM" #endif *avail_pages = avail_ram_pages; if (avail_ram_pages < 1) - return MDBX_ENOSYS; + return LOG_IFERR(MDBX_ENOSYS); } return MDBX_SUCCESS; diff --git a/src/range-estimate.c b/src/range-estimate.c index 51b19538..2deb3905 100644 --- a/src/range-estimate.c +++ b/src/range-estimate.c @@ -154,13 +154,13 @@ __hot int mdbx_estimate_distance(const MDBX_cursor *first, ptrdiff_t *distance_items) { if (unlikely(first == nullptr || last == nullptr || distance_items == nullptr)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); *distance_items = 0; diff_t dr; int rc = cursor_diff(last, first, &dr); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); cASSERT(first, dr.diff || inner_pointed(first) == inner_pointed(last)); if (unlikely(dr.diff == 0) && inner_pointed(first)) { @@ -168,7 +168,7 @@ __hot int mdbx_estimate_distance(const MDBX_cursor *first, last = &last->subcur->cursor; rc = cursor_diff(first, last, &dr); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); } if (likely(dr.diff != 0)) 
@@ -182,23 +182,24 @@ __hot int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, ptrdiff_t *distance_items) { if (unlikely(cursor == nullptr || distance_items == nullptr || move_op == MDBX_GET_CURRENT || move_op == MDBX_GET_MULTIPLE)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(cursor->signature != cur_signature_live)) - return (cursor->signature == cur_signature_ready4dispose) ? MDBX_EINVAL - : MDBX_EBADSIGN; + return LOG_IFERR((cursor->signature == cur_signature_ready4dispose) + ? MDBX_EINVAL + : MDBX_EBADSIGN); int rc = check_txn(cursor->txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!is_pointed(cursor))) - return MDBX_ENODATA; + return LOG_IFERR(MDBX_ENODATA); cursor_couple_t next; rc = cursor_init(&next.outer, cursor->txn, cursor_dbi(cursor)); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); cursor_cpstk(cursor, &next.outer); if (cursor->tree->flags & MDBX_DUPSORT) { @@ -211,7 +212,7 @@ __hot int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, const unsigned mask = 1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | 1 << MDBX_SET_KEY; if (unlikely(mask & (1 << move_op))) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); stub_data.iov_base = nullptr; stub_data.iov_len = 0; data = &stub_data; @@ -223,7 +224,7 @@ __hot int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, 1 << MDBX_SET_KEY | 1 << MDBX_SET | 1 << MDBX_SET_RANGE; if (unlikely(mask & (1 << move_op))) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); stub_key.iov_base = nullptr; stub_key.iov_len = 0; key = &stub_key; @@ -233,7 +234,7 @@ __hot int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, rc = cursor_ops(&next.outer, key, data, move_op); if (unlikely(rc != MDBX_SUCCESS && (rc != MDBX_NOTFOUND || !is_pointed(&next.outer)))) - return rc; + return LOG_IFERR(rc); if (move_op == MDBX_LAST) { next.outer.flags |= z_eof_hard; @@ -249,26 +250,26 
@@ __hot int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, ptrdiff_t *size_items) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!size_items)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(begin_data && (begin_key == nullptr || begin_key == MDBX_EPSILON))) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(end_data && (end_key == nullptr || end_key == MDBX_EPSILON))) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(begin_key == MDBX_EPSILON && end_key == MDBX_EPSILON)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); cursor_couple_t begin; /* LY: first, initialize cursor to refresh a DB in case it have DB_STALE */ rc = cursor_init(&begin.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(begin.outer.tree->items == 0)) { *size_items = 0; @@ -284,18 +285,20 @@ __hot int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, rc = outer_first(&begin.outer, nullptr, nullptr); if (unlikely(end_key == MDBX_EPSILON)) { /* LY: FIRST..+epsilon case */ - return (rc == MDBX_SUCCESS) - ? mdbx_cursor_count(&begin.outer, (size_t *)size_items) - : rc; + return LOG_IFERR( + (rc == MDBX_SUCCESS) + ? mdbx_cursor_count(&begin.outer, (size_t *)size_items) + : rc); } } else { if (unlikely(begin_key == MDBX_EPSILON)) { if (end_key == nullptr) { /* LY: -epsilon..LAST case */ rc = outer_last(&begin.outer, nullptr, nullptr); - return (rc == MDBX_SUCCESS) - ? mdbx_cursor_count(&begin.outer, (size_t *)size_items) - : rc; + return LOG_IFERR( + (rc == MDBX_SUCCESS) + ? mdbx_cursor_count(&begin.outer, (size_t *)size_items) + : rc); } /* LY: -epsilon..value case */ assert(end_key != MDBX_EPSILON); @@ -313,7 +316,7 @@ __hot int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, .err; if (unlikely(rc != MDBX_SUCCESS)) { *size_items = 0; - return (rc == MDBX_NOTFOUND) ? 
MDBX_SUCCESS : rc; + return LOG_IFERR((rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc); } *size_items = 1; if (inner_pointed(&begin.outer)) @@ -329,21 +332,21 @@ __hot int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val proxy_data = {nullptr, 0}; if (begin_data) proxy_data = *begin_data; - rc = cursor_seek(&begin.outer, &proxy_key, &proxy_data, - MDBX_SET_LOWERBOUND) - .err; + rc = LOG_IFERR(cursor_seek(&begin.outer, &proxy_key, &proxy_data, + MDBX_SET_LOWERBOUND) + .err); } } if (unlikely(rc != MDBX_SUCCESS)) { if (rc != MDBX_NOTFOUND || !is_pointed(&begin.outer)) - return rc; + return LOG_IFERR(rc); } cursor_couple_t end; rc = cursor_init(&end.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (!end_key) { rc = outer_last(&end.outer, nullptr, nullptr); end.outer.flags |= z_eof_hard; @@ -358,12 +361,12 @@ __hot int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, } if (unlikely(rc != MDBX_SUCCESS)) { if (rc != MDBX_NOTFOUND || !is_pointed(&end.outer)) - return rc; + return LOG_IFERR(rc); } rc = mdbx_estimate_distance(&begin.outer, &end.outer, size_items); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); assert(*size_items >= -(ptrdiff_t)begin.outer.tree->items && *size_items <= (ptrdiff_t)begin.outer.tree->items); From 81a8127084d9a6a7777bb375e029062330e51979 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 27 Nov 2024 18:26:24 +0300 Subject: [PATCH 357/443] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20"may=20be=20used=20uninitializ?= =?UTF-8?q?ed"=20=D0=BF=D1=80=D0=B5=D0=B4=D1=83=D0=BF=D1=80=D0=B5=D0=B6?= =?UTF-8?q?=D0=B4=D0=B5=D0=BD=D0=B8=D0=B9=20=D0=B2=20LTO-=D1=81=D0=B1?= =?UTF-8?q?=D1=80=D0=BA=D0=B0=D1=85=20=D0=B8=D0=B7-=D0=B7=D0=B0=20=D1=83?= =?UTF-8?q?=D1=81=D0=BB=D0=BE=D0=B6=D0=BD=D0=B5=D0=BD=D0=B8=D1=8F=20SSA/CT?= 
=?UTF-8?q?F=20=D0=B2=D1=81=D0=BB=D0=B5=D0=B4=D1=81=D1=82=D0=B2=D0=B8?= =?UTF-8?q?=D0=B5=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D1=8F=20LOG=5FIFERR().?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/api-txn.c | 18 +++++++++------- src/cold.c | 10 +++++---- src/dbi.c | 47 ++++++++++++++++++++++++++--------------- src/logging_and_debug.c | 4 +++- src/logging_and_debug.h | 21 ++++++++++++++++-- src/misc.c | 10 ++++++--- 6 files changed, 75 insertions(+), 35 deletions(-) diff --git a/src/api-txn.c b/src/api-txn.c index ab5d4df4..d75f05ff 100644 --- a/src/api-txn.c +++ b/src/api-txn.c @@ -40,13 +40,14 @@ int mdbx_txn_straggler(const MDBX_txn *txn, int *percent) __cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, uint32_t *mask) { + if (unlikely(!mask)) + return LOG_IFERR(MDBX_EINVAL); + + *mask = 0; int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); - if (unlikely(!mask)) - return LOG_IFERR(MDBX_EINVAL); - cursor_couple_t cx; rc = cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) @@ -56,7 +57,6 @@ __cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val key, data; rc = outer_first(&cx.outer, &key, &data); - *mask = 0; while (rc == MDBX_SUCCESS) { const node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); @@ -88,13 +88,15 @@ __cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, } int mdbx_canary_get(const MDBX_txn *txn, MDBX_canary *canary) { - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - if (unlikely(canary == nullptr)) return LOG_IFERR(MDBX_EINVAL); + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) { + memset(canary, 0, sizeof(*canary)); + return LOG_IFERR(rc); + } + *canary = txn->canary; return MDBX_SUCCESS; } diff --git a/src/cold.c 
b/src/cold.c index db3b0b87..53f0a6a0 100644 --- a/src/cold.c +++ b/src/cold.c @@ -554,13 +554,15 @@ __cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, } __cold int mdbx_env_get_flags(const MDBX_env *env, unsigned *arg) { - int rc = check_env(env, false); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - if (unlikely(!arg)) return LOG_IFERR(MDBX_EINVAL); + int rc = check_env(env, false); + if (unlikely(rc != MDBX_SUCCESS)) { + *arg = 0; + return LOG_IFERR(rc); + } + *arg = env->flags & ENV_USABLE_FLAGS; return MDBX_SUCCESS; } diff --git a/src/dbi.c b/src/dbi.c index 109198d5..888f0e03 100644 --- a/src/dbi.c +++ b/src/dbi.c @@ -908,21 +908,26 @@ int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, unsigned *state) { - int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR - MDBX_TXN_PARKED); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - if (unlikely(!flags || !state)) return LOG_IFERR(MDBX_EINVAL); - rc = dbi_check(txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) + int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR - MDBX_TXN_PARKED); + if (unlikely(rc != MDBX_SUCCESS)) { + *flags = 0; + *state = 0; return LOG_IFERR(rc); + } + + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) { + *flags = 0; + *state = 0; + return LOG_IFERR(rc); + } *flags = txn->dbs[dbi].flags & DB_PERSISTENT_FLAGS; *state = txn->dbi_state[dbi] & (DBI_FRESH | DBI_CREAT | DBI_DIRTY | DBI_STALE); - return MDBX_SUCCESS; } @@ -968,33 +973,41 @@ static void stat_get(const tree_t *db, MDBX_stat *st, size_t bytes) { __cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, size_t bytes) { - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - if (unlikely(!dest)) return LOG_IFERR(MDBX_EINVAL); + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + rc = 
dbi_check(txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); + goto bailout; const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid); - if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) - return LOG_IFERR(MDBX_EINVAL); + if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) { + rc = MDBX_EINVAL; + goto bailout; + } - if (unlikely(txn->flags & MDBX_TXN_BLOCKED)) - return LOG_IFERR(MDBX_BAD_TXN); + if (unlikely(txn->flags & MDBX_TXN_BLOCKED)) { + rc = MDBX_BAD_TXN; + goto bailout; + } if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) { rc = tbl_fetch((MDBX_txn *)txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); + goto bailout; } dest->ms_psize = txn->env->ps; stat_get(&txn->dbs[dbi], dest, bytes); return MDBX_SUCCESS; + +bailout: + memset(dest, 0, bytes); + return LOG_IFERR(rc); } __cold const tree_t *dbi_dig(const MDBX_txn *txn, const size_t dbi, diff --git a/src/logging_and_debug.c b/src/logging_and_debug.c index 0fc4098a..20d8419e 100644 --- a/src/logging_and_debug.c +++ b/src/logging_and_debug.c @@ -61,7 +61,9 @@ __cold void debug_log(int level, const char *function, int line, __cold int log_error(const int err, const char *func, unsigned line) { assert(err != MDBX_SUCCESS); - if (unlikely(globals.loglevel >= MDBX_LOG_DEBUG)) { + if (unlikely(globals.loglevel >= MDBX_LOG_DEBUG) && + (globals.loglevel >= MDBX_LOG_TRACE || + !(err == MDBX_RESULT_TRUE || err == MDBX_NOTFOUND))) { char buf[256]; debug_log(MDBX_LOG_ERROR, func, line, "error %d (%s)\n", err, mdbx_strerror_r(err, buf, sizeof(buf))); diff --git a/src/logging_and_debug.h b/src/logging_and_debug.h index 6b9ef145..b3cef271 100644 --- a/src/logging_and_debug.h +++ b/src/logging_and_debug.h @@ -161,8 +161,25 @@ MDBX_INTERNAL const char *pagetype_caption(const uint8_t type, MDBX_INTERNAL int log_error(const int err, const char *func, unsigned line); -static inline int log_if_error(int err, const char *func, unsigned 
line) { - return likely(err == MDBX_SUCCESS) ? err : log_error(err, func, line); +static inline int log_if_error(const int err, const char *func, unsigned line) { + if (likely(err == MDBX_SUCCESS)) + return err; + int rc = log_error(err, func, line); +#if __has_c_attribute(assume) + [[assume(rc == err && rc != MDBX_SUCCESS)]]; +#endif +#if defined(__clang__) || __has_builtin(assume) + __builtin_assume(rc == err && rc != MDBX_SUCCESS); +#endif + if (rc != err || rc == MDBX_SUCCESS) { +#if defined(__GNUC__) + __builtin_unreachable(); +#elif defined(_MSC_VER) && !defined(__clang__) + __assume(0); +#endif + rc = err; + } + return rc; } #define LOG_IFERR(err) log_if_error((err), __func__, __LINE__) diff --git a/src/misc.c b/src/misc.c index 367bcdfb..f1a58014 100644 --- a/src/misc.c +++ b/src/misc.c @@ -34,17 +34,21 @@ __cold int mdbx_is_readahead_reasonable(size_t volume, intptr_t redundancy) { int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, uint64_t increment) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) + if (unlikely(rc != MDBX_SUCCESS)) { + bailout: + if (likely(result)) + *result = ~UINT64_C(0); return LOG_IFERR(rc); + } rc = dbi_check(txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); + goto bailout; if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) { rc = tbl_fetch(txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); + goto bailout; } tree_t *dbs = &txn->dbs[dbi]; From 76c9b42e86b89234b01308b47df5414ba2a2537f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 27 Nov 2024 11:49:53 +0300 Subject: [PATCH 358/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`GET=5FMULTIPLE`=20?= =?UTF-8?q?=D0=B4=D0=BB=D1=8F=20=D1=81=D0=BF=D0=B5=D1=86=D0=B8=D0=B0=D0=BB?= 
=?UTF-8?q?=D1=8C=D0=BD=D1=8B=D1=85=20=D1=81=D0=BB=D1=83=D1=87=D0=B0=D0=B5?= =?UTF-8?q?=D0=B2=20=D0=B8/=D0=B8=D0=BB=D0=B8=20=D0=BE=D0=B4=D0=BD=D0=BE?= =?UTF-8?q?=D0=B3=D0=BE=20=D0=B7=D0=BD=D0=B0=D1=87=D0=B5=D0=BD=D0=B8=D1=8F?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/cursor.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/cursor.c b/src/cursor.c index 0d4e1ec5..4371f753 100644 --- a/src/cursor.c +++ b/src/cursor.c @@ -2184,15 +2184,22 @@ __hot int cursor_ops(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, if (unlikely(rc != MDBX_SUCCESS)) return rc; } else { - if (unlikely(is_eof(mc) || !inner_filled(mc))) + if (unlikely(!is_filled(mc))) return MDBX_ENODATA; - cASSERT(mc, is_filled(mc)); if (key) { const page_t *mp = mc->pg[mc->top]; const node_t *node = page_node(mp, mc->ki[mc->top]); *key = get_key(node); } } + cASSERT(mc, is_filled(mc)); + if (unlikely(!inner_filled(mc))) { + if (inner_pointed(mc)) + return MDBX_ENODATA; + const page_t *mp = mc->pg[mc->top]; + const node_t *node = page_node(mp, mc->ki[mc->top]); + return node_read(mc, node, data, mp); + } goto fetch_multiple; case MDBX_NEXT_MULTIPLE: From 9d79d2ba956063287b4abded87d2d0d7bf9d0a86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 27 Nov 2024 21:17:49 +0300 Subject: [PATCH 359/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ChangeLog.md b/ChangeLog.md index 6f3b662d..b730557f 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -22,12 +22,16 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Включен стандарт `C23` в 
CMake-скриптах сборки. - Добавлены T-макросы для парных `char`/`wchar_t` функций. - Поддержка вложенных пишущих транзакций в C++ API. - - Экспорт информации о версии в `VERSION.json`. + - Переход на "Semantic Versioning 2" и экспорт информации о версии в `VERSION.json`. - Добавлена поддержка переменной среды `SOURCE_DATE_EPOCH` для воспроизводимости сборок. Прежний способ посредством `MDBX_BUILD_TIMESTAMP` также работает и имеет приоритет. - Добавлена возможность указывать дополнительную информацию о сборке libmdbx через опцию `MDBX_BUILD_METADATA`. Сейчас задаваемая информация просто включается внутрь библиотеки в качестве значения `mdbx_build.metadata`, а в дальнейшем также будет использоваться при формировании пакетов и т.п. + - Добавлено логирование ошибок возвращаемых из API. Теперь для этого + достаточно задать уровень логирования `MDBX_LOG_DEBUG` (для логирования + ошибок за вычетом `MDBX_NOTFOUND`) или `MDBX_LOG_TRACE` (для логирования + всех ошибок, а также `MDBX_RESULT_TRUE`). Изменение поведения: @@ -60,6 +64,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Добавлен костыль для устранения проблем из-за некорректной обработки `[[gnu::pure]]` в Apple Clang и MSVC. - Поправлено определение `MDBX_DEPRECATED_ENUM` для старых компиляторов при включении С++11. - Доработано использование `std::experimental::filesystem` для решения проблем со сборкой в старых компиляторах. + - Исправлена обработка `MDBX_GET_MULTIPLE` в специальных случаях и одного значения у ключа в позиции курсора. Мелочи: @@ -78,6 +83,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Добавление теста `extra/early_close_dbi`. - Доработка скрипта стохастического теста и его переименование в `stochastic.sh`. - Доработка тестов для совместимости с режимами сборки до С++17. + - Добавление `.WAIT` для устранения коллизий при распараллеливании сборки посредстом GNU Make 4.4. 
-------------------------------------------------------------------------------- From c716531bd4fe41ab2643222caa8251328c0b7028 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 27 Nov 2024 23:25:41 +0300 Subject: [PATCH 360/443] =?UTF-8?q?mdbx-cmake:=20=D0=B8=D1=81=D0=BF=D0=BE?= =?UTF-8?q?=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20`CMAKE?= =?UTF-8?q?=5FC=5FSTANDARD`=20=D0=BF=D1=80=D0=B8=20=D0=B2=D1=8B=D0=B1?= =?UTF-8?q?=D0=BE=D1=80=D0=B5=20=D1=81=D1=82=D0=B0=D0=BD=D0=B4=D0=B0=D1=80?= =?UTF-8?q?=D1=82=D0=B0=20C.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c8c25a74..2e28776a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -566,11 +566,17 @@ endif() list(FIND CMAKE_C_COMPILE_FEATURES c_std_11 HAS_C11) list(FIND CMAKE_C_COMPILE_FEATURES c_std_23 HAS_C23) if(NOT DEFINED MDBX_C_STANDARD) - # MSVC >= 19.28 (Microsoft Visual Studio 16.8) is mad! It unable process - # Windows SDK headers in the C11 mode! - if(MSVC - AND MSVC_VERSION GREATER 1927 - AND NOT MSVC_VERSION GREATER 1929) + if(DEFINED ENV{CMAKE_C_STANDARD}) + set(CMAKE_C_STANDARD $ENV{CMAKE_C_STANDARD}) + endif() + if(DEFINED CMAKE_C_STANDARD) + set(MDBX_C_STANDARD ${CMAKE_C_STANDARD}) + elseif( + MSVC + # MSVC >= 19.28 (Microsoft Visual Studio 16.8) is mad! It unable process + # Windows SDK headers in the C11 mode! 
+ AND MSVC_VERSION GREATER 1927 + AND NOT MSVC_VERSION GREATER 1929) set(MDBX_C_STANDARD 99) set(C_FALLBACK_11 OFF) set(C_FALLBACK_GNU11 OFF) From ad0b374eb5e439314e68e382df560548e5b2c06c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 28 Nov 2024 00:05:36 +0300 Subject: [PATCH 361/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`MDBX=5FMAYBE=5FUNUSED`=20?= =?UTF-8?q?=D0=B4=D0=BB=D1=8F=20`log=5Fif=5Ferror()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/logging_and_debug.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/logging_and_debug.h b/src/logging_and_debug.h index b3cef271..4feac0b8 100644 --- a/src/logging_and_debug.h +++ b/src/logging_and_debug.h @@ -161,7 +161,8 @@ MDBX_INTERNAL const char *pagetype_caption(const uint8_t type, MDBX_INTERNAL int log_error(const int err, const char *func, unsigned line); -static inline int log_if_error(const int err, const char *func, unsigned line) { +MDBX_MAYBE_UNUSED static inline int +log_if_error(const int err, const char *func, unsigned line) { if (likely(err == MDBX_SUCCESS)) return err; int rc = log_error(err, func, line); From 4c5be880384999ef4fb0d1483f5fd65119233af8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 28 Nov 2024 14:54:55 +0300 Subject: [PATCH 362/443] =?UTF-8?q?mdbx-cmake:=20=D0=B8=D1=81=D0=BF=D1=80?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`semver=5Fprovide(?= =?UTF-8?q?)`=20=D0=B4=D0=BB=D1=8F=20=D1=81=D0=BB=D1=83=D1=87=D0=B0=D1=8F?= =?UTF-8?q?=20=D1=81=D0=B8=D0=BC=D0=B2=D0=BE=D0=BB=D0=B8=D1=87=D0=B5=D1=81?= =?UTF-8?q?=D0=BA=D0=B8=D1=85=20=D1=81=D1=81=D1=8B=D0=BB=D0=BE=D0=BA=20?= =?UTF-8?q?=D0=B2=20=D0=BF=D1=83=D1=82=D1=8F=D1=85.?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cmake/utils.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 31cbf436..b669676b 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -403,9 +403,9 @@ macro(semver_provide name source_root_directory build_directory_for_json_output endif() else() set(_source_root "${source_root_directory}") - if(NOT CMAKE_VERSION VERSION_LESS 3.20) - cmake_path(NORMAL_PATH _git_root) - cmake_path(NORMAL_PATH _source_root) + if(NOT CMAKE_VERSION VERSION_LESS 3.19) + file(REAL_PATH "${_git_root}" _git_root) + file(REAL_PATH "${_source_root}" _source_root) endif() if(_source_root STREQUAL _git_root AND EXISTS "${_git_root}/VERSION.json") message( From 5327f42465967a950c45101d24c2e07d0dada96d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 28 Nov 2024 20:07:27 +0300 Subject: [PATCH 363/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog.md b/ChangeLog.md index b730557f..fe5203fe 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -44,6 +44,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic функция `mdbx_txn_commit()` возвращала ошибку в таких случаях, не разрушая сами транзакции. Это приводило к утечкам памяти из-за ошибок в приложениях, что побудило изменить поведение. - Использование макроса `__deprecated_enum` если он определен. + - При сборке посредством CMake выбор стандарта языка `C` теперь выполняется с учётом `CMAKE_C_STANDARD`. 
Исправления: From acb3cb0290d202cd9dc36be4fbfacc2c2b1b7d1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 1 Dec 2024 00:45:40 +0300 Subject: [PATCH 364/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=81=D0=B1=D0=BE=D1=80?= =?UTF-8?q?=D0=BA=D0=B8=20=D0=BF=D1=80=D0=B8=20=D0=B2=D0=BA=D0=BB=D1=8E?= =?UTF-8?q?=D1=87=D0=B5=D0=BD=D0=B8=D0=B8=20=D0=BF=D1=80=D0=BE=D1=84=D0=B8?= =?UTF-8?q?=D0=BB=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20GC=20(?= =?UTF-8?q?=D0=BE=D0=BF=D1=86=D0=B8=D1=8F=20`MDBX=5FENABLE=5FPROFGC`).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/gc-get.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gc-get.c b/src/gc-get.c index 595c18a5..04656595 100644 --- a/src/gc-get.c +++ b/src/gc-get.c @@ -773,7 +773,7 @@ static inline pgr_t page_alloc_finalize(MDBX_env *const env, #if MDBX_ENABLE_PROFGC size_t majflt_before; const uint64_t cputime_before = osal_cputime(&majflt_before); - gc_prof_stat_t *const prof = (mc->mc_dbi == FREE_DBI) + gc_prof_stat_t *const prof = (cursor_dbi(mc) == FREE_DBI) ? &env->lck->pgops.gc_prof.self : &env->lck->pgops.gc_prof.work; #else @@ -889,7 +889,7 @@ pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, MDBX_txn *const txn = mc->txn; MDBX_env *const env = txn->env; #if MDBX_ENABLE_PROFGC - gc_prof_stat_t *const prof = (mc->mc_dbi == FREE_DBI) + gc_prof_stat_t *const prof = (cursor_dbi(mc) == FREE_DBI) ? 
&env->lck->pgops.gc_prof.self : &env->lck->pgops.gc_prof.work; prof->spe_counter += 1; From 99fa43f32248b0f013c803e53960d34792427e32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 2 Dec 2024 18:09:51 +0300 Subject: [PATCH 365/443] =?UTF-8?q?mdbx:=20=D1=83=D0=B4=D0=B0=D0=BB=D0=B5?= =?UTF-8?q?=D0=BD=D0=B8=D0=B5=20=D1=83=D1=81=D1=82=D0=B0=D1=80=D0=B5=D0=B2?= =?UTF-8?q?=D1=88=D0=B5=D0=B3=D0=BE=20CMakeSettings.json?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeSettings.json | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 CMakeSettings.json diff --git a/CMakeSettings.json b/CMakeSettings.json deleted file mode 100644 index 00cd4632..00000000 --- a/CMakeSettings.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "configurations": [ - { - "name": "x64-Debug", - "generator": "Ninja", - "configurationType": "Debug", - "inheritEnvironments": [ "msvc_x64_x64" ], - "buildRoot": "${env.USERPROFILE}\\CMakeBuilds\\libmdbx\\build\\${name}", - "installRoot": "${env.USERPROFILE}\\CMakeBuilds\\libmdbx\\install\\${name}", - "cmakeCommandArgs": "", - "buildCommandArgs": "", - "ctestCommandArgs": "" - }, - { - "name": "x64-Release", - "generator": "Ninja", - "configurationType": "Release", - "inheritEnvironments": [ "msvc_x64_x64" ], - "buildRoot": "${env.USERPROFILE}\\CMakeBuilds\\libmdbx\\build\\${name}", - "installRoot": "${env.USERPROFILE}\\CMakeBuilds\\libmdbx\\install\\${name}", - "cmakeCommandArgs": "", - "buildCommandArgs": "", - "ctestCommandArgs": "" - } - ] -} \ No newline at end of file From 35177611d20feee3b1d92c65159ec55e8e711d3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 2 Dec 2024 22:12:17 +0300 Subject: [PATCH 366/443] 
=?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=81=D0=B1=D0=BE=D1=80?= =?UTF-8?q?=D0=BA=D0=B8=20=D0=BF=D1=80=D0=B8=20`MDBX=5FENABLE=5FDBI=5FSPAR?= =?UTF-8?q?SE=3DOFF`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dbi.c | 2 ++ src/dbi.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/dbi.c b/src/dbi.c index 888f0e03..acadf0e5 100644 --- a/src/dbi.c +++ b/src/dbi.c @@ -3,6 +3,7 @@ #include "internals.h" +#if MDBX_ENABLE_DBI_SPARSE size_t dbi_bitmap_ctz_fallback(const MDBX_txn *txn, intptr_t bmi) { tASSERT(txn, bmi > 0); bmi &= -bmi; @@ -20,6 +21,7 @@ size_t dbi_bitmap_ctz_fallback(const MDBX_txn *txn, intptr_t bmi) { return debruijn_ctz32[(UINT32_C(0x077CB531) * (uint32_t)bmi) >> 27]; } } +#endif /* MDBX_ENABLE_DBI_SPARSE */ struct dbi_snap_result dbi_snap(const MDBX_env *env, const size_t dbi) { eASSERT(env, dbi < env->n_dbi); diff --git a/src/dbi.h b/src/dbi.h index 4c66c664..bfafe3e3 100644 --- a/src/dbi.h +++ b/src/dbi.h @@ -5,11 +5,11 @@ #include "essentials.h" +#if MDBX_ENABLE_DBI_SPARSE + MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED MDBX_INTERNAL size_t dbi_bitmap_ctz_fallback(const MDBX_txn *txn, intptr_t bmi); -#if MDBX_ENABLE_DBI_SPARSE - static inline size_t dbi_bitmap_ctz(const MDBX_txn *txn, intptr_t bmi) { tASSERT(txn, bmi > 0); STATIC_ASSERT(sizeof(bmi) >= sizeof(txn->dbi_sparse[0])); From 44865dadc76409193b4770bc827be6a3b4ff77cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 2 Dec 2024 18:11:36 +0300 Subject: [PATCH 367/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=B5=D1=80=D0=B5=D0=B8?= =?UTF-8?q?=D0=BC=D0=B5=D0=BD=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20=D0=B8?= =?UTF-8?q?=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D0=BA=D0=B0=20?= =?UTF-8?q?=D0=BE=D0=BF=D1=86=D0=B8=D0=B9=20=D1=81=D0=B1=D0=BE=D1=80=D0=BA?= 
=?UTF-8?q?=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 45 +++++++++++++++++---------- src/cold.c | 42 ------------------------- src/config.h.in | 19 ++++++++--- src/dxb.c | 19 ++--------- src/gc-get.c | 8 ++--- src/global.c | 5 ++- src/lck.c | 2 -- src/options.h | 83 +++++++++++++++++++++---------------------------- src/osal.c | 10 ++---- 9 files changed, 88 insertions(+), 145 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2e28776a..b954f58e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -690,7 +690,7 @@ add_mdbx_option(MDBX_INSTALL_STATIC add_mdbx_option(MDBX_BUILD_SHARED_LIBRARY "Build libmdbx as shared library (DLL)" ${BUILD_SHARED_LIBS}) add_mdbx_option( - MDBX_BUILD_TOOLS "Build MDBX tools (mdbx_chk/stat/dump/load/copy)" + MDBX_BUILD_TOOLS "Build MDBX tools (mdbx_chk/stat/dump/load/copy/drop)" ${MDBX_BUILD_TOOLS_DEFAULT}) cmake_dependent_option( MDBX_INSTALL_MANPAGES @@ -699,8 +699,10 @@ cmake_dependent_option( add_mdbx_option( MDBX_TXN_CHECKOWNER "Checking transaction matches the calling thread inside libmdbx's API" ON) -add_mdbx_option(MDBX_ENV_CHECKPID "Paranoid checking PID inside libmdbx's API" - AUTO) +add_mdbx_option( + MDBX_ENV_CHECKPID + "Checking PID inside libmdbx's API against reuse DB environment after the fork()" + AUTO) mark_as_advanced(MDBX_ENV_CHECKPID) if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") add_mdbx_option(MDBX_DISABLE_GNU_SOURCE "Don't use GNU/Linux libc extensions" @@ -708,9 +710,9 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") mark_as_advanced(MDBX_DISABLE_GNU_SOURCE) endif() if(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin" OR IOS) - add_mdbx_option(MDBX_OSX_SPEED_INSTEADOF_DURABILITY + add_mdbx_option(MDBX_APPLE_SPEED_INSTEADOF_DURABILITY "Disable use fcntl(F_FULLFSYNC) in favor of speed" OFF) - mark_as_advanced(MDBX_OSX_SPEED_INSTEADOF_DURABILITY) + mark_as_advanced(MDBX_APPLE_SPEED_INSTEADOF_DURABILITY) endif() if(WIN32) 
if(MDBX_NTDLL_EXTRA_IMPLIB) @@ -724,15 +726,25 @@ else() MDBX_USE_OFDLOCKS "Use Open file description locks (aka OFD locks, non-POSIX)" AUTO) mark_as_advanced(MDBX_USE_OFDLOCKS) + add_mdbx_option( + MDBX_USE_MINCORE + "Use Unix' mincore() to determine whether DB-pages are resident in memory" + ON) + mark_as_advanced(MDBX_USE_MINCORE) set(MDBX_AVOID_MSYNC_DEFAULT OFF) endif() add_mdbx_option( MDBX_AVOID_MSYNC - "Controls dirty pages tracking, spilling and persisting in MDBX_WRITEMAP mode" + "Disable in-memory database updating with consequent flush-to-disk/msync syscall in `MDBX_WRITEMAP` mode" ${MDBX_AVOID_MSYNC_DEFAULT}) +add_mdbx_option( + MDBX_MMAP_NEEDS_JOLT + "Assume system needs explicit syscall to sync/flush/write modified mapped memory" + AUTO) +mark_as_advanced(MDBX_MMAP_NEEDS_JOLT) add_mdbx_option( MDBX_LOCKING - "Locking method (Windows=-1, SysV=5, POSIX=1988, POSIX=2001, POSIX=2008, Futexes=1995)" + "Locking method (Windows=-1, SystemV=5, POSIX=1988, POSIX=2001, POSIX=2008)" AUTO) mark_as_advanced(MDBX_LOCKING) add_mdbx_option( @@ -747,23 +759,22 @@ add_mdbx_option( mark_as_advanced(MDBX_DISABLE_VALIDATION) add_mdbx_option(MDBX_ENABLE_REFUND "Zerocost auto-compactification during write-transactions" ON) -add_mdbx_option(MDBX_ENABLE_MADVISE - "Using POSIX' madvise() and/or similar hints" ON) -if(CMAKE_TARGET_BITNESS GREATER 32) - set(MDBX_BIGFOOT_DEFAULT ON) -else() - set(MDBX_BIGFOOT_DEFAULT OFF) -endif() add_mdbx_option( MDBX_ENABLE_BIGFOOT "Chunking long list of retired pages during huge transactions commit to avoid use sequences of pages" - ${MDBX_BIGFOOT_DEFAULT}) + ON) add_mdbx_option(MDBX_ENABLE_PGOP_STAT "Gathering statistics for page operations" ON) add_mdbx_option(MDBX_ENABLE_PROFGC "Profiling of GC search and updates" OFF) mark_as_advanced(MDBX_ENABLE_PROFGC) -add_mdbx_option(MDBX_ENABLE_DBI_SPARSE "FIXME" ON) -add_mdbx_option(MDBX_ENABLE_DBI_LOCKFREE "FIXME" ON) +add_mdbx_option( + MDBX_ENABLE_DBI_SPARSE + "Support for sparse sets of DBI 
handles to reduce overhead when starting and processing transactions" + ON) +add_mdbx_option( + MDBX_ENABLE_DBI_LOCKFREE + "Support for deferred releasing and a lockfree path to quickly open DBI handles" + ON) if(NOT MDBX_AMALGAMATED_SOURCE) if(CMAKE_CONFIGURATION_TYPES OR CMAKE_BUILD_TYPE_UPPERCASE STREQUAL "DEBUG") diff --git a/src/cold.c b/src/cold.c index 53f0a6a0..8b904776 100644 --- a/src/cold.c +++ b/src/cold.c @@ -382,49 +382,7 @@ __cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, #endif /* MLOCK_ONFAULT */ int err = MDBX_ENOSYS; -#if MDBX_ENABLE_MADVISE err = dxb_set_readahead(env, used_pgno, true, true); -#else -#if defined(_WIN32) || defined(_WIN64) - if (imports.PrefetchVirtualMemory) { - WIN32_MEMORY_RANGE_ENTRY hint; - hint.VirtualAddress = env->dxb_mmap.base; - hint.NumberOfBytes = used_range; - if (imports.PrefetchVirtualMemory(GetCurrentProcess(), 1, &hint, 0)) - err = MDBX_SUCCESS; - else { - err = (int)GetLastError(); - ERROR("%s(%zu) error %d", "PrefetchVirtualMemory", used_range, err); - } - } -#endif /* Windows */ - -#if defined(POSIX_MADV_WILLNEED) - err = posix_madvise(env->dxb_mmap.base, used_range, POSIX_MADV_WILLNEED) - ? ignore_enosys(errno) - : MDBX_SUCCESS; -#elif defined(MADV_WILLNEED) - err = madvise(env->dxb_mmap.base, used_range, MADV_WILLNEED) - ? ignore_enosys(errno) - : MDBX_SUCCESS; -#endif - -#if defined(F_RDADVISE) - if (err) { - fcntl(env->lazy_fd, F_RDAHEAD, true); - struct radvisory hint; - hint.ra_offset = 0; - hint.ra_count = unlikely(used_range > INT_MAX && - sizeof(used_range) > sizeof(hint.ra_count)) - ? INT_MAX - : (int)used_range; - err = fcntl(env->lazy_fd, F_RDADVISE, &hint) ? 
ignore_enosys(errno) - : MDBX_SUCCESS; - if (err == ENOTTY) - err = MDBX_SUCCESS /* Ignore ENOTTY for DB on the ram-disk */; - } -#endif /* F_RDADVISE */ -#endif /* MDBX_ENABLE_MADVISE */ if (err != MDBX_SUCCESS && rc == MDBX_SUCCESS) rc = err; diff --git a/src/config.h.in b/src/config.h.in index bba24605..5d53860c 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -32,7 +32,6 @@ #cmakedefine01 MDBX_DISABLE_VALIDATION #cmakedefine01 MDBX_AVOID_MSYNC #cmakedefine01 MDBX_ENABLE_REFUND -#cmakedefine01 MDBX_ENABLE_MADVISE #cmakedefine01 MDBX_ENABLE_BIGFOOT #cmakedefine01 MDBX_ENABLE_PGOP_STAT #cmakedefine01 MDBX_ENABLE_PROFGC @@ -40,19 +39,29 @@ #cmakedefine01 MDBX_ENABLE_DBI_LOCKFREE /* Windows */ -#if !defined(MDBX_BUILD_TEST) && !defined(MDBX_WITHOUT_MSVC_CRT) +#if defined(MDBX_BUILD_TEST) || !defined(MDBX_BUILD_CXX) || MDBX_BUILD_CXX +#define MDBX_WITHOUT_MSVC_CRT 0 +#else #cmakedefine01 MDBX_WITHOUT_MSVC_CRT -#endif +#endif /* MDBX_WITHOUT_MSVC_CRT */ /* MacOS & iOS */ -#cmakedefine01 MDBX_OSX_SPEED_INSTEADOF_DURABILITY +#cmakedefine01 MDBX_APPLE_SPEED_INSTEADOF_DURABILITY /* POSIX */ #cmakedefine01 MDBX_DISABLE_GNU_SOURCE + #cmakedefine MDBX_USE_OFDLOCKS_AUTO #ifndef MDBX_USE_OFDLOCKS_AUTO #cmakedefine01 MDBX_USE_OFDLOCKS -#endif +#endif /* MDBX_USE_OFDLOCKS */ + +#cmakedefine MDBX_MMAP_NEEDS_JOLT_AUTO +#ifndef MDBX_MMAP_NEEDS_JOLT_AUTO +#cmakedefine01 MDBX_MMAP_NEEDS_JOLT +#endif /* MDBX_MMAP_NEEDS_JOLT */ + +#cmakedefine01 MDBX_USE_MINCORE /* Build Info */ #ifndef MDBX_BUILD_TIMESTAMP diff --git a/src/dxb.c b/src/dxb.c index b57d0e0a..8d50895c 100644 --- a/src/dxb.c +++ b/src/dxb.c @@ -153,9 +153,7 @@ __cold int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, } const size_t limit_bytes = pgno_align2os_bytes(env, limit_pgno); const size_t size_bytes = pgno_align2os_bytes(env, size_pgno); -#if MDBX_ENABLE_MADVISE || defined(ENABLE_MEMCHECK) const void *const prev_map = env->dxb_mmap.base; -#endif /* MDBX_ENABLE_MADVISE || ENABLE_MEMCHECK */ 
VERBOSE("resize(env-flags 0x%x, mode %d) datafile/mapping: " "present %" PRIuPTR " -> %" PRIuPTR ", " @@ -252,7 +250,6 @@ __cold int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, } munlock_after(env, aligned_munlock_pgno, size_bytes); -#if MDBX_ENABLE_MADVISE if (size_bytes < prev_size && mode > implicit_grow) { NOTICE("resize-MADV_%s %u..%u", (env->flags & MDBX_WRITEMAP) ? "REMOVE" : "DONTNEED", size_pgno, @@ -304,12 +301,10 @@ __cold int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, } else env->lck->discarded_tail.weak = size_pgno; } -#endif /* MDBX_ENABLE_MADVISE */ rc = osal_mresize(mresize_flags, &env->dxb_mmap, size_bytes, limit_bytes); eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current); -#if MDBX_ENABLE_MADVISE if (rc == MDBX_SUCCESS) { eASSERT(env, limit_bytes == env->dxb_mmap.limit); eASSERT(env, size_bytes <= env->dxb_mmap.filesize); @@ -329,7 +324,6 @@ __cold int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, ; rc = dxb_set_readahead(env, size_pgno, readahead, force); } -#endif /* MDBX_ENABLE_MADVISE */ bailout: if (rc == MDBX_SUCCESS) { @@ -448,7 +442,6 @@ void dxb_sanitize_tail(MDBX_env *env, MDBX_txn *txn) { } #endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ -#if MDBX_ENABLE_MADVISE /* Turn on/off readahead. It's harmful when the DB is larger than RAM. */ __cold int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, const bool enable, const bool force_whole) { @@ -570,7 +563,6 @@ __cold int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, err = MDBX_SUCCESS; return err; } -#endif /* MDBX_ENABLE_MADVISE */ __cold int dxb_setup(MDBX_env *env, const int lck_rc, const mdbx_mode_t mode_bits) { @@ -776,12 +768,10 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, globals.bootid.x, globals.bootid.y, (globals.bootid.x | globals.bootid.y) ? 
"" : "not-"); -#if MDBX_ENABLE_MADVISE /* calculate readahead hint before mmap with zero redundant pages */ const bool readahead = !(env->flags & MDBX_NORDAHEAD) && mdbx_is_readahead_reasonable(used_bytes, 0) == MDBX_RESULT_TRUE; -#endif /* MDBX_ENABLE_MADVISE */ err = osal_mmap(env->flags, &env->dxb_mmap, env->geo_in_bytes.now, env->geo_in_bytes.upper, @@ -789,7 +779,6 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, if (unlikely(err != MDBX_SUCCESS)) return err; -#if MDBX_ENABLE_MADVISE #if defined(MADV_DONTDUMP) err = madvise(env->dxb_mmap.base, env->dxb_mmap.limit, MADV_DONTDUMP) ? ignore_enosys(errno) @@ -807,7 +796,6 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, return err; } #endif /* MADV_DODUMP */ -#endif /* MDBX_ENABLE_MADVISE */ #ifdef ENABLE_MEMCHECK env->valgrind_handle = @@ -1082,7 +1070,6 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, } /* lck exclusive, lck_rc == MDBX_RESULT_TRUE */ //---------------------------------------------------- setup madvise/readahead -#if MDBX_ENABLE_MADVISE if (used_aligned2os_bytes < env->dxb_mmap.current) { #if defined(MADV_REMOVE) if (lck_rc && (env->flags & MDBX_WRITEMAP) != 0 && @@ -1125,7 +1112,6 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, err = dxb_set_readahead(env, bytes2pgno(env, used_bytes), readahead, true); if (unlikely(err != MDBX_SUCCESS)) return err; -#endif /* MDBX_ENABLE_MADVISE */ return rc; } @@ -1192,8 +1178,7 @@ int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, } #endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ -#if MDBX_ENABLE_MADVISE && \ - (defined(MADV_DONTNEED) || defined(POSIX_MADV_DONTNEED)) +#if defined(MADV_DONTNEED) || defined(POSIX_MADV_DONTNEED) const size_t discard_edge_pgno = pgno_align2os_pgno(env, largest_pgno); if (prev_discarded_pgno >= discard_edge_pgno + env->madv_threshold) { const size_t prev_discarded_bytes = @@ -1249,7 +1234,7 @@ int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, 
env->lck->discarded_tail.weak = discard_edge_pgno; } } -#endif /* MDBX_ENABLE_MADVISE && (MADV_DONTNEED || POSIX_MADV_DONTNEED) */ +#endif /* MADV_DONTNEED || POSIX_MADV_DONTNEED */ /* LY: check conditions to shrink datafile */ const pgno_t backlog_gap = 3 + pending->trees.gc.height * 3; diff --git a/src/gc-get.c b/src/gc-get.c index 04656595..9b9c0826 100644 --- a/src/gc-get.c +++ b/src/gc-get.c @@ -3,7 +3,7 @@ #include "internals.h" -#if MDBX_ENABLE_MINCORE +#if MDBX_USE_MINCORE /*------------------------------------------------------------------------------ * Проверка размещения/расположения отображенных страниц БД в ОЗУ (mem-in-core), * с кешированием этой информации. */ @@ -77,11 +77,11 @@ static bool mincore_fetch(MDBX_env *const env, const size_t unit_begin) { lck->mincore_cache.mask[0] = ~mask; return bit_tas(lck->mincore_cache.mask, 0); } -#endif /* MDBX_ENABLE_MINCORE */ +#endif /* MDBX_USE_MINCORE */ MDBX_MAYBE_UNUSED static inline bool mincore_probe(MDBX_env *const env, const pgno_t pgno) { -#if MDBX_ENABLE_MINCORE +#if MDBX_USE_MINCORE const size_t offset_aligned = floor_powerof2(pgno2bytes(env, pgno), globals.sys_pagesize); const unsigned unit_log2 = (env->ps2ln > globals.sys_pagesize_ln2) @@ -97,7 +97,7 @@ MDBX_MAYBE_UNUSED static inline bool mincore_probe(MDBX_env *const env, (void)env; (void)pgno; return false; -#endif /* MDBX_ENABLE_MINCORE */ +#endif /* MDBX_USE_MINCORE */ } /*----------------------------------------------------------------------------*/ diff --git a/src/global.c b/src/global.c index 54c686c0..88432580 100644 --- a/src/global.c +++ b/src/global.c @@ -351,8 +351,7 @@ __dll_export " MDBX_TRUST_RTC=" MDBX_TRUST_RTC_CONFIG " MDBX_AVOID_MSYNC=" MDBX_STRINGIFY(MDBX_AVOID_MSYNC) " MDBX_ENABLE_REFUND=" MDBX_STRINGIFY(MDBX_ENABLE_REFUND) - " MDBX_ENABLE_MADVISE=" MDBX_STRINGIFY(MDBX_ENABLE_MADVISE) - " MDBX_ENABLE_MINCORE=" MDBX_STRINGIFY(MDBX_ENABLE_MINCORE) + " MDBX_USE_MINCORE=" MDBX_STRINGIFY(MDBX_USE_MINCORE) " 
MDBX_ENABLE_PGOP_STAT=" MDBX_STRINGIFY(MDBX_ENABLE_PGOP_STAT) " MDBX_ENABLE_PROFGC=" MDBX_STRINGIFY(MDBX_ENABLE_PROFGC) #if MDBX_DISABLE_VALIDATION @@ -373,7 +372,7 @@ __dll_export " _GNU_SOURCE=NO" #endif /* _GNU_SOURCE */ #ifdef __APPLE__ - " MDBX_OSX_SPEED_INSTEADOF_DURABILITY=" MDBX_STRINGIFY(MDBX_OSX_SPEED_INSTEADOF_DURABILITY) + " MDBX_APPLE_SPEED_INSTEADOF_DURABILITY=" MDBX_STRINGIFY(MDBX_APPLE_SPEED_INSTEADOF_DURABILITY) #endif /* MacOS */ #if defined(_WIN32) || defined(_WIN64) " MDBX_WITHOUT_MSVC_CRT=" MDBX_STRINGIFY(MDBX_WITHOUT_MSVC_CRT) diff --git a/src/lck.c b/src/lck.c index c6b7228c..291a257d 100644 --- a/src/lck.c +++ b/src/lck.c @@ -78,7 +78,6 @@ __cold static int lck_setup_locked(MDBX_env *env) { if (unlikely(err != MDBX_SUCCESS)) return err; -#if MDBX_ENABLE_MADVISE #ifdef MADV_DODUMP err = madvise(env->lck_mmap.lck, size, MADV_DODUMP) ? ignore_enosys(errno) : MDBX_SUCCESS; @@ -97,7 +96,6 @@ __cold static int lck_setup_locked(MDBX_env *env) { if (unlikely(MDBX_IS_ERROR(err))) return err; #endif /* MADV_WILLNEED */ -#endif /* MDBX_ENABLE_MADVISE */ lck_t *lck = env->lck_mmap.lck; if (lck_seize_rc == MDBX_RESULT_TRUE) { diff --git a/src/options.h b/src/options.h index 89f64967..acfccfb1 100644 --- a/src/options.h +++ b/src/options.h @@ -29,23 +29,15 @@ /** Using fsync() with chance of data lost on power failure */ #define MDBX_OSX_WANNA_SPEED 1 -#ifndef MDBX_OSX_SPEED_INSTEADOF_DURABILITY +#ifndef MDBX_APPLE_SPEED_INSTEADOF_DURABILITY /** Choices \ref MDBX_OSX_WANNA_DURABILITY or \ref MDBX_OSX_WANNA_SPEED * for OSX & iOS */ -#define MDBX_OSX_SPEED_INSTEADOF_DURABILITY MDBX_OSX_WANNA_DURABILITY -#endif /* MDBX_OSX_SPEED_INSTEADOF_DURABILITY */ - -/** Controls using of POSIX' madvise() and/or similar hints. 
*/ -#ifndef MDBX_ENABLE_MADVISE -#define MDBX_ENABLE_MADVISE 1 -#elif !(MDBX_ENABLE_MADVISE == 0 || MDBX_ENABLE_MADVISE == 1) -#error MDBX_ENABLE_MADVISE must be defined as 0 or 1 -#endif /* MDBX_ENABLE_MADVISE */ +#define MDBX_APPLE_SPEED_INSTEADOF_DURABILITY MDBX_OSX_WANNA_DURABILITY +#endif /* MDBX_APPLE_SPEED_INSTEADOF_DURABILITY */ /** Controls checking PID against reuse DB environment after the fork() */ #ifndef MDBX_ENV_CHECKPID -#if (defined(MADV_DONTFORK) && MDBX_ENABLE_MADVISE) || defined(_WIN32) || \ - defined(_WIN64) +#if defined(MADV_DONTFORK) || defined(_WIN32) || defined(_WIN64) /* PID check could be omitted: * - on Linux when madvise(MADV_DONTFORK) is available, i.e. after the fork() * mapped pages will not be available for child process. @@ -110,15 +102,16 @@ /** Controls using Unix' mincore() to determine whether DB-pages * are resident in memory. */ -#ifndef MDBX_ENABLE_MINCORE +#ifndef MDBX_USE_MINCORE #if defined(MINCORE_INCORE) || !(defined(_WIN32) || defined(_WIN64)) -#define MDBX_ENABLE_MINCORE 1 +#define MDBX_USE_MINCORE 1 #else -#define MDBX_ENABLE_MINCORE 0 +#define MDBX_USE_MINCORE 0 #endif -#elif !(MDBX_ENABLE_MINCORE == 0 || MDBX_ENABLE_MINCORE == 1) -#error MDBX_ENABLE_MINCORE must be defined as 0 or 1 -#endif /* MDBX_ENABLE_MINCORE */ +#define MDBX_USE_MINCORE_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_USE_MINCORE) +#elif !(MDBX_USE_MINCORE == 0 || MDBX_USE_MINCORE == 1) +#error MDBX_USE_MINCORE must be defined as 0 or 1 +#endif /* MDBX_USE_MINCORE */ /** Enables chunking long list of retired pages during huge transactions commit * to avoid use sequences of pages. */ @@ -151,12 +144,16 @@ #endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */ /** Controls dirty pages tracking, spilling and persisting in `MDBX_WRITEMAP` - * mode. 0/OFF = Don't track dirty pages at all, don't spill ones, and use - * msync() to persist data. This is by-default on Linux and other systems where - * kernel provides properly LRU tracking and effective flushing on-demand. 
1/ON - * = Tracking of dirty pages but with LRU labels for spilling and explicit - * persist ones by write(). This may be reasonable for systems which low - * performance of msync() and/or LRU tracking. */ + * mode, i.e. disables in-memory database updating with consequent + * flush-to-disk/msync syscall. + * + * 0/OFF = Don't track dirty pages at all, don't spill ones, and use msync() to + * persist data. This is by-default on Linux and other systems where kernel + * provides properly LRU tracking and effective flushing on-demand. + * + * 1/ON = Tracking of dirty pages but with LRU labels for spilling and explicit + * persist ones by write(). This may be reasonable for goofy systems (Windows) + * which low performance of msync() and/or zany LRU tracking. */ #ifndef MDBX_AVOID_MSYNC #if defined(_WIN32) || defined(_WIN64) #define MDBX_AVOID_MSYNC 1 @@ -212,7 +209,7 @@ #error MDBX_ENVCOPY_WRITEBUF must be defined in range 65536..1073741824 and be multiple of 65536 #endif /* MDBX_ENVCOPY_WRITEBUF */ -/** Forces assertion checking */ +/** Forces assertion checking. */ #ifndef MDBX_FORCE_ASSERTIONS #define MDBX_FORCE_ASSERTIONS 0 #elif !(MDBX_FORCE_ASSERTIONS == 0 || MDBX_FORCE_ASSERTIONS == 1) @@ -284,9 +281,6 @@ /** POSIX-2008 Robust Mutexes for \ref MDBX_LOCKING */ #define MDBX_LOCKING_POSIX2008 2008 -/** BeOS Benaphores, aka Futexes for \ref MDBX_LOCKING */ -#define MDBX_LOCKING_BENAPHORE 1995 - /** Advanced: Choices the locking implementation (autodetection by default). */ #if defined(_WIN32) || defined(_WIN64) #define MDBX_LOCKING MDBX_LOCKING_WIN32FILES @@ -363,19 +357,6 @@ #error MDBX_USE_COPYFILERANGE must be defined as 0 or 1 #endif /* MDBX_USE_COPYFILERANGE */ -/** Advanced: Using sync_file_range() syscall (autodetection by default). 
*/ -#ifndef MDBX_USE_SYNCFILERANGE -#if ((defined(__linux__) || defined(__gnu_linux__)) && \ - defined(SYNC_FILE_RANGE_WRITE) && !defined(__ANDROID_API__)) || \ - (defined(__ANDROID_API__) && __ANDROID_API__ >= 26) -#define MDBX_USE_SYNCFILERANGE 1 -#else -#define MDBX_USE_SYNCFILERANGE 0 -#endif -#elif !(MDBX_USE_SYNCFILERANGE == 0 || MDBX_USE_SYNCFILERANGE == 1) -#error MDBX_USE_SYNCFILERANGE must be defined as 0 or 1 -#endif /* MDBX_USE_SYNCFILERANGE */ - //------------------------------------------------------------------------------ #ifndef MDBX_CPU_WRITEBACK_INCOHERENT @@ -416,15 +397,19 @@ #error MDBX_MMAP_INCOHERENT_CPU_CACHE must be defined as 0 or 1 #endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */ -#ifndef MDBX_MMAP_USE_MS_ASYNC -#if MDBX_MMAP_INCOHERENT_FILE_WRITE || MDBX_MMAP_INCOHERENT_CPU_CACHE -#define MDBX_MMAP_USE_MS_ASYNC 1 +/** Assume system needs explicit syscall to sync/flush/write modified mapped + * memory. */ +#ifndef MDBX_MMAP_NEEDS_JOLT +#if MDBX_MMAP_INCOHERENT_FILE_WRITE || MDBX_MMAP_INCOHERENT_CPU_CACHE || \ + !(defined(__linux__) || defined(__gnu_linux__)) +#define MDBX_MMAP_NEEDS_JOLT 1 #else -#define MDBX_MMAP_USE_MS_ASYNC 0 +#define MDBX_MMAP_NEEDS_JOLT 0 #endif -#elif !(MDBX_MMAP_USE_MS_ASYNC == 0 || MDBX_MMAP_USE_MS_ASYNC == 1) -#error MDBX_MMAP_USE_MS_ASYNC must be defined as 0 or 1 -#endif /* MDBX_MMAP_USE_MS_ASYNC */ +#define MDBX_MMAP_NEEDS_JOLT_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_MMAP_NEEDS_JOLT) +#elif !(MDBX_MMAP_NEEDS_JOLT == 0 || MDBX_MMAP_NEEDS_JOLT == 1) +#error MDBX_MMAP_NEEDS_JOLT must be defined as 0 or 1 +#endif /* MDBX_MMAP_NEEDS_JOLT */ #ifndef MDBX_64BIT_ATOMIC #if MDBX_WORDBITS >= 64 || defined(DOXYGEN) @@ -507,7 +492,9 @@ #endif /* MDBX_CACHELINE_SIZE */ /* Max length of iov-vector passed to writev() call, used for auxilary writes */ +#ifndef MDBX_AUXILARY_IOV_MAX #define MDBX_AUXILARY_IOV_MAX 64 +#endif #if defined(IOV_MAX) && IOV_MAX < MDBX_AUXILARY_IOV_MAX #undef MDBX_AUXILARY_IOV_MAX #define MDBX_AUXILARY_IOV_MAX 
IOV_MAX diff --git a/src/osal.c b/src/osal.c index 32c07c3d..caff1ad2 100644 --- a/src/osal.c +++ b/src/osal.c @@ -1623,7 +1623,7 @@ MDBX_INTERNAL int osal_fsync(mdbx_filehandle_t fd, #else #if defined(__APPLE__) && \ - MDBX_OSX_SPEED_INSTEADOF_DURABILITY == MDBX_OSX_WANNA_DURABILITY + MDBX_APPLE_SPEED_INSTEADOF_DURABILITY == MDBX_OSX_WANNA_DURABILITY if (mode_bits & MDBX_SYNC_IODQ) return likely(fcntl(fd, F_FULLFSYNC) != -1) ? MDBX_SUCCESS : errno; #endif /* MacOS */ @@ -1776,7 +1776,7 @@ MDBX_INTERNAL int osal_thread_join(osal_thread_t thread) { MDBX_INTERNAL int osal_msync(const osal_mmap_t *map, size_t offset, size_t length, enum osal_syncmode_bits mode_bits) { - if (!MDBX_MMAP_USE_MS_ASYNC && mode_bits == MDBX_SYNC_NONE) + if (!MDBX_MMAP_NEEDS_JOLT && mode_bits == MDBX_SYNC_NONE) return MDBX_SUCCESS; void *ptr = ptr_disp(map->base, offset); @@ -1793,7 +1793,7 @@ MDBX_INTERNAL int osal_msync(const osal_mmap_t *map, size_t offset, // // However, this behavior may be changed in custom kernels, // so just leave such optimization to the libc discretion. - // NOTE: The MDBX_MMAP_USE_MS_ASYNC must be defined to 1 for such cases. + // NOTE: The MDBX_MMAP_NEEDS_JOLT must be defined to 1 for such cases. // // assert(mdbx.linux_kernel_version > 0x02061300); // if (mode_bits <= MDBX_SYNC_KICK) @@ -2314,7 +2314,6 @@ MDBX_INTERNAL int osal_mmap(const int flags, osal_mmap_t *map, size_t size, } map->limit = limit; -#if MDBX_ENABLE_MADVISE #ifdef MADV_DONTFORK if (unlikely(madvise(map->base, map->limit, MADV_DONTFORK) != 0)) return errno; @@ -2322,7 +2321,6 @@ MDBX_INTERNAL int osal_mmap(const int flags, osal_mmap_t *map, size_t size, #ifdef MADV_NOHUGEPAGE (void)madvise(map->base, map->limit, MADV_NOHUGEPAGE); #endif /* MADV_NOHUGEPAGE */ -#endif /* MDBX_ENABLE_MADVISE */ #endif /* ! 
Windows */ @@ -2735,7 +2733,6 @@ retry_mapview:; map->limit = limit; map->current = size; -#if MDBX_ENABLE_MADVISE #ifdef MADV_DONTFORK if (unlikely(madvise(map->base, map->limit, MADV_DONTFORK) != 0)) { assert(errno != 0); @@ -2745,7 +2742,6 @@ retry_mapview:; #ifdef MADV_NOHUGEPAGE (void)madvise(map->base, map->limit, MADV_NOHUGEPAGE); #endif /* MADV_NOHUGEPAGE */ -#endif /* MDBX_ENABLE_MADVISE */ #endif /* POSIX / Windows */ From e2ca81ae83018a53a415fce8ce01e55fef391050 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 29 Nov 2024 20:15:27 +0300 Subject: [PATCH 368/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=BE=D0=B4=D0=B4=D0=B5?= =?UTF-8?q?=D1=80=D0=B6=D0=BA=D0=B0=20Conan.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GNUmakefile | 48 ++++---- conanfile.py | 323 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 348 insertions(+), 23 deletions(-) create mode 100644 conanfile.py diff --git a/GNUmakefile b/GNUmakefile index 01cc0d77..fa1c1e45 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -54,6 +54,8 @@ CFLAGS_EXTRA ?= LD ?= ld CMAKE ?= cmake CMAKE_OPT ?= +# target directory for `make dist` +DIST_DIR ?= dist # build options MDBX_BUILD_OPTIONS ?=-DNDEBUG=1 @@ -648,11 +650,11 @@ release-assets: libmdbx-amalgamated-$(MDBX_GIT_3DOT).zpaq \ || (echo 'ERROR: Is not a valid release because not in the clean state with a suitable annotated tag!!!' >&2 && false)) \ && echo ' RELEASE ASSETS are done' -@dist-checked.tag: $(addprefix dist/, $(DIST_SRC) $(DIST_EXTRA)) +@dist-checked.tag: $(addprefix $(DIST_DIR)/, $(DIST_SRC) $(DIST_EXTRA)) @echo -n ' VERIFY amalgamated sources...' 
- $(QUIET)rm -rf $@ dist/@tmp-essentials.inc dist/@tmp-internals.inc \ + $(QUIET)rm -rf $@ $(DIST_DIR)/@tmp-essentials.inc $(DIST_DIR)/@tmp-internals.inc \ && if grep -R "define xMDBX_ALLOY" dist | grep -q MDBX_BUILD_SOURCERY; then echo "sed output is WRONG!" >&2; exit 2; fi \ - && rm -rf @dist-check && cp -r -p dist @dist-check && ($(MAKE) -j IOARENA=false CXXSTD=$(CXXSTD) -C @dist-check all ninja >@dist-check.log 2>@dist-check.err || (cat @dist-check.err && exit 1)) \ + && rm -rf @dist-check && cp -r -p $(DIST_DIR) @dist-check && ($(MAKE) -j IOARENA=false CXXSTD=$(CXXSTD) -C @dist-check all ninja >@dist-check.log 2>@dist-check.err || (cat @dist-check.err && exit 1)) \ && touch $@ || (echo " FAILED! See @dist-check.log and @dist-check.err" >&2; exit 2) && echo " Ok" %.tar.gz: @dist-checked.tag @@ -675,7 +677,7 @@ release-assets: libmdbx-amalgamated-$(MDBX_GIT_3DOT).zpaq \ @echo ' CREATE $@' $(QUIET)rm -rf $@ && (cd dist && zpaq a ../$@ $(DIST_SRC) $(DIST_EXTRA) -m59) &>@zpaq.log -dist/@tmp-essentials.inc: src/version.c $(ALLOY_DEPS) $(lastword $(MAKEFILE_LIST)) +$(DIST_DIR)/@tmp-essentials.inc: src/version.c $(ALLOY_DEPS) $(lastword $(MAKEFILE_LIST)) @echo ' ALLOYING...' 
$(QUIET)mkdir -p dist \ && (grep -v '#include ' src/alloy.c && echo '#define MDBX_BUILD_SOURCERY $(MDBX_BUILD_SOURCERY)' \ @@ -696,8 +698,8 @@ dist/@tmp-essentials.inc: src/version.c $(ALLOY_DEPS) $(lastword $(MAKEFILE_LIST -e '/ clang-format o/d' -e '/ \*INDENT-O/d' \ | grep -v '^/// ') >$@ -dist/@tmp-internals.inc: dist/@tmp-essentials.inc src/version.c $(ALLOY_DEPS) $(lastword $(MAKEFILE_LIST)) - $(QUIET)(cat dist/@tmp-essentials.inc \ +$(DIST_DIR)/@tmp-internals.inc: $(DIST_DIR)/@tmp-essentials.inc src/version.c $(ALLOY_DEPS) $(lastword $(MAKEFILE_LIST)) + $(QUIET)(cat $(DIST_DIR)/@tmp-essentials.inc \ && sed \ -e '/#include "essentials.h"/d' \ -e '/#include "atomics-ops.h"/r src/atomics-ops.h' \ @@ -725,28 +727,28 @@ dist/@tmp-internals.inc: dist/@tmp-essentials.inc src/version.c $(ALLOY_DEPS) $( -e '/ clang-format o/d' -e '/ \*INDENT-O/d' \ | grep -v '^/// ') >$@ -dist/mdbx.c: dist/@tmp-internals.inc $(lastword $(MAKEFILE_LIST)) +$(DIST_DIR)/mdbx.c: $(DIST_DIR)/@tmp-internals.inc $(lastword $(MAKEFILE_LIST)) @echo ' MAKE $@' - $(QUIET)(cat dist/@tmp-internals.inc $(shell git ls-files src/*.c | grep -v alloy) src/version.c | sed \ + $(QUIET)(cat $(DIST_DIR)/@tmp-internals.inc $(shell git ls-files src/*.c | grep -v alloy) src/version.c | sed \ -e '/#include "debug_begin.h"/r src/debug_begin.h' \ -e '/#include "debug_end.h"/r src/debug_end.h' \ ) | sed -e '/#include "/d;/#pragma once/d' -e 's|@INCLUDE|#include|' \ -e '/ clang-format o/d;/ \*INDENT-O/d' -e '3i /* clang-format off */' | cat -s >$@ -dist/mdbx.c++: dist/@tmp-essentials.inc src/mdbx.c++ $(lastword $(MAKEFILE_LIST)) +$(DIST_DIR)/mdbx.c++: $(DIST_DIR)/@tmp-essentials.inc src/mdbx.c++ $(lastword $(MAKEFILE_LIST)) @echo ' MAKE $@' - $(QUIET)cat dist/@tmp-essentials.inc src/mdbx.c++ | sed \ + $(QUIET)cat $(DIST_DIR)/@tmp-essentials.inc src/mdbx.c++ | sed \ -e '/#define xMDBX_ALLOY/d' \ -e '/#include "/d;/#pragma once/d' \ -e 's|@INCLUDE|#include|;s|"mdbx.h"|"mdbx.h++"|' \ -e '/ clang-format o/d;/ 
\*INDENT-O/d' -e '3i /* clang-format off */' | cat -s >$@ define dist-tool-rule -dist/mdbx_$(1).c: src/tools/$(1).c src/tools/wingetopt.h src/tools/wingetopt.c \ - dist/@tmp-internals.inc $(lastword $(MAKEFILE_LIST)) +$(DIST_DIR)/mdbx_$(1).c: src/tools/$(1).c src/tools/wingetopt.h src/tools/wingetopt.c \ + $(DIST_DIR)/@tmp-internals.inc $(lastword $(MAKEFILE_LIST)) @echo ' MAKE $$@' $(QUIET)mkdir -p dist && sed \ - -e '/#include "essentials.h"/r dist/@tmp-essentials.inc' \ + -e '/#include "essentials.h"/r $(DIST_DIR)/@tmp-essentials.inc' \ -e '/#include "wingetopt.h"/r src/tools/wingetopt.c' \ -e '/ clang-format o/d' -e '/ \*INDENT-O/d' \ src/tools/$(1).c \ @@ -757,32 +759,32 @@ endef $(foreach file,$(TOOLS),$(eval $(call dist-tool-rule,$(file)))) define dist-extra-rule -dist/$(1): $(1) src/version.c $(lastword $(MAKEFILE_LIST)) +$(DIST_DIR)/$(1): $(1) src/version.c $(lastword $(MAKEFILE_LIST)) @echo ' REFINE $$@' $(QUIET)mkdir -p $$(dir $$@) && sed -e '/^#> dist-cutoff-begin/,/^#< dist-cutoff-end/d' $$< | cat -s >$$@ endef $(foreach file,mdbx.h mdbx.h++ $(filter-out man1/% VERSION.json .clang-format-ignore %.in ntdll.def,$(DIST_EXTRA)),$(eval $(call dist-extra-rule,$(file)))) -dist/VERSION.json: src/version.c +$(DIST_DIR)/VERSION.json: src/version.c @echo ' MAKE $@' - $(QUIET)mkdir -p dist/ && echo "{ \"git_describe\": \"$(MDBX_GIT_DESCRIBE)\", \"git_timestamp\": \"$(MDBX_GIT_TIMESTAMP)\", \"git_tree\": \"$(shell git show --no-patch --format=%T HEAD 2>&1)\", \"git_commit\": \"$(shell git show --no-patch --format=%H HEAD 2>&1)\", \"semver\": \"$(MDBX_VERSION_PURE)\" }" >$@ + $(QUIET)mkdir -p $(DIST_DIR)/ && echo "{ \"git_describe\": \"$(MDBX_GIT_DESCRIBE)\", \"git_timestamp\": \"$(MDBX_GIT_TIMESTAMP)\", \"git_tree\": \"$(shell git show --no-patch --format=%T HEAD 2>&1)\", \"git_commit\": \"$(shell git show --no-patch --format=%H HEAD 2>&1)\", \"semver\": \"$(MDBX_VERSION_PURE)\" }" >$@ -dist/.clang-format-ignore: $(lastword $(MAKEFILE_LIST)) 
+$(DIST_DIR)/.clang-format-ignore: $(lastword $(MAKEFILE_LIST)) @echo ' MAKE $@' $(QUIET)echo "$(filter-out %.h %h++,$(DIST_SRC))" | tr ' ' \\n > $@ -dist/ntdll.def: src/ntdll.def +$(DIST_DIR)/ntdll.def: src/ntdll.def @echo ' COPY $@' - $(QUIET)mkdir -p dist/ && cp $< $@ + $(QUIET)mkdir -p $(DIST_DIR)/ && cp $< $@ -dist/config.h.in: src/config.h.in +$(DIST_DIR)/config.h.in: src/config.h.in @echo ' COPY $@' - $(QUIET)mkdir -p dist/ && cp $< $@ + $(QUIET)mkdir -p $(DIST_DIR)/ && cp $< $@ -dist/man1/mdbx_%.1: src/man1/mdbx_%.1 +$(DIST_DIR)/man1/mdbx_%.1: src/man1/mdbx_%.1 @echo ' COPY $@' - $(QUIET)mkdir -p dist/man1/ && cp $< $@ + $(QUIET)mkdir -p $(DIST_DIR)/man1/ && cp $< $@ endif diff --git a/conanfile.py b/conanfile.py new file mode 100644 index 00000000..79241260 --- /dev/null +++ b/conanfile.py @@ -0,0 +1,323 @@ +import shutil +import json +import os +import re +import subprocess +from conan.tools.files import rm +from conan.tools.scm import Git +from conan.tools.apple import is_apple_os +from conan.tools.cmake import CMakeToolchain, CMake, cmake_layout, CMakeDeps +from conan import ConanFile +required_conan_version = '>=2.7' + + +def semver_parse(s): + m = re.match('^v?(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)(\\.(?P<tweak>0|[1-9]\d*))?(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$', s) + return m.groupdict() if m else None + + +def semver_string(semver): + s = str(semver['major']) + '.' + \ + str(semver['minor']) + '.' + str(semver['patch']) + if not semver['tweak'] is None and semver['tweak'] != 0: + s += '.' 
+ str(semver['tweak']) + if not semver['prerelease'] is None and semver['prerelease'] != '': + s += '-' + semver['prerelease'] + return s + + +def semver_string_with_buildmetadata(semver): + s = semver_string(semver) + if not semver['buildmetadata'] is None and semver['buildmetadata'] != '': + s += '+' + semver['buildmetadata'] + return s + + +class libmdbx(ConanFile): + name = 'mdbx' + package_type = 'library' + description = 'One of the fastest embeddable key-value ACID database without WAL. libmdbx surpasses the legendary LMDB in terms of reliability, features and performance.' + license = 'Apache-2.0' + author = 'Leo Yuriev ' + homepage = 'https://libmdbx.dqdkfa.ru' + url = 'https://gitflic.ru/project/erthink/libmdbx.git' + topics = ('embedded-database', 'key-value', 'btree', 'LMDB', 'storage-engine', + 'data-storage', 'nosql', 'ACID', 'MVCC', 'MDBX') + no_copy_source = True + test_type = 'explicit' + build_policy = 'missing' + revision_mode = 'scm' + languages = 'C', 'C++' + provides = 'libmdbx' + implements = ['auto_shared_fpic'] + # upload_policy = 'skip' + # exports_sources = 'LICENSE', 'NOTICE', 'CMakeLists.txt', '*.h', '*.h++', '*.c', '*.c++', 'ntdll.def', 'man1/*', 'cmake/*', 'config.h.in' + + settings = 'os', 'compiler', 'build_type', 'arch' + options = { + 'mdbx.64bit_atomic': ['Auto', True, False], + 'mdbx.64bit_cas': ['Auto', True, False], + 'mdbx.apple.speed_insteadof_durability': ['Default', True, False], + 'mdbx.avoid_msync': ['Auto', True, False], + 'mdbx.build_cxx': ['Default', True, False], + 'mdbx.build_tools': ['Default', True, False], + 'mdbx.cacheline_size': ['Auto', 16, 32, 64, 128, 256], + 'mdbx.disable_validation': ['Default', True, False], + 'mdbx.enable_bigfoot': ['Default', True, False], + 'mdbx.enable_dbi_lockfree': ['Default', True, False], + 'mdbx.enable_dbi_sparse': ['Default', True, False], + 'mdbx.enable_pgop_stat': ['Default', True, False], + 'mdbx.enable_profgc': ['Default', True, False], + 'mdbx.enable_refund': ['Default', 
True, False], + 'mdbx.env_checkpid': ['Default', True, False], + 'mdbx.force_assertions': ['Default', True, False], + 'mdbx.have_builtin_cpu_supports': ['Auto', True, False], + 'mdbx.locking': ['Auto', 'WindowsFileLocking', 'SystemV', 'POSIX1988', 'POSIX2001', 'POSIX2008'], + 'mdbx.mmap_incoherent_file_write': ['Auto', True, False], + 'mdbx.mmap_needs_jolt': ['Auto', True, False], + 'mdbx.trust_rtc': ['Default', True, False], + 'mdbx.txn_checkowner': ['Default', True, False], + 'mdbx.unaligned_ok': ['Auto', True, False], + 'mdbx.use_copyfilerange': ['Auto', True, False], + 'mdbx.use_mincore': ['Auto', True, False], + 'mdbx.use_ofdlocks': ['Auto', True, False], + 'mdbx.use_sendfile': ['Auto', True, False], + 'mdbx.without_msvc_crt': ['Default', True, False], + 'shared': [True, False], + } + default_options = { + 'mdbx.64bit_atomic': 'Auto', + 'mdbx.64bit_cas': 'Auto', + 'mdbx.apple.speed_insteadof_durability': 'Default', + 'mdbx.avoid_msync': 'Auto', + 'mdbx.build_cxx': 'Default', + 'mdbx.build_tools': 'Default', + 'mdbx.cacheline_size': 'Auto', + 'mdbx.disable_validation': 'Default', + 'mdbx.enable_bigfoot': 'Default', + 'mdbx.enable_dbi_lockfree': 'Default', + 'mdbx.enable_dbi_sparse': 'Default', + 'mdbx.enable_pgop_stat': 'Default', + 'mdbx.enable_profgc': 'Default', + 'mdbx.enable_refund': 'Default', + 'mdbx.env_checkpid': 'Default', + 'mdbx.force_assertions': 'Default', + 'mdbx.have_builtin_cpu_supports': 'Auto', + 'mdbx.locking': 'Auto', + 'mdbx.mmap_incoherent_file_write': 'Auto', + 'mdbx.mmap_needs_jolt': 'Auto', + 'mdbx.trust_rtc': 'Default', + 'mdbx.txn_checkowner': 'Default', + 'mdbx.unaligned_ok': 'Auto', + 'mdbx.use_copyfilerange': 'Auto', + 'mdbx.use_mincore': 'Auto', + 'mdbx.use_ofdlocks': 'Auto', + 'mdbx.use_sendfile': 'Auto', + 'mdbx.without_msvc_crt': 'Default', + 'shared': True, + } + options_description = { + 'mdbx.64bit_atomic': 'Advanced: Assume 64-bit operations are atomic and not splitted to 32-bit halves. 
', + 'mdbx.64bit_cas': 'Advanced: Assume 64-bit atomic compare-and-swap operation is available. ', + 'mdbx.apple.speed_insteadof_durability': 'Disable using `fcntl(F_FULLFSYNC)` for a performance reasons at the cost of durability on power failure. ', + 'mdbx.avoid_msync': 'Disable in-memory database updating with consequent flush-to-disk/msync syscall in `MDBX_WRITEMAP` mode. ', + 'mdbx.build_cxx': 'Build C++ portion. ', + 'mdbx.build_tools': 'Build CLI tools (mdbx_chk/stat/dump/load/copy/drop). ', + 'mdbx.cacheline_size': 'Advanced: CPU cache line size for data alignment to avoid cache line false-sharing. ', + 'mdbx.disable_validation': 'Disable some checks to reduce an overhead and detection probability of database corruption to a values closer to the LMDB. ', + 'mdbx.enable_bigfoot': 'Chunking long list of retired pages during huge transactions commit to avoid use sequences of pages. ', + 'mdbx.enable_dbi_lockfree': 'Support for deferred releasing and a lockfree path to quickly open DBI handles. ', + 'mdbx.enable_dbi_sparse': 'Support for sparse sets of DBI handles to reduce overhead when starting and processing transactions. ', + 'mdbx.enable_pgop_stat': 'Gathering statistics for page operations. ', + 'mdbx.enable_profgc': 'Profiling of GC search and updates. ', + 'mdbx.enable_refund': 'Online database zero-cost auto-compactification during write-transactions. ', + 'mdbx.env_checkpid': "Checking PID inside libmdbx's API against reuse database environment after the `fork()`. ", + 'mdbx.force_assertions': 'Forces assertion checking even for release builds. ', + 'mdbx.have_builtin_cpu_supports': 'Advanced: Assume the compiler and target system has `__builtin_cpu_supports()`. ', + 'mdbx.locking': 'Advanced: Choices the locking implementation. ', + 'mdbx.mmap_incoherent_file_write': "Advanced: Assume system don't have unified page cache and/or file write operations incoherent with memory-mapped files. 
", + 'mdbx.mmap_needs_jolt': 'Advanced: Assume system needs explicit syscall to sync/flush/write modified mapped memory. ', + 'mdbx.trust_rtc': 'Advanced: Does a system have battery-backed Real-Time Clock or just a fake. ', + 'mdbx.txn_checkowner': 'Checking transaction owner thread against misuse transactions from other threads. ', + 'mdbx.unaligned_ok': 'Advanced: Assume a target CPU and/or the compiler support unaligned access. ', + 'mdbx.use_copyfilerange': 'Advanced: Use `copy_file_range()` syscall. ', + 'mdbx.use_mincore': "Use Unix' `mincore()` to determine whether database pages are resident in memory. ", + 'mdbx.use_ofdlocks': 'Advanced: Use POSIX OFD-locks. ', + 'mdbx.use_sendfile': 'Advancedc: Use `sendfile()` syscall. ', + 'mdbx.without_msvc_crt': 'Avoid dependence from MSVC CRT and use ntdll.dll instead. ', + } + + build_metadata = None + + def config_options(self): + if self.settings.get_safe('os') != 'Linux': + self.options.rm_safe('mdbx.use_copyfilerange') + self.options.rm_safe('mdbx.use_sendfile') + if self.settings.get_safe('os') == 'Windows': + self.default_options['mdbx.avoid_msync'] = True + self.options.rm_safe('mdbx.env_checkpid') + self.options.rm_safe('mdbx.locking') + self.options.rm_safe('mdbx.mmap_incoherent_file_write') + self.options.rm_safe('mdbx.use_mincore') + self.options.rm_safe('mdbx.use_ofdlocks') + else: + self.options.rm_safe('mdbx.without_msvc_crt') + if is_apple_os(self): + self.options.rm_safe('mdbx.mmap_incoherent_file_write') + else: + self.options.rm_safe('mdbx.apple.speed_insteadof_durability') + + def fetch_versioninfo_from_git(self): + git = Git(self, folder=self.recipe_folder) + git_timestamp = git.run('show --no-patch --format=%cI HEAD') + git_tree = git.run('show --no-patch --format=%T HEAD') + git_commit = git.run('show --no-patch --format=%H HEAD') + if git.run('rev-list --tags --count') == 0: + git.run('fetch --tags') + git_last_vtag = git.run('describe --tags --abbrev=0 --match=v[0-9]*') + if git_last_vtag == 
'': + git_describe = git.run('describe --all --long --always') + git_semver = semver_parse( + '0.0.0.' + git.run('rev-list --count --remove-empty --no-merges HEAD')) + else: + git_describe = git.run('describe --tags --long --match=v[0-9]*') + git_version = '.'.join( + map(str, re.split('[-v.]+', git.run('describe --tags --match=v[0-9]*'))[1:5])) + git_semver = semver_parse(git_last_vtag) + if git_semver['prerelease'] is None or git_semver['prerelease'] == '': + git_since_vtag = git.run( + 'rev-list ' + git_last_vtag + '.. --count') + if int(git_since_vtag) > 0: + git_semver['tweak'] = int(git_since_vtag) + else: + git_semver['tweak'] = None + info = {'git_describe': git_describe, 'git_timestamp': git_timestamp, + 'git_tree': git_tree, 'git_commit': git_commit, 'semver': semver_string(git_semver)} + return info + + def export_sources(self): + subprocess.run(['make', '-C', self.recipe_folder, 'DIST_DIR=' + + self.export_sources_folder, '@dist-checked.tag'], check=True) + rm(self, 'Makefile', self.export_sources_folder) + rm(self, 'GNUmakefile', self.export_sources_folder) + # json.dump(self.fetch_versioninfo_from_git(), open(os.path.join( + # self.export_sources_folder, 'VERSION.json'), 'w', encoding='utf-8')) + + def source(self): + version_json_pathname = os.path.join( + self.export_sources_folder, 'VERSION.json') + version_json = json.load( + open(os.path.join(version_json_pathname), encoding='utf-8'))['semver'] + if version_json != semver_string(semver_parse(self.version)): + self.output.error('Package/Recipe version "' + self.version + + '" mismatch VERSION.json "' + version_json + '"') + + def set_version(self): + if self.build_metadata is None and not self.version is None: + self.build_metadata = self.version + semver = semver_parse(self.build_metadata) + if semver: + self.build_metadata = semver['buildmetadata'] + else: + self.build_metadata = re.match( + '^[^0-9a-zA-Z]*([0-9a-zA-Z]+[-.0-9a-zA-Z]*)', self.build_metadata).group(1) + if self.build_metadata is 
None: + self.build_metadata = '' + version_json_pathname = os.path.join( + self.recipe_folder, 'VERSION.json') + if os.path.exists(version_json_pathname): + self.version = json.load( + open(version_json_pathname, encoding='utf-8'))['semver'] + version_from = "'" + version_jsonpath_name + "'" + else: + self.version = self.fetch_versioninfo_from_git()['semver'] + version_from = 'Git' + self.output.verbose('Fetch version from ' + + version_from + ': ' + self.version) + if self.build_metadata != '': + self.version += '+' + self.build_metadata + + def layout(self): + cmake_layout(self) + + def handle_option(self, tc, name, define=False): + opt = self.options.get_safe(name) + if not opt is None: + value = str(opt).lower() + if value != 'auto' and value != 'default': + name = name.upper().replace('.', '_') + if define: + if value == 'false' or value == 'no' or value == 'off': + tc.preprocessor_definitions[name] = 0 + elif value == 'true' or value == 'yes' or value == 'on': + tc.preprocessor_definitions[name] = 1 + else: + tc.preprocessor_definitions[name] = int(opt) + self.output.highlight( + name + '=' + str(tc.preprocessor_definitions[name]) + ' (' + str(opt) + ')') + else: + tc.cache_variables[name] = opt + self.output.highlight( + name + '=' + str(tc.cache_variables[name]) + ' (' + str(opt) + ')') + + def generate(self): + tc = CMakeToolchain(self) + if self.build_metadata is None: + self.build_metadata = semver_parse(self.version)['buildmetadata'] + if not self.build_metadata is None and self.build_metadata != '': + tc.variables['MDBX_BUILD_METADATA'] = self.build_metadata + self.output.highlight('MDBX_BUILD_METADATA is ' + + str(tc.variables['MDBX_BUILD_METADATA'])) + self.handle_option(tc, 'mdbx.64bit_atomic', True) + self.handle_option(tc, 'mdbx.64bit_cas', True) + self.handle_option(tc, 'mdbx.apple.speed_insteadof_durability') + self.handle_option(tc, 'mdbx.avoid_msync') + self.handle_option(tc, 'mdbx.build_tools') + self.handle_option(tc, 'mdbx.build_cxx') + 
self.handle_option(tc, 'mdbx.cacheline_size', True) + self.handle_option(tc, 'mdbx.disable_validation') + self.handle_option(tc, 'mdbx.enable_bigfoot') + self.handle_option(tc, 'mdbx.enable_dbi_lockfree') + self.handle_option(tc, 'mdbx.enable_dbi_sparse') + self.handle_option(tc, 'mdbx.enable_pgop_stat') + self.handle_option(tc, 'mdbx.enable_profgc') + self.handle_option(tc, 'mdbx.enable_refund') + self.handle_option(tc, 'mdbx.env_checkpid') + self.handle_option(tc, 'mdbx.force_assertions') + self.handle_option(tc, 'mdbx.have_builtin_cpu_supports', True) + self.handle_option(tc, 'mdbx.mmap_incoherent_file_write', True) + self.handle_option(tc, 'mdbx.mmap_needs_jolt') + self.handle_option(tc, 'mdbx.trust_rtc') + self.handle_option(tc, 'mdbx.txn_checkowner') + self.handle_option(tc, 'mdbx.unaligned_ok', True) + self.handle_option(tc, 'mdbx.use_copyfilerange', True) + self.handle_option(tc, 'mdbx.use_mincore') + self.handle_option(tc, 'mdbx.use_ofdlocks') + self.handle_option(tc, 'mdbx.use_sendfile', True) + self.handle_option(tc, 'mdbx.without_msvc_crt') + opt = self.options.get_safe('mdbx.locking', 'auto') + if not opt is None: + value = str(opt).lower() + if value != 'auto' and value != 'default': + map = {'windowsfilelocking': -1, 'systemv': 5, 'posix1988': 1988, + 'posix2001': 2001, 'posix2008': 2008} + value = map[value] + tc.cache_variables['MDBX_LOCKING'] = value + self.output.highlight('MDBX_LOCKING=' + + str(tc.cache_variables['MDBX_LOCKING'])) + tc.generate() + + def build(self): + cmake = CMake(self) + cmake.configure() + cmake.build() + + def package(self): + cmake = CMake(self) + cmake.install() + + def package_info(self): + if self.options.shared: + self.cpp_info.libs = ['mdbx'] + else: + self.cpp_info.libs = ['mdbx-static'] From 367a118a8fe5c2f6ee0b1d4d1e522c37215e0dab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 3 Dec 2024 00:16:21 +0300 
Subject: [PATCH 369/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 46 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index fe5203fe..83c7a58a 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -25,26 +25,47 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Переход на "Semantic Versioning 2" и экспорт информации о версии в `VERSION.json`. - Добавлена поддержка переменной среды `SOURCE_DATE_EPOCH` для воспроизводимости сборок. Прежний способ посредством `MDBX_BUILD_TIMESTAMP` также работает и имеет приоритет. - - Добавлена возможность указывать дополнительную информацию о сборке libmdbx через опцию `MDBX_BUILD_METADATA`. - Сейчас задаваемая информация просто включается внутрь библиотеки в качестве значения `mdbx_build.metadata`, - а в дальнейшем также будет использоваться при формировании пакетов и т.п. + - Добавлена возможность указывать дополнительную информацию о сборке + libmdbx через опцию `MDBX_BUILD_METADATA`. Сейчас задаваемая информация + просто включается внутрь библиотеки в качестве значения + `mdbx_build.metadata`, а в дальнейшем также будет использоваться при + формировании пакетов и т.п. - Добавлено логирование ошибок возвращаемых из API. Теперь для этого достаточно задать уровень логирования `MDBX_LOG_DEBUG` (для логирования ошибок за вычетом `MDBX_NOTFOUND`) или `MDBX_LOG_TRACE` (для логирования всех ошибок, а также `MDBX_RESULT_TRUE`). + - Поддержка сборки посредством Conan. Изменение поведения: - - Добавлен метод `mdbx::cursor::get_multiple_samelength()` и переименован `mdbx::txn::put_multiple_samelength()`. - - Для единообразия C++ API при выполнении операции `MDBX_GET_MULTIPLE` теперь также возвращается значение самого ключа. 
- - Для размерных констант `mdbx::env::geometry` базовый тип изменен с беззнакового `size_t` на знаковый `intptr_t`. - - Теперь выбор в пользу использования ntdll вместо CRT делается только при явном отключении C++ API. - - Теперь выполняется освобождение памяти сброшенных/прерванных читающих транзакций передаваемых в `mdbx_txn_commit()`. - Соглашение по API требует чтобы такие транзакции освобождались посредством `mdbx_txn_abort()`, из-за чего - функция `mdbx_txn_commit()` возвращала ошибку в таких случаях, не разрушая сами транзакции. - Это приводило к утечкам памяти из-за ошибок в приложениях, что побудило изменить поведение. + - Добавлен метод `mdbx::cursor::get_multiple_samelength()` и + переименован `mdbx::txn::put_multiple_samelength()`. + - Для единообразия C++ API при выполнении операции `MDBX_GET_MULTIPLE` + теперь также возвращается значение самого ключа. + - Для размерных констант `mdbx::env::geometry` базовый тип изменен с + беззнакового `size_t` на знаковый `intptr_t`. + - Теперь выбор в пользу использования ntdll вместо CRT делается только + при явном отключении C++ API. + - Теперь выполняется освобождение памяти сброшенных/прерванных читающих + транзакций передаваемых в `mdbx_txn_commit()`. Соглашение по API требует + чтобы такие транзакции освобождались посредством `mdbx_txn_abort()`, + из-за чего функция `mdbx_txn_commit()` возвращала ошибку в таких + случаях, не разрушая сами транзакции. Это приводило к утечкам памяти + из-за ошибок в приложениях, что побудило изменить поведение. - Использование макроса `__deprecated_enum` если он определен. - - При сборке посредством CMake выбор стандарта языка `C` теперь выполняется с учётом `CMAKE_C_STANDARD`. + - При сборке посредством CMake выбор стандарта языка `C` теперь + выполняется с учётом `CMAKE_C_STANDARD`. 
+ - Изменения в опциях сборки: + * опция `MDBX_OSX_SPEED_INSTEADOF_DURABILITY` переименована в + `MDBX_APPLE_SPEED_INSTEADOF_DURABILITY`, так как актуальна для всех + "Яблочных" платформ; + * опция `MDBX_MMAP_USE_MS_ASYNC` переименована в + `MDBX_MMAP_NEEDS_JOLT`, для более точного соответствия своей семантики; + * в CMake добавлена поддержка опции `MDBX_USE_MINCORE`; + * использование `madvise()` и родственных системных вызовов теперь + всегда включено, а опция `MDBX_ENABLE_MADVISE` удалена; + * удалены неиспользуемая опция `MDBX_USE_SYNCFILERANGE` и + неиспользуемый режим `MDBX_LOCKING_BENAPHORE`. Исправления: @@ -66,6 +87,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic - Поправлено определение `MDBX_DEPRECATED_ENUM` для старых компиляторов при включении С++11. - Доработано использование `std::experimental::filesystem` для решения проблем со сборкой в старых компиляторах. - Исправлена обработка `MDBX_GET_MULTIPLE` в специальных случаях и одного значения у ключа в позиции курсора. + - Исправление сборки при включении профилирования GC и `MDBX_ENABLE_DBI_SPARSE=OFF`. 
Мелочи: From d1b2ec0489aefda2ee0c784136228ea101c3a80b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 4 Dec 2024 23:50:33 +0300 Subject: [PATCH 370/443] =?UTF-8?q?mdbx-cmake:=20=D1=83=D0=B4=D0=B0=D0=BB?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B4=D0=B2=D0=BE=D0=B9=D0=BD=D0=BE?= =?UTF-8?q?=D0=B9=20=D0=BF=D1=80=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B8=20?= =?UTF-8?q?=D0=B2=D0=B5=D1=80=D1=81=D0=B8=D0=B8=20MSVC.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b954f58e..c405d143 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -304,8 +304,10 @@ include(GNUInstallDirs) if(CMAKE_C_COMPILER_ID STREQUAL "MSVC" AND MSVC_VERSION LESS 1900) message( - SEND_ERROR "MSVC compiler ${MSVC_VERSION} is too old for building MDBX." - " At least 'Microsoft Visual Studio 2015' is required.") + SEND_ERROR + "MSVC compiler ${MSVC_VERSION} is too old for building MDBX." + " At least \"Microsoft C/C++ Compiler\" version 19.0.24234.1 (Visual Studio 2015 Update 3) is required." + ) endif() if(NOT DEFINED THREADS_PREFER_PTHREAD_FLAG) @@ -946,13 +948,7 @@ else() STATUS "Use C${MDBX_C_STANDARD} for libmdbx but C++ portion is disabled") endif() -if(SUBPROJECT AND MSVC) - if(MSVC_VERSION LESS 1900) - message( - FATAL_ERROR - "At least \"Microsoft C/C++ Compiler\" version 19.0.24234.1 (Visual Studio 2015 Update 3) is required." 
- ) - endif() +if(MSVC) add_compile_options("/utf-8") endif() From 3a09b8fb0c1a9bfde382bcba2997408452581188 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 6 Dec 2024 18:18:36 +0300 Subject: [PATCH 371/443] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D0=BE=D1=80=D0=B4?= =?UTF-8?q?=D0=B8=D0=BD=D0=B0=D1=82=D1=8B=20=D0=B4=D0=BB=D1=8F=20=D1=81?= =?UTF-8?q?=D0=BF=D0=BE=D0=BD=D1=81=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD?= =?UTF-8?q?=D0=B8=D1=8F=20=D0=B2=20ETH.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit // Пока Positive Technologies, пора заняться делом. --- CMakeLists.txt | 7 +++---- NOTICE | 4 ++-- README.md | 5 +++-- docs/Doxyfile.in | 2 +- docs/_toc.md | 13 +++---------- mdbx.h | 3 ++- mdbx.h++ | 3 +++ 7 files changed, 17 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c405d143..2bad731d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,9 @@ # Copyright (c) 2020-2024 Леонид Юрьев aka Leonid Yuriev # SPDX-License-Identifier: Apache-2.0 - # +# Donations are welcome to ETH `0xD104d8f8B2dC312aaD74899F83EBf3EEBDC1EA3A`. +# Всё будет хорошо! + # libmdbx = { Revised and extended descendant of Symas LMDB. } Please see # README.md at https://gitflic.ru/project/erthink/libmdbx # @@ -19,9 +21,6 @@ # and API based on C++17, as well as the Apache 2.0 License. The goal of this # revolution is to provide a clearer and robust API, add more features and new # valuable properties of database. -# -# The Future will (be) Positive. Всё будет хорошо. -# if(CMAKE_VERSION VERSION_LESS 3.8.2) cmake_minimum_required(VERSION 3.0.2) diff --git a/NOTICE b/NOTICE index cfc8dd45..639c628c 100644 --- a/NOTICE +++ b/NOTICE @@ -8,8 +8,8 @@ documentation, C++ API description and links to the original git repo with the source code. 
Questions, feedback and suggestions are welcome to the Telegram' group https://t.me/libmdbx. -Since 2017 _libmdbx_ development is funded by [Positive Technologies](https://www.ptsecurity.com) -and used inside company products. Всё будет хорошо! +Donations are welcome to ETH `0xD104d8f8B2dC312aaD74899F83EBf3EEBDC1EA3A`. +Всё будет хорошо! Copyright 2015-2024 Леонид Юрьев aka Leonid Yuriev SPDX-License-Identifier: Apache-2.0 diff --git a/README.md b/README.md index 1fbf8e65..bb2a6437 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,8 @@ > For NEWS take a look to the [ChangeLog](https://gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md) > or the [TODO](https://gitflic.ru/project/erthink/libmdbx/blob?file=TODO.md). -*The Future will (be) [Positive](https://www.ptsecurity.com). Всё будет хорошо.* +Donations are welcome to ETH `0xD104d8f8B2dC312aaD74899F83EBf3EEBDC1EA3A`. +Всё будет хорошо! libmdbx @@ -368,7 +369,7 @@ year later _libmdbx_ was separated into a standalone project, which was conference](http://www.highload.ru/2015/abstracts/1831.html). Since 2017 _libmdbx_ is used in [Fast Positive Tables](https://gitflic.ru/project/erthink/libfpta), -and development is funded by [Positive Technologies](https://www.ptsecurity.com). +and until 2025 development was funded by [Positive Technologies](https://www.ptsecurity.com). On 2022-04-15 the Github administration, without any warning nor explanation, deleted _libmdbx_ along with a lot of other projects, diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index 888f5486..c29a8711 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -54,7 +54,7 @@ PROJECT_NUMBER = "${MDBX_VERSION_MAJOR}.${MDBX_VERSION_MINOR}.${MDBX_VER # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. -PROJECT_BRIEF = "One of the fastest compact embeddable key-value ACID database without WAL." 
+PROJECT_BRIEF = "One of the fastest compact embeddable key-value ACID storage engine without WAL." # With the PROJECT_LOGO tag one can specify a logo or an icon that is included # in the documentation. The maximum height of the logo should not exceed 55 diff --git a/docs/_toc.md b/docs/_toc.md index 45482dc6..46b489ec 100644 --- a/docs/_toc.md +++ b/docs/_toc.md @@ -1,16 +1,9 @@ -### We have migrated to a reliable trusted infrastructure - -The origin for now is at -[GitFlic](https://gitflic.ru/project/erthink/libmdbx) since on -2022-04-15 the Github administration, without any warning nor -explanation, deleted _libmdbx_ along with a lot of other projects, -simultaneously blocking access for many developers. For the same reason -~~Github~~ is blacklisted forever. +The source code is available on [Gitflic](https://gitflic.ru/project/erthink/libmdbx). +Donations are welcome to ETH `0xD104d8f8B2dC312aaD74899F83EBf3EEBDC1EA3A`. +Всё будет хорошо! > Questions, feedback and suggestions are welcome to the [Telegram' group](https://t.me/libmdbx). -_The Future will (be) [Positive](https://www.ptsecurity.com). Всё будет хорошо._ - \section toc Table of Contents This manual is divided into parts, diff --git a/mdbx.h b/mdbx.h index 62600e63..185112d0 100644 --- a/mdbx.h +++ b/mdbx.h @@ -18,7 +18,8 @@ C++ API description and links to the origin git repo with the source code. Questions, feedback and suggestions are welcome to the Telegram' group https://t.me/libmdbx. -_The Future will (be) [Positive](https://www.ptsecurity.com). Всё будет хорошо._ +Donations are welcome to ETH `0xD104d8f8B2dC312aaD74899F83EBf3EEBDC1EA3A`. +Всё будет хорошо!
\note The origin has been migrated to [GitFlic](https://gitflic.ru/project/erthink/libmdbx) since on 2022-04-15 the diff --git a/mdbx.h++ b/mdbx.h++ index eb370d9f..7e680af3 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -1,6 +1,9 @@ /// \copyright SPDX-License-Identifier: Apache-2.0 /// \author Леонид Юрьев aka Leonid Yuriev \date 2020-2024 /// +/// Donations are welcome to ETH `0xD104d8f8B2dC312aaD74899F83EBf3EEBDC1EA3A`. +/// Всё будет хорошо! +/// /// \file mdbx.h++ /// \brief The libmdbx C++ API header file. /// From 207ad31257799f0ae7a518e319edefd920d924ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 6 Dec 2024 20:34:28 +0300 Subject: [PATCH 372/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B2=20README=20=D1=83=D0=BF?= =?UTF-8?q?=D0=BE=D0=BC=D0=B8=D0=BD=D0=B0=D0=BD=D0=B8=D0=B9=20=D0=BE=20?= =?UTF-8?q?=D0=BF=D0=BE=D0=B4=D0=B4=D0=B5=D1=80=D0=B6=D0=BA=D0=B5=20Conan.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index bb2a6437..bb057b9e 100644 --- a/README.md +++ b/README.md @@ -416,7 +416,7 @@ since release the version 1.0. ## Source code embedding -_libmdbx_ provides two official ways for integration in source code form: +_libmdbx_ provides three official ways for integration in source code form: 1. Using an amalgamated source code which available in the [releases section](https://gitflic.ru/project/erthink/libmdbx/release) on GitFlic. > An amalgamated source code includes all files required to build and @@ -425,7 +425,13 @@ _libmdbx_ provides two official ways for integration in source code form: > repository on Linux by executing `make dist`. As a result, the desired > set of files will be formed in the `dist` subdirectory. -2.
Adding the complete source code as a `git submodule` from the [origin git repository](https://gitflic.ru/project/erthink/libmdbx) on GitFlic. +2. Using [Conan Package Manager](https://conan.io/): + - optional: Setup your own conan-server; + - Create conan-package by `conan create .` inside the _libmdbx_' repo subdirectory; + - optional: Upload created recipe and/or package to the conan-server by `conan upload -r SERVER 'mdbx/*'`; + - Consume libmdbx-package from the local conan-cache or from conan-server in accordance with the [Conan tutorial](https://docs.conan.io/2/tutorial/consuming_packages.html). + +3. Adding the complete source code as a `git submodule` from the [origin git repository](https://gitflic.ru/project/erthink/libmdbx) on GitFlic. > This allows you to build as _libmdbx_ and testing tool. > On the other hand, this way requires you to pull git tags, and use C++11 compiler for test tool. From 9481c0e5c4b4179c3a8d4f04a8ffb7caa65c1913 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 6 Dec 2024 22:15:23 +0300 Subject: [PATCH 373/443] =?UTF-8?q?mdbx:=20=D0=BE=D0=BF=D0=B5=D1=87=D0=B0?= =?UTF-8?q?=D1=82=D0=BA=D0=B8/=D0=BE=D1=80=D1=84=D0=BE=D0=B3=D1=80=D0=B0?= =?UTF-8?q?=D1=84=D0=B8=D1=8F.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 14 +++++++------- mdbx.h++ | 2 +- src/man1/mdbx_chk.1 | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/mdbx.h b/mdbx.h index 185112d0..29feb245 100644 --- a/mdbx.h +++ b/mdbx.h @@ -100,7 +100,7 @@ are only a few cases of changing data. 
| _DELETING_||| |Key is absent → Error since no such key |\ref mdbx_del() or \ref mdbx_replace()|Error \ref MDBX_NOTFOUND| |Key exist → Delete by key |\ref mdbx_del() with the parameter `data = NULL`|Deletion| -|Key exist → Delete by key with with data matching check|\ref mdbx_del() with the parameter `data` filled with the value which should be match for deletion|Deletion or \ref MDBX_NOTFOUND if the value does not match| +|Key exist → Delete by key with data matching check|\ref mdbx_del() with the parameter `data` filled with the value which should be match for deletion|Deletion or \ref MDBX_NOTFOUND if the value does not match| |Delete at the current cursor position |\ref mdbx_cursor_del() with \ref MDBX_CURRENT flag|Deletion| |Extract (read & delete) value by the key |\ref mdbx_replace() with zero flag and parameter `new_data = NULL`|Returning a deleted value| @@ -133,7 +133,7 @@ as a duplicates or as like a multiple values corresponds to keys. |Key exist → Delete all values corresponds given key|\ref mdbx_del() with the parameter `data = NULL`|Deletion| |Key exist → Delete particular value corresponds given key|\ref mdbx_del() with the parameter `data` filled with the value that wanna to delete, or \ref mdbx_replace() with \ref MDBX_CURRENT + \ref MDBX_NOOVERWRITE and the `old_value` parameter filled with the value that wanna to delete and `new_data = NULL`| Deletion or \ref MDBX_NOTFOUND if no such key-value pair| |Delete one value at the current cursor position|\ref mdbx_cursor_del() with \ref MDBX_CURRENT flag|Deletion only the current entry| -|Delete all values of key at the current cursor position|\ref mdbx_cursor_del() with with \ref MDBX_ALLDUPS flag|Deletion all duplicates of key (all multi-values) at the current cursor position| +|Delete all values of key at the current cursor position|\ref mdbx_cursor_del() with \ref MDBX_ALLDUPS flag|Deletion all duplicates of key (all multi-values) at the current cursor position| \defgroup c_cursors Cursors 
\defgroup c_statinfo Statistics & Information @@ -1470,7 +1470,7 @@ typedef enum MDBX_env_flags { * \ref mdbx_env_set_syncbytes() and \ref mdbx_env_set_syncperiod() functions * could be very useful with `MDBX_SAFE_NOSYNC` flag. * - * The number and volume of of disk IOPs with MDBX_SAFE_NOSYNC flag will + * The number and volume of disk IOPs with MDBX_SAFE_NOSYNC flag will * exactly the as without any no-sync flags. However, you should expect a * larger process's [work set](https://bit.ly/2kA2tFX) and significantly worse * a [locality of reference](https://bit.ly/2mbYq2J), due to the more @@ -2176,7 +2176,7 @@ typedef enum MDBX_option { * for all processes interacting with the database. * * \details This defines the number of slots in the lock table that is used to - * track readers in the the environment. The default is about 100 for 4K + * track readers in the environment. The default is about 100 for 4K * system page size. Starting a read-only transaction normally ties a lock * table slot to the current thread until the environment closes or the thread * exits. If \ref MDBX_NOSTICKYTHREADS is in use, \ref mdbx_txn_begin() @@ -3753,7 +3753,7 @@ mdbx_limits_txnsize_max(intptr_t pagesize); * \ingroup c_settings * * \details This defines the number of slots in the lock table that is used to - * track readers in the the environment. The default is about 100 for 4K system + * track readers in the environment. The default is about 100 for 4K system * page size. Starting a read-only transaction normally ties a lock table slot * to the current thread until the environment closes or the thread exits. If * \ref MDBX_NOSTICKYTHREADS is in use, \ref mdbx_txn_begin() instead ties the @@ -6157,7 +6157,7 @@ LIBMDBX_API int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, * This returns a comparison as if the two data items were keys in the * specified table. * - * \warning There ss a Undefined behavior if one of arguments is invalid. 
+ * \warning There is a Undefined behavior if one of arguments is invalid. * * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). @@ -6182,7 +6182,7 @@ mdbx_get_keycmp(MDBX_db_flags_t flags); * This returns a comparison as if the two items were data items of the * specified table. * - * \warning There ss a Undefined behavior if one of arguments is invalid. + * \warning There is a Undefined behavior if one of arguments is invalid. * * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). * \param [in] dbi A table handle returned by \ref mdbx_dbi_open(). diff --git a/mdbx.h++ b/mdbx.h++ index 7e680af3..1166dc0a 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -945,7 +945,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { /// \brief Checks whether the content of the slice is printable. /// \param [in] disable_utf8 By default if `disable_utf8` is `false` function /// checks that content bytes are printable ASCII-7 characters or a valid UTF8 - /// sequences. Otherwise, if if `disable_utf8` is `true` function checks that + /// sequences. Otherwise, if `disable_utf8` is `true` function checks that /// content bytes are printable extended 8-bit ASCII codes. MDBX_NOTHROW_PURE_FUNCTION bool is_printable(bool disable_utf8 = false) const noexcept; diff --git a/src/man1/mdbx_chk.1 b/src/man1/mdbx_chk.1 index b48a7984..b53422c7 100644 --- a/src/man1/mdbx_chk.1 +++ b/src/man1/mdbx_chk.1 @@ -27,7 +27,7 @@ mdbx_chk \- MDBX checking tool .SH DESCRIPTION The .B mdbx_chk -utility intended to check an MDBX database file. +utility is intended to check an MDBX database file. .SH OPTIONS .TP .BR \-V @@ -55,7 +55,7 @@ check, including full check of all meta-pages and actual size of database file. .BR \-w Open environment in read-write mode and lock for writing while checking. This could be impossible if environment already used by another process(s) -in an incompatible read-write mode. 
This allow rollback to last steady commit +in an incompatible read-write mode. This allows rollback to last steady commit (in case environment was not closed properly) and then check transaction IDs of meta-pages. Otherwise, without \fB\-w\fP option environment will be opened in read-only mode. @@ -90,7 +90,7 @@ then forcibly loads ones by sequential access and tries to lock database pages i .TP .BR \-n Open MDBX environment(s) which do not use subdirectories. -This is legacy option. For now MDBX handles this automatically. +This is a legacy option. For now MDBX handles this automatically. .SH DIAGNOSTICS Exit status is zero if no errors occur. Errors result in a non-zero exit status From 1566a0006c0b2ebc75b8f8a39cbf703db781bec0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 7 Dec 2024 19:04:27 +0300 Subject: [PATCH 374/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=80=D0=B5=D0=B3=D1=80?= =?UTF-8?q?=D0=B5=D1=81=D1=81=D0=B0=20=D0=B2=20`mdbx=5Fenv=5Fstat=5Fex()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/cold.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cold.c b/src/cold.c index 8b904776..35665101 100644 --- a/src/cold.c +++ b/src/cold.c @@ -166,6 +166,8 @@ __cold static void stat_add(const tree_t *db, MDBX_stat *const st, } __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { + memset(st, 0, bytes); + int err = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(err != MDBX_SUCCESS)) return err; From 7aed3a76099afa865103b69496324ab82b952c44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 7 Dec 2024 19:50:26 +0300 Subject: [PATCH 375/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= 
=?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B8=20=D0=BA=D0=BE=D1=80?= =?UTF-8?q?=D1=80=D0=B5=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20?= =?UTF-8?q?README.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index bb057b9e..bbbebe4b 100644 --- a/README.md +++ b/README.md @@ -232,7 +232,12 @@ On the other hand, if you make something suboptimally, you can notice detrimenta ## Comparison with other databases For now please refer to [chapter of "BoltDB comparison with other databases"](https://github.com/coreos/bbolt#comparison-with-other-databases) -which is also (mostly) applicable to _libmdbx_. +which is also (mostly) applicable to _libmdbx_ with minor clarifications: + - a database could be shared by multiple processes, i.e. no multi-process issues; + - no issues with moving a cursor(s) after the deletion; + - _libmdbx_ provides zero-overhead database compactification, so a database file could be shrunk/truncated in particular cases; + - excluding disk I/O time _libmdbx_ could be ≈3 times faster than BoltDB and up to 10-100K times faster than both BoltDB and LMDB in particular extreme cases; + - _libmdbx_ provides more features compared to BoltDB and/or LMDB. @@ -371,6 +376,9 @@ conference](http://www.highload.ru/2015/abstracts/1831.html). Since 2017 _libmdbx_ is used in [Fast Positive Tables](https://gitflic.ru/project/erthink/libfpta), and until 2025 development was funded by [Positive Technologies](https://www.ptsecurity.com). +Since 2020 _libmdbx_ is used in Ethereum: [Erigon](https://github.com/erigontech/erigon), [Akula](https://github.com/akula-bft/akula), +[Silkworm](https://github.com/erigontech/silkworm), [Reth](https://github.com/paradigmxyz/reth), etc.
+ On 2022-04-15 the Github administration, without any warning nor explanation, deleted _libmdbx_ along with a lot of other projects, simultaneously blocking access for many developers. Therefore on @@ -379,17 +387,8 @@ The origin for now is at [GitFlic](https://gitflic.ru/project/erthink/libmdbx) with backup at [ABF by ROSA Лаб](https://abf.rosalinux.ru/erthink/libmdbx). For the same reason ~~Github~~ is blacklisted forever. -Начиная с 2021 года наблюдаются устойчивые тенденции к распространению -недостоверной информации о libmdbx в странах НАТО, политизированной -критики, а также отказу от использования библиотеки в пользу LMDB, -несмотря на явные проблемы с одной стороны и преимущества с другой. -Поэтому, начиная с 17 марта 2024 года, прекращается документирование и -сопровождение проекта на английском языке. Новая функциональность будет -документироваться только на русском языке, однако, целенаправленного -переписывания/перевода документации пока не планируется. - -Since May 2024 and version v0.13 _libmdbx_ was re-licensed under Apache-2.0 license. -Please refer to the `COPYRIGHT` file for license change explanations. +Since May 2024 and version 0.13 _libmdbx_ was re-licensed under Apache-2.0 license. +Please refer to the [`COPYRIGHT` file](https://gitflic.ru/project/erthink/libmdbx/blob/raw?file=COPYRIGHT) for license change explanations. ## Acknowledgments @@ -445,11 +444,14 @@ don't ask for support and don't name such chimeras `libmdbx`. Both amalgamated and original source code provides build through the use [CMake](https://cmake.org/) or [GNU Make](https://www.gnu.org/software/make/) with -[bash](https://en.wikipedia.org/wiki/Bash_(Unix_shell)). All build ways +[bash](https://en.wikipedia.org/wiki/Bash_(Unix_shell)). + +All build ways are completely traditional and have minimal prerequirements like `build-essential`, i.e. the non-obsolete C/C++ compiler and a [SDK](https://en.wikipedia.org/wiki/Software_development_kit) for the -target platform. 
Obviously you need building tools itself, i.e. `git`, +target platform. +Obviously you need building tools itself, i.e. `git`, `cmake` or GNU `make` with `bash`. For your convenience, `make help` and `make options` are also available for listing existing targets and build options respectively. @@ -478,7 +480,7 @@ Therefore, only basic information is provided: This is the `basic` test scenario. - The `Makefile` provide several self-described targets for testing: `smoke`, `test`, `check`, `memcheck`, `test-valgrind`, `test-asan`, `test-leak`, `test-ubsan`, `cross-gcc`, `cross-qemu`, `gcc-analyzer`, `smoke-fault`, `smoke-singleprocess`, - `test-singleprocess`, 'long-test'. Please run `make --help` if doubt. + `test-singleprocess`, `long-test`. Please run `make --help` if doubt. - In addition to the `mdbx_test` utility, there is the script [`stochastic.sh`](https://gitflic.ru/project/erthink/libmdbx/blob/master/test/stochastic.sh), which calls `mdbx_test` by going through set of modes and options, with gradually increasing the number of operations and the size of transactions. This script is used for mostly of all automatic testing, including `Makefile` targets and Continuous Integration. @@ -588,9 +590,7 @@ runtime dependencies from CRT and other MSVC libraries. For this is enough to pass the `-DMDBX_WITHOUT_MSVC_CRT:BOOL=ON` option during configure by CMake. -An example of running a basic test script can be found in the -[CI-script](appveyor.yml) for [AppVeyor](https://www.appveyor.com/). To -run the [long stochastic test scenario](test/stochastic.sh), +To run the [long stochastic test scenario](test/stochastic.sh), [bash](https://en.wikipedia.org/wiki/Bash_(Unix_shell)) is required, and such testing is recommended with placing the test data on the [RAM-disk](https://en.wikipedia.org/wiki/RAM_drive). 
From fe627ed2f21a80da358af05cc7b8a3482a0cf86c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 7 Dec 2024 20:17:36 +0300 Subject: [PATCH 376/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index 83c7a58a..cff93e63 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -69,6 +69,13 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic Исправления: + - Устранен регресс возврата неверной информации из функций + `mdbx_env_stat_ex()` и `mdbx_env_stat()`. При рефакторинге до выпуска + v0.13.1 была допущена ошибка, из-за которой выполнялось суммирование + значений без очистки переданного пользователем буфера для результата. + Таким образом, возвращаемая информация была верной, только если память + используемая для размещения результата содержала нули на момент вызова + функции. - Функция `mdbx_close_dbi()` доработана для возврата ошибки `MDBX_DANGLING_DBI` при попытке закрыть dbi-дескриптор таблицы, созданной и/или измененной в ещё выполняющейся транзакции. 
Такое преждевременное закрытие дескриптора From 529b0357e8ebd6c730f9b175713e52113f62399a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 10 Dec 2024 17:06:45 +0300 Subject: [PATCH 377/443] =?UTF-8?q?mdbx-cmake:=20=D0=B8=D1=81=D0=BF=D1=80?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D1=88=D0=B8?= =?UTF-8?q?=D0=B1=D0=BA=D0=B8=20=D0=BF=D1=80=D0=B8=20copy&paste.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2bad731d..d3fb4704 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1230,9 +1230,9 @@ endif(MDBX_INSTALL_STATIC) # collect options & build info if(NOT DEFINED MDBX_BUILD_TIMESTAMP) if(NOT "$ENV{SOURCE_DATE_EPOCH}" STREQUAL "") - set(FPTA_BUILD_TIMESTAMP "$ENV{SOURCE_DATE_EPOCH}") + set(MDBX_BUILD_TIMESTAMP "$ENV{SOURCE_DATE_EPOCH}") else() - string(TIMESTAMP FPTA_BUILD_TIMESTAMP UTC) + string(TIMESTAMP MDBX_BUILD_TIMESTAMP UTC) endif() endif() set(MDBX_BUILD_FLAGS ${CMAKE_C_FLAGS}) From 5652b360b989bafdf5f292825d8d23842e5e44fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 11 Dec 2024 15:04:49 +0300 Subject: [PATCH 378/443] =?UTF-8?q?mdbx:=20=D0=BD=D0=B5=D0=B7=D0=BD=D0=B0?= =?UTF-8?q?=D1=87=D0=B8=D1=82=D0=B5=D0=BB=D1=8C=D0=BD=D1=8B=D0=B5=20=D0=BF?= =?UTF-8?q?=D1=80=D0=B0=D0=B2=D0=BA=D0=B8=20README.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index bbbebe4b..7b3d8631 100644 --- a/README.md +++ b/README.md @@ -5,12 +5,10 @@ > and pay attention to the [`C++` 
API](https://gitflic.ru/project/erthink/libmdbx/blob?file=mdbx.h%2B%2B#line-num-1). > Questions, feedback and suggestions are welcome to the [Telegram' group](https://t.me/libmdbx). +> See the [ChangeLog](https://gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md) for `NEWS` and latest updates. -> For NEWS take a look to the [ChangeLog](https://gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md) -> or the [TODO](https://gitflic.ru/project/erthink/libmdbx/blob?file=TODO.md). - -Donations are welcome to ETH `0xD104d8f8B2dC312aaD74899F83EBf3EEBDC1EA3A`. -Всё будет хорошо! +> Donations are welcome to ETH `0xD104d8f8B2dC312aaD74899F83EBf3EEBDC1EA3A`. +> Всё будет хорошо! libmdbx @@ -94,15 +92,15 @@ However, _MithrilDB_ will not be available for countries unfriendly to Russia (i.e. acceded the sanctions, devil adepts and/or NATO). But it is not yet known whether such restriction will be implemented only through a license and support, either the source code will not be open at all. -Basically we are not inclined to allow our work to contribute to the -profit that goes to weapons that kill our relatives and friends. +Basically I am not inclined to allow my work to contribute to the +profit that goes to weapons that kill my relatives and friends. NO OPTIONS. Nonetheless, I try not to make any promises regarding _MithrilDB_ until release. Contrary to _MithrilDB_, _libmdbx_ will forever free and open source. Moreover with high-quality support whenever possible. Tu deviens -responsable pour toujours de ce que tu as apprivoisé. So we will continue +responsable pour toujours de ce que tu as apprivoisé. So I will continue to comply with the original open license and the principles of constructive cooperation, in spite of outright Github sabotage and sanctions. I will also try to keep (not drop) Windows support, despite @@ -375,14 +373,13 @@ conference](http://www.highload.ru/2015/abstracts/1831.html). 
Since 2017 _libmdbx_ is used in [Fast Positive Tables](https://gitflic.ru/project/erthink/libfpta), and until 2025 development was funded by [Positive Technologies](https://www.ptsecurity.com). - Since 2020 _libmdbx_ is used in Ethereum: [Erigon](https://github.com/erigontech/erigon), [Akula](https://github.com/akula-bft/akula), [Silkworm](https://github.com/erigontech/silkworm), [Reth](https://github.com/paradigmxyz/reth), etc. On 2022-04-15 the Github administration, without any warning nor explanation, deleted _libmdbx_ along with a lot of other projects, simultaneously blocking access for many developers. Therefore on -2022-04-21 we have migrated to a reliable trusted infrastructure. +2022-04-21 I have migrated to a reliable trusted infrastructure. The origin for now is at [GitFlic](https://gitflic.ru/project/erthink/libmdbx) with backup at [ABF by ROSA Лаб](https://abf.rosalinux.ru/erthink/libmdbx). For the same reason ~~Github~~ is blacklisted forever. @@ -612,16 +609,16 @@ tests. If something goes wrong, it is recommended to install To run the [long stochastic test scenario](test/stochastic.sh), you will need to install the current (not outdated) version of -[Bash](https://en.wikipedia.org/wiki/Bash_(Unix_shell)). To do this, we +[Bash](https://en.wikipedia.org/wiki/Bash_(Unix_shell)). To do this, I recommend that you install [Homebrew](https://brew.sh/) and then execute `brew install bash`. ### Android -We recommend using CMake to build _libmdbx_ for Android. +I recommend using CMake to build _libmdbx_ for Android. Please refer to the [official guide](https://developer.android.com/studio/projects/add-native-code). ### iOS -To build _libmdbx_ for iOS, we recommend using CMake with the +To build _libmdbx_ for iOS, I recommend using CMake with the ["toolchain file"](https://cmake.org/cmake/help/latest/variable/CMAKE_TOOLCHAIN_FILE.html) from the [ios-cmake](https://github.com/leetal/ios-cmake) project. 
@@ -662,10 +659,17 @@ Bindings Performance comparison ====================== -All benchmarks were done in 2015 by [IOArena](https://abf.io/erthink/ioarena) +Over the past 10 years, _libmdbx_ has had a lot of significant +improvements and innovations. _libmdbx_ has become slightly faster in +simple cases and many times faster in complex scenarios, especially with +huge transactions in gigantic databases. Therefore, on the one hand, +the results below are outdated. However, on the other hand, these simple +benchmarks are evident, easy to reproduce, and are close to the most +common use cases. + +All of the following benchmark results were obtained in 2015 by [IOArena](https://abf.io/erthink/ioarena) and multiple [scripts](https://github.com/pmwkaa/ioarena/tree/HL%2B%2B2015) -runs on Lenovo Carbon-2 laptop, i7-4600U 2.1 GHz (2 physical cores, 4 HyperThreading cores), 8 Gb RAM, -SSD SAMSUNG MZNTD512HAGL-000L1 (DXT23L0Q) 512 Gb. +runs on my laptop (i7-4600U 2.1 GHz, SSD MZNTD512HAGL-000L1). ## Integral performance @@ -696,7 +700,8 @@ Here showed sum of performance metrics in 3 benchmarks: ## Read Scalability Summary performance with concurrent read/search queries in 1-2-4-8 -threads on the machine with 4 logical CPUs in HyperThreading mode (i.e. actually 2 physical CPU cores). +threads on the machine with 4 logical CPUs in HyperThreading mode (i.e. +actually 2 physical CPU cores). ![Comparison #2: Read Scalability](https://libmdbx.dqdkfa.ru/img/perf-slide-2.png) @@ -761,10 +766,10 @@ records. execution time, cross marks standard deviation. **1,000,000 transactions in async-write mode**. -In case of a crash all data is consistent and conforms to the one of last successful transactions, -but lost transaction count is much higher than in -lazy-write mode. All DB engines in this mode do as little writes -as possible on persistent storage. 
_libmdbx_ uses +In case of a crash all data is consistent and conforms to the one of +last successful transactions, but lost transaction count is much higher +than in lazy-write mode. All DB engines in this mode do as little writes +as possible on persistent storage. _libmdbx_ uses [msync(MS_ASYNC)](https://linux.die.net/man/2/msync) in this mode. In the benchmark each transaction contains combined CRUD operations (2 From dbf18b4c22f819200b7949d22431abdbb2deeb1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 11 Dec 2024 20:32:52 +0300 Subject: [PATCH 379/443] =?UTF-8?q?mdbx-make:=20=D0=B8=D1=81=D0=BF=D1=80?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=BE=D0=B4?= =?UTF-8?q?=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BA=D0=B8=20=D0=BF=D0=B5?= =?UTF-8?q?=D1=80=D0=B5=D0=BC=D0=B5=D0=BD=D0=BD=D1=8B=D1=85=20SemVer.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GNUmakefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index fa1c1e45..65827d0d 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -555,8 +555,8 @@ src/version.c: src/version.c.in $(lastword $(MAKEFILE_LIST)) $(MDBX_GIT_DIR)/HEA -e "s|\$${MDBX_VERSION_MINOR}|$(shell echo '$(MDBX_GIT_3DOT)' | cut -d . -f 2)|" \ -e "s|\$${MDBX_VERSION_PATCH}|$(shell echo '$(MDBX_GIT_3DOT)' | cut -d . 
-f 3)|" \ -e "s|\$${MDBX_VERSION_TWEAK}|$(MDBX_GIT_TWEAK)|" \ - -e "s|\$${MDBX_VERSION_PRERELEASE}|$(MDBX_GIT_PRERELEASE)|" \ - -e "s|\$${MDBX_VERSION_PURE}|$(MDBX_VERSION_PURE)|" \ + -e "s|@MDBX_VERSION_PRERELEASE@|$(MDBX_GIT_PRERELEASE)|" \ + -e "s|@MDBX_VERSION_PURE@|$(MDBX_VERSION_PURE)|" \ src/version.c.in >$@ src/config.h: @buildflags.tag $(WAIT) src/version.c $(lastword $(MAKEFILE_LIST)) LICENSE NOTICE @@ -589,8 +589,8 @@ docs/Doxyfile: docs/Doxyfile.in src/version.c $(lastword $(MAKEFILE_LIST)) -e "s|\$${MDBX_VERSION_MINOR}|$(shell echo '$(MDBX_GIT_3DOT)' | cut -d . -f 2)|" \ -e "s|\$${MDBX_VERSION_PATCH}|$(shell echo '$(MDBX_GIT_3DOT)' | cut -d . -f 3)|" \ -e "s|\$${MDBX_VERSION_TWEAK}|$(MDBX_GIT_TWEAK)|" \ - -e "s|\$${MDBX_VERSION_PRERELEASE}|$(MDBX_GIT_PRERELEASE)|" \ - -e "s|\$${MDBX_VERSION_PURE}|$(MDBX_VERSION_PURE)|" \ + -e "s|@MDBX_VERSION_PRERELEASE@|$(MDBX_GIT_PRERELEASE)|" \ + -e "s|@MDBX_VERSION_PURE@|$(MDBX_VERSION_PURE)|" \ docs/Doxyfile.in >$@ define md-extract-section From 3c4d019d00d8e29f2c6b485d1a8b5f3a619909bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 11 Dec 2024 20:57:57 +0300 Subject: [PATCH 380/443] =?UTF-8?q?mdbx-cmake:=20=D1=8F=D0=B2=D0=BD=D0=BE?= =?UTF-8?q?=D0=B5=20=D1=80=D0=B0=D0=BA=D1=80=D1=8B=D1=82=D0=B8=D0=B5=20?= =?UTF-8?q?=D0=BF=D0=B5=D1=80=D0=B5=D0=BC=D0=B5=D0=BD=D0=BD=D1=8B=D1=85=20?= =?UTF-8?q?=D0=B2=20if-=D1=83=D1=81=D0=BB=D0=BE=D0=B2=D0=B8=D1=8F=D1=85=20?= =?UTF-8?q?=D0=B4=D0=BB=D1=8F=20=D0=BE=D0=B1=D1=85=D0=BE=D0=B4=D0=B0=20?= =?UTF-8?q?=D0=B1=D0=B0=D0=B3=D0=B0-=D1=84=D0=B8=D1=87=D0=B8=20CMake.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cmake/utils.cmake | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index b669676b..8e5ba6de 100644 --- a/cmake/utils.cmake 
+++ b/cmake/utils.cmake @@ -230,14 +230,14 @@ macro(git_get_versioninfo source_root_directory) OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) - if(_rc OR _git_timestamp STREQUAL "%cI") + if(_rc OR "${_git_timestamp}" STREQUAL "%cI") execute_process( COMMAND ${GIT} show --no-patch --format=%ci HEAD OUTPUT_VARIABLE _git_timestamp OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) - if(_rc OR _git_timestamp STREQUAL "%ci") + if(_rc OR "${_git_timestamp}" STREQUAL "%ci") message( FATAL_ERROR "Please install latest version of git (`show --no-patch --format=%cI HEAD` failed)" @@ -251,7 +251,7 @@ macro(git_get_versioninfo source_root_directory) OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) - if(_rc OR _git_tree STREQUAL "") + if(_rc OR "${_git_tree}" STREQUAL "") message( FATAL_ERROR "Please install latest version of git (`show --no-patch --format=%T HEAD` failed)" @@ -264,7 +264,7 @@ macro(git_get_versioninfo source_root_directory) OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) - if(_rc OR _git_commit STREQUAL "") + if(_rc OR "${_git_commit}" STREQUAL "") message( FATAL_ERROR "Please install latest version of git (`show --no-patch --format=%H HEAD` failed)" @@ -283,7 +283,7 @@ macro(git_get_versioninfo source_root_directory) "Please install latest version of git (`status --untracked-files=no --porcelain` failed)" ) endif() - if(NOT _git_status STREQUAL "") + if(NOT "${_git_status}" STREQUAL "") set(_git_commit "DIRTY-${_git_commit}") set(_git_is_dirty TRUE) endif() @@ -295,7 +295,7 @@ macro(git_get_versioninfo source_root_directory) OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) - if(_rc OR _git_last_vtag STREQUAL "") + if(_rc OR "${_git_last_vtag}" STREQUAL "") execute_process( COMMAND ${GIT} tag OUTPUT_VARIABLE _git_tags_dump 
@@ -314,7 +314,7 @@ macro(git_get_versioninfo source_root_directory) "Please install latest version of git (`git rev-list --count --no-merges --remove-empty HEAD` failed)" ) endif() - if(_git_whole_count GREATER 42 AND _git_tags_dump STREQUAL "") + if(_git_whole_count GREATER 42 AND "${_git_tags_dump}" STREQUAL "") message( FATAL_ERROR "Please fetch tags (`describe --tags --abbrev=0 --match=v[0-9]*` failed)" @@ -332,14 +332,14 @@ macro(git_get_versioninfo source_root_directory) OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) - if(_rc OR _git_describe STREQUAL "") + if(_rc OR "${_git_describe}" STREQUAL "") execute_process( COMMAND ${GIT} describe --tags --all --dirty --long --always OUTPUT_VARIABLE _git_describe OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) - if(_rc OR _git_describe STREQUAL "") + if(_rc OR "${_git_describe}" STREQUAL "") message( FATAL_ERROR "Please install latest version of git (`describe --tags --all --long` failed)" @@ -353,7 +353,7 @@ macro(git_get_versioninfo source_root_directory) OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) - if(_rc OR _git_describe STREQUAL "") + if(_rc OR "${_git_describe}" STREQUAL "") message( FATAL_ERROR "Please install latest version of git (`describe --tags --long --match=v[0-9]*`)" @@ -365,7 +365,7 @@ macro(git_get_versioninfo source_root_directory) OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) - if(_rc OR _git_trailing_commits STREQUAL "") + if(_rc OR "${_git_trailing_commits}" STREQUAL "") message( FATAL_ERROR "Please install latest version of git (`rev-list --count ${_git_last_vtag}..HEAD` failed)" @@ -393,7 +393,7 @@ macro(semver_provide name source_root_directory build_directory_for_json_output OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) - if(_rc OR _git_root 
STREQUAL "") + if(_rc OR "${_git_root}" STREQUAL "") if(EXISTS "${source_root_directory}/.git") message(ERROR "`git rev-parse --show-toplevel` failed '${_git_root_error}'") @@ -468,19 +468,19 @@ macro(semver_provide name source_root_directory build_directory_for_json_output OR NOT _git_timestamp OR NOT _git_tree OR NOT _git_commit - OR _semver_major STREQUAL "" - OR _semver_minor STREQUAL "" - OR _semver_patch STREQUAL "") + OR "${_semver_major}" STREQUAL "" + OR "${_semver_minor}" STREQUAL "" + OR "${_semver_patch}" STREQUAL "") message(ERROR "Unable to retrieve ${name} version from ${_version_from}.") endif() set(_semver "${_semver_major}.${_semver_minor}.${_semver_patch}") - if(_semver_tweak STREQUAL "") + if("${_semver_tweak}" STREQUAL "") set(_semver_tweak 0) elseif(_semver_tweak GREATER 0) string(APPEND _semver ".${_semver_tweak}") endif() - if(NOT _semver_prerelease STREQUAL "") + if(NOT "${_semver_prerelease}" STREQUAL "") string(APPEND _semver "-${_semver_prerelease}") endif() if(_git_is_dirty) @@ -497,7 +497,7 @@ macro(semver_provide name source_root_directory build_directory_for_json_output set(${name}_VERSION_MAJOR ${_semver_major}) set(${name}_VERSION_MINOR ${_semver_minor}) set(${name}_VERSION_PATCH ${_semver_patch}) - set(${name}_VERSION_TWEAK "${_semver_tweak}") + set(${name}_VERSION_TWEAK ${_semver_tweak}) set(${name}_VERSION_PRERELEASE "${_semver_prerelease}") set(${name}_GIT_DESCRIBE "${_git_describe}") set(${name}_GIT_TIMESTAMP "${_git_timestamp}") From 8867c2ddc27df750bbc7526f8d7745ca481331f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 11 Dec 2024 21:22:04 +0300 Subject: [PATCH 381/443] =?UTF-8?q?mdbx:=20=D0=BD=D0=BE=D0=B2=D1=8B=D0=B5?= =?UTF-8?q?=20=D0=BD=D0=B0=D1=81=D1=82=D1=80=D0=BE=D0=B9=D0=BA=D0=B8=20cla?= =?UTF-8?q?ng-format=20(=D0=BA=D0=BE=D1=81=D0=BC=D0=B5=D1=82=D0=B8=D0=BA?= =?UTF-8?q?=D0=B0).?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .clang-format | 4 +- example/example-mdbx.c | 39 +- mdbx.h | 735 ++---- mdbx.h++ | 3149 +++++++++----------------- src/api-cursor.c | 177 +- src/api-env.c | 272 +-- src/api-extra.c | 36 +- src/api-key-transform.c | 84 +- src/api-txn.c | 96 +- src/atomics-ops.h | 112 +- src/atomics-types.h | 26 +- src/audit.c | 46 +- src/chk.c | 1007 +++----- src/cogs.c | 122 +- src/cogs.h | 208 +- src/coherency.c | 118 +- src/cold.c | 228 +- src/copy.c | 177 +- src/cursor.c | 393 ++-- src/cursor.h | 144 +- src/dbi.c | 238 +- src/dbi.h | 67 +- src/debug_begin.h | 18 +- src/dpl.c | 104 +- src/dpl.h | 33 +- src/dxb.c | 712 ++---- src/env-opts.c | 54 +- src/env.c | 161 +- src/essentials.h | 15 +- src/gc-get.c | 351 +-- src/gc-put.c | 357 +-- src/gc.h | 3 +- src/global.c | 17 +- src/internals.h | 54 +- src/layout-dxb.h | 32 +- src/layout-lck.h | 30 +- src/lck-posix.c | 161 +- src/lck-windows.c | 161 +- src/lck.c | 60 +- src/lck.h | 6 +- src/logging_and_debug.c | 67 +- src/logging_and_debug.h | 124 +- src/mdbx.c++ | 374 ++- src/meta.c | 322 +-- src/meta.h | 106 +- src/misc.c | 44 +- src/mvcc-readers.c | 126 +- src/node.c | 89 +- src/node.h | 53 +- src/options.h | 72 +- src/osal.c | 842 +++---- src/osal.h | 148 +- src/page-get.c | 280 +-- src/page-iov.c | 37 +- src/page-iov.h | 12 +- src/page-ops.c | 141 +- src/page-ops.h | 103 +- src/page-search.c | 19 +- src/pnl.c | 60 +- src/pnl.h | 53 +- src/preface.h | 244 +- src/proto.h | 69 +- src/range-estimate.c | 95 +- src/refund.c | 49 +- src/sort.h | 551 +++-- src/spill.c | 163 +- src/spill.h | 34 +- src/table.c | 33 +- src/tls.c | 204 +- src/tools/chk.c | 209 +- src/tools/copy.c | 52 +- src/tools/drop.c | 15 +- src/tools/dump.c | 97 +- src/tools/load.c | 188 +- src/tools/stat.c | 117 +- src/tools/wingetopt.c | 11 +- src/tree.c | 286 +-- src/txl.c | 29 +- src/txl.h | 6 +- src/txn.c | 539 ++--- src/unaligned.h | 119 +- src/utils.c | 11 +- src/utils.h | 41 +- 
src/version.c.in | 12 +- src/walk.c | 76 +- src/walk.h | 13 +- src/windows-import.c | 27 +- src/windows-import.h | 55 +- test/append.c++ | 47 +- test/base.h++ | 22 +- test/cases.c++ | 30 +- test/chrono.c++ | 32 +- test/chrono.h++ | 9 +- test/config.c++ | 335 +-- test/config.h++ | 69 +- test/copy.c++ | 41 +- test/dead.c++ | 6 +- test/extra/crunched_delete.c++ | 76 +- test/extra/cursor_closing.c++ | 15 +- test/extra/dbi.c++ | 13 +- test/extra/doubtless_positioning.c++ | 68 +- test/extra/dupfix_addodd.c | 6 +- test/extra/dupfix_multiple.c++ | 158 +- test/extra/early_close_dbi.c++ | 9 +- test/extra/hex_base64_base58.c++ | 56 +- test/extra/maindb_ordinal.c++ | 21 +- test/extra/open.c++ | 22 +- test/extra/pcrf/pcrf_test.c | 129 +- test/extra/probe.c++ | 3 +- test/extra/upsert_alldups.c | 33 +- test/fork.c++ | 83 +- test/hill.c++ | 38 +- test/jitter.c++ | 52 +- test/keygen.c++ | 247 +- test/keygen.h++ | 28 +- test/log.c++ | 48 +- test/log.h++ | 17 +- test/main.c++ | 449 ++-- test/nested.c++ | 81 +- test/osal-unix.c++ | 78 +- test/osal-windows.c++ | 46 +- test/stub/pthread_barrier.c | 6 +- test/stub/pthread_barrier.h | 6 +- test/test.c++ | 492 ++-- test/test.h++ | 121 +- test/try.c++ | 3 +- test/ttl.c++ | 55 +- test/utils.c++ | 24 +- test/utils.h++ | 69 +- 129 files changed, 6727 insertions(+), 12640 deletions(-) diff --git a/.clang-format b/.clang-format index 6c59ef3a..03069f65 100644 --- a/.clang-format +++ b/.clang-format @@ -1,3 +1,3 @@ BasedOnStyle: LLVM -Standard: Cpp11 -ReflowComments: true +Standard: c++20 +ColumnLimit: 120 diff --git a/example/example-mdbx.c b/example/example-mdbx.c index 215a0fca..beae17f5 100644 --- a/example/example-mdbx.c +++ b/example/example-mdbx.c @@ -18,8 +18,7 @@ * . 
*/ -#if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) && \ - !defined(__USE_MINGW_ANSI_STDIO) +#if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) && !defined(__USE_MINGW_ANSI_STDIO) #define __USE_MINGW_ANSI_STDIO 1 #endif /* MinGW */ @@ -59,33 +58,23 @@ int main(int argc, char *argv[]) { pagesize_min, pagesize_max, pagesize_default); printf("\tKey size: minimum %zu, maximum ≈¼ pagesize (%zu bytes for default" " %zuK pagesize, %zu bytes for %zuK pagesize).\n", - (size_t)0, mdbx_limits_keysize_max(-1, MDBX_DB_DEFAULTS), - pagesize_default / 1024, - mdbx_limits_keysize_max(pagesize_max, MDBX_DB_DEFAULTS), - pagesize_max / 1024); + (size_t)0, mdbx_limits_keysize_max(-1, MDBX_DB_DEFAULTS), pagesize_default / 1024, + mdbx_limits_keysize_max(pagesize_max, MDBX_DB_DEFAULTS), pagesize_max / 1024); printf("\tValue size: minimum %zu, maximum %zu (0x%08zX) bytes for maps," " ≈¼ pagesize for multimaps (%zu bytes for default %zuK pagesize," " %zu bytes for %zuK pagesize).\n", (size_t)0, mdbx_limits_valsize_max(pagesize_min, MDBX_DB_DEFAULTS), - mdbx_limits_valsize_max(pagesize_min, MDBX_DB_DEFAULTS), - mdbx_limits_valsize_max(-1, MDBX_DUPSORT), pagesize_default / 1024, - mdbx_limits_valsize_max(pagesize_max, MDBX_DUPSORT), - pagesize_max / 1024); + mdbx_limits_valsize_max(pagesize_min, MDBX_DB_DEFAULTS), mdbx_limits_valsize_max(-1, MDBX_DUPSORT), + pagesize_default / 1024, mdbx_limits_valsize_max(pagesize_max, MDBX_DUPSORT), pagesize_max / 1024); printf("\tWrite transaction size: up to %zu (0x%zX) pages (%f %s for default " "%zuK pagesize, %f %s for %zuK pagesize).\n", - mdbx_limits_txnsize_max(pagesize_min) / pagesize_min, - mdbx_limits_txnsize_max(pagesize_min) / pagesize_min, - mdbx_limits_txnsize_max(-1) / scale_factor, scale_unit, - pagesize_default / 1024, - mdbx_limits_txnsize_max(pagesize_max) / scale_factor, scale_unit, - pagesize_max / 1024); + mdbx_limits_txnsize_max(pagesize_min) / pagesize_min, 
mdbx_limits_txnsize_max(pagesize_min) / pagesize_min, + mdbx_limits_txnsize_max(-1) / scale_factor, scale_unit, pagesize_default / 1024, + mdbx_limits_txnsize_max(pagesize_max) / scale_factor, scale_unit, pagesize_max / 1024); printf("\tDatabase size: up to %zu pages (%f %s for default %zuK " "pagesize, %f %s for %zuK pagesize).\n", - mdbx_limits_dbsize_max(pagesize_min) / pagesize_min, - mdbx_limits_dbsize_max(-1) / scale_factor, scale_unit, - pagesize_default / 1024, - mdbx_limits_dbsize_max(pagesize_max) / scale_factor, scale_unit, - pagesize_max / 1024); + mdbx_limits_dbsize_max(pagesize_min) / pagesize_min, mdbx_limits_dbsize_max(-1) / scale_factor, scale_unit, + pagesize_default / 1024, mdbx_limits_dbsize_max(pagesize_max) / scale_factor, scale_unit, pagesize_max / 1024); printf("\tMaximum sub-databases: %u.\n", MDBX_MAX_DBI); printf("-----\n"); @@ -94,8 +83,7 @@ int main(int argc, char *argv[]) { fprintf(stderr, "mdbx_env_create: (%d) %s\n", rc, mdbx_strerror(rc)); goto bailout; } - rc = mdbx_env_open(env, "./example-db", MDBX_NOSUBDIR | MDBX_LIFORECLAIM, - 0664); + rc = mdbx_env_open(env, "./example-db", MDBX_NOSUBDIR | MDBX_LIFORECLAIM, 0664); if (rc != MDBX_SUCCESS) { fprintf(stderr, "mdbx_env_open: (%d) %s\n", rc, mdbx_strerror(rc)); goto bailout; @@ -143,9 +131,8 @@ int main(int argc, char *argv[]) { int found = 0; while ((rc = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT)) == 0) { - printf("key: %p %.*s, data: %p %.*s\n", key.iov_base, (int)key.iov_len, - (char *)key.iov_base, data.iov_base, (int)data.iov_len, - (char *)data.iov_base); + printf("key: %p %.*s, data: %p %.*s\n", key.iov_base, (int)key.iov_len, (char *)key.iov_base, data.iov_base, + (int)data.iov_len, (char *)data.iov_base); found += 1; } if (rc != MDBX_NOTFOUND || found == 0) { diff --git a/mdbx.h b/mdbx.h index 29feb245..9854c6e5 100644 --- a/mdbx.h +++ b/mdbx.h @@ -39,8 +39,7 @@ credits and acknowledgments. 
#ifndef LIBMDBX_H #define LIBMDBX_H -#if defined(__riscv) || defined(__riscv__) || defined(__RISCV) || \ - defined(__RISCV__) +#if defined(__riscv) || defined(__riscv__) || defined(__RISCV) || defined(__RISCV__) #warning "The RISC-V architecture is intentionally insecure by design. \ Please delete this admonition at your own risk, \ if you make such decision informed and consciously. \ @@ -49,12 +48,12 @@ credits and acknowledgments. #ifdef _MSC_VER #pragma warning(push, 1) -#pragma warning(disable : 4548) /* expression before comma has no effect; \ +#pragma warning(disable : 4548) /* expression before comma has no effect; \ expected expression with side - effect */ -#pragma warning(disable : 4530) /* C++ exception handler used, but unwind \ +#pragma warning(disable : 4530) /* C++ exception handler used, but unwind \ * semantics are not enabled. Specify /EHsc */ -#pragma warning(disable : 4577) /* 'noexcept' used with no exception handling \ - * mode specified; termination on exception is \ +#pragma warning(disable : 4577) /* 'noexcept' used with no exception handling \ + * mode specified; termination on exception is \ * not guaranteed. 
Specify /EHsc */ #endif /* _MSC_VER (warnings) */ @@ -224,8 +223,7 @@ typedef mode_t mdbx_mode_t; #define __has_feature(x) (0) #define __has_exceptions_disabled (0) #elif !defined(__has_exceptions_disabled) -#define __has_exceptions_disabled \ - (__has_feature(cxx_noexcept) && !__has_feature(cxx_exceptions)) +#define __has_exceptions_disabled (__has_feature(cxx_noexcept) && !__has_feature(cxx_exceptions)) #endif /* __has_feature */ #ifndef __has_extension @@ -246,9 +244,9 @@ typedef mode_t mdbx_mode_t; #define MDBX_PURE_FUNCTION [[gnu::pure]] #elif __has_C23_or_CXX_attribute(gnu::pure) #define MDBX_PURE_FUNCTION [[gnu::pure]] -#elif (defined(__GNUC__) || __has_attribute(__pure__)) && \ - (!defined(__clang__) /* https://bugs.llvm.org/show_bug.cgi?id=43275 */ || \ - !defined(__cplusplus) || __has_exceptions_disabled) +#elif (defined(__GNUC__) || __has_attribute(__pure__)) && \ + (!defined(__clang__) /* https://bugs.llvm.org/show_bug.cgi?id=43275 */ || !defined(__cplusplus) || \ + __has_exceptions_disabled) #define MDBX_PURE_FUNCTION __attribute__((__pure__)) #else #define MDBX_PURE_FUNCTION @@ -265,8 +263,7 @@ typedef mode_t mdbx_mode_t; #else #define MDBX_NOTHROW_PURE_FUNCTION [[gnu::pure]] #endif -#elif defined(__GNUC__) || \ - (__has_attribute(__pure__) && __has_attribute(__nothrow__)) +#elif defined(__GNUC__) || (__has_attribute(__pure__) && __has_attribute(__nothrow__)) #define MDBX_NOTHROW_PURE_FUNCTION __attribute__((__pure__, __nothrow__)) #elif __has_cpp_attribute(pure) #define MDBX_NOTHROW_PURE_FUNCTION [[pure]] @@ -288,9 +285,9 @@ typedef mode_t mdbx_mode_t; #define MDBX_CONST_FUNCTION [[gnu::const]] #elif __has_C23_or_CXX_attribute(gnu::const) #define MDBX_CONST_FUNCTION [[gnu::const]] -#elif (defined(__GNUC__) || __has_attribute(__const__)) && \ - (!defined(__clang__) /* https://bugs.llvm.org/show_bug.cgi?id=43275 */ || \ - !defined(__cplusplus) || __has_exceptions_disabled) +#elif (defined(__GNUC__) || __has_attribute(__const__)) && \ + 
(!defined(__clang__) /* https://bugs.llvm.org/show_bug.cgi?id=43275 */ || !defined(__cplusplus) || \ + __has_exceptions_disabled) #define MDBX_CONST_FUNCTION __attribute__((__const__)) #else #define MDBX_CONST_FUNCTION MDBX_PURE_FUNCTION @@ -307,8 +304,7 @@ typedef mode_t mdbx_mode_t; #else #define MDBX_NOTHROW_CONST_FUNCTION [[gnu::const]] #endif -#elif defined(__GNUC__) || \ - (__has_attribute(__const__) && __has_attribute(__nothrow__)) +#elif defined(__GNUC__) || (__has_attribute(__const__) && __has_attribute(__nothrow__)) #define MDBX_NOTHROW_CONST_FUNCTION __attribute__((__const__, __nothrow__)) #elif __has_cpp_attribute_qualified(const) #define MDBX_NOTHROW_CONST_FUNCTION [[const]] @@ -322,17 +318,13 @@ typedef mode_t mdbx_mode_t; #ifndef MDBX_DEPRECATED #ifdef __deprecated #define MDBX_DEPRECATED __deprecated -#elif defined(DOXYGEN) || \ - ((!defined(__GNUC__) || defined(__clang__) || __GNUC__ > 5) && \ - ((defined(__cplusplus) && __cplusplus >= 201403L && \ - __has_cpp_attribute(deprecated) && \ - __has_cpp_attribute(deprecated) >= 201309L) || \ - (!defined(__cplusplus) && defined(__STDC_VERSION__) && \ - __STDC_VERSION__ >= 202304L))) +#elif defined(DOXYGEN) || ((!defined(__GNUC__) || defined(__clang__) || __GNUC__ > 5) && \ + ((defined(__cplusplus) && __cplusplus >= 201403L && __has_cpp_attribute(deprecated) && \ + __has_cpp_attribute(deprecated) >= 201309L) || \ + (!defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202304L))) #define MDBX_DEPRECATED [[deprecated]] -#elif (defined(__GNUC__) && __GNUC__ > 5) || \ - (__has_attribute(__deprecated__) && \ - (!defined(__GNUC__) || defined(__clang__) || __GNUC__ > 5)) +#elif (defined(__GNUC__) && __GNUC__ > 5) || \ + (__has_attribute(__deprecated__) && (!defined(__GNUC__) || defined(__clang__) || __GNUC__ > 5)) #define MDBX_DEPRECATED __attribute__((__deprecated__)) #elif defined(_MSC_VER) #define MDBX_DEPRECATED __declspec(deprecated) @@ -344,9 +336,8 @@ typedef mode_t mdbx_mode_t; 
#ifndef MDBX_DEPRECATED_ENUM #ifdef __deprecated_enum #define MDBX_DEPRECATED_ENUM __deprecated_enum -#elif defined(DOXYGEN) || \ - (!defined(_MSC_VER) || (defined(__cplusplus) && __cplusplus >= 201403L && \ - __has_cpp_attribute(deprecated) && \ +#elif defined(DOXYGEN) || \ + (!defined(_MSC_VER) || (defined(__cplusplus) && __cplusplus >= 201403L && __has_cpp_attribute(deprecated) && \ __has_cpp_attribute(deprecated) >= 201309L)) #define MDBX_DEPRECATED_ENUM MDBX_DEPRECATED #else @@ -355,8 +346,8 @@ typedef mode_t mdbx_mode_t; #endif /* MDBX_DEPRECATED_ENUM */ #ifndef __dll_export -#if defined(_WIN32) || defined(_WIN64) || defined(__CYGWIN__) || \ - defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__) +#if defined(_WIN32) || defined(_WIN64) || defined(__CYGWIN__) || defined(__MINGW__) || defined(__MINGW32__) || \ + defined(__MINGW64__) #if defined(__GNUC__) || __has_attribute(__dllexport__) #define __dll_export __attribute__((__dllexport__)) #elif defined(_MSC_VER) @@ -372,8 +363,8 @@ typedef mode_t mdbx_mode_t; #endif /* __dll_export */ #ifndef __dll_import -#if defined(_WIN32) || defined(_WIN64) || defined(__CYGWIN__) || \ - defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__) +#if defined(_WIN32) || defined(_WIN64) || defined(__CYGWIN__) || defined(__MINGW__) || defined(__MINGW32__) || \ + defined(__MINGW64__) #if defined(__GNUC__) || __has_attribute(__dllimport__) #define __dll_import __attribute__((__dllimport__)) #elif defined(_MSC_VER) @@ -391,8 +382,8 @@ typedef mode_t mdbx_mode_t; * with old version of libmdbx, with a strictly ODR-common implementation. Thus, * we emulate __extern_inline for all compilers, including non-GNU ones. 
*/ #if defined(LIBMDBX_INTERNALS) && !defined(LIBMDBX_NO_EXPORTS_LEGACY_API) -#define LIBMDBX_INLINE_API(TYPE, NAME, ARGS) \ - /* proto of exported which uses common impl */ LIBMDBX_API TYPE NAME ARGS; \ +#define LIBMDBX_INLINE_API(TYPE, NAME, ARGS) \ + /* proto of exported which uses common impl */ LIBMDBX_API TYPE NAME ARGS; \ /* definition of common impl */ static __inline TYPE __inline_##NAME ARGS #else #define LIBMDBX_INLINE_API(TYPE, NAME, ARGS) static __inline TYPE NAME ARGS @@ -421,8 +412,7 @@ typedef mode_t mdbx_mode_t; /** Workaround for old compilers without support for C++17 `noexcept`. */ #if defined(DOXYGEN) #define MDBX_CXX17_NOEXCEPT noexcept -#elif !defined(__cpp_noexcept_function_type) || \ - __cpp_noexcept_function_type < 201510L +#elif !defined(__cpp_noexcept_function_type) || __cpp_noexcept_function_type < 201510L #define MDBX_CXX17_NOEXCEPT #else #define MDBX_CXX17_NOEXCEPT noexcept @@ -435,14 +425,11 @@ typedef mode_t mdbx_mode_t; #elif !defined(__cplusplus) #define MDBX_CXX01_CONSTEXPR __inline #define MDBX_CXX01_CONSTEXPR_VAR const -#elif !defined(DOXYGEN) && \ - ((__cplusplus < 201103L && defined(__cpp_constexpr) && \ - __cpp_constexpr < 200704L) || \ - (defined(__LCC__) && __LCC__ < 124) || \ - (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ < 407) && \ - !defined(__clang__) && !defined(__LCC__)) || \ - (defined(_MSC_VER) && _MSC_VER < 1910) || \ - (defined(__clang__) && __clang_major__ < 4)) +#elif !defined(DOXYGEN) && \ + ((__cplusplus < 201103L && defined(__cpp_constexpr) && __cpp_constexpr < 200704L) || \ + (defined(__LCC__) && __LCC__ < 124) || \ + (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ < 407) && !defined(__clang__) && !defined(__LCC__)) || \ + (defined(_MSC_VER) && _MSC_VER < 1910) || (defined(__clang__) && __clang_major__ < 4)) #define MDBX_CXX01_CONSTEXPR inline #define MDBX_CXX01_CONSTEXPR_VAR const #else @@ -458,13 +445,10 @@ typedef mode_t mdbx_mode_t; #elif !defined(__cplusplus) #define 
MDBX_CXX11_CONSTEXPR __inline #define MDBX_CXX11_CONSTEXPR_VAR const -#elif !defined(DOXYGEN) && \ - (!defined(__cpp_constexpr) || __cpp_constexpr < 201304L || \ - (defined(__LCC__) && __LCC__ < 124) || \ - (defined(__GNUC__) && __GNUC__ < 6 && !defined(__clang__) && \ - !defined(__LCC__)) || \ - (defined(_MSC_VER) && _MSC_VER < 1910) || \ - (defined(__clang__) && __clang_major__ < 5)) +#elif !defined(DOXYGEN) && \ + (!defined(__cpp_constexpr) || __cpp_constexpr < 201304L || (defined(__LCC__) && __LCC__ < 124) || \ + (defined(__GNUC__) && __GNUC__ < 6 && !defined(__clang__) && !defined(__LCC__)) || \ + (defined(_MSC_VER) && _MSC_VER < 1910) || (defined(__clang__) && __clang_major__ < 5)) #define MDBX_CXX11_CONSTEXPR inline #define MDBX_CXX11_CONSTEXPR_VAR const #else @@ -480,12 +464,10 @@ typedef mode_t mdbx_mode_t; #elif !defined(__cplusplus) #define MDBX_CXX14_CONSTEXPR __inline #define MDBX_CXX14_CONSTEXPR_VAR const -#elif defined(DOXYGEN) || \ - defined(__cpp_constexpr) && __cpp_constexpr >= 201304L && \ - ((defined(_MSC_VER) && _MSC_VER >= 1910) || \ - (defined(__clang__) && __clang_major__ > 4) || \ - (defined(__GNUC__) && __GNUC__ > 6) || \ - (!defined(__GNUC__) && !defined(__clang__) && !defined(_MSC_VER))) +#elif defined(DOXYGEN) || \ + defined(__cpp_constexpr) && __cpp_constexpr >= 201304L && \ + ((defined(_MSC_VER) && _MSC_VER >= 1910) || (defined(__clang__) && __clang_major__ > 4) || \ + (defined(__GNUC__) && __GNUC__ > 6) || (!defined(__GNUC__) && !defined(__clang__) && !defined(_MSC_VER))) #define MDBX_CXX14_CONSTEXPR constexpr #define MDBX_CXX14_CONSTEXPR_VAR constexpr #else @@ -497,9 +479,8 @@ typedef mode_t mdbx_mode_t; #define MDBX_NORETURN __noreturn #elif defined(_Noreturn) #define MDBX_NORETURN _Noreturn -#elif defined(DOXYGEN) || (defined(__cplusplus) && __cplusplus >= 201103L) || \ - (!defined(__cplusplus) && defined(__STDC_VERSION__) && \ - __STDC_VERSION__ > 202005L) +#elif defined(DOXYGEN) || (defined(__cplusplus) && __cplusplus >= 
201103L) || \ + (!defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ > 202005L) #define MDBX_NORETURN [[noreturn]] #elif defined(__GNUC__) || __has_attribute(__noreturn__) #define MDBX_NORETURN __attribute__((__noreturn__)) @@ -512,23 +493,19 @@ typedef mode_t mdbx_mode_t; #ifndef MDBX_PRINTF_ARGS #if defined(__GNUC__) || __has_attribute(__format__) || defined(DOXYGEN) #if defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__) -#define MDBX_PRINTF_ARGS(format_index, first_arg) \ - __attribute__((__format__(__gnu_printf__, format_index, first_arg))) +#define MDBX_PRINTF_ARGS(format_index, first_arg) __attribute__((__format__(__gnu_printf__, format_index, first_arg))) #else -#define MDBX_PRINTF_ARGS(format_index, first_arg) \ - __attribute__((__format__(__printf__, format_index, first_arg))) +#define MDBX_PRINTF_ARGS(format_index, first_arg) __attribute__((__format__(__printf__, format_index, first_arg))) #endif /* MinGW */ #else #define MDBX_PRINTF_ARGS(format_index, first_arg) #endif #endif /* MDBX_PRINTF_ARGS */ -#if defined(DOXYGEN) || \ - (defined(__cplusplus) && __cplusplus >= 201603L && \ - __has_cpp_attribute(maybe_unused) && \ - __has_cpp_attribute(maybe_unused) >= 201603L) || \ - (!defined(__cplusplus) && defined(__STDC_VERSION__) && \ - __STDC_VERSION__ > 202005L) +#if defined(DOXYGEN) || \ + (defined(__cplusplus) && __cplusplus >= 201603L && __has_cpp_attribute(maybe_unused) && \ + __has_cpp_attribute(maybe_unused) >= 201603L) || \ + (!defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ > 202005L) #define MDBX_MAYBE_UNUSED [[maybe_unused]] #elif defined(__GNUC__) || __has_attribute(__unused__) #define MDBX_MAYBE_UNUSED __attribute__((__unused__)) @@ -550,12 +527,9 @@ typedef mode_t mdbx_mode_t; #if !defined(DEFINE_ENUM_FLAG_OPERATORS) && !defined(DOXYGEN) #ifdef __cplusplus -#if !defined(__cpp_constexpr) || __cpp_constexpr < 200704L || \ - (defined(__LCC__) && __LCC__ < 124) || \ - (defined(__GNUC__) && 
(__GNUC__ * 100 + __GNUC_MINOR__ < 407) && \ - !defined(__clang__) && !defined(__LCC__)) || \ - (defined(_MSC_VER) && _MSC_VER < 1910) || \ - (defined(__clang__) && __clang_major__ < 4) +#if !defined(__cpp_constexpr) || __cpp_constexpr < 200704L || (defined(__LCC__) && __LCC__ < 124) || \ + (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ < 407) && !defined(__clang__) && !defined(__LCC__)) || \ + (defined(_MSC_VER) && _MSC_VER < 1910) || (defined(__clang__) && __clang_major__ < 4) /* The constexpr feature is not available or (may be) broken */ #define CONSTEXPR_ENUM_FLAGS_OPERATIONS 0 #else @@ -565,42 +539,18 @@ typedef mode_t mdbx_mode_t; /// Define operator overloads to enable bit operations on enum values that are /// used to define flags (based on Microsoft's DEFINE_ENUM_FLAG_OPERATORS). -#define DEFINE_ENUM_FLAG_OPERATORS(ENUM) \ - extern "C++" { \ - MDBX_NOSANITIZE_ENUM MDBX_CXX01_CONSTEXPR ENUM operator|(ENUM a, ENUM b) { \ - return ENUM(unsigned(a) | unsigned(b)); \ - } \ - MDBX_NOSANITIZE_ENUM MDBX_CXX14_CONSTEXPR ENUM &operator|=(ENUM &a, \ - ENUM b) { \ - return a = a | b; \ - } \ - MDBX_NOSANITIZE_ENUM MDBX_CXX01_CONSTEXPR ENUM operator&(ENUM a, ENUM b) { \ - return ENUM(unsigned(a) & unsigned(b)); \ - } \ - MDBX_NOSANITIZE_ENUM MDBX_CXX01_CONSTEXPR ENUM operator&(ENUM a, \ - unsigned b) { \ - return ENUM(unsigned(a) & b); \ - } \ - MDBX_NOSANITIZE_ENUM MDBX_CXX01_CONSTEXPR ENUM operator&(unsigned a, \ - ENUM b) { \ - return ENUM(a & unsigned(b)); \ - } \ - MDBX_NOSANITIZE_ENUM MDBX_CXX14_CONSTEXPR ENUM &operator&=(ENUM &a, \ - ENUM b) { \ - return a = a & b; \ - } \ - MDBX_NOSANITIZE_ENUM MDBX_CXX14_CONSTEXPR ENUM &operator&=(ENUM &a, \ - unsigned b) { \ - return a = a & b; \ - } \ - MDBX_CXX01_CONSTEXPR unsigned operator~(ENUM a) { return ~unsigned(a); } \ - MDBX_NOSANITIZE_ENUM MDBX_CXX01_CONSTEXPR ENUM operator^(ENUM a, ENUM b) { \ - return ENUM(unsigned(a) ^ unsigned(b)); \ - } \ - MDBX_NOSANITIZE_ENUM MDBX_CXX14_CONSTEXPR ENUM 
&operator^=(ENUM &a, \ - ENUM b) { \ - return a = a ^ b; \ - } \ +#define DEFINE_ENUM_FLAG_OPERATORS(ENUM) \ + extern "C++" { \ + MDBX_NOSANITIZE_ENUM MDBX_CXX01_CONSTEXPR ENUM operator|(ENUM a, ENUM b) { return ENUM(unsigned(a) | unsigned(b)); } \ + MDBX_NOSANITIZE_ENUM MDBX_CXX14_CONSTEXPR ENUM &operator|=(ENUM &a, ENUM b) { return a = a | b; } \ + MDBX_NOSANITIZE_ENUM MDBX_CXX01_CONSTEXPR ENUM operator&(ENUM a, ENUM b) { return ENUM(unsigned(a) & unsigned(b)); } \ + MDBX_NOSANITIZE_ENUM MDBX_CXX01_CONSTEXPR ENUM operator&(ENUM a, unsigned b) { return ENUM(unsigned(a) & b); } \ + MDBX_NOSANITIZE_ENUM MDBX_CXX01_CONSTEXPR ENUM operator&(unsigned a, ENUM b) { return ENUM(a & unsigned(b)); } \ + MDBX_NOSANITIZE_ENUM MDBX_CXX14_CONSTEXPR ENUM &operator&=(ENUM &a, ENUM b) { return a = a & b; } \ + MDBX_NOSANITIZE_ENUM MDBX_CXX14_CONSTEXPR ENUM &operator&=(ENUM &a, unsigned b) { return a = a & b; } \ + MDBX_CXX01_CONSTEXPR unsigned operator~(ENUM a) { return ~unsigned(a); } \ + MDBX_NOSANITIZE_ENUM MDBX_CXX01_CONSTEXPR ENUM operator^(ENUM a, ENUM b) { return ENUM(unsigned(a) ^ unsigned(b)); } \ + MDBX_NOSANITIZE_ENUM MDBX_CXX14_CONSTEXPR ENUM &operator^=(ENUM &a, ENUM b) { return a = a ^ b; } \ } #else /* __cplusplus */ /* nope for C since it always allows these operators for enums */ @@ -732,8 +682,7 @@ extern LIBMDBX_VERINFO_API const struct MDBX_build_info { /* As described above mdbx_module_handler() IS REQUIRED for Windows versions * prior to Windows Vista. 
*/ #define MDBX_MANUAL_MODULE_HANDLER 1 -void LIBMDBX_API NTAPI mdbx_module_handler(PVOID module, DWORD reason, - PVOID reserved); +void LIBMDBX_API NTAPI mdbx_module_handler(PVOID module, DWORD reason, PVOID reserved); #endif #endif /* Windows && !DLL && MDBX_MANUAL_MODULE_HANDLER */ @@ -977,8 +926,7 @@ typedef enum MDBX_debug_flags { MDBX_DBG_DONT_UPGRADE = 64, #ifdef ENABLE_UBSAN - MDBX_DBG_MAX = ((unsigned)MDBX_LOG_MAX) << 16 | - 127 /* avoid UBSAN false-positive trap by a tests */, + MDBX_DBG_MAX = ((unsigned)MDBX_LOG_MAX) << 16 | 127 /* avoid UBSAN false-positive trap by a tests */, #endif /* ENABLE_UBSAN */ /** for mdbx_setup_debug() only: Don't change current settings */ @@ -1000,8 +948,7 @@ DEFINE_ENUM_FLAG_OPERATORS(MDBX_debug_flags) * format-message string passed by `fmt` argument. * Maybe NULL or invalid if the format-message string * don't contain `%`-specification of arguments. */ -typedef void MDBX_debug_func(MDBX_log_level_t loglevel, const char *function, - int line, const char *fmt, +typedef void MDBX_debug_func(MDBX_log_level_t loglevel, const char *function, int line, const char *fmt, va_list args) MDBX_CXX17_NOEXCEPT; /** \brief The "don't change `logger`" value for mdbx_setup_debug() */ @@ -1011,20 +958,13 @@ typedef void MDBX_debug_func(MDBX_log_level_t loglevel, const char *function, /** \brief Setup global log-level, debug options and debug logger. * \returns The previously `debug_flags` in the 0-15 bits * and `log_level` in the 16-31 bits. 
*/ -LIBMDBX_API int mdbx_setup_debug(MDBX_log_level_t log_level, - MDBX_debug_flags_t debug_flags, - MDBX_debug_func *logger); +LIBMDBX_API int mdbx_setup_debug(MDBX_log_level_t log_level, MDBX_debug_flags_t debug_flags, MDBX_debug_func *logger); -typedef void MDBX_debug_func_nofmt(MDBX_log_level_t loglevel, - const char *function, int line, - const char *msg, +typedef void MDBX_debug_func_nofmt(MDBX_log_level_t loglevel, const char *function, int line, const char *msg, unsigned length) MDBX_CXX17_NOEXCEPT; -LIBMDBX_API int mdbx_setup_debug_nofmt(MDBX_log_level_t log_level, - MDBX_debug_flags_t debug_flags, - MDBX_debug_func_nofmt *logger, - char *logger_buffer, - size_t logger_buffer_size); +LIBMDBX_API int mdbx_setup_debug_nofmt(MDBX_log_level_t log_level, MDBX_debug_flags_t debug_flags, + MDBX_debug_func_nofmt *logger, char *logger_buffer, size_t logger_buffer_size); /** \brief A callback function for most MDBX assert() failures, * called before printing the message and aborting. @@ -1036,8 +976,7 @@ LIBMDBX_API int mdbx_setup_debug_nofmt(MDBX_log_level_t log_level, * may be NULL. * \param [in] line The line number in the source file * where the assertion check failed, may be zero. */ -typedef void MDBX_assert_func(const MDBX_env *env, const char *msg, - const char *function, +typedef void MDBX_assert_func(const MDBX_env *env, const char *msg, const char *function, unsigned line) MDBX_CXX17_NOEXCEPT; /** \brief Set or reset the assert() callback of the environment. @@ -1060,19 +999,14 @@ LIBMDBX_API int mdbx_env_set_assert(MDBX_env *env, MDBX_assert_func *func); * - NULL if given buffer size less than 4 bytes; * - pointer to constant string if given value NULL or empty; * - otherwise pointer to given buffer. 
*/ -LIBMDBX_API const char *mdbx_dump_val(const MDBX_val *key, char *const buf, - const size_t bufsize); +LIBMDBX_API const char *mdbx_dump_val(const MDBX_val *key, char *const buf, const size_t bufsize); /** \brief Panics with message and causes abnormal process termination. */ -MDBX_NORETURN LIBMDBX_API void mdbx_panic(const char *fmt, ...) - MDBX_PRINTF_ARGS(1, 2); +MDBX_NORETURN LIBMDBX_API void mdbx_panic(const char *fmt, ...) MDBX_PRINTF_ARGS(1, 2); /** \brief Panics with asserton failed message and causes abnormal process * termination. */ -MDBX_NORETURN LIBMDBX_API void mdbx_assert_fail(const MDBX_env *env, - const char *msg, - const char *func, - unsigned line); +MDBX_NORETURN LIBMDBX_API void mdbx_assert_fail(const MDBX_env *env, const char *msg, const char *func, unsigned line); /** end of c_debug @} */ /** \brief Environment flags @@ -1630,8 +1564,7 @@ typedef enum MDBX_txn_flags { /** Most operations on the transaction are currently illegal. * \note This is a transaction state flag. Returned from \ref mdbx_txn_flags() * but can't be used with \ref mdbx_txn_begin(). */ - MDBX_TXN_BLOCKED = - MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_HAS_CHILD | MDBX_TXN_PARKED + MDBX_TXN_BLOCKED = MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_HAS_CHILD | MDBX_TXN_PARKED } MDBX_txn_flags_t; DEFINE_ENUM_FLAG_OPERATORS(MDBX_txn_flags) @@ -2075,9 +2008,7 @@ typedef enum MDBX_error { * \ingroup c_err * \deprecated Please review your code to use MDBX_UNABLE_EXTEND_MAPSIZE * instead. */ -MDBX_DEPRECATED static __inline int MDBX_MAP_RESIZED_is_deprecated(void) { - return MDBX_UNABLE_EXTEND_MAPSIZE; -} +MDBX_DEPRECATED static __inline int MDBX_MAP_RESIZED_is_deprecated(void) { return MDBX_UNABLE_EXTEND_MAPSIZE; } #define MDBX_MAP_RESIZED MDBX_MAP_RESIZED_is_deprecated() /** \brief Return a string describing a given error code. @@ -2138,8 +2069,7 @@ LIBMDBX_API const char *mdbx_strerror_ANSI2OEM(int errnum); * Windows error-messages in the OEM-encoding for console utilities. 
* \ingroup c_err * \see mdbx_strerror_ANSI2OEM() */ -LIBMDBX_API const char *mdbx_strerror_r_ANSI2OEM(int errnum, char *buf, - size_t buflen); +LIBMDBX_API const char *mdbx_strerror_r_ANSI2OEM(int errnum, char *buf, size_t buflen); #endif /* Bit of Windows' madness */ /** \brief Create an MDBX environment instance. @@ -2472,8 +2402,7 @@ typedef enum MDBX_option { * \see MDBX_option_t * \see mdbx_env_get_option() * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, - uint64_t value); +LIBMDBX_API int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, uint64_t value); /** \brief Gets the value of extra runtime options from an environment. * \ingroup c_settings @@ -2485,9 +2414,7 @@ LIBMDBX_API int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, * \see MDBX_option_t * \see mdbx_env_get_option() * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API int mdbx_env_get_option(const MDBX_env *env, - const MDBX_option_t option, - uint64_t *pvalue); +LIBMDBX_API int mdbx_env_get_option(const MDBX_env *env, const MDBX_option_t option, uint64_t *pvalue); /** \brief Open an environment instance. * \ingroup c_opening @@ -2561,20 +2488,16 @@ LIBMDBX_API int mdbx_env_get_option(const MDBX_env *env, * \retval MDBX_TOO_LARGE Database is too large for this process, * i.e. 32-bit process tries to open >4Gb database. */ -LIBMDBX_API int mdbx_env_open(MDBX_env *env, const char *pathname, - MDBX_env_flags_t flags, mdbx_mode_t mode); +LIBMDBX_API int mdbx_env_open(MDBX_env *env, const char *pathname, MDBX_env_flags_t flags, mdbx_mode_t mode); #if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) /** \copydoc mdbx_env_open() * \note Available only on Windows. 
* \see mdbx_env_open() */ -LIBMDBX_API int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, - MDBX_env_flags_t flags, mdbx_mode_t mode); -#define mdbx_env_openT(env, pathname, flags, mode) \ - mdbx_env_openW(env, pathname, flags, mode) +LIBMDBX_API int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, MDBX_env_flags_t flags, mdbx_mode_t mode); +#define mdbx_env_openT(env, pathname, flags, mode) mdbx_env_openW(env, pathname, flags, mode) #else -#define mdbx_env_openT(env, pathname, flags, mode) \ - mdbx_env_open(env, pathname, flags, mode) +#define mdbx_env_openT(env, pathname, flags, mode) mdbx_env_open(env, pathname, flags, mode) #endif /* Windows */ /** \brief Deletion modes for \ref mdbx_env_delete(). @@ -2615,16 +2538,14 @@ typedef enum MDBX_env_delete_mode { * some possible errors are: * \retval MDBX_RESULT_TRUE No corresponding files or directories were found, * so no deletion was performed. */ -LIBMDBX_API int mdbx_env_delete(const char *pathname, - MDBX_env_delete_mode_t mode); +LIBMDBX_API int mdbx_env_delete(const char *pathname, MDBX_env_delete_mode_t mode); #if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) /** \copydoc mdbx_env_delete() * \ingroup c_extra * \note Available only on Windows. * \see mdbx_env_delete() */ -LIBMDBX_API int mdbx_env_deleteW(const wchar_t *pathname, - MDBX_env_delete_mode_t mode); +LIBMDBX_API int mdbx_env_deleteW(const wchar_t *pathname, MDBX_env_delete_mode_t mode); #define mdbx_env_deleteT(pathname, mode) mdbx_env_deleteW(pathname, mode) #else #define mdbx_env_deleteT(pathname, mode) mdbx_env_delete(pathname, mode) @@ -2683,8 +2604,7 @@ LIBMDBX_API int mdbx_env_deleteW(const wchar_t *pathname, * \see mdbx_txn_park() * * \returns A non-zero error value on failure and 0 on success. 
*/ -LIBMDBX_API int mdbx_env_copy(MDBX_env *env, const char *dest, - MDBX_copy_flags_t flags); +LIBMDBX_API int mdbx_env_copy(MDBX_env *env, const char *dest, MDBX_copy_flags_t flags); /** \brief Copy an MDBX environment by given read transaction to the specified * path, with options. @@ -2739,30 +2659,25 @@ LIBMDBX_API int mdbx_env_copy(MDBX_env *env, const char *dest, * \see mdbx_txn_park() * * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API int mdbx_txn_copy2pathname(MDBX_txn *txn, const char *dest, - MDBX_copy_flags_t flags); +LIBMDBX_API int mdbx_txn_copy2pathname(MDBX_txn *txn, const char *dest, MDBX_copy_flags_t flags); #if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) /** \copydoc mdbx_env_copy() * \ingroup c_extra * \note Available only on Windows. * \see mdbx_env_copy() */ -LIBMDBX_API int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest, - MDBX_copy_flags_t flags); +LIBMDBX_API int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest, MDBX_copy_flags_t flags); #define mdbx_env_copyT(env, dest, flags) mdbx_env_copyW(env, dest, flags) /** \copydoc mdbx_txn_copy2pathname() * \ingroup c_extra * \note Available only on Windows. 
* \see mdbx_txn_copy2pathname() */ -LIBMDBX_API int mdbx_txn_copy2pathnameW(MDBX_txn *txn, const wchar_t *dest, - MDBX_copy_flags_t flags); -#define mdbx_txn_copy2pathnameT(txn, dest, flags) \ - mdbx_txn_copy2pathnameW(txn, dest, path) +LIBMDBX_API int mdbx_txn_copy2pathnameW(MDBX_txn *txn, const wchar_t *dest, MDBX_copy_flags_t flags); +#define mdbx_txn_copy2pathnameT(txn, dest, flags) mdbx_txn_copy2pathnameW(txn, dest, flags) #else #define mdbx_env_copyT(env, dest, flags) mdbx_env_copy(env, dest, flags) -#define mdbx_txn_copy2pathnameT(txn, dest, flags) \ - mdbx_txn_copy2pathname(txn, dest, path) +#define mdbx_txn_copy2pathnameT(txn, dest, flags) mdbx_txn_copy2pathname(txn, dest, flags) #endif /* Windows */ /** \brief Copy an environment to the specified file descriptor, with @@ -2789,8 +2704,7 @@ LIBMDBX_API int mdbx_txn_copy2pathnameW(MDBX_txn *txn, const wchar_t *dest, * \param [in] flags Special options for this operation. \see mdbx_env_copy() * * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, - MDBX_copy_flags_t flags); +LIBMDBX_API int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, MDBX_copy_flags_t flags); /** \brief Copy an environment by given read transaction to the specified file * descriptor, with options. @@ -2815,21 +2729,20 @@ LIBMDBX_API int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, * \param [in] flags Special options for this operation. \see mdbx_env_copy() * * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API int mdbx_txn_copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, - MDBX_copy_flags_t flags); +LIBMDBX_API int mdbx_txn_copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, MDBX_copy_flags_t flags); /** \brief Statistics for a table in the environment * \ingroup c_statinfo * \see mdbx_env_stat_ex() \see mdbx_dbi_stat() */ struct MDBX_stat { - uint32_t ms_psize; /**< Size of a table page.
This is the same for all tables - in a database. */ - uint32_t ms_depth; /**< Depth (height) of the B-tree */ + uint32_t ms_psize; /**< Size of a table page. This is the same for all tables + in a database. */ + uint32_t ms_depth; /**< Depth (height) of the B-tree */ uint64_t ms_branch_pages; /**< Number of internal (non-leaf) pages */ uint64_t ms_leaf_pages; /**< Number of leaf pages */ uint64_t ms_overflow_pages; /**< Number of large/overflow pages */ uint64_t ms_entries; /**< Number of data items */ - uint64_t ms_mod_txnid; /**< Transaction ID of committed last modification */ + uint64_t ms_mod_txnid; /**< Transaction ID of committed last modification */ }; #ifndef __cplusplus /** \ingroup c_statinfo */ @@ -2855,15 +2768,12 @@ typedef struct MDBX_stat MDBX_stat; * \param [in] bytes The size of \ref MDBX_stat. * * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, - MDBX_stat *stat, size_t bytes); +LIBMDBX_API int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_stat *stat, size_t bytes); /** \brief Return statistics about the MDBX environment. * \ingroup c_statinfo * \deprecated Please use mdbx_env_stat_ex() instead. 
*/ -MDBX_DEPRECATED LIBMDBX_INLINE_API(int, mdbx_env_stat, - (const MDBX_env *env, MDBX_stat *stat, - size_t bytes)) { +MDBX_DEPRECATED LIBMDBX_INLINE_API(int, mdbx_env_stat, (const MDBX_env *env, MDBX_stat *stat, size_t bytes)) { return mdbx_env_stat_ex(env, NULL, stat, bytes); } @@ -2878,10 +2788,10 @@ struct MDBX_envinfo { uint64_t shrink; /**< Shrink threshold for datafile */ uint64_t grow; /**< Growth step for datafile */ } mi_geo; - uint64_t mi_mapsize; /**< Size of the data memory map */ - uint64_t mi_last_pgno; /**< Number of the last used page */ - uint64_t mi_recent_txnid; /**< ID of the last committed transaction */ - uint64_t mi_latter_reader_txnid; /**< ID of the last reader transaction */ + uint64_t mi_mapsize; /**< Size of the data memory map */ + uint64_t mi_last_pgno; /**< Number of the last used page */ + uint64_t mi_recent_txnid; /**< ID of the last committed transaction */ + uint64_t mi_latter_reader_txnid; /**< ID of the last reader transaction */ uint64_t mi_self_latter_reader_txnid; /**< ID of the last reader transaction of caller process */ uint64_t mi_meta_txnid[3], mi_meta_sign[3]; @@ -2940,10 +2850,8 @@ struct MDBX_envinfo { to a disk */ uint64_t prefault; /**< Number of prefault write operations (not a pages) */ uint64_t mincore; /**< Number of mincore() calls */ - uint64_t - msync; /**< Number of explicit msync-to-disk operations (not a pages) */ - uint64_t - fsync; /**< Number of explicit fsync-to-disk operations (not a pages) */ + uint64_t msync; /**< Number of explicit msync-to-disk operations (not a pages) */ + uint64_t fsync; /**< Number of explicit fsync-to-disk operations (not a pages) */ } mi_pgop_stat; /* GUID of the database DXB file. */ @@ -2976,14 +2884,11 @@ typedef struct MDBX_envinfo MDBX_envinfo; * this value is used to provide ABI compatibility. * * \returns A non-zero error value on failure and 0 on success. 
*/ -LIBMDBX_API int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, - MDBX_envinfo *info, size_t bytes); +LIBMDBX_API int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *info, size_t bytes); /** \brief Return information about the MDBX environment. * \ingroup c_statinfo * \deprecated Please use mdbx_env_info_ex() instead. */ -MDBX_DEPRECATED LIBMDBX_INLINE_API(int, mdbx_env_info, - (const MDBX_env *env, MDBX_envinfo *info, - size_t bytes)) { +MDBX_DEPRECATED LIBMDBX_INLINE_API(int, mdbx_env_info, (const MDBX_env *env, MDBX_envinfo *info, size_t bytes)) { return mdbx_env_info_ex(env, NULL, info, bytes); } @@ -3028,16 +2933,12 @@ LIBMDBX_API int mdbx_env_sync_ex(MDBX_env *env, bool force, bool nonblock); /** \brief The shortcut to calling \ref mdbx_env_sync_ex() with * the `force=true` and `nonblock=false` arguments. * \ingroup c_extra */ -LIBMDBX_INLINE_API(int, mdbx_env_sync, (MDBX_env * env)) { - return mdbx_env_sync_ex(env, true, false); -} +LIBMDBX_INLINE_API(int, mdbx_env_sync, (MDBX_env * env)) { return mdbx_env_sync_ex(env, true, false); } /** \brief The shortcut to calling \ref mdbx_env_sync_ex() with * the `force=false` and `nonblock=true` arguments. * \ingroup c_extra */ -LIBMDBX_INLINE_API(int, mdbx_env_sync_poll, (MDBX_env * env)) { - return mdbx_env_sync_ex(env, false, true); -} +LIBMDBX_INLINE_API(int, mdbx_env_sync_poll, (MDBX_env * env)) { return mdbx_env_sync_ex(env, false, true); } /** \brief Sets threshold to force flush the data buffers to disk, even any of * \ref MDBX_SAFE_NOSYNC flag in the environment. @@ -3062,8 +2963,7 @@ LIBMDBX_INLINE_API(int, mdbx_env_sync_poll, (MDBX_env * env)) { * a synchronous flush would be made. * * \returns A non-zero error value on failure and 0 on success. 
*/ -LIBMDBX_INLINE_API(int, mdbx_env_set_syncbytes, - (MDBX_env * env, size_t threshold)) { +LIBMDBX_INLINE_API(int, mdbx_env_set_syncbytes, (MDBX_env * env, size_t threshold)) { return mdbx_env_set_option(env, MDBX_opt_sync_bytes, threshold); } @@ -3081,8 +2981,7 @@ LIBMDBX_INLINE_API(int, mdbx_env_set_syncbytes, * \returns A non-zero error value on failure and 0 on success, * some possible errors are: * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_INLINE_API(int, mdbx_env_get_syncbytes, - (const MDBX_env *env, size_t *threshold)) { +LIBMDBX_INLINE_API(int, mdbx_env_get_syncbytes, (const MDBX_env *env, size_t *threshold)) { int rc = MDBX_EINVAL; if (threshold) { uint64_t proxy = 0; @@ -3125,8 +3024,7 @@ LIBMDBX_INLINE_API(int, mdbx_env_get_syncbytes, * the last unsteady commit. * * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_INLINE_API(int, mdbx_env_set_syncperiod, - (MDBX_env * env, unsigned seconds_16dot16)) { +LIBMDBX_INLINE_API(int, mdbx_env_set_syncperiod, (MDBX_env * env, unsigned seconds_16dot16)) { return mdbx_env_set_option(env, MDBX_opt_sync_period, seconds_16dot16); } @@ -3146,8 +3044,7 @@ LIBMDBX_INLINE_API(int, mdbx_env_set_syncperiod, * \returns A non-zero error value on failure and 0 on success, * some possible errors are: * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_INLINE_API(int, mdbx_env_get_syncperiod, - (const MDBX_env *env, unsigned *period_seconds_16dot16)) { +LIBMDBX_INLINE_API(int, mdbx_env_get_syncperiod, (const MDBX_env *env, unsigned *period_seconds_16dot16)) { int rc = MDBX_EINVAL; if (period_seconds_16dot16) { uint64_t proxy = 0; @@ -3204,9 +3101,7 @@ LIBMDBX_API int mdbx_env_close_ex(MDBX_env *env, bool dont_sync); /** \brief The shortcut to calling \ref mdbx_env_close_ex() with * the `dont_sync=false` argument. 
* \ingroup c_opening */ -LIBMDBX_INLINE_API(int, mdbx_env_close, (MDBX_env * env)) { - return mdbx_env_close_ex(env, false); -} +LIBMDBX_INLINE_API(int, mdbx_env_close, (MDBX_env * env)) { return mdbx_env_close_ex(env, false); } #if defined(DOXYGEN) || !(defined(_WIN32) || defined(_WIN64)) /** \brief Восстанавливает экземпляр среды в дочернем процессе после ветвления @@ -3369,8 +3264,7 @@ DEFINE_ENUM_FLAG_OPERATORS(MDBX_warmup_flags) * * \retval MDBX_RESULT_TRUE The specified timeout is reached during load * data into memory. */ -LIBMDBX_API int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, - MDBX_warmup_flags_t flags, +LIBMDBX_API int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, MDBX_warmup_flags_t flags, unsigned timeout_seconds_16dot16); /** \brief Set environment flags. @@ -3393,8 +3287,7 @@ LIBMDBX_API int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, * \returns A non-zero error value on failure and 0 on success, * some possible errors are: * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, - bool onoff); +LIBMDBX_API int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, bool onoff); /** \brief Get environment flags. * \ingroup c_statinfo @@ -3644,16 +3537,12 @@ LIBMDBX_API int mdbx_env_get_fd(const MDBX_env *env, mdbx_filehandle_t *fd); * \retval MDBX_TOO_LARGE Specified size is too large, i.e. too many pages for * given size, or a 32-bit process requests too much * bytes for the 32-bit address space. 
*/ -LIBMDBX_API int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, - intptr_t size_now, intptr_t size_upper, - intptr_t growth_step, - intptr_t shrink_threshold, - intptr_t pagesize); +LIBMDBX_API int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now, intptr_t size_upper, + intptr_t growth_step, intptr_t shrink_threshold, intptr_t pagesize); /** \deprecated Please use \ref mdbx_env_set_geometry() instead. * \ingroup c_settings */ -MDBX_DEPRECATED LIBMDBX_INLINE_API(int, mdbx_env_set_mapsize, - (MDBX_env * env, size_t size)) { +MDBX_DEPRECATED LIBMDBX_INLINE_API(int, mdbx_env_set_mapsize, (MDBX_env * env, size_t size)) { return mdbx_env_set_geometry(env, size, size, size, -1, -1, -1); } @@ -3672,81 +3561,66 @@ MDBX_DEPRECATED LIBMDBX_INLINE_API(int, mdbx_env_set_mapsize, * i.e. \ref MDBX_NORDAHEAD is useful to * open environment by \ref mdbx_env_open(). * \retval Otherwise the error code. */ -LIBMDBX_API int mdbx_is_readahead_reasonable(size_t volume, - intptr_t redundancy); +LIBMDBX_API int mdbx_is_readahead_reasonable(size_t volume, intptr_t redundancy); /** \brief Returns the minimal database page size in bytes. * \ingroup c_statinfo */ -MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_INLINE_API(intptr_t, mdbx_limits_pgsize_min, - (void)) { - return MDBX_MIN_PAGESIZE; -} +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_INLINE_API(intptr_t, mdbx_limits_pgsize_min, (void)) { return MDBX_MIN_PAGESIZE; } /** \brief Returns the maximal database page size in bytes. * \ingroup c_statinfo */ -MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_INLINE_API(intptr_t, mdbx_limits_pgsize_max, - (void)) { - return MDBX_MAX_PAGESIZE; -} +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_INLINE_API(intptr_t, mdbx_limits_pgsize_max, (void)) { return MDBX_MAX_PAGESIZE; } /** \brief Returns minimal database size in bytes for given page size, * or -1 if pagesize is invalid. 
* \ingroup c_statinfo */ -MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t -mdbx_limits_dbsize_min(intptr_t pagesize); +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_dbsize_min(intptr_t pagesize); /** \brief Returns maximal database size in bytes for given page size, * or -1 if pagesize is invalid. * \ingroup c_statinfo */ -MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t -mdbx_limits_dbsize_max(intptr_t pagesize); +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_dbsize_max(intptr_t pagesize); /** \brief Returns maximal key size in bytes for given page size * and table flags, or -1 if pagesize is invalid. * \ingroup c_statinfo * \see db_flags */ -MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t -mdbx_limits_keysize_max(intptr_t pagesize, MDBX_db_flags_t flags); +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_keysize_max(intptr_t pagesize, MDBX_db_flags_t flags); /** \brief Returns minimal key size in bytes for given table flags. * \ingroup c_statinfo * \see db_flags */ -MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t -mdbx_limits_keysize_min(MDBX_db_flags_t flags); +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_keysize_min(MDBX_db_flags_t flags); /** \brief Returns maximal data size in bytes for given page size * and table flags, or -1 if pagesize is invalid. * \ingroup c_statinfo * \see db_flags */ -MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t -mdbx_limits_valsize_max(intptr_t pagesize, MDBX_db_flags_t flags); +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_valsize_max(intptr_t pagesize, MDBX_db_flags_t flags); /** \brief Returns minimal data size in bytes for given table flags. 
* \ingroup c_statinfo * \see db_flags */ -MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t -mdbx_limits_valsize_min(MDBX_db_flags_t flags); +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_valsize_min(MDBX_db_flags_t flags); /** \brief Returns maximal size of key-value pair to fit in a single page with * the given size and table flags, or -1 if pagesize is invalid. * \ingroup c_statinfo * \see db_flags */ -MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t -mdbx_limits_pairsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags); +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_pairsize4page_max(intptr_t pagesize, + MDBX_db_flags_t flags); /** \brief Returns maximal data size in bytes to fit in a leaf-page or * single large/overflow-page with the given page size and table flags, * or -1 if pagesize is invalid. * \ingroup c_statinfo * \see db_flags */ -MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t -mdbx_limits_valsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags); +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_valsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags); /** \brief Returns maximal write transaction size (i.e. limit for summary volume * of dirty pages) in bytes for given page size, or -1 if pagesize is invalid. * \ingroup c_statinfo */ -MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t -mdbx_limits_txnsize_max(intptr_t pagesize); +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_txnsize_max(intptr_t pagesize); /** \brief Set the maximum number of threads/reader slots for for all processes * interacts with the database. @@ -3771,8 +3645,7 @@ mdbx_limits_txnsize_max(intptr_t pagesize); * some possible errors are: * \retval MDBX_EINVAL An invalid parameter was specified. * \retval MDBX_EPERM The environment is already open. 
*/ -LIBMDBX_INLINE_API(int, mdbx_env_set_maxreaders, - (MDBX_env * env, unsigned readers)) { +LIBMDBX_INLINE_API(int, mdbx_env_set_maxreaders, (MDBX_env * env, unsigned readers)) { return mdbx_env_set_option(env, MDBX_opt_max_readers, readers); } @@ -3787,8 +3660,7 @@ LIBMDBX_INLINE_API(int, mdbx_env_set_maxreaders, * \returns A non-zero error value on failure and 0 on success, * some possible errors are: * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_INLINE_API(int, mdbx_env_get_maxreaders, - (const MDBX_env *env, unsigned *readers)) { +LIBMDBX_INLINE_API(int, mdbx_env_get_maxreaders, (const MDBX_env *env, unsigned *readers)) { int rc = MDBX_EINVAL; if (readers) { uint64_t proxy = 0; @@ -3833,8 +3705,7 @@ LIBMDBX_INLINE_API(int, mdbx_env_set_maxdbs, (MDBX_env * env, MDBX_dbi dbs)) { * \returns A non-zero error value on failure and 0 on success, * some possible errors are: * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_INLINE_API(int, mdbx_env_get_maxdbs, - (const MDBX_env *env, MDBX_dbi *dbs)) { +LIBMDBX_INLINE_API(int, mdbx_env_get_maxdbs, (const MDBX_env *env, MDBX_dbi *dbs)) { int rc = MDBX_EINVAL; if (dbs) { uint64_t proxy = 0; @@ -3864,8 +3735,7 @@ MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API size_t mdbx_default_pagesize(void); * available/free RAM pages will be stored. * * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, - intptr_t *avail_pages); +LIBMDBX_API int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, intptr_t *avail_pages); /** \brief Returns the maximum size of keys can put. * \ingroup c_statinfo @@ -3876,8 +3746,7 @@ LIBMDBX_API int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, * * \returns The maximum size of a key can write, * or -1 if something is wrong. 
*/ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int -mdbx_env_get_maxkeysize_ex(const MDBX_env *env, MDBX_db_flags_t flags); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_env_get_maxkeysize_ex(const MDBX_env *env, MDBX_db_flags_t flags); /** \brief Returns the maximum size of data we can put. * \ingroup c_statinfo @@ -3888,14 +3757,12 @@ mdbx_env_get_maxkeysize_ex(const MDBX_env *env, MDBX_db_flags_t flags); * * \returns The maximum size of a data can write, * or -1 if something is wrong. */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int -mdbx_env_get_maxvalsize_ex(const MDBX_env *env, MDBX_db_flags_t flags); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_env_get_maxvalsize_ex(const MDBX_env *env, MDBX_db_flags_t flags); /** \deprecated Please use \ref mdbx_env_get_maxkeysize_ex() * and/or \ref mdbx_env_get_maxvalsize_ex() * \ingroup c_statinfo */ -MDBX_NOTHROW_PURE_FUNCTION MDBX_DEPRECATED LIBMDBX_API int -mdbx_env_get_maxkeysize(const MDBX_env *env); +MDBX_NOTHROW_PURE_FUNCTION MDBX_DEPRECATED LIBMDBX_API int mdbx_env_get_maxkeysize(const MDBX_env *env); /** \brief Returns maximal size of key-value pair to fit in a single page * for specified table flags. @@ -3907,8 +3774,7 @@ mdbx_env_get_maxkeysize(const MDBX_env *env); * * \returns The maximum size of a data can write, * or -1 if something is wrong. */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int -mdbx_env_get_pairsize4page_max(const MDBX_env *env, MDBX_db_flags_t flags); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_env_get_pairsize4page_max(const MDBX_env *env, MDBX_db_flags_t flags); /** \brief Returns maximal data size in bytes to fit in a leaf-page or * single large/overflow-page for specified table flags. @@ -3920,8 +3786,7 @@ mdbx_env_get_pairsize4page_max(const MDBX_env *env, MDBX_db_flags_t flags); * * \returns The maximum size of a data can write, * or -1 if something is wrong. 
*/ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int -mdbx_env_get_valsize4page_max(const MDBX_env *env, MDBX_db_flags_t flags); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_env_get_valsize4page_max(const MDBX_env *env, MDBX_db_flags_t flags); /** \brief Sets application information (a context pointer) associated with * the environment. @@ -3942,8 +3807,7 @@ LIBMDBX_API int mdbx_env_set_userctx(MDBX_env *env, void *ctx); * \param [in] env An environment handle returned by \ref mdbx_env_create() * \returns The pointer set by \ref mdbx_env_set_userctx() * or `NULL` if something wrong. */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API void * -mdbx_env_get_userctx(const MDBX_env *env); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API void *mdbx_env_get_userctx(const MDBX_env *env); /** \brief Create a transaction with a user provided context pointer * for use with the environment. @@ -4004,8 +3868,7 @@ mdbx_env_get_userctx(const MDBX_env *env); * \retval MDBX_ENOMEM Out of memory. * \retval MDBX_BUSY The write transaction is already started by the * current thread. */ -LIBMDBX_API int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, - MDBX_txn_flags_t flags, MDBX_txn **txn, +LIBMDBX_API int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, MDBX_txn **txn, void *context); /** \brief Create a transaction for use with the environment. @@ -4062,9 +3925,7 @@ LIBMDBX_API int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, * \retval MDBX_ENOMEM Out of memory. * \retval MDBX_BUSY The write transaction is already started by the * current thread. 
*/ -LIBMDBX_INLINE_API(int, mdbx_txn_begin, - (MDBX_env * env, MDBX_txn *parent, MDBX_txn_flags_t flags, - MDBX_txn **txn)) { +LIBMDBX_INLINE_API(int, mdbx_txn_begin, (MDBX_env * env, MDBX_txn *parent, MDBX_txn_flags_t flags, MDBX_txn **txn)) { return mdbx_txn_begin_ex(env, parent, flags, txn, NULL); } @@ -4090,8 +3951,7 @@ LIBMDBX_API int mdbx_txn_set_userctx(MDBX_txn *txn, void *ctx); * \returns The pointer which was passed via the `context` parameter * of `mdbx_txn_begin_ex()` or set by \ref mdbx_txn_set_userctx(), * or `NULL` if something wrong. */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API void * -mdbx_txn_get_userctx(const MDBX_txn *txn); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API void *mdbx_txn_get_userctx(const MDBX_txn *txn); /** \brief Information about the transaction * \ingroup c_statinfo @@ -4158,15 +4018,13 @@ typedef struct MDBX_txn_info MDBX_txn_info; * See description of \ref MDBX_txn_info. * * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, - bool scan_rlt); +LIBMDBX_API int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt); /** \brief Returns the transaction's MDBX_env. * \ingroup c_transactions * * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin() */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API MDBX_env * -mdbx_txn_env(const MDBX_txn *txn); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API MDBX_env *mdbx_txn_env(const MDBX_txn *txn); /** \brief Return the transaction's flags. * \ingroup c_transactions @@ -4177,8 +4035,7 @@ mdbx_txn_env(const MDBX_txn *txn); * * \returns A transaction flags, valid if input is an valid transaction, * otherwise \ref MDBX_TXN_INVALID. */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API MDBX_txn_flags_t -mdbx_txn_flags(const MDBX_txn *txn); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API MDBX_txn_flags_t mdbx_txn_flags(const MDBX_txn *txn); /** \brief Return the transaction's ID. 
* \ingroup c_statinfo @@ -4191,8 +4048,7 @@ mdbx_txn_flags(const MDBX_txn *txn); * * \returns A transaction ID, valid if input is an active transaction, * otherwise 0. */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API uint64_t -mdbx_txn_id(const MDBX_txn *txn); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API uint64_t mdbx_txn_id(const MDBX_txn *txn); /** \brief Latency of commit stages in 1/65536 of seconds units. * \warning This structure may be changed in future releases. @@ -4335,9 +4191,7 @@ LIBMDBX_API int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency); * \retval MDBX_EIO An error occurred during the flushing/writing * data to a storage medium/disk. * \retval MDBX_ENOMEM Out of memory. */ -LIBMDBX_INLINE_API(int, mdbx_txn_commit, (MDBX_txn * txn)) { - return mdbx_txn_commit_ex(txn, NULL); -} +LIBMDBX_INLINE_API(int, mdbx_txn_commit, (MDBX_txn * txn)) { return mdbx_txn_commit_ex(txn, NULL); } /** \brief Abandon all the operations of the transaction instead of saving them. * \ingroup c_transactions @@ -4602,8 +4456,7 @@ LIBMDBX_API int mdbx_canary_get(const MDBX_txn *txn, MDBX_canary *canary); * You have been warned but still can use custom comparators knowing * about the issues noted above. In this case you should ignore `deprecated` * warnings or define `MDBX_DEPRECATED` macro to empty to avoid ones. */ -typedef int(MDBX_cmp_func)(const MDBX_val *a, - const MDBX_val *b) MDBX_CXX17_NOEXCEPT; +typedef int(MDBX_cmp_func)(const MDBX_val *a, const MDBX_val *b) MDBX_CXX17_NOEXCEPT; /** \brief Open or Create a named table in the environment. * \ingroup c_dbi @@ -4694,12 +4547,10 @@ typedef int(MDBX_cmp_func)(const MDBX_val *a, * opened with a different comparison function(s). * \retval MDBX_THREAD_MISMATCH Given transaction is not owned * by current thread. 
*/ -LIBMDBX_API int mdbx_dbi_open(MDBX_txn *txn, const char *name, - MDBX_db_flags_t flags, MDBX_dbi *dbi); +LIBMDBX_API int mdbx_dbi_open(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi); /** \copydoc mdbx_dbi_open() * \ingroup c_dbi */ -LIBMDBX_API int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, - MDBX_db_flags_t flags, MDBX_dbi *dbi); +LIBMDBX_API int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi); /** \brief Open or Create a named table in the environment * with using custom comparison functions. @@ -4717,14 +4568,12 @@ LIBMDBX_API int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, * \param [in] datacmp Optional custom data comparison function for a table. * \param [out] dbi Address where the new MDBX_dbi handle will be stored. * \returns A non-zero error value on failure and 0 on success. */ -MDBX_DEPRECATED LIBMDBX_API int -mdbx_dbi_open_ex(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, - MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); +MDBX_DEPRECATED LIBMDBX_API int mdbx_dbi_open_ex(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, + MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); /** \copydoc mdbx_dbi_open_ex() * \ingroup c_dbi */ -MDBX_DEPRECATED LIBMDBX_API int -mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, - MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); +MDBX_DEPRECATED LIBMDBX_API int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, + MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); /** \brief Переименовает таблицу по DBI-дескриптору * @@ -4744,8 +4593,7 @@ mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, LIBMDBX_API int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name); /** \copydoc mdbx_dbi_rename() * \ingroup c_dbi */ -LIBMDBX_API int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, - const MDBX_val 
*name); +LIBMDBX_API int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *name); /** \brief Функция обратного вызова для перечисления * пользовательских именованных таблиц. @@ -4766,10 +4614,8 @@ LIBMDBX_API int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, * \returns Ноль при успехе и продолжении перечисления, при возвращении другого * значения оно будет немедленно возвращено вызывающему * без продолжения перечисления. */ -typedef int(MDBX_table_enum_func)(void *ctx, const MDBX_txn *txn, - const MDBX_val *name, MDBX_db_flags_t flags, - const struct MDBX_stat *stat, - MDBX_dbi dbi) MDBX_CXX17_NOEXCEPT; +typedef int(MDBX_table_enum_func)(void *ctx, const MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, + const struct MDBX_stat *stat, MDBX_dbi dbi) MDBX_CXX17_NOEXCEPT; /** \brief Перечисляет пользовательские именнованные таблицы. * @@ -4791,8 +4637,7 @@ typedef int(MDBX_table_enum_func)(void *ctx, const MDBX_txn *txn, * в функцию `func()` как есть. * * \returns Ненулевое значение кода ошибки, либо 0 при успешном выполнении. */ -LIBMDBX_API int mdbx_enumerate_tables(const MDBX_txn *txn, - MDBX_table_enum_func *func, void *ctx); +LIBMDBX_API int mdbx_enumerate_tables(const MDBX_txn *txn, MDBX_table_enum_func *func, void *ctx); /** \defgroup value2key Value-to-Key functions * \brief Value-to-Key functions to @@ -4805,28 +4650,21 @@ LIBMDBX_API int mdbx_enumerate_tables(const MDBX_txn *txn, * and IEEE754 double values in one index for JSON-numbers with restriction for * integer numbers range corresponding to RFC-7159, i.e. \f$[-2^{53}+1, * 2^{53}-1]\f$. 
See bottom of page 6 at https://tools.ietf.org/html/rfc7159 */ -MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API uint64_t -mdbx_key_from_jsonInteger(const int64_t json_integer); +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API uint64_t mdbx_key_from_jsonInteger(const int64_t json_integer); -MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API uint64_t -mdbx_key_from_double(const double ieee754_64bit); +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API uint64_t mdbx_key_from_double(const double ieee754_64bit); -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API uint64_t -mdbx_key_from_ptrdouble(const double *const ieee754_64bit); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API uint64_t mdbx_key_from_ptrdouble(const double *const ieee754_64bit); -MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API uint32_t -mdbx_key_from_float(const float ieee754_32bit); +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API uint32_t mdbx_key_from_float(const float ieee754_32bit); -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API uint32_t -mdbx_key_from_ptrfloat(const float *const ieee754_32bit); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API uint32_t mdbx_key_from_ptrfloat(const float *const ieee754_32bit); -MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_INLINE_API(uint64_t, mdbx_key_from_int64, - (const int64_t i64)) { +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_INLINE_API(uint64_t, mdbx_key_from_int64, (const int64_t i64)) { return UINT64_C(0x8000000000000000) + i64; } -MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_INLINE_API(uint32_t, mdbx_key_from_int32, - (const int32_t i32)) { +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_INLINE_API(uint32_t, mdbx_key_from_int32, (const int32_t i32)) { return UINT32_C(0x80000000) + i32; } /** end of value2key @} */ @@ -4836,20 +4674,15 @@ MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_INLINE_API(uint32_t, mdbx_key_from_int32, * \ref avoid_custom_comparators "avoid using custom comparators" * \see value2key * @{ */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int64_t -mdbx_jsonInteger_from_key(const MDBX_val); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int64_t mdbx_jsonInteger_from_key(const 
MDBX_val); -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API double -mdbx_double_from_key(const MDBX_val); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API double mdbx_double_from_key(const MDBX_val); -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API float -mdbx_float_from_key(const MDBX_val); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API float mdbx_float_from_key(const MDBX_val); -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int32_t -mdbx_int32_from_key(const MDBX_val); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int32_t mdbx_int32_from_key(const MDBX_val); -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int64_t -mdbx_int64_from_key(const MDBX_val); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int64_t mdbx_int64_from_key(const MDBX_val); /** end of value2key @} */ /** \brief Retrieve statistics for a table. @@ -4866,8 +4699,7 @@ mdbx_int64_from_key(const MDBX_val); * \retval MDBX_THREAD_MISMATCH Given transaction is not owned * by current thread. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, - MDBX_stat *stat, size_t bytes); +LIBMDBX_API int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *stat, size_t bytes); /** \brief Retrieve depth (bitmask) information of nested dupsort (multi-value) * B+trees for given table. @@ -4884,8 +4716,7 @@ LIBMDBX_API int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, * by current thread. * \retval MDBX_EINVAL An invalid parameter was specified. * \retval MDBX_RESULT_TRUE The dbi isn't a dupsort (multi-value) table. */ -LIBMDBX_API int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, - uint32_t *mask); +LIBMDBX_API int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, uint32_t *mask); /** \brief DBI state bits returted by \ref mdbx_dbi_flags_ex() * \ingroup c_statinfo @@ -4913,14 +4744,12 @@ DEFINE_ENUM_FLAG_OPERATORS(MDBX_dbi_state) * \param [out] state Address where the state will be returned. * * \returns A non-zero error value on failure and 0 on success. 
*/ -LIBMDBX_API int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, - unsigned *flags, unsigned *state); +LIBMDBX_API int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, unsigned *state); /** \brief The shortcut to calling \ref mdbx_dbi_flags_ex() with `state=NULL` * for discarding it result. * \ingroup c_statinfo * \see MDBX_db_flags_t */ -LIBMDBX_INLINE_API(int, mdbx_dbi_flags, - (const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags)) { +LIBMDBX_INLINE_API(int, mdbx_dbi_flags, (const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags)) { unsigned state; return mdbx_dbi_flags_ex(txn, dbi, flags, &state); } @@ -4999,8 +4828,7 @@ LIBMDBX_API int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del); * by current thread. * \retval MDBX_NOTFOUND The key was not in the table. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, - MDBX_val *data); +LIBMDBX_API int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data); /** \brief Get items from a table * and optionally number of data items for a given key. @@ -5032,8 +4860,7 @@ LIBMDBX_API int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, * by current thread. * \retval MDBX_NOTFOUND The key was not in the table. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, - MDBX_val *data, size_t *values_count); +LIBMDBX_API int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, size_t *values_count); /** \brief Get equal or great item from a table. * \ingroup c_crud @@ -5063,8 +4890,7 @@ LIBMDBX_API int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, * by current thread. * \retval MDBX_NOTFOUND The key was not in the table. * \retval MDBX_EINVAL An invalid parameter was specified. 
*/ -LIBMDBX_API int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, - MDBX_val *key, MDBX_val *data); +LIBMDBX_API int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data); /** \brief Store items into a table. * \ingroup c_crud @@ -5147,8 +4973,7 @@ LIBMDBX_API int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, * \retval MDBX_EACCES An attempt was made to write * in a read-only transaction. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, - MDBX_val *data, MDBX_put_flags_t flags); +LIBMDBX_API int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, MDBX_put_flags_t flags); /** \brief Replace items in a table. * \ingroup c_crud @@ -5193,16 +5018,12 @@ LIBMDBX_API int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, * \see \ref c_crud_hints "Quick reference for Insert/Update/Delete operations" * * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, - MDBX_val *new_data, MDBX_val *old_data, +LIBMDBX_API int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *new_data, MDBX_val *old_data, MDBX_put_flags_t flags); -typedef int (*MDBX_preserve_func)(void *context, MDBX_val *target, - const void *src, size_t bytes); -LIBMDBX_API int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, - const MDBX_val *key, MDBX_val *new_data, - MDBX_val *old_data, MDBX_put_flags_t flags, - MDBX_preserve_func preserver, +typedef int (*MDBX_preserve_func)(void *context, MDBX_val *target, const void *src, size_t bytes); +LIBMDBX_API int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *new_data, + MDBX_val *old_data, MDBX_put_flags_t flags, MDBX_preserve_func preserver, void *preserver_context); /** \brief Delete items from a table. 
@@ -5230,8 +5051,7 @@ LIBMDBX_API int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, * \retval MDBX_EACCES An attempt was made to write * in a read-only transaction. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, - const MDBX_val *data); +LIBMDBX_API int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, const MDBX_val *data); /** \brief Create a cursor handle but not bind it to transaction nor DBI-handle. * \ingroup c_cursors @@ -5277,8 +5097,7 @@ LIBMDBX_API int mdbx_cursor_set_userctx(MDBX_cursor *cursor, void *ctx); * \returns The pointer which was passed via the `context` parameter * of `mdbx_cursor_create()` or set by \ref mdbx_cursor_set_userctx(), * or `NULL` if something wrong. */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API void * -mdbx_cursor_get_userctx(const MDBX_cursor *cursor); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API void *mdbx_cursor_get_userctx(const MDBX_cursor *cursor); /** \brief Bind cursor to specified transaction and DBI-handle. * \ingroup c_cursors @@ -5305,8 +5124,7 @@ mdbx_cursor_get_userctx(const MDBX_cursor *cursor); * \retval MDBX_THREAD_MISMATCH Given transaction is not owned * by current thread. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *cursor, - MDBX_dbi dbi); +LIBMDBX_API int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *cursor, MDBX_dbi dbi); /** \brief Unbind cursor from a transaction. * \ingroup c_cursors @@ -5375,8 +5193,7 @@ LIBMDBX_API int mdbx_cursor_reset(MDBX_cursor *cursor); * \retval MDBX_THREAD_MISMATCH Given transaction is not owned * by current thread. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_cursor_open(const MDBX_txn *txn, MDBX_dbi dbi, - MDBX_cursor **cursor); +LIBMDBX_API int mdbx_cursor_open(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **cursor); /** \brief Close a cursor handle. 
* \ingroup c_cursors @@ -5445,8 +5262,7 @@ LIBMDBX_API int mdbx_cursor_renew(const MDBX_txn *txn, MDBX_cursor *cursor); * \ingroup c_cursors * * \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open(). */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API MDBX_txn * -mdbx_cursor_txn(const MDBX_cursor *cursor); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API MDBX_txn *mdbx_cursor_txn(const MDBX_cursor *cursor); /** \brief Return the cursor's table handle. * \ingroup c_cursors @@ -5487,9 +5303,7 @@ LIBMDBX_API int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest); * * \retval Значение со знаком в семантике оператора `<=>` (меньше нуля, ноль, * либо больше нуля) как результат сравнения позиций курсоров. */ -LIBMDBX_API int mdbx_cursor_compare(const MDBX_cursor *left, - const MDBX_cursor *right, - bool ignore_multival); +LIBMDBX_API int mdbx_cursor_compare(const MDBX_cursor *left, const MDBX_cursor *right, bool ignore_multival); /** \brief Retrieve by cursor. * \ingroup c_crud @@ -5522,8 +5336,7 @@ LIBMDBX_API int mdbx_cursor_compare(const MDBX_cursor *left, * by current thread. * \retval MDBX_NOTFOUND No matching key found. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_cursor_get(MDBX_cursor *cursor, MDBX_val *key, - MDBX_val *data, MDBX_cursor_op op); +LIBMDBX_API int mdbx_cursor_get(MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op); /** \brief Служебная функция для использования в утилитах. * \ingroup c_extra @@ -5569,8 +5382,7 @@ LIBMDBX_API int mdbx_cursor_ignord(MDBX_cursor *cursor); * * \see mdbx_cursor_scan() * \see mdbx_cursor_scan_from() */ -typedef int(MDBX_predicate_func)(void *context, MDBX_val *key, MDBX_val *value, - void *arg) MDBX_CXX17_NOEXCEPT; +typedef int(MDBX_predicate_func)(void *context, MDBX_val *key, MDBX_val *value, void *arg) MDBX_CXX17_NOEXCEPT; /** \brief Сканирует таблицу с использованием передаваемого предиката, * с уменьшением сопутствующих накладных расходов. 
@@ -5640,10 +5452,8 @@ typedef int(MDBX_predicate_func)(void *context, MDBX_val *key, MDBX_val *value, * и \ref MDBX_RESULT_FALSE, является кодом ошибки при позиционировании * курса, либо определяемым пользователем кодом остановки поиска * или ошибочной ситуации. */ -LIBMDBX_API int mdbx_cursor_scan(MDBX_cursor *cursor, - MDBX_predicate_func *predicate, void *context, - MDBX_cursor_op start_op, - MDBX_cursor_op turn_op, void *arg); +LIBMDBX_API int mdbx_cursor_scan(MDBX_cursor *cursor, MDBX_predicate_func *predicate, void *context, + MDBX_cursor_op start_op, MDBX_cursor_op turn_op, void *arg); /** Сканирует таблицу с использованием передаваемого предиката, * начиная с передаваемой пары ключ-значение, @@ -5729,10 +5539,8 @@ LIBMDBX_API int mdbx_cursor_scan(MDBX_cursor *cursor, * и \ref MDBX_RESULT_FALSE, является кодом ошибки при позиционировании * курса, либо определяемым пользователем кодом остановки поиска * или ошибочной ситуации. */ -LIBMDBX_API int mdbx_cursor_scan_from(MDBX_cursor *cursor, - MDBX_predicate_func *predicate, - void *context, MDBX_cursor_op from_op, - MDBX_val *from_key, MDBX_val *from_value, +LIBMDBX_API int mdbx_cursor_scan_from(MDBX_cursor *cursor, MDBX_predicate_func *predicate, void *context, + MDBX_cursor_op from_op, MDBX_val *from_key, MDBX_val *from_value, MDBX_cursor_op turn_op, void *arg); /** \brief Retrieve multiple non-dupsort key/value pairs by cursor. @@ -5776,8 +5584,7 @@ LIBMDBX_API int mdbx_cursor_scan_from(MDBX_cursor *cursor, * \retval MDBX_RESULT_TRUE The returned chunk is the last one, * and there are no pairs left. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_cursor_get_batch(MDBX_cursor *cursor, size_t *count, - MDBX_val *pairs, size_t limit, +LIBMDBX_API int mdbx_cursor_get_batch(MDBX_cursor *cursor, size_t *count, MDBX_val *pairs, size_t limit, MDBX_cursor_op op); /** \brief Store by cursor. 
@@ -5860,8 +5667,7 @@ LIBMDBX_API int mdbx_cursor_get_batch(MDBX_cursor *cursor, size_t *count, * \retval MDBX_EACCES An attempt was made to write in a read-only * transaction. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_cursor_put(MDBX_cursor *cursor, const MDBX_val *key, - MDBX_val *data, MDBX_put_flags_t flags); +LIBMDBX_API int mdbx_cursor_put(MDBX_cursor *cursor, const MDBX_val *key, MDBX_val *data, MDBX_put_flags_t flags); /** \brief Delete current key/data pair. * \ingroup c_crud @@ -5924,8 +5730,7 @@ LIBMDBX_API int mdbx_cursor_count(const MDBX_cursor *cursor, size_t *pcount); * positioned * \retval MDBX_RESULT_FALSE A data is available * \retval Otherwise the error code */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int -mdbx_cursor_eof(const MDBX_cursor *cursor); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cursor_eof(const MDBX_cursor *cursor); /** \brief Determines whether the cursor is pointed to the first key-value pair * or not. @@ -5938,8 +5743,7 @@ mdbx_cursor_eof(const MDBX_cursor *cursor); * \retval MDBX_RESULT_TRUE Cursor positioned to the first key-value pair * \retval MDBX_RESULT_FALSE Cursor NOT positioned to the first key-value * pair \retval Otherwise the error code */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int -mdbx_cursor_on_first(const MDBX_cursor *cursor); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cursor_on_first(const MDBX_cursor *cursor); /** \brief Определяет стоит ли курсор на первом или единственном * мульти-значении соответствующем ключу. @@ -5952,8 +5756,7 @@ mdbx_cursor_on_first(const MDBX_cursor *cursor); * \retval MDBX_RESULT_FALSE курсор НЕ установлен на первом или единственном * мульти-значении соответствующем ключу. * \retval ИНАЧЕ код ошибки. 
*/ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int -mdbx_cursor_on_first_dup(const MDBX_cursor *cursor); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cursor_on_first_dup(const MDBX_cursor *cursor); /** \brief Determines whether the cursor is pointed to the last key-value pair * or not. @@ -5966,8 +5769,7 @@ mdbx_cursor_on_first_dup(const MDBX_cursor *cursor); * \retval MDBX_RESULT_TRUE Cursor positioned to the last key-value pair * \retval MDBX_RESULT_FALSE Cursor NOT positioned to the last key-value pair * \retval Otherwise the error code */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int -mdbx_cursor_on_last(const MDBX_cursor *cursor); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cursor_on_last(const MDBX_cursor *cursor); /** \brief Определяет стоит ли курсор на последнем или единственном * мульти-значении соответствующем ключу. @@ -5980,8 +5782,7 @@ mdbx_cursor_on_last(const MDBX_cursor *cursor); * \retval MDBX_RESULT_FALSE курсор НЕ установлен на последнем или единственном * мульти-значении соответствующем ключу. * \retval ИНАЧЕ код ошибки. */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int -mdbx_cursor_on_last_dup(const MDBX_cursor *cursor); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cursor_on_last_dup(const MDBX_cursor *cursor); /** \addtogroup c_rqest * \details \note The estimation result varies greatly depending on the filling @@ -6027,9 +5828,7 @@ mdbx_cursor_on_last_dup(const MDBX_cursor *cursor); * i.e. `*distance_items = distance(first, last)`. * * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API int mdbx_estimate_distance(const MDBX_cursor *first, - const MDBX_cursor *last, - ptrdiff_t *distance_items); +LIBMDBX_API int mdbx_estimate_distance(const MDBX_cursor *first, const MDBX_cursor *last, ptrdiff_t *distance_items); /** \brief Estimates the move distance. * \ingroup c_rqest @@ -6051,8 +5850,7 @@ LIBMDBX_API int mdbx_estimate_distance(const MDBX_cursor *first, * as the number of elements. 
* * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, - MDBX_val *data, MDBX_cursor_op move_op, +LIBMDBX_API int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, MDBX_cursor_op move_op, ptrdiff_t *distance_items); /** \brief Estimates the size of a range as a number of elements. @@ -6079,11 +5877,8 @@ LIBMDBX_API int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, * \param [out] distance_items A pointer to store range estimation result. * * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, - const MDBX_val *begin_key, - const MDBX_val *begin_data, - const MDBX_val *end_key, - const MDBX_val *end_data, +LIBMDBX_API int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *begin_key, + const MDBX_val *begin_data, const MDBX_val *end_key, const MDBX_val *end_data, ptrdiff_t *distance_items); /** \brief The EPSILON value for mdbx_estimate_range() @@ -6123,8 +5918,7 @@ LIBMDBX_API int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, * \retval MDBX_RESULT_TRUE Given address is on the dirty page. * \retval MDBX_RESULT_FALSE Given address is NOT on the dirty page. * \retval Otherwise the error code. */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_is_dirty(const MDBX_txn *txn, - const void *ptr); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr); /** \brief Sequence generation for a table. * \ingroup c_crud @@ -6147,8 +5941,7 @@ MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_is_dirty(const MDBX_txn *txn, * some possible errors are: * \retval MDBX_RESULT_TRUE Increasing the sequence has resulted in an * overflow and therefore cannot be executed. 
*/ -LIBMDBX_API int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, - uint64_t increment); +LIBMDBX_API int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, uint64_t increment); /** \brief Compare two keys according to a particular table. * \ingroup c_crud @@ -6165,15 +5958,12 @@ LIBMDBX_API int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, * \param [in] b The second item to compare. * * \returns < 0 if a < b, 0 if a == b, > 0 if a > b */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cmp(const MDBX_txn *txn, - MDBX_dbi dbi, - const MDBX_val *a, +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, const MDBX_val *b); /** \brief Returns default internal key's comparator for given table flags. * \ingroup c_extra */ -MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API MDBX_cmp_func * -mdbx_get_keycmp(MDBX_db_flags_t flags); +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API MDBX_cmp_func *mdbx_get_keycmp(MDBX_db_flags_t flags); /** \brief Compare two data items according to a particular table. * \ingroup c_crud @@ -6190,15 +5980,12 @@ mdbx_get_keycmp(MDBX_db_flags_t flags); * \param [in] b The second item to compare. * * \returns < 0 if a < b, 0 if a == b, > 0 if a > b */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_dcmp(const MDBX_txn *txn, - MDBX_dbi dbi, - const MDBX_val *a, +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_dcmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, const MDBX_val *b); /** \brief Returns default internal data's comparator for given table flags * \ingroup c_extra */ -MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API MDBX_cmp_func * -mdbx_get_datacmp(MDBX_db_flags_t flags); +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API MDBX_cmp_func *mdbx_get_datacmp(MDBX_db_flags_t flags); /** \brief A callback function used to enumerate the reader lock table. 
* \ingroup c_statinfo @@ -6225,10 +6012,8 @@ mdbx_get_datacmp(MDBX_db_flags_t flags); * for reuse by completion read transaction. * * \returns < 0 on failure, >= 0 on success. \see mdbx_reader_list() */ -typedef int(MDBX_reader_list_func)(void *ctx, int num, int slot, mdbx_pid_t pid, - mdbx_tid_t thread, uint64_t txnid, - uint64_t lag, size_t bytes_used, - size_t bytes_retained) MDBX_CXX17_NOEXCEPT; +typedef int(MDBX_reader_list_func)(void *ctx, int num, int slot, mdbx_pid_t pid, mdbx_tid_t thread, uint64_t txnid, + uint64_t lag, size_t bytes_used, size_t bytes_retained) MDBX_CXX17_NOEXCEPT; /** \brief Enumerate the entries in the reader lock table. * @@ -6241,8 +6026,7 @@ typedef int(MDBX_reader_list_func)(void *ctx, int num, int slot, mdbx_pid_t pid, * * \returns A non-zero error value on failure and 0 on success, * or \ref MDBX_RESULT_TRUE if the reader lock table is empty. */ -LIBMDBX_API int mdbx_reader_list(const MDBX_env *env, - MDBX_reader_list_func *func, void *ctx); +LIBMDBX_API int mdbx_reader_list(const MDBX_env *env, MDBX_reader_list_func *func, void *ctx); /** \brief Check for stale entries in the reader lock table. * \ingroup c_extra @@ -6266,8 +6050,7 @@ LIBMDBX_API int mdbx_reader_check(MDBX_env *env, int *dead); * * \returns Number of transactions committed after the given was started for * read, or negative value on failure. */ -MDBX_DEPRECATED LIBMDBX_API int mdbx_txn_straggler(const MDBX_txn *txn, - int *percent); +MDBX_DEPRECATED LIBMDBX_API int mdbx_txn_straggler(const MDBX_txn *txn, int *percent); /** \brief Registers the current thread as a reader for the environment. * \ingroup c_extra @@ -6377,10 +6160,8 @@ LIBMDBX_API int mdbx_thread_unregister(const MDBX_env *env); * \retval 2 or great The reader process was terminated or killed, * and libmdbx should entirely reset reader registration. 
*/ -typedef int(MDBX_hsr_func)(const MDBX_env *env, const MDBX_txn *txn, - mdbx_pid_t pid, mdbx_tid_t tid, uint64_t laggard, - unsigned gap, size_t space, - int retry) MDBX_CXX17_NOEXCEPT; +typedef int(MDBX_hsr_func)(const MDBX_env *env, const MDBX_txn *txn, mdbx_pid_t pid, mdbx_tid_t tid, uint64_t laggard, + unsigned gap, size_t space, int retry) MDBX_CXX17_NOEXCEPT; /** \brief Sets a Handle-Slow-Readers callback to resolve database full/overflow * issue due to a reader(s) which prevents the old data from being recycled. @@ -6414,8 +6195,7 @@ LIBMDBX_API int mdbx_env_set_hsr(MDBX_env *env, MDBX_hsr_func *hsr_callback); * * \returns A MDBX_hsr_func function or NULL if disabled * or something wrong. */ -MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API MDBX_hsr_func * -mdbx_env_get_hsr(const MDBX_env *env); +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API MDBX_hsr_func *mdbx_env_get_hsr(const MDBX_env *env); /** \defgroup chk Checking and Recovery * Basically this is internal API for `mdbx_chk` tool, etc. @@ -6442,23 +6222,19 @@ LIBMDBX_API int mdbx_txn_unlock(MDBX_env *env); * * \note On Windows the \ref mdbx_env_open_for_recoveryW() is recommended * to use. */ -LIBMDBX_API int mdbx_env_open_for_recovery(MDBX_env *env, const char *pathname, - unsigned target_meta, - bool writeable); +LIBMDBX_API int mdbx_env_open_for_recovery(MDBX_env *env, const char *pathname, unsigned target_meta, bool writeable); #if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) /** \copydoc mdbx_env_open_for_recovery() * \ingroup c_extra * \note Available only on Windows. 
* \see mdbx_env_open_for_recovery() */ -LIBMDBX_API int mdbx_env_open_for_recoveryW(MDBX_env *env, - const wchar_t *pathname, - unsigned target_meta, +LIBMDBX_API int mdbx_env_open_for_recoveryW(MDBX_env *env, const wchar_t *pathname, unsigned target_meta, bool writeable); -#define mdbx_env_open_for_recoveryT(env, pathname, target_mets, writeable) \ +#define mdbx_env_open_for_recoveryT(env, pathname, target_mets, writeable) \ mdbx_env_open_for_recoveryW(env, pathname, target_mets, writeable) #else -#define mdbx_env_open_for_recoveryT(env, pathname, target_mets, writeable) \ +#define mdbx_env_open_for_recoveryT(env, pathname, target_mets, writeable) \ mdbx_env_open_for_recovery(env, pathname, target_mets, writeable) #endif /* Windows */ @@ -6500,20 +6276,16 @@ LIBMDBX_API int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target_meta); * другим размером страницы и/или изменением любых других параметров. * * \returns Ненулевое значение кода ошибки, либо 0 при успешном выполнении. */ -LIBMDBX_API int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *info, - size_t bytes); +LIBMDBX_API int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *info, size_t bytes); #if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) /** \copydoc mdbx_preopen_snapinfo() * \ingroup c_opening * \note Available only on Windows. 
* \see mdbx_preopen_snapinfo() */ -LIBMDBX_API int mdbx_preopen_snapinfoW(const wchar_t *pathname, - MDBX_envinfo *info, size_t bytes); -#define mdbx_preopen_snapinfoT(pathname, info, bytes) \ - mdbx_preopen_snapinfoW(pathname, info, bytes) +LIBMDBX_API int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *info, size_t bytes); +#define mdbx_preopen_snapinfoT(pathname, info, bytes) mdbx_preopen_snapinfoW(pathname, info, bytes) #else -#define mdbx_preopen_snapinfoT(pathname, info, bytes) \ - mdbx_preopen_snapinfo(pathname, info, bytes) +#define mdbx_preopen_snapinfoT(pathname, info, bytes) mdbx_preopen_snapinfo(pathname, info, bytes) #endif /* Windows */ /** \brief Флаги/опции для проверки целостности базы данных. @@ -6673,10 +6445,8 @@ typedef struct MDBX_chk_context { size_t total_payload_bytes; size_t table_total, table_processed; size_t total_unused_bytes, unused_pages; - size_t processed_pages, reclaimable_pages, gc_pages, alloc_pages, - backed_pages; - size_t problems_meta, tree_problems, gc_tree_problems, kv_tree_problems, - problems_gc, problems_kv, total_problems; + size_t processed_pages, reclaimable_pages, gc_pages, alloc_pages, backed_pages; + size_t problems_meta, tree_problems, gc_tree_problems, kv_tree_problems, problems_gc, problems_kv, total_problems; uint64_t steady_txnid, recent_txnid; /** Указатель на массив размером table_total с указателями на экземпляры * структур MDBX_chk_table_t с информацией о всех таблицах ключ-значение, @@ -6702,37 +6472,28 @@ typedef struct MDBX_chk_context { * \see mdbx_env_chk() */ typedef struct MDBX_chk_callbacks { bool (*check_break)(MDBX_chk_context_t *ctx); - int (*scope_push)(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *outer, - MDBX_chk_scope_t *inner, const char *fmt, va_list args); - int (*scope_conclude)(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *outer, - MDBX_chk_scope_t *inner, int err); - void (*scope_pop)(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *outer, - MDBX_chk_scope_t *inner); - void 
(*issue)(MDBX_chk_context_t *ctx, const char *object, - uint64_t entry_number, const char *issue, const char *extra_fmt, - va_list extra_args); - MDBX_chk_user_table_cookie_t *(*table_filter)(MDBX_chk_context_t *ctx, - const MDBX_val *name, - MDBX_db_flags_t flags); - int (*table_conclude)(MDBX_chk_context_t *ctx, const MDBX_chk_table_t *table, - MDBX_cursor *cursor, int err); + int (*scope_push)(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *outer, MDBX_chk_scope_t *inner, const char *fmt, + va_list args); + int (*scope_conclude)(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *outer, MDBX_chk_scope_t *inner, int err); + void (*scope_pop)(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *outer, MDBX_chk_scope_t *inner); + void (*issue)(MDBX_chk_context_t *ctx, const char *object, uint64_t entry_number, const char *issue, + const char *extra_fmt, va_list extra_args); + MDBX_chk_user_table_cookie_t *(*table_filter)(MDBX_chk_context_t *ctx, const MDBX_val *name, MDBX_db_flags_t flags); + int (*table_conclude)(MDBX_chk_context_t *ctx, const MDBX_chk_table_t *table, MDBX_cursor *cursor, int err); void (*table_dispose)(MDBX_chk_context_t *ctx, const MDBX_chk_table_t *table); - int (*table_handle_kv)(MDBX_chk_context_t *ctx, const MDBX_chk_table_t *table, - size_t entry_number, const MDBX_val *key, - const MDBX_val *value); + int (*table_handle_kv)(MDBX_chk_context_t *ctx, const MDBX_chk_table_t *table, size_t entry_number, + const MDBX_val *key, const MDBX_val *value); int (*stage_begin)(MDBX_chk_context_t *ctx, MDBX_chk_stage_t); int (*stage_end)(MDBX_chk_context_t *ctx, MDBX_chk_stage_t, int err); - MDBX_chk_line_t *(*print_begin)(MDBX_chk_context_t *ctx, - MDBX_chk_severity_t severity); + MDBX_chk_line_t *(*print_begin)(MDBX_chk_context_t *ctx, MDBX_chk_severity_t severity); void (*print_flush)(MDBX_chk_line_t *); void (*print_done)(MDBX_chk_line_t *); void (*print_chars)(MDBX_chk_line_t *, const char *str, size_t len); void (*print_format)(MDBX_chk_line_t *, const char *fmt, va_list 
args); - void (*print_size)(MDBX_chk_line_t *, const char *prefix, - const uint64_t value, const char *suffix); + void (*print_size)(MDBX_chk_line_t *, const char *prefix, const uint64_t value, const char *suffix); } MDBX_chk_callbacks_t; /** \brief Проверяет целостность базы данных. @@ -6764,10 +6525,8 @@ typedef struct MDBX_chk_callbacks { * секунды для выполнения проверки, * либо 0 при отсутствии ограничения. * \returns Нулевое значение в случае успеха, иначе код ошибки. */ -LIBMDBX_API int mdbx_env_chk(MDBX_env *env, const MDBX_chk_callbacks_t *cb, - MDBX_chk_context_t *ctx, - const MDBX_chk_flags_t flags, - MDBX_chk_severity_t verbosity, +LIBMDBX_API int mdbx_env_chk(MDBX_env *env, const MDBX_chk_callbacks_t *cb, MDBX_chk_context_t *ctx, + const MDBX_chk_flags_t flags, MDBX_chk_severity_t verbosity, unsigned timeout_seconds_16dot16); /** \brief Вспомогательная функция для подсчета проблем детектируемых diff --git a/mdbx.h++ b/mdbx.h++ index 1166dc0a..b5e28262 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -27,8 +27,7 @@ #pragma once /* Workaround for modern libstdc++ with CLANG < 4.x */ -#if defined(__SIZEOF_INT128__) && !defined(__GLIBCXX_TYPE_INT_N_0) && \ - defined(__clang__) && __clang_major__ < 4 +#if defined(__SIZEOF_INT128__) && !defined(__GLIBCXX_TYPE_INT_N_0) && defined(__clang__) && __clang_major__ < 4 #define __GLIBCXX_BITSIZE_INT_N_0 128 #define __GLIBCXX_TYPE_INT_N_0 __int128 #endif /* Workaround for modern libstdc++ with CLANG < 4.x */ @@ -37,14 +36,12 @@ #if !defined(_MSC_VER) || _MSC_VER < 1900 #error "C++11 compiler or better is required" #elif _MSC_VER >= 1910 -#error \ - "Please add `/Zc:__cplusplus` to MSVC compiler options to enforce it conform ISO C++" +#error "Please add `/Zc:__cplusplus` to MSVC compiler options to enforce it conform ISO C++" #endif /* MSVC is mad and don't define __cplusplus properly */ #endif /* __cplusplus < 201103L */ #if (defined(_WIN32) || defined(_WIN64)) && MDBX_WITHOUT_MSVC_CRT -#error \ - "CRT is required for C++ 
API, the MDBX_WITHOUT_MSVC_CRT option must be disabled" +#error "CRT is required for C++ API, the MDBX_WITHOUT_MSVC_CRT option must be disabled" #endif /* Windows */ #ifndef __has_include @@ -84,18 +81,13 @@ #ifndef MDBX_USING_CXX_EXPERIMETAL_FILESYSTEM #ifdef INCLUDE_STD_FILESYSTEM_EXPERIMENTAL #define MDBX_USING_CXX_EXPERIMETAL_FILESYSTEM 1 -#elif defined(__cpp_lib_filesystem) && __cpp_lib_filesystem >= 201703L && \ - __cplusplus >= 201703L +#elif defined(__cpp_lib_filesystem) && __cpp_lib_filesystem >= 201703L && __cplusplus >= 201703L #define MDBX_USING_CXX_EXPERIMETAL_FILESYSTEM 0 -#elif (!defined(_MSC_VER) || __cplusplus >= 201403L || \ - (defined(_MSC_VER) && \ - defined(_SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING) && \ - __cplusplus >= 201403L)) -#if defined(__cpp_lib_experimental_filesystem) && \ - __cpp_lib_experimental_filesystem >= 201406L +#elif (!defined(_MSC_VER) || __cplusplus >= 201403L || \ + (defined(_MSC_VER) && defined(_SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING) && __cplusplus >= 201403L)) +#if defined(__cpp_lib_experimental_filesystem) && __cpp_lib_experimental_filesystem >= 201406L #define MDBX_USING_CXX_EXPERIMETAL_FILESYSTEM 1 -#elif defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L && \ - __has_include() +#elif defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L && __has_include() #define MDBX_USING_CXX_EXPERIMETAL_FILESYSTEM 1 #else #define MDBX_USING_CXX_EXPERIMETAL_FILESYSTEM 0 @@ -122,13 +114,12 @@ #include "mdbx.h" -#if (defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L) || \ - (defined(__cpp_lib_endian) && __cpp_lib_endian >= 201907L) || \ - (defined(__cpp_lib_bitops) && __cpp_lib_bitops >= 201907L) || \ +#if (defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L) || \ + (defined(__cpp_lib_endian) && __cpp_lib_endian >= 201907L) || \ + (defined(__cpp_lib_bitops) && __cpp_lib_bitops >= 201907L) || \ (defined(__cpp_lib_int_pow2) && __cpp_lib_int_pow2 >= 202002L) 
#include -#elif !(defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \ - defined(__ORDER_BIG_ENDIAN__)) +#elif !(defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && defined(__ORDER_BIG_ENDIAN__)) #if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN) #define __ORDER_LITTLE_ENDIAN__ __LITTLE_ENDIAN #define __ORDER_BIG_ENDIAN__ __BIG_ENDIAN @@ -140,26 +131,18 @@ #else #define __ORDER_LITTLE_ENDIAN__ 1234 #define __ORDER_BIG_ENDIAN__ 4321 -#if defined(__LITTLE_ENDIAN__) || \ - (defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)) || \ - defined(__ARMEL__) || defined(__THUMBEL__) || defined(__AARCH64EL__) || \ - defined(__MIPSEL__) || defined(_MIPSEL) || defined(__MIPSEL) || \ - defined(_M_ARM) || defined(_M_ARM64) || defined(__e2k__) || \ - defined(__elbrus_4c__) || defined(__elbrus_8c__) || defined(__bfin__) || \ - defined(__BFIN__) || defined(__ia64__) || defined(_IA64) || \ - defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || \ - defined(__itanium__) || defined(__ia32__) || defined(__CYGWIN__) || \ - defined(_WIN64) || defined(_WIN32) || defined(__TOS_WIN__) || \ - defined(__WINDOWS__) +#if defined(__LITTLE_ENDIAN__) || (defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)) || defined(__ARMEL__) || \ + defined(__THUMBEL__) || defined(__AARCH64EL__) || defined(__MIPSEL__) || defined(_MIPSEL) || defined(__MIPSEL) || \ + defined(_M_ARM) || defined(_M_ARM64) || defined(__e2k__) || defined(__elbrus_4c__) || defined(__elbrus_8c__) || \ + defined(__bfin__) || defined(__BFIN__) || defined(__ia64__) || defined(_IA64) || defined(__IA64__) || \ + defined(__ia64) || defined(_M_IA64) || defined(__itanium__) || defined(__ia32__) || defined(__CYGWIN__) || \ + defined(_WIN64) || defined(_WIN32) || defined(__TOS_WIN__) || defined(__WINDOWS__) #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ -#elif defined(__BIG_ENDIAN__) || \ - (defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)) || \ - defined(__ARMEB__) || defined(__THUMBEB__) || 
defined(__AARCH64EB__) || \ - defined(__MIPSEB__) || defined(_MIPSEB) || defined(__MIPSEB) || \ - defined(__m68k__) || defined(M68000) || defined(__hppa__) || \ - defined(__hppa) || defined(__HPPA__) || defined(__sparc__) || \ - defined(__sparc) || defined(__370__) || defined(__THW_370__) || \ - defined(__s390__) || defined(__s390x__) || defined(__SYSC_ZARCH__) +#elif defined(__BIG_ENDIAN__) || (defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)) || defined(__ARMEB__) || \ + defined(__THUMBEB__) || defined(__AARCH64EB__) || defined(__MIPSEB__) || defined(_MIPSEB) || defined(__MIPSEB) || \ + defined(__m68k__) || defined(M68000) || defined(__hppa__) || defined(__hppa) || defined(__HPPA__) || \ + defined(__sparc__) || defined(__sparc) || defined(__370__) || defined(__THW_370__) || defined(__s390__) || \ + defined(__s390x__) || defined(__SYSC_ZARCH__) #define __BYTE_ORDER__ __ORDER_BIG_ENDIAN__ #endif #endif @@ -169,11 +152,9 @@ */ #if defined(DOXYGEN) #define MDBX_CXX17_CONSTEXPR constexpr -#elif defined(__cpp_constexpr) && __cpp_constexpr >= 201603L && \ - ((defined(_MSC_VER) && _MSC_VER >= 1915) || \ - (defined(__clang__) && __clang_major__ > 5) || \ - (defined(__GNUC__) && __GNUC__ > 7) || \ - (!defined(__GNUC__) && !defined(__clang__) && !defined(_MSC_VER))) +#elif defined(__cpp_constexpr) && __cpp_constexpr >= 201603L && \ + ((defined(_MSC_VER) && _MSC_VER >= 1915) || (defined(__clang__) && __clang_major__ > 5) || \ + (defined(__GNUC__) && __GNUC__ > 7) || (!defined(__GNUC__) && !defined(__clang__) && !defined(_MSC_VER))) #define MDBX_CXX17_CONSTEXPR constexpr #else #define MDBX_CXX17_CONSTEXPR inline @@ -183,10 +164,8 @@ */ #if defined(DOXYGEN) #define MDBX_CXX20_CONSTEXPR constexpr -#elif defined(__cpp_lib_is_constant_evaluated) && \ - __cpp_lib_is_constant_evaluated >= 201811L && \ - defined(__cpp_lib_constexpr_string) && \ - __cpp_lib_constexpr_string >= 201907L +#elif defined(__cpp_lib_is_constant_evaluated) && __cpp_lib_is_constant_evaluated >= 201811L && \ + 
defined(__cpp_lib_constexpr_string) && __cpp_lib_constexpr_string >= 201907L #define MDBX_CXX20_CONSTEXPR constexpr #else #define MDBX_CXX20_CONSTEXPR inline @@ -213,14 +192,11 @@ #elif defined NDEBUG #define MDBX_CONSTEXPR_ASSERT(expr) void(0) #else -#define MDBX_CONSTEXPR_ASSERT(expr) \ - ((expr) ? void(0) : [] { assert(!#expr); }()) +#define MDBX_CONSTEXPR_ASSERT(expr) ((expr) ? void(0) : [] { assert(!#expr); }()) #endif /* MDBX_CONSTEXPR_ASSERT */ #ifndef MDBX_LIKELY -#if defined(DOXYGEN) || \ - (defined(__GNUC__) || __has_builtin(__builtin_expect)) && \ - !defined(__COVERITY__) +#if defined(DOXYGEN) || (defined(__GNUC__) || __has_builtin(__builtin_expect)) && !defined(__COVERITY__) #define MDBX_LIKELY(cond) __builtin_expect(!!(cond), 1) #else #define MDBX_LIKELY(x) (x) @@ -228,9 +204,7 @@ #endif /* MDBX_LIKELY */ #ifndef MDBX_UNLIKELY -#if defined(DOXYGEN) || \ - (defined(__GNUC__) || __has_builtin(__builtin_expect)) && \ - !defined(__COVERITY__) +#if defined(DOXYGEN) || (defined(__GNUC__) || __has_builtin(__builtin_expect)) && !defined(__COVERITY__) #define MDBX_UNLIKELY(cond) __builtin_expect(!!(cond), 0) #else #define MDBX_UNLIKELY(x) (x) @@ -247,25 +221,21 @@ #define MDBX_IF_CONSTEXPR #endif /* MDBX_IF_CONSTEXPR */ -#if defined(DOXYGEN) || \ - (__has_cpp_attribute(fallthrough) && \ - (!defined(__clang__) || __clang__ > 4)) || \ +#if defined(DOXYGEN) || (__has_cpp_attribute(fallthrough) && (!defined(__clang__) || __clang__ > 4)) || \ __cplusplus >= 201703L #define MDBX_CXX17_FALLTHROUGH [[fallthrough]] #else #define MDBX_CXX17_FALLTHROUGH #endif /* MDBX_CXX17_FALLTHROUGH */ -#if defined(DOXYGEN) || (__has_cpp_attribute(likely) >= 201803L && \ - (!defined(__GNUC__) || __GNUC__ > 9)) +#if defined(DOXYGEN) || (__has_cpp_attribute(likely) >= 201803L && (!defined(__GNUC__) || __GNUC__ > 9)) #define MDBX_CXX20_LIKELY [[likely]] #else #define MDBX_CXX20_LIKELY #endif /* MDBX_CXX20_LIKELY */ #ifndef MDBX_CXX20_UNLIKELY -#if defined(DOXYGEN) || 
(__has_cpp_attribute(unlikely) >= 201803L && \ - (!defined(__GNUC__) || __GNUC__ > 9)) +#if defined(DOXYGEN) || (__has_cpp_attribute(unlikely) >= 201803L && (!defined(__GNUC__) || __GNUC__ > 9)) #define MDBX_CXX20_UNLIKELY [[unlikely]] #else #define MDBX_CXX20_UNLIKELY @@ -293,10 +263,9 @@ #ifndef MDBX_ASSERT_CXX20_CONCEPT_SATISFIED #if MDBX_HAVE_CXX20_CONCEPTS || defined(DOXYGEN) -#define MDBX_ASSERT_CXX20_CONCEPT_SATISFIED(CONCEPT, TYPE) \ - static_assert(CONCEPT) +#define MDBX_ASSERT_CXX20_CONCEPT_SATISFIED(CONCEPT, TYPE) static_assert(CONCEPT) #else -#define MDBX_ASSERT_CXX20_CONCEPT_SATISFIED(CONCEPT, NAME) \ +#define MDBX_ASSERT_CXX20_CONCEPT_SATISFIED(CONCEPT, NAME) \ static_assert(true, MDBX_STRINGIFY(CONCEPT) "<" MDBX_STRINGIFY(TYPE) ">") #endif #endif /* MDBX_ASSERT_CXX20_CONCEPT_SATISFIED */ @@ -304,9 +273,9 @@ #ifdef _MSC_VER #pragma warning(push, 4) #pragma warning(disable : 4127) /* conditional expression is constant */ -#pragma warning(disable : 4251) /* 'std::FOO' needs to have dll-interface to \ +#pragma warning(disable : 4251) /* 'std::FOO' needs to have dll-interface to \ be used by clients of 'mdbx::BAR' */ -#pragma warning(disable : 4275) /* non dll-interface 'std::FOO' used as \ +#pragma warning(disable : 4275) /* non dll-interface 'std::FOO' used as \ base for dll-interface 'mdbx::BAR' */ /* MSVC is mad and can generate this warning for its own intermediate * automatically generated code, which becomes unreachable after some kinds of @@ -317,9 +286,9 @@ #if defined(__LCC__) && __LCC__ >= 126 #pragma diagnostic push #if __LCC__ < 127 -#pragma diag_suppress 3058 /* workaround: call to is_constant_evaluated() \ +#pragma diag_suppress 3058 /* workaround: call to is_constant_evaluated() \ appearing in a constant expression `true` */ -#pragma diag_suppress 3060 /* workaround: call to is_constant_evaluated() \ +#pragma diag_suppress 3060 /* workaround: call to is_constant_evaluated() \ appearing in a constant expression `false` */ #endif #endif /* 
E2K LCC (warnings) */ @@ -349,16 +318,10 @@ using byte = unsigned char; #if defined(__cpp_lib_endian) && __cpp_lib_endian >= 201907L using endian = ::std::endian; -#elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \ - defined(__ORDER_BIG_ENDIAN__) -enum class endian { - little = __ORDER_LITTLE_ENDIAN__, - big = __ORDER_BIG_ENDIAN__, - native = __BYTE_ORDER__ -}; +#elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && defined(__ORDER_BIG_ENDIAN__) +enum class endian { little = __ORDER_LITTLE_ENDIAN__, big = __ORDER_BIG_ENDIAN__, native = __BYTE_ORDER__ }; #else -#error \ - "Please use a C++ compiler provides byte order information or C++20 support" +#error "Please use a C++ compiler provides byte order information or C++20 support" #endif /* Byte Order enum */ /// \copydoc MDBX_version_info @@ -374,19 +337,16 @@ MDBX_CXX11_CONSTEXPR const build_info &get_build() noexcept; static MDBX_CXX17_CONSTEXPR size_t strlen(const char *c_str) noexcept; /// \brief constexpr-enabled memcpy(). -static MDBX_CXX20_CONSTEXPR void *memcpy(void *dest, const void *src, - size_t bytes) noexcept; +static MDBX_CXX20_CONSTEXPR void *memcpy(void *dest, const void *src, size_t bytes) noexcept; /// \brief constexpr-enabled memcmp(). -static MDBX_CXX20_CONSTEXPR int memcmp(const void *a, const void *b, - size_t bytes) noexcept; +static MDBX_CXX20_CONSTEXPR int memcmp(const void *a, const void *b, size_t bytes) noexcept; /// \brief Legacy allocator /// but it is recommended to use \ref polymorphic_allocator. using legacy_allocator = ::std::string::allocator_type; -#if defined(DOXYGEN) || \ - (defined(__cpp_lib_memory_resource) && \ - __cpp_lib_memory_resource >= 201603L && _GLIBCXX_USE_CXX11_ABI) +#if defined(DOXYGEN) || \ + (defined(__cpp_lib_memory_resource) && __cpp_lib_memory_resource >= 201603L && _GLIBCXX_USE_CXX11_ABI) /// \brief Default polymorphic allocator for modern code. 
using polymorphic_allocator = ::std::pmr::string::allocator_type; using default_allocator = polymorphic_allocator; @@ -396,9 +356,7 @@ using default_allocator = legacy_allocator; struct slice; struct default_capacity_policy; -template -class buffer; +template class buffer; class env; class env_managed; class txn; @@ -421,13 +379,11 @@ namespace filesystem = ::std::experimental::filesystem::v1; namespace filesystem = ::std::experimental::filesystem; #endif #define MDBX_STD_FILESYSTEM_PATH ::mdbx::filesystem::path -#elif defined(DOXYGEN) || \ - (defined(__cpp_lib_filesystem) && __cpp_lib_filesystem >= 201703L && \ - defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L && \ - (!defined(__MAC_OS_X_VERSION_MIN_REQUIRED) || \ - __MAC_OS_X_VERSION_MIN_REQUIRED >= 101500) && \ - (!defined(__IPHONE_OS_VERSION_MIN_REQUIRED) || \ - __IPHONE_OS_VERSION_MIN_REQUIRED >= 130100)) && \ +#elif defined(DOXYGEN) || \ + (defined(__cpp_lib_filesystem) && __cpp_lib_filesystem >= 201703L && defined(__cpp_lib_string_view) && \ + __cpp_lib_string_view >= 201606L && \ + (!defined(__MAC_OS_X_VERSION_MIN_REQUIRED) || __MAC_OS_X_VERSION_MIN_REQUIRED >= 101500) && \ + (!defined(__IPHONE_OS_VERSION_MIN_REQUIRED) || __IPHONE_OS_VERSION_MIN_REQUIRED >= 130100)) && \ (!defined(_MSC_VER) || __cplusplus >= 201703L) namespace filesystem = ::std::filesystem; /// \brief Defined if `mdbx::filesystem::path` is available. 
@@ -447,8 +403,7 @@ using path = ::std::wstring; using path = ::std::string; #endif /* mdbx::path */ -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) +#if defined(__SIZEOF_INT128__) || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) #ifndef MDBX_U128_TYPE #define MDBX_U128_TYPE __uint128_t #endif /* MDBX_U128_TYPE */ @@ -495,10 +450,8 @@ public: error &operator=(const error &) = default; error &operator=(error &&) = default; - MDBX_CXX11_CONSTEXPR friend bool operator==(const error &a, - const error &b) noexcept; - MDBX_CXX11_CONSTEXPR friend bool operator!=(const error &a, - const error &b) noexcept; + MDBX_CXX11_CONSTEXPR friend bool operator==(const error &a, const error &b) noexcept; + MDBX_CXX11_CONSTEXPR friend bool operator!=(const error &a, const error &b) noexcept; MDBX_CXX11_CONSTEXPR bool is_success() const noexcept; MDBX_CXX11_CONSTEXPR bool is_result_true() const noexcept; @@ -517,30 +470,23 @@ public: /// \brief Returns true for MDBX's errors. MDBX_CXX11_CONSTEXPR bool is_mdbx_error() const noexcept; /// \brief Panics on unrecoverable errors inside destructors etc. 
- [[noreturn]] void panic(const char *context_where_when, - const char *func_who_what) const noexcept; + [[noreturn]] void panic(const char *context_where_when, const char *func_who_what) const noexcept; [[noreturn]] void throw_exception() const; [[noreturn]] static inline void throw_exception(int error_code); inline void throw_on_failure() const; inline void success_or_throw() const; inline void success_or_throw(const exception_thunk &) const; - inline void panic_on_failure(const char *context_where, - const char *func_who) const noexcept; - inline void success_or_panic(const char *context_where, - const char *func_who) const noexcept; + inline void panic_on_failure(const char *context_where, const char *func_who) const noexcept; + inline void success_or_panic(const char *context_where, const char *func_who) const noexcept; static inline void throw_on_nullptr(const void *ptr, MDBX_error_t error_code); static inline void success_or_throw(MDBX_error_t error_code); - static void success_or_throw(int error_code) { - success_or_throw(static_cast(error_code)); - } + static void success_or_throw(int error_code) { success_or_throw(static_cast(error_code)); } static inline void throw_on_failure(int error_code); static inline bool boolean_or_throw(int error_code); static inline void success_or_throw(int error_code, const exception_thunk &); static inline bool boolean_or_throw(int error_code, const exception_thunk &); - static inline void panic_on_failure(int error_code, const char *context_where, - const char *func_who) noexcept; - static inline void success_or_panic(int error_code, const char *context_where, - const char *func_who) noexcept; + static inline void panic_on_failure(int error_code, const char *context_where, const char *func_who) noexcept; + static inline void success_or_panic(int error_code, const char *context_where, const char *func_who) noexcept; }; /// \brief Base class for all libmdbx's exceptions that are corresponds @@ -577,10 +523,10 @@ public: 
virtual ~fatal() noexcept; }; -#define MDBX_DECLARE_EXCEPTION(NAME) \ - struct LIBMDBX_API_TYPE NAME : public exception { \ - NAME(const ::mdbx::error &); \ - virtual ~NAME() noexcept; \ +#define MDBX_DECLARE_EXCEPTION(NAME) \ + struct LIBMDBX_API_TYPE NAME : public exception { \ + NAME(const ::mdbx::error &); \ + virtual ~NAME() noexcept; \ } MDBX_DECLARE_EXCEPTION(bad_map_id); MDBX_DECLARE_EXCEPTION(bad_transaction); @@ -623,10 +569,8 @@ MDBX_DECLARE_EXCEPTION(mvcc_retarded); [[noreturn]] LIBMDBX_API void throw_bad_value_size(); [[noreturn]] LIBMDBX_API void throw_incomparable_cursors(); static MDBX_CXX14_CONSTEXPR size_t check_length(size_t bytes); -static MDBX_CXX14_CONSTEXPR size_t check_length(size_t headroom, - size_t payload); -static MDBX_CXX14_CONSTEXPR size_t check_length(size_t headroom, size_t payload, - size_t tailroom); +static MDBX_CXX14_CONSTEXPR size_t check_length(size_t headroom, size_t payload); +static MDBX_CXX14_CONSTEXPR size_t check_length(size_t headroom, size_t payload, size_t tailroom); /// end of cxx_exceptions @} @@ -661,35 +605,27 @@ concept ImmutableByteProducer = requires(const T &a, char array[42]) { * \interface SliceTranscoder * \brief SliceTranscoder C++20 concept */ template -concept SliceTranscoder = - ImmutableByteProducer && requires(const slice &source, const T &a) { - T(source); - { a.is_erroneous() } -> std::same_as; - }; +concept SliceTranscoder = ImmutableByteProducer && requires(const slice &source, const T &a) { + T(source); + { a.is_erroneous() } -> std::same_as; +}; #endif /* MDBX_HAVE_CXX20_CONCEPTS */ -template -inline buffer -make_buffer(PRODUCER &producer, const ALLOCATOR &allocator = ALLOCATOR()); +inline buffer make_buffer(PRODUCER &producer, const ALLOCATOR &allocator = ALLOCATOR()); -template -inline buffer -make_buffer(const PRODUCER &producer, const ALLOCATOR &allocator = ALLOCATOR()); +inline buffer make_buffer(const PRODUCER &producer, + const ALLOCATOR &allocator = ALLOCATOR()); -template -inline string 
make_string(PRODUCER &producer, - const ALLOCATOR &allocator = ALLOCATOR()); +template +inline string make_string(PRODUCER &producer, const ALLOCATOR &allocator = ALLOCATOR()); -template -inline string make_string(const PRODUCER &producer, - const ALLOCATOR &allocator = ALLOCATOR()); +template +inline string make_string(const PRODUCER &producer, const ALLOCATOR &allocator = ALLOCATOR()); /// \brief References a data located outside the slice. /// @@ -716,8 +652,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { MDBX_CXX14_CONSTEXPR slice(const void *begin, const void *end); /// \brief Create a slice that refers to text[0,strlen(text)-1]. - template - MDBX_CXX14_CONSTEXPR slice(const char (&text)[SIZE]) : slice(text, SIZE - 1) { + template MDBX_CXX14_CONSTEXPR slice(const char (&text)[SIZE]) : slice(text, SIZE - 1) { MDBX_CONSTEXPR_ASSERT(SIZE > 0 && text[SIZE - 1] == '\0'); } /// \brief Create a slice that refers to c_str[0,strlen(c_str)-1]. @@ -726,8 +661,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { /// \brief Create a slice that refers to the contents of "str". /// \note 'explicit' to avoid reference to the temporary std::string instance. 
template - explicit MDBX_CXX20_CONSTEXPR - slice(const ::std::basic_string &str) + explicit MDBX_CXX20_CONSTEXPR slice(const ::std::basic_string &str) : slice(str.data(), str.length() * sizeof(CHAR)) {} MDBX_CXX14_CONSTEXPR slice(const MDBX_val &src); @@ -736,29 +670,22 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { MDBX_CXX14_CONSTEXPR slice(slice &&src) noexcept; #if defined(DOXYGEN) || (defined(__cpp_lib_span) && __cpp_lib_span >= 202002L) - template - MDBX_CXX14_CONSTEXPR slice(const ::std::span &span) - : slice(span.begin(), span.end()) { - static_assert(::std::is_standard_layout::value && - !::std::is_pointer::value, + template MDBX_CXX14_CONSTEXPR slice(const ::std::span &span) : slice(span.begin(), span.end()) { + static_assert(::std::is_standard_layout::value && !::std::is_pointer::value, "Must be a standard layout type!"); } - template - MDBX_CXX14_CONSTEXPR ::std::span as_span() const { - static_assert(::std::is_standard_layout::value && - !::std::is_pointer::value, + template MDBX_CXX14_CONSTEXPR ::std::span as_span() const { + static_assert(::std::is_standard_layout::value && !::std::is_pointer::value, "Must be a standard layout type!"); if (MDBX_LIKELY(size() % sizeof(POD) == 0)) MDBX_CXX20_LIKELY - return ::std::span(static_cast(data()), - size() / sizeof(POD)); + return ::std::span(static_cast(data()), size() / sizeof(POD)); throw_bad_value_size(); } template MDBX_CXX14_CONSTEXPR ::std::span as_span() { - static_assert(::std::is_standard_layout::value && - !::std::is_pointer::value, + static_assert(::std::is_standard_layout::value && !::std::is_pointer::value, "Must be a standard layout type!"); if (MDBX_LIKELY(size() % sizeof(POD) == 0)) MDBX_CXX20_LIKELY @@ -766,38 +693,24 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { throw_bad_value_size(); } - MDBX_CXX14_CONSTEXPR ::std::span bytes() const { - return as_span(); - } + MDBX_CXX14_CONSTEXPR ::std::span bytes() const { return as_span(); } MDBX_CXX14_CONSTEXPR ::std::span bytes() { 
return as_span(); } - MDBX_CXX14_CONSTEXPR ::std::span chars() const { - return as_span(); - } + MDBX_CXX14_CONSTEXPR ::std::span chars() const { return as_span(); } MDBX_CXX14_CONSTEXPR ::std::span chars() { return as_span(); } #endif /* __cpp_lib_span >= 202002L */ -#if defined(DOXYGEN) || \ - (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) +#if defined(DOXYGEN) || (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) /// \brief Create a slice that refers to the same contents as "string_view" template - MDBX_CXX14_CONSTEXPR slice(const ::std::basic_string_view &sv) - : slice(sv.data(), sv.data() + sv.length()) {} + MDBX_CXX14_CONSTEXPR slice(const ::std::basic_string_view &sv) : slice(sv.data(), sv.data() + sv.length()) {} - template - slice(::std::basic_string_view &&sv) : slice(sv) { - sv = {}; - } + template slice(::std::basic_string_view &&sv) : slice(sv) { sv = {}; } #endif /* __cpp_lib_string_view >= 201606L */ - template - static MDBX_CXX14_CONSTEXPR slice wrap(const char (&text)[SIZE]) { - return slice(text); - } + template static MDBX_CXX14_CONSTEXPR slice wrap(const char (&text)[SIZE]) { return slice(text); } - template - MDBX_CXX14_CONSTEXPR static slice wrap(const POD &pod) { - static_assert(::std::is_standard_layout::value && - !::std::is_pointer::value, + template MDBX_CXX14_CONSTEXPR static slice wrap(const POD &pod) { + static_assert(::std::is_standard_layout::value && !::std::is_pointer::value, "Must be a standard layout type!"); return slice(&pod, sizeof(pod)); } @@ -808,19 +721,15 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { inline slice &assign(slice &&src) noexcept; inline slice &assign(::MDBX_val &&src); inline slice &assign(const void *begin, const void *end); - template - slice &assign(const ::std::basic_string &str) { + template slice &assign(const ::std::basic_string &str) { return assign(str.data(), str.length() * sizeof(CHAR)); } inline slice &assign(const char *c_str); -#if 
defined(DOXYGEN) || \ - (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) - template - slice &assign(const ::std::basic_string_view &view) { +#if defined(DOXYGEN) || (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) + template slice &assign(const ::std::basic_string_view &view) { return assign(view.begin(), view.end()); } - template - slice &assign(::std::basic_string_view &&view) { + template slice &assign(::std::basic_string_view &&view) { assign(view); view = {}; return *this; @@ -833,152 +742,119 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { operator MDBX_val *() noexcept { return this; } operator const MDBX_val *() const noexcept { return this; } -#if defined(DOXYGEN) || \ - (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) - template - slice &operator=(const ::std::basic_string_view &view) { +#if defined(DOXYGEN) || (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) + template slice &operator=(const ::std::basic_string_view &view) { return assign(view); } - template - slice &operator=(::std::basic_string_view &&view) { - return assign(view); - } + template slice &operator=(::std::basic_string_view &&view) { return assign(view); } /// \brief Return a string_view that references the same data as this slice. template > - MDBX_CXX11_CONSTEXPR ::std::basic_string_view - string_view() const noexcept { + MDBX_CXX11_CONSTEXPR ::std::basic_string_view string_view() const noexcept { static_assert(sizeof(CHAR) == 1, "Must be single byte characters"); return ::std::basic_string_view(char_ptr(), length()); } /// \brief Return a string_view that references the same data as this slice. 
template - MDBX_CXX11_CONSTEXPR explicit - operator ::std::basic_string_view() const noexcept { + MDBX_CXX11_CONSTEXPR explicit operator ::std::basic_string_view() const noexcept { return this->string_view(); } #endif /* __cpp_lib_string_view >= 201606L */ - template , - class ALLOCATOR = default_allocator> + template , class ALLOCATOR = default_allocator> MDBX_CXX20_CONSTEXPR ::std::basic_string as_string(const ALLOCATOR &allocator = ALLOCATOR()) const { static_assert(sizeof(CHAR) == 1, "Must be single byte characters"); - return ::std::basic_string(char_ptr(), length(), - allocator); + return ::std::basic_string(char_ptr(), length(), allocator); } template - MDBX_CXX20_CONSTEXPR explicit - operator ::std::basic_string() const { + MDBX_CXX20_CONSTEXPR explicit operator ::std::basic_string() const { return as_string(); } /// \brief Returns a string with a hexadecimal dump of the slice content. template - inline string - as_hex_string(bool uppercase = false, unsigned wrap_width = 0, - const ALLOCATOR &allocator = ALLOCATOR()) const; + inline string as_hex_string(bool uppercase = false, unsigned wrap_width = 0, + const ALLOCATOR &allocator = ALLOCATOR()) const; /// \brief Returns a string with a /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of the slice content. template - inline string - as_base58_string(unsigned wrap_width = 0, - const ALLOCATOR &allocator = ALLOCATOR()) const; + inline string as_base58_string(unsigned wrap_width = 0, const ALLOCATOR &allocator = ALLOCATOR()) const; /// \brief Returns a string with a /// [Base58](https://en.wikipedia.org/wiki/Base64) dump of the slice content. template - inline string - as_base64_string(unsigned wrap_width = 0, - const ALLOCATOR &allocator = ALLOCATOR()) const; + inline string as_base64_string(unsigned wrap_width = 0, const ALLOCATOR &allocator = ALLOCATOR()) const; /// \brief Returns a buffer with a hexadecimal dump of the slice content. 
- template - inline buffer - encode_hex(bool uppercase = false, unsigned wrap_width = 0, - const ALLOCATOR &allocator = ALLOCATOR()) const; + template + inline buffer encode_hex(bool uppercase = false, unsigned wrap_width = 0, + const ALLOCATOR &allocator = ALLOCATOR()) const; /// \brief Returns a buffer with a /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of the slice content. - template - inline buffer - encode_base58(unsigned wrap_width = 0, - const ALLOCATOR &allocator = ALLOCATOR()) const; + template + inline buffer encode_base58(unsigned wrap_width = 0, + const ALLOCATOR &allocator = ALLOCATOR()) const; /// \brief Returns a buffer with a /// [Base64](https://en.wikipedia.org/wiki/Base64) dump of the slice content. - template - inline buffer - encode_base64(unsigned wrap_width = 0, - const ALLOCATOR &allocator = ALLOCATOR()) const; + template + inline buffer encode_base64(unsigned wrap_width = 0, + const ALLOCATOR &allocator = ALLOCATOR()) const; /// \brief Decodes hexadecimal dump from the slice content to returned buffer. - template - inline buffer - hex_decode(bool ignore_spaces = false, - const ALLOCATOR &allocator = ALLOCATOR()) const; + template + inline buffer hex_decode(bool ignore_spaces = false, + const ALLOCATOR &allocator = ALLOCATOR()) const; /// \brief Decodes [Base58](https://en.wikipedia.org/wiki/Base58) dump /// from the slice content to returned buffer. - template - inline buffer - base58_decode(bool ignore_spaces = false, - const ALLOCATOR &allocator = ALLOCATOR()) const; + template + inline buffer base58_decode(bool ignore_spaces = false, + const ALLOCATOR &allocator = ALLOCATOR()) const; /// \brief Decodes [Base64](https://en.wikipedia.org/wiki/Base64) dump /// from the slice content to returned buffer. 
- template - inline buffer - base64_decode(bool ignore_spaces = false, - const ALLOCATOR &allocator = ALLOCATOR()) const; + template + inline buffer base64_decode(bool ignore_spaces = false, + const ALLOCATOR &allocator = ALLOCATOR()) const; /// \brief Checks whether the content of the slice is printable. /// \param [in] disable_utf8 By default if `disable_utf8` is `false` function /// checks that content bytes are printable ASCII-7 characters or a valid UTF8 /// sequences. Otherwise, if `disable_utf8` is `true` function checks that /// content bytes are printable extended 8-bit ASCII codes. - MDBX_NOTHROW_PURE_FUNCTION bool - is_printable(bool disable_utf8 = false) const noexcept; + MDBX_NOTHROW_PURE_FUNCTION bool is_printable(bool disable_utf8 = false) const noexcept; /// \brief Checks whether the content of the slice is a hexadecimal dump. /// \param [in] ignore_spaces If `true` function will skips spaces surrounding /// (before, between and after) a encoded bytes. However, spaces should not /// break a pair of characters encoding a single byte. - MDBX_NOTHROW_PURE_FUNCTION inline bool - is_hex(bool ignore_spaces = false) const noexcept; + MDBX_NOTHROW_PURE_FUNCTION inline bool is_hex(bool ignore_spaces = false) const noexcept; /// \brief Checks whether the content of the slice is a /// [Base58](https://en.wikipedia.org/wiki/Base58) dump. /// \param [in] ignore_spaces If `true` function will skips spaces surrounding /// (before, between and after) a encoded bytes. However, spaces should not /// break a code group of characters. - MDBX_NOTHROW_PURE_FUNCTION inline bool - is_base58(bool ignore_spaces = false) const noexcept; + MDBX_NOTHROW_PURE_FUNCTION inline bool is_base58(bool ignore_spaces = false) const noexcept; /// \brief Checks whether the content of the slice is a /// [Base64](https://en.wikipedia.org/wiki/Base64) dump. /// \param [in] ignore_spaces If `true` function will skips spaces surrounding /// (before, between and after) a encoded bytes. 
However, spaces should not /// break a code group of characters. - MDBX_NOTHROW_PURE_FUNCTION inline bool - is_base64(bool ignore_spaces = false) const noexcept; + MDBX_NOTHROW_PURE_FUNCTION inline bool is_base64(bool ignore_spaces = false) const noexcept; inline void swap(slice &other) noexcept; -#if defined(DOXYGEN) || \ - (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) - template - void swap(::std::basic_string_view &view) noexcept { +#if defined(DOXYGEN) || (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) + template void swap(::std::basic_string_view &view) noexcept { static_assert(sizeof(CHAR) == 1, "Must be single byte characters"); const auto temp = ::std::basic_string_view(*this); *this = view; @@ -1054,12 +930,10 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { inline void safe_remove_suffix(size_t n); /// \brief Checks if the data starts with the given prefix. - MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool - starts_with(const slice &prefix) const noexcept; + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool starts_with(const slice &prefix) const noexcept; /// \brief Checks if the data ends with the given suffix. - MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool - ends_with(const slice &suffix) const noexcept; + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool ends_with(const slice &suffix) const noexcept; /// \brief Returns the nth byte in the referenced data. /// \pre REQUIRES: `n < size()` @@ -1097,8 +971,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { /// \attention Function implementation and returned hash values may changed /// version to version, and in future the t1ha3 will be used here. Therefore /// values obtained from this function shouldn't be persisted anywhere. 
- MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR size_t - hash_value() const noexcept; + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR size_t hash_value() const noexcept; /// \brief Three-way fast non-lexicographically length-based comparison. /// \details Firstly compares length and if it equal then compare content @@ -1108,43 +981,31 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { /// or the same length and lexicographically less than `b`; /// `> 0` if `a` longer than `b`, /// or the same length and lexicographically great than `b`. - MDBX_NOTHROW_PURE_FUNCTION static MDBX_CXX14_CONSTEXPR intptr_t - compare_fast(const slice &a, const slice &b) noexcept; + MDBX_NOTHROW_PURE_FUNCTION static MDBX_CXX14_CONSTEXPR intptr_t compare_fast(const slice &a, const slice &b) noexcept; /// \brief Three-way lexicographically comparison. /// \return value: /// `== 0` if `a` lexicographically equal `b`; /// `< 0` if `a` lexicographically less than `b`; /// `> 0` if `a` lexicographically great than `b`. 
- MDBX_NOTHROW_PURE_FUNCTION static MDBX_CXX14_CONSTEXPR intptr_t - compare_lexicographically(const slice &a, const slice &b) noexcept; - friend MDBX_CXX14_CONSTEXPR bool operator==(const slice &a, - const slice &b) noexcept; - friend MDBX_CXX14_CONSTEXPR bool operator<(const slice &a, - const slice &b) noexcept; - friend MDBX_CXX14_CONSTEXPR bool operator>(const slice &a, - const slice &b) noexcept; - friend MDBX_CXX14_CONSTEXPR bool operator<=(const slice &a, - const slice &b) noexcept; - friend MDBX_CXX14_CONSTEXPR bool operator>=(const slice &a, - const slice &b) noexcept; - friend MDBX_CXX14_CONSTEXPR bool operator!=(const slice &a, - const slice &b) noexcept; + MDBX_NOTHROW_PURE_FUNCTION static MDBX_CXX14_CONSTEXPR intptr_t compare_lexicographically(const slice &a, + const slice &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator==(const slice &a, const slice &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator<(const slice &a, const slice &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator>(const slice &a, const slice &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator<=(const slice &a, const slice &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator>=(const slice &a, const slice &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator!=(const slice &a, const slice &b) noexcept; /// \brief Checks the slice is not refers to null address or has zero length. - MDBX_CXX11_CONSTEXPR bool is_valid() const noexcept { - return !(iov_base == nullptr && iov_len != 0); - } + MDBX_CXX11_CONSTEXPR bool is_valid() const noexcept { return !(iov_base == nullptr && iov_len != 0); } /// \brief Build an invalid slice which non-zero length and refers to null /// address. 
- MDBX_CXX14_CONSTEXPR static slice invalid() noexcept { - return slice(size_t(-1)); - } + MDBX_CXX14_CONSTEXPR static slice invalid() noexcept { return slice(size_t(-1)); } template MDBX_CXX14_CONSTEXPR POD as_pod() const { - static_assert(::std::is_standard_layout::value && - !::std::is_pointer::value, + static_assert(::std::is_standard_layout::value && !::std::is_pointer::value, "Must be a standard layout type!"); if (MDBX_LIKELY(size() == sizeof(POD))) MDBX_CXX20_LIKELY { @@ -1156,9 +1017,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { } #ifdef MDBX_U128_TYPE - MDBX_CXX14_CONSTEXPR MDBX_U128_TYPE as_uint128() const { - return as_pod(); - } + MDBX_CXX14_CONSTEXPR MDBX_U128_TYPE as_uint128() const { return as_pod(); } #endif /* MDBX_U128_TYPE */ MDBX_CXX14_CONSTEXPR uint64_t as_uint64() const { return as_pod(); } MDBX_CXX14_CONSTEXPR uint32_t as_uint32() const { return as_pod(); } @@ -1166,9 +1025,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { MDBX_CXX14_CONSTEXPR uint8_t as_uint8() const { return as_pod(); } #ifdef MDBX_I128_TYPE - MDBX_CXX14_CONSTEXPR MDBX_I128_TYPE as_int128() const { - return as_pod(); - } + MDBX_CXX14_CONSTEXPR MDBX_I128_TYPE as_int128() const { return as_pod(); } #endif /* MDBX_I128_TYPE */ MDBX_CXX14_CONSTEXPR int64_t as_int64() const { return as_pod(); } MDBX_CXX14_CONSTEXPR int32_t as_int32() const { return as_pod(); } @@ -1192,8 +1049,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { int8_t as_int8_adapt() const; protected: - MDBX_CXX11_CONSTEXPR slice(size_t invalid_length) noexcept - : ::MDBX_val({nullptr, invalid_length}) {} + MDBX_CXX11_CONSTEXPR slice(size_t invalid_length) noexcept : ::MDBX_val({nullptr, invalid_length}) {} }; //------------------------------------------------------------------------------ @@ -1201,8 +1057,7 @@ protected: namespace allocation_aware_details { template constexpr bool allocator_is_always_equal() noexcept { -#if defined(__cpp_lib_allocator_traits_is_always_equal) && \ - 
__cpp_lib_allocator_traits_is_always_equal >= 201411L +#if defined(__cpp_lib_allocator_traits_is_always_equal) && __cpp_lib_allocator_traits_is_always_equal >= 201411L return ::std::allocator_traits::is_always_equal::value; #else return ::std::is_empty::value; @@ -1210,17 +1065,13 @@ template constexpr bool allocator_is_always_equal() noexcept { } template ::propagate_on_container_move_assignment::value> + bool PoCMA = ::std::allocator_traits::propagate_on_container_move_assignment::value> struct move_assign_alloc; template struct move_assign_alloc { - static constexpr bool is_nothrow() noexcept { - return allocator_is_always_equal(); - } + static constexpr bool is_nothrow() noexcept { return allocator_is_always_equal(); } static MDBX_CXX20_CONSTEXPR bool is_moveable(T *target, T &source) noexcept { - return allocator_is_always_equal() || - target->get_allocator() == source.get_allocator(); + return allocator_is_always_equal() || target->get_allocator() == source.get_allocator(); } static MDBX_CXX20_CONSTEXPR void propagate(T *target, T &source) noexcept { assert(target->get_allocator() != source.get_allocator()); @@ -1231,8 +1082,7 @@ template struct move_assign_alloc { template struct move_assign_alloc { static constexpr bool is_nothrow() noexcept { - return allocator_is_always_equal() || - ::std::is_nothrow_move_assignable::value; + return allocator_is_always_equal() || ::std::is_nothrow_move_assignable::value; } static constexpr bool is_moveable(T *, T &) noexcept { return true; } static MDBX_CXX20_CONSTEXPR void propagate(T *target, T &source) { @@ -1242,14 +1092,12 @@ template struct move_assign_alloc { }; template ::propagate_on_container_copy_assignment::value> + bool PoCCA = ::std::allocator_traits::propagate_on_container_copy_assignment::value> struct copy_assign_alloc; template struct copy_assign_alloc { static constexpr bool is_nothrow() noexcept { return false; } - static MDBX_CXX20_CONSTEXPR void propagate(T *target, - const T &source) noexcept { + 
static MDBX_CXX20_CONSTEXPR void propagate(T *target, const T &source) noexcept { assert(target->get_allocator() != source.get_allocator()); (void)target; (void)source; @@ -1258,16 +1106,13 @@ template struct copy_assign_alloc { template struct copy_assign_alloc { static constexpr bool is_nothrow() noexcept { - return allocator_is_always_equal() || - ::std::is_nothrow_copy_assignable::value; + return allocator_is_always_equal() || ::std::is_nothrow_copy_assignable::value; } - static MDBX_CXX20_CONSTEXPR void - propagate(T *target, const T &source) noexcept(is_nothrow()) { + static MDBX_CXX20_CONSTEXPR void propagate(T *target, const T &source) noexcept(is_nothrow()) { if MDBX_IF_CONSTEXPR (!allocator_is_always_equal()) { if (MDBX_UNLIKELY(target->get_allocator() != source.get_allocator())) MDBX_CXX20_UNLIKELY target->get_allocator() = - ::std::allocator_traits::select_on_container_copy_construction( - source.get_allocator()); + ::std::allocator_traits::select_on_container_copy_construction(source.get_allocator()); } else { /* gag for buggy compilers */ (void)target; @@ -1277,16 +1122,12 @@ template struct copy_assign_alloc { }; template ::propagate_on_container_swap::value> + bool PoCS = ::std::allocator_traits::propagate_on_container_swap::value> struct swap_alloc; template struct swap_alloc { - static constexpr bool is_nothrow() noexcept { - return allocator_is_always_equal(); - } - static MDBX_CXX20_CONSTEXPR void propagate(T *target, - T &source) noexcept(is_nothrow()) { + static constexpr bool is_nothrow() noexcept { return allocator_is_always_equal(); } + static MDBX_CXX20_CONSTEXPR void propagate(T *target, T &source) noexcept(is_nothrow()) { if MDBX_IF_CONSTEXPR (!allocator_is_always_equal()) { if (MDBX_UNLIKELY(target->get_allocator() != source.get_allocator())) MDBX_CXX20_UNLIKELY throw_allocators_mismatch(); @@ -1304,11 +1145,9 @@ template struct swap_alloc { #if defined(__cpp_lib_is_swappable) && __cpp_lib_is_swappable >= 201603L 
::std::is_nothrow_swappable() || #endif /* __cpp_lib_is_swappable >= 201603L */ - (::std::is_nothrow_move_constructible::value && - ::std::is_nothrow_move_assignable::value); + (::std::is_nothrow_move_constructible::value && ::std::is_nothrow_move_assignable::value); } - static MDBX_CXX20_CONSTEXPR void propagate(T *target, - T &source) noexcept(is_nothrow()) { + static MDBX_CXX20_CONSTEXPR void propagate(T *target, T &source) noexcept(is_nothrow()) { if MDBX_IF_CONSTEXPR (!allocator_is_always_equal()) { if (MDBX_UNLIKELY(target->get_allocator() != source.get_allocator())) MDBX_CXX20_UNLIKELY ::std::swap(*target, source); @@ -1323,29 +1162,19 @@ template struct swap_alloc { } // namespace allocation_aware_details struct default_capacity_policy { - enum : size_t { - extra_inplace_storage = 0, - pettiness_threshold = 64, - max_reserve = 65536 - }; + enum : size_t { extra_inplace_storage = 0, pettiness_threshold = 64, max_reserve = 65536 }; static MDBX_CXX11_CONSTEXPR size_t round(const size_t value) { - static_assert((pettiness_threshold & (pettiness_threshold - 1)) == 0, - "pettiness_threshold must be a power of 2"); - static_assert(pettiness_threshold % 2 == 0, - "pettiness_threshold must be even"); - static_assert(pettiness_threshold >= sizeof(uint64_t), - "pettiness_threshold must be > 7"); + static_assert((pettiness_threshold & (pettiness_threshold - 1)) == 0, "pettiness_threshold must be a power of 2"); + static_assert(pettiness_threshold % 2 == 0, "pettiness_threshold must be even"); + static_assert(pettiness_threshold >= sizeof(uint64_t), "pettiness_threshold must be > 7"); constexpr const auto pettiness_mask = ~size_t(pettiness_threshold - 1); return (value + pettiness_threshold - 1) & pettiness_mask; } - static MDBX_CXX11_CONSTEXPR size_t advise(const size_t current, - const size_t wanna) { - static_assert(max_reserve % pettiness_threshold == 0, - "max_reserve must be a multiple of pettiness_threshold"); - static_assert(max_reserve / 3 > 
pettiness_threshold, - "max_reserve must be > pettiness_threshold * 3"); + static MDBX_CXX11_CONSTEXPR size_t advise(const size_t current, const size_t wanna) { + static_assert(max_reserve % pettiness_threshold == 0, "max_reserve must be a multiple of pettiness_threshold"); + static_assert(max_reserve / 3 > pettiness_threshold, "max_reserve must be > pettiness_threshold * 3"); if (wanna > current) /* doubling capacity, but don't made reserve more than max_reserve */ return round(wanna + ::std::min(size_t(max_reserve), current)); @@ -1366,8 +1195,7 @@ struct LIBMDBX_API to_hex { const slice source; const bool uppercase = false; const unsigned wrap_width = 0; - MDBX_CXX11_CONSTEXPR to_hex(const slice &source, bool uppercase = false, - unsigned wrap_width = 0) noexcept + MDBX_CXX11_CONSTEXPR to_hex(const slice &source, bool uppercase = false, unsigned wrap_width = 0) noexcept : source(source), uppercase(uppercase), wrap_width(wrap_width) { MDBX_ASSERT_CXX20_CONCEPT_SATISFIED(SliceTranscoder, to_hex); } @@ -1379,10 +1207,8 @@ struct LIBMDBX_API to_hex { } /// \brief Returns a buffer with a hexadecimal dump of a passed slice. - template - buffer - as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { + template + buffer as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { return make_buffer(*this, allocator); } @@ -1417,8 +1243,7 @@ struct LIBMDBX_API to_base58 { const slice source; const unsigned wrap_width = 0; MDBX_CXX11_CONSTEXPR - to_base58(const slice &source, unsigned wrap_width = 0) noexcept - : source(source), wrap_width(wrap_width) { + to_base58(const slice &source, unsigned wrap_width = 0) noexcept : source(source), wrap_width(wrap_width) { MDBX_ASSERT_CXX20_CONCEPT_SATISFIED(SliceTranscoder, to_base58); } @@ -1431,10 +1256,8 @@ struct LIBMDBX_API to_base58 { /// \brief Returns a buffer with a /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of a passed slice. 
- template - buffer - as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { + template + buffer as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { return make_buffer(*this, allocator); } @@ -1471,8 +1294,7 @@ struct LIBMDBX_API to_base64 { const slice source; const unsigned wrap_width = 0; MDBX_CXX11_CONSTEXPR - to_base64(const slice &source, unsigned wrap_width = 0) noexcept - : source(source), wrap_width(wrap_width) { + to_base64(const slice &source, unsigned wrap_width = 0) noexcept : source(source), wrap_width(wrap_width) { MDBX_ASSERT_CXX20_CONCEPT_SATISFIED(SliceTranscoder, to_base64); } @@ -1485,10 +1307,8 @@ struct LIBMDBX_API to_base64 { /// \brief Returns a buffer with a /// [Base64](https://en.wikipedia.org/wiki/Base64) dump of a passed slice. - template - buffer - as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { + template + buffer as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { return make_buffer(*this, allocator); } @@ -1519,24 +1339,15 @@ struct LIBMDBX_API to_base64 { bool is_erroneous() const noexcept { return false; } }; -inline ::std::ostream &operator<<(::std::ostream &out, const to_hex &wrapper) { - return wrapper.output(out); -} -inline ::std::ostream &operator<<(::std::ostream &out, - const to_base58 &wrapper) { - return wrapper.output(out); -} -inline ::std::ostream &operator<<(::std::ostream &out, - const to_base64 &wrapper) { - return wrapper.output(out); -} +inline ::std::ostream &operator<<(::std::ostream &out, const to_hex &wrapper) { return wrapper.output(out); } +inline ::std::ostream &operator<<(::std::ostream &out, const to_base58 &wrapper) { return wrapper.output(out); } +inline ::std::ostream &operator<<(::std::ostream &out, const to_base64 &wrapper) { return wrapper.output(out); } /// \brief Hexadecimal decoder which satisfy \ref SliceTranscoder concept. 
struct LIBMDBX_API from_hex { const slice source; const bool ignore_spaces = false; - MDBX_CXX11_CONSTEXPR from_hex(const slice &source, - bool ignore_spaces = false) noexcept + MDBX_CXX11_CONSTEXPR from_hex(const slice &source, bool ignore_spaces = false) noexcept : source(source), ignore_spaces(ignore_spaces) { MDBX_ASSERT_CXX20_CONCEPT_SATISFIED(SliceTranscoder, from_hex); } @@ -1548,18 +1359,14 @@ struct LIBMDBX_API from_hex { } /// \brief Decodes hexadecimal dump from a passed slice to returned buffer. - template - buffer - as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { + template + buffer as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { return make_buffer(*this, allocator); } /// \brief Returns the number of bytes needed for conversion /// hexadecimal dump from a passed slice to decoded data. - MDBX_CXX11_CONSTEXPR size_t envisage_result_length() const noexcept { - return source.length() >> 1; - } + MDBX_CXX11_CONSTEXPR size_t envisage_result_length() const noexcept { return source.length() >> 1; } /// \brief Fills the destination with data decoded from hexadecimal dump /// from a passed slice. @@ -1580,8 +1387,7 @@ struct LIBMDBX_API from_hex { struct LIBMDBX_API from_base58 { const slice source; const bool ignore_spaces = false; - MDBX_CXX11_CONSTEXPR from_base58(const slice &source, - bool ignore_spaces = false) noexcept + MDBX_CXX11_CONSTEXPR from_base58(const slice &source, bool ignore_spaces = false) noexcept : source(source), ignore_spaces(ignore_spaces) { MDBX_ASSERT_CXX20_CONCEPT_SATISFIED(SliceTranscoder, from_base58); } @@ -1595,10 +1401,8 @@ struct LIBMDBX_API from_base58 { /// \brief Decodes [Base58](https://en.wikipedia.org/wiki/Base58) dump from a /// passed slice to returned buffer. 
- template - buffer - as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { + template + buffer as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { return make_buffer(*this, allocator); } @@ -1629,8 +1433,7 @@ struct LIBMDBX_API from_base58 { struct LIBMDBX_API from_base64 { const slice source; const bool ignore_spaces = false; - MDBX_CXX11_CONSTEXPR from_base64(const slice &source, - bool ignore_spaces = false) noexcept + MDBX_CXX11_CONSTEXPR from_base64(const slice &source, bool ignore_spaces = false) noexcept : source(source), ignore_spaces(ignore_spaces) { MDBX_ASSERT_CXX20_CONCEPT_SATISFIED(SliceTranscoder, from_base64); } @@ -1644,19 +1447,15 @@ struct LIBMDBX_API from_base64 { /// \brief Decodes [Base64](https://en.wikipedia.org/wiki/Base64) dump from a /// passed slice to returned buffer. - template - buffer - as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { + template + buffer as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { return make_buffer(*this, allocator); } /// \brief Returns the number of bytes needed for conversion /// [Base64](https://en.wikipedia.org/wiki/Base64) dump from a passed slice to /// decoded data. - MDBX_CXX11_CONSTEXPR size_t envisage_result_length() const noexcept { - return (source.length() + 3) / 4 * 3; - } + MDBX_CXX11_CONSTEXPR size_t envisage_result_length() const noexcept { return (source.length() + 3) / 4 * 3; } /// \brief Fills the destination with data decoded from /// [Base64](https://en.wikipedia.org/wiki/Base64) dump from a passed slice. 
@@ -1677,8 +1476,7 @@ struct LIBMDBX_API from_base64 { template class buffer { public: #if !defined(_MSC_VER) || _MSC_VER > 1900 - using allocator_type = typename ::std::allocator_traits< - ALLOCATOR>::template rebind_alloc; + using allocator_type = typename ::std::allocator_traits::template rebind_alloc; #else using allocator_type = typename ALLOCATOR::template rebind::other; #endif /* MSVC is mad */ @@ -1696,44 +1494,34 @@ private: struct silo; using swap_alloc = allocation_aware_details::swap_alloc; struct silo /* Empty Base Class Optimization */ : public allocator_type { - MDBX_CXX20_CONSTEXPR const allocator_type &get_allocator() const noexcept { - return *this; - } - MDBX_CXX20_CONSTEXPR allocator_type &get_allocator() noexcept { - return *this; - } + MDBX_CXX20_CONSTEXPR const allocator_type &get_allocator() const noexcept { return *this; } + MDBX_CXX20_CONSTEXPR allocator_type &get_allocator() noexcept { return *this; } using allocator_pointer = typename allocator_traits::pointer; using allocator_const_pointer = typename allocator_traits::const_pointer; - MDBX_CXX20_CONSTEXPR ::std::pair - allocate_storage(size_t bytes) { + MDBX_CXX20_CONSTEXPR ::std::pair allocate_storage(size_t bytes) { assert(bytes >= sizeof(bin)); constexpr size_t unit = sizeof(typename allocator_type::value_type); - static_assert((unit & (unit - 1)) == 0, - "size of ALLOCATOR::value_type should be a power of 2"); + static_assert((unit & (unit - 1)) == 0, "size of ALLOCATOR::value_type should be a power of 2"); static_assert(unit > 0, "size of ALLOCATOR::value_type must be > 0"); const size_t n = (bytes + unit - 1) / unit; - return ::std::make_pair(allocator_traits::allocate(get_allocator(), n), - n * unit); + return ::std::make_pair(allocator_traits::allocate(get_allocator(), n), n * unit); } - MDBX_CXX20_CONSTEXPR void deallocate_storage(allocator_pointer ptr, - size_t bytes) { + MDBX_CXX20_CONSTEXPR void deallocate_storage(allocator_pointer ptr, size_t bytes) { constexpr size_t unit 
= sizeof(typename allocator_type::value_type); assert(ptr && bytes >= sizeof(bin) && bytes >= unit && bytes % unit == 0); allocator_traits::deallocate(get_allocator(), ptr, bytes / unit); } - static MDBX_CXX17_CONSTEXPR void * - to_address(allocator_pointer ptr) noexcept { + static MDBX_CXX17_CONSTEXPR void *to_address(allocator_pointer ptr) noexcept { #if defined(__cpp_lib_to_address) && __cpp_lib_to_address >= 201711L return static_cast(::std::to_address(ptr)); #else return static_cast(::std::addressof(*ptr)); #endif /* __cpp_lib_to_address */ } - static MDBX_CXX17_CONSTEXPR const void * - to_address(allocator_const_pointer ptr) noexcept { + static MDBX_CXX17_CONSTEXPR const void *to_address(allocator_const_pointer ptr) noexcept { #if defined(__cpp_lib_to_address) && __cpp_lib_to_address >= 201711L return static_cast(::std::to_address(ptr)); #else @@ -1745,55 +1533,41 @@ private: struct allocated { allocator_pointer ptr_; size_t capacity_bytes_; - constexpr allocated(allocator_pointer ptr, size_t bytes) noexcept - : ptr_(ptr), capacity_bytes_(bytes) {} + constexpr allocated(allocator_pointer ptr, size_t bytes) noexcept : ptr_(ptr), capacity_bytes_(bytes) {} constexpr allocated(const allocated &) noexcept = default; constexpr allocated(allocated &&) noexcept = default; - MDBX_CXX17_CONSTEXPR allocated & - operator=(const allocated &) noexcept = default; - MDBX_CXX17_CONSTEXPR allocated & - operator=(allocated &&) noexcept = default; + MDBX_CXX17_CONSTEXPR allocated &operator=(const allocated &) noexcept = default; + MDBX_CXX17_CONSTEXPR allocated &operator=(allocated &&) noexcept = default; }; allocated allocated_; uint64_t align_hint_; - byte inplace_[(sizeof(allocated) + extra_inplace_storage + 7u) & - ~size_t(7)]; + byte inplace_[(sizeof(allocated) + extra_inplace_storage + 7u) & ~size_t(7)]; - static constexpr bool - is_suitable_for_inplace(size_t capacity_bytes) noexcept { + static constexpr bool is_suitable_for_inplace(size_t capacity_bytes) noexcept { 
static_assert(sizeof(bin) == sizeof(inplace_), "WTF?"); return capacity_bytes < sizeof(bin); } enum : byte { lastbyte_inplace_signature = byte(~byte(0)) }; enum : size_t { - inplace_signature_limit = - size_t(lastbyte_inplace_signature) - << (sizeof(size_t /* allocated::capacity_bytes_ */) - 1) * CHAR_BIT + inplace_signature_limit = size_t(lastbyte_inplace_signature) + << (sizeof(size_t /* allocated::capacity_bytes_ */) - 1) * CHAR_BIT }; - constexpr byte inplace_lastbyte() const noexcept { - return inplace_[sizeof(bin) - 1]; - } - MDBX_CXX17_CONSTEXPR byte &inplace_lastbyte() noexcept { - return inplace_[sizeof(bin) - 1]; - } + constexpr byte inplace_lastbyte() const noexcept { return inplace_[sizeof(bin) - 1]; } + MDBX_CXX17_CONSTEXPR byte &inplace_lastbyte() noexcept { return inplace_[sizeof(bin) - 1]; } constexpr bool is_inplace() const noexcept { - static_assert(size_t(inplace_signature_limit) > size_t(max_capacity), + static_assert(size_t(inplace_signature_limit) > size_t(max_capacity), "WTF?"); + static_assert(std::numeric_limits::max() - (std::numeric_limits::max() >> CHAR_BIT) == + inplace_signature_limit, "WTF?"); - static_assert( - std::numeric_limits::max() - - (std::numeric_limits::max() >> CHAR_BIT) == - inplace_signature_limit, - "WTF?"); return inplace_lastbyte() == lastbyte_inplace_signature; } constexpr bool is_allocated() const noexcept { return !is_inplace(); } - template - MDBX_CXX17_CONSTEXPR byte *make_inplace() noexcept { + template MDBX_CXX17_CONSTEXPR byte *make_inplace() noexcept { if (destroy_ptr) { MDBX_CONSTEXPR_ASSERT(is_allocated()); /* properly destroy allocator::pointer */ @@ -1803,14 +1577,12 @@ private: /* workaround for "uninitialized" warning from some compilers */ memset(&allocated_.ptr_, 0, sizeof(allocated_.ptr_)); inplace_lastbyte() = lastbyte_inplace_signature; - MDBX_CONSTEXPR_ASSERT(is_inplace() && address() == inplace_ && - is_suitable_for_inplace(capacity())); + MDBX_CONSTEXPR_ASSERT(is_inplace() && address() == 
inplace_ && is_suitable_for_inplace(capacity())); return address(); } template - MDBX_CXX17_CONSTEXPR byte * - make_allocated(allocator_pointer ptr, size_t capacity_bytes) noexcept { + MDBX_CXX17_CONSTEXPR byte *make_allocated(allocator_pointer ptr, size_t capacity_bytes) noexcept { MDBX_CONSTEXPR_ASSERT(inplace_signature_limit > capacity_bytes); if (construct_ptr) /* properly construct allocator::pointer */ @@ -1820,8 +1592,7 @@ private: allocated_.ptr_ = ptr; allocated_.capacity_bytes_ = capacity_bytes; } - MDBX_CONSTEXPR_ASSERT(is_allocated() && address() == to_address(ptr) && - capacity() == capacity_bytes); + MDBX_CONSTEXPR_ASSERT(is_allocated() && address() == to_address(ptr) && capacity() == capacity_bytes); return address(); } @@ -1830,8 +1601,7 @@ private: make_inplace(); (void)capacity_bytes; } - MDBX_CXX20_CONSTEXPR bin(allocator_pointer ptr, - size_t capacity_bytes) noexcept { + MDBX_CXX20_CONSTEXPR bin(allocator_pointer ptr, size_t capacity_bytes) noexcept { MDBX_CONSTEXPR_ASSERT(!is_suitable_for_inplace(capacity_bytes)); make_allocated(ptr, capacity_bytes); } @@ -1863,11 +1633,9 @@ private: memcpy(inplace_, ditto.inplace_, sizeof(inplace_)); MDBX_CONSTEXPR_ASSERT(is_inplace()); } else if (is_inplace()) - make_allocated(ditto.allocated_.ptr_, - ditto.allocated_.capacity_bytes_); + make_allocated(ditto.allocated_.ptr_, ditto.allocated_.capacity_bytes_); else - make_allocated(ditto.allocated_.ptr_, - ditto.allocated_.capacity_bytes_); + make_allocated(ditto.allocated_.ptr_, ditto.allocated_.capacity_bytes_); return *this; } @@ -1878,29 +1646,22 @@ private: return *this; } - static MDBX_CXX20_CONSTEXPR size_t advise_capacity(const size_t current, - const size_t wanna) { + static MDBX_CXX20_CONSTEXPR size_t advise_capacity(const size_t current, const size_t wanna) { if (MDBX_UNLIKELY(wanna > max_capacity)) MDBX_CXX20_UNLIKELY throw_max_length_exceeded(); const size_t advised = reservation_policy::advise(current, wanna); assert(advised >= wanna); - return 
::std::min(size_t(max_capacity), - ::std::max(sizeof(bin) - 1, advised)); + return ::std::min(size_t(max_capacity), ::std::max(sizeof(bin) - 1, advised)); } constexpr const byte *address() const noexcept { - return is_inplace() - ? inplace_ - : static_cast(to_address(allocated_.ptr_)); + return is_inplace() ? inplace_ : static_cast(to_address(allocated_.ptr_)); } MDBX_CXX17_CONSTEXPR byte *address() noexcept { - return is_inplace() ? inplace_ - : static_cast(to_address(allocated_.ptr_)); - } - constexpr size_t capacity() const noexcept { - return is_inplace() ? sizeof(bin) - 1 : allocated_.capacity_bytes_; + return is_inplace() ? inplace_ : static_cast(to_address(allocated_.ptr_)); } + constexpr size_t capacity() const noexcept { return is_inplace() ? sizeof(bin) - 1 : allocated_.capacity_bytes_; } } bin_; MDBX_CXX20_CONSTEXPR void *init(size_t capacity) { @@ -1917,36 +1678,30 @@ private: MDBX_CXX20_CONSTEXPR void release() noexcept { if (bin_.is_allocated()) { - deallocate_storage(bin_.allocated_.ptr_, - bin_.allocated_.capacity_bytes_); + deallocate_storage(bin_.allocated_.ptr_, bin_.allocated_.capacity_bytes_); bin_.template make_inplace(); } } template - MDBX_CXX20_CONSTEXPR void * - reshape(const size_t wanna_capacity, const size_t wanna_headroom, - const void *const content, const size_t length) { + MDBX_CXX20_CONSTEXPR void *reshape(const size_t wanna_capacity, const size_t wanna_headroom, + const void *const content, const size_t length) { assert(wanna_capacity >= wanna_headroom + length); const size_t old_capacity = bin_.capacity(); - const size_t new_capacity = - bin::advise_capacity(old_capacity, wanna_capacity); + const size_t new_capacity = bin::advise_capacity(old_capacity, wanna_capacity); if (MDBX_LIKELY(new_capacity == old_capacity)) MDBX_CXX20_LIKELY { - assert(bin_.is_inplace() == - bin::is_suitable_for_inplace(new_capacity)); + assert(bin_.is_inplace() == bin::is_suitable_for_inplace(new_capacity)); byte *const new_place = bin_.address() + 
wanna_headroom; if (MDBX_LIKELY(length)) MDBX_CXX20_LIKELY { if (external_content) memcpy(new_place, content, length); else { - const size_t old_headroom = - bin_.address() - static_cast(content); + const size_t old_headroom = bin_.address() - static_cast(content); assert(old_capacity >= old_headroom + length); if (MDBX_UNLIKELY(old_headroom != wanna_headroom)) - MDBX_CXX20_UNLIKELY ::std::memmove(new_place, content, - length); + MDBX_CXX20_UNLIKELY ::std::memmove(new_place, content, length); } } return new_place; @@ -1955,8 +1710,7 @@ private: if (bin::is_suitable_for_inplace(new_capacity)) { assert(bin_.is_allocated()); const auto old_allocated = ::std::move(bin_.allocated_.ptr_); - byte *const new_place = - bin_.template make_inplace() + wanna_headroom; + byte *const new_place = bin_.template make_inplace() + wanna_headroom; if (MDBX_LIKELY(length)) MDBX_CXX20_LIKELY memcpy(new_place, content, length); deallocate_storage(old_allocated, old_capacity); @@ -1966,8 +1720,7 @@ private: if (!bin_.is_allocated()) { const auto pair = allocate_storage(new_capacity); assert(pair.second >= new_capacity); - byte *const new_place = - static_cast(to_address(pair.first)) + wanna_headroom; + byte *const new_place = static_cast(to_address(pair.first)) + wanna_headroom; if (MDBX_LIKELY(length)) MDBX_CXX20_LIKELY memcpy(new_place, content, length); bin_.template make_allocated(pair.first, pair.second); @@ -1979,9 +1732,7 @@ private: deallocate_storage(old_allocated, old_capacity); const auto pair = allocate_storage(new_capacity); assert(pair.second >= new_capacity); - byte *const new_place = - bin_.template make_allocated(pair.first, pair.second) + - wanna_headroom; + byte *const new_place = bin_.template make_allocated(pair.first, pair.second) + wanna_headroom; if (MDBX_LIKELY(length)) MDBX_CXX20_LIKELY memcpy(new_place, content, length); if (!external_content) @@ -1997,8 +1748,7 @@ private: assert(capacity() >= offset); return bin_.address() + offset; } - MDBX_CXX20_CONSTEXPR 
byte *put(size_t offset, const void *ptr, - size_t length) { + MDBX_CXX20_CONSTEXPR byte *put(size_t offset, const void *ptr, size_t length) { assert(capacity() >= offset + length); return static_cast(memcpy(get(offset), ptr, length)); } @@ -2008,128 +1758,92 @@ private: MDBX_CXX20_CONSTEXPR silo() noexcept : allocator_type() { init(0); } MDBX_CXX20_CONSTEXPR - silo(const allocator_type &alloc) noexcept : allocator_type(alloc) { - init(0); - } + silo(const allocator_type &alloc) noexcept : allocator_type(alloc) { init(0); } MDBX_CXX20_CONSTEXPR silo(size_t capacity) { init(capacity); } - MDBX_CXX20_CONSTEXPR silo(size_t capacity, const allocator_type &alloc) - : silo(alloc) { - init(capacity); - } + MDBX_CXX20_CONSTEXPR silo(size_t capacity, const allocator_type &alloc) : silo(alloc) { init(capacity); } - MDBX_CXX20_CONSTEXPR silo(silo &&ditto) noexcept( - ::std::is_nothrow_move_constructible::value) - : allocator_type(::std::move(ditto.get_allocator())), - bin_(::std::move(ditto.bin_)) {} + MDBX_CXX20_CONSTEXPR silo(silo &&ditto) noexcept(::std::is_nothrow_move_constructible::value) + : allocator_type(::std::move(ditto.get_allocator())), bin_(::std::move(ditto.bin_)) {} - MDBX_CXX20_CONSTEXPR silo(size_t capacity, size_t headroom, const void *ptr, - size_t length) - : silo(capacity) { + MDBX_CXX20_CONSTEXPR silo(size_t capacity, size_t headroom, const void *ptr, size_t length) : silo(capacity) { assert(capacity >= headroom + length); if (length) put(headroom, ptr, length); } // select_on_container_copy_construction() - MDBX_CXX20_CONSTEXPR silo(size_t capacity, size_t headroom, const void *ptr, - size_t length, const allocator_type &alloc) + MDBX_CXX20_CONSTEXPR silo(size_t capacity, size_t headroom, const void *ptr, size_t length, + const allocator_type &alloc) : silo(capacity, alloc) { assert(capacity >= headroom + length); if (length) put(headroom, ptr, length); } - MDBX_CXX20_CONSTEXPR silo(const void *ptr, size_t length) - : silo(length, 0, ptr, length) {} - 
MDBX_CXX20_CONSTEXPR silo(const void *ptr, size_t length, - const allocator_type &alloc) + MDBX_CXX20_CONSTEXPR silo(const void *ptr, size_t length) : silo(length, 0, ptr, length) {} + MDBX_CXX20_CONSTEXPR silo(const void *ptr, size_t length, const allocator_type &alloc) : silo(length, 0, ptr, length, alloc) {} ~silo() { release(); } //-------------------------------------------------------------------------- - MDBX_CXX20_CONSTEXPR void *assign(size_t headroom, const void *ptr, - size_t length, size_t tailroom) { + MDBX_CXX20_CONSTEXPR void *assign(size_t headroom, const void *ptr, size_t length, size_t tailroom) { return reshape(headroom + length + tailroom, headroom, ptr, length); } - MDBX_CXX20_CONSTEXPR void *assign(const void *ptr, size_t length) { - return assign(0, ptr, length, 0); - } + MDBX_CXX20_CONSTEXPR void *assign(const void *ptr, size_t length) { return assign(0, ptr, length, 0); } - MDBX_CXX20_CONSTEXPR silo &assign(const silo &ditto, size_t headroom, - slice &content) { + MDBX_CXX20_CONSTEXPR silo &assign(const silo &ditto, size_t headroom, slice &content) { assert(ditto.get() + headroom == content.byte_ptr()); - if MDBX_IF_CONSTEXPR (!allocation_aware_details:: - allocator_is_always_equal()) { + if MDBX_IF_CONSTEXPR (!allocation_aware_details::allocator_is_always_equal()) { if (MDBX_UNLIKELY(get_allocator() != ditto.get_allocator())) MDBX_CXX20_UNLIKELY { release(); - allocation_aware_details::copy_assign_alloc< - silo, allocator_type>::propagate(this, ditto); + allocation_aware_details::copy_assign_alloc::propagate(this, ditto); } } - content.iov_base = reshape(ditto.capacity(), headroom, - content.data(), content.length()); + content.iov_base = reshape(ditto.capacity(), headroom, content.data(), content.length()); return *this; } MDBX_CXX20_CONSTEXPR silo & - assign(silo &&ditto, size_t headroom, slice &content) noexcept( - allocation_aware_details::move_assign_alloc< - silo, allocator_type>::is_nothrow()) { + assign(silo &&ditto, size_t 
headroom, + slice &content) noexcept(allocation_aware_details::move_assign_alloc::is_nothrow()) { assert(ditto.get() + headroom == content.byte_ptr()); - if (allocation_aware_details::move_assign_alloc< - silo, allocator_type>::is_moveable(this, ditto)) { + if (allocation_aware_details::move_assign_alloc::is_moveable(this, ditto)) { release(); - allocation_aware_details::move_assign_alloc< - silo, allocator_type>::propagate(this, ditto); + allocation_aware_details::move_assign_alloc::propagate(this, ditto); /* no reallocation nor copying required */ bin_ = ::std::move(ditto.bin_); assert(get() + headroom == content.byte_ptr()); } else { /* copy content since allocators are different */ - content.iov_base = reshape(ditto.capacity(), headroom, - content.data(), content.length()); + content.iov_base = reshape(ditto.capacity(), headroom, content.data(), content.length()); ditto.release(); } return *this; } - MDBX_CXX20_CONSTEXPR void *clear() { - return reshape(0, 0, nullptr, 0); - } - MDBX_CXX20_CONSTEXPR void *clear_and_reserve(size_t whole_capacity, - size_t headroom) { + MDBX_CXX20_CONSTEXPR void *clear() { return reshape(0, 0, nullptr, 0); } + MDBX_CXX20_CONSTEXPR void *clear_and_reserve(size_t whole_capacity, size_t headroom) { return reshape(whole_capacity, headroom, nullptr, 0); } - MDBX_CXX20_CONSTEXPR void resize(size_t capacity, size_t headroom, - slice &content) { - content.iov_base = - reshape(capacity, headroom, content.iov_base, content.iov_len); + MDBX_CXX20_CONSTEXPR void resize(size_t capacity, size_t headroom, slice &content) { + content.iov_base = reshape(capacity, headroom, content.iov_base, content.iov_len); } - MDBX_CXX20_CONSTEXPR void swap(silo &ditto) noexcept( - allocation_aware_details::swap_alloc::is_nothrow()) { - allocation_aware_details::swap_alloc::propagate( - this, ditto); + MDBX_CXX20_CONSTEXPR void + swap(silo &ditto) noexcept(allocation_aware_details::swap_alloc::is_nothrow()) { + 
allocation_aware_details::swap_alloc::propagate(this, ditto); ::std::swap(bin_, ditto.bin_); } /* MDBX_CXX20_CONSTEXPR void shrink_to_fit() { TODO } */ - MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX11_CONSTEXPR size_t - capacity() const noexcept { - return bin_.capacity(); - } - MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX11_CONSTEXPR const void * - data(size_t offset = 0) const noexcept { - return get(offset); - } - MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX11_CONSTEXPR void * - data(size_t offset = 0) noexcept { + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX11_CONSTEXPR size_t capacity() const noexcept { return bin_.capacity(); } + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX11_CONSTEXPR const void *data(size_t offset = 0) const noexcept { return get(offset); } + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX11_CONSTEXPR void *data(size_t offset = 0) noexcept { return get(offset); } }; silo silo_; @@ -2141,21 +1855,18 @@ private: slice_.iov_base = silo_.data(); } - MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR const byte * - silo_begin() const noexcept { + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR const byte *silo_begin() const noexcept { return static_cast(silo_.data()); } - MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR const byte * - silo_end() const noexcept { + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR const byte *silo_end() const noexcept { return silo_begin() + silo_.capacity(); } struct data_preserver : public exception_thunk { buffer data; data_preserver(allocator_type &allocator) : data(allocator) {} - static int callback(void *context, MDBX_val *target, const void *src, - size_t bytes) noexcept { + static int callback(void *context, MDBX_val *target, const void *src, size_t bytes) noexcept { auto self = static_cast(context); assert(self->is_clean()); assert(&self->data.slice_ == target); @@ -2168,12 +1879,8 @@ private: return MDBX_RESULT_TRUE; } } - MDBX_CXX11_CONSTEXPR operator MDBX_preserve_func() const noexcept { - return callback; - } - MDBX_CXX11_CONSTEXPR operator const buffer &() 
const noexcept { - return data; - } + MDBX_CXX11_CONSTEXPR operator MDBX_preserve_func() const noexcept { return callback; } + MDBX_CXX11_CONSTEXPR operator const buffer &() const noexcept { return data; } MDBX_CXX11_CONSTEXPR operator buffer &() noexcept { return data; } }; @@ -2182,61 +1889,46 @@ public: /// \todo buffer& operator>>(buffer&, ...) for reading (delegated to slice) /// \todo template key(X) for encoding keys while writing - using move_assign_alloc = - allocation_aware_details::move_assign_alloc; - using copy_assign_alloc = - allocation_aware_details::copy_assign_alloc; + using move_assign_alloc = allocation_aware_details::move_assign_alloc; + using copy_assign_alloc = allocation_aware_details::copy_assign_alloc; /// \brief Returns the associated allocator. - MDBX_CXX20_CONSTEXPR allocator_type get_allocator() const { - return silo_.get_allocator(); - } + MDBX_CXX20_CONSTEXPR allocator_type get_allocator() const { return silo_.get_allocator(); } /// \brief Checks whether data chunk stored inside the buffer, otherwise /// buffer just refers to data located outside the buffer. - MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR bool - is_freestanding() const noexcept { + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR bool is_freestanding() const noexcept { static_assert(size_t(-long(max_length)) > max_length, "WTF?"); return size_t(byte_ptr() - silo_begin()) < silo_.capacity(); } /// \brief Checks whether the buffer just refers to data located outside /// the buffer, rather than stores it. - MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR bool - is_reference() const noexcept { - return !is_freestanding(); - } + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR bool is_reference() const noexcept { return !is_freestanding(); } /// \brief Returns the number of bytes that can be held in currently allocated /// storage. 
- MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR size_t - capacity() const noexcept { + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR size_t capacity() const noexcept { return is_freestanding() ? silo_.capacity() : 0; } /// \brief Returns the number of bytes that available in currently allocated /// storage ahead the currently beginning of data. - MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR size_t - headroom() const noexcept { + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR size_t headroom() const noexcept { return is_freestanding() ? slice_.byte_ptr() - silo_begin() : 0; } /// \brief Returns the number of bytes that available in currently allocated /// storage after the currently data end. - MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR size_t - tailroom() const noexcept { + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR size_t tailroom() const noexcept { return is_freestanding() ? capacity() - headroom() - slice_.length() : 0; } /// \brief Returns casted to const pointer to byte an address of data. - MDBX_CXX11_CONSTEXPR const byte *byte_ptr() const noexcept { - return slice_.byte_ptr(); - } + MDBX_CXX11_CONSTEXPR const byte *byte_ptr() const noexcept { return slice_.byte_ptr(); } /// \brief Returns casted to const pointer to byte an end of data. - MDBX_CXX11_CONSTEXPR const byte *end_byte_ptr() const noexcept { - return slice_.end_byte_ptr(); - } + MDBX_CXX11_CONSTEXPR const byte *end_byte_ptr() const noexcept { return slice_.end_byte_ptr(); } /// \brief Returns casted to pointer to byte an address of data. /// \pre REQUIRES: The buffer should store data chunk, but not referenced to @@ -2255,14 +1947,10 @@ public: } /// \brief Returns casted to const pointer to char an address of data. - MDBX_CXX11_CONSTEXPR const char *char_ptr() const noexcept { - return slice_.char_ptr(); - } + MDBX_CXX11_CONSTEXPR const char *char_ptr() const noexcept { return slice_.char_ptr(); } /// \brief Returns casted to const pointer to char an end of data. 
- MDBX_CXX11_CONSTEXPR const char *end_char_ptr() const noexcept { - return slice_.end_char_ptr(); - } + MDBX_CXX11_CONSTEXPR const char *end_char_ptr() const noexcept { return slice_.end_char_ptr(); } /// \brief Returns casted to pointer to char an address of data. /// \pre REQUIRES: The buffer should store data chunk, but not referenced to @@ -2281,9 +1969,7 @@ public: } /// \brief Return a const pointer to the beginning of the referenced data. - MDBX_CXX11_CONSTEXPR const void *data() const noexcept { - return slice_.data(); - } + MDBX_CXX11_CONSTEXPR const void *data() const noexcept { return slice_.data(); } /// \brief Return a const pointer to the end of the referenced data. MDBX_CXX11_CONSTEXPR const void *end() const noexcept { return slice_.end(); } @@ -2305,18 +1991,13 @@ public: } /// \brief Returns the number of bytes. - MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR size_t - length() const noexcept { - return MDBX_CONSTEXPR_ASSERT(is_reference() || - slice_.length() + headroom() <= - silo_.capacity()), - slice_.length(); + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR size_t length() const noexcept { + return MDBX_CONSTEXPR_ASSERT(is_reference() || slice_.length() + headroom() <= silo_.capacity()), slice_.length(); } /// \brief Set length of data. 
MDBX_CXX14_CONSTEXPR buffer &set_length(size_t bytes) { - MDBX_CONSTEXPR_ASSERT(is_reference() || - bytes + headroom() <= silo_.capacity()); + MDBX_CONSTEXPR_ASSERT(is_reference() || bytes + headroom() <= silo_.capacity()); slice_.set_length(bytes); return *this; } @@ -2336,87 +2017,66 @@ public: } MDBX_CXX20_CONSTEXPR buffer() noexcept = default; - MDBX_CXX20_CONSTEXPR buffer(const allocator_type &allocator) noexcept - : silo_(allocator) {} + MDBX_CXX20_CONSTEXPR buffer(const allocator_type &allocator) noexcept : silo_(allocator) {} - buffer(const struct slice &src, bool make_reference, - const allocator_type &allocator = allocator_type()) + buffer(const struct slice &src, bool make_reference, const allocator_type &allocator = allocator_type()) : silo_(allocator), slice_(src) { if (!make_reference) insulate(); } - buffer(const buffer &src, bool make_reference, - const allocator_type &allocator = allocator_type()) + buffer(const buffer &src, bool make_reference, const allocator_type &allocator = allocator_type()) : buffer(src.slice_, make_reference, allocator) {} - buffer(const void *ptr, size_t bytes, bool make_reference, - const allocator_type &allocator = allocator_type()) + buffer(const void *ptr, size_t bytes, bool make_reference, const allocator_type &allocator = allocator_type()) : buffer(::mdbx::slice(ptr, bytes), make_reference, allocator) {} - template - buffer(const ::std::basic_string &) = delete; - template - buffer(const ::std::basic_string &&) = delete; + template buffer(const ::std::basic_string &) = delete; + template buffer(const ::std::basic_string &&) = delete; - buffer(const char *c_str, bool make_reference, + buffer(const char *c_str, bool make_reference, const allocator_type &allocator = allocator_type()) + : buffer(::mdbx::slice(c_str), make_reference, allocator) {} + +#if defined(DOXYGEN) || (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) + template + buffer(const ::std::basic_string_view &view, bool make_reference, 
const allocator_type &allocator = allocator_type()) - : buffer(::mdbx::slice(c_str), make_reference, allocator){} - -#if defined(DOXYGEN) || \ - (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) - template - buffer(const ::std::basic_string_view &view, - bool make_reference, - const allocator_type &allocator = allocator_type()) - : buffer(::mdbx::slice(view), make_reference, allocator) { - } + : buffer(::mdbx::slice(view), make_reference, allocator) {} #endif /* __cpp_lib_string_view >= 201606L */ MDBX_CXX20_CONSTEXPR - buffer(const struct slice &src, - const allocator_type &allocator = allocator_type()) - : silo_(src.data(), src.length(), allocator), - slice_(silo_.data(), src.length()) {} + buffer(const struct slice &src, const allocator_type &allocator = allocator_type()) + : silo_(src.data(), src.length(), allocator), slice_(silo_.data(), src.length()) {} MDBX_CXX20_CONSTEXPR - buffer(const buffer &src, const allocator_type &allocator = allocator_type()) - : buffer(src.slice_, allocator) {} + buffer(const buffer &src, const allocator_type &allocator = allocator_type()) : buffer(src.slice_, allocator) {} MDBX_CXX20_CONSTEXPR - buffer(const void *ptr, size_t bytes, - const allocator_type &allocator = allocator_type()) + buffer(const void *ptr, size_t bytes, const allocator_type &allocator = allocator_type()) : buffer(::mdbx::slice(ptr, bytes), allocator) {} template - MDBX_CXX20_CONSTEXPR - buffer(const ::std::basic_string &str, - const allocator_type &allocator = allocator_type()) + MDBX_CXX20_CONSTEXPR buffer(const ::std::basic_string &str, + const allocator_type &allocator = allocator_type()) : buffer(::mdbx::slice(str), allocator) {} MDBX_CXX20_CONSTEXPR buffer(const char *c_str, const allocator_type &allocator = allocator_type()) - : buffer(::mdbx::slice(c_str), allocator){} + : buffer(::mdbx::slice(c_str), allocator) {} -#if defined(DOXYGEN) || \ - (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) - template - 
MDBX_CXX20_CONSTEXPR - buffer(const ::std::basic_string_view &view, - const allocator_type &allocator = allocator_type()) - : buffer(::mdbx::slice(view), allocator) { - } +#if defined(DOXYGEN) || (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) + template + MDBX_CXX20_CONSTEXPR buffer(const ::std::basic_string_view &view, + const allocator_type &allocator = allocator_type()) + : buffer(::mdbx::slice(view), allocator) {} #endif /* __cpp_lib_string_view >= 201606L */ - buffer(size_t head_room, size_t tail_room, - const allocator_type &allocator = allocator_type()) - : silo_(allocator) { + buffer(size_t head_room, size_t tail_room, const allocator_type &allocator = allocator_type()) : silo_(allocator) { slice_.iov_base = silo_.init(check_length(head_room, tail_room)); assert(slice_.iov_len == 0); } - buffer(size_t capacity, const allocator_type &allocator = allocator_type()) - : silo_(allocator) { + buffer(size_t capacity, const allocator_type &allocator = allocator_type()) : silo_(allocator) { slice_.iov_base = silo_.init(check_length(capacity)); assert(slice_.iov_len == 0); } @@ -2424,87 +2084,57 @@ public: buffer(size_t head_room, const struct slice &src, size_t tail_room, const allocator_type &allocator = allocator_type()) : silo_(allocator) { - slice_.iov_base = - silo_.init(check_length(head_room, src.length(), tail_room)); + slice_.iov_base = silo_.init(check_length(head_room, src.length(), tail_room)); slice_.iov_len = src.length(); memcpy(slice_.iov_base, src.data(), src.length()); } - buffer(size_t head_room, const buffer &src, size_t tail_room, - const allocator_type &allocator = allocator_type()) + buffer(size_t head_room, const buffer &src, size_t tail_room, const allocator_type &allocator = allocator_type()) : buffer(head_room, src.slice_, tail_room, allocator) {} - inline buffer(const ::mdbx::txn &txn, const struct slice &src, - const allocator_type &allocator = allocator_type()); + inline buffer(const ::mdbx::txn &txn, const struct 
slice &src, const allocator_type &allocator = allocator_type()); buffer(buffer &&src) noexcept(move_assign_alloc::is_nothrow()) : silo_(::std::move(src.silo_)), slice_(::std::move(src.slice_)) {} - MDBX_CXX11_CONSTEXPR const struct slice &slice() const noexcept { - return slice_; - } + MDBX_CXX11_CONSTEXPR const struct slice &slice() const noexcept { return slice_; } - MDBX_CXX11_CONSTEXPR operator const struct slice &() const noexcept { - return slice_; - } + MDBX_CXX11_CONSTEXPR operator const struct slice &() const noexcept { return slice_; } #if defined(DOXYGEN) || (defined(__cpp_lib_span) && __cpp_lib_span >= 202002L) - template - MDBX_CXX14_CONSTEXPR buffer(const ::std::span &span) - : buffer(span.begin(), span.end()) { - static_assert(::std::is_standard_layout::value && - !::std::is_pointer::value, + template MDBX_CXX14_CONSTEXPR buffer(const ::std::span &span) : buffer(span.begin(), span.end()) { + static_assert(::std::is_standard_layout::value && !::std::is_pointer::value, "Must be a standard layout type!"); } - template - MDBX_CXX14_CONSTEXPR ::std::span as_span() const { + template MDBX_CXX14_CONSTEXPR ::std::span as_span() const { return slice_.template as_span(); } - template MDBX_CXX14_CONSTEXPR ::std::span as_span() { - return slice_.template as_span(); - } + template MDBX_CXX14_CONSTEXPR ::std::span as_span() { return slice_.template as_span(); } - MDBX_CXX14_CONSTEXPR ::std::span bytes() const { - return as_span(); - } + MDBX_CXX14_CONSTEXPR ::std::span bytes() const { return as_span(); } MDBX_CXX14_CONSTEXPR ::std::span bytes() { return as_span(); } - MDBX_CXX14_CONSTEXPR ::std::span chars() const { - return as_span(); - } + MDBX_CXX14_CONSTEXPR ::std::span chars() const { return as_span(); } MDBX_CXX14_CONSTEXPR ::std::span chars() { return as_span(); } #endif /* __cpp_lib_span >= 202002L */ template - static buffer wrap(const POD &pod, bool make_reference = false, - const allocator_type &allocator = allocator_type()) { + static buffer wrap(const 
POD &pod, bool make_reference = false, const allocator_type &allocator = allocator_type()) { return buffer(::mdbx::slice::wrap(pod), make_reference, allocator); } - template MDBX_CXX14_CONSTEXPR POD as_pod() const { - return slice_.as_pod(); - } + template MDBX_CXX14_CONSTEXPR POD as_pod() const { return slice_.as_pod(); } #ifdef MDBX_U128_TYPE - MDBX_CXX14_CONSTEXPR MDBX_U128_TYPE as_uint128() const { - return slice().as_uint128(); - } + MDBX_CXX14_CONSTEXPR MDBX_U128_TYPE as_uint128() const { return slice().as_uint128(); } #endif /* MDBX_U128_TYPE */ - MDBX_CXX14_CONSTEXPR uint64_t as_uint64() const { - return slice().as_uint64(); - } - MDBX_CXX14_CONSTEXPR uint32_t as_uint32() const { - return slice().as_uint32(); - } - MDBX_CXX14_CONSTEXPR uint16_t as_uint16() const { - return slice().as_uint16(); - } + MDBX_CXX14_CONSTEXPR uint64_t as_uint64() const { return slice().as_uint64(); } + MDBX_CXX14_CONSTEXPR uint32_t as_uint32() const { return slice().as_uint32(); } + MDBX_CXX14_CONSTEXPR uint16_t as_uint16() const { return slice().as_uint16(); } MDBX_CXX14_CONSTEXPR uint8_t as_uint8() const { return slice().as_uint8(); } #ifdef MDBX_I128_TYPE - MDBX_CXX14_CONSTEXPR MDBX_I128_TYPE as_int128() const { - return slice().as_int128(); - } + MDBX_CXX14_CONSTEXPR MDBX_I128_TYPE as_int128() const { return slice().as_int128(); } #endif /* MDBX_I128_TYPE */ MDBX_CXX14_CONSTEXPR int64_t as_int64() const { return slice().as_int64(); } MDBX_CXX14_CONSTEXPR int32_t as_int32() const { return slice().as_int32(); } @@ -2528,32 +2158,27 @@ public: int8_t as_int8_adapt() const { return slice().as_int8_adapt(); } /// \brief Returns a new buffer with a hexadecimal dump of the slice content. 
- static buffer hex(const ::mdbx::slice &source, bool uppercase = false, - unsigned wrap_width = 0, + static buffer hex(const ::mdbx::slice &source, bool uppercase = false, unsigned wrap_width = 0, const allocator_type &allocator = allocator_type()) { - return source.template encode_hex( - uppercase, wrap_width, allocator); + return source.template encode_hex(uppercase, wrap_width, allocator); } /// \brief Returns a new buffer with a /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of the slice content. static buffer base58(const ::mdbx::slice &source, unsigned wrap_width = 0, const allocator_type &allocator = allocator_type()) { - return source.template encode_base58(wrap_width, - allocator); + return source.template encode_base58(wrap_width, allocator); } /// \brief Returns a new buffer with a /// [Base64](https://en.wikipedia.org/wiki/Base64) dump of the slice content. static buffer base64(const ::mdbx::slice &source, unsigned wrap_width = 0, const allocator_type &allocator = allocator_type()) { - return source.template encode_base64(wrap_width, - allocator); + return source.template encode_base64(wrap_width, allocator); } /// \brief Returns a new buffer with a hexadecimal dump of the given pod. template - static buffer hex(const POD &pod, bool uppercase = false, - unsigned wrap_width = 0, + static buffer hex(const POD &pod, bool uppercase = false, unsigned wrap_width = 0, const allocator_type &allocator = allocator_type()) { return hex(mdbx::slice::wrap(pod), uppercase, wrap_width, allocator); } @@ -2561,105 +2186,80 @@ public: /// \brief Returns a new buffer with a /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of the given pod. 
template - static buffer base58(const POD &pod, unsigned wrap_width = 0, - const allocator_type &allocator = allocator_type()) { + static buffer base58(const POD &pod, unsigned wrap_width = 0, const allocator_type &allocator = allocator_type()) { return base58(mdbx::slice::wrap(pod), wrap_width, allocator); } /// \brief Returns a new buffer with a /// [Base64](https://en.wikipedia.org/wiki/Base64) dump of the given pod. template - static buffer base64(const POD &pod, unsigned wrap_width = 0, - const allocator_type &allocator = allocator_type()) { + static buffer base64(const POD &pod, unsigned wrap_width = 0, const allocator_type &allocator = allocator_type()) { return base64(mdbx::slice::wrap(pod), wrap_width, allocator); } /// \brief Returns a new buffer with a hexadecimal dump of the slice content. buffer encode_hex(bool uppercase = false, unsigned wrap_width = 0, const allocator_type &allocator = allocator_type()) const { - return slice().template encode_hex( - uppercase, wrap_width, allocator); + return slice().template encode_hex(uppercase, wrap_width, allocator); } /// \brief Returns a new buffer with a /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of the slice content. - buffer - encode_base58(unsigned wrap_width = 0, - const allocator_type &allocator = allocator_type()) const { - return slice().template encode_base58( - wrap_width, allocator); + buffer encode_base58(unsigned wrap_width = 0, const allocator_type &allocator = allocator_type()) const { + return slice().template encode_base58(wrap_width, allocator); } /// \brief Returns a new buffer with a /// [Base64](https://en.wikipedia.org/wiki/Base64) dump of the slice content. 
- buffer - encode_base64(unsigned wrap_width = 0, - const allocator_type &allocator = allocator_type()) const { - return slice().template encode_base64( - wrap_width, allocator); + buffer encode_base64(unsigned wrap_width = 0, const allocator_type &allocator = allocator_type()) const { + return slice().template encode_base64(wrap_width, allocator); } /// \brief Decodes hexadecimal dump from the slice content to returned buffer. - static buffer hex_decode(const ::mdbx::slice &source, - bool ignore_spaces = false, + static buffer hex_decode(const ::mdbx::slice &source, bool ignore_spaces = false, const allocator_type &allocator = allocator_type()) { - return source.template hex_decode(ignore_spaces, - allocator); + return source.template hex_decode(ignore_spaces, allocator); } /// \brief Decodes [Base58](https://en.wikipedia.org/wiki/Base58) dump /// from the slice content to returned buffer. - static buffer - base58_decode(const ::mdbx::slice &source, bool ignore_spaces = false, - const allocator_type &allocator = allocator_type()) { - return source.template base58_decode( - ignore_spaces, allocator); + static buffer base58_decode(const ::mdbx::slice &source, bool ignore_spaces = false, + const allocator_type &allocator = allocator_type()) { + return source.template base58_decode(ignore_spaces, allocator); } /// \brief Decodes [Base64](https://en.wikipedia.org/wiki/Base64) dump /// from the slice content to returned buffer. - static buffer - base64_decode(const ::mdbx::slice &source, bool ignore_spaces = false, - const allocator_type &allocator = allocator_type()) { - return source.template base64_decode( - ignore_spaces, allocator); + static buffer base64_decode(const ::mdbx::slice &source, bool ignore_spaces = false, + const allocator_type &allocator = allocator_type()) { + return source.template base64_decode(ignore_spaces, allocator); } /// \brief Decodes hexadecimal dump /// from the buffer content to new returned buffer. 
- buffer hex_decode(bool ignore_spaces = false, - const allocator_type &allocator = allocator_type()) const { + buffer hex_decode(bool ignore_spaces = false, const allocator_type &allocator = allocator_type()) const { return hex_decode(slice(), ignore_spaces, allocator); } /// \brief Decodes [Base58](https://en.wikipedia.org/wiki/Base58) dump /// from the buffer content to new returned buffer. - buffer - base58_decode(bool ignore_spaces = false, - const allocator_type &allocator = allocator_type()) const { + buffer base58_decode(bool ignore_spaces = false, const allocator_type &allocator = allocator_type()) const { return base58_decode(slice(), ignore_spaces, allocator); } /// \brief Decodes [Base64](https://en.wikipedia.org/wiki/Base64) dump /// from the buffer content to new returned buffer. - buffer - base64_decode(bool ignore_spaces = false, - const allocator_type &allocator = allocator_type()) const { + buffer base64_decode(bool ignore_spaces = false, const allocator_type &allocator = allocator_type()) const { return base64_decode(slice(), ignore_spaces, allocator); } /// \brief Reserves storage space. 
void reserve(size_t wanna_headroom, size_t wanna_tailroom) { - wanna_headroom = ::std::min(::std::max(headroom(), wanna_headroom), - wanna_headroom + pettiness_threshold); - wanna_tailroom = ::std::min(::std::max(tailroom(), wanna_tailroom), - wanna_tailroom + pettiness_threshold); - const size_t wanna_capacity = - check_length(wanna_headroom, slice_.length(), wanna_tailroom); + wanna_headroom = ::std::min(::std::max(headroom(), wanna_headroom), wanna_headroom + pettiness_threshold); + wanna_tailroom = ::std::min(::std::max(tailroom(), wanna_tailroom), wanna_tailroom + pettiness_threshold); + const size_t wanna_capacity = check_length(wanna_headroom, slice_.length(), wanna_tailroom); silo_.resize(wanna_capacity, wanna_headroom, slice_); - assert(headroom() >= wanna_headroom && - headroom() <= wanna_headroom + pettiness_threshold); - assert(tailroom() >= wanna_tailroom && - tailroom() <= wanna_tailroom + pettiness_threshold); + assert(headroom() >= wanna_headroom && headroom() <= wanna_headroom + pettiness_threshold); + assert(tailroom() >= wanna_tailroom && tailroom() <= wanna_tailroom + pettiness_threshold); } /// \brief Reserves space before the payload. 
@@ -2675,30 +2275,24 @@ public: } buffer &assign_freestanding(const void *ptr, size_t bytes) { - silo_.assign(static_cast(ptr), - check_length(bytes)); + silo_.assign(static_cast(ptr), check_length(bytes)); slice_.assign(silo_.data(), bytes); return *this; } - MDBX_CXX20_CONSTEXPR void - swap(buffer &other) noexcept(swap_alloc::is_nothrow()) { + MDBX_CXX20_CONSTEXPR void swap(buffer &other) noexcept(swap_alloc::is_nothrow()) { silo_.swap(other.silo_); slice_.swap(other.slice_); } - static buffer clone(const buffer &src, - const allocator_type &allocator = allocator_type()) { + static buffer clone(const buffer &src, const allocator_type &allocator = allocator_type()) { return buffer(src.headroom(), src.slice_, src.tailroom(), allocator); } - buffer &assign(const buffer &src, bool make_reference = false) { - return assign(src.slice_, make_reference); - } + buffer &assign(const buffer &src, bool make_reference = false) { return assign(src.slice_, make_reference); } buffer &assign(const void *ptr, size_t bytes, bool make_reference = false) { - return make_reference ? assign_reference(ptr, bytes) - : assign_freestanding(ptr, bytes); + return make_reference ? 
assign_reference(ptr, bytes) : assign_freestanding(ptr, bytes); } buffer &assign(const struct slice &src, bool make_reference = false) { @@ -2721,17 +2315,12 @@ public: return *this; } - buffer &assign(const void *begin, const void *end, - bool make_reference = false) { - return assign(begin, - static_cast(end) - - static_cast(begin), - make_reference); + buffer &assign(const void *begin, const void *end, bool make_reference = false) { + return assign(begin, static_cast(end) - static_cast(begin), make_reference); } template - buffer &assign(const ::std::basic_string &str, - bool make_reference = false) { + buffer &assign(const ::std::basic_string &str, bool make_reference = false) { return assign(str.data(), str.length(), make_reference); } @@ -2741,14 +2330,11 @@ public: #if defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L template - buffer &assign(const ::std::basic_string_view &view, - bool make_reference = false) { + buffer &assign(const ::std::basic_string_view &view, bool make_reference = false) { return assign(view.data(), view.length(), make_reference); } - template - buffer &assign(::std::basic_string_view &&view, - bool make_reference = false) { + template buffer &assign(::std::basic_string_view &&view, bool make_reference = false) { assign(view.data(), view.length(), make_reference); view = {}; return *this; @@ -2757,18 +2343,14 @@ public: buffer &operator=(const buffer &src) { return assign(src); } - buffer &operator=(buffer &&src) noexcept(move_assign_alloc::is_nothrow()) { - return assign(::std::move(src)); - } + buffer &operator=(buffer &&src) noexcept(move_assign_alloc::is_nothrow()) { return assign(::std::move(src)); } buffer &operator=(const struct slice &src) { return assign(src); } buffer &operator=(struct slice &&src) { return assign(::std::move(src)); } -#if defined(DOXYGEN) || \ - (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) - template - buffer &operator=(const ::std::basic_string_view &view) noexcept 
{ +#if defined(DOXYGEN) || (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) + template buffer &operator=(const ::std::basic_string_view &view) noexcept { return assign(view); } @@ -2779,58 +2361,43 @@ public: } /// \brief Return a string_view that references the data of this buffer. - template - operator ::std::basic_string_view() const noexcept { + template operator ::std::basic_string_view() const noexcept { return string_view(); } #endif /* __cpp_lib_string_view >= 201606L */ /// \brief Checks whether the string is empty. - MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR bool empty() const noexcept { - return length() == 0; - } + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR bool empty() const noexcept { return length() == 0; } /// \brief Checks whether the data pointer of the buffer is nullptr. - MDBX_CXX11_CONSTEXPR bool is_null() const noexcept { - return data() == nullptr; - } + MDBX_CXX11_CONSTEXPR bool is_null() const noexcept { return data() == nullptr; } /// \brief Returns the number of bytes. - MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR size_t size() const noexcept { - return length(); - } + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR size_t size() const noexcept { return length(); } /// \brief Returns the hash value of the data. /// \attention Function implementation and returned hash values may changed /// version to version, and in future the t1ha3 will be used here. Therefore /// values obtained from this function shouldn't be persisted anywhere. 
- MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR size_t - hash_value() const noexcept { - return slice_.hash_value(); - } + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR size_t hash_value() const noexcept { return slice_.hash_value(); } - template , - class A = legacy_allocator> - MDBX_CXX20_CONSTEXPR ::std::basic_string - as_string(const A &allocator = A()) const { + template , class A = legacy_allocator> + MDBX_CXX20_CONSTEXPR ::std::basic_string as_string(const A &allocator = A()) const { return slice_.as_string(allocator); } template - MDBX_CXX20_CONSTEXPR explicit - operator ::std::basic_string() const { + MDBX_CXX20_CONSTEXPR explicit operator ::std::basic_string() const { return as_string(); } /// \brief Checks if the data starts with the given prefix. - MDBX_NOTHROW_PURE_FUNCTION bool - starts_with(const struct slice &prefix) const noexcept { + MDBX_NOTHROW_PURE_FUNCTION bool starts_with(const struct slice &prefix) const noexcept { return slice_.starts_with(prefix); } /// \brief Checks if the data ends with the given suffix. - MDBX_NOTHROW_PURE_FUNCTION bool - ends_with(const struct slice &suffix) const noexcept { + MDBX_NOTHROW_PURE_FUNCTION bool ends_with(const struct slice &suffix) const noexcept { return slice_.ends_with(suffix); } @@ -2889,40 +2456,27 @@ public: /// \brief Returns the first "n" bytes of the data chunk. /// \pre REQUIRES: `n <= size()` - MDBX_CXX14_CONSTEXPR struct slice head(size_t n) const noexcept { - return slice_.head(n); - } + MDBX_CXX14_CONSTEXPR struct slice head(size_t n) const noexcept { return slice_.head(n); } /// \brief Returns the last "n" bytes of the data chunk. /// \pre REQUIRES: `n <= size()` - MDBX_CXX14_CONSTEXPR struct slice tail(size_t n) const noexcept { - return slice_.tail(n); - } + MDBX_CXX14_CONSTEXPR struct slice tail(size_t n) const noexcept { return slice_.tail(n); } /// \brief Returns the middle "n" bytes of the data chunk. 
/// \pre REQUIRES: `from + n <= size()` - MDBX_CXX14_CONSTEXPR struct slice middle(size_t from, - size_t n) const noexcept { - return slice_.middle(from, n); - } + MDBX_CXX14_CONSTEXPR struct slice middle(size_t from, size_t n) const noexcept { return slice_.middle(from, n); } /// \brief Returns the first "n" bytes of the data chunk. /// \throws std::out_of_range if `n >= size()` - MDBX_CXX14_CONSTEXPR struct slice safe_head(size_t n) const { - return slice_.safe_head(n); - } + MDBX_CXX14_CONSTEXPR struct slice safe_head(size_t n) const { return slice_.safe_head(n); } /// \brief Returns the last "n" bytes of the data chunk. /// \throws std::out_of_range if `n >= size()` - MDBX_CXX14_CONSTEXPR struct slice safe_tail(size_t n) const { - return slice_.safe_tail(n); - } + MDBX_CXX14_CONSTEXPR struct slice safe_tail(size_t n) const { return slice_.safe_tail(n); } /// \brief Returns the middle "n" bytes of the data chunk. /// \throws std::out_of_range if `from + n >= size()` - MDBX_CXX14_CONSTEXPR struct slice safe_middle(size_t from, size_t n) const { - return slice_.safe_middle(from, n); - } + MDBX_CXX14_CONSTEXPR struct slice safe_middle(size_t from, size_t n) const { return slice_.safe_middle(from, n); } buffer &append(const void *src, size_t bytes) { if (MDBX_UNLIKELY(tailroom() < check_length(bytes))) @@ -2932,41 +2486,33 @@ public: return *this; } - buffer &append(const struct slice &chunk) { - return append(chunk.data(), chunk.size()); - } + buffer &append(const struct slice &chunk) { return append(chunk.data(), chunk.size()); } buffer &add_header(const void *src, size_t bytes) { if (MDBX_UNLIKELY(headroom() < check_length(bytes))) MDBX_CXX20_UNLIKELY reserve_headroom(bytes); - slice_.iov_base = - memcpy(static_cast(slice_.iov_base) - bytes, src, bytes); + slice_.iov_base = memcpy(static_cast(slice_.iov_base) - bytes, src, bytes); slice_.iov_len += bytes; return *this; } - buffer &add_header(const struct slice &chunk) { - return add_header(chunk.data(), 
chunk.size()); - } + buffer &add_header(const struct slice &chunk) { return add_header(chunk.data(), chunk.size()); } - template - buffer &append_producer(PRODUCER &producer) { + template buffer &append_producer(PRODUCER &producer) { const size_t wanna_bytes = producer.envisage_result_length(); if (MDBX_UNLIKELY(tailroom() < check_length(wanna_bytes))) MDBX_CXX20_UNLIKELY reserve_tailroom(wanna_bytes); return set_end(producer.write_bytes(end_char_ptr(), tailroom())); } - template - buffer &append_producer(const PRODUCER &producer) { + template buffer &append_producer(const PRODUCER &producer) { const size_t wanna_bytes = producer.envisage_result_length(); if (MDBX_UNLIKELY(tailroom() < check_length(wanna_bytes))) MDBX_CXX20_UNLIKELY reserve_tailroom(wanna_bytes); return set_end(producer.write_bytes(end_char_ptr(), tailroom())); } - buffer &append_hex(const struct slice &data, bool uppercase = false, - unsigned wrap_width = 0) { + buffer &append_hex(const struct slice &data, bool uppercase = false, unsigned wrap_width = 0) { return append_producer(to_hex(data, uppercase, wrap_width)); } @@ -2978,18 +2524,15 @@ public: return append_producer(to_base64(data, wrap_width)); } - buffer &append_decoded_hex(const struct slice &data, - bool ignore_spaces = false) { + buffer &append_decoded_hex(const struct slice &data, bool ignore_spaces = false) { return append_producer(from_hex(data, ignore_spaces)); } - buffer &append_decoded_base58(const struct slice &data, - bool ignore_spaces = false) { + buffer &append_decoded_base58(const struct slice &data, bool ignore_spaces = false) { return append_producer(from_base58(data, ignore_spaces)); } - buffer &append_decoded_base64(const struct slice &data, - bool ignore_spaces = false) { + buffer &append_decoded_base64(const struct slice &data, bool ignore_spaces = false) { return append_producer(from_base64(data, ignore_spaces)); } @@ -3068,149 +2611,99 @@ public: 
//---------------------------------------------------------------------------- - template - static buffer key_from(const char (&text)[SIZE], bool make_reference = true) { + template static buffer key_from(const char (&text)[SIZE], bool make_reference = true) { return buffer(::mdbx::slice(text), make_reference); } -#if defined(DOXYGEN) || \ - (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) +#if defined(DOXYGEN) || (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) template - static buffer key_from(const ::std::basic_string_view &src, - bool make_reference = false) { + static buffer key_from(const ::std::basic_string_view &src, bool make_reference = false) { return buffer(src, make_reference); } #endif /* __cpp_lib_string_view >= 201606L */ - static buffer key_from(const char *src, bool make_reference = false) { - return buffer(src, make_reference); - } + static buffer key_from(const char *src, bool make_reference = false) { return buffer(src, make_reference); } template - static buffer key_from(const ::std::basic_string &src, - bool make_reference = false) { + static buffer key_from(const ::std::basic_string &src, bool make_reference = false) { return buffer(src, make_reference); } - static buffer key_from(silo &&src) noexcept { - return buffer(::std::move(src)); - } + static buffer key_from(silo &&src) noexcept { return buffer(::std::move(src)); } - static buffer key_from_double(const double ieee754_64bit) { - return wrap(::mdbx_key_from_double(ieee754_64bit)); - } + static buffer key_from_double(const double ieee754_64bit) { return wrap(::mdbx_key_from_double(ieee754_64bit)); } - static buffer key_from(const double ieee754_64bit) { - return key_from_double(ieee754_64bit); - } + static buffer key_from(const double ieee754_64bit) { return key_from_double(ieee754_64bit); } - static buffer key_from(const double *ieee754_64bit) { - return wrap(::mdbx_key_from_ptrdouble(ieee754_64bit)); - } + static buffer key_from(const double 
*ieee754_64bit) { return wrap(::mdbx_key_from_ptrdouble(ieee754_64bit)); } - static buffer key_from_u64(const uint64_t unsigned_int64) { - return wrap(unsigned_int64); - } + static buffer key_from_u64(const uint64_t unsigned_int64) { return wrap(unsigned_int64); } - static buffer key_from(const uint64_t unsigned_int64) { - return key_from_u64(unsigned_int64); - } + static buffer key_from(const uint64_t unsigned_int64) { return key_from_u64(unsigned_int64); } - static buffer key_from_i64(const int64_t signed_int64) { - return wrap(::mdbx_key_from_int64(signed_int64)); - } + static buffer key_from_i64(const int64_t signed_int64) { return wrap(::mdbx_key_from_int64(signed_int64)); } - static buffer key_from(const int64_t signed_int64) { - return key_from_i64(signed_int64); - } + static buffer key_from(const int64_t signed_int64) { return key_from_i64(signed_int64); } static buffer key_from_jsonInteger(const int64_t json_integer) { return wrap(::mdbx_key_from_jsonInteger(json_integer)); } - static buffer key_from_float(const float ieee754_32bit) { - return wrap(::mdbx_key_from_float(ieee754_32bit)); - } + static buffer key_from_float(const float ieee754_32bit) { return wrap(::mdbx_key_from_float(ieee754_32bit)); } - static buffer key_from(const float ieee754_32bit) { - return key_from_float(ieee754_32bit); - } + static buffer key_from(const float ieee754_32bit) { return key_from_float(ieee754_32bit); } - static buffer key_from(const float *ieee754_32bit) { - return wrap(::mdbx_key_from_ptrfloat(ieee754_32bit)); - } + static buffer key_from(const float *ieee754_32bit) { return wrap(::mdbx_key_from_ptrfloat(ieee754_32bit)); } - static buffer key_from_u32(const uint32_t unsigned_int32) { - return wrap(unsigned_int32); - } + static buffer key_from_u32(const uint32_t unsigned_int32) { return wrap(unsigned_int32); } - static buffer key_from(const uint32_t unsigned_int32) { - return key_from_u32(unsigned_int32); - } + static buffer key_from(const uint32_t unsigned_int32) { 
return key_from_u32(unsigned_int32); } - static buffer key_from_i32(const int32_t signed_int32) { - return wrap(::mdbx_key_from_int32(signed_int32)); - } + static buffer key_from_i32(const int32_t signed_int32) { return wrap(::mdbx_key_from_int32(signed_int32)); } - static buffer key_from(const int32_t signed_int32) { - return key_from_i32(signed_int32); - } + static buffer key_from(const int32_t signed_int32) { return key_from_i32(signed_int32); } }; -template -inline buffer -make_buffer(PRODUCER &producer, const ALLOCATOR &allocator) { +template +inline buffer make_buffer(PRODUCER &producer, const ALLOCATOR &allocator) { if (MDBX_LIKELY(!producer.is_empty())) MDBX_CXX20_LIKELY { - buffer result( - producer.envisage_result_length(), allocator); - result.set_end( - producer.write_bytes(result.end_char_ptr(), result.tailroom())); + buffer result(producer.envisage_result_length(), allocator); + result.set_end(producer.write_bytes(result.end_char_ptr(), result.tailroom())); return result; } return buffer(allocator); } -template -inline buffer -make_buffer(const PRODUCER &producer, const ALLOCATOR &allocator) { +template +inline buffer make_buffer(const PRODUCER &producer, const ALLOCATOR &allocator) { if (MDBX_LIKELY(!producer.is_empty())) MDBX_CXX20_LIKELY { - buffer result( - producer.envisage_result_length(), allocator); - result.set_end( - producer.write_bytes(result.end_char_ptr(), result.tailroom())); + buffer result(producer.envisage_result_length(), allocator); + result.set_end(producer.write_bytes(result.end_char_ptr(), result.tailroom())); return result; } return buffer(allocator); } template -inline string make_string(PRODUCER &producer, - const ALLOCATOR &allocator) { +inline string make_string(PRODUCER &producer, const ALLOCATOR &allocator) { string result(allocator); if (MDBX_LIKELY(!producer.is_empty())) MDBX_CXX20_LIKELY { result.resize(producer.envisage_result_length()); - result.resize(producer.write_bytes(const_cast(result.data()), - 
result.capacity()) - - result.data()); + result.resize(producer.write_bytes(const_cast(result.data()), result.capacity()) - result.data()); } return result; } template -inline string make_string(const PRODUCER &producer, - const ALLOCATOR &allocator) { +inline string make_string(const PRODUCER &producer, const ALLOCATOR &allocator) { string result(allocator); if (MDBX_LIKELY(!producer.is_empty())) MDBX_CXX20_LIKELY { result.resize(producer.envisage_result_length()); - result.resize(producer.write_bytes(const_cast(result.data()), - result.capacity()) - - result.data()); + result.resize(producer.write_bytes(const_cast(result.data()), result.capacity()) - result.data()); } return result; } @@ -3220,8 +2713,7 @@ inline string make_string(const PRODUCER &producer, struct value_result { slice value; bool done; - value_result(const slice &value, bool done) noexcept - : value(value), done(done) {} + value_result(const slice &value, bool done) noexcept : value(value), done(done) {} value_result(const value_result &) noexcept = default; value_result &operator=(const value_result &) noexcept = default; MDBX_CXX14_CONSTEXPR operator bool() const noexcept { @@ -3235,52 +2727,37 @@ struct value_result { struct pair { using stl_pair = std::pair; slice key, value; - MDBX_CXX11_CONSTEXPR pair(const slice &key, const slice &value) noexcept - : key(key), value(value) {} - MDBX_CXX11_CONSTEXPR pair(const stl_pair &couple) noexcept - : key(couple.first), value(couple.second) {} - MDBX_CXX11_CONSTEXPR operator stl_pair() const noexcept { - return stl_pair(key, value); - } + MDBX_CXX11_CONSTEXPR pair(const slice &key, const slice &value) noexcept : key(key), value(value) {} + MDBX_CXX11_CONSTEXPR pair(const stl_pair &couple) noexcept : key(couple.first), value(couple.second) {} + MDBX_CXX11_CONSTEXPR operator stl_pair() const noexcept { return stl_pair(key, value); } pair(const pair &) noexcept = default; pair &operator=(const pair &) noexcept = default; MDBX_CXX14_CONSTEXPR operator 
bool() const noexcept { assert(bool(key) == bool(value)); return key; } - MDBX_CXX14_CONSTEXPR static pair invalid() noexcept { - return pair(slice::invalid(), slice::invalid()); - } + MDBX_CXX14_CONSTEXPR static pair invalid() noexcept { return pair(slice::invalid(), slice::invalid()); } /// \brief Three-way fast non-lexicographically length-based comparison. - MDBX_NOTHROW_PURE_FUNCTION static MDBX_CXX14_CONSTEXPR intptr_t - compare_fast(const pair &a, const pair &b) noexcept; + MDBX_NOTHROW_PURE_FUNCTION static MDBX_CXX14_CONSTEXPR intptr_t compare_fast(const pair &a, const pair &b) noexcept; /// \brief Three-way lexicographically comparison. - MDBX_NOTHROW_PURE_FUNCTION static MDBX_CXX14_CONSTEXPR intptr_t - compare_lexicographically(const pair &a, const pair &b) noexcept; - friend MDBX_CXX14_CONSTEXPR bool operator==(const pair &a, - const pair &b) noexcept; - friend MDBX_CXX14_CONSTEXPR bool operator<(const pair &a, - const pair &b) noexcept; - friend MDBX_CXX14_CONSTEXPR bool operator>(const pair &a, - const pair &b) noexcept; - friend MDBX_CXX14_CONSTEXPR bool operator<=(const pair &a, - const pair &b) noexcept; - friend MDBX_CXX14_CONSTEXPR bool operator>=(const pair &a, - const pair &b) noexcept; - friend MDBX_CXX14_CONSTEXPR bool operator!=(const pair &a, - const pair &b) noexcept; + MDBX_NOTHROW_PURE_FUNCTION static MDBX_CXX14_CONSTEXPR intptr_t compare_lexicographically(const pair &a, + const pair &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator==(const pair &a, const pair &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator<(const pair &a, const pair &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator>(const pair &a, const pair &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator<=(const pair &a, const pair &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator>=(const pair &a, const pair &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator!=(const pair &a, const pair &b) noexcept; }; /// \brief Combines pair of slices 
for key and value with boolean flag to /// represent result of certain operations. struct pair_result : public pair { bool done; - MDBX_CXX11_CONSTEXPR pair_result() noexcept - : pair(pair::invalid()), done(false) {} - MDBX_CXX11_CONSTEXPR pair_result(const slice &key, const slice &value, - bool done) noexcept + MDBX_CXX11_CONSTEXPR pair_result() noexcept : pair(pair::invalid()), done(false) {} + MDBX_CXX11_CONSTEXPR pair_result(const slice &key, const slice &value, bool done) noexcept : pair(key, value), done(done) {} pair_result(const pair_result &) noexcept = default; pair_result &operator=(const pair_result &) noexcept = default; @@ -3290,8 +2767,7 @@ struct pair_result : public pair { } }; -template -struct buffer_pair_spec { +template struct buffer_pair_spec { using buffer_type = buffer; using allocator_type = typename buffer_type::allocator_type; using allocator_traits = typename buffer_type::allocator_traits; @@ -3301,64 +2777,49 @@ struct buffer_pair_spec { MDBX_CXX20_CONSTEXPR buffer_pair_spec() noexcept = default; MDBX_CXX20_CONSTEXPR - buffer_pair_spec(const allocator_type &allocator) noexcept - : key(allocator), value(allocator) {} + buffer_pair_spec(const allocator_type &allocator) noexcept : key(allocator), value(allocator) {} - buffer_pair_spec(const buffer_type &key, const buffer_type &value, - const allocator_type &allocator = allocator_type()) + buffer_pair_spec(const buffer_type &key, const buffer_type &value, const allocator_type &allocator = allocator_type()) : key(key, allocator), value(value, allocator) {} - buffer_pair_spec(const buffer_type &key, const buffer_type &value, - bool make_reference, + buffer_pair_spec(const buffer_type &key, const buffer_type &value, bool make_reference, const allocator_type &allocator = allocator_type()) - : key(key, make_reference, allocator), - value(value, make_reference, allocator) {} + : key(key, make_reference, allocator), value(value, make_reference, allocator) {} - buffer_pair_spec(const stl_pair 
&pair, - const allocator_type &allocator = allocator_type()) + buffer_pair_spec(const stl_pair &pair, const allocator_type &allocator = allocator_type()) : buffer_pair_spec(pair.first, pair.second, allocator) {} - buffer_pair_spec(const stl_pair &pair, bool make_reference, - const allocator_type &allocator = allocator_type()) + buffer_pair_spec(const stl_pair &pair, bool make_reference, const allocator_type &allocator = allocator_type()) : buffer_pair_spec(pair.first, pair.second, make_reference, allocator) {} - buffer_pair_spec(const slice &key, const slice &value, - const allocator_type &allocator = allocator_type()) + buffer_pair_spec(const slice &key, const slice &value, const allocator_type &allocator = allocator_type()) : key(key, allocator), value(value, allocator) {} buffer_pair_spec(const slice &key, const slice &value, bool make_reference, const allocator_type &allocator = allocator_type()) - : key(key, make_reference, allocator), - value(value, make_reference, allocator) {} + : key(key, make_reference, allocator), value(value, make_reference, allocator) {} - buffer_pair_spec(const pair &pair, - const allocator_type &allocator = allocator_type()) + buffer_pair_spec(const pair &pair, const allocator_type &allocator = allocator_type()) : buffer_pair_spec(pair.key, pair.value, allocator) {} - buffer_pair_spec(const pair &pair, bool make_reference, - const allocator_type &allocator = allocator_type()) + buffer_pair_spec(const pair &pair, bool make_reference, const allocator_type &allocator = allocator_type()) : buffer_pair_spec(pair.key, pair.value, make_reference, allocator) {} buffer_pair_spec(const txn &txn, const slice &key, const slice &value, const allocator_type &allocator = allocator_type()) : key(txn, key, allocator), value(txn, value, allocator) {} - buffer_pair_spec(const txn &txn, const pair &pair, - const allocator_type &allocator = allocator_type()) + buffer_pair_spec(const txn &txn, const pair &pair, const allocator_type &allocator = 
allocator_type()) : buffer_pair_spec(txn, pair.key, pair.value, allocator) {} - buffer_pair_spec(buffer_type &&key, buffer_type &&value) noexcept( - buffer_type::move_assign_alloc::is_nothrow()) + buffer_pair_spec(buffer_type &&key, buffer_type &&value) noexcept(buffer_type::move_assign_alloc::is_nothrow()) : key(::std::move(key)), value(::std::move(value)) {} - buffer_pair_spec(buffer_pair_spec &&pair) noexcept( - buffer_type::move_assign_alloc::is_nothrow()) + buffer_pair_spec(buffer_pair_spec &&pair) noexcept(buffer_type::move_assign_alloc::is_nothrow()) : buffer_pair_spec(::std::move(pair.key), ::std::move(pair.value)) {} /// \brief Checks whether data chunk stored inside the buffers both, otherwise /// at least one of buffers just refers to data located outside. - MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR bool - is_freestanding() const noexcept { + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR bool is_freestanding() const noexcept { return key.is_freestanding() && value.is_freestanding(); } /// \brief Checks whether one of the buffers just refers to data located /// outside the buffer, rather than stores it. - MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR bool - is_reference() const noexcept { + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR bool is_reference() const noexcept { return key.is_reference() || value.is_reference(); } /// \brief Makes buffers owning the data. @@ -3373,8 +2834,7 @@ struct buffer_pair_spec { }; template -using buffer_pair = buffer_pair_spec; +using buffer_pair = buffer_pair_spec; /// end of cxx_data @} @@ -3396,9 +2856,9 @@ enum class key_mode { ///< sorted as such. The keys must all be of the ///< same size and must be aligned while passing ///< as arguments. - msgpack = -1 ///< Keys are in [MessagePack](https://msgpack.org/) - ///< format with appropriate comparison. - ///< \note Not yet implemented and PRs are welcome. 
+ msgpack = -1 ///< Keys are in [MessagePack](https://msgpack.org/) + ///< format with appropriate comparison. + ///< \note Not yet implemented and PRs are welcome. }; MDBX_CXX01_CONSTEXPR_ENUM bool is_usual(key_mode mode) noexcept { @@ -3417,70 +2877,57 @@ MDBX_CXX01_CONSTEXPR_ENUM bool is_reverse(key_mode mode) noexcept { return (MDBX_db_flags_t(mode) & MDBX_REVERSEKEY) != 0; } -MDBX_CXX01_CONSTEXPR_ENUM bool is_msgpack(key_mode mode) noexcept { - return mode == key_mode::msgpack; -} +MDBX_CXX01_CONSTEXPR_ENUM bool is_msgpack(key_mode mode) noexcept { return mode == key_mode::msgpack; } /// \brief Kind of the values and sorted multi-values with corresponding /// comparison. enum class value_mode { single = MDBX_DB_DEFAULTS, ///< Usual single value for each key. In terms of ///< keys, they are unique. - multi = - MDBX_DUPSORT, ///< A more than one data value could be associated with - ///< each key. Internally each key is stored once, and the - ///< corresponding data values are sorted by byte-by-byte - ///< lexicographic comparison like `std::memcmp()`. - ///< In terms of keys, they are not unique, i.e. has - ///< duplicates which are sorted by associated data values. + multi = MDBX_DUPSORT, ///< A more than one data value could be associated with + ///< each key. Internally each key is stored once, and the + ///< corresponding data values are sorted by byte-by-byte + ///< lexicographic comparison like `std::memcmp()`. + ///< In terms of keys, they are not unique, i.e. has + ///< duplicates which are sorted by associated data values. #if CONSTEXPR_ENUM_FLAGS_OPERATIONS || defined(DOXYGEN) - multi_reverse = - MDBX_DUPSORT | - MDBX_REVERSEDUP, ///< A more than one data value could be associated with - ///< each key. Internally each key is stored once, and - ///< the corresponding data values are sorted by - ///< byte-by-byte lexicographic comparison in reverse - ///< order, from the end of the keys to the beginning. 
- ///< In terms of keys, they are not unique, i.e. has - ///< duplicates which are sorted by associated data - ///< values. - multi_samelength = - MDBX_DUPSORT | - MDBX_DUPFIXED, ///< A more than one data value could be associated with - ///< each key, and all data values must be same length. - ///< Internally each key is stored once, and the - ///< corresponding data values are sorted by byte-by-byte - ///< lexicographic comparison like `std::memcmp()`. In - ///< terms of keys, they are not unique, i.e. has - ///< duplicates which are sorted by associated data values. - multi_ordinal = - MDBX_DUPSORT | MDBX_DUPFIXED | - MDBX_INTEGERDUP, ///< A more than one data value could be associated with - ///< each key, and all data values are binary integers in - ///< native byte order, either `uint32_t` or `uint64_t`, - ///< and will be sorted as such. Internally each key is - ///< stored once, and the corresponding data values are - ///< sorted. In terms of keys, they are not unique, i.e. - ///< has duplicates which are sorted by associated data - ///< values. + multi_reverse = MDBX_DUPSORT | MDBX_REVERSEDUP, ///< A more than one data value could be associated with + ///< each key. Internally each key is stored once, and + ///< the corresponding data values are sorted by + ///< byte-by-byte lexicographic comparison in reverse + ///< order, from the end of the keys to the beginning. + ///< In terms of keys, they are not unique, i.e. has + ///< duplicates which are sorted by associated data + ///< values. + multi_samelength = MDBX_DUPSORT | MDBX_DUPFIXED, ///< A more than one data value could be associated with + ///< each key, and all data values must be same length. + ///< Internally each key is stored once, and the + ///< corresponding data values are sorted by byte-by-byte + ///< lexicographic comparison like `std::memcmp()`. In + ///< terms of keys, they are not unique, i.e. has + ///< duplicates which are sorted by associated data values. 
+ multi_ordinal = MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP, ///< A more than one data value could be associated + ///< with each key, and all data values are binary + ///< integers in native byte order, either `uint32_t` + ///< or `uint64_t`, and will be sorted as such. + ///< Internally each key is stored once, and the + ///< corresponding data values are sorted. In terms of + ///< keys, they are not unique, i.e. has duplicates + ///< which are sorted by associated data values. multi_reverse_samelength = - MDBX_DUPSORT | MDBX_REVERSEDUP | - MDBX_DUPFIXED, ///< A more than one data value could be associated with - ///< each key, and all data values must be same length. - ///< Internally each key is stored once, and the - ///< corresponding data values are sorted by byte-by-byte - ///< lexicographic comparison in reverse order, from the - ///< end of the keys to the beginning. In terms of keys, - ///< they are not unique, i.e. has duplicates which are - ///< sorted by associated data values. + MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED, ///< A more than one data value could be associated with + ///< each key, and all data values must be same length. + ///< Internally each key is stored once, and the + ///< corresponding data values are sorted by byte-by-byte + ///< lexicographic comparison in reverse order, from the + ///< end of the keys to the beginning. In terms of keys, + ///< they are not unique, i.e. has duplicates which are + ///< sorted by associated data values. 
#else multi_reverse = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_REVERSEDUP), multi_samelength = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_DUPFIXED), - multi_ordinal = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_DUPFIXED) | - uint32_t(MDBX_INTEGERDUP), - multi_reverse_samelength = uint32_t(MDBX_DUPSORT) | - uint32_t(MDBX_REVERSEDUP) | - uint32_t(MDBX_DUPFIXED), + multi_ordinal = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_DUPFIXED) | uint32_t(MDBX_INTEGERDUP), + multi_reverse_samelength = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_REVERSEDUP) | uint32_t(MDBX_DUPFIXED), #endif msgpack = -1 ///< A more than one data value could be associated with each ///< key. Values are in [MessagePack](https://msgpack.org/) @@ -3492,8 +2939,7 @@ enum class value_mode { }; MDBX_CXX01_CONSTEXPR_ENUM bool is_usual(value_mode mode) noexcept { - return (MDBX_db_flags_t(mode) & (MDBX_DUPSORT | MDBX_INTEGERDUP | - MDBX_DUPFIXED | MDBX_REVERSEDUP)) == 0; + return (MDBX_db_flags_t(mode) & (MDBX_DUPSORT | MDBX_INTEGERDUP | MDBX_DUPFIXED | MDBX_REVERSEDUP)) == 0; } MDBX_CXX01_CONSTEXPR_ENUM bool is_multi(value_mode mode) noexcept { @@ -3512,9 +2958,7 @@ MDBX_CXX01_CONSTEXPR_ENUM bool is_reverse(value_mode mode) noexcept { return (MDBX_db_flags_t(mode) & MDBX_REVERSEDUP) != 0; } -MDBX_CXX01_CONSTEXPR_ENUM bool is_msgpack(value_mode mode) noexcept { - return mode == value_mode::msgpack; -} +MDBX_CXX01_CONSTEXPR_ENUM bool is_msgpack(value_mode mode) noexcept { return mode == value_mode::msgpack; } /// \brief A handle for an individual table (aka key-value space, maps or /// sub-database) in the environment. 
@@ -3537,8 +2981,7 @@ struct LIBMDBX_API_TYPE map_handle { struct LIBMDBX_API_TYPE info { map_handle::flags flags; map_handle::state state; - MDBX_CXX11_CONSTEXPR info(map_handle::flags flags, - map_handle::state state) noexcept; + MDBX_CXX11_CONSTEXPR info(map_handle::flags flags, map_handle::state state) noexcept; info(const info &) noexcept = default; info &operator=(const info &) noexcept = default; MDBX_CXX11_CONSTEXPR_ENUM mdbx::key_mode key_mode() const noexcept; @@ -3587,10 +3030,8 @@ public: MDBX_CXX14_CONSTEXPR operator bool() const noexcept; MDBX_CXX14_CONSTEXPR operator const MDBX_env *() const; MDBX_CXX14_CONSTEXPR operator MDBX_env *(); - friend MDBX_CXX11_CONSTEXPR bool operator==(const env &a, - const env &b) noexcept; - friend MDBX_CXX11_CONSTEXPR bool operator!=(const env &a, - const env &b) noexcept; + friend MDBX_CXX11_CONSTEXPR bool operator==(const env &a, const env &b) noexcept; + friend MDBX_CXX11_CONSTEXPR bool operator!=(const env &a, const env &b) noexcept; //---------------------------------------------------------------------------- @@ -3664,20 +3105,15 @@ public: intptr_t pagesize{default_value}; inline geometry &make_fixed(intptr_t size) noexcept; - inline geometry &make_dynamic(intptr_t lower = minimal_value, - intptr_t upper = maximal_value) noexcept; + inline geometry &make_dynamic(intptr_t lower = minimal_value, intptr_t upper = maximal_value) noexcept; MDBX_CXX11_CONSTEXPR geometry() noexcept {} MDBX_CXX11_CONSTEXPR geometry(const geometry &) noexcept = default; - MDBX_CXX11_CONSTEXPR geometry(intptr_t size_lower, - intptr_t size_now = default_value, - intptr_t size_upper = maximal_value, - intptr_t growth_step = default_value, - intptr_t shrink_threshold = default_value, - intptr_t pagesize = default_value) noexcept - : size_lower(size_lower), size_now(size_now), size_upper(size_upper), - growth_step(growth_step), shrink_threshold(shrink_threshold), - pagesize(pagesize) {} + MDBX_CXX11_CONSTEXPR geometry(intptr_t size_lower, 
intptr_t size_now = default_value, + intptr_t size_upper = maximal_value, intptr_t growth_step = default_value, + intptr_t shrink_threshold = default_value, intptr_t pagesize = default_value) noexcept + : size_lower(size_lower), size_now(size_now), size_upper(size_upper), growth_step(growth_step), + shrink_threshold(shrink_threshold), pagesize(pagesize) {} }; /// \brief Operation mode. @@ -3705,8 +3141,7 @@ public: MDBX_CXX11_CONSTEXPR reclaiming_options() noexcept {} MDBX_CXX11_CONSTEXPR reclaiming_options(const reclaiming_options &) noexcept = default; - MDBX_CXX14_CONSTEXPR reclaiming_options & - operator=(const reclaiming_options &) noexcept = default; + MDBX_CXX14_CONSTEXPR reclaiming_options &operator=(const reclaiming_options &) noexcept = default; reclaiming_options(MDBX_env_flags_t) noexcept; }; @@ -3728,8 +3163,7 @@ public: MDBX_CXX11_CONSTEXPR operate_options() noexcept {} MDBX_CXX11_CONSTEXPR operate_options(const operate_options &) noexcept = default; - MDBX_CXX14_CONSTEXPR operate_options & - operator=(const operate_options &) noexcept = default; + MDBX_CXX14_CONSTEXPR operate_options &operator=(const operate_options &) noexcept = default; operate_options(MDBX_env_flags_t) noexcept; }; @@ -3748,31 +3182,25 @@ public: MDBX_CXX11_CONSTEXPR operate_parameters() noexcept {} MDBX_CXX11_CONSTEXPR - operate_parameters( - const unsigned max_maps, const unsigned max_readers = 0, - const env::mode mode = env::mode::write_mapped_io, - env::durability durability = env::durability::robust_synchronous, - const env::reclaiming_options &reclaiming = env::reclaiming_options(), - const env::operate_options &options = env::operate_options()) noexcept - : max_maps(max_maps), max_readers(max_readers), mode(mode), - durability(durability), reclaiming(reclaiming), options(options) {} + operate_parameters(const unsigned max_maps, const unsigned max_readers = 0, + const env::mode mode = env::mode::write_mapped_io, + env::durability durability = 
env::durability::robust_synchronous, + const env::reclaiming_options &reclaiming = env::reclaiming_options(), + const env::operate_options &options = env::operate_options()) noexcept + : max_maps(max_maps), max_readers(max_readers), mode(mode), durability(durability), reclaiming(reclaiming), + options(options) {} MDBX_CXX11_CONSTEXPR operate_parameters(const operate_parameters &) noexcept = default; - MDBX_CXX14_CONSTEXPR operate_parameters & - operator=(const operate_parameters &) noexcept = default; - MDBX_env_flags_t make_flags( - bool accede = true, ///< Allows accepting incompatible operating options - ///< in case the database is already being used by - ///< another process(es) \see MDBX_ACCEDE - bool use_subdirectory = - false ///< use subdirectory to place the DB files + MDBX_CXX14_CONSTEXPR operate_parameters &operator=(const operate_parameters &) noexcept = default; + MDBX_env_flags_t make_flags(bool accede = true, ///< Allows accepting incompatible operating options + ///< in case the database is already being used by + ///< another process(es) \see MDBX_ACCEDE + bool use_subdirectory = false ///< use subdirectory to place the DB files ) const; static env::mode mode_from_flags(MDBX_env_flags_t) noexcept; static env::durability durability_from_flags(MDBX_env_flags_t) noexcept; - inline static env::reclaiming_options - reclaiming_from_flags(MDBX_env_flags_t flags) noexcept; - inline static env::operate_options - options_from_flags(MDBX_env_flags_t flags) noexcept; + inline static env::reclaiming_options reclaiming_from_flags(MDBX_env_flags_t flags) noexcept; + inline static env::operate_options options_from_flags(MDBX_env_flags_t flags) noexcept; }; /// \brief Returns current operation parameters. @@ -3794,9 +3222,7 @@ public: bool is_empty() const; /// \brief Returns default page size for current system/platform. 
- static size_t default_pagesize() noexcept { - return ::mdbx_default_pagesize(); - } + static size_t default_pagesize() noexcept { return ::mdbx_default_pagesize(); } struct limits { limits() = delete; @@ -3849,8 +3275,7 @@ public: /// \brief Returns maximal size of key-value pair to fit in a single page /// for specified size and table flags. - static inline size_t pairsize4page_max(intptr_t pagesize, - MDBX_db_flags_t flags); + static inline size_t pairsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags); /// \brief Returns maximal size of key-value pair to fit in a single page /// for specified page size and values mode. static inline size_t pairsize4page_max(intptr_t pagesize, value_mode); @@ -3863,8 +3288,7 @@ public: /// \brief Returns maximal data size in bytes to fit in a leaf-page or /// single large/overflow-page for specified size and table flags. - static inline size_t valsize4page_max(intptr_t pagesize, - MDBX_db_flags_t flags); + static inline size_t valsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags); /// \brief Returns maximal data size in bytes to fit in a leaf-page or /// single large/overflow-page for specified page size and values mode. static inline size_t valsize4page_max(intptr_t pagesize, value_mode); @@ -3892,35 +3316,24 @@ public: /// \brief Returns the maximal key size in bytes for specified keys mode. size_t key_max(key_mode mode) const { return limits::key_max(*this, mode); } /// \brief Returns the minimal value size in bytes for specified values mode. - size_t value_min(value_mode mode) const noexcept { - return limits::value_min(mode); - } + size_t value_min(value_mode mode) const noexcept { return limits::value_min(mode); } /// \brief Returns the maximal value size in bytes for specified values mode. 
- size_t value_max(value_mode mode) const { - return limits::value_max(*this, mode); - } + size_t value_max(value_mode mode) const { return limits::value_max(*this, mode); } /// \brief Returns the maximal write transaction size (i.e. limit for summary /// volume of dirty pages) in bytes. - size_t transaction_size_max() const { - return limits::transaction_size_max(this->get_pagesize()); - } + size_t transaction_size_max() const { return limits::transaction_size_max(this->get_pagesize()); } /// \brief Make a copy (backup) of an existing environment to the specified /// path. #ifdef MDBX_STD_FILESYSTEM_PATH - env ©(const MDBX_STD_FILESYSTEM_PATH &destination, bool compactify, - bool force_dynamic_size = false); + env ©(const MDBX_STD_FILESYSTEM_PATH &destination, bool compactify, bool force_dynamic_size = false); #endif /* MDBX_STD_FILESYSTEM_PATH */ #if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) - env ©(const ::std::wstring &destination, bool compactify, - bool force_dynamic_size = false); - env ©(const wchar_t *destination, bool compactify, - bool force_dynamic_size = false); + env ©(const ::std::wstring &destination, bool compactify, bool force_dynamic_size = false); + env ©(const wchar_t *destination, bool compactify, bool force_dynamic_size = false); #endif /* Windows */ - env ©(const ::std::string &destination, bool compactify, - bool force_dynamic_size = false); - env ©(const char *destination, bool compactify, - bool force_dynamic_size = false); + env ©(const ::std::string &destination, bool compactify, bool force_dynamic_size = false); + env ©(const char *destination, bool compactify, bool force_dynamic_size = false); /// \brief Copy an environment to the specified file descriptor. env ©(filehandle fd, bool compactify, bool force_dynamic_size = false); @@ -3945,19 +3358,14 @@ public: /// \brief Removes the environment's files in a proper and multiprocess-safe /// way. 
#ifdef MDBX_STD_FILESYSTEM_PATH - static bool remove(const MDBX_STD_FILESYSTEM_PATH &pathname, - const remove_mode mode = just_remove); + static bool remove(const MDBX_STD_FILESYSTEM_PATH &pathname, const remove_mode mode = just_remove); #endif /* MDBX_STD_FILESYSTEM_PATH */ #if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) - static bool remove(const ::std::wstring &pathname, - const remove_mode mode = just_remove); - static bool remove(const wchar_t *pathname, - const remove_mode mode = just_remove); + static bool remove(const ::std::wstring &pathname, const remove_mode mode = just_remove); + static bool remove(const wchar_t *pathname, const remove_mode mode = just_remove); #endif /* Windows */ - static bool remove(const ::std::string &pathname, - const remove_mode mode = just_remove); - static bool remove(const char *pathname, - const remove_mode mode = just_remove); + static bool remove(const ::std::string &pathname, const remove_mode mode = just_remove); + static bool remove(const char *pathname, const remove_mode mode = just_remove); /// \brief Statistics for a database in the MDBX environment. using stat = ::MDBX_stat; @@ -4168,20 +3576,18 @@ public: ///< i.e. the number of committed write /// transactions since the current read /// transaction started. - size_t bytes_used; ///< The number of last used page in the MVCC-snapshot - ///< which being read, i.e. database file can't be shrunk - ///< beyond this. - size_t bytes_retained; ///< The total size of the database pages that - ///< were retired by committed write transactions - ///< after the reader's MVCC-snapshot, i.e. the space - ///< which would be freed after the Reader releases - ///< the MVCC-snapshot for reuse by completion read - ///< transaction. + size_t bytes_used; ///< The number of last used page in the MVCC-snapshot + ///< which being read, i.e. database file can't be shrunk + ///< beyond this. 
+ size_t bytes_retained; ///< The total size of the database pages that + ///< were retired by committed write transactions + ///< after the reader's MVCC-snapshot, i.e. the space + ///< which would be freed after the Reader releases + ///< the MVCC-snapshot for reuse by completion read + ///< transaction. - MDBX_CXX11_CONSTEXPR reader_info(int slot, mdbx_pid_t pid, - mdbx_tid_t thread, uint64_t txnid, - uint64_t lag, size_t used, - size_t retained) noexcept; + MDBX_CXX11_CONSTEXPR reader_info(int slot, mdbx_pid_t pid, mdbx_tid_t thread, uint64_t txnid, uint64_t lag, + size_t used, size_t retained) noexcept; }; /// \brief Enumerate readers. @@ -4257,19 +3663,14 @@ public: /// \brief Open existing database. #ifdef MDBX_STD_FILESYSTEM_PATH - env_managed(const MDBX_STD_FILESYSTEM_PATH &pathname, - const operate_parameters &, bool accede = true); + env_managed(const MDBX_STD_FILESYSTEM_PATH &pathname, const operate_parameters &, bool accede = true); #endif /* MDBX_STD_FILESYSTEM_PATH */ #if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) - env_managed(const ::std::wstring &pathname, const operate_parameters &, - bool accede = true); - explicit env_managed(const wchar_t *pathname, const operate_parameters &, - bool accede = true); + env_managed(const ::std::wstring &pathname, const operate_parameters &, bool accede = true); + explicit env_managed(const wchar_t *pathname, const operate_parameters &, bool accede = true); #endif /* Windows */ - env_managed(const ::std::string &pathname, const operate_parameters &, - bool accede = true); - explicit env_managed(const char *pathname, const operate_parameters &, - bool accede = true); + env_managed(const ::std::string &pathname, const operate_parameters &, bool accede = true); + explicit env_managed(const char *pathname, const operate_parameters &, bool accede = true); /// \brief Additional parameters for creating a new database. 
/// \see env_managed(const ::std::string &pathname, const create_parameters &, @@ -4284,20 +3685,17 @@ public: /// \brief Create new or open existing database. #ifdef MDBX_STD_FILESYSTEM_PATH - env_managed(const MDBX_STD_FILESYSTEM_PATH &pathname, - const create_parameters &, const operate_parameters &, + env_managed(const MDBX_STD_FILESYSTEM_PATH &pathname, const create_parameters &, const operate_parameters &, bool accede = true); #endif /* MDBX_STD_FILESYSTEM_PATH */ #if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) - env_managed(const ::std::wstring &pathname, const create_parameters &, - const operate_parameters &, bool accede = true); - explicit env_managed(const wchar_t *pathname, const create_parameters &, - const operate_parameters &, bool accede = true); + env_managed(const ::std::wstring &pathname, const create_parameters &, const operate_parameters &, + bool accede = true); + explicit env_managed(const wchar_t *pathname, const create_parameters &, const operate_parameters &, + bool accede = true); #endif /* Windows */ - env_managed(const ::std::string &pathname, const create_parameters &, - const operate_parameters &, bool accede = true); - explicit env_managed(const char *pathname, const create_parameters &, - const operate_parameters &, bool accede = true); + env_managed(const ::std::string &pathname, const create_parameters &, const operate_parameters &, bool accede = true); + explicit env_managed(const char *pathname, const create_parameters &, const operate_parameters &, bool accede = true); /// \brief Explicitly closes the environment and release the memory map. 
/// @@ -4353,10 +3751,8 @@ public: MDBX_CXX14_CONSTEXPR operator bool() const noexcept; MDBX_CXX14_CONSTEXPR operator const MDBX_txn *() const; MDBX_CXX14_CONSTEXPR operator MDBX_txn *(); - friend MDBX_CXX11_CONSTEXPR bool operator==(const txn &a, - const txn &b) noexcept; - friend MDBX_CXX11_CONSTEXPR bool operator!=(const txn &a, - const txn &b) noexcept; + friend MDBX_CXX11_CONSTEXPR bool operator==(const txn &a, const txn &b) noexcept; + friend MDBX_CXX11_CONSTEXPR bool operator!=(const txn &a, const txn &b) noexcept; /// \brief Returns the transaction's environment. inline ::mdbx::env env() const noexcept; @@ -4426,20 +3822,14 @@ public: inline size_t unbind_all_cursors() const { return release_all_cursors(true); } /// \brief Open existing key-value map. - inline map_handle open_map( - const char *name, - const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, - const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single) const; + inline map_handle open_map(const char *name, const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, + const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single) const; /// \brief Open existing key-value map. - inline map_handle open_map( - const ::std::string &name, - const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, - const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single) const; + inline map_handle open_map(const ::std::string &name, const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, + const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single) const; /// \brief Open existing key-value map. 
- inline map_handle open_map( - const ::mdbx::slice &name, - const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, - const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single) const; + inline map_handle open_map(const ::mdbx::slice &name, const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, + const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single) const; /// \brief Open existing key-value map. inline map_handle open_map_accede(const char *name) const; @@ -4449,20 +3839,14 @@ public: inline map_handle open_map_accede(const ::mdbx::slice &name) const; /// \brief Create new or open existing key-value map. - inline map_handle - create_map(const char *name, - const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, - const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single); + inline map_handle create_map(const char *name, const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, + const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single); /// \brief Create new or open existing key-value map. - inline map_handle - create_map(const ::std::string &name, - const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, - const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single); + inline map_handle create_map(const ::std::string &name, const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, + const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single); /// \brief Create new or open existing key-value map. - inline map_handle - create_map(const ::mdbx::slice &name, - const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, - const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single); + inline map_handle create_map(const ::mdbx::slice &name, const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, + const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single); /// \brief Drops key-value map using handle. 
inline void drop_map(map_handle map); @@ -4486,8 +3870,7 @@ public: bool clear_map(const char *name, bool throw_if_absent = false); /// \return `True` if the key-value map existed and was cleared, either /// `false` if the key-value map did not exist and there is nothing to clear. - inline bool clear_map(const ::std::string &name, - bool throw_if_absent = false); + inline bool clear_map(const ::std::string &name, bool throw_if_absent = false); /// \return `True` if the key-value map existed and was cleared, either /// `false` if the key-value map did not exist and there is nothing to clear. bool clear_map(const ::mdbx::slice &name, bool throw_if_absent = false); @@ -4501,36 +3884,29 @@ public: /// \brief Переименовывает таблицу ключ-значение. /// \return `True` если таблица существует и была переименована, либо /// `false` в случае отсутствия исходной таблицы. - bool rename_map(const char *old_name, const char *new_name, - bool throw_if_absent = false); + bool rename_map(const char *old_name, const char *new_name, bool throw_if_absent = false); /// \brief Переименовывает таблицу ключ-значение. /// \return `True` если таблица существует и была переименована, либо /// `false` в случае отсутствия исходной таблицы. - bool rename_map(const ::std::string &old_name, const ::std::string &new_name, - bool throw_if_absent = false); + bool rename_map(const ::std::string &old_name, const ::std::string &new_name, bool throw_if_absent = false); /// \brief Переименовывает таблицу ключ-значение. /// \return `True` если таблица существует и была переименована, либо /// `false` в случае отсутствия исходной таблицы. 
- bool rename_map(const ::mdbx::slice &old_name, const ::mdbx::slice &new_name, - bool throw_if_absent = false); + bool rename_map(const ::mdbx::slice &old_name, const ::mdbx::slice &new_name, bool throw_if_absent = false); -#if defined(DOXYGEN) || \ - (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) +#if defined(DOXYGEN) || (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) /// \brief Open existing key-value map. - inline map_handle open_map( - const ::std::string_view &name, - const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, - const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single) const { + inline map_handle open_map(const ::std::string_view &name, const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, + const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single) const { return open_map(::mdbx::slice(name), key_mode, value_mode); } /// \brief Open existing key-value map. inline map_handle open_map_accede(const ::std::string_view &name) const; /// \brief Create new or open existing key-value map. - inline map_handle - create_map(const ::std::string_view &name, - const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, - const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single) { + inline map_handle create_map(const ::std::string_view &name, + const ::mdbx::key_mode key_mode = ::mdbx::key_mode::usual, + const ::mdbx::value_mode value_mode = ::mdbx::value_mode::single) { return create_map(::mdbx::slice(name), key_mode, value_mode); } /// \brief Drop key-value map. @@ -4549,11 +3925,9 @@ public: /// \brief Переименовывает таблицу ключ-значение. /// \return `True` если таблица существует и была переименована, либо /// `false` в случае отсутствия исходной таблицы. 
- bool rename_map(const ::std::string_view &old_name, - const ::std::string_view &new_name, + bool rename_map(const ::std::string_view &old_name, const ::std::string_view &new_name, bool throw_if_absent = false) { - return rename_map(::mdbx::slice(old_name), ::mdbx::slice(new_name), - throw_if_absent); + return rename_map(::mdbx::slice(old_name), ::mdbx::slice(new_name), throw_if_absent); } #endif /* __cpp_lib_string_view >= 201606L */ @@ -4583,20 +3957,16 @@ public: /// \brief Compare two keys according to a particular key-value map (aka /// table). - inline int compare_keys(map_handle map, const slice &a, - const slice &b) const noexcept; + inline int compare_keys(map_handle map, const slice &a, const slice &b) const noexcept; /// \brief Compare two values according to a particular key-value map (aka /// table). - inline int compare_values(map_handle map, const slice &a, - const slice &b) const noexcept; + inline int compare_values(map_handle map, const slice &a, const slice &b) const noexcept; /// \brief Compare keys of two pairs according to a particular key-value map /// (aka table). - inline int compare_keys(map_handle map, const pair &a, - const pair &b) const noexcept; + inline int compare_keys(map_handle map, const pair &a, const pair &b) const noexcept; /// \brief Compare values of two pairs according to a particular key-value map /// (aka table). - inline int compare_values(map_handle map, const pair &a, - const pair &b) const noexcept; + inline int compare_values(map_handle map, const pair &a, const pair &b) const noexcept; /// \brief Get value by key from a key-value map (aka table). inline slice get(map_handle map, const slice &key) const; @@ -4604,12 +3974,10 @@ public: /// multimap (aka table). inline slice get(map_handle map, slice key, size_t &values_count) const; /// \brief Get value by key from a key-value map (aka table). 
- inline slice get(map_handle map, const slice &key, - const slice &value_at_absence) const; + inline slice get(map_handle map, const slice &key, const slice &value_at_absence) const; /// \brief Get first of multi-value and values count by key from a key-value /// multimap (aka table). - inline slice get(map_handle map, slice key, size_t &values_count, - const slice &value_at_absence) const; + inline slice get(map_handle map, slice key, size_t &values_count, const slice &value_at_absence) const; /// \brief Get value for equal or great key from a table. /// \return Bundle of key-value pair and boolean flag, /// which will be `true` if the exact key was found and `false` otherwise. @@ -4617,42 +3985,27 @@ public: /// \brief Get value for equal or great key from a table. /// \return Bundle of key-value pair and boolean flag, /// which will be `true` if the exact key was found and `false` otherwise. - inline pair_result get_equal_or_great(map_handle map, const slice &key, - const slice &value_at_absence) const; + inline pair_result get_equal_or_great(map_handle map, const slice &key, const slice &value_at_absence) const; - inline MDBX_error_t put(map_handle map, const slice &key, slice *value, - MDBX_put_flags_t flags) noexcept; + inline MDBX_error_t put(map_handle map, const slice &key, slice *value, MDBX_put_flags_t flags) noexcept; inline void put(map_handle map, const slice &key, slice value, put_mode mode); inline void insert(map_handle map, const slice &key, slice value); inline value_result try_insert(map_handle map, const slice &key, slice value); - inline slice insert_reserve(map_handle map, const slice &key, - size_t value_length); - inline value_result try_insert_reserve(map_handle map, const slice &key, - size_t value_length); + inline slice insert_reserve(map_handle map, const slice &key, size_t value_length); + inline value_result try_insert_reserve(map_handle map, const slice &key, size_t value_length); inline void upsert(map_handle map, const slice 
&key, const slice &value); - inline slice upsert_reserve(map_handle map, const slice &key, - size_t value_length); + inline slice upsert_reserve(map_handle map, const slice &key, size_t value_length); inline void update(map_handle map, const slice &key, const slice &value); inline bool try_update(map_handle map, const slice &key, const slice &value); - inline slice update_reserve(map_handle map, const slice &key, - size_t value_length); - inline value_result try_update_reserve(map_handle map, const slice &key, - size_t value_length); + inline slice update_reserve(map_handle map, const slice &key, size_t value_length); + inline value_result try_update_reserve(map_handle map, const slice &key, size_t value_length); - void put(map_handle map, const pair &kv, put_mode mode) { - return put(map, kv.key, kv.value, mode); - } - void insert(map_handle map, const pair &kv) { - return insert(map, kv.key, kv.value); - } - value_result try_insert(map_handle map, const pair &kv) { - return try_insert(map, kv.key, kv.value); - } - void upsert(map_handle map, const pair &kv) { - return upsert(map, kv.key, kv.value); - } + void put(map_handle map, const pair &kv, put_mode mode) { return put(map, kv.key, kv.value, mode); } + void insert(map_handle map, const pair &kv) { return insert(map, kv.key, kv.value); } + value_result try_insert(map_handle map, const pair &kv) { return try_insert(map, kv.key, kv.value); } + void upsert(map_handle map, const pair &kv) { return upsert(map, kv.key, kv.value); } /// \brief Removes all values for given key. inline bool erase(map_handle map, const slice &key); @@ -4661,28 +4014,27 @@ public: inline bool erase(map_handle map, const slice &key, const slice &value); /// \brief Replaces the particular multi-value of the key with a new value. 
- inline void replace(map_handle map, const slice &key, slice old_value, - const slice &new_value); + inline void replace(map_handle map, const slice &key, slice old_value, const slice &new_value); /// \brief Removes and return a value of the key. template inline buffer extract(map_handle map, const slice &key, - const typename buffer::allocator_type & - allocator = buffer::allocator_type()); + const typename buffer::allocator_type &allocator = + buffer::allocator_type()); /// \brief Replaces and returns a value of the key with new one. template inline buffer replace(map_handle map, const slice &key, const slice &new_value, - const typename buffer::allocator_type & - allocator = buffer::allocator_type()); + const typename buffer::allocator_type &allocator = + buffer::allocator_type()); template - inline buffer replace_reserve( - map_handle map, const slice &key, slice &new_value, - const typename buffer::allocator_type - &allocator = buffer::allocator_type()); + inline buffer + replace_reserve(map_handle map, const slice &key, slice &new_value, + const typename buffer::allocator_type &allocator = + buffer::allocator_type()); /// \brief Adding a key-value pair, provided that ascending order of the keys /// and (optionally) values are preserved. @@ -4700,39 +4052,28 @@ public: /// \param [in] multivalue_order_preserved /// If `multivalue_order_preserved == true` then the same rules applied for /// to pages of nested b+tree of multimap's values. 
- inline void append(map_handle map, const slice &key, const slice &value, - bool multivalue_order_preserved = true); - inline void append(map_handle map, const pair &kv, - bool multivalue_order_preserved = true) { + inline void append(map_handle map, const slice &key, const slice &value, bool multivalue_order_preserved = true); + inline void append(map_handle map, const pair &kv, bool multivalue_order_preserved = true) { return append(map, kv.key, kv.value, multivalue_order_preserved); } - size_t put_multiple_samelength(map_handle map, const slice &key, - const size_t value_length, - const void *values_array, size_t values_count, - put_mode mode, bool allow_partial = false); + size_t put_multiple_samelength(map_handle map, const slice &key, const size_t value_length, const void *values_array, + size_t values_count, put_mode mode, bool allow_partial = false); template - size_t put_multiple_samelength(map_handle map, const slice &key, - const VALUE *values_array, size_t values_count, + size_t put_multiple_samelength(map_handle map, const slice &key, const VALUE *values_array, size_t values_count, put_mode mode, bool allow_partial = false) { - static_assert(::std::is_standard_layout::value && - !::std::is_pointer::value && + static_assert(::std::is_standard_layout::value && !::std::is_pointer::value && !::std::is_array::value, "Must be a standard layout type!"); - return put_multiple_samelength(map, key, sizeof(VALUE), values_array, - values_count, mode, allow_partial); + return put_multiple_samelength(map, key, sizeof(VALUE), values_array, values_count, mode, allow_partial); } template - void put_multiple_samelength(map_handle map, const slice &key, - const ::std::vector &vector, - put_mode mode) { + void put_multiple_samelength(map_handle map, const slice &key, const ::std::vector &vector, put_mode mode) { put_multiple_samelength(map, key, vector.data(), vector.size(), mode); } - inline ptrdiff_t estimate(map_handle map, const pair &from, - const pair &to) const; - 
inline ptrdiff_t estimate(map_handle map, const slice &from, - const slice &to) const; + inline ptrdiff_t estimate(map_handle map, const pair &from, const pair &to) const; + inline ptrdiff_t estimate(map_handle map, const slice &from, const slice &to) const; inline ptrdiff_t estimate_from_first(map_handle map, const slice &to) const; inline ptrdiff_t estimate_to_last(map_handle map, const slice &from) const; }; @@ -4823,23 +4164,17 @@ public: MDBX_CXX14_CONSTEXPR operator bool() const noexcept; MDBX_CXX14_CONSTEXPR operator const MDBX_cursor *() const; MDBX_CXX14_CONSTEXPR operator MDBX_cursor *(); - friend MDBX_CXX11_CONSTEXPR bool operator==(const cursor &a, - const cursor &b) noexcept; - friend MDBX_CXX11_CONSTEXPR bool operator!=(const cursor &a, - const cursor &b) noexcept; + friend MDBX_CXX11_CONSTEXPR bool operator==(const cursor &a, const cursor &b) noexcept; + friend MDBX_CXX11_CONSTEXPR bool operator!=(const cursor &a, const cursor &b) noexcept; - friend inline int compare_position_nothrow(const cursor &left, - const cursor &right, - bool ignore_nested) noexcept; - friend inline int compare_position(const cursor &left, const cursor &right, - bool ignore_nested); + friend inline int compare_position_nothrow(const cursor &left, const cursor &right, bool ignore_nested) noexcept; + friend inline int compare_position(const cursor &left, const cursor &right, bool ignore_nested); bool is_before_than(const cursor &other, bool ignore_nested = false) const { return compare_position(*this, other, ignore_nested) < 0; } - bool is_same_or_before_than(const cursor &other, - bool ignore_nested = false) const { + bool is_same_or_before_than(const cursor &other, bool ignore_nested = false) const { return compare_position(*this, other, ignore_nested) <= 0; } @@ -4851,8 +4186,7 @@ public: return compare_position(*this, other, ignore_nested) > 0; } - bool is_same_or_after_than(const cursor &other, - bool ignore_nested = false) const { + bool is_same_or_after_than(const cursor 
&other, bool ignore_nested = false) const { return compare_position(*this, other, ignore_nested) >= 0; } @@ -4893,11 +4227,9 @@ public: /* Doubtless cursor positioning at a specified key-value pair * for dupsort/multi-value hives. */ multi_exactkey_value_lesser_than = MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN, - multi_exactkey_value_lesser_or_equal = - MDBX_TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL, + multi_exactkey_value_lesser_or_equal = MDBX_TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL, multi_exactkey_value_equal = MDBX_TO_EXACT_KEY_VALUE_EQUAL, - multi_exactkey_value_greater_or_equal = - MDBX_TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL, + multi_exactkey_value_greater_or_equal = MDBX_TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL, multi_exactkey_value_greater = MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN, pair_lesser_than = MDBX_TO_PAIR_LESSER_THAN, @@ -4915,14 +4247,10 @@ public: struct move_result : public pair_result { inline move_result(const cursor &cursor, bool throw_notfound); move_result(cursor &cursor, move_operation operation, bool throw_notfound) - : move_result(cursor, operation, slice::invalid(), slice::invalid(), - throw_notfound) {} - move_result(cursor &cursor, move_operation operation, const slice &key, - bool throw_notfound) - : move_result(cursor, operation, key, slice::invalid(), - throw_notfound) {} - inline move_result(cursor &cursor, move_operation operation, - const slice &key, const slice &value, + : move_result(cursor, operation, slice::invalid(), slice::invalid(), throw_notfound) {} + move_result(cursor &cursor, move_operation operation, const slice &key, bool throw_notfound) + : move_result(cursor, operation, key, slice::invalid(), throw_notfound) {} + inline move_result(cursor &cursor, move_operation operation, const slice &key, const slice &value, bool throw_notfound); move_result(const move_result &) noexcept = default; move_result &operator=(const move_result &) noexcept = default; @@ -4931,38 +4259,30 @@ public: struct estimate_result : public pair { ptrdiff_t 
approximate_quantity; estimate_result(const cursor &cursor, move_operation operation) - : estimate_result(cursor, operation, slice::invalid(), - slice::invalid()) {} - estimate_result(const cursor &cursor, move_operation operation, - const slice &key) + : estimate_result(cursor, operation, slice::invalid(), slice::invalid()) {} + estimate_result(const cursor &cursor, move_operation operation, const slice &key) : estimate_result(cursor, operation, key, slice::invalid()) {} - inline estimate_result(const cursor &cursor, move_operation operation, - const slice &key, const slice &value); + inline estimate_result(const cursor &cursor, move_operation operation, const slice &key, const slice &value); estimate_result(const estimate_result &) noexcept = default; estimate_result &operator=(const estimate_result &) noexcept = default; }; protected: /* fake const, i.e. for some move/get operations */ - inline bool move(move_operation operation, MDBX_val *key, MDBX_val *value, - bool throw_notfound) const; + inline bool move(move_operation operation, MDBX_val *key, MDBX_val *value, bool throw_notfound) const; - inline ptrdiff_t estimate(move_operation operation, MDBX_val *key, - MDBX_val *value) const; + inline ptrdiff_t estimate(move_operation operation, MDBX_val *key, MDBX_val *value) const; public: template - bool scan(CALLABLE_PREDICATE predicate, move_operation start = first, - move_operation turn = next) { + bool scan(CALLABLE_PREDICATE predicate, move_operation start = first, move_operation turn = next) { struct wrapper : public exception_thunk { - static int probe(void *context, MDBX_val *key, MDBX_val *value, - void *arg) noexcept { + static int probe(void *context, MDBX_val *key, MDBX_val *value, void *arg) noexcept { auto thunk = static_cast(context); assert(thunk->is_clean()); auto &predicate = *static_cast(arg); try { - return predicate(pair(*key, *value)) ? MDBX_RESULT_TRUE - : MDBX_RESULT_FALSE; + return predicate(pair(*key, *value)) ? 
MDBX_RESULT_TRUE : MDBX_RESULT_FALSE; } catch (... /* capture any exception to rethrow it over C code */) { thunk->capture(); return MDBX_RESULT_TRUE; @@ -4970,92 +4290,71 @@ public: } } thunk; return error::boolean_or_throw( - ::mdbx_cursor_scan(handle_, wrapper::probe, &thunk, - MDBX_cursor_op(start), MDBX_cursor_op(turn), - &predicate), + ::mdbx_cursor_scan(handle_, wrapper::probe, &thunk, MDBX_cursor_op(start), MDBX_cursor_op(turn), &predicate), thunk); } - template - bool fullscan(CALLABLE_PREDICATE predicate, bool backward = false) { - return scan(std::move(predicate), backward ? last : first, - backward ? previous : next); + template bool fullscan(CALLABLE_PREDICATE predicate, bool backward = false) { + return scan(std::move(predicate), backward ? last : first, backward ? previous : next); } template - bool scan_from(CALLABLE_PREDICATE predicate, slice &from, - move_operation start = key_greater_or_equal, + bool scan_from(CALLABLE_PREDICATE predicate, slice &from, move_operation start = key_greater_or_equal, move_operation turn = next) { struct wrapper : public exception_thunk { - static int probe(void *context, MDBX_val *key, MDBX_val *value, - void *arg) noexcept { + static int probe(void *context, MDBX_val *key, MDBX_val *value, void *arg) noexcept { auto thunk = static_cast(context); assert(thunk->is_clean()); auto &predicate = *static_cast(arg); try { - return predicate(pair(*key, *value)) ? MDBX_RESULT_TRUE - : MDBX_RESULT_FALSE; + return predicate(pair(*key, *value)) ? MDBX_RESULT_TRUE : MDBX_RESULT_FALSE; } catch (... 
/* capture any exception to rethrow it over C code */) { thunk->capture(); return MDBX_RESULT_TRUE; } } } thunk; - return error::boolean_or_throw( - ::mdbx_cursor_scan_from(handle_, wrapper::probe, &thunk, - MDBX_cursor_op(start), &from, nullptr, - MDBX_cursor_op(turn), &predicate), - thunk); + return error::boolean_or_throw(::mdbx_cursor_scan_from(handle_, wrapper::probe, &thunk, MDBX_cursor_op(start), + &from, nullptr, MDBX_cursor_op(turn), &predicate), + thunk); } template - bool scan_from(CALLABLE_PREDICATE predicate, pair &from, - move_operation start = pair_greater_or_equal, + bool scan_from(CALLABLE_PREDICATE predicate, pair &from, move_operation start = pair_greater_or_equal, move_operation turn = next) { struct wrapper : public exception_thunk { - static int probe(void *context, MDBX_val *key, MDBX_val *value, - void *arg) noexcept { + static int probe(void *context, MDBX_val *key, MDBX_val *value, void *arg) noexcept { auto thunk = static_cast(context); assert(thunk->is_clean()); auto &predicate = *static_cast(arg); try { - return predicate(pair(*key, *value)) ? MDBX_RESULT_TRUE - : MDBX_RESULT_FALSE; + return predicate(pair(*key, *value)) ? MDBX_RESULT_TRUE : MDBX_RESULT_FALSE; } catch (... 
/* capture any exception to rethrow it over C code */) { thunk->capture(); return MDBX_RESULT_TRUE; } } } thunk; - return error::boolean_or_throw( - ::mdbx_cursor_scan_from(handle_, wrapper::probe, &thunk, - MDBX_cursor_op(start), &from.key, &from.value, - MDBX_cursor_op(turn), &predicate), - thunk); + return error::boolean_or_throw(::mdbx_cursor_scan_from(handle_, wrapper::probe, &thunk, MDBX_cursor_op(start), + &from.key, &from.value, MDBX_cursor_op(turn), &predicate), + thunk); } move_result move(move_operation operation, bool throw_notfound) { return move_result(*this, operation, throw_notfound); } - move_result move(move_operation operation, const slice &key, - bool throw_notfound) { + move_result move(move_operation operation, const slice &key, bool throw_notfound) { return move_result(*this, operation, key, slice::invalid(), throw_notfound); } - move_result move(move_operation operation, const slice &key, - const slice &value, bool throw_notfound) { + move_result move(move_operation operation, const slice &key, const slice &value, bool throw_notfound) { return move_result(*this, operation, key, value, throw_notfound); } - bool move(move_operation operation, slice &key, slice &value, - bool throw_notfound) { + bool move(move_operation operation, slice &key, slice &value, bool throw_notfound) { return move(operation, &key, &value, throw_notfound); } - move_result to_first(bool throw_notfound = true) { - return move(first, throw_notfound); - } - move_result to_previous(bool throw_notfound = true) { - return move(previous, throw_notfound); - } + move_result to_first(bool throw_notfound = true) { return move(first, throw_notfound); } + move_result to_previous(bool throw_notfound = true) { return move(previous, throw_notfound); } move_result to_previous_last_multi(bool throw_notfound = true) { return move(multi_prevkey_lastvalue, throw_notfound); } @@ -5065,30 +4364,21 @@ public: move_result to_current_prev_multi(bool throw_notfound = true) { return 
move(multi_currentkey_prevvalue, throw_notfound); } - move_result current(bool throw_notfound = true) const { - return move_result(*this, throw_notfound); - } + move_result current(bool throw_notfound = true) const { return move_result(*this, throw_notfound); } move_result to_current_next_multi(bool throw_notfound = true) { return move(multi_currentkey_nextvalue, throw_notfound); } move_result to_current_last_multi(bool throw_notfound = true) { return move(multi_currentkey_lastvalue, throw_notfound); } - move_result to_next_first_multi(bool throw_notfound = true) { - return move(multi_nextkey_firstvalue, throw_notfound); - } - move_result to_next(bool throw_notfound = true) { - return move(next, throw_notfound); - } - move_result to_last(bool throw_notfound = true) { - return move(last, throw_notfound); - } + move_result to_next_first_multi(bool throw_notfound = true) { return move(multi_nextkey_firstvalue, throw_notfound); } + move_result to_next(bool throw_notfound = true) { return move(next, throw_notfound); } + move_result to_last(bool throw_notfound = true) { return move(last, throw_notfound); } move_result to_key_lesser_than(const slice &key, bool throw_notfound = true) { return move(key_lesser_than, key, throw_notfound); } - move_result to_key_lesser_or_equal(const slice &key, - bool throw_notfound = true) { + move_result to_key_lesser_or_equal(const slice &key, bool throw_notfound = true) { return move(key_lesser_or_equal, key, throw_notfound); } move_result to_key_equal(const slice &key, bool throw_notfound = true) { @@ -5097,64 +4387,45 @@ public: move_result to_key_exact(const slice &key, bool throw_notfound = true) { return move(key_exact, key, throw_notfound); } - move_result to_key_greater_or_equal(const slice &key, - bool throw_notfound = true) { + move_result to_key_greater_or_equal(const slice &key, bool throw_notfound = true) { return move(key_greater_or_equal, key, throw_notfound); } - move_result to_key_greater_than(const slice &key, - bool 
throw_notfound = true) { + move_result to_key_greater_than(const slice &key, bool throw_notfound = true) { return move(key_greater_than, key, throw_notfound); } - move_result to_exact_key_value_lesser_than(const slice &key, - const slice &value, - bool throw_notfound = true) { + move_result to_exact_key_value_lesser_than(const slice &key, const slice &value, bool throw_notfound = true) { return move(multi_exactkey_value_lesser_than, key, value, throw_notfound); } - move_result to_exact_key_value_lesser_or_equal(const slice &key, - const slice &value, - bool throw_notfound = true) { - return move(multi_exactkey_value_lesser_or_equal, key, value, - throw_notfound); + move_result to_exact_key_value_lesser_or_equal(const slice &key, const slice &value, bool throw_notfound = true) { + return move(multi_exactkey_value_lesser_or_equal, key, value, throw_notfound); } - move_result to_exact_key_value_equal(const slice &key, const slice &value, - bool throw_notfound = true) { + move_result to_exact_key_value_equal(const slice &key, const slice &value, bool throw_notfound = true) { return move(multi_exactkey_value_equal, key, value, throw_notfound); } - move_result to_exact_key_value_greater_or_equal(const slice &key, - const slice &value, - bool throw_notfound = true) { - return move(multi_exactkey_value_greater_or_equal, key, value, - throw_notfound); + move_result to_exact_key_value_greater_or_equal(const slice &key, const slice &value, bool throw_notfound = true) { + return move(multi_exactkey_value_greater_or_equal, key, value, throw_notfound); } - move_result to_exact_key_value_greater_than(const slice &key, - const slice &value, - bool throw_notfound = true) { + move_result to_exact_key_value_greater_than(const slice &key, const slice &value, bool throw_notfound = true) { return move(multi_exactkey_value_greater, key, value, throw_notfound); } - move_result to_pair_lesser_than(const slice &key, const slice &value, - bool throw_notfound = true) { + move_result 
to_pair_lesser_than(const slice &key, const slice &value, bool throw_notfound = true) { return move(pair_lesser_than, key, value, throw_notfound); } - move_result to_pair_lesser_or_equal(const slice &key, const slice &value, - bool throw_notfound = true) { + move_result to_pair_lesser_or_equal(const slice &key, const slice &value, bool throw_notfound = true) { return move(pair_lesser_or_equal, key, value, throw_notfound); } - move_result to_pair_equal(const slice &key, const slice &value, - bool throw_notfound = true) { + move_result to_pair_equal(const slice &key, const slice &value, bool throw_notfound = true) { return move(pair_equal, key, value, throw_notfound); } - move_result to_pair_exact(const slice &key, const slice &value, - bool throw_notfound = true) { + move_result to_pair_exact(const slice &key, const slice &value, bool throw_notfound = true) { return move(pair_exact, key, value, throw_notfound); } - move_result to_pair_greater_or_equal(const slice &key, const slice &value, - bool throw_notfound = true) { + move_result to_pair_greater_or_equal(const slice &key, const slice &value, bool throw_notfound = true) { return move(pair_greater_or_equal, key, value, throw_notfound); } - move_result to_pair_greater_than(const slice &key, const slice &value, - bool throw_notfound = true) { + move_result to_pair_greater_than(const slice &key, const slice &value, bool throw_notfound = true) { return move(pair_greater_than, key, value, throw_notfound); } @@ -5166,17 +4437,11 @@ public: /// \brief Return count of duplicates for current key. 
inline size_t count_multivalue() const; - inline move_result find_multivalue(const slice &key, const slice &value, - bool throw_notfound = true); - inline move_result lower_bound_multivalue(const slice &key, - const slice &value, - bool throw_notfound = false); - inline move_result upper_bound_multivalue(const slice &key, - const slice &value, - bool throw_notfound = false); + inline move_result find_multivalue(const slice &key, const slice &value, bool throw_notfound = true); + inline move_result lower_bound_multivalue(const slice &key, const slice &value, bool throw_notfound = false); + inline move_result upper_bound_multivalue(const slice &key, const slice &value, bool throw_notfound = false); - inline move_result get_multiple_samelength(const slice &key, - bool throw_notfound = true) { + inline move_result get_multiple_samelength(const slice &key, bool throw_notfound = true) { return move(batch_samelength, key, throw_notfound); } @@ -5222,8 +4487,7 @@ public: inline operator ::mdbx::txn() const { return txn(); } inline operator ::mdbx::map_handle() const { return map(); } - inline MDBX_error_t put(const slice &key, slice *value, - MDBX_put_flags_t flags) noexcept; + inline MDBX_error_t put(const slice &key, slice *value, MDBX_put_flags_t flags) noexcept; inline void put(const slice &key, slice value, put_mode mode); inline void insert(const slice &key, slice value); inline value_result try_insert(const slice &key, slice value); @@ -5238,13 +4502,9 @@ public: inline slice update_reserve(const slice &key, size_t value_length); inline value_result try_update_reserve(const slice &key, size_t value_length); - void put(const pair &kv, put_mode mode) { - return put(kv.key, kv.value, mode); - } + void put(const pair &kv, put_mode mode) { return put(kv.key, kv.value, mode); } void insert(const pair &kv) { return insert(kv.key, kv.value); } - value_result try_insert(const pair &kv) { - return try_insert(kv.key, kv.value); - } + value_result try_insert(const pair &kv) { 
return try_insert(kv.key, kv.value); } void upsert(const pair &kv) { return upsert(kv.key, kv.value); } /// \brief Removes single key-value pair or all multi-values at the current @@ -5272,13 +4532,11 @@ class LIBMDBX_API_TYPE cursor_managed : public cursor { using inherited = cursor; friend class txn; /// delegated constructor for RAII - MDBX_CXX11_CONSTEXPR cursor_managed(MDBX_cursor *ptr) noexcept - : inherited(ptr) {} + MDBX_CXX11_CONSTEXPR cursor_managed(MDBX_cursor *ptr) noexcept : inherited(ptr) {} public: /// \brief Creates a new managed cursor with underlying object. - cursor_managed(void *your_context = nullptr) - : cursor_managed(::mdbx_cursor_create(your_context)) { + cursor_managed(void *your_context = nullptr) : cursor_managed(::mdbx_cursor_create(your_context)) { if (MDBX_UNLIKELY(!handle_)) MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_ENOMEM); } @@ -5308,53 +4566,33 @@ LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, const slice &); LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, const pair &); LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, const pair_result &); template -inline ::std::ostream & -operator<<(::std::ostream &out, const buffer &it) { - return (it.is_freestanding() - ? out << "buf-" << it.headroom() << "." << it.tailroom() - : out << "ref-") - << it.slice(); +inline ::std::ostream &operator<<(::std::ostream &out, const buffer &it) { + return (it.is_freestanding() ? out << "buf-" << it.headroom() << "." 
<< it.tailroom() : out << "ref-") << it.slice(); } -LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, - const env::geometry::size &); +LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, const env::geometry::size &); LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, const env::geometry &); -LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, - const env::operate_parameters &); +LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, const env::operate_parameters &); LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, const env::mode &); -LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, - const env::durability &); -LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, - const env::reclaiming_options &); -LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, - const env::operate_options &); -LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, - const env_managed::create_parameters &); +LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, const env::durability &); +LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, const env::reclaiming_options &); +LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, const env::operate_options &); +LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, const env_managed::create_parameters &); -LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, - const MDBX_log_level_t &); -LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, - const MDBX_debug_flags_t &); +LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, const MDBX_log_level_t &); +LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, const MDBX_debug_flags_t &); LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, const error &); -inline ::std::ostream &operator<<(::std::ostream &out, - const MDBX_error_t &errcode) { - return out << error(errcode); -} +inline ::std::ostream &operator<<(::std::ostream &out, const MDBX_error_t &errcode) { return out << error(errcode); } 
//============================================================================== // // Inline body of the libmdbx C++ API // -MDBX_CXX11_CONSTEXPR const version_info &get_version() noexcept { - return ::mdbx_version; -} -MDBX_CXX11_CONSTEXPR const build_info &get_build() noexcept { - return ::mdbx_build; -} +MDBX_CXX11_CONSTEXPR const version_info &get_version() noexcept { return ::mdbx_version; } +MDBX_CXX11_CONSTEXPR const build_info &get_build() noexcept { return ::mdbx_build; } static MDBX_CXX17_CONSTEXPR size_t strlen(const char *c_str) noexcept { -#if defined(__cpp_lib_is_constant_evaluated) && \ - __cpp_lib_is_constant_evaluated >= 201811L +#if defined(__cpp_lib_is_constant_evaluated) && __cpp_lib_is_constant_evaluated >= 201811L if (::std::is_constant_evaluated()) { for (size_t i = 0; c_str; ++i) if (!c_str[i]) @@ -5369,10 +4607,8 @@ static MDBX_CXX17_CONSTEXPR size_t strlen(const char *c_str) noexcept { #endif } -MDBX_MAYBE_UNUSED static MDBX_CXX20_CONSTEXPR void * -memcpy(void *dest, const void *src, size_t bytes) noexcept { -#if defined(__cpp_lib_is_constant_evaluated) && \ - __cpp_lib_is_constant_evaluated >= 201811L +MDBX_MAYBE_UNUSED static MDBX_CXX20_CONSTEXPR void *memcpy(void *dest, const void *src, size_t bytes) noexcept { +#if defined(__cpp_lib_is_constant_evaluated) && __cpp_lib_is_constant_evaluated >= 201811L if (::std::is_constant_evaluated()) { for (size_t i = 0; i < bytes; ++i) static_cast(dest)[i] = static_cast(src)[i]; @@ -5382,14 +4618,11 @@ memcpy(void *dest, const void *src, size_t bytes) noexcept { return ::std::memcpy(dest, src, bytes); } -static MDBX_CXX20_CONSTEXPR int memcmp(const void *a, const void *b, - size_t bytes) noexcept { -#if defined(__cpp_lib_is_constant_evaluated) && \ - __cpp_lib_is_constant_evaluated >= 201811L +static MDBX_CXX20_CONSTEXPR int memcmp(const void *a, const void *b, size_t bytes) noexcept { +#if defined(__cpp_lib_is_constant_evaluated) && __cpp_lib_is_constant_evaluated >= 201811L if 
(::std::is_constant_evaluated()) { for (size_t i = 0; i < bytes; ++i) { - const int diff = int(static_cast(a)[i]) - - int(static_cast(b)[i]); + const int diff = int(static_cast(a)[i]) - int(static_cast(b)[i]); if (diff) return diff; } @@ -5405,13 +4638,11 @@ static MDBX_CXX14_CONSTEXPR size_t check_length(size_t bytes) { return bytes; } -static MDBX_CXX14_CONSTEXPR size_t check_length(size_t headroom, - size_t payload) { +static MDBX_CXX14_CONSTEXPR size_t check_length(size_t headroom, size_t payload) { return check_length(check_length(headroom) + check_length(payload)); } -MDBX_MAYBE_UNUSED static MDBX_CXX14_CONSTEXPR size_t -check_length(size_t headroom, size_t payload, size_t tailroom) { +MDBX_MAYBE_UNUSED static MDBX_CXX14_CONSTEXPR size_t check_length(size_t headroom, size_t payload, size_t tailroom) { return check_length(check_length(headroom, payload) + check_length(tailroom)); } @@ -5429,33 +4660,22 @@ inline void exception_thunk::rethrow_captured() const { //------------------------------------------------------------------------------ -MDBX_CXX11_CONSTEXPR error::error(MDBX_error_t error_code) noexcept - : code_(error_code) {} +MDBX_CXX11_CONSTEXPR error::error(MDBX_error_t error_code) noexcept : code_(error_code) {} inline error &error::operator=(MDBX_error_t error_code) noexcept { code_ = error_code; return *this; } -MDBX_CXX11_CONSTEXPR bool operator==(const error &a, const error &b) noexcept { - return a.code_ == b.code_; -} +MDBX_CXX11_CONSTEXPR bool operator==(const error &a, const error &b) noexcept { return a.code_ == b.code_; } -MDBX_CXX11_CONSTEXPR bool operator!=(const error &a, const error &b) noexcept { - return !(a == b); -} +MDBX_CXX11_CONSTEXPR bool operator!=(const error &a, const error &b) noexcept { return !(a == b); } -MDBX_CXX11_CONSTEXPR bool error::is_success() const noexcept { - return code_ == MDBX_SUCCESS; -} +MDBX_CXX11_CONSTEXPR bool error::is_success() const noexcept { return code_ == MDBX_SUCCESS; } -MDBX_CXX11_CONSTEXPR bool 
error::is_result_true() const noexcept { - return code_ == MDBX_RESULT_FALSE; -} +MDBX_CXX11_CONSTEXPR bool error::is_result_true() const noexcept { return code_ == MDBX_RESULT_FALSE; } -MDBX_CXX11_CONSTEXPR bool error::is_result_false() const noexcept { - return code_ == MDBX_RESULT_TRUE; -} +MDBX_CXX11_CONSTEXPR bool error::is_result_false() const noexcept { return code_ == MDBX_RESULT_TRUE; } MDBX_CXX11_CONSTEXPR bool error::is_failure() const noexcept { return code_ != MDBX_SUCCESS && code_ != MDBX_RESULT_TRUE; @@ -5464,10 +4684,8 @@ MDBX_CXX11_CONSTEXPR bool error::is_failure() const noexcept { MDBX_CXX11_CONSTEXPR MDBX_error_t error::code() const noexcept { return code_; } MDBX_CXX11_CONSTEXPR bool error::is_mdbx_error() const noexcept { - return (code() >= MDBX_FIRST_LMDB_ERRCODE && - code() <= MDBX_LAST_LMDB_ERRCODE) || - (code() >= MDBX_FIRST_ADDED_ERRCODE && - code() <= MDBX_LAST_ADDED_ERRCODE); + return (code() >= MDBX_FIRST_LMDB_ERRCODE && code() <= MDBX_LAST_LMDB_ERRCODE) || + (code() >= MDBX_FIRST_ADDED_ERRCODE && code() <= MDBX_LAST_ADDED_ERRCODE); } inline void error::throw_exception(int error_code) { @@ -5488,20 +4706,17 @@ inline void error::success_or_throw() const { inline void error::success_or_throw(const exception_thunk &thunk) const { assert(thunk.is_clean() || code() != MDBX_SUCCESS); if (MDBX_UNLIKELY(!is_success())) { - MDBX_CXX20_UNLIKELY if (MDBX_UNLIKELY(!thunk.is_clean())) - thunk.rethrow_captured(); + MDBX_CXX20_UNLIKELY if (MDBX_UNLIKELY(!thunk.is_clean())) thunk.rethrow_captured(); else throw_exception(); } } -inline void error::panic_on_failure(const char *context_where, - const char *func_who) const noexcept { +inline void error::panic_on_failure(const char *context_where, const char *func_who) const noexcept { if (MDBX_UNLIKELY(is_failure())) MDBX_CXX20_UNLIKELY panic(context_where, func_who); } -inline void error::success_or_panic(const char *context_where, - const char *func_who) const noexcept { +inline void 
error::success_or_panic(const char *context_where, const char *func_who) const noexcept { if (MDBX_UNLIKELY(!is_success())) MDBX_CXX20_UNLIKELY panic(context_where, func_who); } @@ -5532,26 +4747,22 @@ inline bool error::boolean_or_throw(int error_code) { } } -inline void error::success_or_throw(int error_code, - const exception_thunk &thunk) { +inline void error::success_or_throw(int error_code, const exception_thunk &thunk) { error rc(static_cast(error_code)); rc.success_or_throw(thunk); } -inline void error::panic_on_failure(int error_code, const char *context_where, - const char *func_who) noexcept { +inline void error::panic_on_failure(int error_code, const char *context_where, const char *func_who) noexcept { error rc(static_cast(error_code)); rc.panic_on_failure(context_where, func_who); } -inline void error::success_or_panic(int error_code, const char *context_where, - const char *func_who) noexcept { +inline void error::success_or_panic(int error_code, const char *context_where, const char *func_who) noexcept { error rc(static_cast(error_code)); rc.success_or_panic(context_where, func_who); } -inline bool error::boolean_or_throw(int error_code, - const exception_thunk &thunk) { +inline bool error::boolean_or_throw(int error_code, const exception_thunk &thunk) { if (MDBX_UNLIKELY(!thunk.is_clean())) MDBX_CXX20_UNLIKELY thunk.rethrow_captured(); return boolean_or_throw(error_code); @@ -5565,22 +4776,15 @@ MDBX_CXX14_CONSTEXPR slice::slice(const void *ptr, size_t bytes) : ::MDBX_val({const_cast(ptr), check_length(bytes)}) {} MDBX_CXX14_CONSTEXPR slice::slice(const void *begin, const void *end) - : slice(begin, static_cast(end) - - static_cast(begin)) {} + : slice(begin, static_cast(end) - static_cast(begin)) {} -MDBX_CXX17_CONSTEXPR slice::slice(const char *c_str) - : slice(c_str, ::mdbx::strlen(c_str)) {} +MDBX_CXX17_CONSTEXPR slice::slice(const char *c_str) : slice(c_str, ::mdbx::strlen(c_str)) {} -MDBX_CXX14_CONSTEXPR slice::slice(const MDBX_val &src) - : 
slice(src.iov_base, src.iov_len) {} +MDBX_CXX14_CONSTEXPR slice::slice(const MDBX_val &src) : slice(src.iov_base, src.iov_len) {} -MDBX_CXX14_CONSTEXPR slice::slice(MDBX_val &&src) : slice(src) { - src.iov_base = nullptr; -} +MDBX_CXX14_CONSTEXPR slice::slice(MDBX_val &&src) : slice(src) { src.iov_base = nullptr; } -MDBX_CXX14_CONSTEXPR slice::slice(slice &&src) noexcept : slice(src) { - src.invalidate(); -} +MDBX_CXX14_CONSTEXPR slice::slice(slice &&src) noexcept : slice(src) { src.invalidate(); } inline slice &slice::assign(const void *ptr, size_t bytes) { iov_base = const_cast(ptr); @@ -5594,9 +4798,7 @@ inline slice &slice::assign(const slice &src) noexcept { return *this; } -inline slice &slice::assign(const ::MDBX_val &src) { - return assign(src.iov_base, src.iov_len); -} +inline slice &slice::assign(const ::MDBX_val &src) { return assign(src.iov_base, src.iov_len); } slice &slice::assign(slice &&src) noexcept { assign(src); @@ -5611,21 +4813,14 @@ inline slice &slice::assign(::MDBX_val &&src) { } inline slice &slice::assign(const void *begin, const void *end) { - return assign(begin, static_cast(end) - - static_cast(begin)); + return assign(begin, static_cast(end) - static_cast(begin)); } -inline slice &slice::assign(const char *c_str) { - return assign(c_str, ::mdbx::strlen(c_str)); -} +inline slice &slice::assign(const char *c_str) { return assign(c_str, ::mdbx::strlen(c_str)); } -inline slice &slice::operator=(slice &&src) noexcept { - return assign(::std::move(src)); -} +inline slice &slice::operator=(slice &&src) noexcept { return assign(::std::move(src)); } -inline slice &slice::operator=(::MDBX_val &&src) { - return assign(::std::move(src)); -} +inline slice &slice::operator=(::MDBX_val &&src) { return assign(::std::move(src)); } inline void slice::swap(slice &other) noexcept { const auto temp = *this; @@ -5637,47 +4832,27 @@ MDBX_CXX11_CONSTEXPR const ::mdbx::byte *slice::byte_ptr() const noexcept { return static_cast(iov_base); } 
-MDBX_CXX11_CONSTEXPR const ::mdbx::byte *slice::end_byte_ptr() const noexcept { - return byte_ptr() + length(); -} +MDBX_CXX11_CONSTEXPR const ::mdbx::byte *slice::end_byte_ptr() const noexcept { return byte_ptr() + length(); } -MDBX_CXX11_CONSTEXPR ::mdbx::byte *slice::byte_ptr() noexcept { - return static_cast(iov_base); -} +MDBX_CXX11_CONSTEXPR ::mdbx::byte *slice::byte_ptr() noexcept { return static_cast(iov_base); } -MDBX_CXX11_CONSTEXPR ::mdbx::byte *slice::end_byte_ptr() noexcept { - return byte_ptr() + length(); -} +MDBX_CXX11_CONSTEXPR ::mdbx::byte *slice::end_byte_ptr() noexcept { return byte_ptr() + length(); } -MDBX_CXX11_CONSTEXPR const char *slice::char_ptr() const noexcept { - return static_cast(iov_base); -} +MDBX_CXX11_CONSTEXPR const char *slice::char_ptr() const noexcept { return static_cast(iov_base); } -MDBX_CXX11_CONSTEXPR const char *slice::end_char_ptr() const noexcept { - return char_ptr() + length(); -} +MDBX_CXX11_CONSTEXPR const char *slice::end_char_ptr() const noexcept { return char_ptr() + length(); } -MDBX_CXX11_CONSTEXPR char *slice::char_ptr() noexcept { - return static_cast(iov_base); -} +MDBX_CXX11_CONSTEXPR char *slice::char_ptr() noexcept { return static_cast(iov_base); } -MDBX_CXX11_CONSTEXPR char *slice::end_char_ptr() noexcept { - return char_ptr() + length(); -} +MDBX_CXX11_CONSTEXPR char *slice::end_char_ptr() noexcept { return char_ptr() + length(); } -MDBX_CXX11_CONSTEXPR const void *slice::data() const noexcept { - return iov_base; -} +MDBX_CXX11_CONSTEXPR const void *slice::data() const noexcept { return iov_base; } -MDBX_CXX11_CONSTEXPR const void *slice::end() const noexcept { - return static_cast(end_byte_ptr()); -} +MDBX_CXX11_CONSTEXPR const void *slice::end() const noexcept { return static_cast(end_byte_ptr()); } MDBX_CXX11_CONSTEXPR void *slice::data() noexcept { return iov_base; } -MDBX_CXX11_CONSTEXPR void *slice::end() noexcept { - return static_cast(end_byte_ptr()); -} +MDBX_CXX11_CONSTEXPR void 
*slice::end() noexcept { return static_cast(end_byte_ptr()); } MDBX_CXX11_CONSTEXPR size_t slice::length() const noexcept { return iov_len; } @@ -5691,19 +4866,13 @@ MDBX_CXX14_CONSTEXPR slice &slice::set_end(const void *ptr) { return set_length(static_cast(ptr) - char_ptr()); } -MDBX_CXX11_CONSTEXPR bool slice::empty() const noexcept { - return length() == 0; -} +MDBX_CXX11_CONSTEXPR bool slice::empty() const noexcept { return length() == 0; } -MDBX_CXX11_CONSTEXPR bool slice::is_null() const noexcept { - return data() == nullptr; -} +MDBX_CXX11_CONSTEXPR bool slice::is_null() const noexcept { return data() == nullptr; } MDBX_CXX11_CONSTEXPR size_t slice::size() const noexcept { return length(); } -MDBX_CXX11_CONSTEXPR slice::operator bool() const noexcept { - return !is_null(); -} +MDBX_CXX11_CONSTEXPR slice::operator bool() const noexcept { return !is_null(); } MDBX_CXX14_CONSTEXPR void slice::invalidate() noexcept { iov_base = nullptr; } @@ -5735,20 +4904,16 @@ inline void slice::safe_remove_suffix(size_t n) { remove_suffix(n); } -MDBX_CXX14_CONSTEXPR bool -slice::starts_with(const slice &prefix) const noexcept { - return length() >= prefix.length() && - memcmp(data(), prefix.data(), prefix.length()) == 0; +MDBX_CXX14_CONSTEXPR bool slice::starts_with(const slice &prefix) const noexcept { + return length() >= prefix.length() && memcmp(data(), prefix.data(), prefix.length()) == 0; } MDBX_CXX14_CONSTEXPR bool slice::ends_with(const slice &suffix) const noexcept { return length() >= suffix.length() && - memcmp(byte_ptr() + length() - suffix.length(), suffix.data(), - suffix.length()) == 0; + memcmp(byte_ptr() + length() - suffix.length(), suffix.data(), suffix.length()) == 0; } -MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR size_t -slice::hash_value() const noexcept { +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR size_t slice::hash_value() const noexcept { size_t h = length() * 3977471; for (size_t i = 0; i < length(); ++i) h = (h ^ static_cast(data())[i]) * 
1664525 + 1013904223; @@ -5801,17 +4966,14 @@ MDBX_CXX14_CONSTEXPR slice slice::safe_middle(size_t from, size_t n) const { return middle(from, n); } -MDBX_CXX14_CONSTEXPR intptr_t slice::compare_fast(const slice &a, - const slice &b) noexcept { +MDBX_CXX14_CONSTEXPR intptr_t slice::compare_fast(const slice &a, const slice &b) noexcept { const intptr_t diff = intptr_t(a.length()) - intptr_t(b.length()); - return diff ? diff - : MDBX_UNLIKELY(a.length() == 0 || a.data() == b.data()) - ? 0 - : memcmp(a.data(), b.data(), a.length()); + return diff ? diff + : MDBX_UNLIKELY(a.length() == 0 || a.data() == b.data()) ? 0 + : memcmp(a.data(), b.data(), a.length()); } -MDBX_CXX14_CONSTEXPR intptr_t -slice::compare_lexicographically(const slice &a, const slice &b) noexcept { +MDBX_CXX14_CONSTEXPR intptr_t slice::compare_lexicographically(const slice &a, const slice &b) noexcept { const size_t shortest = ::std::min(a.length(), b.length()); if (MDBX_LIKELY(shortest > 0)) MDBX_CXX20_LIKELY { @@ -5822,159 +4984,124 @@ slice::compare_lexicographically(const slice &a, const slice &b) noexcept { return intptr_t(a.length()) - intptr_t(b.length()); } -MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool -operator==(const slice &a, const slice &b) noexcept { +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool operator==(const slice &a, const slice &b) noexcept { return slice::compare_fast(a, b) == 0; } -MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool -operator<(const slice &a, const slice &b) noexcept { +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool operator<(const slice &a, const slice &b) noexcept { return slice::compare_lexicographically(a, b) < 0; } -MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool -operator>(const slice &a, const slice &b) noexcept { +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool operator>(const slice &a, const slice &b) noexcept { return slice::compare_lexicographically(a, b) > 0; } -MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool 
-operator<=(const slice &a, const slice &b) noexcept { +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool operator<=(const slice &a, const slice &b) noexcept { return slice::compare_lexicographically(a, b) <= 0; } -MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool -operator>=(const slice &a, const slice &b) noexcept { +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool operator>=(const slice &a, const slice &b) noexcept { return slice::compare_lexicographically(a, b) >= 0; } -MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool -operator!=(const slice &a, const slice &b) noexcept { +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool operator!=(const slice &a, const slice &b) noexcept { return slice::compare_fast(a, b) != 0; } template -inline string -slice::as_hex_string(bool uppercase, unsigned wrap_width, - const ALLOCATOR &allocator) const { +inline string slice::as_hex_string(bool uppercase, unsigned wrap_width, const ALLOCATOR &allocator) const { return to_hex(*this, uppercase, wrap_width).as_string(allocator); } template -inline string -slice::as_base58_string(unsigned wrap_width, const ALLOCATOR &allocator) const { +inline string slice::as_base58_string(unsigned wrap_width, const ALLOCATOR &allocator) const { return to_base58(*this, wrap_width).as_string(allocator); } template -inline string -slice::as_base64_string(unsigned wrap_width, const ALLOCATOR &allocator) const { +inline string slice::as_base64_string(unsigned wrap_width, const ALLOCATOR &allocator) const { return to_base64(*this, wrap_width).as_string(allocator); } template -inline buffer -slice::encode_hex(bool uppercase, unsigned wrap_width, - const ALLOCATOR &allocator) const { - return to_hex(*this, uppercase, wrap_width) - .as_buffer(allocator); +inline buffer slice::encode_hex(bool uppercase, unsigned wrap_width, + const ALLOCATOR &allocator) const { + return to_hex(*this, uppercase, wrap_width).as_buffer(allocator); } template -inline buffer -slice::encode_base58(unsigned 
wrap_width, const ALLOCATOR &allocator) const { - return to_base58(*this, wrap_width) - .as_buffer(allocator); +inline buffer slice::encode_base58(unsigned wrap_width, const ALLOCATOR &allocator) const { + return to_base58(*this, wrap_width).as_buffer(allocator); } template -inline buffer -slice::encode_base64(unsigned wrap_width, const ALLOCATOR &allocator) const { - return to_base64(*this, wrap_width) - .as_buffer(allocator); +inline buffer slice::encode_base64(unsigned wrap_width, const ALLOCATOR &allocator) const { + return to_base64(*this, wrap_width).as_buffer(allocator); } template -inline buffer -slice::hex_decode(bool ignore_spaces, const ALLOCATOR &allocator) const { - return from_hex(*this, ignore_spaces) - .as_buffer(allocator); +inline buffer slice::hex_decode(bool ignore_spaces, const ALLOCATOR &allocator) const { + return from_hex(*this, ignore_spaces).as_buffer(allocator); } template -inline buffer -slice::base58_decode(bool ignore_spaces, const ALLOCATOR &allocator) const { - return from_base58(*this, ignore_spaces) - .as_buffer(allocator); +inline buffer slice::base58_decode(bool ignore_spaces, const ALLOCATOR &allocator) const { + return from_base58(*this, ignore_spaces).as_buffer(allocator); } template -inline buffer -slice::base64_decode(bool ignore_spaces, const ALLOCATOR &allocator) const { - return from_base64(*this, ignore_spaces) - .as_buffer(allocator); +inline buffer slice::base64_decode(bool ignore_spaces, const ALLOCATOR &allocator) const { + return from_base64(*this, ignore_spaces).as_buffer(allocator); } -MDBX_NOTHROW_PURE_FUNCTION inline bool -slice::is_hex(bool ignore_spaces) const noexcept { +MDBX_NOTHROW_PURE_FUNCTION inline bool slice::is_hex(bool ignore_spaces) const noexcept { return !from_hex(*this, ignore_spaces).is_erroneous(); } -MDBX_NOTHROW_PURE_FUNCTION inline bool -slice::is_base58(bool ignore_spaces) const noexcept { +MDBX_NOTHROW_PURE_FUNCTION inline bool slice::is_base58(bool ignore_spaces) const noexcept { return 
!from_base58(*this, ignore_spaces).is_erroneous(); } -MDBX_NOTHROW_PURE_FUNCTION inline bool -slice::is_base64(bool ignore_spaces) const noexcept { +MDBX_NOTHROW_PURE_FUNCTION inline bool slice::is_base64(bool ignore_spaces) const noexcept { return !from_base64(*this, ignore_spaces).is_erroneous(); } //------------------------------------------------------------------------------ -MDBX_CXX14_CONSTEXPR intptr_t pair::compare_fast(const pair &a, - const pair &b) noexcept { +MDBX_CXX14_CONSTEXPR intptr_t pair::compare_fast(const pair &a, const pair &b) noexcept { const auto diff = slice::compare_fast(a.key, b.key); return diff ? diff : slice::compare_fast(a.value, b.value); } -MDBX_CXX14_CONSTEXPR intptr_t -pair::compare_lexicographically(const pair &a, const pair &b) noexcept { +MDBX_CXX14_CONSTEXPR intptr_t pair::compare_lexicographically(const pair &a, const pair &b) noexcept { const auto diff = slice::compare_lexicographically(a.key, b.key); return diff ? diff : slice::compare_lexicographically(a.value, b.value); } -MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool -operator==(const pair &a, const pair &b) noexcept { - return a.key.length() == b.key.length() && - a.value.length() == b.value.length() && +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool operator==(const pair &a, const pair &b) noexcept { + return a.key.length() == b.key.length() && a.value.length() == b.value.length() && memcmp(a.key.data(), b.key.data(), a.key.length()) == 0 && memcmp(a.value.data(), b.value.data(), a.value.length()) == 0; } -MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool -operator<(const pair &a, const pair &b) noexcept { +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool operator<(const pair &a, const pair &b) noexcept { return pair::compare_lexicographically(a, b) < 0; } -MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool -operator>(const pair &a, const pair &b) noexcept { +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool operator>(const pair &a, const pair 
&b) noexcept { return pair::compare_lexicographically(a, b) > 0; } -MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool -operator<=(const pair &a, const pair &b) noexcept { +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool operator<=(const pair &a, const pair &b) noexcept { return pair::compare_lexicographically(a, b) <= 0; } -MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool -operator>=(const pair &a, const pair &b) noexcept { +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool operator>=(const pair &a, const pair &b) noexcept { return pair::compare_lexicographically(a, b) >= 0; } -MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool -operator!=(const pair &a, const pair &b) noexcept { - return a.key.length() != b.key.length() || - a.value.length() != b.value.length() || +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool operator!=(const pair &a, const pair &b) noexcept { + return a.key.length() != b.key.length() || a.value.length() != b.value.length() || memcmp(a.key.data(), b.key.data(), a.key.length()) != 0 || memcmp(a.value.data(), b.value.data(), a.value.length()) != 0; } @@ -5982,25 +5109,21 @@ operator!=(const pair &a, const pair &b) noexcept { //------------------------------------------------------------------------------ template -inline buffer::buffer( - const txn &txn, const struct slice &src, const allocator_type &allocator) +inline buffer::buffer(const txn &txn, const struct slice &src, + const allocator_type &allocator) : buffer(src, !txn.is_dirty(src.data()), allocator) {} //------------------------------------------------------------------------------ -MDBX_CXX11_CONSTEXPR map_handle::info::info(map_handle::flags flags, - map_handle::state state) noexcept +MDBX_CXX11_CONSTEXPR map_handle::info::info(map_handle::flags flags, map_handle::state state) noexcept : flags(flags), state(state) {} -MDBX_CXX11_CONSTEXPR_ENUM mdbx::key_mode -map_handle::info::key_mode() const noexcept { +MDBX_CXX11_CONSTEXPR_ENUM mdbx::key_mode 
map_handle::info::key_mode() const noexcept { return ::mdbx::key_mode(flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)); } -MDBX_CXX11_CONSTEXPR_ENUM mdbx::value_mode -map_handle::info::value_mode() const noexcept { - return ::mdbx::value_mode(flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | - MDBX_DUPFIXED | MDBX_INTEGERDUP)); +MDBX_CXX11_CONSTEXPR_ENUM mdbx::value_mode map_handle::info::value_mode() const noexcept { + return ::mdbx::value_mode(flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | MDBX_INTEGERDUP)); } //------------------------------------------------------------------------------ @@ -6013,9 +5136,7 @@ inline env &env::operator=(env &&other) noexcept { return *this; } -inline env::env(env &&other) noexcept : handle_(other.handle_) { - other.handle_ = nullptr; -} +inline env::env(env &&other) noexcept : handle_(other.handle_) { other.handle_ = nullptr; } inline env::~env() noexcept { #ifndef NDEBUG @@ -6023,21 +5144,15 @@ inline env::~env() noexcept { #endif } -MDBX_CXX14_CONSTEXPR env::operator bool() const noexcept { - return handle_ != nullptr; -} +MDBX_CXX14_CONSTEXPR env::operator bool() const noexcept { return handle_ != nullptr; } MDBX_CXX14_CONSTEXPR env::operator const MDBX_env *() const { return handle_; } MDBX_CXX14_CONSTEXPR env::operator MDBX_env *() { return handle_; } -MDBX_CXX11_CONSTEXPR bool operator==(const env &a, const env &b) noexcept { - return a.handle_ == b.handle_; -} +MDBX_CXX11_CONSTEXPR bool operator==(const env &a, const env &b) noexcept { return a.handle_ == b.handle_; } -MDBX_CXX11_CONSTEXPR bool operator!=(const env &a, const env &b) noexcept { - return a.handle_ != b.handle_; -} +MDBX_CXX11_CONSTEXPR bool operator!=(const env &a, const env &b) noexcept { return a.handle_ != b.handle_; } inline env::geometry &env::geometry::make_fixed(intptr_t size) noexcept { size_lower = size_now = size_upper = size; @@ -6045,21 +5160,18 @@ inline env::geometry &env::geometry::make_fixed(intptr_t size) noexcept { return *this; } -inline 
env::geometry &env::geometry::make_dynamic(intptr_t lower, - intptr_t upper) noexcept { +inline env::geometry &env::geometry::make_dynamic(intptr_t lower, intptr_t upper) noexcept { size_now = size_lower = lower; size_upper = upper; growth_step = shrink_threshold = default_value; return *this; } -inline env::reclaiming_options env::operate_parameters::reclaiming_from_flags( - MDBX_env_flags_t flags) noexcept { +inline env::reclaiming_options env::operate_parameters::reclaiming_from_flags(MDBX_env_flags_t flags) noexcept { return reclaiming_options(flags); } -inline env::operate_options -env::operate_parameters::options_from_flags(MDBX_env_flags_t flags) noexcept { +inline env::operate_options env::operate_parameters::options_from_flags(MDBX_env_flags_t flags) noexcept { return operate_options(flags); } @@ -6081,13 +5193,9 @@ inline size_t env::limits::dbsize_max(intptr_t pagesize) { return static_cast(result); } -inline size_t env::limits::key_min(MDBX_db_flags_t flags) noexcept { - return (flags & MDBX_INTEGERKEY) ? 4 : 0; -} +inline size_t env::limits::key_min(MDBX_db_flags_t flags) noexcept { return (flags & MDBX_INTEGERKEY) ? 4 : 0; } -inline size_t env::limits::key_min(key_mode mode) noexcept { - return key_min(MDBX_db_flags_t(mode)); -} +inline size_t env::limits::key_min(key_mode mode) noexcept { return key_min(MDBX_db_flags_t(mode)); } inline size_t env::limits::key_max(intptr_t pagesize, MDBX_db_flags_t flags) { const intptr_t result = mdbx_limits_keysize_max(pagesize, flags); @@ -6107,17 +5215,11 @@ inline size_t env::limits::key_max(const env &env, MDBX_db_flags_t flags) { return static_cast(result); } -inline size_t env::limits::key_max(const env &env, key_mode mode) { - return key_max(env, MDBX_db_flags_t(mode)); -} +inline size_t env::limits::key_max(const env &env, key_mode mode) { return key_max(env, MDBX_db_flags_t(mode)); } -inline size_t env::limits::value_min(MDBX_db_flags_t flags) noexcept { - return (flags & MDBX_INTEGERDUP) ? 
4 : 0; -} +inline size_t env::limits::value_min(MDBX_db_flags_t flags) noexcept { return (flags & MDBX_INTEGERDUP) ? 4 : 0; } -inline size_t env::limits::value_min(value_mode mode) noexcept { - return value_min(MDBX_db_flags_t(mode)); -} +inline size_t env::limits::value_min(value_mode mode) noexcept { return value_min(MDBX_db_flags_t(mode)); } inline size_t env::limits::value_max(intptr_t pagesize, MDBX_db_flags_t flags) { const intptr_t result = mdbx_limits_valsize_max(pagesize, flags); @@ -6137,25 +5239,20 @@ inline size_t env::limits::value_max(const env &env, MDBX_db_flags_t flags) { return static_cast(result); } -inline size_t env::limits::value_max(const env &env, value_mode mode) { - return value_max(env, MDBX_db_flags_t(mode)); -} +inline size_t env::limits::value_max(const env &env, value_mode mode) { return value_max(env, MDBX_db_flags_t(mode)); } -inline size_t env::limits::pairsize4page_max(intptr_t pagesize, - MDBX_db_flags_t flags) { +inline size_t env::limits::pairsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags) { const intptr_t result = mdbx_limits_pairsize4page_max(pagesize, flags); if (result < 0) MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_EINVAL); return static_cast(result); } -inline size_t env::limits::pairsize4page_max(intptr_t pagesize, - value_mode mode) { +inline size_t env::limits::pairsize4page_max(intptr_t pagesize, value_mode mode) { return pairsize4page_max(pagesize, MDBX_db_flags_t(mode)); } -inline size_t env::limits::pairsize4page_max(const env &env, - MDBX_db_flags_t flags) { +inline size_t env::limits::pairsize4page_max(const env &env, MDBX_db_flags_t flags) { const intptr_t result = mdbx_env_get_pairsize4page_max(env, flags); if (result < 0) MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_EINVAL); @@ -6166,21 +5263,18 @@ inline size_t env::limits::pairsize4page_max(const env &env, value_mode mode) { return pairsize4page_max(env, MDBX_db_flags_t(mode)); } -inline size_t env::limits::valsize4page_max(intptr_t pagesize, 
- MDBX_db_flags_t flags) { +inline size_t env::limits::valsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags) { const intptr_t result = mdbx_limits_valsize4page_max(pagesize, flags); if (result < 0) MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_EINVAL); return static_cast(result); } -inline size_t env::limits::valsize4page_max(intptr_t pagesize, - value_mode mode) { +inline size_t env::limits::valsize4page_max(intptr_t pagesize, value_mode mode) { return valsize4page_max(pagesize, MDBX_db_flags_t(mode)); } -inline size_t env::limits::valsize4page_max(const env &env, - MDBX_db_flags_t flags) { +inline size_t env::limits::valsize4page_max(const env &env, MDBX_db_flags_t flags) { const intptr_t result = mdbx_env_get_valsize4page_max(env, flags); if (result < 0) MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_EINVAL); @@ -6202,16 +5296,13 @@ inline size_t env::limits::max_map_handles(void) { return MDBX_MAX_DBI; } inline env::operate_parameters env::get_operation_parameters() const { const auto flags = get_flags(); - return operate_parameters(max_maps(), max_readers(), - operate_parameters::mode_from_flags(flags), + return operate_parameters(max_maps(), max_readers(), operate_parameters::mode_from_flags(flags), operate_parameters::durability_from_flags(flags), operate_parameters::reclaiming_from_flags(flags), operate_parameters::options_from_flags(flags)); } -inline env::mode env::get_mode() const { - return operate_parameters::mode_from_flags(get_flags()); -} +inline env::mode env::get_mode() const { return operate_parameters::mode_from_flags(get_flags()); } inline env::durability env::get_durability() const { return env::operate_parameters::durability_from_flags(get_flags()); @@ -6273,9 +5364,7 @@ inline unsigned env::max_maps() const { return r; } -inline void *env::get_context() const noexcept { - return mdbx_env_get_userctx(handle_); -} +inline void *env::get_context() const noexcept { return mdbx_env_get_userctx(handle_); } inline env &env::set_context(void 
*ptr) { error::success_or_throw(::mdbx_env_set_userctx(handle_, ptr)); @@ -6308,31 +5397,22 @@ inline env &env::set_sync_period__seconds_double(double seconds) { return set_sync_period__seconds_16dot16(unsigned(seconds * 65536)); } -inline double env::sync_period__seconds_double() const { - return sync_period__seconds_16dot16() / 65536.0; -} +inline double env::sync_period__seconds_double() const { return sync_period__seconds_16dot16() / 65536.0; } #if __cplusplus >= 201103L -inline env &env::set_sync_period(const duration &period) { - return set_sync_period__seconds_16dot16(period.count()); -} +inline env &env::set_sync_period(const duration &period) { return set_sync_period__seconds_16dot16(period.count()); } -inline duration env::sync_period() const { - return duration(sync_period__seconds_16dot16()); -} +inline duration env::sync_period() const { return duration(sync_period__seconds_16dot16()); } #endif -inline env &env::set_extra_option(enum env::extra_runtime_option option, - uint64_t value) { - error::success_or_throw( - ::mdbx_env_set_option(handle_, ::MDBX_option_t(option), value)); +inline env &env::set_extra_option(enum env::extra_runtime_option option, uint64_t value) { + error::success_or_throw(::mdbx_env_set_option(handle_, ::MDBX_option_t(option), value)); return *this; } inline uint64_t env::extra_option(enum env::extra_runtime_option option) const { uint64_t value; - error::success_or_throw( - ::mdbx_env_get_option(handle_, ::MDBX_option_t(option), &value)); + error::success_or_throw(::mdbx_env_get_option(handle_, ::MDBX_option_t(option), &value)); return value; } @@ -6342,9 +5422,8 @@ inline env &env::alter_flags(MDBX_env_flags_t flags, bool on_off) { } inline env &env::set_geometry(const geometry &geo) { - error::success_or_throw(::mdbx_env_set_geometry( - handle_, geo.size_lower, geo.size_now, geo.size_upper, geo.growth_step, - geo.shrink_threshold, geo.pagesize)); + error::success_or_throw(::mdbx_env_set_geometry(handle_, geo.size_lower, 
geo.size_now, geo.size_upper, + geo.growth_step, geo.shrink_threshold, geo.pagesize)); return *this; } @@ -6361,24 +5440,19 @@ inline bool env::sync_to_disk(bool force, bool nonblock) { } } -inline void env::close_map(const map_handle &handle) { - error::success_or_throw(::mdbx_dbi_close(*this, handle.dbi)); -} +inline void env::close_map(const map_handle &handle) { error::success_or_throw(::mdbx_dbi_close(*this, handle.dbi)); } MDBX_CXX11_CONSTEXPR -env::reader_info::reader_info(int slot, mdbx_pid_t pid, mdbx_tid_t thread, - uint64_t txnid, uint64_t lag, size_t used, +env::reader_info::reader_info(int slot, mdbx_pid_t pid, mdbx_tid_t thread, uint64_t txnid, uint64_t lag, size_t used, size_t retained) noexcept - : slot(slot), pid(pid), thread(thread), transaction_id(txnid), - transaction_lag(lag), bytes_used(used), bytes_retained(retained) {} + : slot(slot), pid(pid), thread(thread), transaction_id(txnid), transaction_lag(lag), bytes_used(used), + bytes_retained(retained) {} -template -inline int env::enumerate_readers(VISITOR &visitor) { +template inline int env::enumerate_readers(VISITOR &visitor) { struct reader_visitor_thunk : public exception_thunk { VISITOR &visitor_; - static int cb(void *ctx, int number, int slot, mdbx_pid_t pid, - mdbx_tid_t thread, uint64_t txnid, uint64_t lag, size_t used, - size_t retained) noexcept { + static int cb(void *ctx, int number, int slot, mdbx_pid_t pid, mdbx_tid_t thread, uint64_t txnid, uint64_t lag, + size_t used, size_t retained) noexcept { reader_visitor_thunk *thunk = static_cast(ctx); assert(thunk->is_clean()); try { @@ -6389,8 +5463,7 @@ inline int env::enumerate_readers(VISITOR &visitor) { return loop_control::exit_loop; } } - MDBX_CXX11_CONSTEXPR reader_visitor_thunk(VISITOR &visitor) noexcept - : visitor_(visitor) {} + MDBX_CXX11_CONSTEXPR reader_visitor_thunk(VISITOR &visitor) noexcept : visitor_(visitor) {} }; reader_visitor_thunk thunk(visitor); const auto rc = ::mdbx_reader_list(*this, thunk.cb, &thunk); @@ 
-6410,38 +5483,32 @@ inline env &env::set_HandleSlowReaders(MDBX_hsr_func *cb) { return *this; } -inline MDBX_hsr_func *env::get_HandleSlowReaders() const noexcept { - return ::mdbx_env_get_hsr(handle_); -} +inline MDBX_hsr_func *env::get_HandleSlowReaders() const noexcept { return ::mdbx_env_get_hsr(handle_); } inline txn_managed env::start_read() const { ::MDBX_txn *ptr; - error::success_or_throw( - ::mdbx_txn_begin(handle_, nullptr, MDBX_TXN_RDONLY, &ptr)); + error::success_or_throw(::mdbx_txn_begin(handle_, nullptr, MDBX_TXN_RDONLY, &ptr)); assert(ptr != nullptr); return txn_managed(ptr); } inline txn_managed env::prepare_read() const { ::MDBX_txn *ptr; - error::success_or_throw( - ::mdbx_txn_begin(handle_, nullptr, MDBX_TXN_RDONLY_PREPARE, &ptr)); + error::success_or_throw(::mdbx_txn_begin(handle_, nullptr, MDBX_TXN_RDONLY_PREPARE, &ptr)); assert(ptr != nullptr); return txn_managed(ptr); } inline txn_managed env::start_write(bool dont_wait) { ::MDBX_txn *ptr; - error::success_or_throw(::mdbx_txn_begin( - handle_, nullptr, dont_wait ? MDBX_TXN_TRY : MDBX_TXN_READWRITE, &ptr)); + error::success_or_throw(::mdbx_txn_begin(handle_, nullptr, dont_wait ? 
MDBX_TXN_TRY : MDBX_TXN_READWRITE, &ptr)); assert(ptr != nullptr); return txn_managed(ptr); } inline txn_managed env::start_write(txn &parent) { ::MDBX_txn *ptr; - error::success_or_throw( - ::mdbx_txn_begin(handle_, parent, MDBX_TXN_READWRITE, &ptr)); + error::success_or_throw(::mdbx_txn_begin(handle_, parent, MDBX_TXN_READWRITE, &ptr)); assert(ptr != nullptr); return txn_managed(ptr); } @@ -6458,9 +5525,7 @@ inline txn &txn::operator=(txn &&other) noexcept { return *this; } -inline txn::txn(txn &&other) noexcept : handle_(other.handle_) { - other.handle_ = nullptr; -} +inline txn::txn(txn &&other) noexcept : handle_(other.handle_) { other.handle_ = nullptr; } inline txn::~txn() noexcept { #ifndef NDEBUG @@ -6468,25 +5533,17 @@ inline txn::~txn() noexcept { #endif } -MDBX_CXX14_CONSTEXPR txn::operator bool() const noexcept { - return handle_ != nullptr; -} +MDBX_CXX14_CONSTEXPR txn::operator bool() const noexcept { return handle_ != nullptr; } MDBX_CXX14_CONSTEXPR txn::operator const MDBX_txn *() const { return handle_; } MDBX_CXX14_CONSTEXPR txn::operator MDBX_txn *() { return handle_; } -MDBX_CXX11_CONSTEXPR bool operator==(const txn &a, const txn &b) noexcept { - return a.handle_ == b.handle_; -} +MDBX_CXX11_CONSTEXPR bool operator==(const txn &a, const txn &b) noexcept { return a.handle_ == b.handle_; } -MDBX_CXX11_CONSTEXPR bool operator!=(const txn &a, const txn &b) noexcept { - return a.handle_ != b.handle_; -} +MDBX_CXX11_CONSTEXPR bool operator!=(const txn &a, const txn &b) noexcept { return a.handle_ != b.handle_; } -inline void *txn::get_context() const noexcept { - return mdbx_txn_get_userctx(handle_); -} +inline void *txn::get_context() const noexcept { return mdbx_txn_get_userctx(handle_); } inline txn &txn::set_context(void *ptr) { error::success_or_throw(::mdbx_txn_set_userctx(handle_, ptr)); @@ -6519,17 +5576,11 @@ inline uint64_t txn::id() const { return txnid; } -inline void txn::reset_reading() { - 
error::success_or_throw(::mdbx_txn_reset(handle_)); -} +inline void txn::reset_reading() { error::success_or_throw(::mdbx_txn_reset(handle_)); } -inline void txn::renew_reading() { - error::success_or_throw(::mdbx_txn_renew(handle_)); -} +inline void txn::renew_reading() { error::success_or_throw(::mdbx_txn_renew(handle_)); } -inline void txn::park_reading(bool autounpark) { - error::success_or_throw(::mdbx_txn_park(handle_, autounpark)); -} +inline void txn::park_reading(bool autounpark) { error::success_or_throw(::mdbx_txn_park(handle_, autounpark)); } inline bool txn::unpark_reading(bool restart_if_ousted) { return error::boolean_or_throw(::mdbx_txn_unpark(handle_, restart_if_ousted)); @@ -6554,76 +5605,59 @@ inline size_t txn::release_all_cursors(bool unbind) const { return size_t(err); } -inline ::mdbx::map_handle -txn::open_map(const ::mdbx::slice &name, const ::mdbx::key_mode key_mode, - const ::mdbx::value_mode value_mode) const { - ::mdbx::map_handle map; - error::success_or_throw(::mdbx_dbi_open2( - handle_, name, MDBX_db_flags_t(key_mode) | MDBX_db_flags_t(value_mode), - &map.dbi)); - assert(map.dbi != 0); - return map; -} - -inline ::mdbx::map_handle -txn::open_map(const char *name, const ::mdbx::key_mode key_mode, - const ::mdbx::value_mode value_mode) const { - ::mdbx::map_handle map; - error::success_or_throw(::mdbx_dbi_open( - handle_, name, MDBX_db_flags_t(key_mode) | MDBX_db_flags_t(value_mode), - &map.dbi)); - assert(map.dbi != 0); - return map; -} - -inline ::mdbx::map_handle -txn::open_map_accede(const ::mdbx::slice &name) const { +inline ::mdbx::map_handle txn::open_map(const ::mdbx::slice &name, const ::mdbx::key_mode key_mode, + const ::mdbx::value_mode value_mode) const { ::mdbx::map_handle map; error::success_or_throw( - ::mdbx_dbi_open2(handle_, name, MDBX_DB_ACCEDE, &map.dbi)); + ::mdbx_dbi_open2(handle_, name, MDBX_db_flags_t(key_mode) | MDBX_db_flags_t(value_mode), &map.dbi)); + assert(map.dbi != 0); + return map; +} + +inline 
::mdbx::map_handle txn::open_map(const char *name, const ::mdbx::key_mode key_mode, + const ::mdbx::value_mode value_mode) const { + ::mdbx::map_handle map; + error::success_or_throw( + ::mdbx_dbi_open(handle_, name, MDBX_db_flags_t(key_mode) | MDBX_db_flags_t(value_mode), &map.dbi)); + assert(map.dbi != 0); + return map; +} + +inline ::mdbx::map_handle txn::open_map_accede(const ::mdbx::slice &name) const { + ::mdbx::map_handle map; + error::success_or_throw(::mdbx_dbi_open2(handle_, name, MDBX_DB_ACCEDE, &map.dbi)); assert(map.dbi != 0); return map; } inline ::mdbx::map_handle txn::open_map_accede(const char *name) const { ::mdbx::map_handle map; + error::success_or_throw(::mdbx_dbi_open(handle_, name, MDBX_DB_ACCEDE, &map.dbi)); + assert(map.dbi != 0); + return map; +} + +inline ::mdbx::map_handle txn::create_map(const ::mdbx::slice &name, const ::mdbx::key_mode key_mode, + const ::mdbx::value_mode value_mode) { + ::mdbx::map_handle map; error::success_or_throw( - ::mdbx_dbi_open(handle_, name, MDBX_DB_ACCEDE, &map.dbi)); + ::mdbx_dbi_open2(handle_, name, MDBX_CREATE | MDBX_db_flags_t(key_mode) | MDBX_db_flags_t(value_mode), &map.dbi)); assert(map.dbi != 0); return map; } -inline ::mdbx::map_handle txn::create_map(const ::mdbx::slice &name, - const ::mdbx::key_mode key_mode, +inline ::mdbx::map_handle txn::create_map(const char *name, const ::mdbx::key_mode key_mode, const ::mdbx::value_mode value_mode) { ::mdbx::map_handle map; - error::success_or_throw(::mdbx_dbi_open2( - handle_, name, - MDBX_CREATE | MDBX_db_flags_t(key_mode) | MDBX_db_flags_t(value_mode), - &map.dbi)); + error::success_or_throw( + ::mdbx_dbi_open(handle_, name, MDBX_CREATE | MDBX_db_flags_t(key_mode) | MDBX_db_flags_t(value_mode), &map.dbi)); assert(map.dbi != 0); return map; } -inline ::mdbx::map_handle txn::create_map(const char *name, - const ::mdbx::key_mode key_mode, - const ::mdbx::value_mode value_mode) { - ::mdbx::map_handle map; - error::success_or_throw(::mdbx_dbi_open( - handle_, 
name, - MDBX_CREATE | MDBX_db_flags_t(key_mode) | MDBX_db_flags_t(value_mode), - &map.dbi)); - assert(map.dbi != 0); - return map; -} +inline void txn::drop_map(map_handle map) { error::success_or_throw(::mdbx_drop(handle_, map.dbi, true)); } -inline void txn::drop_map(map_handle map) { - error::success_or_throw(::mdbx_drop(handle_, map.dbi, true)); -} - -inline void txn::clear_map(map_handle map) { - error::success_or_throw(::mdbx_drop(handle_, map.dbi, false)); -} +inline void txn::clear_map(map_handle map) { error::success_or_throw(::mdbx_drop(handle_, map.dbi, false)); } inline void txn::rename_map(map_handle map, const char *new_name) { error::success_or_throw(::mdbx_dbi_rename(handle_, map, new_name)); @@ -6633,19 +5667,16 @@ inline void txn::rename_map(map_handle map, const ::mdbx::slice &new_name) { error::success_or_throw(::mdbx_dbi_rename2(handle_, map, new_name)); } -inline ::mdbx::map_handle -txn::open_map(const ::std::string &name, const ::mdbx::key_mode key_mode, - const ::mdbx::value_mode value_mode) const { +inline ::mdbx::map_handle txn::open_map(const ::std::string &name, const ::mdbx::key_mode key_mode, + const ::mdbx::value_mode value_mode) const { return open_map(::mdbx::slice(name), key_mode, value_mode); } -inline ::mdbx::map_handle -txn::open_map_accede(const ::std::string &name) const { +inline ::mdbx::map_handle txn::open_map_accede(const ::std::string &name) const { return open_map_accede(::mdbx::slice(name)); } -inline ::mdbx::map_handle txn::create_map(const ::std::string &name, - const ::mdbx::key_mode key_mode, +inline ::mdbx::map_handle txn::create_map(const ::std::string &name, const ::mdbx::key_mode key_mode, const ::mdbx::value_mode value_mode) { return create_map(::mdbx::slice(name), key_mode, value_mode); } @@ -6676,8 +5707,7 @@ inline uint32_t txn::get_tree_deepmask(map_handle map) const { inline map_handle::info txn::get_handle_info(map_handle map) const { unsigned flags, state; - error::success_or_throw( - 
::mdbx_dbi_flags_ex(handle_, map.dbi, &flags, &state)); + error::success_or_throw(::mdbx_dbi_flags_ex(handle_, map.dbi, &flags, &state)); return map_handle::info(MDBX_db_flags_t(flags), MDBX_dbi_state_t(state)); } @@ -6700,28 +5730,23 @@ inline uint64_t txn::sequence(map_handle map) const { inline uint64_t txn::sequence(map_handle map, uint64_t increment) { uint64_t result; - error::success_or_throw( - ::mdbx_dbi_sequence(handle_, map.dbi, &result, increment)); + error::success_or_throw(::mdbx_dbi_sequence(handle_, map.dbi, &result, increment)); return result; } -inline int txn::compare_keys(map_handle map, const slice &a, - const slice &b) const noexcept { +inline int txn::compare_keys(map_handle map, const slice &a, const slice &b) const noexcept { return ::mdbx_cmp(handle_, map.dbi, &a, &b); } -inline int txn::compare_values(map_handle map, const slice &a, - const slice &b) const noexcept { +inline int txn::compare_values(map_handle map, const slice &a, const slice &b) const noexcept { return ::mdbx_dcmp(handle_, map.dbi, &a, &b); } -inline int txn::compare_keys(map_handle map, const pair &a, - const pair &b) const noexcept { +inline int txn::compare_keys(map_handle map, const pair &a, const pair &b) const noexcept { return compare_keys(map, a.key, b.key); } -inline int txn::compare_values(map_handle map, const pair &a, - const pair &b) const noexcept { +inline int txn::compare_values(map_handle map, const pair &a, const pair &b) const noexcept { return compare_values(map, a.value, b.value); } @@ -6733,13 +5758,11 @@ inline slice txn::get(map_handle map, const slice &key) const { inline slice txn::get(map_handle map, slice key, size_t &values_count) const { slice result; - error::success_or_throw( - ::mdbx_get_ex(handle_, map.dbi, &key, &result, &values_count)); + error::success_or_throw(::mdbx_get_ex(handle_, map.dbi, &key, &result, &values_count)); return result; } -inline slice txn::get(map_handle map, const slice &key, - const slice &value_at_absence) const 
{ +inline slice txn::get(map_handle map, const slice &key, const slice &value_at_absence) const { slice result; const int err = ::mdbx_get(handle_, map.dbi, &key, &result); switch (err) { @@ -6752,8 +5775,7 @@ inline slice txn::get(map_handle map, const slice &key, } } -inline slice txn::get(map_handle map, slice key, size_t &values_count, - const slice &value_at_absence) const { +inline slice txn::get(map_handle map, slice key, size_t &values_count, const slice &value_at_absence) const { slice result; const int err = ::mdbx_get_ex(handle_, map.dbi, &key, &result, &values_count); switch (err) { @@ -6766,20 +5788,15 @@ inline slice txn::get(map_handle map, slice key, size_t &values_count, } } -inline pair_result txn::get_equal_or_great(map_handle map, - const slice &key) const { +inline pair_result txn::get_equal_or_great(map_handle map, const slice &key) const { pair result(key, slice()); - bool exact = !error::boolean_or_throw( - ::mdbx_get_equal_or_great(handle_, map.dbi, &result.key, &result.value)); + bool exact = !error::boolean_or_throw(::mdbx_get_equal_or_great(handle_, map.dbi, &result.key, &result.value)); return pair_result(result.key, result.value, exact); } -inline pair_result -txn::get_equal_or_great(map_handle map, const slice &key, - const slice &value_at_absence) const { +inline pair_result txn::get_equal_or_great(map_handle map, const slice &key, const slice &value_at_absence) const { pair result{key, slice()}; - const int err = - ::mdbx_get_equal_or_great(handle_, map.dbi, &result.key, &result.value); + const int err = ::mdbx_get_equal_or_great(handle_, map.dbi, &result.key, &result.value); switch (err) { case MDBX_SUCCESS: return pair_result{result.key, result.value, true}; @@ -6792,27 +5809,22 @@ txn::get_equal_or_great(map_handle map, const slice &key, } } -inline MDBX_error_t txn::put(map_handle map, const slice &key, slice *value, - MDBX_put_flags_t flags) noexcept { +inline MDBX_error_t txn::put(map_handle map, const slice &key, slice 
*value, MDBX_put_flags_t flags) noexcept { return MDBX_error_t(::mdbx_put(handle_, map.dbi, &key, value, flags)); } -inline void txn::put(map_handle map, const slice &key, slice value, - put_mode mode) { +inline void txn::put(map_handle map, const slice &key, slice value, put_mode mode) { error::success_or_throw(put(map, key, &value, MDBX_put_flags_t(mode))); } inline void txn::insert(map_handle map, const slice &key, slice value) { - error::success_or_throw( - put(map, key, &value /* takes the present value in case MDBX_KEYEXIST */, - MDBX_put_flags_t(put_mode::insert_unique))); + error::success_or_throw(put(map, key, &value /* takes the present value in case MDBX_KEYEXIST */, + MDBX_put_flags_t(put_mode::insert_unique))); } -inline value_result txn::try_insert(map_handle map, const slice &key, - slice value) { - const int err = - put(map, key, &value /* takes the present value in case MDBX_KEYEXIST */, - MDBX_put_flags_t(put_mode::insert_unique)); +inline value_result txn::try_insert(map_handle map, const slice &key, slice value) { + const int err = put(map, key, &value /* takes the present value in case MDBX_KEYEXIST */, + MDBX_put_flags_t(put_mode::insert_unique)); switch (err) { case MDBX_SUCCESS: return value_result{slice(), true}; @@ -6823,21 +5835,17 @@ inline value_result txn::try_insert(map_handle map, const slice &key, } } -inline slice txn::insert_reserve(map_handle map, const slice &key, - size_t value_length) { +inline slice txn::insert_reserve(map_handle map, const slice &key, size_t value_length) { slice result(nullptr, value_length); - error::success_or_throw( - put(map, key, &result /* takes the present value in case MDBX_KEYEXIST */, - MDBX_put_flags_t(put_mode::insert_unique) | MDBX_RESERVE)); + error::success_or_throw(put(map, key, &result /* takes the present value in case MDBX_KEYEXIST */, + MDBX_put_flags_t(put_mode::insert_unique) | MDBX_RESERVE)); return result; } -inline value_result txn::try_insert_reserve(map_handle map, const slice 
&key, - size_t value_length) { +inline value_result txn::try_insert_reserve(map_handle map, const slice &key, size_t value_length) { slice result(nullptr, value_length); - const int err = - put(map, key, &result /* takes the present value in case MDBX_KEYEXIST */, - MDBX_put_flags_t(put_mode::insert_unique) | MDBX_RESERVE); + const int err = put(map, key, &result /* takes the present value in case MDBX_KEYEXIST */, + MDBX_put_flags_t(put_mode::insert_unique) | MDBX_RESERVE); switch (err) { case MDBX_SUCCESS: return value_result{result, true}; @@ -6849,27 +5857,21 @@ inline value_result txn::try_insert_reserve(map_handle map, const slice &key, } inline void txn::upsert(map_handle map, const slice &key, const slice &value) { - error::success_or_throw(put(map, key, const_cast(&value), - MDBX_put_flags_t(put_mode::upsert))); + error::success_or_throw(put(map, key, const_cast(&value), MDBX_put_flags_t(put_mode::upsert))); } -inline slice txn::upsert_reserve(map_handle map, const slice &key, - size_t value_length) { +inline slice txn::upsert_reserve(map_handle map, const slice &key, size_t value_length) { slice result(nullptr, value_length); - error::success_or_throw(put( - map, key, &result, MDBX_put_flags_t(put_mode::upsert) | MDBX_RESERVE)); + error::success_or_throw(put(map, key, &result, MDBX_put_flags_t(put_mode::upsert) | MDBX_RESERVE)); return result; } inline void txn::update(map_handle map, const slice &key, const slice &value) { - error::success_or_throw(put(map, key, const_cast(&value), - MDBX_put_flags_t(put_mode::update))); + error::success_or_throw(put(map, key, const_cast(&value), MDBX_put_flags_t(put_mode::update))); } -inline bool txn::try_update(map_handle map, const slice &key, - const slice &value) { - const int err = put(map, key, const_cast(&value), - MDBX_put_flags_t(put_mode::update)); +inline bool txn::try_update(map_handle map, const slice &key, const slice &value) { + const int err = put(map, key, const_cast(&value), 
MDBX_put_flags_t(put_mode::update)); switch (err) { case MDBX_SUCCESS: return true; @@ -6880,19 +5882,15 @@ inline bool txn::try_update(map_handle map, const slice &key, } } -inline slice txn::update_reserve(map_handle map, const slice &key, - size_t value_length) { +inline slice txn::update_reserve(map_handle map, const slice &key, size_t value_length) { slice result(nullptr, value_length); - error::success_or_throw(put( - map, key, &result, MDBX_put_flags_t(put_mode::update) | MDBX_RESERVE)); + error::success_or_throw(put(map, key, &result, MDBX_put_flags_t(put_mode::update) | MDBX_RESERVE)); return result; } -inline value_result txn::try_update_reserve(map_handle map, const slice &key, - size_t value_length) { +inline value_result txn::try_update_reserve(map_handle map, const slice &key, size_t value_length) { slice result(nullptr, value_length); - const int err = - put(map, key, &result, MDBX_put_flags_t(put_mode::update) | MDBX_RESERVE); + const int err = put(map, key, &result, MDBX_put_flags_t(put_mode::update) | MDBX_RESERVE); switch (err) { case MDBX_SUCCESS: return value_result{result, true}; @@ -6927,69 +5925,53 @@ inline bool txn::erase(map_handle map, const slice &key, const slice &value) { } } -inline void txn::replace(map_handle map, const slice &key, slice old_value, - const slice &new_value) { - error::success_or_throw(::mdbx_replace_ex( - handle_, map.dbi, &key, const_cast(&new_value), &old_value, - MDBX_CURRENT | MDBX_NOOVERWRITE, nullptr, nullptr)); +inline void txn::replace(map_handle map, const slice &key, slice old_value, const slice &new_value) { + error::success_or_throw(::mdbx_replace_ex(handle_, map.dbi, &key, const_cast(&new_value), &old_value, + MDBX_CURRENT | MDBX_NOOVERWRITE, nullptr, nullptr)); } template inline buffer txn::extract(map_handle map, const slice &key, - const typename buffer::allocator_type - &allocator) { + const typename buffer::allocator_type &allocator) { typename buffer::data_preserver result(allocator); - 
error::success_or_throw(::mdbx_replace_ex(handle_, map.dbi, &key, nullptr, - &result.slice_, MDBX_CURRENT, - result, &result), - result); + error::success_or_throw( + ::mdbx_replace_ex(handle_, map.dbi, &key, nullptr, &result.slice_, MDBX_CURRENT, result, &result), result); return result; } template inline buffer txn::replace(map_handle map, const slice &key, const slice &new_value, - const typename buffer::allocator_type - &allocator) { + const typename buffer::allocator_type &allocator) { typename buffer::data_preserver result(allocator); - error::success_or_throw( - ::mdbx_replace_ex(handle_, map.dbi, &key, const_cast(&new_value), - &result.slice_, MDBX_CURRENT, result, &result), - result); + error::success_or_throw(::mdbx_replace_ex(handle_, map.dbi, &key, const_cast(&new_value), &result.slice_, + MDBX_CURRENT, result, &result), + result); return result; } template -inline buffer txn::replace_reserve( - map_handle map, const slice &key, slice &new_value, - const typename buffer::allocator_type - &allocator) { +inline buffer +txn::replace_reserve(map_handle map, const slice &key, slice &new_value, + const typename buffer::allocator_type &allocator) { typename buffer::data_preserver result(allocator); - error::success_or_throw( - ::mdbx_replace_ex(handle_, map.dbi, &key, &new_value, &result.slice_, - MDBX_CURRENT | MDBX_RESERVE, result, &result), - result); + error::success_or_throw(::mdbx_replace_ex(handle_, map.dbi, &key, &new_value, &result.slice_, + MDBX_CURRENT | MDBX_RESERVE, result, &result), + result); return result; } -inline void txn::append(map_handle map, const slice &key, const slice &value, - bool multivalue_order_preserved) { - error::success_or_throw(::mdbx_put( - handle_, map.dbi, const_cast(&key), const_cast(&value), - multivalue_order_preserved ? 
(MDBX_APPEND | MDBX_APPENDDUP) - : MDBX_APPEND)); +inline void txn::append(map_handle map, const slice &key, const slice &value, bool multivalue_order_preserved) { + error::success_or_throw(::mdbx_put(handle_, map.dbi, const_cast(&key), const_cast(&value), + multivalue_order_preserved ? (MDBX_APPEND | MDBX_APPENDDUP) : MDBX_APPEND)); } -inline size_t txn::put_multiple_samelength(map_handle map, const slice &key, - const size_t value_length, - const void *values_array, - size_t values_count, put_mode mode, +inline size_t txn::put_multiple_samelength(map_handle map, const slice &key, const size_t value_length, + const void *values_array, size_t values_count, put_mode mode, bool allow_partial) { - MDBX_val args[2] = {{const_cast(values_array), value_length}, - {nullptr, values_count}}; - const int err = ::mdbx_put(handle_, map.dbi, const_cast(&key), args, - MDBX_put_flags_t(mode) | MDBX_MULTIPLE); + MDBX_val args[2] = {{const_cast(values_array), value_length}, {nullptr, values_count}}; + const int err = ::mdbx_put(handle_, map.dbi, const_cast(&key), args, MDBX_put_flags_t(mode) | MDBX_MULTIPLE); switch (err) { case MDBX_SUCCESS: MDBX_CXX20_LIKELY break; @@ -7004,35 +5986,27 @@ inline size_t txn::put_multiple_samelength(map_handle map, const slice &key, return args[1].iov_len /* done item count */; } -inline ptrdiff_t txn::estimate(map_handle map, const pair &from, - const pair &to) const { +inline ptrdiff_t txn::estimate(map_handle map, const pair &from, const pair &to) const { ptrdiff_t result; - error::success_or_throw(mdbx_estimate_range( - handle_, map.dbi, &from.key, &from.value, &to.key, &to.value, &result)); + error::success_or_throw(mdbx_estimate_range(handle_, map.dbi, &from.key, &from.value, &to.key, &to.value, &result)); return result; } -inline ptrdiff_t txn::estimate(map_handle map, const slice &from, - const slice &to) const { +inline ptrdiff_t txn::estimate(map_handle map, const slice &from, const slice &to) const { ptrdiff_t result; - 
error::success_or_throw(mdbx_estimate_range(handle_, map.dbi, &from, nullptr, - &to, nullptr, &result)); + error::success_or_throw(mdbx_estimate_range(handle_, map.dbi, &from, nullptr, &to, nullptr, &result)); return result; } -inline ptrdiff_t txn::estimate_from_first(map_handle map, - const slice &to) const { +inline ptrdiff_t txn::estimate_from_first(map_handle map, const slice &to) const { ptrdiff_t result; - error::success_or_throw(mdbx_estimate_range(handle_, map.dbi, nullptr, - nullptr, &to, nullptr, &result)); + error::success_or_throw(mdbx_estimate_range(handle_, map.dbi, nullptr, nullptr, &to, nullptr, &result)); return result; } -inline ptrdiff_t txn::estimate_to_last(map_handle map, - const slice &from) const { +inline ptrdiff_t txn::estimate_to_last(map_handle map, const slice &from) const { ptrdiff_t result; - error::success_or_throw(mdbx_estimate_range(handle_, map.dbi, &from, nullptr, - nullptr, nullptr, &result)); + error::success_or_throw(mdbx_estimate_range(handle_, map.dbi, &from, nullptr, nullptr, nullptr, &result)); return result; } @@ -7046,9 +6020,7 @@ inline cursor_managed cursor::clone(void *your_context) const { return clone; } -inline void *cursor::get_context() const noexcept { - return mdbx_cursor_get_userctx(handle_); -} +inline void *cursor::get_context() const noexcept { return mdbx_cursor_get_userctx(handle_); } inline cursor &cursor::set_context(void *ptr) { error::success_or_throw(::mdbx_cursor_set_userctx(handle_, ptr)); @@ -7061,9 +6033,7 @@ inline cursor &cursor::operator=(cursor &&other) noexcept { return *this; } -inline cursor::cursor(cursor &&other) noexcept : handle_(other.handle_) { - other.handle_ = nullptr; -} +inline cursor::cursor(cursor &&other) noexcept : handle_(other.handle_) { other.handle_ = nullptr; } inline cursor::~cursor() noexcept { #ifndef NDEBUG @@ -7071,33 +6041,21 @@ inline cursor::~cursor() noexcept { #endif } -MDBX_CXX14_CONSTEXPR cursor::operator bool() const noexcept { - return handle_ != nullptr; 
-} +MDBX_CXX14_CONSTEXPR cursor::operator bool() const noexcept { return handle_ != nullptr; } -MDBX_CXX14_CONSTEXPR cursor::operator const MDBX_cursor *() const { - return handle_; -} +MDBX_CXX14_CONSTEXPR cursor::operator const MDBX_cursor *() const { return handle_; } MDBX_CXX14_CONSTEXPR cursor::operator MDBX_cursor *() { return handle_; } -MDBX_CXX11_CONSTEXPR bool operator==(const cursor &a, - const cursor &b) noexcept { - return a.handle_ == b.handle_; -} +MDBX_CXX11_CONSTEXPR bool operator==(const cursor &a, const cursor &b) noexcept { return a.handle_ == b.handle_; } -MDBX_CXX11_CONSTEXPR bool operator!=(const cursor &a, - const cursor &b) noexcept { - return a.handle_ != b.handle_; -} +MDBX_CXX11_CONSTEXPR bool operator!=(const cursor &a, const cursor &b) noexcept { return a.handle_ != b.handle_; } -inline int compare_position_nothrow(const cursor &left, const cursor &right, - bool ignore_nested = false) noexcept { +inline int compare_position_nothrow(const cursor &left, const cursor &right, bool ignore_nested = false) noexcept { return mdbx_cursor_compare(left.handle_, right.handle_, ignore_nested); } -inline int compare_position(const cursor &left, const cursor &right, - bool ignore_nested = false) { +inline int compare_position(const cursor &left, const cursor &right, bool ignore_nested = false) { const auto diff = compare_position_nothrow(left, right, ignore_nested); assert(compare_position_nothrow(right, left, ignore_nested) == -diff); if (MDBX_LIKELY(int16_t(diff) == diff)) @@ -7106,24 +6064,18 @@ inline int compare_position(const cursor &left, const cursor &right, throw_incomparable_cursors(); } -inline cursor::move_result::move_result(const cursor &cursor, - bool throw_notfound) - : pair_result() { +inline cursor::move_result::move_result(const cursor &cursor, bool throw_notfound) : pair_result() { done = cursor.move(get_current, &this->key, &this->value, throw_notfound); } -inline cursor::move_result::move_result(cursor &cursor, - move_operation 
operation, - const slice &key, const slice &value, +inline cursor::move_result::move_result(cursor &cursor, move_operation operation, const slice &key, const slice &value, bool throw_notfound) : pair_result(key, value, false) { this->done = cursor.move(operation, &this->key, &this->value, throw_notfound); } -inline bool cursor::move(move_operation operation, MDBX_val *key, - MDBX_val *value, bool throw_notfound) const { - const int err = - ::mdbx_cursor_get(handle_, key, value, MDBX_cursor_op(operation)); +inline bool cursor::move(move_operation operation, MDBX_val *key, MDBX_val *value, bool throw_notfound) const { + const int err = ::mdbx_cursor_get(handle_, key, value, MDBX_cursor_op(operation)); switch (err) { case MDBX_SUCCESS: MDBX_CXX20_LIKELY return true; @@ -7138,19 +6090,15 @@ inline bool cursor::move(move_operation operation, MDBX_val *key, } } -inline cursor::estimate_result::estimate_result(const cursor &cursor, - move_operation operation, - const slice &key, +inline cursor::estimate_result::estimate_result(const cursor &cursor, move_operation operation, const slice &key, const slice &value) : pair(key, value), approximate_quantity(PTRDIFF_MIN) { approximate_quantity = cursor.estimate(operation, &this->key, &this->value); } -inline ptrdiff_t cursor::estimate(move_operation operation, MDBX_val *key, - MDBX_val *value) const { +inline ptrdiff_t cursor::estimate(move_operation operation, MDBX_val *key, MDBX_val *value) const { ptrdiff_t result; - error::success_or_throw(::mdbx_estimate_move( - *this, key, value, MDBX_cursor_op(operation), &result)); + error::success_or_throw(::mdbx_estimate_move(*this, key, value, MDBX_cursor_op(operation), &result)); return result; } @@ -7164,37 +6112,27 @@ inline cursor::move_result cursor::find(const slice &key, bool throw_notfound) { return move(key_exact, key, throw_notfound); } -inline cursor::move_result cursor::lower_bound(const slice &key, - bool throw_notfound) { +inline cursor::move_result 
cursor::lower_bound(const slice &key, bool throw_notfound) { return move(key_lowerbound, key, throw_notfound); } -inline cursor::move_result cursor::upper_bound(const slice &key, - bool throw_notfound) { +inline cursor::move_result cursor::upper_bound(const slice &key, bool throw_notfound) { return move(key_greater_than, key, throw_notfound); } -inline cursor::move_result cursor::find_multivalue(const slice &key, - const slice &value, - bool throw_notfound) { +inline cursor::move_result cursor::find_multivalue(const slice &key, const slice &value, bool throw_notfound) { return move(multi_find_pair, key, value, throw_notfound); } -inline cursor::move_result cursor::lower_bound_multivalue(const slice &key, - const slice &value, - bool throw_notfound) { +inline cursor::move_result cursor::lower_bound_multivalue(const slice &key, const slice &value, bool throw_notfound) { return move(multi_exactkey_lowerboundvalue, key, value, throw_notfound); } -inline cursor::move_result cursor::upper_bound_multivalue(const slice &key, - const slice &value, - bool throw_notfound) { +inline cursor::move_result cursor::upper_bound_multivalue(const slice &key, const slice &value, bool throw_notfound) { return move(multi_exactkey_value_greater, key, value, throw_notfound); } -inline bool cursor::seek(const slice &key) { - return move(seek_key, const_cast(&key), nullptr, false); -} +inline bool cursor::seek(const slice &key) { return move(seek_key, const_cast(&key), nullptr, false); } inline size_t cursor::count_multivalue() const { size_t result; @@ -7202,28 +6140,17 @@ inline size_t cursor::count_multivalue() const { return result; } -inline bool cursor::eof() const { - return error::boolean_or_throw(::mdbx_cursor_eof(*this)); -} +inline bool cursor::eof() const { return error::boolean_or_throw(::mdbx_cursor_eof(*this)); } -inline bool cursor::on_first() const { - return error::boolean_or_throw(::mdbx_cursor_on_first(*this)); -} +inline bool cursor::on_first() const { return 
error::boolean_or_throw(::mdbx_cursor_on_first(*this)); } -inline bool cursor::on_last() const { - return error::boolean_or_throw(::mdbx_cursor_on_last(*this)); -} +inline bool cursor::on_last() const { return error::boolean_or_throw(::mdbx_cursor_on_last(*this)); } -inline bool cursor::on_first_multival() const { - return error::boolean_or_throw(::mdbx_cursor_on_first_dup(*this)); -} +inline bool cursor::on_first_multival() const { return error::boolean_or_throw(::mdbx_cursor_on_first_dup(*this)); } -inline bool cursor::on_last_multival() const { - return error::boolean_or_throw(::mdbx_cursor_on_last_dup(*this)); -} +inline bool cursor::on_last_multival() const { return error::boolean_or_throw(::mdbx_cursor_on_last_dup(*this)); } -inline cursor::estimate_result cursor::estimate(const slice &key, - const slice &value) const { +inline cursor::estimate_result cursor::estimate(const slice &key, const slice &value) const { return estimate_result(*this, multi_exactkey_lowerboundvalue, key, value); } @@ -7231,23 +6158,17 @@ inline cursor::estimate_result cursor::estimate(const slice &key) const { return estimate_result(*this, key_lowerbound, key); } -inline cursor::estimate_result -cursor::estimate(move_operation operation) const { +inline cursor::estimate_result cursor::estimate(move_operation operation) const { return estimate_result(*this, operation); } -inline void cursor::renew(const ::mdbx::txn &txn) { - error::success_or_throw(::mdbx_cursor_renew(txn, handle_)); -} +inline void cursor::renew(const ::mdbx::txn &txn) { error::success_or_throw(::mdbx_cursor_renew(txn, handle_)); } -inline void cursor::bind(const ::mdbx::txn &txn, - ::mdbx::map_handle map_handle) { +inline void cursor::bind(const ::mdbx::txn &txn, ::mdbx::map_handle map_handle) { error::success_or_throw(::mdbx_cursor_bind(txn, handle_, map_handle.dbi)); } -inline void cursor::unbind() { - error::success_or_throw(::mdbx_cursor_unbind(handle_)); -} +inline void cursor::unbind() { 
error::success_or_throw(::mdbx_cursor_unbind(handle_)); } inline txn cursor::txn() const { MDBX_txn *txn = ::mdbx_cursor_txn(handle_); @@ -7262,8 +6183,7 @@ inline map_handle cursor::map() const { return map_handle(dbi); } -inline MDBX_error_t cursor::put(const slice &key, slice *value, - MDBX_put_flags_t flags) noexcept { +inline MDBX_error_t cursor::put(const slice &key, slice *value, MDBX_put_flags_t flags) noexcept { return MDBX_error_t(::mdbx_cursor_put(handle_, &key, value, flags)); } @@ -7273,14 +6193,12 @@ inline void cursor::put(const slice &key, slice value, put_mode mode) { inline void cursor::insert(const slice &key, slice value) { error::success_or_throw( - put(key, &value /* takes the present value in case MDBX_KEYEXIST */, - MDBX_put_flags_t(put_mode::insert_unique))); + put(key, &value /* takes the present value in case MDBX_KEYEXIST */, MDBX_put_flags_t(put_mode::insert_unique))); } inline value_result cursor::try_insert(const slice &key, slice value) { const int err = - put(key, &value /* takes the present value in case MDBX_KEYEXIST */, - MDBX_put_flags_t(put_mode::insert_unique)); + put(key, &value /* takes the present value in case MDBX_KEYEXIST */, MDBX_put_flags_t(put_mode::insert_unique)); switch (err) { case MDBX_SUCCESS: return value_result{slice(), true}; @@ -7293,18 +6211,15 @@ inline value_result cursor::try_insert(const slice &key, slice value) { inline slice cursor::insert_reserve(const slice &key, size_t value_length) { slice result(nullptr, value_length); - error::success_or_throw( - put(key, &result /* takes the present value in case MDBX_KEYEXIST */, - MDBX_put_flags_t(put_mode::insert_unique) | MDBX_RESERVE)); + error::success_or_throw(put(key, &result /* takes the present value in case MDBX_KEYEXIST */, + MDBX_put_flags_t(put_mode::insert_unique) | MDBX_RESERVE)); return result; } -inline value_result cursor::try_insert_reserve(const slice &key, - size_t value_length) { +inline value_result cursor::try_insert_reserve(const slice 
&key, size_t value_length) { slice result(nullptr, value_length); - const int err = - put(key, &result /* takes the present value in case MDBX_KEYEXIST */, - MDBX_put_flags_t(put_mode::insert_unique) | MDBX_RESERVE); + const int err = put(key, &result /* takes the present value in case MDBX_KEYEXIST */, + MDBX_put_flags_t(put_mode::insert_unique) | MDBX_RESERVE); switch (err) { case MDBX_SUCCESS: return value_result{result, true}; @@ -7316,25 +6231,21 @@ inline value_result cursor::try_insert_reserve(const slice &key, } inline void cursor::upsert(const slice &key, const slice &value) { - error::success_or_throw(put(key, const_cast(&value), - MDBX_put_flags_t(put_mode::upsert))); + error::success_or_throw(put(key, const_cast(&value), MDBX_put_flags_t(put_mode::upsert))); } inline slice cursor::upsert_reserve(const slice &key, size_t value_length) { slice result(nullptr, value_length); - error::success_or_throw( - put(key, &result, MDBX_put_flags_t(put_mode::upsert) | MDBX_RESERVE)); + error::success_or_throw(put(key, &result, MDBX_put_flags_t(put_mode::upsert) | MDBX_RESERVE)); return result; } inline void cursor::update(const slice &key, const slice &value) { - error::success_or_throw(put(key, const_cast(&value), - MDBX_put_flags_t(put_mode::update))); + error::success_or_throw(put(key, const_cast(&value), MDBX_put_flags_t(put_mode::update))); } inline bool cursor::try_update(const slice &key, const slice &value) { - const int err = - put(key, const_cast(&value), MDBX_put_flags_t(put_mode::update)); + const int err = put(key, const_cast(&value), MDBX_put_flags_t(put_mode::update)); switch (err) { case MDBX_SUCCESS: return true; @@ -7347,16 +6258,13 @@ inline bool cursor::try_update(const slice &key, const slice &value) { inline slice cursor::update_reserve(const slice &key, size_t value_length) { slice result(nullptr, value_length); - error::success_or_throw( - put(key, &result, MDBX_put_flags_t(put_mode::update) | MDBX_RESERVE)); + error::success_or_throw(put(key, 
&result, MDBX_put_flags_t(put_mode::update) | MDBX_RESERVE)); return result; } -inline value_result cursor::try_update_reserve(const slice &key, - size_t value_length) { +inline value_result cursor::try_update_reserve(const slice &key, size_t value_length) { slice result(nullptr, value_length); - const int err = - put(key, &result, MDBX_put_flags_t(put_mode::update) | MDBX_RESERVE); + const int err = put(key, &result, MDBX_put_flags_t(put_mode::update) | MDBX_RESERVE); switch (err) { case MDBX_SUCCESS: return value_result{result, true}; @@ -7368,8 +6276,7 @@ inline value_result cursor::try_update_reserve(const slice &key, } inline bool cursor::erase(bool whole_multivalue) { - const int err = ::mdbx_cursor_del(handle_, whole_multivalue ? MDBX_ALLDUPS - : MDBX_CURRENT); + const int err = ::mdbx_cursor_del(handle_, whole_multivalue ? MDBX_ALLDUPS : MDBX_CURRENT); switch (err) { case MDBX_SUCCESS: MDBX_CXX20_LIKELY return true; @@ -7409,8 +6316,7 @@ inline string to_string(const ::mdbx::slice &value) { } template -inline string -to_string(const ::mdbx::buffer &buffer) { +inline string to_string(const ::mdbx::buffer &buffer) { ostringstream out; out << buffer; return out.str(); @@ -7482,15 +6388,10 @@ inline string to_string(const ::mdbx::error &value) { return out.str(); } -inline string to_string(const ::MDBX_error_t &errcode) { - return to_string(::mdbx::error(errcode)); -} +inline string to_string(const ::MDBX_error_t &errcode) { return to_string(::mdbx::error(errcode)); } template <> struct hash<::mdbx::slice> { - MDBX_CXX14_CONSTEXPR size_t - operator()(::mdbx::slice const &slice) const noexcept { - return slice.hash_value(); - } + MDBX_CXX14_CONSTEXPR size_t operator()(::mdbx::slice const &slice) const noexcept { return slice.hash_value(); } }; /// end cxx_api @} diff --git a/src/api-cursor.c b/src/api-cursor.c index 6bb89cfa..24d21fce 100644 --- a/src/api-cursor.c +++ b/src/api-cursor.c @@ -14,30 +14,24 @@ MDBX_cursor *mdbx_cursor_create(void *context) { 
couple->userctx = context; couple->outer.top_and_flags = z_poor_mark; couple->inner.cursor.top_and_flags = z_poor_mark | z_inner; - VALGRIND_MAKE_MEM_DEFINED(&couple->outer.backup, - sizeof(couple->outer.backup)); + VALGRIND_MAKE_MEM_DEFINED(&couple->outer.backup, sizeof(couple->outer.backup)); VALGRIND_MAKE_MEM_DEFINED(&couple->outer.tree, sizeof(couple->outer.tree)); VALGRIND_MAKE_MEM_DEFINED(&couple->outer.clc, sizeof(couple->outer.clc)); - VALGRIND_MAKE_MEM_DEFINED(&couple->outer.dbi_state, - sizeof(couple->outer.dbi_state)); - VALGRIND_MAKE_MEM_DEFINED(&couple->outer.subcur, - sizeof(couple->outer.subcur)); + VALGRIND_MAKE_MEM_DEFINED(&couple->outer.dbi_state, sizeof(couple->outer.dbi_state)); + VALGRIND_MAKE_MEM_DEFINED(&couple->outer.subcur, sizeof(couple->outer.subcur)); VALGRIND_MAKE_MEM_DEFINED(&couple->outer.txn, sizeof(couple->outer.txn)); return &couple->outer; } int mdbx_cursor_renew(const MDBX_txn *txn, MDBX_cursor *mc) { - return likely(mc) - ? mdbx_cursor_bind(txn, mc, (kvx_t *)mc->clc - txn->env->kvs) - : LOG_IFERR(MDBX_EINVAL); + return likely(mc) ? 
mdbx_cursor_bind(txn, mc, (kvx_t *)mc->clc - txn->env->kvs) : LOG_IFERR(MDBX_EINVAL); } int mdbx_cursor_reset(MDBX_cursor *mc) { if (unlikely(!mc)) return LOG_IFERR(MDBX_EINVAL); - if (unlikely(mc->signature != cur_signature_ready4dispose && - mc->signature != cur_signature_live)) + if (unlikely(mc->signature != cur_signature_ready4dispose && mc->signature != cur_signature_live)) return LOG_IFERR(MDBX_EBADSIGN); cursor_couple_t *couple = (cursor_couple_t *)mc; @@ -50,8 +44,7 @@ int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { if (unlikely(!mc)) return LOG_IFERR(MDBX_EINVAL); - if (unlikely(mc->signature != cur_signature_ready4dispose && - mc->signature != cur_signature_live)) + if (unlikely(mc->signature != cur_signature_ready4dispose && mc->signature != cur_signature_live)) return LOG_IFERR(MDBX_EBADSIGN); int rc = check_txn(txn, MDBX_TXN_BLOCKED); @@ -68,16 +61,14 @@ int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { if (unlikely(mc->backup)) /* Cursor from parent transaction */ { cASSERT(mc, mc->signature == cur_signature_live); if (unlikely(cursor_dbi(mc) != dbi || - /* paranoia */ mc->signature != cur_signature_live || - mc->txn != txn)) + /* paranoia */ mc->signature != cur_signature_live || mc->txn != txn)) return LOG_IFERR(MDBX_EINVAL); cASSERT(mc, mc->tree == &txn->dbs[dbi]); cASSERT(mc, mc->clc == &txn->env->kvs[dbi].clc); cASSERT(mc, cursor_dbi(mc) == dbi); return likely(cursor_dbi(mc) == dbi && - /* paranoia */ mc->signature == cur_signature_live && - mc->txn == txn) + /* paranoia */ mc->signature == cur_signature_live && mc->txn == txn) ? MDBX_SUCCESS : LOG_IFERR(MDBX_EINVAL) /* Disallow change DBI in nested transactions */ @@ -105,9 +96,7 @@ int mdbx_cursor_unbind(MDBX_cursor *mc) { return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return (mc->signature == cur_signature_ready4dispose) - ? 
MDBX_SUCCESS - : LOG_IFERR(MDBX_EBADSIGN); + return (mc->signature == cur_signature_ready4dispose) ? MDBX_SUCCESS : LOG_IFERR(MDBX_EBADSIGN); if (unlikely(mc->backup)) /* Cursor from parent transaction */ return LOG_IFERR(MDBX_EINVAL); @@ -116,9 +105,7 @@ int mdbx_cursor_unbind(MDBX_cursor *mc) { cASSERT(mc, mc->signature == cur_signature_live); cASSERT(mc, !mc->backup); if (unlikely(!mc->txn || mc->txn->signature != txn_signature)) { - ERROR("Wrong cursor's transaction %p 0x%x", - __Wpedantic_format_voidptr(mc->txn), - mc->txn ? mc->txn->signature : 0); + ERROR("Wrong cursor's transaction %p 0x%x", __Wpedantic_format_voidptr(mc->txn), mc->txn ? mc->txn->signature : 0); return LOG_IFERR(MDBX_PROBLEM); } if (mc->next != mc) { @@ -160,8 +147,7 @@ int mdbx_cursor_open(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **ret) { void mdbx_cursor_close(MDBX_cursor *mc) { if (likely(mc)) { - ENSURE(nullptr, mc->signature == cur_signature_live || - mc->signature == cur_signature_ready4dispose); + ENSURE(nullptr, mc->signature == cur_signature_live || mc->signature == cur_signature_ready4dispose); MDBX_txn *const txn = mc->txn; if (!mc->backup) { mc->txn = nullptr; @@ -194,9 +180,7 @@ int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest) { if (unlikely(!src)) return LOG_IFERR(MDBX_EINVAL); if (unlikely(src->signature != cur_signature_live)) - return LOG_IFERR((src->signature == cur_signature_ready4dispose) - ? MDBX_EINVAL - : MDBX_EBADSIGN); + return LOG_IFERR((src->signature == cur_signature_ready4dispose) ? 
MDBX_EINVAL : MDBX_EBADSIGN); int rc = mdbx_cursor_bind(src->txn, dest, cursor_dbi(src)); if (unlikely(rc != MDBX_SUCCESS)) @@ -229,8 +213,7 @@ int mdbx_txn_release_all_cursors(const MDBX_txn *txn, bool unbind) { TXN_FOREACH_DBI_FROM(txn, i, MAIN_DBI) { while (txn->cursors[i]) { MDBX_cursor *mc = txn->cursors[i]; - ENSURE(nullptr, mc->signature == cur_signature_live && - (mc->next != mc) && !mc->backup); + ENSURE(nullptr, mc->signature == cur_signature_live && (mc->next != mc) && !mc->backup); rc = likely(rc < INT_MAX) ? rc + 1 : rc; txn->cursors[i] = mc->next; mc->next = mc; @@ -250,8 +233,7 @@ int mdbx_txn_release_all_cursors(const MDBX_txn *txn, bool unbind) { return rc; } -int mdbx_cursor_compare(const MDBX_cursor *l, const MDBX_cursor *r, - bool ignore_multival) { +int mdbx_cursor_compare(const MDBX_cursor *l, const MDBX_cursor *r, bool ignore_multival) { const int incomparable = INT16_MAX + 1; if (unlikely(!l)) return r ? -incomparable * 9 : 0; @@ -267,8 +249,7 @@ int mdbx_cursor_compare(const MDBX_cursor *l, const MDBX_cursor *r, if (l->txn->env != r->txn->env) return (l->txn->env > r->txn->env) ? incomparable * 7 : -incomparable * 7; if (l->txn->txnid != r->txn->txnid) - return (l->txn->txnid > r->txn->txnid) ? incomparable * 6 - : -incomparable * 6; + return (l->txn->txnid > r->txn->txnid) ? incomparable * 6 : -incomparable * 6; return (l->clc > r->clc) ? incomparable * 5 : -incomparable * 5; } assert(cursor_dbi(l) == cursor_dbi(r)); @@ -333,9 +314,7 @@ int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) { return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return LOG_IFERR((mc->signature == cur_signature_ready4dispose) - ? MDBX_EINVAL - : MDBX_EBADSIGN); + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) ? 
MDBX_EINVAL : MDBX_EBADSIGN); int rc = check_txn(mc->txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -349,9 +328,8 @@ int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) { const page_t *mp = mc->pg[mc->top]; const node_t *node = page_node(mp, mc->ki[mc->top]); cASSERT(mc, node_flags(node) & N_DUP); - *countp = unlikely(mc->subcur->nested_tree.items > PTRDIFF_MAX) - ? PTRDIFF_MAX - : (size_t)mc->subcur->nested_tree.items; + *countp = + unlikely(mc->subcur->nested_tree.items > PTRDIFF_MAX) ? PTRDIFF_MAX : (size_t)mc->subcur->nested_tree.items; } } return MDBX_SUCCESS; @@ -362,9 +340,7 @@ int mdbx_cursor_on_first(const MDBX_cursor *mc) { return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return LOG_IFERR((mc->signature == cur_signature_ready4dispose) - ? MDBX_EINVAL - : MDBX_EBADSIGN); + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL : MDBX_EBADSIGN); for (intptr_t i = 0; i <= mc->top; ++i) { if (mc->ki[i]) @@ -379,9 +355,7 @@ int mdbx_cursor_on_first_dup(const MDBX_cursor *mc) { return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return LOG_IFERR((mc->signature == cur_signature_ready4dispose) - ? MDBX_EINVAL - : MDBX_EBADSIGN); + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL : MDBX_EBADSIGN); if (is_filled(mc) && mc->subcur) { mc = &mc->subcur->cursor; @@ -399,9 +373,7 @@ int mdbx_cursor_on_last(const MDBX_cursor *mc) { return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return LOG_IFERR((mc->signature == cur_signature_ready4dispose) - ? MDBX_EINVAL - : MDBX_EBADSIGN); + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) ? 
MDBX_EINVAL : MDBX_EBADSIGN); for (intptr_t i = 0; i <= mc->top; ++i) { size_t nkeys = page_numkeys(mc->pg[i]); @@ -417,9 +389,7 @@ int mdbx_cursor_on_last_dup(const MDBX_cursor *mc) { return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return LOG_IFERR((mc->signature == cur_signature_ready4dispose) - ? MDBX_EINVAL - : MDBX_EBADSIGN); + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL : MDBX_EBADSIGN); if (is_filled(mc) && mc->subcur) { mc = &mc->subcur->cursor; @@ -438,22 +408,17 @@ int mdbx_cursor_eof(const MDBX_cursor *mc) { return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return LOG_IFERR((mc->signature == cur_signature_ready4dispose) - ? MDBX_EINVAL - : MDBX_EBADSIGN); + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL : MDBX_EBADSIGN); return is_eof(mc) ? MDBX_RESULT_TRUE : MDBX_RESULT_FALSE; } -int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, - MDBX_cursor_op op) { +int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { if (unlikely(mc == nullptr)) return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return LOG_IFERR((mc->signature == cur_signature_ready4dispose) - ? MDBX_EINVAL - : MDBX_EBADSIGN); + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) ? 
MDBX_EINVAL : MDBX_EBADSIGN); int rc = check_txn(mc->txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -465,8 +430,7 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, return LOG_IFERR(cursor_ops(mc, key, data, op)); } -__hot static int scan_confinue(MDBX_cursor *mc, MDBX_predicate_func *predicate, - void *context, void *arg, MDBX_val *key, +__hot static int scan_confinue(MDBX_cursor *mc, MDBX_predicate_func *predicate, void *context, void *arg, MDBX_val *key, MDBX_val *value, MDBX_cursor_op turn_op) { int rc; switch (turn_op) { @@ -528,22 +492,19 @@ __hot static int scan_confinue(MDBX_cursor *mc, MDBX_predicate_func *predicate, } } -int mdbx_cursor_scan(MDBX_cursor *mc, MDBX_predicate_func *predicate, - void *context, MDBX_cursor_op start_op, +int mdbx_cursor_scan(MDBX_cursor *mc, MDBX_predicate_func *predicate, void *context, MDBX_cursor_op start_op, MDBX_cursor_op turn_op, void *arg) { if (unlikely(!predicate)) return LOG_IFERR(MDBX_EINVAL); - const unsigned valid_start_mask = - 1 << MDBX_FIRST | 1 << MDBX_FIRST_DUP | 1 << MDBX_LAST | - 1 << MDBX_LAST_DUP | 1 << MDBX_GET_CURRENT | 1 << MDBX_GET_MULTIPLE; + const unsigned valid_start_mask = 1 << MDBX_FIRST | 1 << MDBX_FIRST_DUP | 1 << MDBX_LAST | 1 << MDBX_LAST_DUP | + 1 << MDBX_GET_CURRENT | 1 << MDBX_GET_MULTIPLE; if (unlikely(start_op > 30 || ((1 << start_op) & valid_start_mask) == 0)) return LOG_IFERR(MDBX_EINVAL); - const unsigned valid_turn_mask = - 1 << MDBX_NEXT | 1 << MDBX_NEXT_DUP | 1 << MDBX_NEXT_NODUP | - 1 << MDBX_PREV | 1 << MDBX_PREV_DUP | 1 << MDBX_PREV_NODUP | - 1 << MDBX_NEXT_MULTIPLE | 1 << MDBX_PREV_MULTIPLE; + const unsigned valid_turn_mask = 1 << MDBX_NEXT | 1 << MDBX_NEXT_DUP | 1 << MDBX_NEXT_NODUP | 1 << MDBX_PREV | + 1 << MDBX_PREV_DUP | 1 << MDBX_PREV_NODUP | 1 << MDBX_NEXT_MULTIPLE | + 1 << MDBX_PREV_MULTIPLE; if (unlikely(turn_op > 30 || ((1 << turn_op) & valid_turn_mask) == 0)) return LOG_IFERR(MDBX_EINVAL); @@ -551,28 +512,22 @@ int 
mdbx_cursor_scan(MDBX_cursor *mc, MDBX_predicate_func *predicate, int rc = mdbx_cursor_get(mc, &key, &value, start_op); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); - return LOG_IFERR( - scan_confinue(mc, predicate, context, arg, &key, &value, turn_op)); + return LOG_IFERR(scan_confinue(mc, predicate, context, arg, &key, &value, turn_op)); } -int mdbx_cursor_scan_from(MDBX_cursor *mc, MDBX_predicate_func *predicate, - void *context, MDBX_cursor_op from_op, MDBX_val *key, - MDBX_val *value, MDBX_cursor_op turn_op, void *arg) { +int mdbx_cursor_scan_from(MDBX_cursor *mc, MDBX_predicate_func *predicate, void *context, MDBX_cursor_op from_op, + MDBX_val *key, MDBX_val *value, MDBX_cursor_op turn_op, void *arg) { if (unlikely(!predicate || !key)) return LOG_IFERR(MDBX_EINVAL); - const unsigned valid_start_mask = - 1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | 1 << MDBX_SET_KEY | - 1 << MDBX_GET_MULTIPLE | 1 << MDBX_SET_LOWERBOUND | - 1 << MDBX_SET_UPPERBOUND; - if (unlikely(from_op < MDBX_TO_KEY_LESSER_THAN && - ((1 << from_op) & valid_start_mask) == 0)) + const unsigned valid_start_mask = 1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | 1 << MDBX_SET_KEY | + 1 << MDBX_GET_MULTIPLE | 1 << MDBX_SET_LOWERBOUND | 1 << MDBX_SET_UPPERBOUND; + if (unlikely(from_op < MDBX_TO_KEY_LESSER_THAN && ((1 << from_op) & valid_start_mask) == 0)) return LOG_IFERR(MDBX_EINVAL); - const unsigned valid_turn_mask = - 1 << MDBX_NEXT | 1 << MDBX_NEXT_DUP | 1 << MDBX_NEXT_NODUP | - 1 << MDBX_PREV | 1 << MDBX_PREV_DUP | 1 << MDBX_PREV_NODUP | - 1 << MDBX_NEXT_MULTIPLE | 1 << MDBX_PREV_MULTIPLE; + const unsigned valid_turn_mask = 1 << MDBX_NEXT | 1 << MDBX_NEXT_DUP | 1 << MDBX_NEXT_NODUP | 1 << MDBX_PREV | + 1 << MDBX_PREV_DUP | 1 << MDBX_PREV_NODUP | 1 << MDBX_NEXT_MULTIPLE | + 1 << MDBX_PREV_MULTIPLE; if (unlikely(turn_op > 30 || ((1 << turn_op) & valid_turn_mask) == 0)) return LOG_IFERR(MDBX_EINVAL); @@ -588,12 +543,10 @@ int mdbx_cursor_scan_from(MDBX_cursor *mc, 
MDBX_predicate_func *predicate, if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); } - return LOG_IFERR( - scan_confinue(mc, predicate, context, arg, key, value, turn_op)); + return LOG_IFERR(scan_confinue(mc, predicate, context, arg, key, value, turn_op)); } -int mdbx_cursor_get_batch(MDBX_cursor *mc, size_t *count, MDBX_val *pairs, - size_t limit, MDBX_cursor_op op) { +int mdbx_cursor_get_batch(MDBX_cursor *mc, size_t *count, MDBX_val *pairs, size_t limit, MDBX_cursor_op op) { if (unlikely(!count)) return LOG_IFERR(MDBX_EINVAL); @@ -602,9 +555,7 @@ int mdbx_cursor_get_batch(MDBX_cursor *mc, size_t *count, MDBX_val *pairs, return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return LOG_IFERR((mc->signature == cur_signature_ready4dispose) - ? MDBX_EINVAL - : MDBX_EBADSIGN); + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL : MDBX_EBADSIGN); int rc = check_txn(mc->txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -661,11 +612,9 @@ int mdbx_cursor_get_batch(MDBX_cursor *mc, size_t *count, MDBX_val *pairs, } mp = mc->pg[mc->top]; - DEBUG("next page is %" PRIaPGNO ", key index %u", mp->pgno, - mc->ki[mc->top]); + DEBUG("next page is %" PRIaPGNO ", key index %u", mp->pgno, mc->ki[mc->top]); if (!MDBX_DISABLE_VALIDATION && unlikely(!check_leaf_type(mc, mp))) { - ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", - mp->pgno, mp->flags); + ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", mp->pgno, mp->flags); rc = MDBX_CORRUPTED; goto bailout; } @@ -686,8 +635,7 @@ int mdbx_cursor_set_userctx(MDBX_cursor *mc, void *ctx) { if (unlikely(!mc)) return LOG_IFERR(MDBX_EINVAL); - if (unlikely(mc->signature != cur_signature_ready4dispose && - mc->signature != cur_signature_live)) + if (unlikely(mc->signature != cur_signature_ready4dispose && mc->signature != cur_signature_live)) return LOG_IFERR(MDBX_EBADSIGN); cursor_couple_t *couple = container_of(mc, 
cursor_couple_t, outer); @@ -699,8 +647,7 @@ void *mdbx_cursor_get_userctx(const MDBX_cursor *mc) { if (unlikely(!mc)) return nullptr; - if (unlikely(mc->signature != cur_signature_ready4dispose && - mc->signature != cur_signature_live)) + if (unlikely(mc->signature != cur_signature_ready4dispose && mc->signature != cur_signature_live)) return nullptr; cursor_couple_t *couple = container_of(mc, cursor_couple_t, outer); @@ -726,15 +673,12 @@ MDBX_dbi mdbx_cursor_dbi(const MDBX_cursor *mc) { /*----------------------------------------------------------------------------*/ -int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, - MDBX_put_flags_t flags) { +int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, MDBX_put_flags_t flags) { if (unlikely(mc == nullptr || key == nullptr || data == nullptr)) return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return LOG_IFERR((mc->signature == cur_signature_ready4dispose) - ? MDBX_EINVAL - : MDBX_EBADSIGN); + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) ? 
MDBX_EINVAL : MDBX_EBADSIGN); int rc = check_txn_rw(mc->txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -754,12 +698,9 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, const size_t dcount = data[1].iov_len; if (unlikely(dcount < 2 || data->iov_len == 0)) return LOG_IFERR(MDBX_BAD_VALSIZE); - if (unlikely(mc->tree->dupfix_size != data->iov_len) && - mc->tree->dupfix_size) + if (unlikely(mc->tree->dupfix_size != data->iov_len) && mc->tree->dupfix_size) return LOG_IFERR(MDBX_BAD_VALSIZE); - if (unlikely(dcount > - MAX_MAPSIZE / 2 / - (BRANCH_NODE_MAX(MDBX_MAX_PAGESIZE) - NODESIZE))) { + if (unlikely(dcount > MAX_MAPSIZE / 2 / (BRANCH_NODE_MAX(MDBX_MAX_PAGESIZE) - NODESIZE))) { /* checking for multiplication overflow */ if (unlikely(dcount > MAX_MAPSIZE / 2 / data->iov_len)) return LOG_IFERR(MDBX_TOO_LARGE); @@ -767,15 +708,13 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, } if (flags & MDBX_RESERVE) { - if (unlikely(mc->tree->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | - MDBX_INTEGERDUP | MDBX_DUPFIXED))) + if (unlikely(mc->tree->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_INTEGERDUP | MDBX_DUPFIXED))) return LOG_IFERR(MDBX_INCOMPATIBLE); data->iov_base = nullptr; } if (unlikely(mc->txn->flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) - return LOG_IFERR((mc->txn->flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS - : MDBX_BAD_TXN); + return LOG_IFERR((mc->txn->flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS : MDBX_BAD_TXN); return LOG_IFERR(cursor_put_checklen(mc, key, data, flags)); } @@ -785,9 +724,7 @@ int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) { return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return LOG_IFERR((mc->signature == cur_signature_ready4dispose) - ? MDBX_EINVAL - : MDBX_EBADSIGN); + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) ? 
MDBX_EINVAL : MDBX_EBADSIGN); int rc = check_txn_rw(mc->txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -804,9 +741,7 @@ __cold int mdbx_cursor_ignord(MDBX_cursor *mc) { return LOG_IFERR(MDBX_EINVAL); if (unlikely(mc->signature != cur_signature_live)) - return LOG_IFERR((mc->signature == cur_signature_ready4dispose) - ? MDBX_EINVAL - : MDBX_EBADSIGN); + return LOG_IFERR((mc->signature == cur_signature_ready4dispose) ? MDBX_EINVAL : MDBX_EBADSIGN); mc->checking |= z_ignord; if (mc->subcur) diff --git a/src/api-env.c b/src/api-env.c index be47d566..b9d0488c 100644 --- a/src/api-env.c +++ b/src/api-env.c @@ -7,8 +7,7 @@ __cold static intptr_t reasonable_db_maxsize(void) { static intptr_t cached_result; if (cached_result == 0) { intptr_t pagesize, total_ram_pages; - if (unlikely(mdbx_get_sysraminfo(&pagesize, &total_ram_pages, nullptr) != - MDBX_SUCCESS)) + if (unlikely(mdbx_get_sysraminfo(&pagesize, &total_ram_pages, nullptr) != MDBX_SUCCESS)) /* the 32-bit limit is good enough for fallback */ return cached_result = MAX_MAPSIZE32; @@ -24,8 +23,7 @@ __cold static intptr_t reasonable_db_maxsize(void) { const size_t floor = floor_powerof2(cached_result, unit); const size_t ceil = ceil_powerof2(cached_result, unit); const size_t threshold = (size_t)cached_result >> 4; - const bool down = - cached_result - floor < ceil - cached_result || ceil > MAX_MAPSIZE; + const bool down = cached_result - floor < ceil - cached_result || ceil > MAX_MAPSIZE; if (threshold < (down ? cached_result - floor : ceil - cached_result)) break; cached_result = down ? 
floor : ceil; @@ -39,14 +37,12 @@ __cold static int check_alternative_lck_absent(const pathchar_t *lck_pathname) { if (unlikely(err != MDBX_RESULT_FALSE)) { if (err == MDBX_RESULT_TRUE) err = MDBX_DUPLICATED_CLK; - ERROR("Alternative/Duplicate LCK-file '%" MDBX_PRIsPATH "' error %d", - lck_pathname, err); + ERROR("Alternative/Duplicate LCK-file '%" MDBX_PRIsPATH "' error %d", lck_pathname, err); } return err; } -__cold static int env_handle_pathname(MDBX_env *env, const pathchar_t *pathname, - const mdbx_mode_t mode) { +__cold static int env_handle_pathname(MDBX_env *env, const pathchar_t *pathname, const mdbx_mode_t mode) { memset(&env->pathname, 0, sizeof(env->pathname)); if (unlikely(!pathname || !*pathname)) return MDBX_EINVAL; @@ -63,8 +59,7 @@ __cold static int env_handle_pathname(MDBX_env *env, const pathchar_t *pathname, return rc; /* auto-create directory if requested */ - if ((env->flags & MDBX_NOSUBDIR) == 0 && - !CreateDirectoryW(pathname, nullptr)) { + if ((env->flags & MDBX_NOSUBDIR) == 0 && !CreateDirectoryW(pathname, nullptr)) { rc = GetLastError(); if (rc != ERROR_ALREADY_EXISTS) return rc; @@ -87,8 +82,7 @@ __cold static int env_handle_pathname(MDBX_env *env, const pathchar_t *pathname, /* auto-create directory if requested */ const mdbx_mode_t dir_mode = - (/* inherit read/write permissions for group and others */ mode & - (S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) | + (/* inherit read/write permissions for group and others */ mode & (S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) | /* always add read/write/search for owner */ S_IRWXU | ((mode & S_IRGRP) ? /* +search if readable by group */ S_IXGRP : 0) | ((mode & S_IROTH) ? 
/* +search if readable by others */ S_IXOTH : 0); @@ -120,15 +114,11 @@ __cold static int env_handle_pathname(MDBX_env *env, const pathchar_t *pathname, size_t base_len = pathname_len; static const size_t dxb_name_len = ARRAY_LENGTH(dxb_name) - 1; if (env->flags & MDBX_NOSUBDIR) { - if (base_len > dxb_name_len && - osal_pathequal(pathname + base_len - dxb_name_len, dxb_name, - dxb_name_len)) { + if (base_len > dxb_name_len && osal_pathequal(pathname + base_len - dxb_name_len, dxb_name, dxb_name_len)) { env->flags -= MDBX_NOSUBDIR; base_len -= dxb_name_len; - } else if (base_len == dxb_name_len - 1 && osal_isdirsep(dxb_name[0]) && - osal_isdirsep(lck_name[0]) && - osal_pathequal(pathname + base_len - dxb_name_len + 1, - dxb_name + 1, dxb_name_len - 1)) { + } else if (base_len == dxb_name_len - 1 && osal_isdirsep(dxb_name[0]) && osal_isdirsep(lck_name[0]) && + osal_pathequal(pathname + base_len - dxb_name_len + 1, dxb_name + 1, dxb_name_len - 1)) { env->flags -= MDBX_NOSUBDIR; base_len -= dxb_name_len - 1; } @@ -136,11 +126,9 @@ __cold static int env_handle_pathname(MDBX_env *env, const pathchar_t *pathname, const size_t suflen_with_NOSUBDIR = sizeof(lock_suffix) + sizeof(pathchar_t); const size_t suflen_without_NOSUBDIR = sizeof(lck_name) + sizeof(dxb_name); - const size_t enough4any = (suflen_with_NOSUBDIR > suflen_without_NOSUBDIR) - ? suflen_with_NOSUBDIR - : suflen_without_NOSUBDIR; - const size_t bytes_needed = - sizeof(pathchar_t) * (base_len * 2 + pathname_len + 1) + enough4any; + const size_t enough4any = + (suflen_with_NOSUBDIR > suflen_without_NOSUBDIR) ? 
suflen_with_NOSUBDIR : suflen_without_NOSUBDIR; + const size_t bytes_needed = sizeof(pathchar_t) * (base_len * 2 + pathname_len + 1) + enough4any; env->pathname.buffer = osal_malloc(bytes_needed); if (!env->pathname.buffer) return MDBX_ENOMEM; @@ -153,8 +141,7 @@ __cold static int env_handle_pathname(MDBX_env *env, const pathchar_t *pathname, if (base_len) { memcpy(buf, pathname, sizeof(pathchar_t) * pathname_len); if (env->flags & MDBX_NOSUBDIR) { - const pathchar_t *const lck_ext = - osal_fileext(lck_name, ARRAY_LENGTH(lck_name)); + const pathchar_t *const lck_ext = osal_fileext(lck_name, ARRAY_LENGTH(lck_name)); if (lck_ext) { pathchar_t *pathname_ext = osal_fileext(buf, pathname_len); memcpy(pathname_ext ? pathname_ext : buf + pathname_len, lck_ext, @@ -181,14 +168,11 @@ __cold static int env_handle_pathname(MDBX_env *env, const pathchar_t *pathname, memcpy(buf + dxb_name_len - 1, lock_suffix, sizeof(lock_suffix)); rc = check_alternative_lck_absent(buf); - memcpy(env->pathname.dxb, dxb_name + 1, - sizeof(dxb_name) - sizeof(pathchar_t)); - memcpy(env->pathname.lck, lck_name + 1, - sizeof(lck_name) - sizeof(pathchar_t)); + memcpy(env->pathname.dxb, dxb_name + 1, sizeof(dxb_name) - sizeof(pathchar_t)); + memcpy(env->pathname.lck, lck_name + 1, sizeof(lck_name) - sizeof(pathchar_t)); } - memcpy(env->pathname.specified, pathname, - sizeof(pathchar_t) * (pathname_len + 1)); + memcpy(env->pathname.specified, pathname, sizeof(pathchar_t) * (pathname_len + 1)); return rc; } @@ -212,8 +196,7 @@ __cold int mdbx_env_create(MDBX_env **penv) { #endif /* MDBX_64BIT_ATOMIC */ #endif /* MDBX_HAVE_C11ATOMICS */ - if (unlikely(!is_powerof2(globals.sys_pagesize) || - globals.sys_pagesize < MDBX_MIN_PAGESIZE)) { + if (unlikely(!is_powerof2(globals.sys_pagesize) || globals.sys_pagesize < MDBX_MIN_PAGESIZE)) { ERROR("unsuitable system pagesize %u", globals.sys_pagesize); return LOG_IFERR(MDBX_INCOMPATIBLE); } @@ -222,10 +205,8 @@ __cold int mdbx_env_create(MDBX_env **penv) { if 
(unlikely(globals.linux_kernel_version < 0x04000000)) { /* 2022-09-01: Прошло уже более двух лет после окончания какой-либо * поддержки самого "долгоиграющего" ядра 3.16.85 ветки 3.x */ - ERROR("too old linux kernel %u.%u.%u.%u, the >= 4.0.0 is required", - globals.linux_kernel_version >> 24, - (globals.linux_kernel_version >> 16) & 255, - (globals.linux_kernel_version >> 8) & 255, + ERROR("too old linux kernel %u.%u.%u.%u, the >= 4.0.0 is required", globals.linux_kernel_version >> 24, + (globals.linux_kernel_version >> 16) & 255, (globals.linux_kernel_version >> 8) & 255, globals.linux_kernel_version & 255); return LOG_IFERR(MDBX_INCOMPATIBLE); } @@ -237,14 +218,11 @@ __cold int mdbx_env_create(MDBX_env **penv) { env->max_readers = DEFAULT_READERS; env->max_dbi = env->n_dbi = CORE_DBS; - env->lazy_fd = env->dsync_fd = env->fd4meta = env->lck_mmap.fd = - INVALID_HANDLE_VALUE; + env->lazy_fd = env->dsync_fd = env->fd4meta = env->lck_mmap.fd = INVALID_HANDLE_VALUE; env->stuck_meta = -1; env_options_init(env); - env_setup_pagesize(env, (globals.sys_pagesize < MDBX_MAX_PAGESIZE) - ? globals.sys_pagesize - : MDBX_MAX_PAGESIZE); + env_setup_pagesize(env, (globals.sys_pagesize < MDBX_MAX_PAGESIZE) ? 
globals.sys_pagesize : MDBX_MAX_PAGESIZE); int rc = osal_fastmutex_init(&env->dbi_lock); if (unlikely(rc != MDBX_SUCCESS)) @@ -318,8 +296,7 @@ __cold int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target) { return LOG_IFERR(meta_override(env, target, new_txnid, target_meta)); } -__cold int mdbx_env_open_for_recovery(MDBX_env *env, const char *pathname, - unsigned target_meta, bool writeable) { +__cold int mdbx_env_open_for_recovery(MDBX_env *env, const char *pathname, unsigned target_meta, bool writeable) { #if defined(_WIN32) || defined(_WIN64) wchar_t *pathnameW = nullptr; int rc = osal_mb2w(pathname, &pathnameW); @@ -330,8 +307,7 @@ __cold int mdbx_env_open_for_recovery(MDBX_env *env, const char *pathname, return LOG_IFERR(rc); } -__cold int mdbx_env_open_for_recoveryW(MDBX_env *env, const wchar_t *pathname, - unsigned target_meta, bool writeable) { +__cold int mdbx_env_open_for_recoveryW(MDBX_env *env, const wchar_t *pathname, unsigned target_meta, bool writeable) { #endif /* Windows */ if (unlikely(target_meta >= NUM_METAS)) @@ -349,8 +325,7 @@ __cold int mdbx_env_open_for_recoveryW(MDBX_env *env, const wchar_t *pathname, #else mdbx_env_open #endif /* Windows */ - (env, pathname, writeable ? MDBX_EXCLUSIVE : MDBX_EXCLUSIVE | MDBX_RDONLY, - 0); + (env, pathname, writeable ? 
MDBX_EXCLUSIVE : MDBX_EXCLUSIVE | MDBX_RDONLY, 0); } __cold int mdbx_env_delete(const char *pathname, MDBX_env_delete_mode_t mode) { @@ -364,8 +339,7 @@ __cold int mdbx_env_delete(const char *pathname, MDBX_env_delete_mode_t mode) { return LOG_IFERR(rc); } -__cold int mdbx_env_deleteW(const wchar_t *pathname, - MDBX_env_delete_mode_t mode) { +__cold int mdbx_env_deleteW(const wchar_t *pathname, MDBX_env_delete_mode_t mode) { #endif /* Windows */ switch (mode) { @@ -383,22 +357,18 @@ __cold int mdbx_env_deleteW(const wchar_t *pathname, MDBX_env dummy_env_silo, *const dummy_env = &dummy_env_silo; #endif memset(dummy_env, 0, sizeof(*dummy_env)); - dummy_env->flags = - (mode == MDBX_ENV_ENSURE_UNUSED) ? MDBX_EXCLUSIVE : MDBX_ENV_DEFAULTS; + dummy_env->flags = (mode == MDBX_ENV_ENSURE_UNUSED) ? MDBX_EXCLUSIVE : MDBX_ENV_DEFAULTS; dummy_env->ps = (unsigned)mdbx_default_pagesize(); STATIC_ASSERT(sizeof(dummy_env->flags) == sizeof(MDBX_env_flags_t)); int rc = MDBX_RESULT_TRUE, err = env_handle_pathname(dummy_env, pathname, 0); if (likely(err == MDBX_SUCCESS)) { - mdbx_filehandle_t clk_handle = INVALID_HANDLE_VALUE, - dxb_handle = INVALID_HANDLE_VALUE; + mdbx_filehandle_t clk_handle = INVALID_HANDLE_VALUE, dxb_handle = INVALID_HANDLE_VALUE; if (mode > MDBX_ENV_JUST_DELETE) { - err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, dummy_env->pathname.dxb, - &dxb_handle, 0); + err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, dummy_env->pathname.dxb, &dxb_handle, 0); err = (err == MDBX_ENOFILE) ? MDBX_SUCCESS : err; if (err == MDBX_SUCCESS) { - err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, - dummy_env->pathname.lck, &clk_handle, 0); + err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, dummy_env->pathname.lck, &clk_handle, 0); err = (err == MDBX_ENOFILE) ? 
MDBX_SUCCESS : err; } if (err == MDBX_SUCCESS && clk_handle != INVALID_HANDLE_VALUE) @@ -425,8 +395,7 @@ __cold int mdbx_env_deleteW(const wchar_t *pathname, if (err == MDBX_SUCCESS && !(dummy_env->flags & MDBX_NOSUBDIR) && (/* pathname != "." */ pathname[0] != '.' || pathname[1] != 0) && - (/* pathname != ".." */ pathname[0] != '.' || pathname[1] != '.' || - pathname[2] != 0)) { + (/* pathname != ".." */ pathname[0] != '.' || pathname[1] != '.' || pathname[2] != 0)) { err = osal_removedirectory(pathname); if (err == MDBX_SUCCESS) rc = MDBX_SUCCESS; @@ -445,8 +414,7 @@ __cold int mdbx_env_deleteW(const wchar_t *pathname, return LOG_IFERR((err == MDBX_SUCCESS) ? rc : err); } -__cold int mdbx_env_open(MDBX_env *env, const char *pathname, - MDBX_env_flags_t flags, mdbx_mode_t mode) { +__cold int mdbx_env_open(MDBX_env *env, const char *pathname, MDBX_env_flags_t flags, mdbx_mode_t mode) { #if defined(_WIN32) || defined(_WIN64) wchar_t *pathnameW = nullptr; int rc = osal_mb2w(pathname, &pathnameW); @@ -460,8 +428,7 @@ __cold int mdbx_env_open(MDBX_env *env, const char *pathname, return LOG_IFERR(rc); } -__cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, - MDBX_env_flags_t flags, mdbx_mode_t mode) { +__cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, MDBX_env_flags_t flags, mdbx_mode_t mode) { #endif /* Windows */ int rc = check_env(env, false); @@ -471,8 +438,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, if (unlikely(flags & ~ENV_USABLE_FLAGS)) return LOG_IFERR(MDBX_EINVAL); - if (unlikely(env->lazy_fd != INVALID_HANDLE_VALUE || - (env->flags & ENV_ACTIVE) != 0 || env->dxb_mmap.base)) + if (unlikely(env->lazy_fd != INVALID_HANDLE_VALUE || (env->flags & ENV_ACTIVE) != 0 || env->dxb_mmap.base)) return LOG_IFERR(MDBX_EPERM); /* Pickup previously mdbx_env_set_flags(), @@ -482,9 +448,8 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, if (flags & MDBX_RDONLY) { /* Silently ignore irrelevant flags when 
we're only getting read access */ - flags &= ~(MDBX_WRITEMAP | DEPRECATED_MAPASYNC | MDBX_SAFE_NOSYNC | - MDBX_NOMETASYNC | DEPRECATED_COALESCE | MDBX_LIFORECLAIM | - MDBX_NOMEMINIT | MDBX_ACCEDE); + flags &= ~(MDBX_WRITEMAP | DEPRECATED_MAPASYNC | MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | DEPRECATED_COALESCE | + MDBX_LIFORECLAIM | MDBX_NOMEMINIT | MDBX_ACCEDE); mode = 0; } else { #if MDBX_MMAP_INCOHERENT_FILE_WRITE @@ -520,16 +485,14 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, MDBX_txn *txn = nullptr; const intptr_t bitmap_bytes = #if MDBX_ENABLE_DBI_SPARSE - ceil_powerof2(env->max_dbi, CHAR_BIT * sizeof(txn->dbi_sparse[0])) / - CHAR_BIT; + ceil_powerof2(env->max_dbi, CHAR_BIT * sizeof(txn->dbi_sparse[0])) / CHAR_BIT; #else 0; #endif /* MDBX_ENABLE_DBI_SPARSE */ const size_t base = sizeof(MDBX_txn) + sizeof(cursor_couple_t); - const size_t size = - base + bitmap_bytes + - env->max_dbi * (sizeof(txn->dbs[0]) + sizeof(txn->cursors[0]) + - sizeof(txn->dbi_seqs[0]) + sizeof(txn->dbi_state[0])); + const size_t size = base + bitmap_bytes + + env->max_dbi * (sizeof(txn->dbs[0]) + sizeof(txn->cursors[0]) + sizeof(txn->dbi_seqs[0]) + + sizeof(txn->dbi_state[0])); txn = osal_calloc(1, size); if (unlikely(!txn)) { @@ -538,10 +501,8 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, } txn->dbs = ptr_disp(txn, base); txn->cursors = ptr_disp(txn->dbs, env->max_dbi * sizeof(txn->dbs[0])); - txn->dbi_seqs = - ptr_disp(txn->cursors, env->max_dbi * sizeof(txn->cursors[0])); - txn->dbi_state = - ptr_disp(txn, size - env->max_dbi * sizeof(txn->dbi_state[0])); + txn->dbi_seqs = ptr_disp(txn->cursors, env->max_dbi * sizeof(txn->cursors[0])); + txn->dbi_state = ptr_disp(txn, size - env->max_dbi * sizeof(txn->dbi_state[0])); #if MDBX_ENABLE_DBI_SPARSE txn->dbi_sparse = ptr_disp(txn->dbi_state, -bitmap_bytes); #endif /* MDBX_ENABLE_DBI_SPARSE */ @@ -566,10 +527,9 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, const meta_ptr_t 
head = meta_recent(env, &troika); const tree_t *db = &head.ptr_c->trees.main; - DEBUG("opened database version %u, pagesize %u", - (uint8_t)unaligned_peek_u64(4, head.ptr_c->magic_and_version), env->ps); - DEBUG("using meta page %" PRIaPGNO ", txn %" PRIaTXN, - data_page(head.ptr_c)->pgno, head.txnid); + DEBUG("opened database version %u, pagesize %u", (uint8_t)unaligned_peek_u64(4, head.ptr_c->magic_and_version), + env->ps); + DEBUG("using meta page %" PRIaPGNO ", txn %" PRIaTXN, data_page(head.ptr_c)->pgno, head.txnid); DEBUG("depth: %u", db->height); DEBUG("entries: %" PRIu64, db->items); DEBUG("branch pages: %" PRIaPGNO, db->branch_pages); @@ -651,8 +611,7 @@ __cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) { env->flags |= ENV_FATAL_ERROR; #endif /* MDBX_ENV_CHECKPID */ - if (env->dxb_mmap.base && - (env->flags & (MDBX_RDONLY | ENV_FATAL_ERROR)) == 0 && env->basal_txn) { + if (env->dxb_mmap.base && (env->flags & (MDBX_RDONLY | ENV_FATAL_ERROR)) == 0 && env->basal_txn) { if (env->basal_txn->owner && env->basal_txn->owner != osal_thread_self()) return LOG_IFERR(MDBX_BUSY); } else @@ -675,8 +634,8 @@ __cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) { rc = errno; else if (st.st_nlink > 0 /* don't sync deleted files */) { rc = env_sync(env, true, true); - rc = (rc == MDBX_BUSY || rc == EAGAIN || rc == EACCES || rc == EBUSY || - rc == EWOULDBLOCK || rc == MDBX_RESULT_TRUE) + rc = (rc == MDBX_BUSY || rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK || + rc == MDBX_RESULT_TRUE) ? 
MDBX_SUCCESS : rc; } @@ -717,8 +676,7 @@ __cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) { /*----------------------------------------------------------------------------*/ -static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, - MDBX_envinfo *out, const size_t bytes, +static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, const size_t bytes, troika_t *const troika) { const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); @@ -752,8 +710,7 @@ static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, #endif } - *troika = - (txn && !(txn->flags & MDBX_TXN_RDONLY)) ? txn->tw.troika : meta_tap(env); + *troika = (txn && !(txn->flags & MDBX_TXN_RDONLY)) ? txn->tw.troika : meta_tap(env); const meta_ptr_t head = meta_recent(env, troika); const meta_t *const meta0 = METAPAGE(env, 0); const meta_t *const meta1 = METAPAGE(env, 1); @@ -780,9 +737,7 @@ static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, out->mi_last_pgno = txn->geo.first_unallocated - 1; out->mi_geo.current = pgno2bytes(env, txn->geo.end_pgno); - const txnid_t wanna_meta_txnid = (txn->flags & MDBX_TXN_RDONLY) - ? txn->txnid - : txn->txnid - xMDBX_TXNID_STEP; + const txnid_t wanna_meta_txnid = (txn->flags & MDBX_TXN_RDONLY) ? txn->txnid : txn->txnid - xMDBX_TXNID_STEP; txn_meta = (out->mi_meta_txnid[0] == wanna_meta_txnid) ? meta0 : txn_meta; txn_meta = (out->mi_meta_txnid[1] == wanna_meta_txnid) ? meta1 : txn_meta; txn_meta = (out->mi_meta_txnid[2] == wanna_meta_txnid) ? meta2 : txn_meta; @@ -795,30 +750,23 @@ static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, const lck_t *const lck = env->lck; out->mi_maxreaders = env->max_readers; - out->mi_numreaders = env->lck_mmap.lck - ? atomic_load32(&lck->rdt_length, mo_Relaxed) - : INT32_MAX; + out->mi_numreaders = env->lck_mmap.lck ? 
atomic_load32(&lck->rdt_length, mo_Relaxed) : INT32_MAX; out->mi_dxb_pagesize = env->ps; out->mi_sys_pagesize = globals.sys_pagesize; if (likely(bytes > size_before_bootid)) { const uint64_t unsynced_pages = atomic_load64(&lck->unsynced_pages, mo_Relaxed) + - ((uint32_t)out->mi_recent_txnid != - atomic_load32(&lck->meta_sync_txnid, mo_Relaxed)); + ((uint32_t)out->mi_recent_txnid != atomic_load32(&lck->meta_sync_txnid, mo_Relaxed)); out->mi_unsync_volume = pgno2bytes(env, (size_t)unsynced_pages); const uint64_t monotime_now = osal_monotime(); uint64_t ts = atomic_load64(&lck->eoos_timestamp, mo_Relaxed); - out->mi_since_sync_seconds16dot16 = - ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0; + out->mi_since_sync_seconds16dot16 = ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0; ts = atomic_load64(&lck->readers_check_timestamp, mo_Relaxed); - out->mi_since_reader_check_seconds16dot16 = - ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0; - out->mi_autosync_threshold = - pgno2bytes(env, atomic_load32(&lck->autosync_threshold, mo_Relaxed)); + out->mi_since_reader_check_seconds16dot16 = ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0; + out->mi_autosync_threshold = pgno2bytes(env, atomic_load32(&lck->autosync_threshold, mo_Relaxed)); out->mi_autosync_period_seconds16dot16 = - osal_monotime_to_16dot16_noUnderflow( - atomic_load64(&lck->autosync_period, mo_Relaxed)); + osal_monotime_to_16dot16_noUnderflow(atomic_load64(&lck->autosync_period, mo_Relaxed)); out->mi_bootid.current.x = globals.bootid.x; out->mi_bootid.current.y = globals.bootid.y; out->mi_mode = env->lck_mmap.lck ? 
lck->envmode.weak : env->flags; @@ -834,8 +782,7 @@ static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, out->mi_pgop_stat.spill = atomic_load64(&lck->pgops.spill, mo_Relaxed); out->mi_pgop_stat.unspill = atomic_load64(&lck->pgops.unspill, mo_Relaxed); out->mi_pgop_stat.wops = atomic_load64(&lck->pgops.wops, mo_Relaxed); - out->mi_pgop_stat.prefault = - atomic_load64(&lck->pgops.prefault, mo_Relaxed); + out->mi_pgop_stat.prefault = atomic_load64(&lck->pgops.prefault, mo_Relaxed); out->mi_pgop_stat.mincore = atomic_load64(&lck->pgops.mincore, mo_Relaxed); out->mi_pgop_stat.msync = atomic_load64(&lck->pgops.msync, mo_Relaxed); out->mi_pgop_stat.fsync = atomic_load64(&lck->pgops.fsync, mo_Relaxed); @@ -865,8 +812,7 @@ static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, return MDBX_SUCCESS; } -__cold int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, - size_t bytes, troika_t *troika) { +__cold int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, size_t bytes, troika_t *troika) { MDBX_envinfo snap; int rc = env_info_snap(env, txn, &snap, sizeof(snap), troika); if (unlikely(rc != MDBX_SUCCESS)) @@ -878,24 +824,22 @@ __cold int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, if (unlikely(rc != MDBX_SUCCESS)) return rc; snap.mi_since_sync_seconds16dot16 = out->mi_since_sync_seconds16dot16; - snap.mi_since_reader_check_seconds16dot16 = - out->mi_since_reader_check_seconds16dot16; + snap.mi_since_reader_check_seconds16dot16 = out->mi_since_reader_check_seconds16dot16; if (likely(memcmp(&snap, out, bytes) == 0)) return MDBX_SUCCESS; memcpy(&snap, out, bytes); } } -__cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, - MDBX_envinfo *arg, size_t bytes) { +__cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *arg, size_t bytes) { if (unlikely((env == nullptr && txn == nullptr) || arg == nullptr)) return LOG_IFERR(MDBX_EINVAL); const size_t 
size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); const size_t size_before_dxbid = offsetof(MDBX_envinfo, mi_dxbid); - if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && - bytes != size_before_pgop_stat && bytes != size_before_dxbid) + if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && bytes != size_before_pgop_stat && + bytes != size_before_dxbid) return LOG_IFERR(MDBX_EINVAL); if (txn) { @@ -917,8 +861,7 @@ __cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, return LOG_IFERR(env_info(env, txn, arg, bytes, &troika)); } -__cold int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *out, - size_t bytes) { +__cold int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *out, size_t bytes) { #if defined(_WIN32) || defined(_WIN64) wchar_t *pathnameW = nullptr; int rc = osal_mb2w(pathname, &pathnameW); @@ -929,8 +872,7 @@ __cold int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *out, return LOG_IFERR(rc); } -__cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, - size_t bytes) { +__cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, size_t bytes) { #endif /* Windows */ if (unlikely(!out)) return LOG_IFERR(MDBX_EINVAL); @@ -938,8 +880,8 @@ __cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); const size_t size_before_dxbid = offsetof(MDBX_envinfo, mi_dxbid); - if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && - bytes != size_before_pgop_stat && bytes != size_before_dxbid) + if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && bytes != size_before_pgop_stat && + bytes != size_before_dxbid) return LOG_IFERR(MDBX_EINVAL); memset(out, 0, 
bytes); @@ -951,8 +893,7 @@ __cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, MDBX_env env; memset(&env, 0, sizeof(env)); env.pid = osal_getpid(); - if (unlikely(!is_powerof2(globals.sys_pagesize) || - globals.sys_pagesize < MDBX_MIN_PAGESIZE)) { + if (unlikely(!is_powerof2(globals.sys_pagesize) || globals.sys_pagesize < MDBX_MIN_PAGESIZE)) { ERROR("unsuitable system pagesize %u", globals.sys_pagesize); return LOG_IFERR(MDBX_INCOMPATIBLE); } @@ -972,8 +913,7 @@ __cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, int rc = env_handle_pathname(&env, pathname, 0); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; - rc = osal_openfile(MDBX_OPEN_DXB_READ, &env, env.pathname.dxb, &env.lazy_fd, - 0); + rc = osal_openfile(MDBX_OPEN_DXB_READ, &env, env.pathname.dxb, &env.lazy_fd, 0); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; @@ -1006,10 +946,8 @@ bailout: /*----------------------------------------------------------------------------*/ -__cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, - intptr_t size_now, intptr_t size_upper, - intptr_t growth_step, - intptr_t shrink_threshold, intptr_t pagesize) { +__cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now, intptr_t size_upper, + intptr_t growth_step, intptr_t shrink_threshold, intptr_t pagesize) { int rc = check_env(env, false); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); @@ -1038,8 +976,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, should_unlock = true; env->basal_txn->tw.troika = meta_tap(env); eASSERT(env, !env->txn && !env->basal_txn->nested); - env->basal_txn->txnid = - env->basal_txn->tw.troika.txnid[env->basal_txn->tw.troika.recent]; + env->basal_txn->txnid = env->basal_txn->tw.troika.txnid[env->basal_txn->tw.troika.recent]; txn_snapshot_oldest(env->basal_txn); } @@ -1047,9 +984,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, if (pagesize <= 0 || 
pagesize >= INT_MAX) pagesize = env->ps; const geo_t *const geo = - inside_txn - ? &env->txn->geo - : &meta_recent(env, &env->basal_txn->tw.troika).ptr_c->geometry; + inside_txn ? &env->txn->geo : &meta_recent(env, &env->basal_txn->tw.troika).ptr_c->geometry; if (size_lower < 0) size_lower = pgno2bytes(env, geo->lower); if (size_now < 0) @@ -1065,8 +1000,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, rc = MDBX_EINVAL; goto bailout; } - const size_t usedbytes = - pgno2bytes(env, mvcc_snapshot_largest(env, geo->first_unallocated)); + const size_t usedbytes = pgno2bytes(env, mvcc_snapshot_largest(env, geo->first_unallocated)); if ((size_t)size_upper < usedbytes) { rc = MDBX_MAP_FULL; goto bailout; @@ -1101,14 +1035,12 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, else if (top >= (intptr_t)MAX_MAPSIZE /* maximal */) top = MAX_MAPSIZE; - while (top > pagesize * (int64_t)(MAX_PAGENO + 1) && - pagesize < MDBX_MAX_PAGESIZE) + while (top > pagesize * (int64_t)(MAX_PAGENO + 1) && pagesize < MDBX_MAX_PAGESIZE) pagesize <<= 1; } } - if (pagesize < (intptr_t)MDBX_MIN_PAGESIZE || - pagesize > (intptr_t)MDBX_MAX_PAGESIZE || !is_powerof2(pagesize)) { + if (pagesize < (intptr_t)MDBX_MIN_PAGESIZE || pagesize > (intptr_t)MDBX_MAX_PAGESIZE || !is_powerof2(pagesize)) { rc = MDBX_EINVAL; goto bailout; } @@ -1140,13 +1072,10 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, size_upper = size_now; else if (size_now >= reasonable_db_maxsize() / 2) size_upper = reasonable_db_maxsize(); - else if ((size_t)size_now >= MAX_MAPSIZE32 / 2 && - (size_t)size_now <= MAX_MAPSIZE32 / 4 * 3) + else if ((size_t)size_now >= MAX_MAPSIZE32 / 2 && (size_t)size_now <= MAX_MAPSIZE32 / 4 * 3) size_upper = MAX_MAPSIZE32; else { - size_upper = ceil_powerof2(((size_t)size_now < MAX_MAPSIZE / 4) - ? size_now + size_now - : size_now + size_now / 2, + size_upper = ceil_powerof2(((size_t)size_now < MAX_MAPSIZE / 4) ? 
size_now + size_now : size_now + size_now / 2, MEGABYTE * MDBX_WORDBITS * MDBX_WORDBITS / 32); if ((size_t)size_upper > MAX_MAPSIZE) size_upper = MAX_MAPSIZE; @@ -1174,15 +1103,12 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, size_now = size_lower; } - if (unlikely((size_t)size_upper > MAX_MAPSIZE || - (uint64_t)size_upper / pagesize > MAX_PAGENO + 1)) { + if (unlikely((size_t)size_upper > MAX_MAPSIZE || (uint64_t)size_upper / pagesize > MAX_PAGENO + 1)) { rc = MDBX_TOO_LARGE; goto bailout; } - const size_t unit = (globals.sys_pagesize > (size_t)pagesize) - ? globals.sys_pagesize - : (size_t)pagesize; + const size_t unit = (globals.sys_pagesize > (size_t)pagesize) ? globals.sys_pagesize : (size_t)pagesize; size_lower = ceil_powerof2(size_lower, unit); size_upper = ceil_powerof2(size_upper, unit); size_now = ceil_powerof2(size_now, unit); @@ -1190,10 +1116,8 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, /* LY: подбираем значение size_upper: * - кратное размеру страницы * - без нарушения MAX_MAPSIZE и MAX_PAGENO */ - while (unlikely((size_t)size_upper > MAX_MAPSIZE || - (uint64_t)size_upper / pagesize > MAX_PAGENO + 1)) { - if ((size_t)size_upper < unit + MIN_MAPSIZE || - (size_t)size_upper < (size_t)pagesize * (MIN_PAGENO + 1)) { + while (unlikely((size_t)size_upper > MAX_MAPSIZE || (uint64_t)size_upper / pagesize > MAX_PAGENO + 1)) { + if ((size_t)size_upper < unit + MIN_MAPSIZE || (size_t)size_upper < (size_t)pagesize * (MIN_PAGENO + 1)) { /* паранойа на случай переполнения при невероятных значениях */ rc = MDBX_EINVAL; goto bailout; @@ -1235,10 +1159,8 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, env->geo_in_bytes.lower = size_lower; env->geo_in_bytes.now = size_now; env->geo_in_bytes.upper = size_upper; - env->geo_in_bytes.grow = - pgno2bytes(env, pv2pages(pages2pv(bytes2pgno(env, growth_step)))); - env->geo_in_bytes.shrink = - pgno2bytes(env, pv2pages(pages2pv(bytes2pgno(env, 
shrink_threshold)))); + env->geo_in_bytes.grow = pgno2bytes(env, pv2pages(pages2pv(bytes2pgno(env, growth_step)))); + env->geo_in_bytes.shrink = pgno2bytes(env, pv2pages(pages2pv(bytes2pgno(env, shrink_threshold)))); env_options_adjust_defaults(env); ENSURE(env, env->geo_in_bytes.lower >= MIN_MAPSIZE); @@ -1290,15 +1212,13 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, meta_set_txnid(env, &meta, txnid); } - const geo_t *const current_geo = - &(env->txn ? env->txn : env->basal_txn)->geo; + const geo_t *const current_geo = &(env->txn ? env->txn : env->basal_txn)->geo; /* update env-geo to avoid influences */ env->geo_in_bytes.now = pgno2bytes(env, current_geo->now); env->geo_in_bytes.lower = pgno2bytes(env, current_geo->lower); env->geo_in_bytes.upper = pgno2bytes(env, current_geo->upper); env->geo_in_bytes.grow = pgno2bytes(env, pv2pages(current_geo->grow_pv)); - env->geo_in_bytes.shrink = - pgno2bytes(env, pv2pages(current_geo->shrink_pv)); + env->geo_in_bytes.shrink = pgno2bytes(env, pv2pages(current_geo->shrink_pv)); geo_t new_geo; new_geo.lower = bytes2pgno(env, size_lower); @@ -1326,8 +1246,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, #if defined(_WIN32) || defined(_WIN64) /* Was DB shrinking disabled before and now it will be enabled? 
*/ if (new_geo.lower < new_geo.upper && new_geo.shrink_pv && - !(current_geo->lower < current_geo->upper && - current_geo->shrink_pv)) { + !(current_geo->lower < current_geo->upper && current_geo->shrink_pv)) { if (!env->lck_mmap.lck) { rc = MDBX_EPERM; goto bailout; @@ -1341,9 +1260,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, /* Check if there are any reading threads that do not use the SRWL */ const size_t CurrentTid = GetCurrentThreadId(); const reader_slot_t *const begin = env->lck_mmap.lck->rdt; - const reader_slot_t *const end = - begin + - atomic_load32(&env->lck_mmap.lck->rdt_length, mo_AcquireRelease); + const reader_slot_t *const end = begin + atomic_load32(&env->lck_mmap.lck->rdt_length, mo_AcquireRelease); for (const reader_slot_t *reader = begin; reader < end; ++reader) { if (reader->pid.weak == env->pid && reader->tid.weak != CurrentTid) { /* At least one thread may don't use SRWL */ @@ -1358,10 +1275,8 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, } #endif /* Windows */ - if (new_geo.now != current_geo->now || - new_geo.upper != current_geo->upper) { - rc = dxb_resize(env, current_geo->first_unallocated, new_geo.now, - new_geo.upper, explicit_resize); + if (new_geo.now != current_geo->now || new_geo.upper != current_geo->upper) { + rc = dxb_resize(env, current_geo->first_unallocated, new_geo.now, new_geo.upper, explicit_resize); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } @@ -1370,13 +1285,10 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, env->txn->flags |= MDBX_TXN_DIRTY; } else { meta.geometry = new_geo; - rc = - dxb_sync_locked(env, env->flags, &meta, &env->basal_txn->tw.troika); + rc = dxb_sync_locked(env, env->flags, &meta, &env->basal_txn->tw.troika); if (likely(rc == MDBX_SUCCESS)) { - env->geo_in_bytes.now = - pgno2bytes(env, new_geo.now = meta.geometry.now); - env->geo_in_bytes.upper = - pgno2bytes(env, new_geo.upper = meta.geometry.upper); + 
env->geo_in_bytes.now = pgno2bytes(env, new_geo.now = meta.geometry.now); + env->geo_in_bytes.upper = pgno2bytes(env, new_geo.upper = meta.geometry.upper); } } } diff --git a/src/api-extra.c b/src/api-extra.c index 8c6a6301..e74c3bbc 100644 --- a/src/api-extra.c +++ b/src/api-extra.c @@ -6,8 +6,7 @@ /*------------------------------------------------------------------------------ * Readers API */ -__cold int mdbx_reader_list(const MDBX_env *env, MDBX_reader_list_func *func, - void *ctx) { +__cold int mdbx_reader_list(const MDBX_env *env, MDBX_reader_list_func *func, void *ctx) { int rc = check_env(env, true); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); @@ -19,8 +18,7 @@ __cold int mdbx_reader_list(const MDBX_env *env, MDBX_reader_list_func *func, int serial = 0; lck_t *const lck = env->lck_mmap.lck; if (likely(lck)) { - const size_t snap_nreaders = - atomic_load32(&lck->rdt_length, mo_AcquireRelease); + const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease); for (size_t i = 0; i < snap_nreaders; i++) { const reader_slot_t *r = lck->rdt + i; retry_reader:; @@ -29,17 +27,12 @@ __cold int mdbx_reader_list(const MDBX_env *env, MDBX_reader_list_func *func, continue; txnid_t txnid = safe64_read(&r->txnid); const uint64_t tid = atomic_load64(&r->tid, mo_Relaxed); - const pgno_t pages_used = - atomic_load32(&r->snapshot_pages_used, mo_Relaxed); - const uint64_t reader_pages_retired = - atomic_load64(&r->snapshot_pages_retired, mo_Relaxed); - if (unlikely(txnid != safe64_read(&r->txnid) || - pid != atomic_load32(&r->pid, mo_AcquireRelease) || + const pgno_t pages_used = atomic_load32(&r->snapshot_pages_used, mo_Relaxed); + const uint64_t reader_pages_retired = atomic_load64(&r->snapshot_pages_retired, mo_Relaxed); + if (unlikely(txnid != safe64_read(&r->txnid) || pid != atomic_load32(&r->pid, mo_AcquireRelease) || tid != atomic_load64(&r->tid, mo_Relaxed) || - pages_used != - atomic_load32(&r->snapshot_pages_used, mo_Relaxed) || - 
reader_pages_retired != - atomic_load64(&r->snapshot_pages_retired, mo_Relaxed))) + pages_used != atomic_load32(&r->snapshot_pages_used, mo_Relaxed) || + reader_pages_retired != atomic_load64(&r->snapshot_pages_retired, mo_Relaxed))) goto retry_reader; eASSERT(env, txnid > 0); @@ -53,22 +46,18 @@ __cold int mdbx_reader_list(const MDBX_env *env, MDBX_reader_list_func *func, troika_t troika = meta_tap(env); retry_header:; const meta_ptr_t head = meta_recent(env, &troika); - const uint64_t head_pages_retired = - unaligned_peek_u64_volatile(4, head.ptr_v->pages_retired); + const uint64_t head_pages_retired = unaligned_peek_u64_volatile(4, head.ptr_v->pages_retired); if (unlikely(meta_should_retry(env, &troika) || - head_pages_retired != unaligned_peek_u64_volatile( - 4, head.ptr_v->pages_retired))) + head_pages_retired != unaligned_peek_u64_volatile(4, head.ptr_v->pages_retired))) goto retry_header; lag = (head.txnid - txnid) / xMDBX_TXNID_STEP; bytes_used = pgno2bytes(env, pages_used); bytes_retained = (head_pages_retired > reader_pages_retired) - ? pgno2bytes(env, (pgno_t)(head_pages_retired - - reader_pages_retired)) + ? 
pgno2bytes(env, (pgno_t)(head_pages_retired - reader_pages_retired)) : 0; } - rc = func(ctx, ++serial, (unsigned)i, pid, (mdbx_tid_t)((intptr_t)tid), - txnid, lag, bytes_used, bytes_retained); + rc = func(ctx, ++serial, (unsigned)i, pid, (mdbx_tid_t)((intptr_t)tid), txnid, lag, bytes_used, bytes_retained); if (unlikely(rc != MDBX_SUCCESS)) break; } @@ -93,8 +82,7 @@ int mdbx_txn_lock(MDBX_env *env, bool dont_wait) { if (unlikely(env->flags & MDBX_RDONLY)) return LOG_IFERR(MDBX_EACCESS); - if (unlikely(env->basal_txn->owner || - (env->basal_txn->flags & MDBX_TXN_FINISHED) == 0)) + if (unlikely(env->basal_txn->owner || (env->basal_txn->flags & MDBX_TXN_FINISHED) == 0)) return LOG_IFERR(MDBX_BUSY); return LOG_IFERR(lck_txn_lock(env, dont_wait)); diff --git a/src/api-key-transform.c b/src/api-key-transform.c index e28f8de9..6143a62e 100644 --- a/src/api-key-transform.c +++ b/src/api-key-transform.c @@ -9,16 +9,14 @@ static inline double key2double(const int64_t key) { double f; } casting; - casting.u = (key < 0) ? key + UINT64_C(0x8000000000000000) - : UINT64_C(0xffffFFFFffffFFFF) - key; + casting.u = (key < 0) ? key + UINT64_C(0x8000000000000000) : UINT64_C(0xffffFFFFffffFFFF) - key; return casting.f; } static inline uint64_t double2key(const double *const ptr) { STATIC_ASSERT(sizeof(double) == sizeof(int64_t)); const int64_t i = *(const int64_t *)ptr; - const uint64_t u = (i < 0) ? UINT64_C(0xffffFFFFffffFFFF) - i - : i + UINT64_C(0x8000000000000000); + const uint64_t u = (i < 0) ? UINT64_C(0xffffFFFFffffFFFF) - i : i + UINT64_C(0x8000000000000000); if (ASSERT_ENABLED()) { const double f = key2double(u); assert(memcmp(&f, ptr, sizeof(double)) == 0); @@ -32,16 +30,14 @@ static inline float key2float(const int32_t key) { float f; } casting; - casting.u = - (key < 0) ? key + UINT32_C(0x80000000) : UINT32_C(0xffffFFFF) - key; + casting.u = (key < 0) ? 
key + UINT32_C(0x80000000) : UINT32_C(0xffffFFFF) - key; return casting.f; } static inline uint32_t float2key(const float *const ptr) { STATIC_ASSERT(sizeof(float) == sizeof(int32_t)); const int32_t i = *(const int32_t *)ptr; - const uint32_t u = - (i < 0) ? UINT32_C(0xffffFFFF) - i : i + UINT32_C(0x80000000); + const uint32_t u = (i < 0) ? UINT32_C(0xffffFFFF) - i : i + UINT32_C(0x80000000); if (ASSERT_ENABLED()) { const float f = key2float(u); assert(memcmp(&f, ptr, sizeof(float)) == 0); @@ -49,21 +45,13 @@ static inline uint32_t float2key(const float *const ptr) { return u; } -uint64_t mdbx_key_from_double(const double ieee754_64bit) { - return double2key(&ieee754_64bit); -} +uint64_t mdbx_key_from_double(const double ieee754_64bit) { return double2key(&ieee754_64bit); } -uint64_t mdbx_key_from_ptrdouble(const double *const ieee754_64bit) { - return double2key(ieee754_64bit); -} +uint64_t mdbx_key_from_ptrdouble(const double *const ieee754_64bit) { return double2key(ieee754_64bit); } -uint32_t mdbx_key_from_float(const float ieee754_32bit) { - return float2key(&ieee754_32bit); -} +uint32_t mdbx_key_from_float(const float ieee754_32bit) { return float2key(&ieee754_32bit); } -uint32_t mdbx_key_from_ptrfloat(const float *const ieee754_32bit) { - return float2key(ieee754_32bit); -} +uint32_t mdbx_key_from_ptrfloat(const float *const ieee754_32bit) { return float2key(ieee754_32bit); } #define IEEE754_DOUBLE_MANTISSA_SIZE 52 #define IEEE754_DOUBLE_EXPONENTA_BIAS 0x3FF @@ -78,8 +66,7 @@ static inline int clz64(uint64_t value) { return __builtin_clz(value); if (sizeof(value) == sizeof(long)) return __builtin_clzl(value); -#if (defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ == 8) || \ - __has_builtin(__builtin_clzll) +#if (defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ == 8) || __has_builtin(__builtin_clzll) return __builtin_clzll(value); #endif /* have(long long) && long long == uint64_t */ #endif /* GNU C */ @@ -105,11 +92,10 @@ static inline int 
clz64(uint64_t value) { value |= value >> 8; value |= value >> 16; value |= value >> 32; - static const uint8_t debruijn_clz64[64] = { - 63, 16, 62, 7, 15, 36, 61, 3, 6, 14, 22, 26, 35, 47, 60, 2, - 9, 5, 28, 11, 13, 21, 42, 19, 25, 31, 34, 40, 46, 52, 59, 1, - 17, 8, 37, 4, 23, 27, 48, 10, 29, 12, 43, 20, 32, 41, 53, 18, - 38, 24, 49, 30, 44, 33, 54, 39, 50, 45, 55, 51, 56, 57, 58, 0}; + static const uint8_t debruijn_clz64[64] = {63, 16, 62, 7, 15, 36, 61, 3, 6, 14, 22, 26, 35, 47, 60, 2, + 9, 5, 28, 11, 13, 21, 42, 19, 25, 31, 34, 40, 46, 52, 59, 1, + 17, 8, 37, 4, 23, 27, 48, 10, 29, 12, 43, 20, 32, 41, 53, 18, + 38, 24, 49, 30, 44, 33, 54, 39, 50, 45, 55, 51, 56, 57, 58, 0}; return debruijn_clz64[value * UINT64_C(0x03F79D71B4CB0A89) >> 58]; } @@ -134,17 +120,12 @@ uint64_t mdbx_key_from_jsonInteger(const int64_t json_integer) { mantissa = round_mantissa(u64, --shift); } - assert(mantissa >= IEEE754_DOUBLE_IMPLICIT_LEAD && - mantissa <= IEEE754_DOUBLE_MANTISSA_AMAX); - const uint64_t exponent = (uint64_t)IEEE754_DOUBLE_EXPONENTA_BIAS + - IEEE754_DOUBLE_MANTISSA_SIZE - shift; + assert(mantissa >= IEEE754_DOUBLE_IMPLICIT_LEAD && mantissa <= IEEE754_DOUBLE_MANTISSA_AMAX); + const uint64_t exponent = (uint64_t)IEEE754_DOUBLE_EXPONENTA_BIAS + IEEE754_DOUBLE_MANTISSA_SIZE - shift; assert(exponent > 0 && exponent <= IEEE754_DOUBLE_EXPONENTA_MAX); - const uint64_t key = bias + (exponent << IEEE754_DOUBLE_MANTISSA_SIZE) + - (mantissa - IEEE754_DOUBLE_IMPLICIT_LEAD); -#if !defined(_MSC_VER) || \ - defined( \ - _DEBUG) /* Workaround for MSVC error LNK2019: unresolved external \ - symbol __except1 referenced in function __ftol3_except */ + const uint64_t key = bias + (exponent << IEEE754_DOUBLE_MANTISSA_SIZE) + (mantissa - IEEE754_DOUBLE_IMPLICIT_LEAD); +#if !defined(_MSC_VER) || defined(_DEBUG) /* Workaround for MSVC error LNK2019: unresolved external \ + symbol __except1 referenced in function __ftol3_except */ assert(key == mdbx_key_from_double((double)json_integer)); 
#endif /* Workaround for MSVC */ return key; @@ -160,17 +141,13 @@ uint64_t mdbx_key_from_jsonInteger(const int64_t json_integer) { mantissa = round_mantissa(u64, --shift); } - assert(mantissa >= IEEE754_DOUBLE_IMPLICIT_LEAD && - mantissa <= IEEE754_DOUBLE_MANTISSA_AMAX); - const uint64_t exponent = (uint64_t)IEEE754_DOUBLE_EXPONENTA_BIAS + - IEEE754_DOUBLE_MANTISSA_SIZE - shift; + assert(mantissa >= IEEE754_DOUBLE_IMPLICIT_LEAD && mantissa <= IEEE754_DOUBLE_MANTISSA_AMAX); + const uint64_t exponent = (uint64_t)IEEE754_DOUBLE_EXPONENTA_BIAS + IEEE754_DOUBLE_MANTISSA_SIZE - shift; assert(exponent > 0 && exponent <= IEEE754_DOUBLE_EXPONENTA_MAX); - const uint64_t key = bias - 1 - (exponent << IEEE754_DOUBLE_MANTISSA_SIZE) - - (mantissa - IEEE754_DOUBLE_IMPLICIT_LEAD); -#if !defined(_MSC_VER) || \ - defined( \ - _DEBUG) /* Workaround for MSVC error LNK2019: unresolved external \ - symbol __except1 referenced in function __ftol3_except */ + const uint64_t key = + bias - 1 - (exponent << IEEE754_DOUBLE_MANTISSA_SIZE) - (mantissa - IEEE754_DOUBLE_IMPLICIT_LEAD); +#if !defined(_MSC_VER) || defined(_DEBUG) /* Workaround for MSVC error LNK2019: unresolved external \ + symbol __except1 referenced in function __ftol3_except */ assert(key == mdbx_key_from_double((double)json_integer)); #endif /* Workaround for MSVC */ return key; @@ -185,21 +162,17 @@ int64_t mdbx_jsonInteger_from_key(const MDBX_val v) { const uint64_t bias = UINT64_C(0x8000000000000000); const uint64_t covalent = (key > bias) ? key - bias : bias - key - 1; const int shift = IEEE754_DOUBLE_EXPONENTA_BIAS + 63 - - (IEEE754_DOUBLE_EXPONENTA_MAX & - (int)(covalent >> IEEE754_DOUBLE_MANTISSA_SIZE)); + (IEEE754_DOUBLE_EXPONENTA_MAX & (int)(covalent >> IEEE754_DOUBLE_MANTISSA_SIZE)); if (unlikely(shift < 1)) return (key < bias) ? 
INT64_MIN : INT64_MAX; if (unlikely(shift > 63)) return 0; - const uint64_t unscaled = ((covalent & IEEE754_DOUBLE_MANTISSA_MASK) - << (63 - IEEE754_DOUBLE_MANTISSA_SIZE)) + - bias; + const uint64_t unscaled = ((covalent & IEEE754_DOUBLE_MANTISSA_MASK) << (63 - IEEE754_DOUBLE_MANTISSA_SIZE)) + bias; const int64_t absolute = unscaled >> shift; const int64_t value = (key < bias) ? -absolute : absolute; assert(key == mdbx_key_from_jsonInteger(value) || - (mdbx_key_from_jsonInteger(value - 1) < key && - key < mdbx_key_from_jsonInteger(value + 1))); + (mdbx_key_from_jsonInteger(value - 1) < key && key < mdbx_key_from_jsonInteger(value + 1))); return value; } @@ -220,6 +193,5 @@ int32_t mdbx_int32_from_key(const MDBX_val v) { int64_t mdbx_int64_from_key(const MDBX_val v) { assert(v.iov_len == 8); - return (int64_t)(unaligned_peek_u64(2, v.iov_base) - - UINT64_C(0x8000000000000000)); + return (int64_t)(unaligned_peek_u64(2, v.iov_base) - UINT64_C(0x8000000000000000)); } diff --git a/src/api-txn.c b/src/api-txn.c index d75f05ff..2b3ba0d8 100644 --- a/src/api-txn.c +++ b/src/api-txn.c @@ -16,9 +16,7 @@ int mdbx_txn_straggler(const MDBX_txn *txn, int *percent) MDBX_env *env = txn->env; if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0)) { if (percent) - *percent = (int)((txn->geo.first_unallocated * UINT64_C(100) + - txn->geo.end_pgno / 2) / - txn->geo.end_pgno); + *percent = (int)((txn->geo.first_unallocated * UINT64_C(100) + txn->geo.end_pgno / 2) / txn->geo.end_pgno); return 0; } @@ -28,9 +26,7 @@ int mdbx_txn_straggler(const MDBX_txn *txn, int *percent) const meta_ptr_t head = meta_recent(env, &troika); if (percent) { const pgno_t maxpg = head.ptr_v->geometry.now; - *percent = (int)((head.ptr_v->geometry.first_unallocated * UINT64_C(100) + - maxpg / 2) / - maxpg); + *percent = (int)((head.ptr_v->geometry.first_unallocated * UINT64_C(100) + maxpg / 2) / maxpg); } lag = (head.txnid - txn->txnid) / xMDBX_TXNID_STEP; } while (unlikely(meta_should_retry(env, &troika))); @@ 
-38,8 +34,7 @@ int mdbx_txn_straggler(const MDBX_txn *txn, int *percent) return (lag > INT_MAX) ? INT_MAX : (int)lag; } -__cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, - uint32_t *mask) { +__cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, uint32_t *mask) { if (unlikely(!mask)) return LOG_IFERR(MDBX_EINVAL); @@ -58,8 +53,7 @@ __cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val key, data; rc = outer_first(&cx.outer, &key, &data); while (rc == MDBX_SUCCESS) { - const node_t *node = - page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); + const node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); const tree_t *db = node_data(node); const unsigned flags = node_flags(node); switch (flags) { @@ -77,8 +71,7 @@ __cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, *mask |= 1 << UNALIGNED_PEEK_16(db, tree_t, height); break; default: - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid node-size", flags); + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid node-size", flags); return LOG_IFERR(MDBX_CORRUPTED); } rc = outer_next(&cx.outer, &key, &data, MDBX_NEXT_NODUP); @@ -101,8 +94,7 @@ int mdbx_canary_get(const MDBX_txn *txn, MDBX_canary *canary) { return MDBX_SUCCESS; } -int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, - MDBX_val *data) { +int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data) { DKBUF_DEBUG; DEBUG("===> get db %u key [%s]", dbi, DKEY_DEBUG(key)); @@ -121,8 +113,7 @@ int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, return LOG_IFERR(cursor_seek(&cx.outer, (MDBX_val *)key, data, MDBX_SET).err); } -int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, - MDBX_val *data) { +int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data) { int rc = check_txn(txn, 
MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); @@ -141,8 +132,7 @@ int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, return LOG_IFERR(cursor_ops(&cx.outer, key, data, MDBX_SET_LOWERBOUND)); } -int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, - MDBX_val *data, size_t *values_count) { +int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, size_t *values_count) { DKBUF_DEBUG; DEBUG("===> get db %u key [%s]", dbi, DKEY_DEBUG(key)); @@ -169,8 +159,7 @@ int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, *values_count = 1; if (inner_pointed(&cx.outer)) *values_count = - (sizeof(*values_count) >= sizeof(cx.inner.nested_tree.items) || - cx.inner.nested_tree.items <= PTRDIFF_MAX) + (sizeof(*values_count) >= sizeof(cx.inner.nested_tree.items) || cx.inner.nested_tree.items <= PTRDIFF_MAX) ? (size_t)cx.inner.nested_tree.items : PTRDIFF_MAX; } @@ -185,8 +174,7 @@ int mdbx_canary_put(MDBX_txn *txn, const MDBX_canary *canary) { return LOG_IFERR(rc); if (likely(canary)) { - if (txn->canary.x == canary->x && txn->canary.y == canary->y && - txn->canary.z == canary->z) + if (txn->canary.x == canary->x && txn->canary.y == canary->y && txn->canary.z == canary->z) return MDBX_SUCCESS; txn->canary.x = canary->x; txn->canary.y = canary->y; @@ -236,17 +224,14 @@ int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr) { * not to the beginning of a data. */ return LOG_IFERR(MDBX_EINVAL); } - return ((txn->flags & MDBX_TXN_RDONLY) || !is_modifable(txn, page)) - ? MDBX_RESULT_FALSE - : MDBX_RESULT_TRUE; + return ((txn->flags & MDBX_TXN_RDONLY) || !is_modifable(txn, page)) ? MDBX_RESULT_FALSE : MDBX_RESULT_TRUE; } if ((size_t)offset < env->dxb_mmap.limit) { /* Указатель адресует что-то в пределах mmap, но за границей * распределенных страниц. 
Такое может случится если mdbx_is_dirty() * вызывается после операции, в ходе которой грязная страница была * возвращена в нераспределенное пространство. */ - return (txn->flags & MDBX_TXN_RDONLY) ? LOG_IFERR(MDBX_EINVAL) - : MDBX_RESULT_TRUE; + return (txn->flags & MDBX_TXN_RDONLY) ? LOG_IFERR(MDBX_EINVAL) : MDBX_RESULT_TRUE; } } @@ -256,13 +241,10 @@ int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr) { * * Для режима MDBX_WRITE_MAP режима страница однозначно "не грязная", * а для режимов без MDBX_WRITE_MAP однозначно "не чистая". */ - return (txn->flags & (MDBX_WRITEMAP | MDBX_TXN_RDONLY)) - ? LOG_IFERR(MDBX_EINVAL) - : MDBX_RESULT_TRUE; + return (txn->flags & (MDBX_WRITEMAP | MDBX_TXN_RDONLY)) ? LOG_IFERR(MDBX_EINVAL) : MDBX_RESULT_TRUE; } -int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, - const MDBX_val *data) { +int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, const MDBX_val *data) { int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); @@ -274,8 +256,7 @@ int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, return LOG_IFERR(MDBX_BAD_DBI); if (unlikely(txn->flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) - return LOG_IFERR((txn->flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS - : MDBX_BAD_TXN); + return LOG_IFERR((txn->flags & MDBX_TXN_RDONLY) ? 
MDBX_EACCESS : MDBX_BAD_TXN); cursor_couple_t cx; rc = cursor_init(&cx.outer, txn, dbi); @@ -302,8 +283,7 @@ int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, return LOG_IFERR(rc); } -int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, - MDBX_put_flags_t flags) { +int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, MDBX_put_flags_t flags) { int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); @@ -314,14 +294,12 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, if (unlikely(dbi <= FREE_DBI)) return LOG_IFERR(MDBX_BAD_DBI); - if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | - MDBX_ALLDUPS | MDBX_RESERVE | MDBX_APPEND | + if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | MDBX_ALLDUPS | MDBX_RESERVE | MDBX_APPEND | MDBX_APPENDDUP | MDBX_CURRENT | MDBX_MULTIPLE))) return LOG_IFERR(MDBX_EINVAL); if (unlikely(txn->flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) - return LOG_IFERR((txn->flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS - : MDBX_BAD_TXN); + return LOG_IFERR((txn->flags & MDBX_TXN_RDONLY) ? 
MDBX_EACCESS : MDBX_BAD_TXN); cursor_couple_t cx; rc = cursor_init(&cx.outer, txn, dbi); @@ -333,14 +311,11 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, /* LY: support for update (explicit overwrite) */ if (flags & MDBX_CURRENT) { rc = cursor_seek(&cx.outer, (MDBX_val *)key, nullptr, MDBX_SET).err; - if (likely(rc == MDBX_SUCCESS) && (txn->dbs[dbi].flags & MDBX_DUPSORT) && - (flags & MDBX_ALLDUPS) == 0) { + if (likely(rc == MDBX_SUCCESS) && (txn->dbs[dbi].flags & MDBX_DUPSORT) && (flags & MDBX_ALLDUPS) == 0) { /* LY: allows update (explicit overwrite) only for unique keys */ - node_t *node = - page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); + node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); if (node_flags(node) & N_DUP) { - tASSERT(txn, inner_pointed(&cx.outer) && - cx.outer.subcur->nested_tree.items > 1); + tASSERT(txn, inner_pointed(&cx.outer) && cx.outer.subcur->nested_tree.items > 1); rc = MDBX_EMULTIVAL; if ((flags & MDBX_NOOVERWRITE) == 0) { flags -= MDBX_CURRENT; @@ -383,10 +358,8 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, * - получения dirty-статуса страницы по адресу (знать о MUTABLE/WRITEABLE). 
*/ -int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, - MDBX_val *new_data, MDBX_val *old_data, - MDBX_put_flags_t flags, MDBX_preserve_func preserver, - void *preserver_context) { +int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *new_data, MDBX_val *old_data, + MDBX_put_flags_t flags, MDBX_preserve_func preserver, void *preserver_context) { int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); @@ -397,16 +370,14 @@ int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, if (unlikely(old_data->iov_base == nullptr && old_data->iov_len)) return LOG_IFERR(MDBX_EINVAL); - if (unlikely(new_data == nullptr && - (flags & (MDBX_CURRENT | MDBX_RESERVE)) != MDBX_CURRENT)) + if (unlikely(new_data == nullptr && (flags & (MDBX_CURRENT | MDBX_RESERVE)) != MDBX_CURRENT)) return LOG_IFERR(MDBX_EINVAL); if (unlikely(dbi <= FREE_DBI)) return LOG_IFERR(MDBX_BAD_DBI); - if (unlikely(flags & - ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | - MDBX_RESERVE | MDBX_APPEND | MDBX_APPENDDUP | MDBX_CURRENT))) + if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | MDBX_RESERVE | MDBX_APPEND | + MDBX_APPENDDUP | MDBX_CURRENT))) return LOG_IFERR(MDBX_EINVAL); cursor_couple_t cx; @@ -452,8 +423,7 @@ int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, /* disallow update/delete for multi-values */ node_t *node = page_node(page, cx.outer.ki[cx.outer.top]); if (node_flags(node) & N_DUP) { - tASSERT(txn, inner_pointed(&cx.outer) && - cx.outer.subcur->nested_tree.items > 1); + tASSERT(txn, inner_pointed(&cx.outer) && cx.outer.subcur->nested_tree.items > 1); if (cx.outer.subcur->nested_tree.items > 1) { rc = MDBX_EMULTIVAL; goto bailout; @@ -472,8 +442,7 @@ int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, *old_data = *new_data; goto bailout; } - rc = preserver ? 
preserver(preserver_context, old_data, - present_data.iov_base, present_data.iov_len) + rc = preserver ? preserver(preserver_context, old_data, present_data.iov_base, present_data.iov_len) : MDBX_SUCCESS; if (unlikely(rc != MDBX_SUCCESS)) goto bailout; @@ -494,8 +463,7 @@ bailout: return LOG_IFERR(rc); } -static int default_value_preserver(void *context, MDBX_val *target, - const void *src, size_t bytes) { +static int default_value_preserver(void *context, MDBX_val *target, const void *src, size_t bytes) { (void)context; if (unlikely(target->iov_len < bytes)) { target->iov_base = nullptr; @@ -506,9 +474,7 @@ static int default_value_preserver(void *context, MDBX_val *target, return MDBX_SUCCESS; } -int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, - MDBX_val *new_data, MDBX_val *old_data, +int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *new_data, MDBX_val *old_data, MDBX_put_flags_t flags) { - return mdbx_replace_ex(txn, dbi, key, new_data, old_data, flags, - default_value_preserver, nullptr); + return mdbx_replace_ex(txn, dbi, key, new_data, old_data, flags, default_value_preserver, nullptr); } diff --git a/src/atomics-ops.h b/src/atomics-ops.h index 0b29cb84..c61c055d 100644 --- a/src/atomics-ops.h +++ b/src/atomics-ops.h @@ -8,43 +8,36 @@ #ifndef __cplusplus #ifdef MDBX_HAVE_C11ATOMICS -#define osal_memory_fence(order, write) \ - atomic_thread_fence((write) ? mo_c11_store(order) : mo_c11_load(order)) +#define osal_memory_fence(order, write) atomic_thread_fence((write) ? mo_c11_store(order) : mo_c11_load(order)) #else /* MDBX_HAVE_C11ATOMICS */ -#define osal_memory_fence(order, write) \ - do { \ - osal_compiler_barrier(); \ - if (write && order > (MDBX_CPU_WRITEBACK_INCOHERENT ? mo_Relaxed \ - : mo_AcquireRelease)) \ - osal_memory_barrier(); \ +#define osal_memory_fence(order, write) \ + do { \ + osal_compiler_barrier(); \ + if (write && order > (MDBX_CPU_WRITEBACK_INCOHERENT ? 
mo_Relaxed : mo_AcquireRelease)) \ + osal_memory_barrier(); \ } while (0) #endif /* MDBX_HAVE_C11ATOMICS */ #if defined(MDBX_HAVE_C11ATOMICS) && defined(__LCC__) -#define atomic_store32(p, value, order) \ - ({ \ - const uint32_t value_to_store = (value); \ - atomic_store_explicit(MDBX_c11a_rw(uint32_t, p), value_to_store, \ - mo_c11_store(order)); \ - value_to_store; \ +#define atomic_store32(p, value, order) \ + ({ \ + const uint32_t value_to_store = (value); \ + atomic_store_explicit(MDBX_c11a_rw(uint32_t, p), value_to_store, mo_c11_store(order)); \ + value_to_store; \ }) -#define atomic_load32(p, order) \ - atomic_load_explicit(MDBX_c11a_ro(uint32_t, p), mo_c11_load(order)) -#define atomic_store64(p, value, order) \ - ({ \ - const uint64_t value_to_store = (value); \ - atomic_store_explicit(MDBX_c11a_rw(uint64_t, p), value_to_store, \ - mo_c11_store(order)); \ - value_to_store; \ +#define atomic_load32(p, order) atomic_load_explicit(MDBX_c11a_ro(uint32_t, p), mo_c11_load(order)) +#define atomic_store64(p, value, order) \ + ({ \ + const uint64_t value_to_store = (value); \ + atomic_store_explicit(MDBX_c11a_rw(uint64_t, p), value_to_store, mo_c11_store(order)); \ + value_to_store; \ }) -#define atomic_load64(p, order) \ - atomic_load_explicit(MDBX_c11a_ro(uint64_t, p), mo_c11_load(order)) +#define atomic_load64(p, order) atomic_load_explicit(MDBX_c11a_ro(uint64_t, p), mo_c11_load(order)) #endif /* LCC && MDBX_HAVE_C11ATOMICS */ #ifndef atomic_store32 -MDBX_MAYBE_UNUSED static __always_inline uint32_t -atomic_store32(mdbx_atomic_uint32_t *p, const uint32_t value, - enum mdbx_memory_order order) { +MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_store32(mdbx_atomic_uint32_t *p, const uint32_t value, + enum mdbx_memory_order order) { STATIC_ASSERT(sizeof(mdbx_atomic_uint32_t) == 4); #ifdef MDBX_HAVE_C11ATOMICS assert(atomic_is_lock_free(MDBX_c11a_rw(uint32_t, p))); @@ -60,8 +53,8 @@ atomic_store32(mdbx_atomic_uint32_t *p, const uint32_t value, #endif /* 
atomic_store32 */ #ifndef atomic_load32 -MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_load32( - const volatile mdbx_atomic_uint32_t *p, enum mdbx_memory_order order) { +MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_load32(const volatile mdbx_atomic_uint32_t *p, + enum mdbx_memory_order order) { STATIC_ASSERT(sizeof(mdbx_atomic_uint32_t) == 4); #ifdef MDBX_HAVE_C11ATOMICS assert(atomic_is_lock_free(MDBX_c11a_ro(uint32_t, p))); @@ -90,9 +83,8 @@ MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_load32( #endif /* xMDBX_TXNID_STEP */ #ifndef atomic_store64 -MDBX_MAYBE_UNUSED static __always_inline uint64_t -atomic_store64(mdbx_atomic_uint64_t *p, const uint64_t value, - enum mdbx_memory_order order) { +MDBX_MAYBE_UNUSED static __always_inline uint64_t atomic_store64(mdbx_atomic_uint64_t *p, const uint64_t value, + enum mdbx_memory_order order) { STATIC_ASSERT(sizeof(mdbx_atomic_uint64_t) == 8); #if MDBX_64BIT_ATOMIC #if __GNUC_PREREQ(11, 0) @@ -124,8 +116,7 @@ MDBX_MAYBE_UNUSED static __always_inline #endif /* MDBX_64BIT_ATOMIC */ uint64_t - atomic_load64(const volatile mdbx_atomic_uint64_t *p, - enum mdbx_memory_order order) { + atomic_load64(const volatile mdbx_atomic_uint64_t *p, enum mdbx_memory_order order) { STATIC_ASSERT(sizeof(mdbx_atomic_uint64_t) == 8); #if MDBX_64BIT_ATOMIC #ifdef MDBX_HAVE_C11ATOMICS @@ -142,15 +133,13 @@ MDBX_MAYBE_UNUSED static osal_compiler_barrier(); uint64_t value = (uint64_t)atomic_load32(&p->high, order) << 32; jitter4testing(true); - value |= atomic_load32(&p->low, (order == mo_Relaxed) ? mo_Relaxed - : mo_AcquireRelease); + value |= atomic_load32(&p->low, (order == mo_Relaxed) ? mo_Relaxed : mo_AcquireRelease); jitter4testing(true); for (;;) { osal_compiler_barrier(); uint64_t again = (uint64_t)atomic_load32(&p->high, order) << 32; jitter4testing(true); - again |= atomic_load32(&p->low, (order == mo_Relaxed) ? mo_Relaxed - : mo_AcquireRelease); + again |= atomic_load32(&p->low, (order == mo_Relaxed) ? 
mo_Relaxed : mo_AcquireRelease); jitter4testing(true); if (likely(value == again)) return value; @@ -171,19 +160,16 @@ MDBX_MAYBE_UNUSED static __always_inline void atomic_yield(void) { #else __asm__ __volatile__("hint @pause"); #endif -#elif defined(__aarch64__) || (defined(__ARM_ARCH) && __ARM_ARCH > 6) || \ - defined(__ARM_ARCH_6K__) +#elif defined(__aarch64__) || (defined(__ARM_ARCH) && __ARM_ARCH > 6) || defined(__ARM_ARCH_6K__) #ifdef __CC_ARM __yield(); #else __asm__ __volatile__("yield"); #endif -#elif (defined(__mips64) || defined(__mips64__)) && defined(__mips_isa_rev) && \ - __mips_isa_rev >= 2 +#elif (defined(__mips64) || defined(__mips64__)) && defined(__mips_isa_rev) && __mips_isa_rev >= 2 __asm__ __volatile__("pause"); -#elif defined(__mips) || defined(__mips__) || defined(__mips64) || \ - defined(__mips64__) || defined(_M_MRX000) || defined(_MIPS_) || \ - defined(__MWERKS__) || defined(__sgi) +#elif defined(__mips) || defined(__mips__) || defined(__mips64) || defined(__mips64__) || defined(_M_MRX000) || \ + defined(_MIPS_) || defined(__MWERKS__) || defined(__sgi) __asm__ __volatile__(".word 0x00000140"); #elif defined(__linux__) || defined(__gnu_linux__) || defined(_UNIX03_SOURCE) sched_yield(); @@ -193,8 +179,7 @@ MDBX_MAYBE_UNUSED static __always_inline void atomic_yield(void) { } #if MDBX_64BIT_CAS -MDBX_MAYBE_UNUSED static __always_inline bool -atomic_cas64(mdbx_atomic_uint64_t *p, uint64_t c, uint64_t v) { +MDBX_MAYBE_UNUSED static __always_inline bool atomic_cas64(mdbx_atomic_uint64_t *p, uint64_t c, uint64_t v) { #ifdef MDBX_HAVE_C11ATOMICS STATIC_ASSERT(sizeof(long long) >= sizeof(uint64_t)); assert(atomic_is_lock_free(MDBX_c11a_rw(uint64_t, p))); @@ -202,8 +187,7 @@ atomic_cas64(mdbx_atomic_uint64_t *p, uint64_t c, uint64_t v) { #elif defined(__GNUC__) || defined(__clang__) return __sync_bool_compare_and_swap(&p->weak, c, v); #elif defined(_MSC_VER) - return c == (uint64_t)_InterlockedCompareExchange64( - (volatile __int64 *)&p->weak, v, 
c); + return c == (uint64_t)_InterlockedCompareExchange64((volatile __int64 *)&p->weak, v, c); #elif defined(__APPLE__) return OSAtomicCompareAndSwap64Barrier(c, v, &p->weak); #else @@ -212,8 +196,7 @@ atomic_cas64(mdbx_atomic_uint64_t *p, uint64_t c, uint64_t v) { } #endif /* MDBX_64BIT_CAS */ -MDBX_MAYBE_UNUSED static __always_inline bool -atomic_cas32(mdbx_atomic_uint32_t *p, uint32_t c, uint32_t v) { +MDBX_MAYBE_UNUSED static __always_inline bool atomic_cas32(mdbx_atomic_uint32_t *p, uint32_t c, uint32_t v) { #ifdef MDBX_HAVE_C11ATOMICS STATIC_ASSERT(sizeof(int) >= sizeof(uint32_t)); assert(atomic_is_lock_free(MDBX_c11a_rw(uint32_t, p))); @@ -222,8 +205,7 @@ atomic_cas32(mdbx_atomic_uint32_t *p, uint32_t c, uint32_t v) { return __sync_bool_compare_and_swap(&p->weak, c, v); #elif defined(_MSC_VER) STATIC_ASSERT(sizeof(volatile long) == sizeof(volatile uint32_t)); - return c == - (uint32_t)_InterlockedCompareExchange((volatile long *)&p->weak, v, c); + return c == (uint32_t)_InterlockedCompareExchange((volatile long *)&p->weak, v, c); #elif defined(__APPLE__) return OSAtomicCompareAndSwap32Barrier(c, v, &p->weak); #else @@ -231,8 +213,7 @@ atomic_cas32(mdbx_atomic_uint32_t *p, uint32_t c, uint32_t v) { #endif } -MDBX_MAYBE_UNUSED static __always_inline uint32_t -atomic_add32(mdbx_atomic_uint32_t *p, uint32_t v) { +MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_add32(mdbx_atomic_uint32_t *p, uint32_t v) { #ifdef MDBX_HAVE_C11ATOMICS STATIC_ASSERT(sizeof(int) >= sizeof(uint32_t)); assert(atomic_is_lock_free(MDBX_c11a_rw(uint32_t, p))); @@ -251,8 +232,7 @@ atomic_add32(mdbx_atomic_uint32_t *p, uint32_t v) { #define atomic_sub32(p, v) atomic_add32(p, 0 - (v)) -MDBX_MAYBE_UNUSED static __always_inline uint64_t -safe64_txnid_next(uint64_t txnid) { +MDBX_MAYBE_UNUSED static __always_inline uint64_t safe64_txnid_next(uint64_t txnid) { txnid += xMDBX_TXNID_STEP; #if !MDBX_64BIT_CAS /* avoid overflow of low-part in safe64_reset() */ @@ -262,8 +242,7 @@ 
safe64_txnid_next(uint64_t txnid) { } /* Atomically make target value >= SAFE64_INVALID_THRESHOLD */ -MDBX_MAYBE_UNUSED static __always_inline void -safe64_reset(mdbx_atomic_uint64_t *p, bool single_writer) { +MDBX_MAYBE_UNUSED static __always_inline void safe64_reset(mdbx_atomic_uint64_t *p, bool single_writer) { if (single_writer) { #if MDBX_64BIT_ATOMIC && MDBX_WORDBITS >= 64 atomic_store64(p, UINT64_MAX, mo_AcquireRelease); @@ -290,8 +269,7 @@ safe64_reset(mdbx_atomic_uint64_t *p, bool single_writer) { jitter4testing(true); } -MDBX_MAYBE_UNUSED static __always_inline bool -safe64_reset_compare(mdbx_atomic_uint64_t *p, uint64_t compare) { +MDBX_MAYBE_UNUSED static __always_inline bool safe64_reset_compare(mdbx_atomic_uint64_t *p, uint64_t compare) { /* LY: This function is used to reset `txnid` from hsr-handler in case * the asynchronously cancellation of read transaction. Therefore, * there may be a collision between the cleanup performed here and @@ -307,8 +285,7 @@ safe64_reset_compare(mdbx_atomic_uint64_t *p, uint64_t compare) { bool rc = false; if (likely(atomic_load32(&p->low, mo_AcquireRelease) == (uint32_t)compare && atomic_cas32(&p->high, (uint32_t)(compare >> 32), UINT32_MAX))) { - if (unlikely(atomic_load32(&p->low, mo_AcquireRelease) != - (uint32_t)compare)) + if (unlikely(atomic_load32(&p->low, mo_AcquireRelease) != (uint32_t)compare)) atomic_cas32(&p->high, UINT32_MAX, (uint32_t)(compare >> 32)); else rc = true; @@ -318,8 +295,7 @@ safe64_reset_compare(mdbx_atomic_uint64_t *p, uint64_t compare) { return rc; } -MDBX_MAYBE_UNUSED static __always_inline void -safe64_write(mdbx_atomic_uint64_t *p, const uint64_t v) { +MDBX_MAYBE_UNUSED static __always_inline void safe64_write(mdbx_atomic_uint64_t *p, const uint64_t v) { assert(p->weak >= SAFE64_INVALID_THRESHOLD); #if MDBX_64BIT_ATOMIC && MDBX_64BIT_CAS atomic_store64(p, v, mo_AcquireRelease); @@ -336,8 +312,7 @@ safe64_write(mdbx_atomic_uint64_t *p, const uint64_t v) { jitter4testing(true); } 
-MDBX_MAYBE_UNUSED static __always_inline uint64_t -safe64_read(const mdbx_atomic_uint64_t *p) { +MDBX_MAYBE_UNUSED static __always_inline uint64_t safe64_read(const mdbx_atomic_uint64_t *p) { jitter4testing(true); uint64_t v; do @@ -366,8 +341,7 @@ MDBX_MAYBE_UNUSED static __always_inline bool #endif /* unused for now */ /* non-atomic write with safety for reading a half-updated value */ -MDBX_MAYBE_UNUSED static __always_inline void -safe64_update(mdbx_atomic_uint64_t *p, const uint64_t v) { +MDBX_MAYBE_UNUSED static __always_inline void safe64_update(mdbx_atomic_uint64_t *p, const uint64_t v) { #if MDBX_64BIT_ATOMIC atomic_store64(p, v, mo_Relaxed); #else diff --git a/src/atomics-types.h b/src/atomics-types.h index 8e3e4b9b..fe977b94 100644 --- a/src/atomics-types.h +++ b/src/atomics-types.h @@ -16,21 +16,19 @@ #if defined(__cplusplus) && !defined(__STDC_NO_ATOMICS__) && __has_include() #include #define MDBX_HAVE_C11ATOMICS -#elif !defined(__cplusplus) && \ - (__STDC_VERSION__ >= 201112L || __has_extension(c_atomic)) && \ - !defined(__STDC_NO_ATOMICS__) && \ - (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || \ - !(defined(__GNUC__) || defined(__clang__))) +#elif !defined(__cplusplus) && (__STDC_VERSION__ >= 201112L || __has_extension(c_atomic)) && \ + !defined(__STDC_NO_ATOMICS__) && \ + (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || !(defined(__GNUC__) || defined(__clang__))) #include #define MDBX_HAVE_C11ATOMICS #elif defined(__GNUC__) || defined(__clang__) #elif defined(_MSC_VER) #pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */ -#pragma warning(disable : 4133) /* 'function': incompatible types - from \ +#pragma warning(disable : 4133) /* 'function': incompatible types - from \ 'size_t' to 'LONGLONG' */ -#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \ +#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \ 'std::size_t', possible loss of data */ -#pragma warning(disable : 4267) /* 
'function': conversion from 'size_t' to \ +#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to \ 'long', possible loss of data */ #pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange) #pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64) @@ -85,13 +83,13 @@ typedef union { #define MDBX_c11a_rw(type, ptr) (&(ptr)->c11a) #endif /* Crutches for C11 atomic compiler's bugs */ -#define mo_c11_store(fence) \ - (((fence) == mo_Relaxed) ? memory_order_relaxed \ - : ((fence) == mo_AcquireRelease) ? memory_order_release \ +#define mo_c11_store(fence) \ + (((fence) == mo_Relaxed) ? memory_order_relaxed \ + : ((fence) == mo_AcquireRelease) ? memory_order_release \ : memory_order_seq_cst) -#define mo_c11_load(fence) \ - (((fence) == mo_Relaxed) ? memory_order_relaxed \ - : ((fence) == mo_AcquireRelease) ? memory_order_acquire \ +#define mo_c11_load(fence) \ + (((fence) == mo_Relaxed) ? memory_order_relaxed \ + : ((fence) == mo_AcquireRelease) ? 
memory_order_acquire \ : memory_order_seq_cst) #endif /* MDBX_HAVE_C11ATOMICS */ diff --git a/src/audit.c b/src/audit.c index 34a0f68f..05c69632 100644 --- a/src/audit.c +++ b/src/audit.c @@ -8,29 +8,23 @@ struct audit_ctx { uint8_t *const done_bitmap; }; -static int audit_dbi(void *ctx, const MDBX_txn *txn, const MDBX_val *name, - MDBX_db_flags_t flags, const struct MDBX_stat *stat, - MDBX_dbi dbi) { +static int audit_dbi(void *ctx, const MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, + const struct MDBX_stat *stat, MDBX_dbi dbi) { struct audit_ctx *audit_ctx = ctx; (void)name; (void)txn; (void)flags; - audit_ctx->used += (size_t)stat->ms_branch_pages + - (size_t)stat->ms_leaf_pages + - (size_t)stat->ms_overflow_pages; + audit_ctx->used += (size_t)stat->ms_branch_pages + (size_t)stat->ms_leaf_pages + (size_t)stat->ms_overflow_pages; if (dbi) audit_ctx->done_bitmap[dbi / CHAR_BIT] |= 1 << dbi % CHAR_BIT; return MDBX_SUCCESS; } static size_t audit_db_used(const tree_t *db) { - return db ? (size_t)db->branch_pages + (size_t)db->leaf_pages + - (size_t)db->large_pages - : 0; + return db ? 
(size_t)db->branch_pages + (size_t)db->leaf_pages + (size_t)db->large_pages : 0; } -__cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, - bool dont_filter_gc) { +__cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, bool dont_filter_gc) { const MDBX_env *const env = txn->env; size_t pending = 0; if ((txn->flags & MDBX_TXN_RDONLY) == 0) @@ -48,8 +42,7 @@ __cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, while (rc == MDBX_SUCCESS) { if (!dont_filter_gc) { if (unlikely(key.iov_len != sizeof(txnid_t))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid GC-key size", (unsigned)key.iov_len); + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC-key size", (unsigned)key.iov_len); return MDBX_CORRUPTED; } txnid_t id = unaligned_peek_u64(4, key.iov_base); @@ -68,18 +61,16 @@ __cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, const size_t done_bitmap_size = (txn->n_dbi + CHAR_BIT - 1) / CHAR_BIT; if (txn->parent) { - tASSERT(txn, txn->n_dbi == txn->parent->n_dbi && - txn->n_dbi == txn->env->txn->n_dbi); + tASSERT(txn, txn->n_dbi == txn->parent->n_dbi && txn->n_dbi == txn->env->txn->n_dbi); #if MDBX_ENABLE_DBI_SPARSE - tASSERT(txn, txn->dbi_sparse == txn->parent->dbi_sparse && - txn->dbi_sparse == txn->env->txn->dbi_sparse); + tASSERT(txn, txn->dbi_sparse == txn->parent->dbi_sparse && txn->dbi_sparse == txn->env->txn->dbi_sparse); #endif /* MDBX_ENABLE_DBI_SPARSE */ } struct audit_ctx ctx = {0, alloca(done_bitmap_size)}; memset(ctx.done_bitmap, 0, done_bitmap_size); - ctx.used = NUM_METAS + audit_db_used(dbi_dig(txn, FREE_DBI, nullptr)) + - audit_db_used(dbi_dig(txn, MAIN_DBI, nullptr)); + ctx.used = + NUM_METAS + audit_db_used(dbi_dig(txn, FREE_DBI, nullptr)) + audit_db_used(dbi_dig(txn, MAIN_DBI, nullptr)); rc = mdbx_enumerate_tables(txn, audit_dbi, &ctx); tASSERT(txn, rc == MDBX_SUCCESS); @@ -91,11 +82,9 @@ __cold static int audit_ex_locked(MDBX_txn *txn, 
size_t retired_stored, if (db) ctx.used += audit_db_used(db); else if (dbi_state(txn, dbi)) - WARNING("audit %s@%" PRIaTXN - ": unable account dbi %zd / \"%*s\", state 0x%02x", - txn->parent ? "nested-" : "", txn->txnid, dbi, - (int)env->kvs[dbi].name.iov_len, - (const char *)env->kvs[dbi].name.iov_base, dbi_state(txn, dbi)); + WARNING("audit %s@%" PRIaTXN ": unable account dbi %zd / \"%*s\", state 0x%02x", txn->parent ? "nested-" : "", + txn->txnid, dbi, (int)env->kvs[dbi].name.iov_len, (const char *)env->kvs[dbi].name.iov_base, + dbi_state(txn, dbi)); } if (pending + gc + ctx.used == txn->geo.first_unallocated) @@ -104,15 +93,12 @@ __cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, if ((txn->flags & MDBX_TXN_RDONLY) == 0) ERROR("audit @%" PRIaTXN ": %zu(pending) = %zu(loose) + " "%zu(reclaimed) + %zu(retired-pending) - %zu(retired-stored)", - txn->txnid, pending, txn->tw.loose_count, - MDBX_PNL_GETSIZE(txn->tw.relist), - txn->tw.retired_pages ? MDBX_PNL_GETSIZE(txn->tw.retired_pages) : 0, - retired_stored); + txn->txnid, pending, txn->tw.loose_count, MDBX_PNL_GETSIZE(txn->tw.relist), + txn->tw.retired_pages ? 
MDBX_PNL_GETSIZE(txn->tw.retired_pages) : 0, retired_stored); ERROR("audit @%" PRIaTXN ": %zu(pending) + %zu" "(gc) + %zu(count) = %zu(total) <> %zu" "(allocated)", - txn->txnid, pending, gc, ctx.used, pending + gc + ctx.used, - (size_t)txn->geo.first_unallocated); + txn->txnid, pending, gc, ctx.used, pending + gc + ctx.used, (size_t)txn->geo.first_unallocated); return MDBX_PROBLEM; } diff --git a/src/chk.c b/src/chk.c index 83d7d74c..4197b29c 100644 --- a/src/chk.c +++ b/src/chk.c @@ -28,8 +28,7 @@ typedef struct MDBX_chk_internal { __cold static int chk_check_break(MDBX_chk_scope_t *const scope) { MDBX_chk_internal_t *const chk = scope->internal; - return (chk->got_break || (chk->cb->check_break && - (chk->got_break = chk->cb->check_break(chk->usr)))) + return (chk->got_break || (chk->cb->check_break && (chk->got_break = chk->cb->check_break(chk->usr)))) ? MDBX_RESULT_TRUE : MDBX_RESULT_FALSE; } @@ -37,15 +36,14 @@ __cold static int chk_check_break(MDBX_chk_scope_t *const scope) { __cold static void chk_line_end(MDBX_chk_line_t *line) { if (likely(line)) { MDBX_chk_internal_t *chk = line->ctx->internal; - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); + assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); if (likely(chk->cb->print_done)) chk->cb->print_done(line); } } -__cold __must_check_result static MDBX_chk_line_t * -chk_line_begin(MDBX_chk_scope_t *const scope, enum MDBX_chk_severity severity) { +__cold __must_check_result static MDBX_chk_line_t *chk_line_begin(MDBX_chk_scope_t *const scope, + enum MDBX_chk_severity severity) { MDBX_chk_internal_t *const chk = scope->internal; if (severity < MDBX_chk_warning) mdbx_env_chk_encount_problem(chk->usr); @@ -54,8 +52,7 @@ chk_line_begin(MDBX_chk_scope_t *const scope, enum MDBX_chk_severity severity) { line = chk->cb->print_begin(chk->usr, severity); if (likely(line)) { assert(line->ctx == nullptr || (line->ctx == chk->usr && 
line->empty)); - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); + assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); line->ctx = chk->usr; } } @@ -75,12 +72,10 @@ __cold static MDBX_chk_line_t *chk_line_feed(MDBX_chk_line_t *line) { __cold static MDBX_chk_line_t *chk_flush(MDBX_chk_line_t *line) { if (likely(line)) { MDBX_chk_internal_t *chk = line->ctx->internal; - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); + assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); if (likely(chk->cb->print_flush)) { chk->cb->print_flush(line); - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); + assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); line->out = line->begin; } } @@ -90,8 +85,7 @@ __cold static MDBX_chk_line_t *chk_flush(MDBX_chk_line_t *line) { __cold static size_t chk_print_wanna(MDBX_chk_line_t *line, size_t need) { if (likely(line && need)) { size_t have = line->end - line->out; - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); + assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); if (need > have) { line = chk_flush(line); have = line->end - line->out; @@ -101,17 +95,14 @@ __cold static size_t chk_print_wanna(MDBX_chk_line_t *line, size_t need) { return 0; } -__cold static MDBX_chk_line_t *chk_puts(MDBX_chk_line_t *line, - const char *str) { +__cold static MDBX_chk_line_t *chk_puts(MDBX_chk_line_t *line, const char *str) { if (likely(line && str && *str)) { MDBX_chk_internal_t *chk = line->ctx->internal; size_t left = strlen(str); - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); + assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); if (chk->cb->print_chars) { 
chk->cb->print_chars(line, str, left); - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); + assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); } else do { size_t chunk = chk_print_wanna(line, left); @@ -120,8 +111,7 @@ __cold static MDBX_chk_line_t *chk_puts(MDBX_chk_line_t *line, break; memcpy(line->out, str, chunk); line->out += chunk; - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); + assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); str += chunk; left -= chunk; } while (left); @@ -130,16 +120,13 @@ __cold static MDBX_chk_line_t *chk_puts(MDBX_chk_line_t *line, return line; } -__cold static MDBX_chk_line_t *chk_print_va(MDBX_chk_line_t *line, - const char *fmt, va_list args) { +__cold static MDBX_chk_line_t *chk_print_va(MDBX_chk_line_t *line, const char *fmt, va_list args) { if (likely(line)) { MDBX_chk_internal_t *chk = line->ctx->internal; - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); + assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); if (chk->cb->print_format) { chk->cb->print_format(line, fmt, args); - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); + assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); } else { va_list ones; va_copy(ones, args); @@ -151,8 +138,7 @@ __cold static MDBX_chk_line_t *chk_print_va(MDBX_chk_line_t *line, int written = vsnprintf(line->out, have, fmt, args); if (likely(written > 0)) line->out += written; - assert(line->begin <= line->end && line->begin <= line->out && - line->out <= line->end); + assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); } } } @@ -161,8 +147,7 @@ __cold static MDBX_chk_line_t *chk_print_va(MDBX_chk_line_t *line, return line; } -__cold static 
MDBX_chk_line_t *MDBX_PRINTF_ARGS(2, 3) - chk_print(MDBX_chk_line_t *line, const char *fmt, ...) { +__cold static MDBX_chk_line_t *MDBX_PRINTF_ARGS(2, 3) chk_print(MDBX_chk_line_t *line, const char *fmt, ...) { if (likely(line)) { // MDBX_chk_internal_t *chk = line->ctx->internal; va_list args; @@ -174,12 +159,9 @@ __cold static MDBX_chk_line_t *MDBX_PRINTF_ARGS(2, 3) return line; } -__cold static MDBX_chk_line_t *chk_print_size(MDBX_chk_line_t *line, - const char *prefix, - const uint64_t value, +__cold static MDBX_chk_line_t *chk_print_size(MDBX_chk_line_t *line, const char *prefix, const uint64_t value, const char *suffix) { - static const char sf[] = - "KMGTPEZY"; /* LY: Kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta! */ + static const char sf[] = "KMGTPEZY"; /* LY: Kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta! */ if (likely(line)) { MDBX_chk_internal_t *chk = line->ctx->internal; prefix = prefix ? prefix : ""; @@ -191,33 +173,27 @@ __cold static MDBX_chk_line_t *chk_print_size(MDBX_chk_line_t *line, const unsigned scale = 10 + i * 10; const uint64_t rounded = value + (UINT64_C(5) << (scale - 10)); const uint64_t integer = rounded >> scale; - const uint64_t fractional = - (rounded - (integer << scale)) * 100u >> scale; + const uint64_t fractional = (rounded - (integer << scale)) * 100u >> scale; if ((rounded >> scale) <= 1000) - return chk_print(line, "%s%" PRIu64 " (%u.%02u %ciB)%s", prefix, - value, (unsigned)integer, (unsigned)fractional, - sf[i], suffix); + return chk_print(line, "%s%" PRIu64 " (%u.%02u %ciB)%s", prefix, value, (unsigned)integer, + (unsigned)fractional, sf[i], suffix); } line->empty = false; } return line; } -__cold static int chk_error_rc(MDBX_chk_scope_t *const scope, int err, - const char *subj) { +__cold static int chk_error_rc(MDBX_chk_scope_t *const scope, int err, const char *subj) { MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_error); if (line) - chk_line_end(chk_flush(chk_print(line, "%s() failed, error %s (%d)", 
subj, - mdbx_strerror(err), err))); + chk_line_end(chk_flush(chk_print(line, "%s() failed, error %s (%d)", subj, mdbx_strerror(err), err))); else - debug_log(MDBX_LOG_ERROR, "mdbx_env_chk", 0, "%s() failed, error %s (%d)", - subj, mdbx_strerror(err), err); + debug_log(MDBX_LOG_ERROR, "mdbx_env_chk", 0, "%s() failed, error %s (%d)", subj, mdbx_strerror(err), err); return err; } __cold static void MDBX_PRINTF_ARGS(5, 6) - chk_object_issue(MDBX_chk_scope_t *const scope, const char *object, - uint64_t entry_number, const char *caption, + chk_object_issue(MDBX_chk_scope_t *const scope, const char *object, uint64_t entry_number, const char *caption, const char *extra_fmt, ...) { MDBX_chk_internal_t *const chk = scope->internal; MDBX_chk_issue_t *issue = chk->usr->scope->issues; @@ -258,8 +234,7 @@ __cold static void MDBX_PRINTF_ARGS(5, 6) va_end(args); } -__cold static void MDBX_PRINTF_ARGS(2, 3) - chk_scope_issue(MDBX_chk_scope_t *const scope, const char *fmt, ...) { +__cold static void MDBX_PRINTF_ARGS(2, 3) chk_scope_issue(MDBX_chk_scope_t *const scope, const char *fmt, ...) 
{ MDBX_chk_internal_t *const chk = scope->internal; va_list args; va_start(args, fmt); @@ -267,8 +242,7 @@ __cold static void MDBX_PRINTF_ARGS(2, 3) mdbx_env_chk_encount_problem(chk->usr); chk->cb->issue(chk->usr, nullptr, 0, nullptr, fmt, args); } else - chk_line_end( - chk_print_va(chk_line_begin(scope, MDBX_chk_error), fmt, args)); + chk_line_end(chk_print_va(chk_line_begin(scope, MDBX_chk_error), fmt, args)); va_end(args); } @@ -306,26 +280,19 @@ __cold static int chk_scope_end(MDBX_chk_internal_t *chk, int err) { return err; } -__cold static int chk_scope_begin_args(MDBX_chk_internal_t *chk, - int verbosity_adjustment, - enum MDBX_chk_stage stage, - const void *object, size_t *problems, - const char *fmt, va_list args) { +__cold static int chk_scope_begin_args(MDBX_chk_internal_t *chk, int verbosity_adjustment, enum MDBX_chk_stage stage, + const void *object, size_t *problems, const char *fmt, va_list args) { if (unlikely(chk->scope_depth + 1u >= ARRAY_LENGTH(chk->scope_stack))) return MDBX_BACKLOG_DEPLETED; MDBX_chk_scope_t *const outer = chk->scope_stack + chk->scope_depth; - const int verbosity = - outer->verbosity + - (verbosity_adjustment - 1) * (1 << MDBX_chk_severity_prio_shift); + const int verbosity = outer->verbosity + (verbosity_adjustment - 1) * (1 << MDBX_chk_severity_prio_shift); MDBX_chk_scope_t *const inner = outer + 1; memset(inner, 0, sizeof(*inner)); inner->internal = outer->internal; inner->stage = stage ? stage : (stage = outer->stage); inner->object = object; - inner->verbosity = (verbosity < MDBX_chk_warning) - ? MDBX_chk_warning - : (enum MDBX_chk_severity)verbosity; + inner->verbosity = (verbosity < MDBX_chk_warning) ? 
MDBX_chk_warning : (enum MDBX_chk_severity)verbosity; if (problems) chk->problem_counter = problems; else if (!chk->problem_counter || outer->stage != stage) @@ -351,13 +318,11 @@ __cold static int chk_scope_begin_args(MDBX_chk_internal_t *chk, } __cold static int MDBX_PRINTF_ARGS(6, 7) - chk_scope_begin(MDBX_chk_internal_t *chk, int verbosity_adjustment, - enum MDBX_chk_stage stage, const void *object, + chk_scope_begin(MDBX_chk_internal_t *chk, int verbosity_adjustment, enum MDBX_chk_stage stage, const void *object, size_t *problems, const char *fmt, ...) { va_list args; va_start(args, fmt); - int rc = chk_scope_begin_args(chk, verbosity_adjustment, stage, object, - problems, fmt, args); + int rc = chk_scope_begin_args(chk, verbosity_adjustment, stage, object, problems, fmt, args); va_end(args); return rc; } @@ -376,19 +341,16 @@ __cold void chk_scope_pop(MDBX_chk_scope_t *const inner) { } __cold static MDBX_chk_scope_t *MDBX_PRINTF_ARGS(3, 4) - chk_scope_push(MDBX_chk_scope_t *const scope, int verbosity_adjustment, - const char *fmt, ...) { + chk_scope_push(MDBX_chk_scope_t *const scope, int verbosity_adjustment, const char *fmt, ...) { chk_scope_restore(scope, MDBX_SUCCESS); va_list args; va_start(args, fmt); - int err = chk_scope_begin_args(scope->internal, verbosity_adjustment, - scope->stage, nullptr, nullptr, fmt, args); + int err = chk_scope_begin_args(scope->internal, verbosity_adjustment, scope->stage, nullptr, nullptr, fmt, args); va_end(args); return err ? 
nullptr : scope + 1; } -__cold static const char *chk_v2a(MDBX_chk_internal_t *chk, - const MDBX_val *val) { +__cold static const char *chk_v2a(MDBX_chk_internal_t *chk, const MDBX_val *val) { if (val == MDBX_CHK_MAIN) return "@MAIN"; if (val == MDBX_CHK_GC) @@ -418,8 +380,7 @@ __cold static const char *chk_v2a(MDBX_chk_internal_t *chk, chk->v2a_buf.iov_base = ptr; chk->v2a_buf.iov_len = enough; } - snprintf(chk->v2a_buf.iov_base, chk->v2a_buf.iov_len, - "", len); + snprintf(chk->v2a_buf.iov_base, chk->v2a_buf.iov_len, "", len); return chk->v2a_buf.iov_base; } @@ -428,8 +389,7 @@ __cold static const char *chk_v2a(MDBX_chk_internal_t *chk, size_t xchars = 0; for (size_t i = 0; i < len && printable; ++i) { quoting = quoting || !(data[i] == '_' || isalnum(data[i])); - printable = - isprint(data[i]) || (data[i] < ' ' && ++xchars < 4 && len > xchars * 4); + printable = isprint(data[i]) || (data[i] < ' ' && ++xchars < 4 && len > xchars * 4); } size_t need = len + 1; @@ -525,10 +485,8 @@ static void histogram_reduce(struct MDBX_chk_histogram *p) { // ищем пару для слияния с минимальной ошибкой size_t min_err = SIZE_MAX, min_i = last - 1; for (size_t i = 0; i < last; ++i) { - const size_t b1 = p->ranges[i].begin, e1 = p->ranges[i].end, - s1 = p->ranges[i].amount; - const size_t b2 = p->ranges[i + 1].begin, e2 = p->ranges[i + 1].end, - s2 = p->ranges[i + 1].amount; + const size_t b1 = p->ranges[i].begin, e1 = p->ranges[i].end, s1 = p->ranges[i].amount; + const size_t b2 = p->ranges[i + 1].begin, e2 = p->ranges[i + 1].end, s2 = p->ranges[i + 1].amount; const size_t l1 = e1 - b1, l2 = e2 - b2, lx = e2 - b1, sx = s1 + s2; assert(s1 > 0 && b1 > 0 && b1 < e1); assert(s2 > 0 && b2 > 0 && b2 < e2); @@ -550,8 +508,7 @@ static void histogram_reduce(struct MDBX_chk_histogram *p) { p->ranges[min_i].count += p->ranges[min_i + 1].count; if (min_i < last) // перемещаем хвост - memmove(p->ranges + min_i, p->ranges + min_i + 1, - (last - min_i) * sizeof(p->ranges[0])); + memmove(p->ranges 
+ min_i, p->ranges + min_i + 1, (last - min_i) * sizeof(p->ranges[0])); // обнуляем последний элемент и продолжаем p->ranges[last].count = 0; } @@ -585,8 +542,7 @@ static void histogram_acc(const size_t n, struct MDBX_chk_histogram *p) { #ifdef __COVERITY__ if (i < last) /* avoid Coverity false-positive issue */ #endif /* __COVERITY__ */ - memmove(p->ranges + i + 1, p->ranges + i, - (last - i) * sizeof(p->ranges[0])); + memmove(p->ranges + i + 1, p->ranges + i, (last - i) * sizeof(p->ranges[0])); } p->ranges[i].begin = n; p->ranges[i].end = n + 1; @@ -598,10 +554,8 @@ static void histogram_acc(const size_t n, struct MDBX_chk_histogram *p) { } } -__cold static MDBX_chk_line_t * -histogram_dist(MDBX_chk_line_t *line, - const struct MDBX_chk_histogram *histogram, const char *prefix, - const char *first, bool amount) { +__cold static MDBX_chk_line_t *histogram_dist(MDBX_chk_line_t *line, const struct MDBX_chk_histogram *histogram, + const char *prefix, const char *first, bool amount) { line = chk_print(line, "%s:", prefix); const char *comma = ""; const size_t first_val = amount ? histogram->ones : histogram->pad; @@ -614,36 +568,28 @@ histogram_dist(MDBX_chk_line_t *line, chk_print(line, "%s %" PRIuSIZE, comma, histogram->ranges[n].begin); if (histogram->ranges[n].begin != histogram->ranges[n].end - 1) chk_print(line, "-%" PRIuSIZE, histogram->ranges[n].end - 1); - line = chk_print(line, "=%" PRIuSIZE, - amount ? histogram->ranges[n].amount - : histogram->ranges[n].count); + line = chk_print(line, "=%" PRIuSIZE, amount ? 
histogram->ranges[n].amount : histogram->ranges[n].count); comma = ","; } return line; } -__cold static MDBX_chk_line_t * -histogram_print(MDBX_chk_scope_t *scope, MDBX_chk_line_t *line, - const struct MDBX_chk_histogram *histogram, const char *prefix, - const char *first, bool amount) { +__cold static MDBX_chk_line_t *histogram_print(MDBX_chk_scope_t *scope, MDBX_chk_line_t *line, + const struct MDBX_chk_histogram *histogram, const char *prefix, + const char *first, bool amount) { if (histogram->count) { - line = chk_print(line, "%s %" PRIuSIZE, prefix, - amount ? histogram->amount : histogram->count); + line = chk_print(line, "%s %" PRIuSIZE, prefix, amount ? histogram->amount : histogram->count); if (scope->verbosity > MDBX_chk_info) - line = chk_puts( - histogram_dist(line, histogram, " (distribution", first, amount), - ")"); + line = chk_puts(histogram_dist(line, histogram, " (distribution", first, amount), ")"); } return line; } //----------------------------------------------------------------------------- -__cold static int chk_get_tbl(MDBX_chk_scope_t *const scope, - const walk_tbl_t *in, MDBX_chk_table_t **out) { +__cold static int chk_get_tbl(MDBX_chk_scope_t *const scope, const walk_tbl_t *in, MDBX_chk_table_t **out) { MDBX_chk_internal_t *const chk = scope->internal; - if (chk->last_lookup && - chk->last_lookup->name.iov_base == in->name.iov_base) { + if (chk->last_lookup && chk->last_lookup->name.iov_base == in->name.iov_base) { *out = chk->last_lookup; return MDBX_SUCCESS; } @@ -665,34 +611,27 @@ __cold static int chk_get_tbl(MDBX_chk_scope_t *const scope, if (tbl->id < 0) { tbl->id = (int)i; tbl->cookie = - chk->cb->table_filter - ? chk->cb->table_filter(chk->usr, &tbl->name, tbl->flags) - : (void *)(intptr_t)-1; + chk->cb->table_filter ? 
chk->cb->table_filter(chk->usr, &tbl->name, tbl->flags) : (void *)(intptr_t)-1; } *out = (chk->last_lookup = tbl); return MDBX_SUCCESS; } } - chk_scope_issue(scope, "too many tables > %u", - (unsigned)ARRAY_LENGTH(chk->table) - CORE_DBS - /* meta */ 1); + chk_scope_issue(scope, "too many tables > %u", (unsigned)ARRAY_LENGTH(chk->table) - CORE_DBS - /* meta */ 1); *out = nullptr; return MDBX_PROBLEM; } //------------------------------------------------------------------------------ -__cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope, - const unsigned num) { +__cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope, const unsigned num) { MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_verbose); MDBX_chk_internal_t *const chk = scope->internal; if (line) { MDBX_env *const env = chk->usr->env; - const bool have_bootid = (chk->envinfo.mi_bootid.current.x | - chk->envinfo.mi_bootid.current.y) != 0; - const bool bootid_match = - have_bootid && memcmp(&chk->envinfo.mi_bootid.meta[num], - &chk->envinfo.mi_bootid.current, - sizeof(chk->envinfo.mi_bootid.current)) == 0; + const bool have_bootid = (chk->envinfo.mi_bootid.current.x | chk->envinfo.mi_bootid.current.y) != 0; + const bool bootid_match = have_bootid && memcmp(&chk->envinfo.mi_bootid.meta[num], &chk->envinfo.mi_bootid.current, + sizeof(chk->envinfo.mi_bootid.current)) == 0; const char *status = "stay"; if (num == chk->troika.recent) @@ -707,9 +646,7 @@ __cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope, break; case DATASIGN_WEAK: line = chk_print(line, "weak-%s", - have_bootid - ? (bootid_match ? "intact (same boot-id)" : "dead") - : "unknown (no boot-id)"); + have_bootid ? (bootid_match ? 
"intact (same boot-id)" : "dead") : "unknown (no boot-id)"); break; default: line = chk_puts(line, "steady"); @@ -718,10 +655,8 @@ __cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope, const txnid_t meta_txnid = chk->envinfo.mi_meta_txnid[num]; line = chk_print(line, " txn#%" PRIaTXN ", ", meta_txnid); if (chk->envinfo.mi_bootid.meta[num].x | chk->envinfo.mi_bootid.meta[num].y) - line = chk_print(line, "boot-id %" PRIx64 "-%" PRIx64 " (%s)", - chk->envinfo.mi_bootid.meta[num].x, - chk->envinfo.mi_bootid.meta[num].y, - bootid_match ? "live" : "not match"); + line = chk_print(line, "boot-id %" PRIx64 "-%" PRIx64 " (%s)", chk->envinfo.mi_bootid.meta[num].x, + chk->envinfo.mi_bootid.meta[num].y, bootid_match ? "live" : "not match"); else line = chk_puts(line, "no boot-id"); @@ -730,22 +665,16 @@ __cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope, line = chk_print(line, ", %s", "forced for checking"); } else if (meta_txnid > chk->envinfo.mi_recent_txnid && (env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == MDBX_EXCLUSIVE) - line = chk_print(line, - ", rolled-back %" PRIu64 " commit(s) (%" PRIu64 - " >>> %" PRIu64 ")", - meta_txnid - chk->envinfo.mi_recent_txnid, meta_txnid, - chk->envinfo.mi_recent_txnid); + line = chk_print(line, ", rolled-back %" PRIu64 " commit(s) (%" PRIu64 " >>> %" PRIu64 ")", + meta_txnid - chk->envinfo.mi_recent_txnid, meta_txnid, chk->envinfo.mi_recent_txnid); chk_line_end(line); } } -__cold static int -chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, - const int deep, const walk_tbl_t *tbl_info, - const size_t page_size, const page_type_t pagetype, - const MDBX_error_t page_err, const size_t nentries, - const size_t payload_bytes, const size_t header_bytes, - const size_t unused_bytes) { +__cold static int chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, const int deep, + const walk_tbl_t *tbl_info, const size_t page_size, const page_type_t pagetype, + const MDBX_error_t 
page_err, const size_t nentries, const size_t payload_bytes, + const size_t header_bytes, const size_t unused_bytes) { MDBX_chk_scope_t *const scope = ctx; MDBX_chk_internal_t *const chk = scope->internal; MDBX_chk_context_t *const usr = chk->usr; @@ -772,9 +701,8 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, if (tbl->flags & MDBX_DUPSORT) height -= tbl_info->internal->height; else { - chk_object_issue(scope, "nested tree", pgno, "unexpected", - "table %s flags 0x%x, deep %i", chk_v2a(chk, &tbl->name), - tbl->flags, deep); + chk_object_issue(scope, "nested tree", pgno, "unexpected", "table %s flags 0x%x, deep %i", + chk_v2a(chk, &tbl->name), tbl->flags, deep); nested = nullptr; } } else @@ -784,8 +712,7 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, bool branch = false; switch (pagetype) { default: - chk_object_issue(scope, "page", pgno, "unknown page-type", - "type %u, deep %i", (unsigned)pagetype, deep); + chk_object_issue(scope, "page", pgno, "unknown page-type", "type %u, deep %i", (unsigned)pagetype, deep); pagetype_caption = "unknown"; tbl->pages.other += npages; break; @@ -803,10 +730,8 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, pagetype_caption = "large"; histogram_acc(npages, &tbl->histogram.large_pages); if (tbl->flags & MDBX_DUPSORT) - chk_object_issue(scope, "page", pgno, "unexpected", - "type %u, table %s flags 0x%x, deep %i", - (unsigned)pagetype, chk_v2a(chk, &tbl->name), tbl->flags, - deep); + chk_object_issue(scope, "page", pgno, "unexpected", "type %u, table %s flags 0x%x, deep %i", (unsigned)pagetype, + chk_v2a(chk, &tbl->name), tbl->flags, deep); break; case page_branch: branch = true; @@ -820,10 +745,8 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, break; case page_dupfix_leaf: if (!nested) - chk_object_issue(scope, "page", pgno, "unexpected", - "type %u, table %s flags 0x%x, deep %i", - (unsigned)pagetype, chk_v2a(chk, 
&tbl->name), tbl->flags, - deep); + chk_object_issue(scope, "page", pgno, "unexpected", "type %u, table %s flags 0x%x, deep %i", (unsigned)pagetype, + chk_v2a(chk, &tbl->name), tbl->flags, deep); /* fall through */ __fallthrough; case page_leaf: @@ -831,33 +754,27 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, pagetype_caption = "leaf"; tbl->pages.leaf += 1; if (height != tbl_info->internal->height) - chk_object_issue(scope, "page", pgno, "wrong tree height", - "actual %i != %i table %s", height, + chk_object_issue(scope, "page", pgno, "wrong tree height", "actual %i != %i table %s", height, tbl_info->internal->height, chk_v2a(chk, &tbl->name)); } else { - pagetype_caption = - (pagetype == page_leaf) ? "nested-leaf" : "nested-leaf-dupfix"; + pagetype_caption = (pagetype == page_leaf) ? "nested-leaf" : "nested-leaf-dupfix"; tbl->pages.nested_leaf += 1; if (chk->last_nested != nested) { histogram_acc(height, &tbl->histogram.nested_tree); chk->last_nested = nested; } if (height != nested->height) - chk_object_issue(scope, "page", pgno, "wrong nested-tree height", - "actual %i != %i dupsort-node %s", height, + chk_object_issue(scope, "page", pgno, "wrong nested-tree height", "actual %i != %i dupsort-node %s", height, nested->height, chk_v2a(chk, &tbl->name)); } break; case page_sub_dupfix_leaf: case page_sub_leaf: - pagetype_caption = - (pagetype == page_sub_leaf) ? "subleaf-dupsort" : "subleaf-dupfix"; + pagetype_caption = (pagetype == page_sub_leaf) ? 
"subleaf-dupsort" : "subleaf-dupfix"; tbl->pages.nested_subleaf += 1; if ((tbl->flags & MDBX_DUPSORT) == 0 || nested) - chk_object_issue(scope, "page", pgno, "unexpected", - "type %u, table %s flags 0x%x, deep %i", - (unsigned)pagetype, chk_v2a(chk, &tbl->name), tbl->flags, - deep); + chk_object_issue(scope, "page", pgno, "unexpected", "type %u, table %s flags 0x%x, deep %i", (unsigned)pagetype, + chk_v2a(chk, &tbl->name), tbl->flags, deep); break; } @@ -867,33 +784,24 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, if (npages == 1) chk_print(line, "%s-page %" PRIuSIZE, pagetype_caption, pgno); else - chk_print(line, "%s-span %" PRIuSIZE "[%u]", pagetype_caption, pgno, - npages); - chk_line_end(chk_print(line, - " of %s: header %" PRIiPTR ", %s %" PRIiPTR - ", payload %" PRIiPTR ", unused %" PRIiPTR - ", deep %i", - chk_v2a(chk, &tbl->name), header_bytes, - (pagetype == page_branch) ? "keys" : "entries", - nentries, payload_bytes, unused_bytes, deep)); + chk_print(line, "%s-span %" PRIuSIZE "[%u]", pagetype_caption, pgno, npages); + chk_line_end(chk_print( + line, " of %s: header %" PRIiPTR ", %s %" PRIiPTR ", payload %" PRIiPTR ", unused %" PRIiPTR ", deep %i", + chk_v2a(chk, &tbl->name), header_bytes, (pagetype == page_branch) ? 
"keys" : "entries", nentries, + payload_bytes, unused_bytes, deep)); } bool already_used = false; for (unsigned n = 0; n < npages; ++n) { const size_t spanpgno = pgno + n; if (spanpgno >= usr->result.alloc_pages) { - chk_object_issue(scope, "page", spanpgno, "wrong page-no", - "%s-page: %" PRIuSIZE " > %" PRIuSIZE ", deep %i", - pagetype_caption, spanpgno, usr->result.alloc_pages, - deep); + chk_object_issue(scope, "page", spanpgno, "wrong page-no", "%s-page: %" PRIuSIZE " > %" PRIuSIZE ", deep %i", + pagetype_caption, spanpgno, usr->result.alloc_pages, deep); tbl->pages.all += 1; } else if (chk->pagemap[spanpgno]) { - const MDBX_chk_table_t *const rival = - chk->table[chk->pagemap[spanpgno] - 1]; - chk_object_issue(scope, "page", spanpgno, - (branch && rival == tbl) ? "loop" : "already used", - "%s-page: by %s, deep %i", pagetype_caption, - chk_v2a(chk, &rival->name), deep); + const MDBX_chk_table_t *const rival = chk->table[chk->pagemap[spanpgno] - 1]; + chk_object_issue(scope, "page", spanpgno, (branch && rival == tbl) ? 
"loop" : "already used", + "%s-page: by %s, deep %i", pagetype_caption, chk_v2a(chk, &rival->name), deep); already_used = true; } else { chk->pagemap[spanpgno] = (int16_t)tbl->id + 1; @@ -907,36 +815,30 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, } if (MDBX_IS_ERROR(page_err)) { - chk_object_issue(scope, "page", pgno, "invalid/corrupted", "%s-page", - pagetype_caption); + chk_object_issue(scope, "page", pgno, "invalid/corrupted", "%s-page", pagetype_caption); } else { if (unused_bytes > page_size) - chk_object_issue(scope, "page", pgno, "illegal unused-bytes", - "%s-page: %u < %" PRIuSIZE " < %u", pagetype_caption, 0, - unused_bytes, env->ps); + chk_object_issue(scope, "page", pgno, "illegal unused-bytes", "%s-page: %u < %" PRIuSIZE " < %u", + pagetype_caption, 0, unused_bytes, env->ps); - if (header_bytes < (int)sizeof(long) || - (size_t)header_bytes >= env->ps - sizeof(long)) { + if (header_bytes < (int)sizeof(long) || (size_t)header_bytes >= env->ps - sizeof(long)) { chk_object_issue(scope, "page", pgno, "illegal header-length", - "%s-page: %" PRIuSIZE " < %" PRIuSIZE " < %" PRIuSIZE, - pagetype_caption, sizeof(long), header_bytes, - env->ps - sizeof(long)); + "%s-page: %" PRIuSIZE " < %" PRIuSIZE " < %" PRIuSIZE, pagetype_caption, sizeof(long), + header_bytes, env->ps - sizeof(long)); } if (nentries < 1 || (pagetype == page_branch && nentries < 2)) { chk_object_issue(scope, "page", pgno, nentries ? 
"half-empty" : "empty", - "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE - " entries, deep %i", - pagetype_caption, payload_bytes, nentries, deep); + "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE " entries, deep %i", pagetype_caption, + payload_bytes, nentries, deep); tbl->pages.empty += 1; } if (npages) { if (page_bytes != page_size) { chk_object_issue(scope, "page", pgno, "misused", - "%s-page: %" PRIuPTR " != %" PRIuPTR " (%" PRIuPTR - "h + %" PRIuPTR "p + %" PRIuPTR "u), deep %i", - pagetype_caption, page_size, page_bytes, header_bytes, - payload_bytes, unused_bytes, deep); + "%s-page: %" PRIuPTR " != %" PRIuPTR " (%" PRIuPTR "h + %" PRIuPTR "p + %" PRIuPTR + "u), deep %i", + pagetype_caption, page_size, page_bytes, header_bytes, payload_bytes, unused_bytes, deep); if (page_size > page_bytes) tbl->lost_bytes += page_size - page_bytes; } else { @@ -1001,76 +903,53 @@ __cold static int chk_tree(MDBX_chk_scope_t *const scope) { chk_line_end(chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution), "walked %zu pages, left/unused %zu" ", %" PRIuSIZE " problem(s)", - usr->result.processed_pages, - usr->result.unused_pages, - usr->scope->subtotal_issues)); + usr->result.processed_pages, usr->result.unused_pages, usr->scope->subtotal_issues)); err = chk_scope_restore(scope, err); if (scope->verbosity > MDBX_chk_info) { for (size_t i = 0; i < ARRAY_LENGTH(chk->table) && chk->table[i]; ++i) { MDBX_chk_table_t *const tbl = chk->table[i]; - MDBX_chk_scope_t *inner = - chk_scope_push(scope, 0, "tree %s:", chk_v2a(chk, &tbl->name)); + MDBX_chk_scope_t *inner = chk_scope_push(scope, 0, "tree %s:", chk_v2a(chk, &tbl->name)); if (tbl->pages.all == 0) - chk_line_end( - chk_print(chk_line_begin(inner, MDBX_chk_resolution), "empty")); + chk_line_end(chk_print(chk_line_begin(inner, MDBX_chk_resolution), "empty")); else { MDBX_chk_line_t *line = chk_line_begin(inner, MDBX_chk_info); if (line) { - line = chk_print(line, "page usage: subtotal %" PRIuSIZE, - tbl->pages.all); 
- const size_t branch_pages = - tbl->pages.branch + tbl->pages.nested_branch; - const size_t leaf_pages = tbl->pages.leaf + tbl->pages.nested_leaf + - tbl->pages.nested_subleaf; + line = chk_print(line, "page usage: subtotal %" PRIuSIZE, tbl->pages.all); + const size_t branch_pages = tbl->pages.branch + tbl->pages.nested_branch; + const size_t leaf_pages = tbl->pages.leaf + tbl->pages.nested_leaf + tbl->pages.nested_subleaf; if (tbl->pages.other) line = chk_print(line, ", other %" PRIuSIZE, tbl->pages.other); - if (tbl->pages.other == 0 || - (branch_pages | leaf_pages | tbl->histogram.large_pages.count) != - 0) { - line = chk_print(line, ", branch %" PRIuSIZE ", leaf %" PRIuSIZE, - branch_pages, leaf_pages); - if (tbl->histogram.large_pages.count || - (tbl->flags & MDBX_DUPSORT) == 0) { - line = chk_print(line, ", large %" PRIuSIZE, - tbl->histogram.large_pages.count); - if (tbl->histogram.large_pages.amount | - tbl->histogram.large_pages.count) - line = histogram_print(inner, line, &tbl->histogram.large_pages, - " amount", "single", true); + if (tbl->pages.other == 0 || (branch_pages | leaf_pages | tbl->histogram.large_pages.count) != 0) { + line = chk_print(line, ", branch %" PRIuSIZE ", leaf %" PRIuSIZE, branch_pages, leaf_pages); + if (tbl->histogram.large_pages.count || (tbl->flags & MDBX_DUPSORT) == 0) { + line = chk_print(line, ", large %" PRIuSIZE, tbl->histogram.large_pages.count); + if (tbl->histogram.large_pages.amount | tbl->histogram.large_pages.count) + line = histogram_print(inner, line, &tbl->histogram.large_pages, " amount", "single", true); } } - line = histogram_dist(chk_line_feed(line), &tbl->histogram.deep, - "tree deep density", "1", false); + line = histogram_dist(chk_line_feed(line), &tbl->histogram.deep, "tree deep density", "1", false); if (tbl != &chk->table_gc && tbl->histogram.nested_tree.count) { - line = chk_print(chk_line_feed(line), "nested tree(s) %" PRIuSIZE, - tbl->histogram.nested_tree.count); - line = histogram_dist(line, 
&tbl->histogram.nested_tree, " density", - "1", false); + line = chk_print(chk_line_feed(line), "nested tree(s) %" PRIuSIZE, tbl->histogram.nested_tree.count); + line = histogram_dist(line, &tbl->histogram.nested_tree, " density", "1", false); line = chk_print(chk_line_feed(line), - "nested tree(s) pages %" PRIuSIZE - ": branch %" PRIuSIZE ", leaf %" PRIuSIZE + "nested tree(s) pages %" PRIuSIZE ": branch %" PRIuSIZE ", leaf %" PRIuSIZE ", subleaf %" PRIuSIZE, - tbl->pages.nested_branch + tbl->pages.nested_leaf, - tbl->pages.nested_branch, tbl->pages.nested_leaf, - tbl->pages.nested_subleaf); + tbl->pages.nested_branch + tbl->pages.nested_leaf, tbl->pages.nested_branch, + tbl->pages.nested_leaf, tbl->pages.nested_subleaf); } const size_t bytes = pgno2bytes(env, tbl->pages.all); - line = chk_print( - chk_line_feed(line), - "page filling: subtotal %" PRIuSIZE - " bytes (%.1f%%), payload %" PRIuSIZE - " (%.1f%%), unused %" PRIuSIZE " (%.1f%%)", - bytes, bytes * 100.0 / total_page_bytes, tbl->payload_bytes, - tbl->payload_bytes * 100.0 / bytes, bytes - tbl->payload_bytes, - (bytes - tbl->payload_bytes) * 100.0 / bytes); + line = + chk_print(chk_line_feed(line), + "page filling: subtotal %" PRIuSIZE " bytes (%.1f%%), payload %" PRIuSIZE + " (%.1f%%), unused %" PRIuSIZE " (%.1f%%)", + bytes, bytes * 100.0 / total_page_bytes, tbl->payload_bytes, tbl->payload_bytes * 100.0 / bytes, + bytes - tbl->payload_bytes, (bytes - tbl->payload_bytes) * 100.0 / bytes); if (tbl->pages.empty) - line = chk_print(line, ", %" PRIuSIZE " empty pages", - tbl->pages.empty); + line = chk_print(line, ", %" PRIuSIZE " empty pages", tbl->pages.empty); if (tbl->lost_bytes) - line = - chk_print(line, ", %" PRIuSIZE " bytes lost", tbl->lost_bytes); + line = chk_print(line, ", %" PRIuSIZE " bytes lost", tbl->lost_bytes); chk_line_end(line); } } @@ -1080,14 +959,12 @@ __cold static int chk_tree(MDBX_chk_scope_t *const scope) { MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution); line = 
chk_print(line, - "summary: total %" PRIuSIZE " bytes, payload %" PRIuSIZE - " (%.1f%%), unused %" PRIuSIZE " (%.1f%%)," + "summary: total %" PRIuSIZE " bytes, payload %" PRIuSIZE " (%.1f%%), unused %" PRIuSIZE " (%.1f%%)," " average fill %.1f%%", total_page_bytes, usr->result.total_payload_bytes, usr->result.total_payload_bytes * 100.0 / total_page_bytes, total_page_bytes - usr->result.total_payload_bytes, - (total_page_bytes - usr->result.total_payload_bytes) * - 100.0 / total_page_bytes, + (total_page_bytes - usr->result.total_payload_bytes) * 100.0 / total_page_bytes, usr->result.total_payload_bytes * 100.0 / total_page_bytes); if (total.pages.empty) line = chk_print(line, ", %" PRIuSIZE " empty pages", total.pages.empty); @@ -1097,14 +974,11 @@ __cold static int chk_tree(MDBX_chk_scope_t *const scope) { return err; } -typedef int(chk_kv_visitor)(MDBX_chk_scope_t *const scope, - MDBX_chk_table_t *tbl, const size_t record_number, +typedef int(chk_kv_visitor)(MDBX_chk_scope_t *const scope, MDBX_chk_table_t *tbl, const size_t record_number, const MDBX_val *key, const MDBX_val *data); -__cold static int chk_handle_kv(MDBX_chk_scope_t *const scope, - MDBX_chk_table_t *tbl, - const size_t record_number, const MDBX_val *key, - const MDBX_val *data) { +__cold static int chk_handle_kv(MDBX_chk_scope_t *const scope, MDBX_chk_table_t *tbl, const size_t record_number, + const MDBX_val *key, const MDBX_val *data) { MDBX_chk_internal_t *const chk = scope->internal; int err = MDBX_SUCCESS; assert(tbl->cookie); @@ -1113,8 +987,7 @@ __cold static int chk_handle_kv(MDBX_chk_scope_t *const scope, return err ? 
err : chk_check_break(scope); } -__cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, - MDBX_chk_table_t *tbl, chk_kv_visitor *handler) { +__cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, MDBX_chk_table_t *tbl, chk_kv_visitor *handler) { MDBX_chk_internal_t *const chk = scope->internal; MDBX_chk_context_t *const usr = chk->usr; MDBX_env *const env = usr->env; @@ -1124,27 +997,22 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, int err; if ((MDBX_TXN_FINISHED | MDBX_TXN_ERROR) & txn->flags) { - chk_line_end( - chk_flush(chk_print(chk_line_begin(scope, MDBX_chk_error), - "abort processing %s due to a previous error", - chk_v2a(chk, &tbl->name)))); + chk_line_end(chk_flush(chk_print(chk_line_begin(scope, MDBX_chk_error), + "abort processing %s due to a previous error", chk_v2a(chk, &tbl->name)))); err = MDBX_BAD_TXN; goto bailout; } if (0 > (int)dbi) { - err = dbi_open( - txn, &tbl->name, MDBX_DB_ACCEDE, &dbi, - (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr, - (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr); + err = dbi_open(txn, &tbl->name, MDBX_DB_ACCEDE, &dbi, + (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr, + (chk->flags & MDBX_CHK_IGNORE_ORDER) ? 
cmp_equal_or_greater : nullptr); if (unlikely(err)) { - tASSERT(txn, dbi >= txn->env->n_dbi || - (txn->env->dbs_flags[dbi] & DB_VALID) == 0); + tASSERT(txn, dbi >= txn->env->n_dbi || (txn->env->dbs_flags[dbi] & DB_VALID) == 0); chk_error_rc(scope, err, "mdbx_dbi_open"); goto bailout; } - tASSERT(txn, dbi < txn->env->n_dbi && - (txn->env->dbs_flags[dbi] & DB_VALID) != 0); + tASSERT(txn, dbi < txn->env->n_dbi && (txn->env->dbs_flags[dbi] & DB_VALID) != 0); } const tree_t *const db = txn->dbs + dbi; @@ -1165,13 +1033,11 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, break; default: key_mode = "inconsistent"; - chk_scope_issue(scope, "wrong key-mode (0x%x)", - tbl->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)); + chk_scope_issue(scope, "wrong key-mode (0x%x)", tbl->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)); } const char *value_mode = nullptr; - switch (tbl->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | - MDBX_INTEGERDUP)) { + switch (tbl->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | MDBX_INTEGERDUP)) { case 0: value_mode = "single"; break; @@ -1199,69 +1065,51 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, default: value_mode = "inconsistent"; chk_scope_issue(scope, "wrong value-mode (0x%x)", - tbl->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | - MDBX_DUPFIXED | MDBX_INTEGERDUP)); + tbl->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | MDBX_INTEGERDUP)); } MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info); - line = chk_print(line, "key-value kind: %s-key => %s-value", key_mode, - value_mode); + line = chk_print(line, "key-value kind: %s-key => %s-value", key_mode, value_mode); line = chk_print(line, ", flags:"); if (!tbl->flags) line = chk_print(line, " none"); else { - const uint8_t f[] = {MDBX_DUPSORT, - MDBX_INTEGERKEY, - MDBX_REVERSEKEY, - MDBX_DUPFIXED, - MDBX_REVERSEDUP, - MDBX_INTEGERDUP, - 0}; - const char *const t[] = {"dupsort", "integerkey", "reversekey", - 
"dupfix", "reversedup", "integerdup"}; + const uint8_t f[] = { + MDBX_DUPSORT, MDBX_INTEGERKEY, MDBX_REVERSEKEY, MDBX_DUPFIXED, MDBX_REVERSEDUP, MDBX_INTEGERDUP, 0}; + const char *const t[] = {"dupsort", "integerkey", "reversekey", "dupfix", "reversedup", "integerdup"}; for (size_t i = 0; f[i]; i++) if (tbl->flags & f[i]) line = chk_print(line, " %s", t[i]); } chk_line_end(chk_print(line, " (0x%02X)", tbl->flags)); - line = chk_print(chk_line_begin(scope, MDBX_chk_verbose), - "entries %" PRIu64 ", sequence %" PRIu64, db->items, + line = chk_print(chk_line_begin(scope, MDBX_chk_verbose), "entries %" PRIu64 ", sequence %" PRIu64, db->items, db->sequence); if (db->mod_txnid) - line = - chk_print(line, ", last modification txn#%" PRIaTXN, db->mod_txnid); + line = chk_print(line, ", last modification txn#%" PRIaTXN, db->mod_txnid); if (db->root != P_INVALID) line = chk_print(line, ", root #%" PRIaPGNO, db->root); chk_line_end(line); chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_verbose), - "b-tree depth %u, pages: branch %" PRIaPGNO - ", leaf %" PRIaPGNO ", large %" PRIaPGNO, - db->height, db->branch_pages, db->leaf_pages, - db->large_pages)); + "b-tree depth %u, pages: branch %" PRIaPGNO ", leaf %" PRIaPGNO ", large %" PRIaPGNO, + db->height, db->branch_pages, db->leaf_pages, db->large_pages)); if ((chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) { const size_t branch_pages = tbl->pages.branch + tbl->pages.nested_branch; const size_t leaf_pages = tbl->pages.leaf + tbl->pages.nested_leaf; - const size_t subtotal_pages = - db->branch_pages + db->leaf_pages + db->large_pages; + const size_t subtotal_pages = db->branch_pages + db->leaf_pages + db->large_pages; if (subtotal_pages != tbl->pages.all) - chk_scope_issue( - scope, "%s pages mismatch (%" PRIuSIZE " != walked %" PRIuSIZE ")", - "subtotal", subtotal_pages, tbl->pages.all); + chk_scope_issue(scope, "%s pages mismatch (%" PRIuSIZE " != walked %" PRIuSIZE ")", "subtotal", subtotal_pages, + tbl->pages.all); 
if (db->branch_pages != branch_pages) - chk_scope_issue( - scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", - "branch", db->branch_pages, branch_pages); + chk_scope_issue(scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", "branch", db->branch_pages, + branch_pages); if (db->leaf_pages != leaf_pages) - chk_scope_issue( - scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", - "all-leaf", db->leaf_pages, leaf_pages); + chk_scope_issue(scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", "all-leaf", db->leaf_pages, + leaf_pages); if (db->large_pages != tbl->histogram.large_pages.amount) - chk_scope_issue( - scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", - "large/overlow", db->large_pages, - tbl->histogram.large_pages.amount); + chk_scope_issue(scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", "large/overlow", + db->large_pages, tbl->histogram.large_pages.amount); } } @@ -1287,31 +1135,24 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, bool bad_key = false; if (key.iov_len > maxkeysize) { - chk_object_issue(scope, "entry", record_count, - "key length exceeds max-key-size", - "%" PRIuPTR " > %" PRIuPTR, key.iov_len, maxkeysize); + chk_object_issue(scope, "entry", record_count, "key length exceeds max-key-size", "%" PRIuPTR " > %" PRIuPTR, + key.iov_len, maxkeysize); bad_key = true; - } else if ((tbl->flags & MDBX_INTEGERKEY) && key.iov_len != 8 && - key.iov_len != 4) { - chk_object_issue(scope, "entry", record_count, "wrong key length", - "%" PRIuPTR " != 4or8", key.iov_len); + } else if ((tbl->flags & MDBX_INTEGERKEY) && key.iov_len != 8 && key.iov_len != 4) { + chk_object_issue(scope, "entry", record_count, "wrong key length", "%" PRIuPTR " != 4or8", key.iov_len); bad_key = true; } bool bad_data = false; - if ((tbl->flags & MDBX_INTEGERDUP) && data.iov_len != 8 && - data.iov_len != 4) { - chk_object_issue(scope, "entry", record_count, "wrong 
data length", - "%" PRIuPTR " != 4or8", data.iov_len); + if ((tbl->flags & MDBX_INTEGERDUP) && data.iov_len != 8 && data.iov_len != 4) { + chk_object_issue(scope, "entry", record_count, "wrong data length", "%" PRIuPTR " != 4or8", data.iov_len); bad_data = true; } if (prev_key.iov_base) { - if (prev_data.iov_base && !bad_data && (tbl->flags & MDBX_DUPFIXED) && - prev_data.iov_len != data.iov_len) { - chk_object_issue(scope, "entry", record_count, "different data length", - "%" PRIuPTR " != %" PRIuPTR, prev_data.iov_len, - data.iov_len); + if (prev_data.iov_base && !bad_data && (tbl->flags & MDBX_DUPFIXED) && prev_data.iov_len != data.iov_len) { + chk_object_issue(scope, "entry", record_count, "different data length", "%" PRIuPTR " != %" PRIuPTR, + prev_data.iov_len, data.iov_len); bad_data = true; } @@ -1320,38 +1161,30 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, if (cmp == 0) { ++dups; if ((tbl->flags & MDBX_DUPSORT) == 0) { - chk_object_issue(scope, "entry", record_count, "duplicated entries", - nullptr); + chk_object_issue(scope, "entry", record_count, "duplicated entries", nullptr); if (prev_data.iov_base && data.iov_len == prev_data.iov_len && memcmp(data.iov_base, prev_data.iov_base, data.iov_len) == 0) - chk_object_issue(scope, "entry", record_count, - "complete duplicate", nullptr); + chk_object_issue(scope, "entry", record_count, "complete duplicate", nullptr); } else if (!bad_data && prev_data.iov_base) { cmp = mdbx_dcmp(txn, dbi, &data, &prev_data); if (cmp == 0) - chk_object_issue(scope, "entry", record_count, - "complete duplicate", nullptr); + chk_object_issue(scope, "entry", record_count, "complete duplicate", nullptr); else if (cmp < 0 && !(chk->flags & MDBX_CHK_IGNORE_ORDER)) - chk_object_issue(scope, "entry", record_count, - "wrong order of multi-values", nullptr); + chk_object_issue(scope, "entry", record_count, "wrong order of multi-values", nullptr); } } else if (cmp < 0 && !(chk->flags & MDBX_CHK_IGNORE_ORDER)) - 
chk_object_issue(scope, "entry", record_count, - "wrong order of entries", nullptr); + chk_object_issue(scope, "entry", record_count, "wrong order of entries", nullptr); } } if (!bad_key) { if (!prev_key.iov_base && (tbl->flags & MDBX_INTEGERKEY)) - chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), - "fixed key-size %" PRIuSIZE, key.iov_len)); + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "fixed key-size %" PRIuSIZE, key.iov_len)); prev_key = key; } if (!bad_data) { - if (!prev_data.iov_base && - (tbl->flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED))) - chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), - "fixed data-size %" PRIuSIZE, data.iov_len)); + if (!prev_data.iov_base && (tbl->flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED))) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "fixed data-size %" PRIuSIZE, data.iov_len)); prev_data = data; } @@ -1359,17 +1192,13 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, histogram_acc(key.iov_len, &tbl->histogram.key_len); histogram_acc(data.iov_len, &tbl->histogram.val_len); - const node_t *const node = - page_node(cursor->pg[cursor->top], cursor->ki[cursor->top]); + const node_t *const node = page_node(cursor->pg[cursor->top], cursor->ki[cursor->top]); if (node_flags(node) == N_TREE) { - if (dbi != MAIN_DBI || (tbl->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | - MDBX_REVERSEDUP | MDBX_INTEGERDUP))) - chk_object_issue(scope, "entry", record_count, "unexpected table", - "node-flags 0x%x", node_flags(node)); + if (dbi != MAIN_DBI || (tbl->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP))) + chk_object_issue(scope, "entry", record_count, "unexpected table", "node-flags 0x%x", node_flags(node)); else if (data.iov_len != sizeof(tree_t)) - chk_object_issue(scope, "entry", record_count, "wrong table node size", - "node-size %" PRIuSIZE " != %" PRIuSIZE, data.iov_len, - sizeof(tree_t)); + chk_object_issue(scope, "entry", record_count, "wrong 
table node size", "node-size %" PRIuSIZE " != %" PRIuSIZE, + data.iov_len, sizeof(tree_t)); else if (scope->stage == MDBX_chk_maindb) /* подсчитываем table при первом проходе */ sub_databases += 1; @@ -1384,9 +1213,8 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, if (unlikely(err)) goto bailout; if (table->cookie) { - err = chk_scope_begin( - chk, 0, MDBX_chk_tables, table, &usr->result.problems_kv, - "Processing table %s...", chk_v2a(chk, &table->name)); + err = chk_scope_begin(chk, 0, MDBX_chk_tables, table, &usr->result.problems_kv, "Processing table %s...", + chk_v2a(chk, &table->name)); if (likely(!err)) { err = chk_db(usr->scope, (MDBX_dbi)-1, table, chk_handle_kv); if (err != MDBX_EINTR && err != MDBX_RESULT_TRUE) @@ -1396,9 +1224,8 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, if (unlikely(err)) goto bailout; } else - chk_line_end(chk_flush( - chk_print(chk_line_begin(scope, MDBX_chk_processing), - "Skip processing %s...", chk_v2a(chk, &table->name)))); + chk_line_end(chk_flush(chk_print(chk_line_begin(scope, MDBX_chk_processing), "Skip processing %s...", + chk_v2a(chk, &table->name)))); } } else if (handler) { err = handler(scope, tbl, record_count, &key, &data); @@ -1409,22 +1236,17 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, err = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT); } - err = (err != MDBX_NOTFOUND) ? chk_error_rc(scope, err, "mdbx_cursor_get") - : MDBX_SUCCESS; + err = (err != MDBX_NOTFOUND) ? 
chk_error_rc(scope, err, "mdbx_cursor_get") : MDBX_SUCCESS; if (err == MDBX_SUCCESS && record_count != db->items) - chk_scope_issue(scope, - "different number of entries %" PRIuSIZE " != %" PRIu64, - record_count, db->items); + chk_scope_issue(scope, "different number of entries %" PRIuSIZE " != %" PRIu64, record_count, db->items); bailout: if (cursor) { if (handler) { if (tbl->histogram.key_len.count) { MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info); - line = histogram_dist(line, &tbl->histogram.key_len, - "key length density", "0/1", false); + line = histogram_dist(line, &tbl->histogram.key_len, "key length density", "0/1", false); chk_line_feed(line); - line = histogram_dist(line, &tbl->histogram.val_len, - "value length density", "0/1", false); + line = histogram_dist(line, &tbl->histogram.val_len, "value length density", "0/1", false); chk_line_end(line); } if (scope->stage == MDBX_chk_maindb) @@ -1433,8 +1255,7 @@ bailout: err = chk->cb->table_conclude(usr, tbl, cursor, err); MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution); line = chk_print(line, "summary: %" PRIuSIZE " records,", record_count); - if (dups || (tbl->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | - MDBX_REVERSEDUP | MDBX_INTEGERDUP))) + if (dups || (tbl->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP))) line = chk_print(line, " %" PRIuSIZE " dups,", dups); if (sub_databases || dbi == MAIN_DBI) line = chk_print(line, " %" PRIuSIZE " tables,", sub_databases); @@ -1442,8 +1263,7 @@ bailout: " %" PRIuSIZE " key's bytes," " %" PRIuSIZE " data's bytes," " %" PRIuSIZE " problem(s)", - tbl->histogram.key_len.amount, - tbl->histogram.val_len.amount, scope->subtotal_issues); + tbl->histogram.key_len.amount, tbl->histogram.val_len.amount, scope->subtotal_issues); chk_line_end(chk_flush(line)); } @@ -1454,10 +1274,8 @@ bailout: return err; } -__cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, - MDBX_chk_table_t *tbl, - const size_t 
record_number, const MDBX_val *key, - const MDBX_val *data) { +__cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, MDBX_chk_table_t *tbl, const size_t record_number, + const MDBX_val *key, const MDBX_val *data) { MDBX_chk_internal_t *const chk = scope->internal; MDBX_chk_context_t *const usr = chk->usr; assert(tbl == &chk->table_gc); @@ -1466,25 +1284,20 @@ __cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, pgno_t *iptr = data->iov_base; if (key->iov_len != sizeof(txnid_t)) - chk_object_issue(scope, "entry", record_number, "wrong txn-id size", - "key-size %" PRIuSIZE, key->iov_len); + chk_object_issue(scope, "entry", record_number, "wrong txn-id size", "key-size %" PRIuSIZE, key->iov_len); else { txnid_t txnid; memcpy(&txnid, key->iov_base, sizeof(txnid)); if (txnid < 1 || txnid > usr->txn->txnid) - chk_object_issue(scope, "entry", record_number, "wrong txn-id", - "%" PRIaTXN, txnid); + chk_object_issue(scope, "entry", record_number, "wrong txn-id", "%" PRIaTXN, txnid); else { if (data->iov_len < sizeof(pgno_t) || data->iov_len % sizeof(pgno_t)) - chk_object_issue(scope, "entry", txnid, "wrong idl size", "%" PRIuPTR, - data->iov_len); + chk_object_issue(scope, "entry", txnid, "wrong idl size", "%" PRIuPTR, data->iov_len); size_t number = (data->iov_len >= sizeof(pgno_t)) ? 
*iptr++ : 0; if (number > PAGELIST_LIMIT) - chk_object_issue(scope, "entry", txnid, "wrong idl length", "%" PRIuPTR, - number); + chk_object_issue(scope, "entry", txnid, "wrong idl length", "%" PRIuPTR, number); else if ((number + 1) * sizeof(pgno_t) > data->iov_len) { - chk_object_issue(scope, "entry", txnid, "trimmed idl", - "%" PRIuSIZE " > %" PRIuSIZE " (corruption)", + chk_object_issue(scope, "entry", txnid, "trimmed idl", "%" PRIuSIZE " > %" PRIuSIZE " (corruption)", (number + 1) * sizeof(pgno_t), data->iov_len); number = data->iov_len / sizeof(pgno_t) - 1; } else if (data->iov_len - (number + 1) * sizeof(pgno_t) >= @@ -1492,38 +1305,31 @@ __cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, * and better than shink-and-retry inside gc_update() */ usr->env->ps) chk_object_issue(scope, "entry", txnid, "extra idl space", - "%" PRIuSIZE " < %" PRIuSIZE " (minor, not a trouble)", - (number + 1) * sizeof(pgno_t), data->iov_len); + "%" PRIuSIZE " < %" PRIuSIZE " (minor, not a trouble)", (number + 1) * sizeof(pgno_t), + data->iov_len); usr->result.gc_pages += number; if (chk->envinfo.mi_latter_reader_txnid > txnid) usr->result.reclaimable_pages += number; - size_t prev = - MDBX_PNL_ASCENDING ? NUM_METAS - 1 : usr->txn->geo.first_unallocated; + size_t prev = MDBX_PNL_ASCENDING ? 
NUM_METAS - 1 : usr->txn->geo.first_unallocated; size_t span = 1; for (size_t i = 0; i < number; ++i) { const size_t pgno = iptr[i]; if (pgno < NUM_METAS) - chk_object_issue(scope, "entry", txnid, "wrong idl entry", - "pgno %" PRIuSIZE " < meta-pages %u", pgno, + chk_object_issue(scope, "entry", txnid, "wrong idl entry", "pgno %" PRIuSIZE " < meta-pages %u", pgno, NUM_METAS); else if (pgno >= usr->result.backed_pages) - chk_object_issue(scope, "entry", txnid, "wrong idl entry", - "pgno %" PRIuSIZE " > backed-pages %" PRIuSIZE, pgno, - usr->result.backed_pages); + chk_object_issue(scope, "entry", txnid, "wrong idl entry", "pgno %" PRIuSIZE " > backed-pages %" PRIuSIZE, + pgno, usr->result.backed_pages); else if (pgno >= usr->result.alloc_pages) - chk_object_issue(scope, "entry", txnid, "wrong idl entry", - "pgno %" PRIuSIZE " > alloc-pages %" PRIuSIZE, pgno, - usr->result.alloc_pages - 1); + chk_object_issue(scope, "entry", txnid, "wrong idl entry", "pgno %" PRIuSIZE " > alloc-pages %" PRIuSIZE, + pgno, usr->result.alloc_pages - 1); else { if (MDBX_PNL_DISORDERED(prev, pgno)) { bad = " [bad sequence]"; - chk_object_issue( - scope, "entry", txnid, "bad sequence", - "%" PRIuSIZE " %c [%" PRIuSIZE "].%" PRIuSIZE, prev, - (prev == pgno) ? '=' : (MDBX_PNL_ASCENDING ? '>' : '<'), i, - pgno); + chk_object_issue(scope, "entry", txnid, "bad sequence", "%" PRIuSIZE " %c [%" PRIuSIZE "].%" PRIuSIZE, prev, + (prev == pgno) ? '=' : (MDBX_PNL_ASCENDING ? 
'>' : '<'), i, pgno); } if (chk->pagemap) { const intptr_t id = chk->pagemap[pgno]; @@ -1531,38 +1337,31 @@ __cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, chk->pagemap[pgno] = -1 /* mark the pgno listed in GC */; else if (id > 0) { assert(id - 1 <= (intptr_t)ARRAY_LENGTH(chk->table)); - chk_object_issue(scope, "page", pgno, "already used", "by %s", - chk_v2a(chk, &chk->table[id - 1]->name)); + chk_object_issue(scope, "page", pgno, "already used", "by %s", chk_v2a(chk, &chk->table[id - 1]->name)); } else - chk_object_issue(scope, "page", pgno, "already listed in GC", - nullptr); + chk_object_issue(scope, "page", pgno, "already listed in GC", nullptr); } } prev = pgno; while (i + span < number && - iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) - : pgno_sub(pgno, span))) + iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) : pgno_sub(pgno, span))) ++span; } if (tbl->cookie) { chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_details), - "transaction %" PRIaTXN ", %" PRIuSIZE - " pages, maxspan %" PRIuSIZE "%s", - txnid, number, span, bad)); + "transaction %" PRIaTXN ", %" PRIuSIZE " pages, maxspan %" PRIuSIZE "%s", txnid, number, + span, bad)); for (size_t i = 0; i < number; i += span) { const size_t pgno = iptr[i]; - for (span = 1; - i + span < number && - iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) - : pgno_sub(pgno, span)); + for (span = 1; i + span < number && + iptr[i + span] == (MDBX_PNL_ASCENDING ? 
pgno_add(pgno, span) : pgno_sub(pgno, span)); ++span) ; histogram_acc(span, &tbl->histogram.nested_tree); MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra); if (line) { if (span > 1) - line = - chk_print(line, "%9" PRIuSIZE "[%" PRIuSIZE "]", pgno, span); + line = chk_print(line, "%9" PRIuSIZE "[%" PRIuSIZE "]", pgno, span); else line = chk_print(line, "%9" PRIuSIZE, pgno); chk_line_end(line); @@ -1582,26 +1381,21 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) { MDBX_chk_context_t *const usr = chk->usr; MDBX_env *const env = usr->env; MDBX_txn *const txn = usr->txn; - int err = - env_info(env, txn, &chk->envinfo, sizeof(chk->envinfo), &chk->troika); + int err = env_info(env, txn, &chk->envinfo, sizeof(chk->envinfo), &chk->troika); if (unlikely(err)) return chk_error_rc(scope, err, "env_info"); MDBX_chk_line_t *line = - chk_puts(chk_line_begin(scope, MDBX_chk_info - - (1 << MDBX_chk_severity_prio_shift)), - "dxb-id "); + chk_puts(chk_line_begin(scope, MDBX_chk_info - (1 << MDBX_chk_severity_prio_shift)), "dxb-id "); if (chk->envinfo.mi_dxbid.x | chk->envinfo.mi_dxbid.y) - line = chk_print(line, "%016" PRIx64 "-%016" PRIx64, - chk->envinfo.mi_dxbid.x, chk->envinfo.mi_dxbid.y); + line = chk_print(line, "%016" PRIx64 "-%016" PRIx64, chk->envinfo.mi_dxbid.x, chk->envinfo.mi_dxbid.y); else line = chk_puts(line, "is absent"); chk_line_end(line); line = chk_puts(chk_line_begin(scope, MDBX_chk_info), "current boot-id "); if (chk->envinfo.mi_bootid.current.x | chk->envinfo.mi_bootid.current.y) - line = chk_print(line, "%016" PRIx64 "-%016" PRIx64, - chk->envinfo.mi_bootid.current.x, + line = chk_print(line, "%016" PRIx64 "-%016" PRIx64, chk->envinfo.mi_bootid.current.x, chk->envinfo.mi_bootid.current.y); else line = chk_puts(line, "is unavailable"); @@ -1613,103 +1407,79 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) { //-------------------------------------------------------------------------- - err = chk_scope_begin(chk, 1, 
MDBX_chk_meta, nullptr, - &usr->result.problems_meta, "Peek the meta-pages..."); + err = chk_scope_begin(chk, 1, MDBX_chk_meta, nullptr, &usr->result.problems_meta, "Peek the meta-pages..."); if (likely(!err)) { MDBX_chk_scope_t *const inner = usr->scope; const uint64_t dxbfile_pages = env->dxb_mmap.filesize >> env->ps2ln; usr->result.alloc_pages = txn->geo.first_unallocated; usr->result.backed_pages = bytes2pgno(env, env->dxb_mmap.current); if (unlikely(usr->result.backed_pages > dxbfile_pages)) - chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64, - usr->result.backed_pages, dxbfile_pages); + chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64, usr->result.backed_pages, dxbfile_pages); if (unlikely(dxbfile_pages < NUM_METAS)) - chk_scope_issue(inner, "file-pages %" PRIu64 " < %u", dxbfile_pages, - NUM_METAS); + chk_scope_issue(inner, "file-pages %" PRIu64 " < %u", dxbfile_pages, NUM_METAS); if (unlikely(usr->result.backed_pages < NUM_METAS)) - chk_scope_issue(inner, "backed-pages %zu < %u", usr->result.backed_pages, - NUM_METAS); + chk_scope_issue(inner, "backed-pages %zu < %u", usr->result.backed_pages, NUM_METAS); if (unlikely(usr->result.backed_pages < NUM_METAS)) { - chk_scope_issue(inner, "backed-pages %zu < num-metas %u", - usr->result.backed_pages, NUM_METAS); + chk_scope_issue(inner, "backed-pages %zu < num-metas %u", usr->result.backed_pages, NUM_METAS); return MDBX_CORRUPTED; } if (unlikely(dxbfile_pages < NUM_METAS)) { - chk_scope_issue(inner, "backed-pages %zu < num-metas %u", - usr->result.backed_pages, NUM_METAS); + chk_scope_issue(inner, "backed-pages %zu < num-metas %u", usr->result.backed_pages, NUM_METAS); return MDBX_CORRUPTED; } if (unlikely(usr->result.backed_pages > (size_t)MAX_PAGENO + 1)) { - chk_scope_issue(inner, "backed-pages %zu > max-pages %zu", - usr->result.backed_pages, (size_t)MAX_PAGENO + 1); + chk_scope_issue(inner, "backed-pages %zu > max-pages %zu", usr->result.backed_pages, (size_t)MAX_PAGENO + 1); 
usr->result.backed_pages = MAX_PAGENO + 1; } if ((env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) { if (unlikely(usr->result.backed_pages > dxbfile_pages)) { - chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64, - usr->result.backed_pages, dxbfile_pages); + chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64, usr->result.backed_pages, dxbfile_pages); usr->result.backed_pages = (size_t)dxbfile_pages; } if (unlikely(usr->result.alloc_pages > usr->result.backed_pages)) { - chk_scope_issue(scope, "alloc-pages %zu > backed-pages %zu", - usr->result.alloc_pages, usr->result.backed_pages); + chk_scope_issue(scope, "alloc-pages %zu > backed-pages %zu", usr->result.alloc_pages, usr->result.backed_pages); usr->result.alloc_pages = usr->result.backed_pages; } } else { /* DB may be shrunk by writer down to the allocated (but unused) pages. */ if (unlikely(usr->result.alloc_pages > usr->result.backed_pages)) { - chk_scope_issue(inner, "alloc-pages %zu > backed-pages %zu", - usr->result.alloc_pages, usr->result.backed_pages); + chk_scope_issue(inner, "alloc-pages %zu > backed-pages %zu", usr->result.alloc_pages, usr->result.backed_pages); usr->result.alloc_pages = usr->result.backed_pages; } if (unlikely(usr->result.alloc_pages > dxbfile_pages)) { - chk_scope_issue(inner, "alloc-pages %zu > file-pages %" PRIu64, - usr->result.alloc_pages, dxbfile_pages); + chk_scope_issue(inner, "alloc-pages %zu > file-pages %" PRIu64, usr->result.alloc_pages, dxbfile_pages); usr->result.alloc_pages = (size_t)dxbfile_pages; } if (unlikely(usr->result.backed_pages > dxbfile_pages)) usr->result.backed_pages = (size_t)dxbfile_pages; } - line = chk_line_feed(chk_print( - chk_line_begin(inner, MDBX_chk_info), - "pagesize %u (%u system), max keysize %u..%u" - ", max readers %u", - env->ps, globals.sys_pagesize, - mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT), - mdbx_env_get_maxkeysize_ex(env, MDBX_DB_DEFAULTS), env->max_readers)); - line = chk_line_feed( - 
chk_print_size(line, "mapsize ", env->dxb_mmap.current, nullptr)); + line = chk_line_feed(chk_print(chk_line_begin(inner, MDBX_chk_info), + "pagesize %u (%u system), max keysize %u..%u" + ", max readers %u", + env->ps, globals.sys_pagesize, mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT), + mdbx_env_get_maxkeysize_ex(env, MDBX_DB_DEFAULTS), env->max_readers)); + line = chk_line_feed(chk_print_size(line, "mapsize ", env->dxb_mmap.current, nullptr)); if (txn->geo.lower == txn->geo.upper) - line = chk_print_size( - line, "fixed datafile: ", chk->envinfo.mi_geo.current, nullptr); + line = chk_print_size(line, "fixed datafile: ", chk->envinfo.mi_geo.current, nullptr); else { - line = chk_print_size( - line, "dynamic datafile: ", chk->envinfo.mi_geo.lower, nullptr); + line = chk_print_size(line, "dynamic datafile: ", chk->envinfo.mi_geo.lower, nullptr); line = chk_print_size(line, " .. ", chk->envinfo.mi_geo.upper, ", "); line = chk_print_size(line, "+", chk->envinfo.mi_geo.grow, ", "); - line = chk_line_feed( - chk_print_size(line, "-", chk->envinfo.mi_geo.shrink, nullptr)); - line = chk_print_size( - line, "current datafile: ", chk->envinfo.mi_geo.current, nullptr); + line = chk_line_feed(chk_print_size(line, "-", chk->envinfo.mi_geo.shrink, nullptr)); + line = chk_print_size(line, "current datafile: ", chk->envinfo.mi_geo.current, nullptr); } - tASSERT(txn, txn->geo.now == chk->envinfo.mi_geo.current / - chk->envinfo.mi_dxb_pagesize); + tASSERT(txn, txn->geo.now == chk->envinfo.mi_geo.current / chk->envinfo.mi_dxb_pagesize); chk_line_end(chk_print(line, ", %u pages", txn->geo.now)); #if defined(_WIN32) || defined(_WIN64) || MDBX_DEBUG - if (txn->geo.shrink_pv && txn->geo.now != txn->geo.upper && - scope->verbosity >= MDBX_chk_verbose) { + if (txn->geo.shrink_pv && txn->geo.now != txn->geo.upper && scope->verbosity >= MDBX_chk_verbose) { line = chk_line_begin(inner, MDBX_chk_notice); - chk_line_feed(chk_print( - line, " > WARNING: Due Windows system limitations a file 
couldn't")); - chk_line_feed(chk_print( - line, " > be truncated while the database is opened. So, the size")); - chk_line_feed(chk_print( - line, " > database file of may by large than the database itself,")); - chk_line_end(chk_print( - line, " > until it will be closed or reopened in read-write mode.")); + chk_line_feed(chk_print(line, " > WARNING: Due Windows system limitations a file couldn't")); + chk_line_feed(chk_print(line, " > be truncated while the database is opened. So, the size")); + chk_line_feed(chk_print(line, " > database file of may by large than the database itself,")); + chk_line_end(chk_print(line, " > until it will be closed or reopened in read-write mode.")); } #endif /* Windows || Debug */ chk_verbose_meta(inner, 0); @@ -1721,18 +1491,14 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) { "skip checking meta-pages since the %u" " is selected for verification", env->stuck_meta)); - line = chk_line_feed( - chk_print(chk_line_begin(inner, MDBX_chk_resolution), - "transactions: recent %" PRIu64 ", " - "selected for verification %" PRIu64 ", lag %" PRIi64, - chk->envinfo.mi_recent_txnid, - chk->envinfo.mi_meta_txnid[env->stuck_meta], - chk->envinfo.mi_recent_txnid - - chk->envinfo.mi_meta_txnid[env->stuck_meta])); + line = chk_line_feed(chk_print(chk_line_begin(inner, MDBX_chk_resolution), + "transactions: recent %" PRIu64 ", " + "selected for verification %" PRIu64 ", lag %" PRIi64, + chk->envinfo.mi_recent_txnid, chk->envinfo.mi_meta_txnid[env->stuck_meta], + chk->envinfo.mi_recent_txnid - chk->envinfo.mi_meta_txnid[env->stuck_meta])); chk_line_end(line); } else { - chk_line_end(chk_puts(chk_line_begin(inner, MDBX_chk_verbose), - "performs check for meta-pages clashes")); + chk_line_end(chk_puts(chk_line_begin(inner, MDBX_chk_verbose), "performs check for meta-pages clashes")); const unsigned meta_clash_mask = meta_eq_mask(&chk->troika); if (meta_clash_mask & 1) chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 0, 1); @@ 
-1742,62 +1508,45 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) { chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 2, 0); const unsigned prefer_steady_metanum = chk->troika.prefer_steady; - const uint64_t prefer_steady_txnid = - chk->troika.txnid[prefer_steady_metanum]; + const uint64_t prefer_steady_txnid = chk->troika.txnid[prefer_steady_metanum]; const unsigned recent_metanum = chk->troika.recent; const uint64_t recent_txnid = chk->troika.txnid[recent_metanum]; if (env->flags & MDBX_EXCLUSIVE) { chk_line_end( - chk_puts(chk_line_begin(inner, MDBX_chk_verbose), - "performs full check recent-txn-id with meta-pages")); + chk_puts(chk_line_begin(inner, MDBX_chk_verbose), "performs full check recent-txn-id with meta-pages")); eASSERT(env, recent_txnid == chk->envinfo.mi_recent_txnid); if (prefer_steady_txnid != recent_txnid) { - if ((chk->flags & MDBX_CHK_READWRITE) != 0 && - (env->flags & MDBX_RDONLY) == 0 && + if ((chk->flags & MDBX_CHK_READWRITE) != 0 && (env->flags & MDBX_RDONLY) == 0 && recent_txnid > prefer_steady_txnid && - (chk->envinfo.mi_bootid.current.x | - chk->envinfo.mi_bootid.current.y) != 0 && - chk->envinfo.mi_bootid.current.x == - chk->envinfo.mi_bootid.meta[recent_metanum].x && - chk->envinfo.mi_bootid.current.y == - chk->envinfo.mi_bootid.meta[recent_metanum].y) { - chk_line_end( - chk_print(chk_line_begin(inner, MDBX_chk_verbose), - "recent meta-%u is weak, but boot-id match current" - " (will synced upon successful check)", - recent_metanum)); + (chk->envinfo.mi_bootid.current.x | chk->envinfo.mi_bootid.current.y) != 0 && + chk->envinfo.mi_bootid.current.x == chk->envinfo.mi_bootid.meta[recent_metanum].x && + chk->envinfo.mi_bootid.current.y == chk->envinfo.mi_bootid.meta[recent_metanum].y) { + chk_line_end(chk_print(chk_line_begin(inner, MDBX_chk_verbose), + "recent meta-%u is weak, but boot-id match current" + " (will synced upon successful check)", + recent_metanum)); } else - chk_scope_issue( - inner, - "steady meta-%d 
txn-id mismatch recent-txn-id (%" PRIi64 - " != %" PRIi64 ")", - prefer_steady_metanum, prefer_steady_txnid, recent_txnid); + chk_scope_issue(inner, "steady meta-%d txn-id mismatch recent-txn-id (%" PRIi64 " != %" PRIi64 ")", + prefer_steady_metanum, prefer_steady_txnid, recent_txnid); } } else if (chk->write_locked) { - chk_line_end( - chk_puts(chk_line_begin(inner, MDBX_chk_verbose), - "performs lite check recent-txn-id with meta-pages (not a " - "monopolistic mode)")); + chk_line_end(chk_puts(chk_line_begin(inner, MDBX_chk_verbose), + "performs lite check recent-txn-id with meta-pages (not a " + "monopolistic mode)")); if (recent_txnid != chk->envinfo.mi_recent_txnid) { - chk_scope_issue(inner, - "weak meta-%d txn-id mismatch recent-txn-id (%" PRIi64 - " != %" PRIi64 ")", - recent_metanum, recent_txnid, - chk->envinfo.mi_recent_txnid); + chk_scope_issue(inner, "weak meta-%d txn-id mismatch recent-txn-id (%" PRIi64 " != %" PRIi64 ")", + recent_metanum, recent_txnid, chk->envinfo.mi_recent_txnid); } } else { - chk_line_end(chk_puts( - chk_line_begin(inner, MDBX_chk_verbose), - "skip check recent-txn-id with meta-pages (monopolistic or " - "read-write mode only)")); + chk_line_end(chk_puts(chk_line_begin(inner, MDBX_chk_verbose), + "skip check recent-txn-id with meta-pages (monopolistic or " + "read-write mode only)")); } - chk_line_end(chk_print( - chk_line_begin(inner, MDBX_chk_resolution), - "transactions: recent %" PRIu64 ", latter reader %" PRIu64 - ", lag %" PRIi64, - chk->envinfo.mi_recent_txnid, chk->envinfo.mi_latter_reader_txnid, - chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid)); + chk_line_end(chk_print(chk_line_begin(inner, MDBX_chk_resolution), + "transactions: recent %" PRIu64 ", latter reader %" PRIu64 ", lag %" PRIi64, + chk->envinfo.mi_recent_txnid, chk->envinfo.mi_latter_reader_txnid, + chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid)); } } err = chk_scope_restore(scope, err); @@ -1806,12 +1555,10 @@ __cold 
static int env_chk(MDBX_chk_scope_t *const scope) { const char *const subj_tree = "B-Trees"; if (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) - chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), - "Skipping %s traversal...", subj_tree)); + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), "Skipping %s traversal...", subj_tree)); else { - err = chk_scope_begin( - chk, -1, MDBX_chk_tree, nullptr, &usr->result.tree_problems, - "Traversal %s by txn#%" PRIaTXN "...", subj_tree, txn->txnid); + err = chk_scope_begin(chk, -1, MDBX_chk_tree, nullptr, &usr->result.tree_problems, + "Traversal %s by txn#%" PRIaTXN "...", subj_tree, txn->txnid); if (likely(!err)) err = chk_tree(usr->scope); if (usr->result.tree_problems && usr->result.gc_tree_problems == 0) @@ -1823,35 +1570,26 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) { const char *const subj_gc = chk_v2a(chk, MDBX_CHK_GC); if (usr->result.gc_tree_problems > 0) - chk_line_end(chk_print( - chk_line_begin(scope, MDBX_chk_processing), - "Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))", - subj_gc, subj_tree, - usr->result.problems_gc = usr->result.gc_tree_problems)); + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))", subj_gc, subj_tree, + usr->result.problems_gc = usr->result.gc_tree_problems)); else { - err = chk_scope_begin( - chk, -1, MDBX_chk_gc, &chk->table_gc, &usr->result.problems_gc, - "Processing %s by txn#%" PRIaTXN "...", subj_gc, txn->txnid); + err = chk_scope_begin(chk, -1, MDBX_chk_gc, &chk->table_gc, &usr->result.problems_gc, + "Processing %s by txn#%" PRIaTXN "...", subj_gc, txn->txnid); if (likely(!err)) err = chk_db(usr->scope, FREE_DBI, &chk->table_gc, chk_handle_gc); line = chk_line_begin(scope, MDBX_chk_info); if (line) { - histogram_print(scope, line, &chk->table_gc.histogram.nested_tree, - "span(s)", "single", false); + histogram_print(scope, line, 
&chk->table_gc.histogram.nested_tree, "span(s)", "single", false); chk_line_end(line); } - if (usr->result.problems_gc == 0 && - (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) { + if (usr->result.problems_gc == 0 && (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) { const size_t used_pages = usr->result.alloc_pages - usr->result.gc_pages; if (usr->result.processed_pages != used_pages) - chk_scope_issue(usr->scope, - "used pages mismatch (%" PRIuSIZE - "(walked) != %" PRIuSIZE "(allocated - GC))", + chk_scope_issue(usr->scope, "used pages mismatch (%" PRIuSIZE "(walked) != %" PRIuSIZE "(allocated - GC))", usr->result.processed_pages, used_pages); if (usr->result.unused_pages != usr->result.gc_pages) - chk_scope_issue(usr->scope, - "GC pages mismatch (%" PRIuSIZE - "(expected) != %" PRIuSIZE "(GC))", + chk_scope_issue(usr->scope, "GC pages mismatch (%" PRIuSIZE "(expected) != %" PRIuSIZE "(GC))", usr->result.unused_pages, usr->result.gc_pages); } } @@ -1859,99 +1597,70 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) { //-------------------------------------------------------------------------- - err = chk_scope_begin(chk, 1, MDBX_chk_space, nullptr, nullptr, - "Page allocation:"); + err = chk_scope_begin(chk, 1, MDBX_chk_space, nullptr, nullptr, "Page allocation:"); const double percent_boundary_reciprocal = 100.0 / txn->geo.upper; const double percent_backed_reciprocal = 100.0 / usr->result.backed_pages; const size_t detained = usr->result.gc_pages - usr->result.reclaimable_pages; - const size_t available2boundary = - txn->geo.upper - usr->result.alloc_pages + usr->result.reclaimable_pages; - const size_t available2backed = usr->result.backed_pages - - usr->result.alloc_pages + - usr->result.reclaimable_pages; + const size_t available2boundary = txn->geo.upper - usr->result.alloc_pages + usr->result.reclaimable_pages; + const size_t available2backed = usr->result.backed_pages - usr->result.alloc_pages + usr->result.reclaimable_pages; const size_t 
remained2boundary = txn->geo.upper - usr->result.alloc_pages; - const size_t remained2backed = - usr->result.backed_pages - usr->result.alloc_pages; + const size_t remained2backed = usr->result.backed_pages - usr->result.alloc_pages; - const size_t used = (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) - ? usr->result.alloc_pages - usr->result.gc_pages - : usr->result.processed_pages; + const size_t used = (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) ? usr->result.alloc_pages - usr->result.gc_pages + : usr->result.processed_pages; line = chk_line_begin(usr->scope, MDBX_chk_info); line = chk_print(line, "backed by file: %" PRIuSIZE " pages (%.1f%%)" ", %" PRIuSIZE " left to boundary (%.1f%%)", - usr->result.backed_pages, - usr->result.backed_pages * percent_boundary_reciprocal, + usr->result.backed_pages, usr->result.backed_pages * percent_boundary_reciprocal, txn->geo.upper - usr->result.backed_pages, - (txn->geo.upper - usr->result.backed_pages) * - percent_boundary_reciprocal); + (txn->geo.upper - usr->result.backed_pages) * percent_boundary_reciprocal); line = chk_line_feed(line); - line = chk_print( - line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", - "used", used, used * percent_backed_reciprocal, - used * percent_boundary_reciprocal); + line = chk_print(line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", "used", used, + used * percent_backed_reciprocal, used * percent_boundary_reciprocal); line = chk_line_feed(line); - line = chk_print( - line, - "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE - " to boundary (%.1f%% of boundary)", - "remained", remained2backed, remained2backed * percent_backed_reciprocal, - remained2boundary, remained2boundary * percent_boundary_reciprocal); + line = chk_print(line, "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE " to boundary (%.1f%% of boundary)", + "remained", remained2backed, remained2backed * percent_backed_reciprocal, remained2boundary, + remained2boundary 
* percent_boundary_reciprocal); line = chk_line_feed(line); - line = chk_print( - line, - "reclaimable: %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)" - ", GC %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)", - usr->result.reclaimable_pages, - usr->result.reclaimable_pages * percent_backed_reciprocal, - usr->result.reclaimable_pages * percent_boundary_reciprocal, - usr->result.gc_pages, usr->result.gc_pages * percent_backed_reciprocal, - usr->result.gc_pages * percent_boundary_reciprocal); - line = chk_line_feed(line); - - line = chk_print( - line, - "detained by reader(s): %" PRIuSIZE - " (%.1f%% of backed, %.1f%% of boundary)" - ", %u reader(s), lag %" PRIi64, - detained, detained * percent_backed_reciprocal, - detained * percent_boundary_reciprocal, chk->envinfo.mi_numreaders, - chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid); - line = chk_line_feed(line); - - line = chk_print( - line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", - "allocated", usr->result.alloc_pages, - usr->result.alloc_pages * percent_backed_reciprocal, - usr->result.alloc_pages * percent_boundary_reciprocal); + line = + chk_print(line, + "reclaimable: %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)" + ", GC %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)", + usr->result.reclaimable_pages, usr->result.reclaimable_pages * percent_backed_reciprocal, + usr->result.reclaimable_pages * percent_boundary_reciprocal, usr->result.gc_pages, + usr->result.gc_pages * percent_backed_reciprocal, usr->result.gc_pages * percent_boundary_reciprocal); line = chk_line_feed(line); line = chk_print(line, - "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE - " to boundary (%.1f%% of boundary)", - "available", available2backed, - available2backed * percent_backed_reciprocal, - available2boundary, + "detained by reader(s): %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)" + ", %u reader(s), lag %" PRIi64, + detained, detained * 
percent_backed_reciprocal, detained * percent_boundary_reciprocal, + chk->envinfo.mi_numreaders, chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid); + line = chk_line_feed(line); + + line = chk_print(line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", "allocated", + usr->result.alloc_pages, usr->result.alloc_pages * percent_backed_reciprocal, + usr->result.alloc_pages * percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print(line, "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE " to boundary (%.1f%% of boundary)", + "available", available2backed, available2backed * percent_backed_reciprocal, available2boundary, available2boundary * percent_boundary_reciprocal); chk_line_end(line); line = chk_line_begin(usr->scope, MDBX_chk_resolution); - line = chk_print(line, "%s %" PRIaPGNO " pages", - (txn->geo.upper == txn->geo.now) ? "total" : "upto", - txn->geo.upper); - line = chk_print(line, ", backed %" PRIuSIZE " (%.1f%%)", - usr->result.backed_pages, + line = chk_print(line, "%s %" PRIaPGNO " pages", (txn->geo.upper == txn->geo.now) ? 
"total" : "upto", txn->geo.upper); + line = chk_print(line, ", backed %" PRIuSIZE " (%.1f%%)", usr->result.backed_pages, usr->result.backed_pages * percent_boundary_reciprocal); - line = chk_print(line, ", allocated %" PRIuSIZE " (%.1f%%)", - usr->result.alloc_pages, + line = chk_print(line, ", allocated %" PRIuSIZE " (%.1f%%)", usr->result.alloc_pages, usr->result.alloc_pages * percent_boundary_reciprocal); - line = - chk_print(line, ", available %" PRIuSIZE " (%.1f%%)", available2boundary, - available2boundary * percent_boundary_reciprocal); + line = chk_print(line, ", available %" PRIuSIZE " (%.1f%%)", available2boundary, + available2boundary * percent_boundary_reciprocal); chk_line_end(line); chk_scope_restore(scope, err); @@ -1959,17 +1668,13 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) { const char *const subj_main = chk_v2a(chk, MDBX_CHK_MAIN); if (chk->flags & MDBX_CHK_SKIP_KV_TRAVERSAL) - chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), - "Skip processing %s...", subj_main)); + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), "Skip processing %s...", subj_main)); else if ((usr->result.problems_kv = usr->result.kv_tree_problems) > 0) - chk_line_end(chk_print( - chk_line_begin(scope, MDBX_chk_processing), - "Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))", - subj_main, subj_tree, - usr->result.problems_kv = usr->result.kv_tree_problems)); + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))", subj_main, subj_tree, + usr->result.problems_kv = usr->result.kv_tree_problems)); else { - err = chk_scope_begin(chk, 0, MDBX_chk_maindb, &chk->table_main, - &usr->result.problems_kv, "Processing %s...", + err = chk_scope_begin(chk, 0, MDBX_chk_maindb, &chk->table_main, &usr->result.problems_kv, "Processing %s...", subj_main); if (likely(!err)) err = chk_db(usr->scope, MAIN_DBI, &chk->table_main, chk_handle_kv); 
@@ -1977,35 +1682,28 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) { const char *const subj_tables = "table(s)"; if (usr->result.problems_kv && usr->result.table_total) - chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), - "Skip processing %s", subj_tables)); + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), "Skip processing %s", subj_tables)); else if (usr->result.problems_kv == 0 && usr->result.table_total == 0) - chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "No %s", - subj_tables)); + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "No %s", subj_tables)); else if (usr->result.problems_kv == 0 && usr->result.table_total) { - err = chk_scope_begin( - chk, 1, MDBX_chk_tables, nullptr, &usr->result.problems_kv, - "Processing %s by txn#%" PRIaTXN "...", subj_tables, txn->txnid); + err = chk_scope_begin(chk, 1, MDBX_chk_tables, nullptr, &usr->result.problems_kv, + "Processing %s by txn#%" PRIaTXN "...", subj_tables, txn->txnid); if (!err) err = chk_db(usr->scope, MAIN_DBI, &chk->table_main, nullptr); if (usr->scope->subtotal_issues) chk_line_end(chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution), - "processed %" PRIuSIZE " of %" PRIuSIZE - " %s, %" PRIuSIZE " problems(s)", - usr->result.table_processed, - usr->result.table_total, subj_tables, + "processed %" PRIuSIZE " of %" PRIuSIZE " %s, %" PRIuSIZE " problems(s)", + usr->result.table_processed, usr->result.table_total, subj_tables, usr->scope->subtotal_issues)); } chk_scope_restore(scope, err); } - return chk_scope_end(chk, chk_scope_begin(chk, 0, MDBX_chk_conclude, nullptr, - nullptr, nullptr)); + return chk_scope_end(chk, chk_scope_begin(chk, 0, MDBX_chk_conclude, nullptr, nullptr, nullptr)); } __cold int mdbx_env_chk_encount_problem(MDBX_chk_context_t *ctx) { - if (likely(ctx && ctx->internal && ctx->internal->usr == ctx && - ctx->internal->problem_counter && ctx->scope)) { + if (likely(ctx && ctx->internal && 
ctx->internal->usr == ctx && ctx->internal->problem_counter && ctx->scope)) { *ctx->internal->problem_counter += 1; ctx->scope->subtotal_issues += 1; return MDBX_SUCCESS; @@ -2013,10 +1711,8 @@ __cold int mdbx_env_chk_encount_problem(MDBX_chk_context_t *ctx) { return MDBX_EINVAL; } -__cold int mdbx_env_chk(MDBX_env *env, const struct MDBX_chk_callbacks *cb, - MDBX_chk_context_t *ctx, const MDBX_chk_flags_t flags, - MDBX_chk_severity_t verbosity, - unsigned timeout_seconds_16dot16) { +__cold int mdbx_env_chk(MDBX_env *env, const struct MDBX_chk_callbacks *cb, MDBX_chk_context_t *ctx, + const MDBX_chk_flags_t flags, MDBX_chk_severity_t verbosity, unsigned timeout_seconds_16dot16) { int err, rc = check_env(env, false); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); @@ -2042,9 +1738,7 @@ __cold int mdbx_env_chk(MDBX_env *env, const struct MDBX_chk_callbacks *cb, chk->table[MAIN_DBI] = &chk->table_main; chk->monotime_timeout = - timeout_seconds_16dot16 - ? osal_16dot16_to_monotime(timeout_seconds_16dot16) + osal_monotime() - : 0; + timeout_seconds_16dot16 ? osal_16dot16_to_monotime(timeout_seconds_16dot16) + osal_monotime() : 0; chk->usr->scope_nesting = 0; chk->usr->result.tables = (const void *)&chk->table; @@ -2053,16 +1747,13 @@ __cold int mdbx_env_chk(MDBX_env *env, const struct MDBX_chk_callbacks *cb, top->internal = chk; // init - rc = chk_scope_end( - chk, chk_scope_begin(chk, 0, MDBX_chk_init, nullptr, nullptr, nullptr)); + rc = chk_scope_end(chk, chk_scope_begin(chk, 0, MDBX_chk_init, nullptr, nullptr, nullptr)); // lock if (likely(!rc)) - rc = chk_scope_begin( - chk, 0, MDBX_chk_lock, nullptr, nullptr, "Taking %slock...", - (env->flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) ? "" : "read "); - if (likely(!rc) && (env->flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0 && - (flags & MDBX_CHK_READWRITE)) { + rc = chk_scope_begin(chk, 0, MDBX_chk_lock, nullptr, nullptr, "Taking %slock...", + (env->flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) ? 
"" : "read "); + if (likely(!rc) && (env->flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0 && (flags & MDBX_CHK_READWRITE)) { rc = mdbx_txn_lock(env, false); if (unlikely(rc)) chk_error_rc(ctx->scope, rc, "mdbx_txn_lock"); diff --git a/src/cogs.c b/src/cogs.c index 2c505fbe..2e7d18c6 100644 --- a/src/cogs.c +++ b/src/cogs.c @@ -74,27 +74,22 @@ __cold bool pv2pages_verify(void) { /*----------------------------------------------------------------------------*/ -MDBX_NOTHROW_PURE_FUNCTION size_t bytes_align2os_bytes(const MDBX_env *env, - size_t bytes) { - return ceil_powerof2( - bytes, (env->ps > globals.sys_pagesize) ? env->ps : globals.sys_pagesize); +MDBX_NOTHROW_PURE_FUNCTION size_t bytes_align2os_bytes(const MDBX_env *env, size_t bytes) { + return ceil_powerof2(bytes, (env->ps > globals.sys_pagesize) ? env->ps : globals.sys_pagesize); } -MDBX_NOTHROW_PURE_FUNCTION size_t pgno_align2os_bytes(const MDBX_env *env, - size_t pgno) { +MDBX_NOTHROW_PURE_FUNCTION size_t pgno_align2os_bytes(const MDBX_env *env, size_t pgno) { return ceil_powerof2(pgno2bytes(env, pgno), globals.sys_pagesize); } -MDBX_NOTHROW_PURE_FUNCTION pgno_t pgno_align2os_pgno(const MDBX_env *env, - size_t pgno) { +MDBX_NOTHROW_PURE_FUNCTION pgno_t pgno_align2os_pgno(const MDBX_env *env, size_t pgno) { return bytes2pgno(env, pgno_align2os_bytes(env, pgno)); } /*----------------------------------------------------------------------------*/ -MDBX_NOTHROW_PURE_FUNCTION static __always_inline int -cmp_int_inline(const size_t expected_alignment, const MDBX_val *a, - const MDBX_val *b) { +MDBX_NOTHROW_PURE_FUNCTION static __always_inline int cmp_int_inline(const size_t expected_alignment, const MDBX_val *a, + const MDBX_val *b) { if (likely(a->iov_len == b->iov_len)) { if (sizeof(size_t) > 7 && likely(a->iov_len == 8)) return CMP2INT(unaligned_peek_u64(expected_alignment, a->iov_base), @@ -106,35 +101,31 @@ cmp_int_inline(const size_t expected_alignment, const MDBX_val *a, return 
CMP2INT(unaligned_peek_u64(expected_alignment, a->iov_base), unaligned_peek_u64(expected_alignment, b->iov_base)); } - ERROR("mismatch and/or invalid size %p.%zu/%p.%zu for INTEGERKEY/INTEGERDUP", - a->iov_base, a->iov_len, b->iov_base, b->iov_len); + ERROR("mismatch and/or invalid size %p.%zu/%p.%zu for INTEGERKEY/INTEGERDUP", a->iov_base, a->iov_len, b->iov_base, + b->iov_len); return 0; } -MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_int_unaligned(const MDBX_val *a, - const MDBX_val *b) { +MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_int_unaligned(const MDBX_val *a, const MDBX_val *b) { return cmp_int_inline(1, a, b); } #ifndef cmp_int_align2 /* Compare two items pointing at 2-byte aligned unsigned int's. */ -MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_int_align2(const MDBX_val *a, - const MDBX_val *b) { +MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_int_align2(const MDBX_val *a, const MDBX_val *b) { return cmp_int_inline(2, a, b); } #endif /* cmp_int_align2 */ #ifndef cmp_int_align4 /* Compare two items pointing at 4-byte aligned unsigned int's. */ -MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_int_align4(const MDBX_val *a, - const MDBX_val *b) { +MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_int_align4(const MDBX_val *a, const MDBX_val *b) { return cmp_int_inline(4, a, b); } #endif /* cmp_int_align4 */ /* Compare two items lexically */ -MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_lexical(const MDBX_val *a, - const MDBX_val *b) { +MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_lexical(const MDBX_val *a, const MDBX_val *b) { if (a->iov_len == b->iov_len) return a->iov_len ? memcmp(a->iov_base, b->iov_base, a->iov_len) : 0; @@ -144,8 +135,7 @@ MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_lexical(const MDBX_val *a, return likely(diff_data) ? 
diff_data : diff_len; } -MDBX_NOTHROW_PURE_FUNCTION static __always_inline unsigned -tail3le(const uint8_t *p, size_t l) { +MDBX_NOTHROW_PURE_FUNCTION static __always_inline unsigned tail3le(const uint8_t *p, size_t l) { STATIC_ASSERT(sizeof(unsigned) > 2); // 1: 0 0 0 // 2: 0 1 1 @@ -154,8 +144,7 @@ tail3le(const uint8_t *p, size_t l) { } /* Compare two items in reverse byte order */ -MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_reverse(const MDBX_val *a, - const MDBX_val *b) { +MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_reverse(const MDBX_val *a, const MDBX_val *b) { size_t left = (a->iov_len < b->iov_len) ? a->iov_len : b->iov_len; if (likely(left)) { const uint8_t *pa = ptr_disp(a->iov_base, a->iov_len); @@ -209,25 +198,19 @@ MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_reverse(const MDBX_val *a, } /* Fast non-lexically comparator */ -MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_lenfast(const MDBX_val *a, - const MDBX_val *b) { +MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_lenfast(const MDBX_val *a, const MDBX_val *b) { int diff = CMP2INT(a->iov_len, b->iov_len); - return (likely(diff) || a->iov_len == 0) - ? diff - : memcmp(a->iov_base, b->iov_base, a->iov_len); + return (likely(diff) || a->iov_len == 0) ? 
diff : memcmp(a->iov_base, b->iov_base, a->iov_len); } -MDBX_NOTHROW_PURE_FUNCTION __hot bool -eq_fast_slowpath(const uint8_t *a, const uint8_t *b, size_t l) { +MDBX_NOTHROW_PURE_FUNCTION __hot bool eq_fast_slowpath(const uint8_t *a, const uint8_t *b, size_t l) { if (likely(l > 3)) { if (MDBX_UNALIGNED_OK >= 4 && likely(l < 9)) return ((unaligned_peek_u32(1, a) - unaligned_peek_u32(1, b)) | - (unaligned_peek_u32(1, a + l - 4) - - unaligned_peek_u32(1, b + l - 4))) == 0; + (unaligned_peek_u32(1, a + l - 4) - unaligned_peek_u32(1, b + l - 4))) == 0; if (MDBX_UNALIGNED_OK >= 8 && sizeof(size_t) > 7 && likely(l < 17)) return ((unaligned_peek_u64(1, a) - unaligned_peek_u64(1, b)) | - (unaligned_peek_u64(1, a + l - 8) - - unaligned_peek_u64(1, b + l - 8))) == 0; + (unaligned_peek_u64(1, a + l - 8) - unaligned_peek_u64(1, b + l - 8))) == 0; return memcmp(a, b, l) == 0; } if (likely(l)) @@ -235,31 +218,21 @@ eq_fast_slowpath(const uint8_t *a, const uint8_t *b, size_t l) { return true; } -int cmp_equal_or_greater(const MDBX_val *a, const MDBX_val *b) { - return eq_fast(a, b) ? 0 : 1; -} +int cmp_equal_or_greater(const MDBX_val *a, const MDBX_val *b) { return eq_fast(a, b) ? 0 : 1; } -int cmp_equal_or_wrong(const MDBX_val *a, const MDBX_val *b) { - return eq_fast(a, b) ? 0 : -1; -} +int cmp_equal_or_wrong(const MDBX_val *a, const MDBX_val *b) { return eq_fast(a, b) ? 
0 : -1; } /*----------------------------------------------------------------------------*/ -__cold void update_mlcnt(const MDBX_env *env, - const pgno_t new_aligned_mlocked_pgno, - const bool lock_not_release) { +__cold void update_mlcnt(const MDBX_env *env, const pgno_t new_aligned_mlocked_pgno, const bool lock_not_release) { for (;;) { - const pgno_t mlock_pgno_before = - atomic_load32(&env->mlocked_pgno, mo_AcquireRelease); - eASSERT(env, - pgno_align2os_pgno(env, mlock_pgno_before) == mlock_pgno_before); - eASSERT(env, pgno_align2os_pgno(env, new_aligned_mlocked_pgno) == - new_aligned_mlocked_pgno); + const pgno_t mlock_pgno_before = atomic_load32(&env->mlocked_pgno, mo_AcquireRelease); + eASSERT(env, pgno_align2os_pgno(env, mlock_pgno_before) == mlock_pgno_before); + eASSERT(env, pgno_align2os_pgno(env, new_aligned_mlocked_pgno) == new_aligned_mlocked_pgno); if (lock_not_release ? (mlock_pgno_before >= new_aligned_mlocked_pgno) : (mlock_pgno_before <= new_aligned_mlocked_pgno)) break; - if (likely(atomic_cas32(&((MDBX_env *)env)->mlocked_pgno, mlock_pgno_before, - new_aligned_mlocked_pgno))) + if (likely(atomic_cas32(&((MDBX_env *)env)->mlocked_pgno, mlock_pgno_before, new_aligned_mlocked_pgno))) for (;;) { mdbx_atomic_uint32_t *const mlcnt = env->lck->mlcnt; const int32_t snap_locked = atomic_load32(mlcnt + 0, mo_Relaxed); @@ -269,52 +242,39 @@ __cold void update_mlcnt(const MDBX_env *env, if (unlikely(!atomic_cas32(mlcnt + 0, snap_locked, snap_locked + 1))) continue; } - if (new_aligned_mlocked_pgno == 0 && - (snap_locked - snap_unlocked) > 0) { + if (new_aligned_mlocked_pgno == 0 && (snap_locked - snap_unlocked) > 0) { eASSERT(env, !lock_not_release); - if (unlikely( - !atomic_cas32(mlcnt + 1, snap_unlocked, snap_unlocked + 1))) + if (unlikely(!atomic_cas32(mlcnt + 1, snap_unlocked, snap_unlocked + 1))) continue; } - NOTICE("%s-pages %u..%u, mlocked-process(es) %u -> %u", - lock_not_release ? 
"lock" : "unlock", + NOTICE("%s-pages %u..%u, mlocked-process(es) %u -> %u", lock_not_release ? "lock" : "unlock", lock_not_release ? mlock_pgno_before : new_aligned_mlocked_pgno, - lock_not_release ? new_aligned_mlocked_pgno : mlock_pgno_before, - snap_locked - snap_unlocked, - atomic_load32(mlcnt + 0, mo_Relaxed) - - atomic_load32(mlcnt + 1, mo_Relaxed)); + lock_not_release ? new_aligned_mlocked_pgno : mlock_pgno_before, snap_locked - snap_unlocked, + atomic_load32(mlcnt + 0, mo_Relaxed) - atomic_load32(mlcnt + 1, mo_Relaxed)); return; } } } -__cold void munlock_after(const MDBX_env *env, const pgno_t aligned_pgno, - const size_t end_bytes) { +__cold void munlock_after(const MDBX_env *env, const pgno_t aligned_pgno, const size_t end_bytes) { if (atomic_load32(&env->mlocked_pgno, mo_AcquireRelease) > aligned_pgno) { int err = MDBX_ENOSYS; const size_t munlock_begin = pgno2bytes(env, aligned_pgno); const size_t munlock_size = end_bytes - munlock_begin; - eASSERT(env, end_bytes % globals.sys_pagesize == 0 && - munlock_begin % globals.sys_pagesize == 0 && + eASSERT(env, end_bytes % globals.sys_pagesize == 0 && munlock_begin % globals.sys_pagesize == 0 && munlock_size % globals.sys_pagesize == 0); #if defined(_WIN32) || defined(_WIN64) - err = - VirtualUnlock(ptr_disp(env->dxb_mmap.base, munlock_begin), munlock_size) - ? MDBX_SUCCESS - : (int)GetLastError(); + err = VirtualUnlock(ptr_disp(env->dxb_mmap.base, munlock_begin), munlock_size) ? MDBX_SUCCESS : (int)GetLastError(); if (err == ERROR_NOT_LOCKED) err = MDBX_SUCCESS; #elif defined(_POSIX_MEMLOCK_RANGE) - err = munlock(ptr_disp(env->dxb_mmap.base, munlock_begin), munlock_size) - ? errno - : MDBX_SUCCESS; + err = munlock(ptr_disp(env->dxb_mmap.base, munlock_begin), munlock_size) ? 
errno : MDBX_SUCCESS; #endif if (likely(err == MDBX_SUCCESS)) update_mlcnt(env, aligned_pgno, false); else { #if defined(_WIN32) || defined(_WIN64) - WARNING("VirtualUnlock(%zu, %zu) error %d", munlock_begin, munlock_size, - err); + WARNING("VirtualUnlock(%zu, %zu) error %d", munlock_begin, munlock_size, err); #else WARNING("munlock(%zu, %zu) error %d", munlock_begin, munlock_size, err); #endif @@ -332,13 +292,11 @@ uint32_t combine_durability_flags(const uint32_t a, const uint32_t b) { uint32_t r = a | b; /* avoid false MDBX_UTTERLY_NOSYNC */ - if (F_ISSET(r, MDBX_UTTERLY_NOSYNC) && !F_ISSET(a, MDBX_UTTERLY_NOSYNC) && - !F_ISSET(b, MDBX_UTTERLY_NOSYNC)) + if (F_ISSET(r, MDBX_UTTERLY_NOSYNC) && !F_ISSET(a, MDBX_UTTERLY_NOSYNC) && !F_ISSET(b, MDBX_UTTERLY_NOSYNC)) r = (r - MDBX_UTTERLY_NOSYNC) | MDBX_SAFE_NOSYNC; /* convert DEPRECATED_MAPASYNC to MDBX_SAFE_NOSYNC */ - if ((r & (MDBX_WRITEMAP | DEPRECATED_MAPASYNC)) == - (MDBX_WRITEMAP | DEPRECATED_MAPASYNC) && + if ((r & (MDBX_WRITEMAP | DEPRECATED_MAPASYNC)) == (MDBX_WRITEMAP | DEPRECATED_MAPASYNC) && !F_ISSET(r, MDBX_UTTERLY_NOSYNC)) r = (r - DEPRECATED_MAPASYNC) | MDBX_SAFE_NOSYNC; @@ -346,8 +304,6 @@ uint32_t combine_durability_flags(const uint32_t a, const uint32_t b) { if (r & (MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC)) r |= MDBX_NOMETASYNC; - assert(!(F_ISSET(r, MDBX_UTTERLY_NOSYNC) && - !F_ISSET(a, MDBX_UTTERLY_NOSYNC) && - !F_ISSET(b, MDBX_UTTERLY_NOSYNC))); + assert(!(F_ISSET(r, MDBX_UTTERLY_NOSYNC) && !F_ISSET(a, MDBX_UTTERLY_NOSYNC) && !F_ISSET(b, MDBX_UTTERLY_NOSYNC))); return r; } diff --git a/src/cogs.h b/src/cogs.h index 705900bc..41d79b40 100644 --- a/src/cogs.h +++ b/src/cogs.h @@ -51,19 +51,15 @@ MDBX_MAYBE_UNUSED MDBX_INTERNAL bool pv2pages_verify(void); #define PAGESPACE(pagesize) ((pagesize) - PAGEHDRSZ) -#define BRANCH_NODE_MAX(pagesize) \ - (EVEN_FLOOR((PAGESPACE(pagesize) - sizeof(indx_t) - NODESIZE) / (3 - 1) - \ - sizeof(indx_t))) +#define BRANCH_NODE_MAX(pagesize) \ + 
(EVEN_FLOOR((PAGESPACE(pagesize) - sizeof(indx_t) - NODESIZE) / (3 - 1) - sizeof(indx_t))) -#define LEAF_NODE_MAX(pagesize) \ - (EVEN_FLOOR(PAGESPACE(pagesize) / 2) - sizeof(indx_t)) +#define LEAF_NODE_MAX(pagesize) (EVEN_FLOOR(PAGESPACE(pagesize) / 2) - sizeof(indx_t)) #define MAX_GC1OVPAGE(pagesize) (PAGESPACE(pagesize) / sizeof(pgno_t) - 1) -MDBX_NOTHROW_CONST_FUNCTION static inline size_t -keysize_max(size_t pagesize, MDBX_db_flags_t flags) { - assert(pagesize >= MDBX_MIN_PAGESIZE && pagesize <= MDBX_MAX_PAGESIZE && - is_powerof2(pagesize)); +MDBX_NOTHROW_CONST_FUNCTION static inline size_t keysize_max(size_t pagesize, MDBX_db_flags_t flags) { + assert(pagesize >= MDBX_MIN_PAGESIZE && pagesize <= MDBX_MAX_PAGESIZE && is_powerof2(pagesize)); STATIC_ASSERT(BRANCH_NODE_MAX(MDBX_MIN_PAGESIZE) - NODESIZE >= 8); if (flags & MDBX_INTEGERKEY) return 8 /* sizeof(uint64_t) */; @@ -72,18 +68,14 @@ keysize_max(size_t pagesize, MDBX_db_flags_t flags) { STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MIN_PAGESIZE) - NODESIZE - /* sizeof(uint64) as a key */ 8 > sizeof(tree_t)); - if (flags & - (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP)) { - const intptr_t max_dupsort_leaf_key = - LEAF_NODE_MAX(pagesize) - NODESIZE - sizeof(tree_t); - return (max_branch_key < max_dupsort_leaf_key) ? max_branch_key - : max_dupsort_leaf_key; + if (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP)) { + const intptr_t max_dupsort_leaf_key = LEAF_NODE_MAX(pagesize) - NODESIZE - sizeof(tree_t); + return (max_branch_key < max_dupsort_leaf_key) ? 
max_branch_key : max_dupsort_leaf_key; } return max_branch_key; } -MDBX_NOTHROW_CONST_FUNCTION static inline size_t -env_keysize_max(const MDBX_env *env, MDBX_db_flags_t flags) { +MDBX_NOTHROW_CONST_FUNCTION static inline size_t env_keysize_max(const MDBX_env *env, MDBX_db_flags_t flags) { size_t size_max; if (flags & MDBX_INTEGERKEY) size_max = 8 /* sizeof(uint64_t) */; @@ -92,12 +84,9 @@ env_keysize_max(const MDBX_env *env, MDBX_db_flags_t flags) { STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MIN_PAGESIZE) - NODESIZE - /* sizeof(uint64) as a key */ 8 > sizeof(tree_t)); - if (flags & - (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP)) { - const intptr_t max_dupsort_leaf_key = - env->leaf_nodemax - NODESIZE - sizeof(tree_t); - size_max = (max_branch_key < max_dupsort_leaf_key) ? max_branch_key - : max_dupsort_leaf_key; + if (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP)) { + const intptr_t max_dupsort_leaf_key = env->leaf_nodemax - NODESIZE - sizeof(tree_t); + size_max = (max_branch_key < max_dupsort_leaf_key) ? max_branch_key : max_dupsort_leaf_key; } else size_max = max_branch_key; } @@ -105,13 +94,11 @@ env_keysize_max(const MDBX_env *env, MDBX_db_flags_t flags) { return size_max; } -MDBX_NOTHROW_CONST_FUNCTION static inline size_t -keysize_min(MDBX_db_flags_t flags) { +MDBX_NOTHROW_CONST_FUNCTION static inline size_t keysize_min(MDBX_db_flags_t flags) { return (flags & MDBX_INTEGERKEY) ? 
4 /* sizeof(uint32_t) */ : 0; } -MDBX_NOTHROW_CONST_FUNCTION static inline size_t -valsize_min(MDBX_db_flags_t flags) { +MDBX_NOTHROW_CONST_FUNCTION static inline size_t valsize_min(MDBX_db_flags_t flags) { if (flags & MDBX_INTEGERDUP) return 4 /* sizeof(uint32_t) */; else if (flags & MDBX_DUPFIXED) @@ -120,10 +107,8 @@ valsize_min(MDBX_db_flags_t flags) { return 0; } -MDBX_NOTHROW_CONST_FUNCTION static inline size_t -valsize_max(size_t pagesize, MDBX_db_flags_t flags) { - assert(pagesize >= MDBX_MIN_PAGESIZE && pagesize <= MDBX_MAX_PAGESIZE && - is_powerof2(pagesize)); +MDBX_NOTHROW_CONST_FUNCTION static inline size_t valsize_max(size_t pagesize, MDBX_db_flags_t flags) { + assert(pagesize >= MDBX_MIN_PAGESIZE && pagesize <= MDBX_MAX_PAGESIZE && is_powerof2(pagesize)); if (flags & MDBX_INTEGERDUP) return 8 /* sizeof(uint64_t) */; @@ -136,13 +121,11 @@ valsize_max(size_t pagesize, MDBX_db_flags_t flags) { const size_t hard_pages = hard >> page_ln2; STATIC_ASSERT(PAGELIST_LIMIT <= MAX_PAGENO); const size_t pages_limit = PAGELIST_LIMIT / 4; - const size_t limit = - (hard_pages < pages_limit) ? hard : (pages_limit << page_ln2); + const size_t limit = (hard_pages < pages_limit) ? hard : (pages_limit << page_ln2); return (limit < MAX_MAPSIZE / 2) ? limit : MAX_MAPSIZE / 2; } -MDBX_NOTHROW_CONST_FUNCTION static inline size_t -env_valsize_max(const MDBX_env *env, MDBX_db_flags_t flags) { +MDBX_NOTHROW_CONST_FUNCTION static inline size_t env_valsize_max(const MDBX_env *env, MDBX_db_flags_t flags) { size_t size_max; if (flags & MDBX_INTEGERDUP) size_max = 8 /* sizeof(uint64_t) */; @@ -153,8 +136,7 @@ env_valsize_max(const MDBX_env *env, MDBX_db_flags_t flags) { const size_t hard_pages = hard >> env->ps2ln; STATIC_ASSERT(PAGELIST_LIMIT <= MAX_PAGENO); const size_t pages_limit = PAGELIST_LIMIT / 4; - const size_t limit = - (hard_pages < pages_limit) ? hard : (pages_limit << env->ps2ln); + const size_t limit = (hard_pages < pages_limit) ? 
hard : (pages_limit << env->ps2ln); size_max = (limit < MAX_MAPSIZE / 2) ? limit : MAX_MAPSIZE / 2; } eASSERT(env, size_max == valsize_max(env->ps, flags)); @@ -163,8 +145,8 @@ env_valsize_max(const MDBX_env *env, MDBX_db_flags_t flags) { /*----------------------------------------------------------------------------*/ -MDBX_NOTHROW_PURE_FUNCTION static inline size_t -leaf_size(const MDBX_env *env, const MDBX_val *key, const MDBX_val *data) { +MDBX_NOTHROW_PURE_FUNCTION static inline size_t leaf_size(const MDBX_env *env, const MDBX_val *key, + const MDBX_val *data) { size_t node_bytes = node_size(key, data); if (node_bytes > env->leaf_nodemax) /* put on large/overflow page */ @@ -173,35 +155,30 @@ leaf_size(const MDBX_env *env, const MDBX_val *key, const MDBX_val *data) { return node_bytes + sizeof(indx_t); } -MDBX_NOTHROW_PURE_FUNCTION static inline size_t -branch_size(const MDBX_env *env, const MDBX_val *key) { +MDBX_NOTHROW_PURE_FUNCTION static inline size_t branch_size(const MDBX_env *env, const MDBX_val *key) { /* Size of a node in a branch page with a given key. * This is just the node header plus the key, there is no data. 
*/ size_t node_bytes = node_size(key, nullptr); if (unlikely(node_bytes > env->branch_nodemax)) { /* put on large/overflow page, not implemented */ - mdbx_panic("node_size(key) %zu > %u branch_nodemax", node_bytes, - env->branch_nodemax); + mdbx_panic("node_size(key) %zu > %u branch_nodemax", node_bytes, env->branch_nodemax); node_bytes = node_size(key, nullptr) + sizeof(pgno_t); } return node_bytes + sizeof(indx_t); } -MDBX_NOTHROW_CONST_FUNCTION static inline uint16_t -flags_db2sub(uint16_t db_flags) { +MDBX_NOTHROW_CONST_FUNCTION static inline uint16_t flags_db2sub(uint16_t db_flags) { uint16_t sub_flags = db_flags & MDBX_DUPFIXED; /* MDBX_INTEGERDUP => MDBX_INTEGERKEY */ #define SHIFT_INTEGERDUP_TO_INTEGERKEY 2 - STATIC_ASSERT((MDBX_INTEGERDUP >> SHIFT_INTEGERDUP_TO_INTEGERKEY) == - MDBX_INTEGERKEY); + STATIC_ASSERT((MDBX_INTEGERDUP >> SHIFT_INTEGERDUP_TO_INTEGERKEY) == MDBX_INTEGERKEY); sub_flags |= (db_flags & MDBX_INTEGERDUP) >> SHIFT_INTEGERDUP_TO_INTEGERKEY; /* MDBX_REVERSEDUP => MDBX_REVERSEKEY */ #define SHIFT_REVERSEDUP_TO_REVERSEKEY 5 - STATIC_ASSERT((MDBX_REVERSEDUP >> SHIFT_REVERSEDUP_TO_REVERSEKEY) == - MDBX_REVERSEKEY); + STATIC_ASSERT((MDBX_REVERSEDUP >> SHIFT_REVERSEDUP_TO_REVERSEKEY) == MDBX_REVERSEKEY); sub_flags |= (db_flags & MDBX_REVERSEDUP) >> SHIFT_REVERSEDUP_TO_REVERSEKEY; return sub_flags; @@ -219,41 +196,33 @@ static inline bool check_table_flags(unsigned flags) { case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP: case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: case MDBX_DB_DEFAULTS: - return (flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) != - (MDBX_REVERSEKEY | MDBX_INTEGERKEY); + return (flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) != (MDBX_REVERSEKEY | MDBX_INTEGERKEY); } } /*----------------------------------------------------------------------------*/ -MDBX_NOTHROW_PURE_FUNCTION static inline size_t pgno2bytes(const MDBX_env *env, - size_t pgno) { +MDBX_NOTHROW_PURE_FUNCTION static inline size_t 
pgno2bytes(const MDBX_env *env, size_t pgno) { eASSERT(env, (1u << env->ps2ln) == env->ps); return ((size_t)pgno) << env->ps2ln; } -MDBX_NOTHROW_PURE_FUNCTION static inline page_t *pgno2page(const MDBX_env *env, - size_t pgno) { +MDBX_NOTHROW_PURE_FUNCTION static inline page_t *pgno2page(const MDBX_env *env, size_t pgno) { return ptr_disp(env->dxb_mmap.base, pgno2bytes(env, pgno)); } -MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t bytes2pgno(const MDBX_env *env, - size_t bytes) { +MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t bytes2pgno(const MDBX_env *env, size_t bytes) { eASSERT(env, (env->ps >> env->ps2ln) == 1); return (pgno_t)(bytes >> env->ps2ln); } -MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL size_t -bytes_align2os_bytes(const MDBX_env *env, size_t bytes); +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL size_t bytes_align2os_bytes(const MDBX_env *env, size_t bytes); -MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL size_t -pgno_align2os_bytes(const MDBX_env *env, size_t pgno); +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL size_t pgno_align2os_bytes(const MDBX_env *env, size_t pgno); -MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL pgno_t -pgno_align2os_pgno(const MDBX_env *env, size_t pgno); +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL pgno_t pgno_align2os_pgno(const MDBX_env *env, size_t pgno); -MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t -largechunk_npages(const MDBX_env *env, size_t bytes) { +MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t largechunk_npages(const MDBX_env *env, size_t bytes) { return bytes2pgno(env, PAGEHDRSZ - 1 + bytes) + 1; } @@ -264,69 +233,53 @@ MDBX_NOTHROW_PURE_FUNCTION static inline MDBX_val get_key(const node_t *node) { return key; } -static inline void get_key_optional(const node_t *node, - MDBX_val *keyptr /* __may_null */) { +static inline void get_key_optional(const node_t *node, MDBX_val *keyptr /* __may_null */) { if (keyptr) *keyptr = get_key(node); } -MDBX_NOTHROW_PURE_FUNCTION static inline void *page_data(const page_t *mp) { - return 
ptr_disp(mp, PAGEHDRSZ); -} +MDBX_NOTHROW_PURE_FUNCTION static inline void *page_data(const page_t *mp) { return ptr_disp(mp, PAGEHDRSZ); } -MDBX_NOTHROW_PURE_FUNCTION static inline const page_t * -data_page(const void *data) { +MDBX_NOTHROW_PURE_FUNCTION static inline const page_t *data_page(const void *data) { return container_of(data, page_t, entries); } -MDBX_NOTHROW_PURE_FUNCTION static inline meta_t *page_meta(page_t *mp) { - return (meta_t *)page_data(mp); -} +MDBX_NOTHROW_PURE_FUNCTION static inline meta_t *page_meta(page_t *mp) { return (meta_t *)page_data(mp); } -MDBX_NOTHROW_PURE_FUNCTION static inline size_t page_numkeys(const page_t *mp) { - return mp->lower >> 1; -} +MDBX_NOTHROW_PURE_FUNCTION static inline size_t page_numkeys(const page_t *mp) { return mp->lower >> 1; } -MDBX_NOTHROW_PURE_FUNCTION static inline size_t page_room(const page_t *mp) { - return mp->upper - mp->lower; -} +MDBX_NOTHROW_PURE_FUNCTION static inline size_t page_room(const page_t *mp) { return mp->upper - mp->lower; } -MDBX_NOTHROW_PURE_FUNCTION static inline size_t -page_space(const MDBX_env *env) { +MDBX_NOTHROW_PURE_FUNCTION static inline size_t page_space(const MDBX_env *env) { STATIC_ASSERT(PAGEHDRSZ % 2 == 0); return env->ps - PAGEHDRSZ; } -MDBX_NOTHROW_PURE_FUNCTION static inline size_t page_used(const MDBX_env *env, - const page_t *mp) { +MDBX_NOTHROW_PURE_FUNCTION static inline size_t page_used(const MDBX_env *env, const page_t *mp) { return page_space(env) - page_room(mp); } /* The percentage of space used in the page, in a percents. 
*/ -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline unsigned -page_fill_percentum_x10(const MDBX_env *env, const page_t *mp) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline unsigned page_fill_percentum_x10(const MDBX_env *env, + const page_t *mp) { const size_t space = page_space(env); return (unsigned)((page_used(env, mp) * 1000 + space / 2) / space); } -MDBX_NOTHROW_PURE_FUNCTION static inline node_t *page_node(const page_t *mp, - size_t i) { +MDBX_NOTHROW_PURE_FUNCTION static inline node_t *page_node(const page_t *mp, size_t i) { assert(page_type_compat(mp) == P_LEAF || page_type(mp) == P_BRANCH); assert(page_numkeys(mp) > i); assert(mp->entries[i] % 2 == 0); return ptr_disp(mp, mp->entries[i] + PAGEHDRSZ); } -MDBX_NOTHROW_PURE_FUNCTION static inline void * -page_dupfix_ptr(const page_t *mp, size_t i, size_t keysize) { - assert(page_type_compat(mp) == (P_LEAF | P_DUPFIX) && i == (indx_t)i && - mp->dupfix_ksize == keysize); +MDBX_NOTHROW_PURE_FUNCTION static inline void *page_dupfix_ptr(const page_t *mp, size_t i, size_t keysize) { + assert(page_type_compat(mp) == (P_LEAF | P_DUPFIX) && i == (indx_t)i && mp->dupfix_ksize == keysize); (void)keysize; return ptr_disp(mp, PAGEHDRSZ + mp->dupfix_ksize * (indx_t)i); } -MDBX_NOTHROW_PURE_FUNCTION static inline MDBX_val -page_dupfix_key(const page_t *mp, size_t i, size_t keysize) { +MDBX_NOTHROW_PURE_FUNCTION static inline MDBX_val page_dupfix_key(const page_t *mp, size_t i, size_t keysize) { MDBX_val r; r.iov_base = page_dupfix_ptr(mp, i, keysize); r.iov_len = mp->dupfix_ksize; @@ -335,11 +288,9 @@ page_dupfix_key(const page_t *mp, size_t i, size_t keysize) { /*----------------------------------------------------------------------------*/ -MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int -cmp_int_unaligned(const MDBX_val *a, const MDBX_val *b); +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_int_unaligned(const MDBX_val *a, const MDBX_val *b); -#if MDBX_UNALIGNED_OK < 2 || \ - (MDBX_DEBUG || 
MDBX_FORCE_ASSERTIONS || !defined(NDEBUG)) +#if MDBX_UNALIGNED_OK < 2 || (MDBX_DEBUG || MDBX_FORCE_ASSERTIONS || !defined(NDEBUG)) MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int /* Compare two items pointing at 2-byte aligned unsigned int's. */ cmp_int_align2(const MDBX_val *a, const MDBX_val *b); @@ -347,8 +298,7 @@ cmp_int_align2(const MDBX_val *a, const MDBX_val *b); #define cmp_int_align2 cmp_int_unaligned #endif /* !MDBX_UNALIGNED_OK || debug */ -#if MDBX_UNALIGNED_OK < 4 || \ - (MDBX_DEBUG || MDBX_FORCE_ASSERTIONS || !defined(NDEBUG)) +#if MDBX_UNALIGNED_OK < 4 || (MDBX_DEBUG || MDBX_FORCE_ASSERTIONS || !defined(NDEBUG)) MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int /* Compare two items pointing at 4-byte aligned unsigned int's. */ cmp_int_align4(const MDBX_val *a, const MDBX_val *b); @@ -357,50 +307,38 @@ cmp_int_align4(const MDBX_val *a, const MDBX_val *b); #endif /* !MDBX_UNALIGNED_OK || debug */ /* Compare two items lexically */ -MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_lexical(const MDBX_val *a, - const MDBX_val *b); +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_lexical(const MDBX_val *a, const MDBX_val *b); /* Compare two items in reverse byte order */ -MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_reverse(const MDBX_val *a, - const MDBX_val *b); +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_reverse(const MDBX_val *a, const MDBX_val *b); /* Fast non-lexically comparator */ -MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_lenfast(const MDBX_val *a, - const MDBX_val *b); +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_lenfast(const MDBX_val *a, const MDBX_val *b); -MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL bool -eq_fast_slowpath(const uint8_t *a, const uint8_t *b, size_t l); +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL bool eq_fast_slowpath(const uint8_t *a, const uint8_t *b, size_t l); -MDBX_NOTHROW_PURE_FUNCTION static inline bool eq_fast(const MDBX_val *a, - const MDBX_val *b) { - return unlikely(a->iov_len == b->iov_len) && - 
eq_fast_slowpath(a->iov_base, b->iov_base, a->iov_len); +MDBX_NOTHROW_PURE_FUNCTION static inline bool eq_fast(const MDBX_val *a, const MDBX_val *b) { + return unlikely(a->iov_len == b->iov_len) && eq_fast_slowpath(a->iov_base, b->iov_base, a->iov_len); } -MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int -cmp_equal_or_greater(const MDBX_val *a, const MDBX_val *b); +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_equal_or_greater(const MDBX_val *a, const MDBX_val *b); -MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int -cmp_equal_or_wrong(const MDBX_val *a, const MDBX_val *b); +MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_equal_or_wrong(const MDBX_val *a, const MDBX_val *b); static inline MDBX_cmp_func *builtin_keycmp(MDBX_db_flags_t flags) { - return (flags & MDBX_REVERSEKEY) ? cmp_reverse - : (flags & MDBX_INTEGERKEY) ? cmp_int_align2 - : cmp_lexical; + return (flags & MDBX_REVERSEKEY) ? cmp_reverse : (flags & MDBX_INTEGERKEY) ? cmp_int_align2 : cmp_lexical; } static inline MDBX_cmp_func *builtin_datacmp(MDBX_db_flags_t flags) { return !(flags & MDBX_DUPSORT) ? cmp_lenfast - : ((flags & MDBX_INTEGERDUP) - ? cmp_int_unaligned - : ((flags & MDBX_REVERSEDUP) ? cmp_reverse : cmp_lexical)); + : ((flags & MDBX_INTEGERDUP) ? cmp_int_unaligned + : ((flags & MDBX_REVERSEDUP) ? 
cmp_reverse : cmp_lexical)); } /*----------------------------------------------------------------------------*/ -MDBX_INTERNAL uint32_t combine_durability_flags(const uint32_t a, - const uint32_t b); +MDBX_INTERNAL uint32_t combine_durability_flags(const uint32_t a, const uint32_t b); MDBX_CONST_FUNCTION static inline lck_t *lckless_stub(const MDBX_env *env) { uintptr_t stub = (uintptr_t)&env->lckless_placeholder; @@ -477,12 +415,10 @@ static inline int check_txn(const MDBX_txn *txn, int bad_bits) { } tASSERT(txn, (txn->flags & MDBX_TXN_FINISHED) || - (txn->flags & MDBX_NOSTICKYTHREADS) == - (txn->env->flags & MDBX_NOSTICKYTHREADS)); + (txn->flags & MDBX_NOSTICKYTHREADS) == (txn->env->flags & MDBX_NOSTICKYTHREADS)); #if MDBX_TXN_CHECKOWNER STATIC_ASSERT((long)MDBX_NOSTICKYTHREADS > (long)MDBX_TXN_FINISHED); - if ((txn->flags & (MDBX_NOSTICKYTHREADS | MDBX_TXN_FINISHED)) < - MDBX_TXN_FINISHED && + if ((txn->flags & (MDBX_NOSTICKYTHREADS | MDBX_TXN_FINISHED)) < MDBX_TXN_FINISHED && unlikely(txn->owner != osal_thread_self())) return txn->owner ? 
MDBX_THREAD_MISMATCH : MDBX_BAD_TXN; #endif /* MDBX_TXN_CHECKOWNER */ @@ -508,12 +444,10 @@ static inline int check_txn_rw(const MDBX_txn *txn, int bad_bits) { MDBX_INTERNAL void mincore_clean_cache(const MDBX_env *const env); -MDBX_INTERNAL void update_mlcnt(const MDBX_env *env, - const pgno_t new_aligned_mlocked_pgno, +MDBX_INTERNAL void update_mlcnt(const MDBX_env *env, const pgno_t new_aligned_mlocked_pgno, const bool lock_not_release); -MDBX_INTERNAL void munlock_after(const MDBX_env *env, const pgno_t aligned_pgno, - const size_t end_bytes); +MDBX_INTERNAL void munlock_after(const MDBX_env *env, const pgno_t aligned_pgno, const size_t end_bytes); MDBX_INTERNAL void munlock_all(const MDBX_env *env); @@ -527,15 +461,13 @@ MDBX_INTERNAL void munlock_all(const MDBX_env *env); #define osal_flush_incoherent_cpu_writeback() osal_compiler_barrier() #endif /* MDBX_CPU_WRITEBACK_INCOHERENT */ -MDBX_MAYBE_UNUSED static inline void -osal_flush_incoherent_mmap(const void *addr, size_t nbytes, - const intptr_t pagesize) { +MDBX_MAYBE_UNUSED static inline void osal_flush_incoherent_mmap(const void *addr, size_t nbytes, + const intptr_t pagesize) { #ifndef MDBX_MMAP_INCOHERENT_FILE_WRITE #error "The MDBX_MMAP_INCOHERENT_FILE_WRITE must be defined before" #elif MDBX_MMAP_INCOHERENT_FILE_WRITE char *const begin = (char *)(-pagesize & (intptr_t)addr); - char *const end = - (char *)(-pagesize & (intptr_t)((char *)addr + nbytes + pagesize - 1)); + char *const end = (char *)(-pagesize & (intptr_t)((char *)addr + nbytes + pagesize - 1)); int err = msync(begin, end - begin, MS_SYNC | MS_INVALIDATE) ? 
errno : 0; eASSERT(nullptr, err == 0); (void)err; diff --git a/src/coherency.c b/src/coherency.c index 7701271b..6b9df51d 100644 --- a/src/coherency.c +++ b/src/coherency.c @@ -4,8 +4,7 @@ #include "internals.h" /* check against https://libmdbx.dqdkfa.ru/dead-github/issues/269 */ -static bool coherency_check(const MDBX_env *env, const txnid_t txnid, - const volatile tree_t *trees, +static bool coherency_check(const MDBX_env *env, const txnid_t txnid, const volatile tree_t *trees, const volatile meta_t *meta, bool report) { const txnid_t freedb_mod_txnid = trees[FREE_DBI].mod_txnid; const txnid_t maindb_mod_txnid = trees[MAIN_DBI].mod_txnid; @@ -13,67 +12,42 @@ static bool coherency_check(const MDBX_env *env, const txnid_t txnid, const pgno_t freedb_root_pgno = trees[FREE_DBI].root; const page_t *freedb_root = - (env->dxb_mmap.base && freedb_root_pgno < last_pgno) - ? pgno2page(env, freedb_root_pgno) - : nullptr; + (env->dxb_mmap.base && freedb_root_pgno < last_pgno) ? pgno2page(env, freedb_root_pgno) : nullptr; const pgno_t maindb_root_pgno = trees[MAIN_DBI].root; const page_t *maindb_root = - (env->dxb_mmap.base && maindb_root_pgno < last_pgno) - ? pgno2page(env, maindb_root_pgno) - : nullptr; - const uint64_t magic_and_version = - unaligned_peek_u64_volatile(4, &meta->magic_and_version); + (env->dxb_mmap.base && maindb_root_pgno < last_pgno) ? pgno2page(env, maindb_root_pgno) : nullptr; + const uint64_t magic_and_version = unaligned_peek_u64_volatile(4, &meta->magic_and_version); bool ok = true; - if (freedb_root_pgno != P_INVALID && - unlikely(freedb_root_pgno >= last_pgno)) { + if (freedb_root_pgno != P_INVALID && unlikely(freedb_root_pgno >= last_pgno)) { if (report) - WARNING( - "catch invalid %s-db root %" PRIaPGNO " for meta_txnid %" PRIaTXN - " %s", - "free", freedb_root_pgno, txnid, - (env->stuck_meta < 0) - ? 
"(workaround for incoherent flaw of unified page/buffer cache)" - : "(wagering meta)"); + WARNING("catch invalid %s-db root %" PRIaPGNO " for meta_txnid %" PRIaTXN " %s", "free", freedb_root_pgno, txnid, + (env->stuck_meta < 0) ? "(workaround for incoherent flaw of unified page/buffer cache)" + : "(wagering meta)"); ok = false; } - if (maindb_root_pgno != P_INVALID && - unlikely(maindb_root_pgno >= last_pgno)) { + if (maindb_root_pgno != P_INVALID && unlikely(maindb_root_pgno >= last_pgno)) { if (report) - WARNING( - "catch invalid %s-db root %" PRIaPGNO " for meta_txnid %" PRIaTXN - " %s", - "main", maindb_root_pgno, txnid, - (env->stuck_meta < 0) - ? "(workaround for incoherent flaw of unified page/buffer cache)" - : "(wagering meta)"); + WARNING("catch invalid %s-db root %" PRIaPGNO " for meta_txnid %" PRIaTXN " %s", "main", maindb_root_pgno, txnid, + (env->stuck_meta < 0) ? "(workaround for incoherent flaw of unified page/buffer cache)" + : "(wagering meta)"); ok = false; } if (unlikely(txnid < freedb_mod_txnid || - (!freedb_mod_txnid && freedb_root && - likely(magic_and_version == MDBX_DATA_MAGIC)))) { + (!freedb_mod_txnid && freedb_root && likely(magic_and_version == MDBX_DATA_MAGIC)))) { if (report) WARNING( - "catch invalid %s-db.mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN - " %s", - "free", freedb_mod_txnid, txnid, - (env->stuck_meta < 0) - ? "(workaround for incoherent flaw of unified page/buffer cache)" - : "(wagering meta)"); + "catch invalid %s-db.mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN " %s", "free", freedb_mod_txnid, txnid, + (env->stuck_meta < 0) ? 
"(workaround for incoherent flaw of unified page/buffer cache)" : "(wagering meta)"); ok = false; } if (unlikely(txnid < maindb_mod_txnid || - (!maindb_mod_txnid && maindb_root && - likely(magic_and_version == MDBX_DATA_MAGIC)))) { + (!maindb_mod_txnid && maindb_root && likely(magic_and_version == MDBX_DATA_MAGIC)))) { if (report) WARNING( - "catch invalid %s-db.mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN - " %s", - "main", maindb_mod_txnid, txnid, - (env->stuck_meta < 0) - ? "(workaround for incoherent flaw of unified page/buffer cache)" - : "(wagering meta)"); + "catch invalid %s-db.mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN " %s", "main", maindb_mod_txnid, txnid, + (env->stuck_meta < 0) ? "(workaround for incoherent flaw of unified page/buffer cache)" : "(wagering meta)"); ok = false; } @@ -81,15 +55,13 @@ static bool coherency_check(const MDBX_env *env, const txnid_t txnid, * в пределах текущего отображения. Иначе возможны SIGSEGV до переноса * вызова coherency_check_head() после dxb_resize() внутри txn_renew(). */ if (likely(freedb_root && freedb_mod_txnid && - (size_t)ptr_dist(env->dxb_mmap.base, freedb_root) < - env->dxb_mmap.limit)) { + (size_t)ptr_dist(env->dxb_mmap.base, freedb_root) < env->dxb_mmap.limit)) { VALGRIND_MAKE_MEM_DEFINED(freedb_root, sizeof(freedb_root->txnid)); MDBX_ASAN_UNPOISON_MEMORY_REGION(freedb_root, sizeof(freedb_root->txnid)); const txnid_t root_txnid = freedb_root->txnid; if (unlikely(root_txnid != freedb_mod_txnid)) { if (report) - WARNING("catch invalid root_page %" PRIaPGNO " mod_txnid %" PRIaTXN - " for %s-db.mod_txnid %" PRIaTXN " %s", + WARNING("catch invalid root_page %" PRIaPGNO " mod_txnid %" PRIaTXN " for %s-db.mod_txnid %" PRIaTXN " %s", freedb_root_pgno, root_txnid, "free", freedb_mod_txnid, (env->stuck_meta < 0) ? 
"(workaround for incoherent flaw of " "unified page/buffer cache)" @@ -98,15 +70,13 @@ static bool coherency_check(const MDBX_env *env, const txnid_t txnid, } } if (likely(maindb_root && maindb_mod_txnid && - (size_t)ptr_dist(env->dxb_mmap.base, maindb_root) < - env->dxb_mmap.limit)) { + (size_t)ptr_dist(env->dxb_mmap.base, maindb_root) < env->dxb_mmap.limit)) { VALGRIND_MAKE_MEM_DEFINED(maindb_root, sizeof(maindb_root->txnid)); MDBX_ASAN_UNPOISON_MEMORY_REGION(maindb_root, sizeof(maindb_root->txnid)); const txnid_t root_txnid = maindb_root->txnid; if (unlikely(root_txnid != maindb_mod_txnid)) { if (report) - WARNING("catch invalid root_page %" PRIaPGNO " mod_txnid %" PRIaTXN - " for %s-db.mod_txnid %" PRIaTXN " %s", + WARNING("catch invalid root_page %" PRIaPGNO " mod_txnid %" PRIaTXN " for %s-db.mod_txnid %" PRIaTXN " %s", maindb_root_pgno, root_txnid, "main", maindb_mod_txnid, (env->stuck_meta < 0) ? "(workaround for incoherent flaw of " "unified page/buffer cache)" @@ -116,24 +86,19 @@ static bool coherency_check(const MDBX_env *env, const txnid_t txnid, } if (unlikely(!ok) && report) env->lck->pgops.incoherence.weak = - (env->lck->pgops.incoherence.weak >= INT32_MAX) - ? INT32_MAX - : env->lck->pgops.incoherence.weak + 1; + (env->lck->pgops.incoherence.weak >= INT32_MAX) ? 
INT32_MAX : env->lck->pgops.incoherence.weak + 1; return ok; } -__cold int coherency_timeout(uint64_t *timestamp, intptr_t pgno, - const MDBX_env *env) { +__cold int coherency_timeout(uint64_t *timestamp, intptr_t pgno, const MDBX_env *env) { if (likely(timestamp && *timestamp == 0)) *timestamp = osal_monotime(); - else if (unlikely(!timestamp || osal_monotime() - *timestamp > - osal_16dot16_to_monotime(65536 / 10))) { + else if (unlikely(!timestamp || osal_monotime() - *timestamp > osal_16dot16_to_monotime(65536 / 10))) { if (pgno >= 0 && pgno != env->stuck_meta) ERROR("bailout waiting for %" PRIuSIZE " page arrival %s", pgno, "(workaround for incoherent flaw of unified page/buffer cache)"); else if (env->stuck_meta < 0) - ERROR("bailout waiting for valid snapshot (%s)", - "workaround for incoherent flaw of unified page/buffer cache"); + ERROR("bailout waiting for valid snapshot (%s)", "workaround for incoherent flaw of unified page/buffer cache"); return MDBX_PROBLEM; } @@ -152,28 +117,23 @@ __cold int coherency_timeout(uint64_t *timestamp, intptr_t pgno, /* check with timeout as the workaround * for https://libmdbx.dqdkfa.ru/dead-github/issues/269 */ -__hot int coherency_fetch_head(MDBX_txn *txn, const meta_ptr_t head, - uint64_t *timestamp) { +__hot int coherency_fetch_head(MDBX_txn *txn, const meta_ptr_t head, uint64_t *timestamp) { /* Copy the DB info and flags */ txn->txnid = head.txnid; txn->geo = head.ptr_c->geometry; memcpy(txn->dbs, &head.ptr_c->trees, sizeof(head.ptr_c->trees)); STATIC_ASSERT(sizeof(head.ptr_c->trees) == CORE_DBS * sizeof(tree_t)); - VALGRIND_MAKE_MEM_UNDEFINED(txn->dbs + CORE_DBS, - txn->env->max_dbi - CORE_DBS); + VALGRIND_MAKE_MEM_UNDEFINED(txn->dbs + CORE_DBS, txn->env->max_dbi - CORE_DBS); txn->canary = head.ptr_c->canary; - if (unlikely(!coherency_check(txn->env, head.txnid, txn->dbs, head.ptr_v, - *timestamp == 0) || + if (unlikely(!coherency_check(txn->env, head.txnid, txn->dbs, head.ptr_v, *timestamp == 0) || txn->txnid != 
meta_txnid(head.ptr_v))) return coherency_timeout(timestamp, -1, txn->env); if (unlikely(txn->dbs[FREE_DBI].flags != MDBX_INTEGERKEY)) { if ((txn->dbs[FREE_DBI].flags & DB_PERSISTENT_FLAGS) != MDBX_INTEGERKEY || - unaligned_peek_u64(4, &head.ptr_c->magic_and_version) == - MDBX_DATA_MAGIC) { - ERROR("unexpected/invalid db-flags 0x%x for %s", txn->dbs[FREE_DBI].flags, - "GC/FreeDB"); + unaligned_peek_u64(4, &head.ptr_c->magic_and_version) == MDBX_DATA_MAGIC) { + ERROR("unexpected/invalid db-flags 0x%x for %s", txn->dbs[FREE_DBI].flags, "GC/FreeDB"); return MDBX_INCOMPATIBLE; } txn->dbs[FREE_DBI].flags &= DB_PERSISTENT_FLAGS; @@ -183,23 +143,19 @@ __hot int coherency_fetch_head(MDBX_txn *txn, const meta_ptr_t head, return MDBX_SUCCESS; } -int coherency_check_written(const MDBX_env *env, const txnid_t txnid, - const volatile meta_t *meta, const intptr_t pgno, +int coherency_check_written(const MDBX_env *env, const txnid_t txnid, const volatile meta_t *meta, const intptr_t pgno, uint64_t *timestamp) { const bool report = !(timestamp && *timestamp); const txnid_t head_txnid = meta_txnid(meta); if (likely(head_txnid >= MIN_TXNID && head_txnid >= txnid)) { - if (likely( - coherency_check(env, head_txnid, &meta->trees.gc, meta, report))) { + if (likely(coherency_check(env, head_txnid, &meta->trees.gc, meta, report))) { eASSERT(env, meta->trees.gc.flags == MDBX_INTEGERKEY); eASSERT(env, check_table_flags(meta->trees.main.flags)); return MDBX_SUCCESS; } } else if (report) { env->lck->pgops.incoherence.weak = - (env->lck->pgops.incoherence.weak >= INT32_MAX) - ? INT32_MAX - : env->lck->pgops.incoherence.weak + 1; + (env->lck->pgops.incoherence.weak >= INT32_MAX) ? INT32_MAX : env->lck->pgops.incoherence.weak + 1; WARNING("catch %s txnid %" PRIaTXN " for meta_%" PRIaPGNO " %s", (head_txnid < MIN_TXNID) ? 
"invalid" : "unexpected", head_txnid, bytes2pgno(env, ptr_dist(meta, env->dxb_mmap.base)), @@ -208,9 +164,7 @@ int coherency_check_written(const MDBX_env *env, const txnid_t txnid, return coherency_timeout(timestamp, pgno, env); } -bool coherency_check_meta(const MDBX_env *env, const volatile meta_t *meta, - bool report) { +bool coherency_check_meta(const MDBX_env *env, const volatile meta_t *meta, bool report) { uint64_t timestamp = 0; - return coherency_check_written(env, 0, meta, -1, - report ? ×tamp : nullptr) == MDBX_SUCCESS; + return coherency_check_written(env, 0, meta, -1, report ? ×tamp : nullptr) == MDBX_SUCCESS; } diff --git a/src/cold.c b/src/cold.c index 35665101..11260ace 100644 --- a/src/cold.c +++ b/src/cold.c @@ -14,8 +14,7 @@ __cold size_t mdbx_default_pagesize(void) { __cold intptr_t mdbx_limits_dbsize_min(intptr_t pagesize) { if (pagesize < 1) pagesize = (intptr_t)mdbx_default_pagesize(); - else if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || - pagesize > (intptr_t)MDBX_MAX_PAGESIZE || + else if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || pagesize > (intptr_t)MDBX_MAX_PAGESIZE || !is_powerof2((size_t)pagesize))) return -1; @@ -25,8 +24,7 @@ __cold intptr_t mdbx_limits_dbsize_min(intptr_t pagesize) { __cold intptr_t mdbx_limits_dbsize_max(intptr_t pagesize) { if (pagesize < 1) pagesize = (intptr_t)mdbx_default_pagesize(); - else if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || - pagesize > (intptr_t)MDBX_MAX_PAGESIZE || + else if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || pagesize > (intptr_t)MDBX_MAX_PAGESIZE || !is_powerof2((size_t)pagesize))) return -1; @@ -38,112 +36,90 @@ __cold intptr_t mdbx_limits_dbsize_max(intptr_t pagesize) { __cold intptr_t mdbx_limits_txnsize_max(intptr_t pagesize) { if (pagesize < 1) pagesize = (intptr_t)mdbx_default_pagesize(); - else if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || - pagesize > (intptr_t)MDBX_MAX_PAGESIZE || + else if (unlikely(pagesize < 
(intptr_t)MDBX_MIN_PAGESIZE || pagesize > (intptr_t)MDBX_MAX_PAGESIZE || !is_powerof2((size_t)pagesize))) return -1; STATIC_ASSERT(MAX_MAPSIZE < INTPTR_MAX); - const uint64_t pgl_limit = - pagesize * (uint64_t)(PAGELIST_LIMIT / MDBX_GOLD_RATIO_DBL); + const uint64_t pgl_limit = pagesize * (uint64_t)(PAGELIST_LIMIT / MDBX_GOLD_RATIO_DBL); const uint64_t map_limit = (uint64_t)(MAX_MAPSIZE / MDBX_GOLD_RATIO_DBL); return (pgl_limit < map_limit) ? (intptr_t)pgl_limit : (intptr_t)map_limit; } -__cold intptr_t mdbx_limits_keysize_max(intptr_t pagesize, - MDBX_db_flags_t flags) { +__cold intptr_t mdbx_limits_keysize_max(intptr_t pagesize, MDBX_db_flags_t flags) { if (pagesize < 1) pagesize = (intptr_t)mdbx_default_pagesize(); - if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || - pagesize > (intptr_t)MDBX_MAX_PAGESIZE || + if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || pagesize > (intptr_t)MDBX_MAX_PAGESIZE || !is_powerof2((size_t)pagesize))) return -1; return keysize_max(pagesize, flags); } -__cold int mdbx_env_get_maxkeysize_ex(const MDBX_env *env, - MDBX_db_flags_t flags) { +__cold int mdbx_env_get_maxkeysize_ex(const MDBX_env *env, MDBX_db_flags_t flags) { if (unlikely(!env || env->signature.weak != env_signature)) return -1; return (int)mdbx_limits_keysize_max((intptr_t)env->ps, flags); } -__cold int mdbx_env_get_maxkeysize(const MDBX_env *env) { - return mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT); -} +__cold int mdbx_env_get_maxkeysize(const MDBX_env *env) { return mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT); } -__cold intptr_t mdbx_limits_keysize_min(MDBX_db_flags_t flags) { - return keysize_min(flags); -} +__cold intptr_t mdbx_limits_keysize_min(MDBX_db_flags_t flags) { return keysize_min(flags); } -__cold intptr_t mdbx_limits_valsize_max(intptr_t pagesize, - MDBX_db_flags_t flags) { +__cold intptr_t mdbx_limits_valsize_max(intptr_t pagesize, MDBX_db_flags_t flags) { if (pagesize < 1) pagesize = (intptr_t)mdbx_default_pagesize(); - if 
(unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || - pagesize > (intptr_t)MDBX_MAX_PAGESIZE || + if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || pagesize > (intptr_t)MDBX_MAX_PAGESIZE || !is_powerof2((size_t)pagesize))) return -1; return valsize_max(pagesize, flags); } -__cold int mdbx_env_get_maxvalsize_ex(const MDBX_env *env, - MDBX_db_flags_t flags) { +__cold int mdbx_env_get_maxvalsize_ex(const MDBX_env *env, MDBX_db_flags_t flags) { if (unlikely(!env || env->signature.weak != env_signature)) return -1; return (int)mdbx_limits_valsize_max((intptr_t)env->ps, flags); } -__cold intptr_t mdbx_limits_valsize_min(MDBX_db_flags_t flags) { - return valsize_min(flags); -} +__cold intptr_t mdbx_limits_valsize_min(MDBX_db_flags_t flags) { return valsize_min(flags); } -__cold intptr_t mdbx_limits_pairsize4page_max(intptr_t pagesize, - MDBX_db_flags_t flags) { +__cold intptr_t mdbx_limits_pairsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags) { if (pagesize < 1) pagesize = (intptr_t)mdbx_default_pagesize(); - if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || - pagesize > (intptr_t)MDBX_MAX_PAGESIZE || + if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || pagesize > (intptr_t)MDBX_MAX_PAGESIZE || !is_powerof2((size_t)pagesize))) return -1; - if (flags & - (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP)) + if (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP)) return BRANCH_NODE_MAX(pagesize) - NODESIZE; return LEAF_NODE_MAX(pagesize) - NODESIZE; } -__cold int mdbx_env_get_pairsize4page_max(const MDBX_env *env, - MDBX_db_flags_t flags) { +__cold int mdbx_env_get_pairsize4page_max(const MDBX_env *env, MDBX_db_flags_t flags) { if (unlikely(!env || env->signature.weak != env_signature)) return -1; return (int)mdbx_limits_pairsize4page_max((intptr_t)env->ps, flags); } -__cold intptr_t mdbx_limits_valsize4page_max(intptr_t pagesize, - MDBX_db_flags_t flags) { +__cold intptr_t mdbx_limits_valsize4page_max(intptr_t 
pagesize, MDBX_db_flags_t flags) { if (pagesize < 1) pagesize = (intptr_t)mdbx_default_pagesize(); - if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || - pagesize > (intptr_t)MDBX_MAX_PAGESIZE || + if (unlikely(pagesize < (intptr_t)MDBX_MIN_PAGESIZE || pagesize > (intptr_t)MDBX_MAX_PAGESIZE || !is_powerof2((size_t)pagesize))) return -1; - if (flags & - (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP)) + if (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP)) return valsize_max(pagesize, flags); return PAGESPACE(pagesize); } -__cold int mdbx_env_get_valsize4page_max(const MDBX_env *env, - MDBX_db_flags_t flags) { +__cold int mdbx_env_get_valsize4page_max(const MDBX_env *env, MDBX_db_flags_t flags) { if (unlikely(!env || env->signature.weak != env_signature)) return -1; @@ -152,17 +128,14 @@ __cold int mdbx_env_get_valsize4page_max(const MDBX_env *env, /*----------------------------------------------------------------------------*/ -__cold static void stat_add(const tree_t *db, MDBX_stat *const st, - const size_t bytes) { +__cold static void stat_add(const tree_t *db, MDBX_stat *const st, const size_t bytes) { st->ms_depth += db->height; st->ms_branch_pages += db->branch_pages; st->ms_leaf_pages += db->leaf_pages; st->ms_overflow_pages += db->large_pages; st->ms_entries += db->items; - if (likely(bytes >= - offsetof(MDBX_stat, ms_mod_txnid) + sizeof(st->ms_mod_txnid))) - st->ms_mod_txnid = - (st->ms_mod_txnid > db->mod_txnid) ? st->ms_mod_txnid : db->mod_txnid; + if (likely(bytes >= offsetof(MDBX_stat, ms_mod_txnid) + sizeof(st->ms_mod_txnid))) + st->ms_mod_txnid = (st->ms_mod_txnid > db->mod_txnid) ? 
st->ms_mod_txnid : db->mod_txnid; } __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { @@ -179,15 +152,13 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { const MDBX_env *const env = txn->env; st->ms_psize = env->ps; - TXN_FOREACH_DBI_FROM( - txn, dbi, - /* assuming GC is internal and not subject for accounting */ MAIN_DBI) { + TXN_FOREACH_DBI_FROM(txn, dbi, + /* assuming GC is internal and not subject for accounting */ MAIN_DBI) { if ((txn->dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID) stat_add(txn->dbs + dbi, st, bytes); } - if (!(txn->dbs[MAIN_DBI].flags & MDBX_DUPSORT) && - txn->dbs[MAIN_DBI].items /* TODO: use `md_subs` field */) { + if (!(txn->dbs[MAIN_DBI].flags & MDBX_DUPSORT) && txn->dbs[MAIN_DBI].items /* TODO: use `md_subs` field */) { /* scan and account not opened named tables */ err = tree_search(&cx.outer, nullptr, Z_FIRST); @@ -198,8 +169,7 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { if (node_flags(node) != N_TREE) continue; if (unlikely(node_ds(node) != sizeof(tree_t))) { - ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid table node size", node_ds(node)); + ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid table node size", node_ds(node)); return MDBX_CORRUPTED; } @@ -228,8 +198,7 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { return MDBX_SUCCESS; } -__cold int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, - MDBX_stat *dest, size_t bytes) { +__cold int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_stat *dest, size_t bytes) { if (unlikely(!dest)) return LOG_IFERR(MDBX_EINVAL); const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid); @@ -265,18 +234,15 @@ __cold int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, /*----------------------------------------------------------------------------*/ static size_t 
estimate_rss(size_t database_bytes) { - return database_bytes + database_bytes / 64 + - (512 + MDBX_WORDBITS * 16) * MEGABYTE; + return database_bytes + database_bytes / 64 + (512 + MDBX_WORDBITS * 16) * MEGABYTE; } -__cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, - MDBX_warmup_flags_t flags, +__cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, MDBX_warmup_flags_t flags, unsigned timeout_seconds_16dot16) { if (unlikely(env == nullptr && txn == nullptr)) return LOG_IFERR(MDBX_EINVAL); - if (unlikely(flags > - (MDBX_warmup_force | MDBX_warmup_oomsafe | MDBX_warmup_lock | - MDBX_warmup_touchlimit | MDBX_warmup_release))) + if (unlikely(flags > (MDBX_warmup_force | MDBX_warmup_oomsafe | MDBX_warmup_lock | MDBX_warmup_touchlimit | + MDBX_warmup_release))) return LOG_IFERR(MDBX_EINVAL); if (txn) { @@ -294,10 +260,9 @@ __cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, env = txn->env; } - const uint64_t timeout_monotime = - (timeout_seconds_16dot16 && (flags & MDBX_warmup_force)) - ? osal_monotime() + osal_16dot16_to_monotime(timeout_seconds_16dot16) - : 0; + const uint64_t timeout_monotime = (timeout_seconds_16dot16 && (flags & MDBX_warmup_force)) + ? osal_monotime() + osal_16dot16_to_monotime(timeout_seconds_16dot16) + : 0; if (flags & MDBX_warmup_release) munlock_all(env); @@ -317,18 +282,14 @@ __cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, const size_t estimated_rss = estimate_rss(used_range); #if defined(_WIN32) || defined(_WIN64) SIZE_T current_ws_lower, current_ws_upper; - if (GetProcessWorkingSetSize(GetCurrentProcess(), ¤t_ws_lower, - ¤t_ws_upper) && + if (GetProcessWorkingSetSize(GetCurrentProcess(), ¤t_ws_lower, ¤t_ws_upper) && current_ws_lower < estimated_rss) { const SIZE_T ws_lower = estimated_rss; const SIZE_T ws_upper = - (MDBX_WORDBITS == 32 && ws_lower > MEGABYTE * 2048) - ? 
ws_lower - : ws_lower + MDBX_WORDBITS * MEGABYTE * 32; + (MDBX_WORDBITS == 32 && ws_lower > MEGABYTE * 2048) ? ws_lower : ws_lower + MDBX_WORDBITS * MEGABYTE * 32; if (!SetProcessWorkingSetSize(GetCurrentProcess(), ws_lower, ws_upper)) { rc = (int)GetLastError(); - WARNING("SetProcessWorkingSetSize(%zu, %zu) error %d", ws_lower, - ws_upper, rc); + WARNING("SetProcessWorkingSetSize(%zu, %zu) error %d", ws_lower, ws_upper, rc); } } #endif /* Windows */ @@ -340,23 +301,21 @@ __cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, rss.rlim_max = estimated_rss; if (setrlimit(RLIMIT_RSS, &rss)) { rc = errno; - WARNING("setrlimit(%s, {%zu, %zu}) error %d", "RLIMIT_RSS", - (size_t)rss.rlim_cur, (size_t)rss.rlim_max, rc); + WARNING("setrlimit(%s, {%zu, %zu}) error %d", "RLIMIT_RSS", (size_t)rss.rlim_cur, (size_t)rss.rlim_max, rc); } } #endif /* RLIMIT_RSS */ #ifdef RLIMIT_MEMLOCK if (flags & MDBX_warmup_lock) { struct rlimit memlock; - if (getrlimit(RLIMIT_MEMLOCK, &memlock) == 0 && - memlock.rlim_cur < estimated_rss) { + if (getrlimit(RLIMIT_MEMLOCK, &memlock) == 0 && memlock.rlim_cur < estimated_rss) { memlock.rlim_cur = estimated_rss; if (memlock.rlim_max < estimated_rss) memlock.rlim_max = estimated_rss; if (setrlimit(RLIMIT_MEMLOCK, &memlock)) { rc = errno; - WARNING("setrlimit(%s, {%zu, %zu}) error %d", "RLIMIT_MEMLOCK", - (size_t)memlock.rlim_cur, (size_t)memlock.rlim_max, rc); + WARNING("setrlimit(%s, {%zu, %zu}) error %d", "RLIMIT_MEMLOCK", (size_t)memlock.rlim_cur, + (size_t)memlock.rlim_max, rc); } } } @@ -364,12 +323,10 @@ __cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, (void)estimated_rss; } -#if defined(MLOCK_ONFAULT) && \ - ((defined(_GNU_SOURCE) && __GLIBC_PREREQ(2, 27)) || \ - (defined(__ANDROID_API__) && __ANDROID_API__ >= 30)) && \ +#if defined(MLOCK_ONFAULT) && \ + ((defined(_GNU_SOURCE) && __GLIBC_PREREQ(2, 27)) || (defined(__ANDROID_API__) && __ANDROID_API__ >= 30)) && \ (defined(__linux__) || defined(__gnu_linux__)) - 
if ((flags & MDBX_warmup_lock) != 0 && - globals.linux_kernel_version >= 0x04040000 && + if ((flags & MDBX_warmup_lock) != 0 && globals.linux_kernel_version >= 0x04040000 && atomic_load32(&env->mlocked_pgno, mo_AcquireRelease) < mlock_pgno) { if (mlock2(env->dxb_mmap.base, used_range, MLOCK_ONFAULT)) { rc = errno; @@ -388,8 +345,7 @@ __cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, if (err != MDBX_SUCCESS && rc == MDBX_SUCCESS) rc = err; - if ((flags & MDBX_warmup_force) != 0 && - (rc == MDBX_SUCCESS || rc == MDBX_ENOSYS)) { + if ((flags & MDBX_warmup_force) != 0 && (rc == MDBX_SUCCESS || rc == MDBX_ENOSYS)) { const volatile uint8_t *ptr = env->dxb_mmap.base; size_t offset = 0, unused = 42; #if !(defined(_WIN32) || defined(_WIN64)) @@ -440,8 +396,7 @@ __cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, (void)unused; } - if ((flags & MDBX_warmup_lock) != 0 && - (rc == MDBX_SUCCESS || rc == MDBX_ENOSYS) && + if ((flags & MDBX_warmup_lock) != 0 && (rc == MDBX_SUCCESS || rc == MDBX_ENOSYS) && atomic_load32(&env->mlocked_pgno, mo_AcquireRelease) < mlock_pgno) { #if defined(_WIN32) || defined(_WIN64) if (VirtualLock(env->dxb_mmap.base, used_range)) { @@ -481,14 +436,12 @@ __cold int mdbx_env_get_fd(const MDBX_env *env, mdbx_filehandle_t *arg) { return MDBX_SUCCESS; } -__cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, - bool onoff) { +__cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, bool onoff) { int rc = check_env(env, false); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); - if (unlikely(flags & ((env->flags & ENV_ACTIVE) ? ~ENV_CHANGEABLE_FLAGS - : ~ENV_USABLE_FLAGS))) + if (unlikely(flags & ((env->flags & ENV_ACTIVE) ? 
~ENV_CHANGEABLE_FLAGS : ~ENV_USABLE_FLAGS))) return LOG_IFERR(MDBX_EPERM); if (unlikely(env->flags & MDBX_RDONLY)) @@ -536,9 +489,7 @@ __cold int mdbx_env_set_userctx(MDBX_env *env, void *ctx) { return MDBX_SUCCESS; } -__cold void *mdbx_env_get_userctx(const MDBX_env *env) { - return env ? env->userctx : nullptr; -} +__cold void *mdbx_env_get_userctx(const MDBX_env *env) { return env ? env->userctx : nullptr; } __cold int mdbx_env_set_assert(MDBX_env *env, MDBX_assert_func *func) { int rc = check_env(env, false); @@ -564,8 +515,7 @@ __cold int mdbx_env_set_hsr(MDBX_env *env, MDBX_hsr_func *hsr) { } __cold MDBX_hsr_func *mdbx_env_get_hsr(const MDBX_env *env) { - return likely(env && env->signature.weak == env_signature) ? env->hsr_callback - : nullptr; + return likely(env && env->signature.weak == env_signature) ? env->hsr_callback : nullptr; } #if defined(_WIN32) || defined(_WIN64) @@ -595,13 +545,10 @@ __cold int mdbx_env_get_path(const MDBX_env *env, const char **arg) { *arg = nullptr; DWORD flags = /* WC_ERR_INVALID_CHARS */ 0x80; size_t mb_len = - WideCharToMultiByte(CP_THREAD_ACP, flags, env->pathname.specified, -1, - nullptr, 0, nullptr, nullptr); + WideCharToMultiByte(CP_THREAD_ACP, flags, env->pathname.specified, -1, nullptr, 0, nullptr, nullptr); rc = mb_len ? MDBX_SUCCESS : (int)GetLastError(); if (rc == ERROR_INVALID_FLAGS) { - mb_len = - WideCharToMultiByte(CP_THREAD_ACP, flags = 0, env->pathname.specified, - -1, nullptr, 0, nullptr, nullptr); + mb_len = WideCharToMultiByte(CP_THREAD_ACP, flags = 0, env->pathname.specified, -1, nullptr, 0, nullptr, nullptr); rc = mb_len ? 
MDBX_SUCCESS : (int)GetLastError(); } if (unlikely(rc != MDBX_SUCCESS)) @@ -610,16 +557,14 @@ __cold int mdbx_env_get_path(const MDBX_env *env, const char **arg) { char *const mb_pathname = osal_malloc(mb_len); if (!mb_pathname) return LOG_IFERR(MDBX_ENOMEM); - if (mb_len != (size_t)WideCharToMultiByte( - CP_THREAD_ACP, flags, env->pathname.specified, -1, - mb_pathname, (int)mb_len, nullptr, nullptr)) { + if (mb_len != (size_t)WideCharToMultiByte(CP_THREAD_ACP, flags, env->pathname.specified, -1, mb_pathname, + (int)mb_len, nullptr, nullptr)) { rc = (int)GetLastError(); osal_free(mb_pathname); return LOG_IFERR(rc); } if (env->pathname_char || - InterlockedCompareExchangePointer((PVOID volatile *)&env->pathname_char, - mb_pathname, nullptr)) + InterlockedCompareExchangePointer((PVOID volatile *)&env->pathname_char, mb_pathname, nullptr)) osal_free(mb_pathname); } *arg = env->pathname_char; @@ -634,41 +579,29 @@ __cold int mdbx_env_get_path(const MDBX_env *env, const char **arg) { #ifndef LIBMDBX_NO_EXPORTS_LEGACY_API -LIBMDBX_API int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent, - MDBX_txn_flags_t flags, MDBX_txn **ret) { +LIBMDBX_API int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, MDBX_txn **ret) { return __inline_mdbx_txn_begin(env, parent, flags, ret); } -LIBMDBX_API int mdbx_txn_commit(MDBX_txn *txn) { - return __inline_mdbx_txn_commit(txn); -} +LIBMDBX_API int mdbx_txn_commit(MDBX_txn *txn) { return __inline_mdbx_txn_commit(txn); } -LIBMDBX_API __cold int mdbx_env_stat(const MDBX_env *env, MDBX_stat *stat, - size_t bytes) { +LIBMDBX_API __cold int mdbx_env_stat(const MDBX_env *env, MDBX_stat *stat, size_t bytes) { return __inline_mdbx_env_stat(env, stat, bytes); } -LIBMDBX_API __cold int mdbx_env_info(const MDBX_env *env, MDBX_envinfo *info, - size_t bytes) { +LIBMDBX_API __cold int mdbx_env_info(const MDBX_env *env, MDBX_envinfo *info, size_t bytes) { return __inline_mdbx_env_info(env, info, bytes); } -LIBMDBX_API int 
mdbx_dbi_flags(const MDBX_txn *txn, MDBX_dbi dbi, - unsigned *flags) { +LIBMDBX_API int mdbx_dbi_flags(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags) { return __inline_mdbx_dbi_flags(txn, dbi, flags); } -LIBMDBX_API __cold int mdbx_env_sync(MDBX_env *env) { - return __inline_mdbx_env_sync(env); -} +LIBMDBX_API __cold int mdbx_env_sync(MDBX_env *env) { return __inline_mdbx_env_sync(env); } -LIBMDBX_API __cold int mdbx_env_sync_poll(MDBX_env *env) { - return __inline_mdbx_env_sync_poll(env); -} +LIBMDBX_API __cold int mdbx_env_sync_poll(MDBX_env *env) { return __inline_mdbx_env_sync_poll(env); } -LIBMDBX_API __cold int mdbx_env_close(MDBX_env *env) { - return __inline_mdbx_env_close(env); -} +LIBMDBX_API __cold int mdbx_env_close(MDBX_env *env) { return __inline_mdbx_env_close(env); } LIBMDBX_API __cold int mdbx_env_set_mapsize(MDBX_env *env, size_t size) { return __inline_mdbx_env_set_mapsize(env, size); @@ -682,13 +615,11 @@ LIBMDBX_API __cold int mdbx_env_get_maxdbs(const MDBX_env *env, MDBX_dbi *dbs) { return __inline_mdbx_env_get_maxdbs(env, dbs); } -LIBMDBX_API __cold int mdbx_env_set_maxreaders(MDBX_env *env, - unsigned readers) { +LIBMDBX_API __cold int mdbx_env_set_maxreaders(MDBX_env *env, unsigned readers) { return __inline_mdbx_env_set_maxreaders(env, readers); } -LIBMDBX_API __cold int mdbx_env_get_maxreaders(const MDBX_env *env, - unsigned *readers) { +LIBMDBX_API __cold int mdbx_env_get_maxreaders(const MDBX_env *env, unsigned *readers) { return __inline_mdbx_env_get_maxreaders(env, readers); } @@ -696,35 +627,24 @@ LIBMDBX_API __cold int mdbx_env_set_syncbytes(MDBX_env *env, size_t threshold) { return __inline_mdbx_env_set_syncbytes(env, threshold); } -LIBMDBX_API __cold int mdbx_env_get_syncbytes(const MDBX_env *env, - size_t *threshold) { +LIBMDBX_API __cold int mdbx_env_get_syncbytes(const MDBX_env *env, size_t *threshold) { return __inline_mdbx_env_get_syncbytes(env, threshold); } -LIBMDBX_API __cold int mdbx_env_set_syncperiod(MDBX_env *env, 
- unsigned seconds_16dot16) { +LIBMDBX_API __cold int mdbx_env_set_syncperiod(MDBX_env *env, unsigned seconds_16dot16) { return __inline_mdbx_env_set_syncperiod(env, seconds_16dot16); } -LIBMDBX_API __cold int mdbx_env_get_syncperiod(const MDBX_env *env, - unsigned *seconds_16dot16) { +LIBMDBX_API __cold int mdbx_env_get_syncperiod(const MDBX_env *env, unsigned *seconds_16dot16) { return __inline_mdbx_env_get_syncperiod(env, seconds_16dot16); } -LIBMDBX_API __cold uint64_t mdbx_key_from_int64(const int64_t i64) { - return __inline_mdbx_key_from_int64(i64); -} +LIBMDBX_API __cold uint64_t mdbx_key_from_int64(const int64_t i64) { return __inline_mdbx_key_from_int64(i64); } -LIBMDBX_API __cold uint32_t mdbx_key_from_int32(const int32_t i32) { - return __inline_mdbx_key_from_int32(i32); -} +LIBMDBX_API __cold uint32_t mdbx_key_from_int32(const int32_t i32) { return __inline_mdbx_key_from_int32(i32); } -LIBMDBX_API __cold intptr_t mdbx_limits_pgsize_min(void) { - return __inline_mdbx_limits_pgsize_min(); -} +LIBMDBX_API __cold intptr_t mdbx_limits_pgsize_min(void) { return __inline_mdbx_limits_pgsize_min(); } -LIBMDBX_API __cold intptr_t mdbx_limits_pgsize_max(void) { - return __inline_mdbx_limits_pgsize_max(); -} +LIBMDBX_API __cold intptr_t mdbx_limits_pgsize_max(void) { return __inline_mdbx_limits_pgsize_max(); } #endif /* LIBMDBX_NO_EXPORTS_LEGACY_API */ diff --git a/src/copy.c b/src/copy.c index c1b7ef7d..3c7a4f2c 100644 --- a/src/copy.c +++ b/src/copy.c @@ -93,8 +93,7 @@ __cold static int compacting_toggle_write_buffers(ctx_t *ctx) { return ctx->error; } -static int compacting_put_bytes(ctx_t *ctx, const void *src, size_t bytes, - pgno_t pgno, pgno_t npages) { +static int compacting_put_bytes(ctx_t *ctx, const void *src, size_t bytes, pgno_t pgno, pgno_t npages) { assert(pgno == 0 || bytes > PAGEHDRSZ); while (bytes > 0) { const size_t side = ctx->head & 1; @@ -130,17 +129,14 @@ static int compacting_put_bytes(ctx_t *ctx, const void *src, size_t bytes, return 
MDBX_SUCCESS; } -static int compacting_put_page(ctx_t *ctx, const page_t *mp, - const size_t head_bytes, const size_t tail_bytes, +static int compacting_put_page(ctx_t *ctx, const page_t *mp, const size_t head_bytes, const size_t tail_bytes, const pgno_t npages) { if (tail_bytes) { assert(head_bytes + tail_bytes <= ctx->env->ps); - assert(npages == 1 && - (page_type(mp) == P_BRANCH || page_type(mp) == P_LEAF)); + assert(npages == 1 && (page_type(mp) == P_BRANCH || page_type(mp) == P_LEAF)); } else { assert(head_bytes <= pgno2bytes(ctx->env, npages)); - assert((npages == 1 && page_type(mp) == (P_LEAF | P_DUPFIX)) || - page_type(mp) == P_LARGE); + assert((npages == 1 && page_type(mp) == (P_LEAF | P_DUPFIX)) || page_type(mp) == P_LARGE); } const pgno_t pgno = ctx->first_unallocated; @@ -148,18 +144,13 @@ static int compacting_put_page(ctx_t *ctx, const page_t *mp, int err = compacting_put_bytes(ctx, mp, head_bytes, pgno, npages); if (unlikely(err != MDBX_SUCCESS)) return err; - err = compacting_put_bytes( - ctx, nullptr, pgno2bytes(ctx->env, npages) - (head_bytes + tail_bytes), 0, - 0); + err = compacting_put_bytes(ctx, nullptr, pgno2bytes(ctx->env, npages) - (head_bytes + tail_bytes), 0, 0); if (unlikely(err != MDBX_SUCCESS)) return err; - return compacting_put_bytes(ctx, ptr_disp(mp, ctx->env->ps - tail_bytes), - tail_bytes, 0, 0); + return compacting_put_bytes(ctx, ptr_disp(mp, ctx->env->ps - tail_bytes), tail_bytes, 0, 0); } -__cold static int compacting_walk(ctx_t *ctx, MDBX_cursor *mc, - pgno_t *const parent_pgno, - txnid_t parent_txnid) { +__cold static int compacting_walk(ctx_t *ctx, MDBX_cursor *mc, pgno_t *const parent_pgno, txnid_t parent_txnid) { mc->top = 0; mc->ki[0] = 0; int rc = page_get(mc, *parent_pgno, &mc->pg[0], parent_txnid); @@ -201,22 +192,18 @@ __cold static int compacting_walk(ctx_t *ctx, MDBX_cursor *mc, node = page_node(mp, i); } - const pgr_t lp = - page_get_large(mc, node_largedata_pgno(node), mp->txnid); + const pgr_t lp = 
page_get_large(mc, node_largedata_pgno(node), mp->txnid); if (unlikely((rc = lp.err) != MDBX_SUCCESS)) goto bailout; const size_t datasize = node_ds(node); const pgno_t npages = largechunk_npages(ctx->env, datasize); poke_pgno(node_data(node), ctx->first_unallocated); - rc = compacting_put_page(ctx, lp.page, PAGEHDRSZ + datasize, 0, - npages); + rc = compacting_put_page(ctx, lp.page, PAGEHDRSZ + datasize, 0, npages); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } else if (node_flags(node) & N_TREE) { - if (!MDBX_DISABLE_VALIDATION && - unlikely(node_ds(node) != sizeof(tree_t))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid dupsort sub-tree node size", + if (!MDBX_DISABLE_VALIDATION && unlikely(node_ds(node) != sizeof(tree_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid dupsort sub-tree node size", (unsigned)node_ds(node)); rc = MDBX_CORRUPTED; goto bailout; @@ -235,13 +222,11 @@ __cold static int compacting_walk(ctx_t *ctx, MDBX_cursor *mc, rc = cursor_dupsort_setup(mc, node, mp); if (likely(rc == MDBX_SUCCESS)) { nested = &mc->subcur->nested_tree; - rc = compacting_walk(ctx, &mc->subcur->cursor, &nested->root, - mp->txnid); + rc = compacting_walk(ctx, &mc->subcur->cursor, &nested->root, mp->txnid); } } else { cASSERT(mc, (mc->flags & z_inner) == 0 && mc->subcur == 0); - cursor_couple_t *couple = - container_of(mc, cursor_couple_t, outer); + cursor_couple_t *couple = container_of(mc, cursor_couple_t, outer); nested = &couple->inner.nested_tree; memcpy(nested, node_data(node), sizeof(tree_t)); rc = compacting_walk_tree(ctx, nested); @@ -280,11 +265,9 @@ __cold static int compacting_walk(ctx_t *ctx, MDBX_cursor *mc, const pgno_t pgno = ctx->first_unallocated; if (likely(!is_dupfix_leaf(mp))) { - rc = compacting_put_page(ctx, mp, PAGEHDRSZ + mp->lower, - ctx->env->ps - (PAGEHDRSZ + mp->upper), 1); + rc = compacting_put_page(ctx, mp, PAGEHDRSZ + mp->lower, ctx->env->ps - (PAGEHDRSZ + mp->upper), 1); } else { - rc = 
compacting_put_page( - ctx, mp, PAGEHDRSZ + page_numkeys(mp) * mp->dupfix_ksize, 0, 1); + rc = compacting_put_page(ctx, mp, PAGEHDRSZ + page_numkeys(mp) * mp->dupfix_ksize, 0, 1); } if (unlikely(rc != MDBX_SUCCESS)) goto bailout; @@ -326,19 +309,15 @@ __cold static int compacting_walk_tree(ctx_t *ctx, tree_t *tree) { __cold static void compacting_fixup_meta(MDBX_env *env, meta_t *meta) { eASSERT(env, meta->trees.gc.mod_txnid || meta->trees.gc.root == P_INVALID); - eASSERT(env, - meta->trees.main.mod_txnid || meta->trees.main.root == P_INVALID); + eASSERT(env, meta->trees.main.mod_txnid || meta->trees.main.root == P_INVALID); /* Calculate filesize taking in account shrink/growing thresholds */ if (meta->geometry.first_unallocated != meta->geometry.now) { meta->geometry.now = meta->geometry.first_unallocated; - const size_t aligner = - pv2pages(meta->geometry.grow_pv ? meta->geometry.grow_pv - : meta->geometry.shrink_pv); + const size_t aligner = pv2pages(meta->geometry.grow_pv ? meta->geometry.grow_pv : meta->geometry.shrink_pv); if (aligner) { - const pgno_t aligned = pgno_align2os_pgno( - env, meta->geometry.first_unallocated + aligner - - meta->geometry.first_unallocated % aligner); + const pgno_t aligned = pgno_align2os_pgno(env, meta->geometry.first_unallocated + aligner - + meta->geometry.first_unallocated % aligner); meta->geometry.now = aligned; } } @@ -366,13 +345,10 @@ __cold static void meta_make_sizeable(meta_t *meta) { } } -__cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *txn, - mdbx_filehandle_t fd, uint8_t *buffer, - const bool dest_is_pipe, - const MDBX_copy_flags_t flags) { +__cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *txn, mdbx_filehandle_t fd, uint8_t *buffer, + const bool dest_is_pipe, const MDBX_copy_flags_t flags) { const size_t meta_bytes = pgno2bytes(env, NUM_METAS); - uint8_t *const data_buffer = - buffer + ceil_powerof2(meta_bytes, globals.sys_pagesize); + uint8_t *const data_buffer = buffer + 
ceil_powerof2(meta_bytes, globals.sys_pagesize); meta_t *const meta = meta_init_triplet(env, buffer); meta_set_txnid(env, meta, txn->txnid); @@ -405,22 +381,17 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *txn, int rc = cursor_init(&couple.outer, txn, FREE_DBI); if (unlikely(rc != MDBX_SUCCESS)) return rc; - pgno_t gc_npages = txn->dbs[FREE_DBI].branch_pages + - txn->dbs[FREE_DBI].leaf_pages + - txn->dbs[FREE_DBI].large_pages; + pgno_t gc_npages = txn->dbs[FREE_DBI].branch_pages + txn->dbs[FREE_DBI].leaf_pages + txn->dbs[FREE_DBI].large_pages; MDBX_val key, data; rc = outer_first(&couple.outer, &key, &data); while (rc == MDBX_SUCCESS) { const pnl_t pnl = data.iov_base; - if (unlikely(data.iov_len % sizeof(pgno_t) || - data.iov_len < MDBX_PNL_SIZEOF(pnl))) { - ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid GC-record length", data.iov_len); + if (unlikely(data.iov_len % sizeof(pgno_t) || data.iov_len < MDBX_PNL_SIZEOF(pnl))) { + ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC-record length", data.iov_len); return MDBX_CORRUPTED; } if (unlikely(!pnl_check(pnl, txn->geo.first_unallocated))) { - ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid GC-record content"); + ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC-record content"); return MDBX_CORRUPTED; } gc_npages += MDBX_PNL_GETSIZE(pnl); @@ -466,21 +437,16 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *txn, /* toggle to flush non-empty buffers */ compacting_toggle_write_buffers(&ctx); - if (likely(rc == MDBX_SUCCESS) && - unlikely(meta->geometry.first_unallocated != ctx.first_unallocated)) { + if (likely(rc == MDBX_SUCCESS) && unlikely(meta->geometry.first_unallocated != ctx.first_unallocated)) { if (ctx.first_unallocated > meta->geometry.first_unallocated) { - ERROR("the source DB %s: post-compactification used pages %" PRIaPGNO - " %c expected %" PRIaPGNO, - "has double-used pages or other corruption", 
- ctx.first_unallocated, '>', meta->geometry.first_unallocated); + ERROR("the source DB %s: post-compactification used pages %" PRIaPGNO " %c expected %" PRIaPGNO, + "has double-used pages or other corruption", ctx.first_unallocated, '>', + meta->geometry.first_unallocated); rc = MDBX_CORRUPTED; /* corrupted DB */ } if (ctx.first_unallocated < meta->geometry.first_unallocated) { - WARNING( - "the source DB %s: post-compactification used pages %" PRIaPGNO - " %c expected %" PRIaPGNO, - "has page leak(s)", ctx.first_unallocated, '<', - meta->geometry.first_unallocated); + WARNING("the source DB %s: post-compactification used pages %" PRIaPGNO " %c expected %" PRIaPGNO, + "has page leak(s)", ctx.first_unallocated, '<', meta->geometry.first_unallocated); if (dest_is_pipe) /* the root within already written meta-pages is wrong */ rc = MDBX_CORRUPTED; @@ -493,8 +459,7 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *txn, eASSERT(env, (ctx.write_len[ctx.head & 1]) == 0); compacting_toggle_write_buffers(&ctx); thread_err = osal_thread_join(thread); - eASSERT(env, (ctx.tail == ctx.head && ctx.write_len[ctx.head & 1] == 0) || - ctx.error); + eASSERT(env, (ctx.tail == ctx.head && ctx.write_len[ctx.head & 1] == 0) || ctx.error); osal_condpair_destroy(&ctx.condpair); } if (unlikely(thread_err != MDBX_SUCCESS)) @@ -519,9 +484,8 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *txn, const size_t used_size = pgno2bytes(env, meta->geometry.first_unallocated); memset(data_buffer, 0, (size_t)MDBX_ENVCOPY_WRITEBUF); for (size_t offset = used_size; offset < whole_size;) { - const size_t chunk = ((size_t)MDBX_ENVCOPY_WRITEBUF < whole_size - offset) - ? (size_t)MDBX_ENVCOPY_WRITEBUF - : whole_size - offset; + const size_t chunk = + ((size_t)MDBX_ENVCOPY_WRITEBUF < whole_size - offset) ? 
(size_t)MDBX_ENVCOPY_WRITEBUF : whole_size - offset; int rc = osal_write(fd, data_buffer, chunk); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -533,9 +497,8 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *txn, //---------------------------------------------------------------------------- -__cold static int copy_asis(MDBX_env *env, MDBX_txn *txn, mdbx_filehandle_t fd, - uint8_t *buffer, const bool dest_is_pipe, - const MDBX_copy_flags_t flags) { +__cold static int copy_asis(MDBX_env *env, MDBX_txn *txn, mdbx_filehandle_t fd, uint8_t *buffer, + const bool dest_is_pipe, const MDBX_copy_flags_t flags) { bool should_unlock = false; if ((txn->flags & MDBX_TXN_RDONLY) != 0 && (flags & MDBX_CP_RENEW_TXN) != 0) { /* Try temporarily block writers until we snapshot the meta pages */ @@ -566,8 +529,7 @@ retry_snap_meta: rc = MDBX_MVCC_RETARDED; for (size_t n = 0; n < NUM_METAS; ++n) { meta_t *const meta = page_meta(ptr_disp(buffer, pgno2bytes(env, n))); - if (troika.txnid[n] == txn->txnid && - ((/* is_steady */ (troika.fsm >> n) & 1) || rc != MDBX_SUCCESS)) { + if (troika.txnid[n] == txn->txnid && ((/* is_steady */ (troika.fsm >> n) & 1) || rc != MDBX_SUCCESS)) { rc = MDBX_SUCCESS; headcopy = meta; } else if (troika.txnid[n] > txn->txnid) @@ -606,14 +568,12 @@ retry_snap_meta: if (dest_is_pipe) rc = osal_write(fd, buffer, meta_bytes); - uint8_t *const data_buffer = - buffer + ceil_powerof2(meta_bytes, globals.sys_pagesize); + uint8_t *const data_buffer = buffer + ceil_powerof2(meta_bytes, globals.sys_pagesize); #if MDBX_USE_COPYFILERANGE static bool copyfilerange_unavailable; bool not_the_same_filesystem = false; struct statfs statfs_info; - if (fstatfs(fd, &statfs_info) || - statfs_info.f_type == /* ECRYPTFS_SUPER_MAGIC */ 0xf15f) + if (fstatfs(fd, &statfs_info) || statfs_info.f_type == /* ECRYPTFS_SUPER_MAGIC */ 0xf15f) /* avoid use copyfilerange_unavailable() to ecryptfs due bugs */ not_the_same_filesystem = true; #endif /* MDBX_USE_COPYFILERANGE */ @@ 
-629,8 +589,7 @@ retry_snap_meta: static bool sendfile_unavailable; if (dest_is_pipe && likely(!sendfile_unavailable)) { off_t in_offset = offset; - const ssize_t written = - sendfile(fd, env->lazy_fd, &in_offset, used_size - offset); + const ssize_t written = sendfile(fd, env->lazy_fd, &in_offset, used_size - offset); if (likely(written > 0)) { offset = in_offset; if (flags & MDBX_CP_THROTTLE_MVCC) @@ -645,11 +604,9 @@ retry_snap_meta: #endif /* MDBX_USE_SENDFILE */ #if MDBX_USE_COPYFILERANGE - if (!dest_is_pipe && !not_the_same_filesystem && - likely(!copyfilerange_unavailable)) { + if (!dest_is_pipe && !not_the_same_filesystem && likely(!copyfilerange_unavailable)) { off_t in_offset = offset, out_offset = offset; - ssize_t bytes_copied = copy_file_range( - env->lazy_fd, &in_offset, fd, &out_offset, used_size - offset, 0); + ssize_t bytes_copied = copy_file_range(env->lazy_fd, &in_offset, fd, &out_offset, used_size - offset, 0); if (likely(bytes_copied > 0)) { offset = in_offset; if (flags & MDBX_CP_THROTTLE_MVCC) @@ -672,9 +629,8 @@ retry_snap_meta: #endif /* MDBX_USE_COPYFILERANGE */ /* fallback to portable */ - const size_t chunk = ((size_t)MDBX_ENVCOPY_WRITEBUF < used_size - offset) - ? (size_t)MDBX_ENVCOPY_WRITEBUF - : used_size - offset; + const size_t chunk = + ((size_t)MDBX_ENVCOPY_WRITEBUF < used_size - offset) ? (size_t)MDBX_ENVCOPY_WRITEBUF : used_size - offset; /* copy to avoid EFAULT in case swapped-out */ memcpy(data_buffer, ptr_disp(env->dxb_mmap.base, offset), chunk); if (flags & MDBX_CP_THROTTLE_MVCC) @@ -689,12 +645,9 @@ retry_snap_meta: rc = osal_ftruncate(fd, whole_size); else { memset(data_buffer, 0, (size_t)MDBX_ENVCOPY_WRITEBUF); - for (size_t offset = used_size; - rc == MDBX_SUCCESS && offset < whole_size;) { + for (size_t offset = used_size; rc == MDBX_SUCCESS && offset < whole_size;) { const size_t chunk = - ((size_t)MDBX_ENVCOPY_WRITEBUF < whole_size - offset) - ? 
(size_t)MDBX_ENVCOPY_WRITEBUF - : whole_size - offset; + ((size_t)MDBX_ENVCOPY_WRITEBUF < whole_size - offset) ? (size_t)MDBX_ENVCOPY_WRITEBUF : whole_size - offset; rc = osal_write(fd, data_buffer, chunk); offset += chunk; } @@ -706,8 +659,7 @@ retry_snap_meta: //---------------------------------------------------------------------------- -__cold static int copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, - MDBX_copy_flags_t flags) { +__cold static int copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, MDBX_copy_flags_t flags) { if (unlikely(txn->flags & MDBX_TXN_DIRTY)) return MDBX_BAD_TXN; @@ -734,9 +686,7 @@ __cold static int copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, MDBX_env *const env = txn->env; const size_t buffer_size = pgno_align2os_bytes(env, NUM_METAS) + - ceil_powerof2(((flags & MDBX_CP_COMPACT) - ? 2 * (size_t)MDBX_ENVCOPY_WRITEBUF - : (size_t)MDBX_ENVCOPY_WRITEBUF), + ceil_powerof2(((flags & MDBX_CP_COMPACT) ? 2 * (size_t)MDBX_ENVCOPY_WRITEBUF : (size_t)MDBX_ENVCOPY_WRITEBUF), globals.sys_pagesize); uint8_t *buffer = nullptr; @@ -755,8 +705,7 @@ __cold static int copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, rc = mdbx_txn_unpark(txn, false); if (likely(rc == MDBX_SUCCESS)) { memset(buffer, 0, pgno2bytes(env, NUM_METAS)); - rc = ((flags & MDBX_CP_COMPACT) ? copy_with_compacting : copy_asis)( - env, txn, fd, buffer, dest_is_pipe, flags); + rc = ((flags & MDBX_CP_COMPACT) ? 
copy_with_compacting : copy_asis)(env, txn, fd, buffer, dest_is_pipe, flags); if (likely(rc == MDBX_SUCCESS)) rc = mdbx_txn_unpark(txn, false); @@ -785,8 +734,7 @@ __cold static int copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, return rc; } -__cold static int copy2pathname(MDBX_txn *txn, const pathchar_t *dest_path, - MDBX_copy_flags_t flags) { +__cold static int copy2pathname(MDBX_txn *txn, const pathchar_t *dest_path, MDBX_copy_flags_t flags) { if (unlikely(!dest_path || *dest_path == '\0')) return MDBX_EINVAL; @@ -813,7 +761,7 @@ __cold static int copy2pathname(MDBX_txn *txn, const pathchar_t *dest_path, lock_op.l_start = 0; lock_op.l_len = OFF_T_MAX; if (MDBX_FCNTL(newfd, MDBX_F_SETLK, &lock_op) -#if (defined(__linux__) || defined(__gnu_linux__)) && defined(LOCK_EX) && \ +#if (defined(__linux__) || defined(__gnu_linux__)) && defined(LOCK_EX) && \ (!defined(__ANDROID_API__) || __ANDROID_API__ >= 24) || flock(newfd, LOCK_EX | LOCK_NB) #endif /* Linux */ @@ -837,8 +785,7 @@ __cold static int copy2pathname(MDBX_txn *txn, const pathchar_t *dest_path, //---------------------------------------------------------------------------- -__cold int mdbx_txn_copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, - MDBX_copy_flags_t flags) { +__cold int mdbx_txn_copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, MDBX_copy_flags_t flags) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (likely(rc == MDBX_SUCCESS)) rc = copy2fd(txn, fd, flags); @@ -847,8 +794,7 @@ __cold int mdbx_txn_copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, return LOG_IFERR(rc); } -__cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, - MDBX_copy_flags_t flags) { +__cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, MDBX_copy_flags_t flags) { if (unlikely(flags & (MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN))) return LOG_IFERR(MDBX_EINVAL); @@ -866,8 +812,7 @@ __cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, return LOG_IFERR(rc); } -__cold int mdbx_txn_copy2pathname(MDBX_txn *txn, const char 
*dest_path, - MDBX_copy_flags_t flags) { +__cold int mdbx_txn_copy2pathname(MDBX_txn *txn, const char *dest_path, MDBX_copy_flags_t flags) { #if defined(_WIN32) || defined(_WIN64) wchar_t *dest_pathW = nullptr; int rc = osal_mb2w(dest_path, &dest_pathW); @@ -878,8 +823,7 @@ __cold int mdbx_txn_copy2pathname(MDBX_txn *txn, const char *dest_path, return LOG_IFERR(rc); } -__cold int mdbx_txn_copy2pathnameW(MDBX_txn *txn, const wchar_t *dest_path, - MDBX_copy_flags_t flags) { +__cold int mdbx_txn_copy2pathnameW(MDBX_txn *txn, const wchar_t *dest_path, MDBX_copy_flags_t flags) { #endif /* Windows */ int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (likely(rc == MDBX_SUCCESS)) @@ -889,8 +833,7 @@ __cold int mdbx_txn_copy2pathnameW(MDBX_txn *txn, const wchar_t *dest_path, return LOG_IFERR(rc); } -__cold int mdbx_env_copy(MDBX_env *env, const char *dest_path, - MDBX_copy_flags_t flags) { +__cold int mdbx_env_copy(MDBX_env *env, const char *dest_path, MDBX_copy_flags_t flags) { #if defined(_WIN32) || defined(_WIN64) wchar_t *dest_pathW = nullptr; int rc = osal_mb2w(dest_path, &dest_pathW); @@ -901,8 +844,7 @@ __cold int mdbx_env_copy(MDBX_env *env, const char *dest_path, return LOG_IFERR(rc); } -__cold int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest_path, - MDBX_copy_flags_t flags) { +__cold int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest_path, MDBX_copy_flags_t flags) { #endif /* Windows */ if (unlikely(flags & (MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN))) return LOG_IFERR(MDBX_EINVAL); @@ -916,8 +858,7 @@ __cold int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest_path, if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); - rc = copy2pathname(txn, dest_path, - flags | MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN); + rc = copy2pathname(txn, dest_path, flags | MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN); mdbx_txn_abort(txn); return LOG_IFERR(rc); } diff --git a/src/cursor.c b/src/cursor.c index 4371f753..a4bffd76 100644 --- a/src/cursor.c +++ b/src/cursor.c @@ -11,14 
+11,11 @@ __cold int cursor_check(const MDBX_cursor *mc) { } else { cASSERT(mc, (mc->txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); cASSERT(mc, mc->txn->tw.dirtyroom + mc->txn->tw.dirtylist->length == - (mc->txn->parent ? mc->txn->parent->tw.dirtyroom - : mc->txn->env->options.dp_limit)); + (mc->txn->parent ? mc->txn->parent->tw.dirtyroom : mc->txn->env->options.dp_limit)); } - cASSERT(mc, (mc->checking & z_updating) ? mc->top + 1 <= mc->tree->height - : mc->top + 1 == mc->tree->height); - if (unlikely((mc->checking & z_updating) ? mc->top + 1 > mc->tree->height - : mc->top + 1 != mc->tree->height)) + cASSERT(mc, (mc->checking & z_updating) ? mc->top + 1 <= mc->tree->height : mc->top + 1 == mc->tree->height); + if (unlikely((mc->checking & z_updating) ? mc->top + 1 > mc->tree->height : mc->top + 1 != mc->tree->height)) return MDBX_CURSOR_FULL; if (is_pointed(mc) && (mc->checking & z_updating) == 0) { @@ -40,17 +37,14 @@ __cold int cursor_check(const MDBX_cursor *mc) { page_t *mp = mc->pg[n]; const size_t nkeys = page_numkeys(mp); const bool expect_branch = (n < mc->tree->height - 1) ? true : false; - const bool expect_nested_leaf = - (n + 1 == mc->tree->height - 1) ? true : false; + const bool expect_nested_leaf = (n + 1 == mc->tree->height - 1) ? true : false; const bool branch = is_branch(mp) ? 
true : false; cASSERT(mc, branch == expect_branch); if (unlikely(branch != expect_branch)) return MDBX_CURSOR_FULL; if ((mc->checking & z_updating) == 0) { - cASSERT(mc, nkeys > mc->ki[n] || (!branch && nkeys == mc->ki[n] && - (mc->flags & z_hollow) != 0)); - if (unlikely(nkeys <= mc->ki[n] && !(!branch && nkeys == mc->ki[n] && - (mc->flags & z_hollow) != 0))) + cASSERT(mc, nkeys > mc->ki[n] || (!branch && nkeys == mc->ki[n] && (mc->flags & z_hollow) != 0)); + if (unlikely(nkeys <= mc->ki[n] && !(!branch && nkeys == mc->ki[n] && (mc->flags & z_hollow) != 0))) return MDBX_CURSOR_FULL; } else { cASSERT(mc, nkeys + 1 >= mc->ki[n]); @@ -96,8 +90,7 @@ __cold int cursor_check_updating(MDBX_cursor *mc) { } bool cursor_is_tracked(const MDBX_cursor *mc) { - for (MDBX_cursor *scan = mc->txn->cursors[cursor_dbi(mc)]; scan; - scan = scan->next) + for (MDBX_cursor *scan = mc->txn->cursors[cursor_dbi(mc)]; scan; scan = scan->next) if (mc == ((mc->flags & z_inner) ? &scan->subcur->cursor : scan)) return true; return false; @@ -121,16 +114,14 @@ static int touch_dbi(MDBX_cursor *mc) { if (unlikely(rc != MDBX_SUCCESS)) return rc; mc->txn->dbi_state[MAIN_DBI] |= DBI_DIRTY; - rc = tree_search(&cx.outer, &container_of(mc->clc, kvx_t, clc)->name, - Z_MODIFY); + rc = tree_search(&cx.outer, &container_of(mc->clc, kvx_t, clc)->name, Z_MODIFY); if (unlikely(rc != MDBX_SUCCESS)) return rc; } return MDBX_SUCCESS; } -__hot int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, - const MDBX_val *data) { +__hot int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, const MDBX_val *data) { cASSERT(mc, (mc->txn->flags & MDBX_TXN_RDONLY) == 0); cASSERT(mc, is_pointed(mc) || mc->tree->height == 0); cASSERT(mc, cursor_is_tracked(mc)); @@ -193,13 +184,10 @@ __hot int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, /*----------------------------------------------------------------------------*/ -int cursor_shadow(MDBX_cursor *parent_cursor, MDBX_txn *nested_txn, - const size_t 
dbi) { +int cursor_shadow(MDBX_cursor *parent_cursor, MDBX_txn *nested_txn, const size_t dbi) { tASSERT(nested_txn, dbi > FREE_DBI && dbi < nested_txn->n_dbi); - const size_t size = parent_cursor->subcur - ? sizeof(MDBX_cursor) + sizeof(subcur_t) - : sizeof(MDBX_cursor); + const size_t size = parent_cursor->subcur ? sizeof(MDBX_cursor) + sizeof(subcur_t) : sizeof(MDBX_cursor); for (MDBX_cursor *bk; parent_cursor; parent_cursor = bk->next) { cASSERT(parent_cursor, parent_cursor != parent_cursor->next); bk = parent_cursor; @@ -235,8 +223,7 @@ int cursor_shadow(MDBX_cursor *parent_cursor, MDBX_txn *nested_txn, void cursor_eot(MDBX_cursor *mc, const bool merge) { const unsigned stage = mc->signature; MDBX_cursor *const bk = mc->backup; - ENSURE(mc->txn->env, stage == cur_signature_live || - (stage == cur_signature_wait4eot && bk)); + ENSURE(mc->txn->env, stage == cur_signature_live || (stage == cur_signature_wait4eot && bk)); if (bk) { subcur_t *mx = mc->subcur; cASSERT(mc, mc->txn->parent != nullptr); @@ -274,10 +261,8 @@ void cursor_eot(MDBX_cursor *mc, const bool merge) { /*----------------------------------------------------------------------------*/ -static __always_inline int couple_init(cursor_couple_t *couple, - const MDBX_txn *const txn, - tree_t *const tree, kvx_t *const kvx, - uint8_t *const dbi_state) { +static __always_inline int couple_init(cursor_couple_t *couple, const MDBX_txn *const txn, tree_t *const tree, + kvx_t *const kvx, uint8_t *const dbi_state) { VALGRIND_MAKE_MEM_UNDEFINED(couple, sizeof(cursor_couple_t)); tASSERT(txn, F_ISSET(*dbi_state, DBI_VALID | DBI_LINDO)); @@ -290,12 +275,9 @@ static __always_inline int couple_init(cursor_couple_t *couple, couple->outer.clc = &kvx->clc; couple->outer.dbi_state = dbi_state; couple->outer.top_and_flags = z_fresh_mark; - STATIC_ASSERT((int)z_branch == P_BRANCH && (int)z_leaf == P_LEAF && - (int)z_largepage == P_LARGE && (int)z_dupfix == P_DUPFIX); - couple->outer.checking = - (AUDIT_ENABLED() || 
(txn->env->flags & MDBX_VALIDATION)) - ? z_pagecheck | z_leaf - : z_leaf; + STATIC_ASSERT((int)z_branch == P_BRANCH && (int)z_leaf == P_LEAF && (int)z_largepage == P_LARGE && + (int)z_dupfix == P_DUPFIX); + couple->outer.checking = (AUDIT_ENABLED() || (txn->env->flags & MDBX_VALIDATION)) ? z_pagecheck | z_leaf : z_leaf; couple->outer.subcur = nullptr; if (tree->flags & MDBX_DUPSORT) { @@ -310,8 +292,7 @@ static __always_inline int couple_init(cursor_couple_t *couple, mx->cursor.dbi_state = dbi_state; mx->cursor.top_and_flags = z_fresh_mark | z_inner; STATIC_ASSERT(MDBX_DUPFIXED * 2 == P_DUPFIX); - mx->cursor.checking = - couple->outer.checking + ((tree->flags & MDBX_DUPFIXED) << 1); + mx->cursor.checking = couple->outer.checking + ((tree->flags & MDBX_DUPFIXED) << 1); } if (unlikely(*dbi_state & DBI_STALE)) @@ -323,8 +304,7 @@ static __always_inline int couple_init(cursor_couple_t *couple, return MDBX_SUCCESS; } -__cold int cursor_init4walk(cursor_couple_t *couple, const MDBX_txn *const txn, - tree_t *const tree, kvx_t *const kvx) { +__cold int cursor_init4walk(cursor_couple_t *couple, const MDBX_txn *const txn, tree_t *const tree, kvx_t *const kvx) { return couple_init(couple, txn, tree, kvx, txn->dbi_state); } @@ -332,21 +312,19 @@ int cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, size_t dbi) { STATIC_ASSERT(offsetof(cursor_couple_t, outer) == 0); int rc = dbi_check(txn, dbi); if (likely(rc == MDBX_SUCCESS)) - rc = couple_init(container_of(mc, cursor_couple_t, outer), txn, - &txn->dbs[dbi], &txn->env->kvs[dbi], &txn->dbi_state[dbi]); + rc = couple_init(container_of(mc, cursor_couple_t, outer), txn, &txn->dbs[dbi], &txn->env->kvs[dbi], + &txn->dbi_state[dbi]); return rc; } __cold static int unexpected_dupsort(MDBX_cursor *mc) { - ERROR("unexpected dupsort-page/node for non-dupsort db/cursor (dbi %zu)", - cursor_dbi(mc)); + ERROR("unexpected dupsort-page/node for non-dupsort db/cursor (dbi %zu)", cursor_dbi(mc)); mc->txn->flags |= MDBX_TXN_ERROR; be_poor(mc); 
return MDBX_CORRUPTED; } -int cursor_dupsort_setup(MDBX_cursor *mc, const node_t *node, - const page_t *mp) { +int cursor_dupsort_setup(MDBX_cursor *mc, const node_t *node, const page_t *mp) { cASSERT(mc, is_pointed(mc)); subcur_t *mx = mc->subcur; if (!MDBX_DISABLE_VALIDATION && unlikely(mx == nullptr)) @@ -359,16 +337,13 @@ int cursor_dupsort_setup(MDBX_cursor *mc, const node_t *node, goto bailout; case N_DUP | N_TREE: if (!MDBX_DISABLE_VALIDATION && unlikely(node_ds(node) != sizeof(tree_t))) { - ERROR("invalid nested-db record size (%zu, expect %zu)", node_ds(node), - sizeof(tree_t)); + ERROR("invalid nested-db record size (%zu, expect %zu)", node_ds(node), sizeof(tree_t)); goto bailout; } memcpy(&mx->nested_tree, node_data(node), sizeof(tree_t)); const txnid_t pp_txnid = mp->txnid; - if (!MDBX_DISABLE_VALIDATION && - unlikely(mx->nested_tree.mod_txnid > pp_txnid)) { - ERROR("nested-db.mod_txnid (%" PRIaTXN ") > page-txnid (%" PRIaTXN ")", - mx->nested_tree.mod_txnid, pp_txnid); + if (!MDBX_DISABLE_VALIDATION && unlikely(mx->nested_tree.mod_txnid > pp_txnid)) { + ERROR("nested-db.mod_txnid (%" PRIaTXN ") > page-txnid (%" PRIaTXN ")", mx->nested_tree.mod_txnid, pp_txnid); goto bailout; } mx->cursor.top_and_flags = z_fresh_mark | z_inner; @@ -390,25 +365,21 @@ int cursor_dupsort_setup(MDBX_cursor *mc, const node_t *node, mx->cursor.pg[0] = sp; mx->cursor.ki[0] = 0; mx->nested_tree.flags = flags_db2sub(mc->tree->flags); - mx->nested_tree.dupfix_size = - (mc->tree->flags & MDBX_DUPFIXED) ? sp->dupfix_ksize : 0; + mx->nested_tree.dupfix_size = (mc->tree->flags & MDBX_DUPFIXED) ? 
sp->dupfix_ksize : 0; break; } if (unlikely(mx->nested_tree.dupfix_size != mc->tree->dupfix_size)) { if (!MDBX_DISABLE_VALIDATION && unlikely(mc->tree->dupfix_size != 0)) { - ERROR("cursor mismatched nested-db dupfix_size %u", - mc->tree->dupfix_size); + ERROR("cursor mismatched nested-db dupfix_size %u", mc->tree->dupfix_size); goto bailout; } - if (!MDBX_DISABLE_VALIDATION && - unlikely((mc->tree->flags & MDBX_DUPFIXED) == 0)) { + if (!MDBX_DISABLE_VALIDATION && unlikely((mc->tree->flags & MDBX_DUPFIXED) == 0)) { ERROR("mismatched nested-db flags %u", mc->tree->flags); goto bailout; } if (!MDBX_DISABLE_VALIDATION && - unlikely(mx->nested_tree.dupfix_size < mc->clc->v.lmin || - mx->nested_tree.dupfix_size > mc->clc->v.lmax)) { + unlikely(mx->nested_tree.dupfix_size < mc->clc->v.lmin || mx->nested_tree.dupfix_size > mc->clc->v.lmax)) { ERROR("mismatched nested-db.dupfix_size (%u) <> min/max value-length " "(%zu/%zu)", mx->nested_tree.dupfix_size, mc->clc->v.lmin, mc->clc->v.lmax); @@ -418,8 +389,7 @@ int cursor_dupsort_setup(MDBX_cursor *mc, const node_t *node, mc->clc->v.lmin = mc->clc->v.lmax = mx->nested_tree.dupfix_size; } - DEBUG("Sub-db dbi -%zu root page %" PRIaPGNO, cursor_dbi(&mx->cursor), - mx->nested_tree.root); + DEBUG("Sub-db dbi -%zu root page %" PRIaPGNO, cursor_dbi(&mx->cursor), mx->nested_tree.root); return MDBX_SUCCESS; bailout: @@ -450,14 +420,11 @@ static __always_inline int sibling(MDBX_cursor *mc, bool right) { } cursor_pop(mc); - DEBUG("parent page is page %" PRIaPGNO ", index %u", mc->pg[mc->top]->pgno, - mc->ki[mc->top]); + DEBUG("parent page is page %" PRIaPGNO ", index %u", mc->pg[mc->top]->pgno, mc->ki[mc->top]); int err; - if (right ? (mc->ki[mc->top] + (size_t)1 >= page_numkeys(mc->pg[mc->top])) - : (mc->ki[mc->top] == 0)) { - DEBUG("no more keys aside, moving to next %s sibling", - right ? "right" : "left"); + if (right ? 
(mc->ki[mc->top] + (size_t)1 >= page_numkeys(mc->pg[mc->top])) : (mc->ki[mc->top] == 0)) { + DEBUG("no more keys aside, moving to next %s sibling", right ? "right" : "left"); err = right ? cursor_sibling_right(mc) : cursor_sibling_left(mc); if (err != MDBX_SUCCESS) { if (likely(err == MDBX_NOTFOUND)) @@ -467,8 +434,7 @@ static __always_inline int sibling(MDBX_cursor *mc, bool right) { } } else { mc->ki[mc->top] += right ? 1 : -1; - DEBUG("just moving to %s index key %u", right ? "right" : "left", - mc->ki[mc->top]); + DEBUG("just moving to %s index key %u", right ? "right" : "left", mc->ki[mc->top]); } cASSERT(mc, is_branch(mc->pg[mc->top])); @@ -515,10 +481,8 @@ __hot int cursor_sibling_right(MDBX_cursor *mc) { /* Функция-шаблон: Приземляет курсор на данные в текущей позиции. * В том числе, загружает данные во вложенный курсор при его наличии. */ -static __always_inline int cursor_bring(const bool inner, const bool tend2first, - MDBX_cursor *__restrict mc, - MDBX_val *__restrict key, - MDBX_val *__restrict data, bool eof) { +static __always_inline int cursor_bring(const bool inner, const bool tend2first, MDBX_cursor *__restrict mc, + MDBX_val *__restrict key, MDBX_val *__restrict data, bool eof) { if (inner) { cASSERT(mc, !data && !mc->subcur && (mc->flags & z_inner) != 0); } else { @@ -527,8 +491,7 @@ static __always_inline int cursor_bring(const bool inner, const bool tend2first, const page_t *mp = mc->pg[mc->top]; if (!MDBX_DISABLE_VALIDATION && unlikely(!check_leaf_type(mc, mp))) { - ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", - mp->pgno, mp->flags); + ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", mp->pgno, mp->flags); return MDBX_CORRUPTED; } @@ -554,8 +517,7 @@ static __always_inline int cursor_bring(const bool inner, const bool tend2first, return err; MDBX_ANALYSIS_ASSUME(mc->subcur != nullptr); if (node_flags(node) & N_TREE) { - err = tend2first ? 
inner_first(&mc->subcur->cursor, data) - : inner_last(&mc->subcur->cursor, data); + err = tend2first ? inner_first(&mc->subcur->cursor, data) : inner_last(&mc->subcur->cursor, data); if (unlikely(err != MDBX_SUCCESS)) return err; } else { @@ -592,10 +554,8 @@ static __always_inline int cursor_bring(const bool inner, const bool tend2first, } /* Функция-шаблон: Устанавливает курсор в начало или конец. */ -static __always_inline int cursor_brim(const bool inner, const bool tend2first, - MDBX_cursor *__restrict mc, - MDBX_val *__restrict key, - MDBX_val *__restrict data) { +static __always_inline int cursor_brim(const bool inner, const bool tend2first, MDBX_cursor *__restrict mc, + MDBX_val *__restrict key, MDBX_val *__restrict data) { if (mc->top != 0) { int err = tree_search(mc, nullptr, tend2first ? Z_FIRST : Z_LAST); if (unlikely(err != MDBX_SUCCESS)) @@ -607,13 +567,9 @@ static __always_inline int cursor_brim(const bool inner, const bool tend2first, return cursor_bring(inner, tend2first, mc, key, data, !tend2first); } -__hot int inner_first(MDBX_cursor *mc, MDBX_val *data) { - return cursor_brim(true, true, mc, data, nullptr); -} +__hot int inner_first(MDBX_cursor *mc, MDBX_val *data) { return cursor_brim(true, true, mc, data, nullptr); } -__hot int inner_last(MDBX_cursor *mc, MDBX_val *data) { - return cursor_brim(true, false, mc, data, nullptr); -} +__hot int inner_last(MDBX_cursor *mc, MDBX_val *data) { return cursor_brim(true, false, mc, data, nullptr); } __hot int outer_first(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data) { return cursor_brim(false, true, mc, key, data); @@ -627,23 +583,18 @@ __hot int outer_last(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data) { /* Функция-шаблон: Передвигает курсор на одну позицию. * При необходимости управляет вложенным курсором. 
*/ -static __always_inline int cursor_step(const bool inner, const bool forward, - MDBX_cursor *__restrict mc, - MDBX_val *__restrict key, - MDBX_val *__restrict data, - MDBX_cursor_op op) { +static __always_inline int cursor_step(const bool inner, const bool forward, MDBX_cursor *__restrict mc, + MDBX_val *__restrict key, MDBX_val *__restrict data, MDBX_cursor_op op) { if (forward) { if (inner) cASSERT(mc, op == MDBX_NEXT); else - cASSERT(mc, - op == MDBX_NEXT || op == MDBX_NEXT_DUP || op == MDBX_NEXT_NODUP); + cASSERT(mc, op == MDBX_NEXT || op == MDBX_NEXT_DUP || op == MDBX_NEXT_NODUP); } else { if (inner) cASSERT(mc, op == MDBX_PREV); else - cASSERT(mc, - op == MDBX_PREV || op == MDBX_PREV_DUP || op == MDBX_PREV_NODUP); + cASSERT(mc, op == MDBX_PREV || op == MDBX_PREV_DUP || op == MDBX_PREV_NODUP); } if (inner) { cASSERT(mc, !data && !mc->subcur && (mc->flags & z_inner) != 0); @@ -668,15 +619,13 @@ static __always_inline int cursor_step(const bool inner, const bool forward, cASSERT(mc, nkeys > 0); intptr_t ki = mc->ki[mc->top]; - const uint8_t state = - mc->flags & (z_after_delete | z_hollow | z_eof_hard | z_eof_soft); + const uint8_t state = mc->flags & (z_after_delete | z_hollow | z_eof_hard | z_eof_soft); if (likely(state == 0)) { cASSERT(mc, ki < nkeys); if (!inner && op != (forward ? MDBX_NEXT_NODUP : MDBX_PREV_NODUP)) { int err = MDBX_NOTFOUND; if (inner_pointed(mc)) { - err = forward ? inner_next(&mc->subcur->cursor, data) - : inner_prev(&mc->subcur->cursor, data); + err = forward ? inner_next(&mc->subcur->cursor, data) : inner_prev(&mc->subcur->cursor, data); if (likely(err == MDBX_SUCCESS)) { get_key_optional(page_node(mp, ki), key); return MDBX_SUCCESS; @@ -715,9 +664,8 @@ static __always_inline int cursor_step(const bool inner, const bool forward, } } - DEBUG("turn-%s: top page was %" PRIaPGNO " in cursor %p, ki %zi of %zi", - forward ? 
"next" : "prev", mp->pgno, __Wpedantic_format_voidptr(mc), ki, - nkeys); + DEBUG("turn-%s: top page was %" PRIaPGNO " in cursor %p, ki %zi of %zi", forward ? "next" : "prev", mp->pgno, + __Wpedantic_format_voidptr(mc), ki, nkeys); if (forward) { if (likely(++ki < nkeys)) mc->ki[mc->top] = (indx_t)ki; @@ -727,8 +675,7 @@ static __always_inline int cursor_step(const bool inner, const bool forward, if (unlikely(err != MDBX_SUCCESS)) return err; mp = mc->pg[mc->top]; - DEBUG("next page is %" PRIaPGNO ", key index %u", mp->pgno, - mc->ki[mc->top]); + DEBUG("next page is %" PRIaPGNO ", key index %u", mp->pgno, mc->ki[mc->top]); } } else { if (likely(--ki >= 0)) @@ -739,47 +686,38 @@ static __always_inline int cursor_step(const bool inner, const bool forward, if (unlikely(err != MDBX_SUCCESS)) return err; mp = mc->pg[mc->top]; - DEBUG("prev page is %" PRIaPGNO ", key index %u", mp->pgno, - mc->ki[mc->top]); + DEBUG("prev page is %" PRIaPGNO ", key index %u", mp->pgno, mc->ki[mc->top]); } } - DEBUG("==> cursor points to page %" PRIaPGNO " with %zu keys, key index %u", - mp->pgno, page_numkeys(mp), mc->ki[mc->top]); + DEBUG("==> cursor points to page %" PRIaPGNO " with %zu keys, key index %u", mp->pgno, page_numkeys(mp), + mc->ki[mc->top]); bring: return cursor_bring(inner, forward, mc, key, data, false); } -__hot int inner_next(MDBX_cursor *mc, MDBX_val *data) { - return cursor_step(true, true, mc, data, nullptr, MDBX_NEXT); -} +__hot int inner_next(MDBX_cursor *mc, MDBX_val *data) { return cursor_step(true, true, mc, data, nullptr, MDBX_NEXT); } -__hot int inner_prev(MDBX_cursor *mc, MDBX_val *data) { - return cursor_step(true, false, mc, data, nullptr, MDBX_PREV); -} +__hot int inner_prev(MDBX_cursor *mc, MDBX_val *data) { return cursor_step(true, false, mc, data, nullptr, MDBX_PREV); } -__hot int outer_next(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, - MDBX_cursor_op op) { +__hot int outer_next(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { 
return cursor_step(false, true, mc, key, data, op); } -__hot int outer_prev(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, - MDBX_cursor_op op) { +__hot int outer_prev(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { return cursor_step(false, false, mc, key, data, op); } /*----------------------------------------------------------------------------*/ -__hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, - unsigned flags) { +__hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, unsigned flags) { int err; DKBUF_DEBUG; MDBX_env *const env = mc->txn->env; if (LOG_ENABLED(MDBX_LOG_DEBUG) && (flags & MDBX_RESERVE)) data->iov_base = nullptr; - DEBUG("==> put db %d key [%s], size %" PRIuPTR ", data [%s] size %" PRIuPTR, - cursor_dbi_dbg(mc), DKEY_DEBUG(key), key->iov_len, DVAL_DEBUG(data), - data->iov_len); + DEBUG("==> put db %d key [%s], size %" PRIuPTR ", data [%s] size %" PRIuPTR, cursor_dbi_dbg(mc), DKEY_DEBUG(key), + key->iov_len, DVAL_DEBUG(data), data->iov_len); if ((flags & MDBX_CURRENT) != 0 && (mc->flags & z_inner) == 0) { if (unlikely(flags & (MDBX_APPEND | MDBX_NOOVERWRITE))) @@ -805,8 +743,7 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, /* Если за ключом более одного значения, либо если размер данных * отличается, то вместо обновления требуется удаление и * последующая вставка. 
*/ - if (mc->subcur->nested_tree.items > 1 || - current_data.iov_len != data->iov_len) { + if (mc->subcur->nested_tree.items > 1 || current_data.iov_len != data->iov_len) { drop_current: err = cursor_del(mc, flags & MDBX_ALLDUPS); if (unlikely(err != MDBX_SUCCESS)) @@ -826,8 +763,7 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, goto skip_check_samedata; } } - if (!(flags & MDBX_RESERVE) && - unlikely(cmp_lenfast(&current_data, data) == 0)) + if (!(flags & MDBX_RESERVE) && unlikely(cmp_lenfast(&current_data, data) == 0)) return MDBX_SUCCESS /* the same data, nothing to update */; skip_check_samedata:; } @@ -843,8 +779,7 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, if ((flags & MDBX_APPEND) && mc->tree->items > 0) { old_data.iov_base = nullptr; old_data.iov_len = 0; - rc = (mc->flags & z_inner) ? inner_last(mc, &last_key) - : outer_last(mc, &last_key, &old_data); + rc = (mc->flags & z_inner) ? inner_last(mc, &last_key) : outer_last(mc, &last_key, &old_data); if (likely(rc == MDBX_SUCCESS)) { const int cmp = mc->clc->k.cmp(key, &last_key); if (likely(cmp > 0)) { @@ -875,11 +810,10 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, if (unlikely(mc->flags & z_inner)) { /* nested subtree of DUPSORT-database with the same key, * nothing to update */ - eASSERT(env, - data->iov_len == 0 && (old_data.iov_len == 0 || - /* olddata may not be updated in case - DUPFIX-page of dupfix-table */ - (mc->tree->flags & MDBX_DUPFIXED))); + eASSERT(env, data->iov_len == 0 && (old_data.iov_len == 0 || + /* olddata may not be updated in case + DUPFIX-page of dupfix-table */ + (mc->tree->flags & MDBX_DUPFIXED))); return MDBX_SUCCESS; } if (unlikely(flags & MDBX_ALLDUPS) && inner_pointed(mc)) { @@ -936,13 +870,11 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, mc->tree->root = npr.page->pgno; mc->tree->height++; if (mc->tree->flags & MDBX_INTEGERKEY) { - assert(key->iov_len >= 
mc->clc->k.lmin && - key->iov_len <= mc->clc->k.lmax); + assert(key->iov_len >= mc->clc->k.lmin && key->iov_len <= mc->clc->k.lmax); mc->clc->k.lmin = mc->clc->k.lmax = key->iov_len; } if (mc->tree->flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) { - assert(data->iov_len >= mc->clc->v.lmin && - data->iov_len <= mc->clc->v.lmax); + assert(data->iov_len >= mc->clc->v.lmin && data->iov_len <= mc->clc->v.lmax); assert(mc->subcur != nullptr); mc->tree->dupfix_size = /* mc->subcur->nested_tree.dupfix_size = */ (unsigned)(mc->clc->v.lmin = mc->clc->v.lmax = data->iov_len); @@ -966,12 +898,10 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, if (insert_key) { /* The key does not exist */ DEBUG("inserting key at index %i", mc->ki[mc->top]); - if ((mc->tree->flags & MDBX_DUPSORT) && - node_size(key, data) > env->leaf_nodemax) { + if ((mc->tree->flags & MDBX_DUPSORT) && node_size(key, data) > env->leaf_nodemax) { /* Too big for a node, insert in sub-DB. Set up an empty * "old sub-page" for convert_to_subtree to expand to a full page. */ - fp->dupfix_ksize = - (mc->tree->flags & MDBX_DUPFIXED) ? (uint16_t)data->iov_len : 0; + fp->dupfix_ksize = (mc->tree->flags & MDBX_DUPFIXED) ? (uint16_t)data->iov_len : 0; fp->lower = fp->upper = 0; old_data.iov_len = PAGEHDRSZ; goto convert_to_subtree; @@ -1022,9 +952,7 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, /* Large/Overflow page overwrites need special handling */ if (unlikely(node_flags(node) & N_BIG)) { - const size_t dpages = (node_size(key, data) > env->leaf_nodemax) - ? largechunk_npages(env, data->iov_len) - : 0; + const size_t dpages = (node_size(key, data) > env->leaf_nodemax) ? 
largechunk_npages(env, data->iov_len) : 0; const pgno_t pgno = node_largedata_pgno(node); pgr_t lp = page_get_large(mc, pgno, mc->pg[mc->top]->txnid); @@ -1035,11 +963,8 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, /* Is the ov page from this txn (or a parent) and big enough? */ const size_t ovpages = lp.page->pages; const size_t extra_threshold = - (mc->tree == &mc->txn->dbs[FREE_DBI]) - ? 1 - : /* LY: add configurable threshold to keep reserve space */ 0; - if (!is_frozen(mc->txn, lp.page) && ovpages >= dpages && - ovpages <= dpages + extra_threshold) { + (mc->tree == &mc->txn->dbs[FREE_DBI]) ? 1 : /* LY: add configurable threshold to keep reserve space */ 0; + if (!is_frozen(mc->txn, lp.page) && ovpages >= dpages && ovpages <= dpages + extra_threshold) { /* yes, overwrite it. */ if (!is_modifable(mc->txn, lp.page)) { if (is_spilled(mc->txn, lp.page)) { @@ -1052,10 +977,8 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, if (unlikely(!mc->txn->parent)) { ERROR("Unexpected not frozen/modifiable/spilled but shadowed %s " "page %" PRIaPGNO " mod-txnid %" PRIaTXN "," - " without parent transaction, current txn %" PRIaTXN - " front %" PRIaTXN, - "large/overflow", pgno, lp.page->txnid, mc->txn->txnid, - mc->txn->front_txnid); + " without parent transaction, current txn %" PRIaTXN " front %" PRIaTXN, + "large/overflow", pgno, lp.page->txnid, mc->txn->txnid, mc->txn->front_txnid); return MDBX_PROBLEM; } @@ -1094,8 +1017,7 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, } else { old_data.iov_len = node_ds(node); old_data.iov_base = node_data(node); - cASSERT(mc, ptr_disp(old_data.iov_base, old_data.iov_len) <= - ptr_disp(mc->pg[mc->top], env->ps)); + cASSERT(mc, ptr_disp(old_data.iov_base, old_data.iov_len) <= ptr_disp(mc->pg[mc->top], env->ps)); /* DB has dups? 
*/ if (mc->tree->flags & MDBX_DUPSORT) { @@ -1133,8 +1055,7 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, } /* Back up original data item */ - memcpy(old_singledup.iov_base = fp + 1, old_data.iov_base, - old_singledup.iov_len = old_data.iov_len); + memcpy(old_singledup.iov_base = fp + 1, old_data.iov_base, old_singledup.iov_len = old_data.iov_len); /* Make sub-page header for the dup items, with dummy body */ fp->flags = P_LEAF | P_SUBP; @@ -1149,13 +1070,11 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, * не сразу расширять созданную под-страницу. * Резервирование в целом сомнительно (см ниже), но может сработать * в плюс (а если в минус то несущественный) при коротких ключах. */ - xdata.iov_len += page_subleaf2_reserve( - env, page_room(mc->pg[mc->top]) + old_data.iov_len, - xdata.iov_len, data->iov_len); + xdata.iov_len += + page_subleaf2_reserve(env, page_room(mc->pg[mc->top]) + old_data.iov_len, xdata.iov_len, data->iov_len); cASSERT(mc, (xdata.iov_len & 1) == 0); } else { - xdata.iov_len += 2 * (sizeof(indx_t) + NODESIZE) + - (old_data.iov_len & 1) + (data->iov_len & 1); + xdata.iov_len += 2 * (sizeof(indx_t) + NODESIZE) + (old_data.iov_len & 1) + (data->iov_len & 1); } cASSERT(mc, (xdata.iov_len & 1) == 0); fp->upper = (uint16_t)(xdata.iov_len - PAGEHDRSZ); @@ -1169,9 +1088,7 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, fp = old_data.iov_base; switch (flags) { default: - growth = is_dupfix_leaf(fp) - ? fp->dupfix_ksize - : (node_size(data, nullptr) + sizeof(indx_t)); + growth = is_dupfix_leaf(fp) ? fp->dupfix_ksize : (node_size(data, nullptr) + sizeof(indx_t)); if (page_room(fp) >= growth) { /* На текущей под-странице есть место для добавления элемента. * Оптимальнее продолжить использовать эту страницу, ибо @@ -1241,9 +1158,8 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, * subpage_reserve_prereq = leaf_nodemax (1000‰). 
*/ if (is_dupfix_leaf(fp)) - growth += page_subleaf2_reserve( - env, page_room(mc->pg[mc->top]) + old_data.iov_len, - xdata.iov_len, data->iov_len); + growth += page_subleaf2_reserve(env, page_room(mc->pg[mc->top]) + old_data.iov_len, xdata.iov_len, + data->iov_len); else { /* TODO: Если добавить возможность для пользователя задавать * min/max размеров ключей/данных, то здесь разумно реализовать @@ -1265,13 +1181,10 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, } fp_flags = fp->flags; - if (xdata.iov_len > env->subpage_limit || - node_size_len(node_ks(node), xdata.iov_len) > env->leaf_nodemax || + if (xdata.iov_len > env->subpage_limit || node_size_len(node_ks(node), xdata.iov_len) > env->leaf_nodemax || (env->subpage_room_threshold && - page_room(mc->pg[mc->top]) + - node_size_len(node_ks(node), old_data.iov_len) < - env->subpage_room_threshold + - node_size_len(node_ks(node), xdata.iov_len))) { + page_room(mc->pg[mc->top]) + node_size_len(node_ks(node), old_data.iov_len) < + env->subpage_room_threshold + node_size_len(node_ks(node), xdata.iov_len))) { /* Too big for a sub-page, convert to sub-DB */ convert_to_subtree: fp_flags &= ~P_SUBP; @@ -1310,17 +1223,13 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, cASSERT(mc, fp->upper + growth < UINT16_MAX); mp->upper = fp->upper + (indx_t)growth; if (unlikely(fp_flags & P_DUPFIX)) { - memcpy(page_data(mp), page_data(fp), - page_numkeys(fp) * fp->dupfix_ksize); - cASSERT(mc, (((mp->dupfix_ksize & page_numkeys(mp)) ^ mp->upper) & - 1) == 0); + memcpy(page_data(mp), page_data(fp), page_numkeys(fp) * fp->dupfix_ksize); + cASSERT(mc, (((mp->dupfix_ksize & page_numkeys(mp)) ^ mp->upper) & 1) == 0); } else { cASSERT(mc, (mp->upper & 1) == 0); - memcpy(ptr_disp(mp, mp->upper + PAGEHDRSZ), - ptr_disp(fp, fp->upper + PAGEHDRSZ), + memcpy(ptr_disp(mp, mp->upper + PAGEHDRSZ), ptr_disp(fp, fp->upper + PAGEHDRSZ), old_data.iov_len - fp->upper - PAGEHDRSZ); - 
memcpy(mp->entries, fp->entries, - page_numkeys(fp) * sizeof(mp->entries[0])); + memcpy(mp->entries, fp->entries, page_numkeys(fp) * sizeof(mp->entries[0])); for (size_t i = 0; i < page_numkeys(fp); i++) { cASSERT(mc, mp->entries[i] + growth <= UINT16_MAX); mp->entries[i] += (indx_t)growth; @@ -1357,8 +1266,7 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, cASSERT(mc, key->iov_len < UINT16_MAX); node_set_ks(node, key->iov_len); memcpy(node_key(node), key->iov_base, key->iov_len); - cASSERT(mc, ptr_disp(node_key(node), node_ds(node)) < - ptr_disp(mc->pg[mc->top], env->ps)); + cASSERT(mc, ptr_disp(node_key(node), node_ds(node)) < ptr_disp(mc->pg[mc->top], env->ps)); goto fix_parent; } @@ -1377,12 +1285,9 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, insert_node:; const unsigned naf = flags & NODE_ADD_FLAGS; - size_t nsize = is_dupfix_leaf(mc->pg[mc->top]) - ? key->iov_len - : leaf_size(env, key, ref_data); + size_t nsize = is_dupfix_leaf(mc->pg[mc->top]) ? key->iov_len : leaf_size(env, key, ref_data); if (page_room(mc->pg[mc->top]) < nsize) { - rc = page_split(mc, key, ref_data, P_INVALID, - insert_key ? naf : naf | MDBX_SPLIT_REPLACE); + rc = page_split(mc, key, ref_data, P_INVALID, insert_key ? naf : naf | MDBX_SPLIT_REPLACE); if (rc == MDBX_SUCCESS && AUDIT_ENABLED()) rc = insert_key ? 
cursor_check(mc) : cursor_check_updating(mc); } else { @@ -1420,12 +1325,8 @@ insert_node:; empty.iov_base = nullptr; node_t *node = page_node(mc->pg[mc->top], mc->ki[mc->top]); #define SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE 1 - STATIC_ASSERT( - (MDBX_NODUPDATA >> SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE) == - MDBX_NOOVERWRITE); - unsigned inner_flags = - MDBX_CURRENT | ((flags & MDBX_NODUPDATA) >> - SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE); + STATIC_ASSERT((MDBX_NODUPDATA >> SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE) == MDBX_NOOVERWRITE); + unsigned inner_flags = MDBX_CURRENT | ((flags & MDBX_NODUPDATA) >> SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE); if ((flags & MDBX_CURRENT) == 0) { inner_flags -= MDBX_CURRENT; rc = cursor_dupsort_setup(mc, node, mc->pg[mc->top]); @@ -1434,8 +1335,7 @@ insert_node:; } subcur_t *const mx = mc->subcur; if (sub_root) { - cASSERT(mc, mx->nested_tree.height == 1 && - mx->nested_tree.root == sub_root->pgno); + cASSERT(mc, mx->nested_tree.height == 1 && mx->nested_tree.root == sub_root->pgno); mx->cursor.flags = z_inner; mx->cursor.top = 0; mx->cursor.pg[0] = sub_root; @@ -1470,9 +1370,7 @@ insert_node:; m2->subcur->cursor.top_and_flags = z_inner; m2->subcur->cursor.ki[0] = 0; } - DEBUG("Sub-dbi -%zu root page %" PRIaPGNO, - cursor_dbi(&m2->subcur->cursor), - m2->subcur->nested_tree.root); + DEBUG("Sub-dbi -%zu root page %" PRIaPGNO, cursor_dbi(&m2->subcur->cursor), m2->subcur->nested_tree.root); } else if (!insert_key && m2->ki[mc->top] < nkeys) cursor_inner_refresh(m2, mp, m2->ki[mc->top]); } @@ -1480,10 +1378,8 @@ insert_node:; cASSERT(mc, mc->subcur->nested_tree.items < PTRDIFF_MAX); const size_t probe = (size_t)mc->subcur->nested_tree.items; #define SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND 1 - STATIC_ASSERT((MDBX_APPENDDUP >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND) == - MDBX_APPEND); - inner_flags |= - (flags & MDBX_APPENDDUP) >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND; + STATIC_ASSERT((MDBX_APPENDDUP >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND) 
== MDBX_APPEND); + inner_flags |= (flags & MDBX_APPENDDUP) >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND; rc = cursor_put(&mc->subcur->cursor, data, &empty, inner_flags); if (flags & N_TREE) { void *db = node_data(node); @@ -1530,16 +1426,13 @@ insert_node:; return rc; } -__hot int cursor_put_checklen(MDBX_cursor *mc, const MDBX_val *key, - MDBX_val *data, unsigned flags) { +__hot int cursor_put_checklen(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, unsigned flags) { cASSERT(mc, (mc->flags & z_inner) == 0); - if (unlikely(key->iov_len > mc->clc->k.lmax || - key->iov_len < mc->clc->k.lmin)) { + if (unlikely(key->iov_len > mc->clc->k.lmax || key->iov_len < mc->clc->k.lmin)) { cASSERT(mc, !"Invalid key-size"); return MDBX_BAD_VALSIZE; } - if (unlikely(data->iov_len > mc->clc->v.lmax || - data->iov_len < mc->clc->v.lmin)) { + if (unlikely(data->iov_len > mc->clc->v.lmax || data->iov_len < mc->clc->v.lmin)) { cASSERT(mc, !"Invalid data-size"); return MDBX_BAD_VALSIZE; } @@ -1611,8 +1504,7 @@ __hot int cursor_del(MDBX_cursor *mc, unsigned flags) { page_t *mp = mc->pg[mc->top]; cASSERT(mc, is_modifable(mc->txn, mp)); if (!MDBX_DISABLE_VALIDATION && unlikely(!check_leaf_type(mc, mp))) { - ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", - mp->pgno, mp->flags); + ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", mp->pgno, mp->flags); return MDBX_CORRUPTED; } if (is_dupfix_leaf(mp)) @@ -1644,8 +1536,7 @@ __hot int cursor_del(MDBX_cursor *mc, unsigned flags) { node = node_shrink(mp, mc->ki[mc->top], node); mc->subcur->cursor.pg[0] = node_data(node); /* fix other sub-DB cursors pointed at sub-pages on this page */ - for (MDBX_cursor *m2 = mc->txn->cursors[cursor_dbi(mc)]; m2; - m2 = m2->next) { + for (MDBX_cursor *m2 = mc->txn->cursors[cursor_dbi(mc)]; m2; m2 = m2->next) { if (!is_related(mc, m2) || m2->pg[mc->top] != mp) continue; const node_t *inner = node; @@ -1664,8 +1555,7 @@ __hot int cursor_del(MDBX_cursor *mc, unsigned flags) { 
} } mc->tree->items -= 1; - cASSERT(mc, mc->tree->items > 0 && mc->tree->height > 0 && - mc->tree->root != P_INVALID); + cASSERT(mc, mc->tree->items > 0 && mc->tree->height > 0 && mc->tree->root != P_INVALID); return rc; } /* otherwise fall thru and delete the sub-DB */ @@ -1725,9 +1615,7 @@ del_key: /* DB is totally empty now, just bail out. * Other cursors adjustments were already done * by rebalance and aren't needed here. */ - cASSERT(mc, mc->tree->items == 0 && - (mc->tree->root == P_INVALID || - (is_inner(mc) && !mc->tree->root)) && + cASSERT(mc, mc->tree->items == 0 && (mc->tree->root == P_INVALID || (is_inner(mc) && !mc->tree->root)) && mc->flags < 0); return MDBX_SUCCESS; } @@ -1736,9 +1624,7 @@ del_key: mp = mc->pg[mc->top]; cASSERT(mc, is_leaf(mc->pg[mc->top])); size_t nkeys = page_numkeys(mp); - cASSERT(mc, - (mc->tree->items > 0 && nkeys > 0) || - ((mc->flags & z_inner) && mc->tree->items == 0 && nkeys == 0)); + cASSERT(mc, (mc->tree->items > 0 && nkeys > 0) || ((mc->flags & z_inner) && mc->tree->items == 0 && nkeys == 0)); /* Adjust this and other cursors pointing to mp */ const intptr_t top = /* может быть сброшен в -1 */ mc->top; @@ -1777,8 +1663,7 @@ del_key: * нужно установить на первый дубликат. 
*/ if (is_pointed(&m3->subcur->cursor)) { if ((node_flags(node) & N_TREE) == 0) { - cASSERT(m3, m3->subcur->cursor.top == 0 && - m3->subcur->nested_tree.height == 1); + cASSERT(m3, m3->subcur->cursor.top == 0 && m3->subcur->nested_tree.height == 1); m3->subcur->cursor.pg[0] = node_data(node); } } else { @@ -1808,14 +1693,12 @@ fail: /*----------------------------------------------------------------------------*/ -__hot csr_t cursor_seek(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, - MDBX_cursor_op op) { +__hot csr_t cursor_seek(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { DKBUF_DEBUG; csr_t ret; ret.exact = false; - if (unlikely(key->iov_len < mc->clc->k.lmin || - key->iov_len > mc->clc->k.lmax)) { + if (unlikely(key->iov_len < mc->clc->k.lmin || key->iov_len > mc->clc->k.lmax)) { cASSERT(mc, !"Invalid key-size"); ret.err = MDBX_BAD_VALSIZE; return ret; @@ -1848,9 +1731,8 @@ __hot csr_t cursor_seek(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, const size_t nkeys = page_numkeys(mp); if (unlikely(nkeys == 0)) { /* при создании первой листовой страницы */ - cASSERT(mc, mc->top == 0 && mc->tree->height == 1 && - mc->tree->branch_pages == 0 && - mc->tree->leaf_pages == 1 && mc->ki[0] == 0); + cASSERT(mc, mc->top == 0 && mc->tree->height == 1 && mc->tree->branch_pages == 0 && mc->tree->leaf_pages == 1 && + mc->ki[0] == 0); /* Логически верно, но нет смысла, ибо это мимолетная/временная * ситуация до добавления элемента выше по стеку вызовов: mc->flags |= z_eof_soft | z_hollow; */ @@ -1901,8 +1783,7 @@ __hot csr_t cursor_seek(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, * первой/последний и соответственно такое сравнение было выше. 
*/ if (mc->ki[mc->top] > 0 && mc->ki[mc->top] < nkeys - 1) { if (is_dupfix_leaf(mp)) { - nodekey.iov_base = - page_dupfix_ptr(mp, mc->ki[mc->top], nodekey.iov_len); + nodekey.iov_base = page_dupfix_ptr(mp, mc->ki[mc->top], nodekey.iov_len); } else { node = page_node(mp, mc->ki[mc->top]); nodekey = get_key(node); @@ -1928,8 +1809,7 @@ __hot csr_t cursor_seek(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, mc->ki[mc->top] = (indx_t)nkeys; if (op < MDBX_SET_RANGE) { target_not_found: - cASSERT(mc, op == MDBX_SET || op == MDBX_SET_KEY || - op == MDBX_GET_BOTH || op == MDBX_GET_BOTH_RANGE); + cASSERT(mc, op == MDBX_SET || op == MDBX_SET_KEY || op == MDBX_GET_BOTH || op == MDBX_GET_BOTH_RANGE); /* Операция предполагает поиск конкретного ключа, который не найден. * Поэтому переводим курсор в неустановленное состояние, но без сброса * top, что позволяет работать fastpath при последующем поиске по дереву @@ -1991,8 +1871,7 @@ got_node: cASSERT(mc, is_pointed(mc) && !inner_pointed(mc)); cASSERT(mc, mc->ki[mc->top] < page_numkeys(mc->pg[mc->top])); if (!MDBX_DISABLE_VALIDATION && unlikely(!check_leaf_type(mc, mp))) { - ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", - mp->pgno, mp->flags); + ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", mp->pgno, mp->flags); ret.err = MDBX_CORRUPTED; return ret; } @@ -2037,8 +1916,7 @@ got_node: } } else if (likely(data)) { if (op <= MDBX_GET_BOTH_RANGE) { - if (unlikely(data->iov_len < mc->clc->v.lmin || - data->iov_len > mc->clc->v.lmax)) { + if (unlikely(data->iov_len < mc->clc->v.lmin || data->iov_len > mc->clc->v.lmax)) { cASSERT(mc, !"Invalid data-size"); ret.err = MDBX_BAD_VALSIZE; return ret; @@ -2049,13 +1927,11 @@ got_node: if (aligned_data.iov_len == 8) { if (unlikely(7 & (uintptr_t)aligned_data.iov_base)) /* copy instead of return error to avoid break compatibility */ - aligned_data.iov_base = - bcopy_8(&aligned_databytes, aligned_data.iov_base); + aligned_data.iov_base = 
bcopy_8(&aligned_databytes, aligned_data.iov_base); } else if (aligned_data.iov_len == 4) { if (unlikely(3 & (uintptr_t)aligned_data.iov_base)) /* copy instead of return error to avoid break compatibility */ - aligned_data.iov_base = - bcopy_4(&aligned_databytes, aligned_data.iov_base); + aligned_data.iov_base = bcopy_4(&aligned_databytes, aligned_data.iov_base); } else { cASSERT(mc, !"data-size is invalid for MDBX_INTEGERDUP"); ret.err = MDBX_BAD_VALSIZE; @@ -2089,18 +1965,15 @@ got_node: if (op >= MDBX_SET_KEY) get_key_optional(node, key); - DEBUG("==> cursor placed on key [%s], data [%s]", DKEY_DEBUG(key), - DVAL_DEBUG(data)); + DEBUG("==> cursor placed on key [%s], data [%s]", DKEY_DEBUG(key), DVAL_DEBUG(data)); ret.err = MDBX_SUCCESS; be_filled(mc); return ret; } -__hot int cursor_ops(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, - const MDBX_cursor_op op) { +__hot int cursor_ops(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, const MDBX_cursor_op op) { if (op != MDBX_GET_CURRENT) - DEBUG(">> cursor %p(0x%x), ops %u, key %p, value %p", - __Wpedantic_format_voidptr(mc), mc->flags, op, + DEBUG(">> cursor %p(0x%x), ops %u, key %p, value %p", __Wpedantic_format_voidptr(mc), mc->flags, op, __Wpedantic_format_voidptr(key), __Wpedantic_format_voidptr(data)); int rc; @@ -2163,8 +2036,7 @@ __hot int cursor_ops(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, cASSERT(mc, is_filled(mc)); else if (rc == MDBX_NOTFOUND && mc->tree->items) { cASSERT(mc, is_pointed(mc)); - cASSERT(mc, op == MDBX_SET_RANGE || op == MDBX_GET_BOTH_RANGE || - is_hollow(mc)); + cASSERT(mc, op == MDBX_SET_RANGE || op == MDBX_GET_BOTH_RANGE || is_hollow(mc)); cASSERT(mc, op == MDBX_GET_BOTH_RANGE || inner_hollow(mc)); } else cASSERT(mc, is_poor(mc) && !is_filled(mc)); @@ -2271,8 +2143,7 @@ __hot int cursor_ops(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, if ((node_flags(node) & N_DUP) == 0) return node_read(mc, node, data, mc->pg[mc->top]); else if (MDBX_DISABLE_VALIDATION || likely(mc->subcur)) 
- return ((op == MDBX_FIRST_DUP) ? inner_first - : inner_last)(&mc->subcur->cursor, data); + return ((op == MDBX_FIRST_DUP) ? inner_first : inner_last)(&mc->subcur->cursor, data); else return unexpected_dupsort(mc); } @@ -2338,8 +2209,7 @@ __hot int cursor_ops(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, rc = outer_prev(mc, key, data, MDBX_PREV_NODUP); else if (op == MDBX_TO_KEY_GREATER_THAN) rc = outer_next(mc, key, data, MDBX_NEXT_NODUP); - } else if (op < MDBX_TO_KEY_EQUAL && - (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) + } else if (op < MDBX_TO_KEY_EQUAL && (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) rc = outer_prev(mc, key, data, MDBX_PREV_NODUP); else if (op == MDBX_TO_KEY_EQUAL && rc == MDBX_SUCCESS) rc = MDBX_NOTFOUND; @@ -2371,8 +2241,7 @@ __hot int cursor_ops(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, rc = inner_prev(mx, data); else if (op == MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN) rc = inner_next(mx, data); - } else if (op < MDBX_TO_EXACT_KEY_VALUE_EQUAL && - (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) + } else if (op < MDBX_TO_EXACT_KEY_VALUE_EQUAL && (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) rc = inner_prev(mx, data); else if (op == MDBX_TO_EXACT_KEY_VALUE_EQUAL && rc == MDBX_SUCCESS) rc = MDBX_NOTFOUND; @@ -2425,8 +2294,7 @@ __hot int cursor_ops(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, rc = outer_prev(mc, key, data, MDBX_PREV); else if (op == MDBX_TO_PAIR_GREATER_THAN) rc = outer_next(mc, key, data, MDBX_NEXT); - } else if (op < MDBX_TO_PAIR_EQUAL && - (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) + } else if (op < MDBX_TO_PAIR_EQUAL && (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) rc = outer_prev(mc, key, data, MDBX_PREV); else if (op == MDBX_TO_PAIR_EQUAL && rc == MDBX_SUCCESS) rc = MDBX_NOTFOUND; @@ -2458,8 +2326,7 @@ __hot int cursor_ops(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, break; } } - } else if (op < MDBX_TO_PAIR_EQUAL && - (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) + } else if (op < MDBX_TO_PAIR_EQUAL && (rc == 
MDBX_NOTFOUND || rc == MDBX_SUCCESS)) rc = outer_prev(mc, key, data, MDBX_PREV_NODUP); else if (op == MDBX_TO_PAIR_EQUAL && rc == MDBX_SUCCESS) rc = MDBX_NOTFOUND; diff --git a/src/cursor.h b/src/cursor.h index 9ecad9d6..4ae45222 100644 --- a/src/cursor.h +++ b/src/cursor.h @@ -125,13 +125,11 @@ enum cursor_state { z_fresh_mark = z_poor_mark | z_fresh }; -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -is_inner(const MDBX_cursor *mc) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_inner(const MDBX_cursor *mc) { return (mc->flags & z_inner) != 0; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -is_poor(const MDBX_cursor *mc) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_poor(const MDBX_cursor *mc) { const bool r = mc->top < 0; cASSERT(mc, r == (mc->top_and_flags < 0)); if (r && mc->subcur) @@ -139,8 +137,7 @@ is_poor(const MDBX_cursor *mc) { return r; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -is_pointed(const MDBX_cursor *mc) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_pointed(const MDBX_cursor *mc) { const bool r = mc->top >= 0; cASSERT(mc, r == (mc->top_and_flags >= 0)); if (!r && mc->subcur) @@ -148,49 +145,41 @@ is_pointed(const MDBX_cursor *mc) { return r; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -is_hollow(const MDBX_cursor *mc) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_hollow(const MDBX_cursor *mc) { const bool r = mc->flags < 0; if (!r) { cASSERT(mc, mc->top >= 0); - cASSERT(mc, (mc->flags & z_eof_hard) || - mc->ki[mc->top] < page_numkeys(mc->pg[mc->top])); + cASSERT(mc, (mc->flags & z_eof_hard) || mc->ki[mc->top] < page_numkeys(mc->pg[mc->top])); } else if (mc->subcur) cASSERT(mc, is_poor(&mc->subcur->cursor)); return r; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -is_eof(const MDBX_cursor *mc) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static 
inline bool is_eof(const MDBX_cursor *mc) { const bool r = z_eof_soft <= (uint8_t)mc->flags; return r; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -is_filled(const MDBX_cursor *mc) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_filled(const MDBX_cursor *mc) { const bool r = z_eof_hard > (uint8_t)mc->flags; return r; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -inner_filled(const MDBX_cursor *mc) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool inner_filled(const MDBX_cursor *mc) { return mc->subcur && is_filled(&mc->subcur->cursor); } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -inner_pointed(const MDBX_cursor *mc) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool inner_pointed(const MDBX_cursor *mc) { return mc->subcur && is_pointed(&mc->subcur->cursor); } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -inner_hollow(const MDBX_cursor *mc) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool inner_hollow(const MDBX_cursor *mc) { return !mc->subcur || is_hollow(&mc->subcur->cursor); } MDBX_MAYBE_UNUSED static inline void inner_gone(MDBX_cursor *mc) { if (mc->subcur) { - TRACE("reset inner cursor %p", - __Wpedantic_format_voidptr(&mc->subcur->cursor)); + TRACE("reset inner cursor %p", __Wpedantic_format_voidptr(&mc->subcur->cursor)); mc->subcur->nested_tree.root = 0; mc->subcur->cursor.top_and_flags = z_inner | z_poor_mark; } @@ -218,8 +207,7 @@ MDBX_MAYBE_UNUSED static inline void be_filled(MDBX_cursor *mc) { cASSERT(mc, inner == is_inner(mc)); } -MDBX_MAYBE_UNUSED static inline bool is_related(const MDBX_cursor *base, - const MDBX_cursor *scan) { +MDBX_MAYBE_UNUSED static inline bool is_related(const MDBX_cursor *base, const MDBX_cursor *scan) { cASSERT(base, base->top >= 0); return base->top <= scan->top && base != scan; } @@ -238,36 +226,30 @@ enum cursor_checking { MDBX_INTERNAL int __must_check_result 
cursor_check(const MDBX_cursor *mc); -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline size_t -cursor_dbi(const MDBX_cursor *mc) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline size_t cursor_dbi(const MDBX_cursor *mc) { cASSERT(mc, mc->txn && mc->txn->signature == txn_signature); size_t dbi = mc->dbi_state - mc->txn->dbi_state; cASSERT(mc, dbi < mc->txn->env->n_dbi); return dbi; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -cursor_dbi_changed(const MDBX_cursor *mc) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool cursor_dbi_changed(const MDBX_cursor *mc) { return dbi_changed(mc->txn, cursor_dbi(mc)); } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t * -cursor_dbi_state(const MDBX_cursor *mc) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t *cursor_dbi_state(const MDBX_cursor *mc) { return mc->dbi_state; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -cursor_is_gc(const MDBX_cursor *mc) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool cursor_is_gc(const MDBX_cursor *mc) { return mc->dbi_state == mc->txn->dbi_state + FREE_DBI; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -cursor_is_main(const MDBX_cursor *mc) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool cursor_is_main(const MDBX_cursor *mc) { return mc->dbi_state == mc->txn->dbi_state + MAIN_DBI; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -cursor_is_core(const MDBX_cursor *mc) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool cursor_is_core(const MDBX_cursor *mc) { return mc->dbi_state < mc->txn->dbi_state + CORE_DBS; } @@ -277,10 +259,8 @@ MDBX_MAYBE_UNUSED static inline int cursor_dbi_dbg(const MDBX_cursor *mc) { return (mc->flags & z_inner) ? 
-dbi : dbi; } -MDBX_MAYBE_UNUSED static inline int __must_check_result -cursor_push(MDBX_cursor *mc, page_t *mp, indx_t ki) { - TRACE("pushing page %" PRIaPGNO " on db %d cursor %p", mp->pgno, - cursor_dbi_dbg(mc), __Wpedantic_format_voidptr(mc)); +MDBX_MAYBE_UNUSED static inline int __must_check_result cursor_push(MDBX_cursor *mc, page_t *mp, indx_t ki) { + TRACE("pushing page %" PRIaPGNO " on db %d cursor %p", mp->pgno, cursor_dbi_dbg(mc), __Wpedantic_format_voidptr(mc)); if (unlikely(mc->top >= CURSOR_STACK_SIZE - 1)) { be_poor(mc); mc->txn->flags |= MDBX_TXN_ERROR; @@ -293,43 +273,32 @@ cursor_push(MDBX_cursor *mc, page_t *mp, indx_t ki) { } MDBX_MAYBE_UNUSED static inline void cursor_pop(MDBX_cursor *mc) { - TRACE("popped page %" PRIaPGNO " off db %d cursor %p", mc->pg[mc->top]->pgno, - cursor_dbi_dbg(mc), __Wpedantic_format_voidptr(mc)); + TRACE("popped page %" PRIaPGNO " off db %d cursor %p", mc->pg[mc->top]->pgno, cursor_dbi_dbg(mc), + __Wpedantic_format_voidptr(mc)); cASSERT(mc, mc->top >= 0); mc->top -= 1; } -MDBX_NOTHROW_PURE_FUNCTION static inline bool -check_leaf_type(const MDBX_cursor *mc, const page_t *mp) { - return (((page_type(mp) ^ mc->checking) & - (z_branch | z_leaf | z_largepage | z_dupfix)) == 0); +MDBX_NOTHROW_PURE_FUNCTION static inline bool check_leaf_type(const MDBX_cursor *mc, const page_t *mp) { + return (((page_type(mp) ^ mc->checking) & (z_branch | z_leaf | z_largepage | z_dupfix)) == 0); } MDBX_INTERNAL void cursor_eot(MDBX_cursor *mc, const bool merge); -MDBX_INTERNAL int cursor_shadow(MDBX_cursor *parent_cursor, - MDBX_txn *nested_txn, const size_t dbi); +MDBX_INTERNAL int cursor_shadow(MDBX_cursor *parent_cursor, MDBX_txn *nested_txn, const size_t dbi); -MDBX_INTERNAL MDBX_cursor *cursor_cpstk(const MDBX_cursor *csrc, - MDBX_cursor *cdst); +MDBX_INTERNAL MDBX_cursor *cursor_cpstk(const MDBX_cursor *csrc, MDBX_cursor *cdst); -MDBX_INTERNAL int __must_check_result cursor_ops(MDBX_cursor *mc, MDBX_val *key, - MDBX_val *data, 
+MDBX_INTERNAL int __must_check_result cursor_ops(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, const MDBX_cursor_op op); -MDBX_INTERNAL int __must_check_result cursor_put_checklen(MDBX_cursor *mc, - const MDBX_val *key, - MDBX_val *data, +MDBX_INTERNAL int __must_check_result cursor_put_checklen(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, unsigned flags); -MDBX_INTERNAL int __must_check_result cursor_put(MDBX_cursor *mc, - const MDBX_val *key, - MDBX_val *data, - unsigned flags); +MDBX_INTERNAL int __must_check_result cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, unsigned flags); MDBX_INTERNAL int __must_check_result cursor_check_updating(MDBX_cursor *mc); -MDBX_INTERNAL int __must_check_result cursor_del(MDBX_cursor *mc, - unsigned flags); +MDBX_INTERNAL int __must_check_result cursor_del(MDBX_cursor *mc, unsigned flags); MDBX_INTERNAL int __must_check_result cursor_sibling_left(MDBX_cursor *mc); MDBX_INTERNAL int __must_check_result cursor_sibling_right(MDBX_cursor *mc); @@ -339,56 +308,37 @@ typedef struct cursor_set_result { bool exact; } csr_t; -MDBX_INTERNAL csr_t cursor_seek(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, - MDBX_cursor_op op); +MDBX_INTERNAL csr_t cursor_seek(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op); -MDBX_INTERNAL int __must_check_result inner_first(MDBX_cursor *__restrict mc, +MDBX_INTERNAL int __must_check_result inner_first(MDBX_cursor *__restrict mc, MDBX_val *__restrict data); +MDBX_INTERNAL int __must_check_result inner_last(MDBX_cursor *__restrict mc, MDBX_val *__restrict data); +MDBX_INTERNAL int __must_check_result outer_first(MDBX_cursor *__restrict mc, MDBX_val *__restrict key, MDBX_val *__restrict data); -MDBX_INTERNAL int __must_check_result inner_last(MDBX_cursor *__restrict mc, - MDBX_val *__restrict data); -MDBX_INTERNAL int __must_check_result outer_first(MDBX_cursor *__restrict mc, - MDBX_val *__restrict key, - MDBX_val *__restrict data); -MDBX_INTERNAL int 
__must_check_result outer_last(MDBX_cursor *__restrict mc, - MDBX_val *__restrict key, +MDBX_INTERNAL int __must_check_result outer_last(MDBX_cursor *__restrict mc, MDBX_val *__restrict key, MDBX_val *__restrict data); -MDBX_INTERNAL int __must_check_result inner_next(MDBX_cursor *__restrict mc, - MDBX_val *__restrict data); -MDBX_INTERNAL int __must_check_result inner_prev(MDBX_cursor *__restrict mc, - MDBX_val *__restrict data); -MDBX_INTERNAL int __must_check_result outer_next(MDBX_cursor *__restrict mc, - MDBX_val *__restrict key, - MDBX_val *__restrict data, - MDBX_cursor_op op); -MDBX_INTERNAL int __must_check_result outer_prev(MDBX_cursor *__restrict mc, - MDBX_val *__restrict key, - MDBX_val *__restrict data, - MDBX_cursor_op op); +MDBX_INTERNAL int __must_check_result inner_next(MDBX_cursor *__restrict mc, MDBX_val *__restrict data); +MDBX_INTERNAL int __must_check_result inner_prev(MDBX_cursor *__restrict mc, MDBX_val *__restrict data); +MDBX_INTERNAL int __must_check_result outer_next(MDBX_cursor *__restrict mc, MDBX_val *__restrict key, + MDBX_val *__restrict data, MDBX_cursor_op op); +MDBX_INTERNAL int __must_check_result outer_prev(MDBX_cursor *__restrict mc, MDBX_val *__restrict key, + MDBX_val *__restrict data, MDBX_cursor_op op); -MDBX_INTERNAL int cursor_init4walk(cursor_couple_t *couple, - const MDBX_txn *const txn, - tree_t *const tree, kvx_t *const kvx); +MDBX_INTERNAL int cursor_init4walk(cursor_couple_t *couple, const MDBX_txn *const txn, tree_t *const tree, + kvx_t *const kvx); -MDBX_INTERNAL int __must_check_result cursor_init(MDBX_cursor *mc, - const MDBX_txn *txn, - size_t dbi); +MDBX_INTERNAL int __must_check_result cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, size_t dbi); -MDBX_INTERNAL int __must_check_result cursor_dupsort_setup(MDBX_cursor *mc, - const node_t *node, - const page_t *mp); +MDBX_INTERNAL int __must_check_result cursor_dupsort_setup(MDBX_cursor *mc, const node_t *node, const page_t *mp); -MDBX_INTERNAL int 
__must_check_result cursor_touch(MDBX_cursor *const mc, - const MDBX_val *key, - const MDBX_val *data); +MDBX_INTERNAL int __must_check_result cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, const MDBX_val *data); /*----------------------------------------------------------------------------*/ /* Update sub-page pointer, if any, in mc->subcur. * Needed when the node which contains the sub-page may have moved. * Called with mp = mc->pg[mc->top], ki = mc->ki[mc->top]. */ -MDBX_MAYBE_UNUSED static inline void -cursor_inner_refresh(const MDBX_cursor *mc, const page_t *mp, unsigned ki) { +MDBX_MAYBE_UNUSED static inline void cursor_inner_refresh(const MDBX_cursor *mc, const page_t *mp, unsigned ki) { cASSERT(mc, is_leaf(mp)); const node_t *node = page_node(mp, ki); if ((node_flags(node) & (N_DUP | N_TREE)) == N_DUP) diff --git a/src/dbi.c b/src/dbi.c index acadf0e5..60b32d8a 100644 --- a/src/dbi.c +++ b/src/dbi.c @@ -8,16 +8,14 @@ size_t dbi_bitmap_ctz_fallback(const MDBX_txn *txn, intptr_t bmi) { tASSERT(txn, bmi > 0); bmi &= -bmi; if (sizeof(txn->dbi_sparse[0]) > 4) { - static const uint8_t debruijn_ctz64[64] = { - 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, - 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, - 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, - 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12}; + static const uint8_t debruijn_ctz64[64] = {0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, + 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, + 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, + 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12}; return debruijn_ctz64[(UINT64_C(0x022FDD63CC95386D) * (uint64_t)bmi) >> 58]; } else { - static const uint8_t debruijn_ctz32[32] = { - 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, - 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; + static const uint8_t debruijn_ctz32[32] = {0, 1, 28, 
2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; return debruijn_ctz32[(UINT32_C(0x077CB531) * (uint32_t)bmi) >> 27]; } } @@ -45,8 +43,7 @@ __noinline int dbi_import(MDBX_txn *txn, const size_t dbi) { const size_t bitmap_indx = dbi / bitmap_chunk; const size_t bitmap_mask = (size_t)1 << dbi % bitmap_chunk; if (dbi >= txn->n_dbi) { - for (size_t i = (txn->n_dbi + bitmap_chunk - 1) / bitmap_chunk; - bitmap_indx >= i; ++i) + for (size_t i = (txn->n_dbi + bitmap_chunk - 1) / bitmap_chunk; bitmap_indx >= i; ++i) txn->dbi_sparse[i] = 0; eASSERT(env, (txn->dbi_sparse[bitmap_indx] & bitmap_mask) == 0); MDBX_txn *scan = txn; @@ -92,8 +89,7 @@ __noinline int dbi_import(MDBX_txn *txn, const size_t dbi) { int rc = dbi_check(parent, dbi); /* копируем состояние table очищая new-флаги. */ eASSERT(env, txn->dbi_seqs == parent->dbi_seqs); - txn->dbi_state[dbi] = - parent->dbi_state[dbi] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); + txn->dbi_state[dbi] = parent->dbi_state[dbi] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); if (likely(rc == MDBX_SUCCESS)) { txn->dbs[dbi] = parent->dbs[dbi]; if (parent->cursors[dbi]) { @@ -111,8 +107,7 @@ __noinline int dbi_import(MDBX_txn *txn, const size_t dbi) { txn->dbi_state[dbi] = DBI_LINDO; } else { eASSERT(env, txn->dbi_seqs[dbi] != env->dbi_seqs[dbi].weak); - if (unlikely((txn->dbi_state[dbi] & (DBI_VALID | DBI_OLDEN)) || - txn->cursors[dbi])) { + if (unlikely((txn->dbi_state[dbi] & (DBI_VALID | DBI_OLDEN)) || txn->cursors[dbi])) { /* хендл уже использовался в транзакции, но был закрыт или переоткрыт, * либо при явном пере-открытии хендла есть висячие курсоры */ eASSERT(env, (txn->dbi_state[dbi] & DBI_STALE) == 0); @@ -137,8 +132,7 @@ __noinline int dbi_import(MDBX_txn *txn, const size_t dbi) { return MDBX_BAD_DBI; } -static int defer_and_release(MDBX_env *const env, - defer_free_item_t *const chain) { +static int defer_and_release(MDBX_env *const env, defer_free_item_t *const chain) { size_t 
length = 0; defer_free_item_t *obsolete_chain = nullptr; #if MDBX_ENABLE_DBI_LOCKFREE @@ -232,8 +226,7 @@ int dbi_update(MDBX_txn *txn, int keep) { while ((env->dbs_flags[i - 1] & DB_VALID) == 0) { --i; eASSERT(env, i >= CORE_DBS); - eASSERT(env, !env->dbs_flags[i] && !env->kvs[i].name.iov_len && - !env->kvs[i].name.iov_base); + eASSERT(env, !env->dbs_flags[i] && !env->kvs[i].name.iov_len && !env->kvs[i].name.iov_base); } env->n_dbi = (unsigned)i; defer_and_release(env, defer_chain); @@ -241,21 +234,17 @@ int dbi_update(MDBX_txn *txn, int keep) { return MDBX_SUCCESS; } -int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, - MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { +int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { const MDBX_env *const env = txn->env; eASSERT(env, dbi < txn->n_dbi && dbi < env->n_dbi); eASSERT(env, dbi_state(txn, dbi) & DBI_LINDO); eASSERT(env, env->dbs_flags[dbi] != DB_POISON); if ((env->dbs_flags[dbi] & DB_VALID) == 0) { - eASSERT(env, !env->kvs[dbi].clc.k.cmp && !env->kvs[dbi].clc.v.cmp && - !env->kvs[dbi].name.iov_len && - !env->kvs[dbi].name.iov_base && - !env->kvs[dbi].clc.k.lmax && !env->kvs[dbi].clc.k.lmin && + eASSERT(env, !env->kvs[dbi].clc.k.cmp && !env->kvs[dbi].clc.v.cmp && !env->kvs[dbi].name.iov_len && + !env->kvs[dbi].name.iov_base && !env->kvs[dbi].clc.k.lmax && !env->kvs[dbi].clc.k.lmin && !env->kvs[dbi].clc.v.lmax && !env->kvs[dbi].clc.v.lmin); } else { - eASSERT(env, !(txn->dbi_state[dbi] & DBI_VALID) || - (txn->dbs[dbi].flags | DB_VALID) == env->dbs_flags[dbi]); + eASSERT(env, !(txn->dbi_state[dbi] & DBI_VALID) || (txn->dbs[dbi].flags | DB_VALID) == env->dbs_flags[dbi]); eASSERT(env, env->kvs[dbi].name.iov_base || dbi < CORE_DBS); } @@ -271,8 +260,7 @@ int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, * 4) user_flags отличаются, но table пустая и задан флаг MDBX_CREATE * = предполагаем что пользователь пересоздает table; */ 
- if ((user_flags & ~MDBX_CREATE) != - (unsigned)(env->dbs_flags[dbi] & DB_PERSISTENT_FLAGS)) { + if ((user_flags & ~MDBX_CREATE) != (unsigned)(env->dbs_flags[dbi] & DB_PERSISTENT_FLAGS)) { /* flags are differs, check other conditions */ if ((!user_flags && (!keycmp || keycmp == env->kvs[dbi].clc.k.cmp) && (!datacmp || datacmp == env->kvs[dbi].clc.v.cmp)) || @@ -287,11 +275,8 @@ int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, if (unlikely(err == MDBX_SUCCESS)) return err; } - eASSERT(env, ((env->dbs_flags[dbi] ^ txn->dbs[dbi].flags) & - DB_PERSISTENT_FLAGS) == 0); - eASSERT(env, - (txn->dbi_state[dbi] & (DBI_LINDO | DBI_VALID | DBI_STALE)) == - (DBI_LINDO | DBI_VALID)); + eASSERT(env, ((env->dbs_flags[dbi] ^ txn->dbs[dbi].flags) & DB_PERSISTENT_FLAGS) == 0); + eASSERT(env, (txn->dbi_state[dbi] & (DBI_LINDO | DBI_VALID | DBI_STALE)) == (DBI_LINDO | DBI_VALID)); if (unlikely(txn->dbs[dbi].leaf_pages)) return /* FIXME: return extended info */ MDBX_INCOMPATIBLE; @@ -299,13 +284,11 @@ int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, if (unlikely(txn->cursors[dbi])) return MDBX_DANGLING_DBI; env->dbs_flags[dbi] = DB_POISON; - atomic_store32(&env->dbi_seqs[dbi], dbi_seq_next(env, dbi), - mo_AcquireRelease); + atomic_store32(&env->dbi_seqs[dbi], dbi_seq_next(env, dbi), mo_AcquireRelease); const uint32_t seq = dbi_seq_next(env, dbi); const uint16_t db_flags = user_flags & DB_PERSISTENT_FLAGS; - eASSERT(env, txn->dbs[dbi].height == 0 && txn->dbs[dbi].items == 0 && - txn->dbs[dbi].root == P_INVALID); + eASSERT(env, txn->dbs[dbi].height == 0 && txn->dbs[dbi].items == 0 && txn->dbs[dbi].root == P_INVALID); env->kvs[dbi].clc.k.cmp = keycmp ? keycmp : builtin_keycmp(user_flags); env->kvs[dbi].clc.v.cmp = datacmp ? datacmp : builtin_datacmp(user_flags); txn->dbs[dbi].flags = db_flags; @@ -325,8 +308,7 @@ int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, } if (!keycmp) - keycmp = (env->dbs_flags[dbi] & DB_VALID) ? 
env->kvs[dbi].clc.k.cmp - : builtin_keycmp(user_flags); + keycmp = (env->dbs_flags[dbi] & DB_VALID) ? env->kvs[dbi].clc.k.cmp : builtin_keycmp(user_flags); if (env->kvs[dbi].clc.k.cmp != keycmp) { if (env->dbs_flags[dbi] & DB_VALID) return MDBX_EINVAL; @@ -334,8 +316,7 @@ int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, } if (!datacmp) - datacmp = (env->dbs_flags[dbi] & DB_VALID) ? env->kvs[dbi].clc.v.cmp - : builtin_datacmp(user_flags); + datacmp = (env->dbs_flags[dbi] & DB_VALID) ? env->kvs[dbi].clc.v.cmp : builtin_datacmp(user_flags); if (env->kvs[dbi].clc.v.cmp != datacmp) { if (env->dbs_flags[dbi] & DB_VALID) return MDBX_EINVAL; @@ -346,19 +327,15 @@ int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, } static inline size_t dbi_namelen(const MDBX_val name) { - return (name.iov_len > sizeof(defer_free_item_t)) ? name.iov_len - : sizeof(defer_free_item_t); + return (name.iov_len > sizeof(defer_free_item_t)) ? name.iov_len : sizeof(defer_free_item_t); } -static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, - MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp, - MDBX_val name) { +static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, + MDBX_cmp_func *datacmp, MDBX_val name) { MDBX_env *const env = txn->env; /* Cannot mix named table(s) with DUPSORT flags */ - tASSERT(txn, - (txn->dbi_state[MAIN_DBI] & (DBI_LINDO | DBI_VALID | DBI_STALE)) == - (DBI_LINDO | DBI_VALID)); + tASSERT(txn, (txn->dbi_state[MAIN_DBI] & (DBI_LINDO | DBI_VALID | DBI_STALE)) == (DBI_LINDO | DBI_VALID)); if (unlikely(txn->dbs[MAIN_DBI].flags & MDBX_DUPSORT)) { if (unlikely((user_flags & MDBX_CREATE) == 0)) return MDBX_NOTFOUND; @@ -367,18 +344,15 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, return MDBX_INCOMPATIBLE; /* Пересоздаём MainDB когда там пусто. 
*/ - tASSERT(txn, txn->dbs[MAIN_DBI].height == 0 && - txn->dbs[MAIN_DBI].items == 0 && - txn->dbs[MAIN_DBI].root == P_INVALID); + tASSERT(txn, + txn->dbs[MAIN_DBI].height == 0 && txn->dbs[MAIN_DBI].items == 0 && txn->dbs[MAIN_DBI].root == P_INVALID); if (unlikely(txn->cursors[MAIN_DBI])) return MDBX_DANGLING_DBI; env->dbs_flags[MAIN_DBI] = DB_POISON; - atomic_store32(&env->dbi_seqs[MAIN_DBI], dbi_seq_next(env, MAIN_DBI), - mo_AcquireRelease); + atomic_store32(&env->dbi_seqs[MAIN_DBI], dbi_seq_next(env, MAIN_DBI), mo_AcquireRelease); const uint32_t seq = dbi_seq_next(env, MAIN_DBI); - const uint16_t main_flags = - txn->dbs[MAIN_DBI].flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY); + const uint16_t main_flags = txn->dbs[MAIN_DBI].flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY); env->kvs[MAIN_DBI].clc.k.cmp = builtin_keycmp(main_flags); env->kvs[MAIN_DBI].clc.v.cmp = builtin_datacmp(main_flags); txn->dbs[MAIN_DBI].flags = main_flags; @@ -391,8 +365,7 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, return err; } env->dbs_flags[MAIN_DBI] = main_flags | DB_VALID; - txn->dbi_seqs[MAIN_DBI] = - atomic_store32(&env->dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease); + txn->dbi_seqs[MAIN_DBI] = atomic_store32(&env->dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease); txn->dbi_state[MAIN_DBI] |= DBI_DIRTY; txn->flags |= MDBX_TXN_DIRTY; } @@ -410,8 +383,7 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, if (!env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[scan].name)) { slot = scan; int err = dbi_check(txn, slot); - if (err == MDBX_BAD_DBI && - txn->dbi_state[slot] == (DBI_OLDEN | DBI_LINDO)) { + if (err == MDBX_BAD_DBI && txn->dbi_state[slot] == (DBI_OLDEN | DBI_LINDO)) { /* хендл использовался, стал невалидным, * но теперь явно пере-открывается в этой транзакци */ eASSERT(env, !txn->cursors[slot]); @@ -433,12 +405,10 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, return MDBX_DBS_FULL; if (env->n_dbi 
== slot) - eASSERT(env, !env->dbs_flags[slot] && !env->kvs[slot].name.iov_len && - !env->kvs[slot].name.iov_base); + eASSERT(env, !env->dbs_flags[slot] && !env->kvs[slot].name.iov_len && !env->kvs[slot].name.iov_base); env->dbs_flags[slot] = DB_POISON; - atomic_store32(&env->dbi_seqs[slot], dbi_seq_next(env, slot), - mo_AcquireRelease); + atomic_store32(&env->dbi_seqs[slot], dbi_seq_next(env, slot), mo_AcquireRelease); memset(&env->kvs[slot], 0, sizeof(env->kvs[slot])); if (env->n_dbi == slot) env->n_dbi = (unsigned)slot + 1; @@ -461,13 +431,11 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, return rc; } else { /* make sure this is actually a table */ - node_t *node = - page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); + node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); if (unlikely((node_flags(node) & (N_DUP | N_TREE)) != N_TREE)) return MDBX_INCOMPATIBLE; if (!MDBX_DISABLE_VALIDATION && unlikely(body.iov_len != sizeof(tree_t))) { - ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid table node size", body.iov_len); + ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid table node size", body.iov_len); return MDBX_CORRUPTED; } memcpy(&txn->dbs[slot], body.iov_base, sizeof(tree_t)); @@ -490,8 +458,7 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, txn->dbs[slot].flags = user_flags & DB_PERSISTENT_FLAGS; cx.outer.next = txn->cursors[MAIN_DBI]; txn->cursors[MAIN_DBI] = &cx.outer; - rc = - cursor_put_checklen(&cx.outer, &name, &body, N_TREE | MDBX_NOOVERWRITE); + rc = cursor_put_checklen(&cx.outer, &name, &body, N_TREE | MDBX_NOOVERWRITE); txn->cursors[MAIN_DBI] = cx.outer.next; if (unlikely(rc != MDBX_SUCCESS)) goto bailout; @@ -503,9 +470,8 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, /* Got info, register DBI in this txn */ const uint32_t seq = dbi_seq_next(env, slot); - eASSERT(env, - 
env->dbs_flags[slot] == DB_POISON && !txn->cursors[slot] && - (txn->dbi_state[slot] & (DBI_LINDO | DBI_VALID)) == DBI_LINDO); + eASSERT(env, env->dbs_flags[slot] == DB_POISON && !txn->cursors[slot] && + (txn->dbi_state[slot] & (DBI_LINDO | DBI_VALID)) == DBI_LINDO); txn->dbi_state[slot] = dbi_state; memcpy(&txn->dbs[slot], body.iov_base, sizeof(txn->dbs[slot])); env->dbs_flags[slot] = txn->dbs[slot].flags; @@ -515,8 +481,7 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, env->kvs[slot].name = name; env->dbs_flags[slot] = txn->dbs[slot].flags | DB_VALID; - txn->dbi_seqs[slot] = - atomic_store32(&env->dbi_seqs[slot], seq, mo_AcquireRelease); + txn->dbi_seqs[slot] = atomic_store32(&env->dbi_seqs[slot], seq, mo_AcquireRelease); done: *dbi = (MDBX_dbi)slot; @@ -525,8 +490,7 @@ done: return MDBX_SUCCESS; bailout: - eASSERT(env, !txn->cursors[slot] && !env->kvs[slot].name.iov_len && - !env->kvs[slot].name.iov_base); + eASSERT(env, !txn->cursors[slot] && !env->kvs[slot].name.iov_len && !env->kvs[slot].name.iov_base); txn->dbi_state[slot] &= DBI_LINDO | DBI_OLDEN; env->dbs_flags[slot] = 0; osal_free(clone); @@ -535,14 +499,13 @@ bailout: return rc; } -int dbi_open(MDBX_txn *txn, const MDBX_val *const name, unsigned user_flags, - MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { +int dbi_open(MDBX_txn *txn, const MDBX_val *const name, unsigned user_flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, + MDBX_cmp_func *datacmp) { if (unlikely(!dbi)) return MDBX_EINVAL; *dbi = 0; - if (user_flags != MDBX_ACCEDE && - unlikely(!check_table_flags(user_flags & ~MDBX_CREATE))) + if (user_flags != MDBX_ACCEDE && unlikely(!check_table_flags(user_flags & ~MDBX_CREATE))) return MDBX_EINVAL; int rc = check_txn(txn, MDBX_TXN_BLOCKED); @@ -567,8 +530,7 @@ int dbi_open(MDBX_txn *txn, const MDBX_val *const name, unsigned user_flags, } if (unlikely(name == MDBX_CHK_META || name->iov_base == MDBX_CHK_META)) return MDBX_EINVAL; - if (unlikely(name->iov_len 
> - txn->env->leaf_nodemax - NODESIZE - sizeof(tree_t))) + if (unlikely(name->iov_len > txn->env->leaf_nodemax - NODESIZE - sizeof(tree_t))) return MDBX_EINVAL; #if MDBX_ENABLE_DBI_LOCKFREE @@ -582,31 +544,24 @@ int dbi_open(MDBX_txn *txn, const MDBX_val *const name, unsigned user_flags, continue; } - const uint32_t snap_seq = - atomic_load32(&env->dbi_seqs[i], mo_AcquireRelease); + const uint32_t snap_seq = atomic_load32(&env->dbi_seqs[i], mo_AcquireRelease); const uint16_t snap_flags = env->dbs_flags[i]; const MDBX_val snap_name = env->kvs[i].name; if (user_flags != MDBX_ACCEDE && - (((user_flags ^ snap_flags) & DB_PERSISTENT_FLAGS) || - (keycmp && keycmp != env->kvs[i].clc.k.cmp) || + (((user_flags ^ snap_flags) & DB_PERSISTENT_FLAGS) || (keycmp && keycmp != env->kvs[i].clc.k.cmp) || (datacmp && datacmp != env->kvs[i].clc.v.cmp))) continue; - const uint32_t main_seq = - atomic_load32(&env->dbi_seqs[MAIN_DBI], mo_AcquireRelease); + const uint32_t main_seq = atomic_load32(&env->dbi_seqs[MAIN_DBI], mo_AcquireRelease); MDBX_cmp_func *const snap_cmp = env->kvs[MAIN_DBI].clc.k.cmp; - if (unlikely(!(snap_flags & DB_VALID) || !snap_name.iov_base || - !snap_name.iov_len || !snap_cmp)) + if (unlikely(!(snap_flags & DB_VALID) || !snap_name.iov_base || !snap_name.iov_len || !snap_cmp)) continue; const bool name_match = snap_cmp(&snap_name, name) == 0; osal_flush_incoherent_cpu_writeback(); - if (unlikely( - snap_seq != atomic_load32(&env->dbi_seqs[i], mo_AcquireRelease) || - main_seq != - atomic_load32(&env->dbi_seqs[MAIN_DBI], mo_AcquireRelease) || - snap_flags != env->dbs_flags[i] || - snap_name.iov_base != env->kvs[i].name.iov_base || - snap_name.iov_len != env->kvs[i].name.iov_len)) + if (unlikely(snap_seq != atomic_load32(&env->dbi_seqs[i], mo_AcquireRelease) || + main_seq != atomic_load32(&env->dbi_seqs[MAIN_DBI], mo_AcquireRelease) || + snap_flags != env->dbs_flags[i] || snap_name.iov_base != env->kvs[i].name.iov_base || + snap_name.iov_len != 
env->kvs[i].name.iov_len)) goto retry; if (name_match) { rc = dbi_check(txn, i); @@ -634,18 +589,15 @@ int dbi_open(MDBX_txn *txn, const MDBX_val *const name, unsigned user_flags, rc = osal_fastmutex_acquire(&txn->env->dbi_lock); if (likely(rc == MDBX_SUCCESS)) { rc = dbi_open_locked(txn, user_flags, dbi, keycmp, datacmp, *name); - ENSURE(txn->env, - osal_fastmutex_release(&txn->env->dbi_lock) == MDBX_SUCCESS); + ENSURE(txn->env, osal_fastmutex_release(&txn->env->dbi_lock) == MDBX_SUCCESS); } return rc; } -static int dbi_open_cstr(MDBX_txn *txn, const char *name_cstr, - MDBX_db_flags_t flags, MDBX_dbi *dbi, +static int dbi_open_cstr(MDBX_txn *txn, const char *name_cstr, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { MDBX_val thunk, *name; - if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || - name_cstr == MDBX_CHK_META) + if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || name_cstr == MDBX_CHK_META) name = (void *)name_cstr; else { thunk.iov_len = strlen(name_cstr); @@ -660,8 +612,7 @@ struct dbi_rename_result { int err; }; -__cold static struct dbi_rename_result -dbi_rename_locked(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val new_name) { +__cold static struct dbi_rename_result dbi_rename_locked(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val new_name) { struct dbi_rename_result pair; pair.defer = nullptr; pair.err = dbi_check(txn, dbi); @@ -670,8 +621,7 @@ dbi_rename_locked(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val new_name) { MDBX_env *const env = txn->env; MDBX_val old_name = env->kvs[dbi].name; - if (env->kvs[MAIN_DBI].clc.k.cmp(&new_name, &old_name) == 0 && - MDBX_DEBUG == 0) + if (env->kvs[MAIN_DBI].clc.k.cmp(&new_name, &old_name) == 0 && MDBX_DEBUG == 0) return pair; cursor_couple_t cx; @@ -695,8 +645,7 @@ dbi_rename_locked(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val new_name) { txn->cursors[MAIN_DBI] = &cx.outer; MDBX_val data = {&txn->dbs[dbi], sizeof(tree_t)}; - pair.err = cursor_put_checklen(&cx.outer, &new_name, &data, 
- N_TREE | MDBX_NOOVERWRITE); + pair.err = cursor_put_checklen(&cx.outer, &new_name, &data, N_TREE | MDBX_NOOVERWRITE); if (likely(pair.err == MDBX_SUCCESS)) { pair.err = cursor_seek(&cx.outer, &old_name, nullptr, MDBX_SET).err; if (likely(pair.err == MDBX_SUCCESS)) @@ -732,8 +681,7 @@ static defer_free_item_t *dbi_close_locked(MDBX_env *env, MDBX_dbi dbi) { do { --i; eASSERT(env, i >= CORE_DBS); - eASSERT(env, !env->dbs_flags[i] && !env->kvs[i].name.iov_len && - !env->kvs[i].name.iov_base); + eASSERT(env, !env->dbs_flags[i] && !env->kvs[i].name.iov_len && !env->kvs[i].name.iov_base); } while (i > CORE_DBS && !env->kvs[i - 1].name.iov_base); env->n_dbi = (unsigned)i; } @@ -745,25 +693,21 @@ static defer_free_item_t *dbi_close_locked(MDBX_env *env, MDBX_dbi dbi) { /*----------------------------------------------------------------------------*/ /* API */ -int mdbx_dbi_open(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, - MDBX_dbi *dbi) { +int mdbx_dbi_open(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi) { return LOG_IFERR(dbi_open_cstr(txn, name, flags, dbi, nullptr, nullptr)); } -int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, - MDBX_dbi *dbi) { +int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi) { return LOG_IFERR(dbi_open(txn, name, flags, dbi, nullptr, nullptr)); } -int mdbx_dbi_open_ex(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, - MDBX_dbi *dbi, MDBX_cmp_func *keycmp, +int mdbx_dbi_open_ex(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { return LOG_IFERR(dbi_open_cstr(txn, name, flags, dbi, keycmp, datacmp)); } -int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, - MDBX_db_flags_t flags, MDBX_dbi *dbi, - MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { +int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func 
*keycmp, + MDBX_cmp_func *datacmp) { return LOG_IFERR(dbi_open(txn, name, flags, dbi, keycmp, datacmp)); } @@ -780,8 +724,7 @@ __cold int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { if (txn->dbs[dbi].height) { cx.outer.next = txn->cursors[dbi]; txn->cursors[dbi] = &cx.outer; - rc = tree_drop(&cx.outer, - dbi == MAIN_DBI || (cx.outer.tree->flags & MDBX_DUPSORT)); + rc = tree_drop(&cx.outer, dbi == MAIN_DBI || (cx.outer.tree->flags & MDBX_DUPSORT)); txn->cursors[dbi] = cx.outer.next; if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); @@ -832,8 +775,7 @@ __cold int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { __cold int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name_cstr) { MDBX_val thunk, *name; - if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || - name_cstr == MDBX_CHK_META) + if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || name_cstr == MDBX_CHK_META) name = (void *)name_cstr; else { thunk.iov_len = strlen(name_cstr); @@ -860,8 +802,7 @@ int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { rc = osal_fastmutex_acquire(&env->dbi_lock); if (likely(rc == MDBX_SUCCESS && dbi < env->n_dbi)) { retry: - if (env->basal_txn && (env->dbs_flags[dbi] & DB_VALID) && - (env->basal_txn->flags & MDBX_TXN_FINISHED) == 0) { + if (env->basal_txn && (env->dbs_flags[dbi] & DB_VALID) && (env->basal_txn->flags & MDBX_TXN_FINISHED) == 0) { /* LY: Опасный код, так как env->txn может быть изменено в другом потоке. * К сожалению тут нет надежного решения и может быть падение при неверном * использовании API (вызове mdbx_dbi_close конкурентно с завершением @@ -884,8 +825,7 @@ int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { * транзакции, и поэтому этот путь потенциально более опасен. 
*/ const MDBX_txn *const hazard = env->txn; osal_compiler_barrier(); - if ((dbi_state(env->basal_txn, dbi) & - (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO) { + if ((dbi_state(env->basal_txn, dbi) & (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO) { bailout_dirty_dbi: osal_fastmutex_release(&env->dbi_lock); return LOG_IFERR(MDBX_DANGLING_DBI); @@ -893,11 +833,9 @@ int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { osal_memory_barrier(); if (unlikely(hazard != env->txn)) goto retry; - if (hazard != env->basal_txn && hazard && - (hazard->flags & MDBX_TXN_FINISHED) == 0 && + if (hazard != env->basal_txn && hazard && (hazard->flags & MDBX_TXN_FINISHED) == 0 && hazard->signature == txn_signature && - (dbi_state(hazard, dbi) & (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > - DBI_LINDO) + (dbi_state(hazard, dbi) & (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO) goto bailout_dirty_dbi; osal_compiler_barrier(); if (unlikely(hazard != env->txn)) @@ -908,8 +846,7 @@ int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { return LOG_IFERR(rc); } -int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, - unsigned *state) { +int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, unsigned *state) { if (unlikely(!flags || !state)) return LOG_IFERR(MDBX_EINVAL); @@ -928,21 +865,17 @@ int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, } *flags = txn->dbs[dbi].flags & DB_PERSISTENT_FLAGS; - *state = - txn->dbi_state[dbi] & (DBI_FRESH | DBI_CREAT | DBI_DIRTY | DBI_STALE); + *state = txn->dbi_state[dbi] & (DBI_FRESH | DBI_CREAT | DBI_DIRTY | DBI_STALE); return MDBX_SUCCESS; } -__cold int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, - const MDBX_val *new_name) { +__cold int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *new_name) { int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); - if (unlikely(new_name == MDBX_CHK_MAIN || - new_name->iov_base == MDBX_CHK_MAIN || 
new_name == MDBX_CHK_GC || - new_name->iov_base == MDBX_CHK_GC || new_name == MDBX_CHK_META || - new_name->iov_base == MDBX_CHK_META)) + if (unlikely(new_name == MDBX_CHK_MAIN || new_name->iov_base == MDBX_CHK_MAIN || new_name == MDBX_CHK_GC || + new_name->iov_base == MDBX_CHK_GC || new_name == MDBX_CHK_META || new_name->iov_base == MDBX_CHK_META)) return LOG_IFERR(MDBX_EINVAL); if (unlikely(dbi < CORE_DBS)) @@ -968,13 +901,11 @@ static void stat_get(const tree_t *db, MDBX_stat *st, size_t bytes) { st->ms_leaf_pages = db->leaf_pages; st->ms_overflow_pages = db->large_pages; st->ms_entries = db->items; - if (likely(bytes >= - offsetof(MDBX_stat, ms_mod_txnid) + sizeof(st->ms_mod_txnid))) + if (likely(bytes >= offsetof(MDBX_stat, ms_mod_txnid) + sizeof(st->ms_mod_txnid))) st->ms_mod_txnid = db->mod_txnid; } -__cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, - size_t bytes) { +__cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, size_t bytes) { if (unlikely(!dest)) return LOG_IFERR(MDBX_EINVAL); @@ -1012,8 +943,7 @@ bailout: return LOG_IFERR(rc); } -__cold const tree_t *dbi_dig(const MDBX_txn *txn, const size_t dbi, - tree_t *fallback) { +__cold const tree_t *dbi_dig(const MDBX_txn *txn, const size_t dbi, tree_t *fallback) { const MDBX_txn *dig = txn; do { tASSERT(txn, txn->n_dbi == dig->n_dbi); @@ -1036,8 +966,7 @@ __cold const tree_t *dbi_dig(const MDBX_txn *txn, const size_t dbi, return fallback; } -__cold int mdbx_enumerate_tables(const MDBX_txn *txn, - MDBX_table_enum_func *func, void *ctx) { +__cold int mdbx_enumerate_tables(const MDBX_txn *txn, MDBX_table_enum_func *func, void *ctx) { if (unlikely(!func)) return LOG_IFERR(MDBX_EINVAL); @@ -1054,13 +983,12 @@ __cold int mdbx_enumerate_tables(const MDBX_txn *txn, txn->cursors[MAIN_DBI] = &cx.outer; for (rc = outer_first(&cx.outer, nullptr, nullptr); rc == MDBX_SUCCESS; rc = outer_next(&cx.outer, nullptr, nullptr, MDBX_NEXT_NODUP)) { - node_t *node = - 
page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); + node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); if (node_flags(node) != N_TREE) continue; if (unlikely(node_ds(node) != sizeof(tree_t))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid dupsort sub-tree node size", (unsigned)node_ds(node)); + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid dupsort sub-tree node size", + (unsigned)node_ds(node)); rc = MDBX_CORRUPTED; break; } diff --git a/src/dbi.h b/src/dbi.h index bfafe3e3..c06f5bd1 100644 --- a/src/dbi.h +++ b/src/dbi.h @@ -7,8 +7,8 @@ #if MDBX_ENABLE_DBI_SPARSE -MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED MDBX_INTERNAL size_t -dbi_bitmap_ctz_fallback(const MDBX_txn *txn, intptr_t bmi); +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED MDBX_INTERNAL size_t dbi_bitmap_ctz_fallback(const MDBX_txn *txn, + intptr_t bmi); static inline size_t dbi_bitmap_ctz(const MDBX_txn *txn, intptr_t bmi) { tASSERT(txn, bmi > 0); @@ -18,8 +18,7 @@ static inline size_t dbi_bitmap_ctz(const MDBX_txn *txn, intptr_t bmi) { return __builtin_ctz((int)bmi); if (sizeof(txn->dbi_sparse[0]) == sizeof(long)) return __builtin_ctzl((long)bmi); -#if (defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ == 8) || \ - __has_builtin(__builtin_ctzll) +#if (defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ == 8) || __has_builtin(__builtin_ctzll) return __builtin_ctzll(bmi); #endif /* have(long long) && long long == uint64_t */ #endif /* GNU C */ @@ -46,27 +45,27 @@ static inline size_t dbi_bitmap_ctz(const MDBX_txn *txn, intptr_t bmi) { /* LY: Макрос целенаправленно сделан с одним циклом, чтобы сохранить возможность * использования оператора break */ -#define TXN_FOREACH_DBI_FROM(TXN, I, FROM) \ - for (size_t bitmap_chunk = CHAR_BIT * sizeof(TXN->dbi_sparse[0]), \ - bitmap_item = TXN->dbi_sparse[0] >> FROM, I = FROM; \ - I < TXN->n_dbi; ++I) \ - if (bitmap_item == 0) { \ - I = (I - 1) | (bitmap_chunk - 1); \ - 
bitmap_item = TXN->dbi_sparse[(1 + I) / bitmap_chunk]; \ - if (!bitmap_item) \ - I += bitmap_chunk; \ - continue; \ - } else if ((bitmap_item & 1) == 0) { \ - size_t bitmap_skip = dbi_bitmap_ctz(txn, bitmap_item); \ - bitmap_item >>= bitmap_skip; \ - I += bitmap_skip - 1; \ - continue; \ +#define TXN_FOREACH_DBI_FROM(TXN, I, FROM) \ + for (size_t bitmap_chunk = CHAR_BIT * sizeof(TXN->dbi_sparse[0]), bitmap_item = TXN->dbi_sparse[0] >> FROM, \ + I = FROM; \ + I < TXN->n_dbi; ++I) \ + if (bitmap_item == 0) { \ + I = (I - 1) | (bitmap_chunk - 1); \ + bitmap_item = TXN->dbi_sparse[(1 + I) / bitmap_chunk]; \ + if (!bitmap_item) \ + I += bitmap_chunk; \ + continue; \ + } else if ((bitmap_item & 1) == 0) { \ + size_t bitmap_skip = dbi_bitmap_ctz(txn, bitmap_item); \ + bitmap_item >>= bitmap_skip; \ + I += bitmap_skip - 1; \ + continue; \ } else if (bitmap_item >>= 1, TXN->dbi_state[I]) #else -#define TXN_FOREACH_DBI_FROM(TXN, I, SKIP) \ - for (size_t I = SKIP; I < TXN->n_dbi; ++I) \ +#define TXN_FOREACH_DBI_FROM(TXN, I, SKIP) \ + for (size_t I = SKIP; I < TXN->n_dbi; ++I) \ if (TXN->dbi_state[I]) #endif /* MDBX_ENABLE_DBI_SPARSE */ @@ -80,24 +79,19 @@ struct dbi_snap_result { uint32_t sequence; unsigned flags; }; -MDBX_INTERNAL struct dbi_snap_result dbi_snap(const MDBX_env *env, - const size_t dbi); +MDBX_INTERNAL struct dbi_snap_result dbi_snap(const MDBX_env *env, const size_t dbi); MDBX_INTERNAL int dbi_update(MDBX_txn *txn, int keep); static inline uint8_t dbi_state(const MDBX_txn *txn, const size_t dbi) { - STATIC_ASSERT( - (int)DBI_DIRTY == MDBX_DBI_DIRTY && (int)DBI_STALE == MDBX_DBI_STALE && - (int)DBI_FRESH == MDBX_DBI_FRESH && (int)DBI_CREAT == MDBX_DBI_CREAT); + STATIC_ASSERT((int)DBI_DIRTY == MDBX_DBI_DIRTY && (int)DBI_STALE == MDBX_DBI_STALE && + (int)DBI_FRESH == MDBX_DBI_FRESH && (int)DBI_CREAT == MDBX_DBI_CREAT); #if MDBX_ENABLE_DBI_SPARSE const size_t bitmap_chunk = CHAR_BIT * sizeof(txn->dbi_sparse[0]); const size_t bitmap_indx = dbi / bitmap_chunk; 
const size_t bitmap_mask = (size_t)1 << dbi % bitmap_chunk; - return likely(dbi < txn->n_dbi && - (txn->dbi_sparse[bitmap_indx] & bitmap_mask) != 0) - ? txn->dbi_state[dbi] - : 0; + return likely(dbi < txn->n_dbi && (txn->dbi_sparse[bitmap_indx] & bitmap_mask) != 0) ? txn->dbi_state[dbi] : 0; #else return likely(dbi < txn->n_dbi) ? txn->dbi_state[dbi] : 0; #endif /* MDBX_ENABLE_DBI_SPARSE */ @@ -106,8 +100,7 @@ static inline uint8_t dbi_state(const MDBX_txn *txn, const size_t dbi) { static inline bool dbi_changed(const MDBX_txn *txn, const size_t dbi) { const MDBX_env *const env = txn->env; eASSERT(env, dbi_state(txn, dbi) & DBI_LINDO); - const uint32_t snap_seq = - atomic_load32(&env->dbi_seqs[dbi], mo_AcquireRelease); + const uint32_t snap_seq = atomic_load32(&env->dbi_seqs[dbi], mo_AcquireRelease); return snap_seq != txn->dbi_seqs[dbi]; } @@ -125,12 +118,10 @@ static inline uint32_t dbi_seq_next(const MDBX_env *const env, size_t dbi) { return v ? v : 1; } -MDBX_INTERNAL int dbi_open(MDBX_txn *txn, const MDBX_val *const name, - unsigned user_flags, MDBX_dbi *dbi, +MDBX_INTERNAL int dbi_open(MDBX_txn *txn, const MDBX_val *const name, unsigned user_flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); -MDBX_INTERNAL int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, - MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); +MDBX_INTERNAL int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, MDBX_cmp_func *keycmp, + MDBX_cmp_func *datacmp); -MDBX_INTERNAL const tree_t *dbi_dig(const MDBX_txn *txn, const size_t dbi, - tree_t *fallback); +MDBX_INTERNAL const tree_t *dbi_dig(const MDBX_txn *txn, const size_t dbi, tree_t *fallback); diff --git a/src/debug_begin.h b/src/debug_begin.h index 521e99cf..09f62cea 100644 --- a/src/debug_begin.h +++ b/src/debug_begin.h @@ -9,28 +9,22 @@ #pragma push_macro("eASSERT") #undef TRACE -#define TRACE(fmt, ...) 
\ - debug_log(MDBX_LOG_TRACE, __func__, __LINE__, fmt "\n", __VA_ARGS__) +#define TRACE(fmt, ...) debug_log(MDBX_LOG_TRACE, __func__, __LINE__, fmt "\n", __VA_ARGS__) #undef DEBUG -#define DEBUG(fmt, ...) \ - debug_log(MDBX_LOG_DEBUG, __func__, __LINE__, fmt "\n", __VA_ARGS__) +#define DEBUG(fmt, ...) debug_log(MDBX_LOG_DEBUG, __func__, __LINE__, fmt "\n", __VA_ARGS__) #undef VERBOSE -#define VERBOSE(fmt, ...) \ - debug_log(MDBX_LOG_VERBOSE, __func__, __LINE__, fmt "\n", __VA_ARGS__) +#define VERBOSE(fmt, ...) debug_log(MDBX_LOG_VERBOSE, __func__, __LINE__, fmt "\n", __VA_ARGS__) #undef NOTICE -#define NOTICE(fmt, ...) \ - debug_log(MDBX_LOG_NOTICE, __func__, __LINE__, fmt "\n", __VA_ARGS__) +#define NOTICE(fmt, ...) debug_log(MDBX_LOG_NOTICE, __func__, __LINE__, fmt "\n", __VA_ARGS__) #undef WARNING -#define WARNING(fmt, ...) \ - debug_log(MDBX_LOG_WARN, __func__, __LINE__, fmt "\n", __VA_ARGS__) +#define WARNING(fmt, ...) debug_log(MDBX_LOG_WARN, __func__, __LINE__, fmt "\n", __VA_ARGS__) #undef ERROR -#define ERROR(fmt, ...) \ - debug_log(MDBX_LOG_ERROR, __func__, __LINE__, fmt "\n", __VA_ARGS__) +#define ERROR(fmt, ...) 
debug_log(MDBX_LOG_ERROR, __func__, __LINE__, fmt "\n", __VA_ARGS__) #undef eASSERT #define eASSERT(env, expr) ENSURE(env, expr) diff --git a/src/dpl.c b/src/dpl.c index 5e9f4485..6055ea7b 100644 --- a/src/dpl.c +++ b/src/dpl.c @@ -9,12 +9,10 @@ static inline size_t dpl_size2bytes(ptrdiff_t size) { size += size; #endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */ STATIC_ASSERT(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(dpl_t) + - (PAGELIST_LIMIT * (MDBX_DPL_PREALLOC_FOR_RADIXSORT + 1)) * - sizeof(dp_t) + + (PAGELIST_LIMIT * (MDBX_DPL_PREALLOC_FOR_RADIXSORT + 1)) * sizeof(dp_t) + MDBX_PNL_GRANULATE * sizeof(void *) * 2 < SIZE_MAX / 4 * 3); - size_t bytes = ceil_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(dpl_t) + - size * sizeof(dp_t), + size_t bytes = ceil_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(dpl_t) + size * sizeof(dp_t), MDBX_PNL_GRANULATE * sizeof(void *) * 2) - MDBX_ASSUME_MALLOC_OVERHEAD; return bytes; @@ -22,8 +20,7 @@ static inline size_t dpl_size2bytes(ptrdiff_t size) { static inline size_t dpl_bytes2size(const ptrdiff_t bytes) { size_t size = (bytes - sizeof(dpl_t)) / sizeof(dp_t); - assert(size > CURSOR_STACK_SIZE && - size <= PAGELIST_LIMIT + MDBX_PNL_GRANULATE); + assert(size > CURSOR_STACK_SIZE && size <= PAGELIST_LIMIT + MDBX_PNL_GRANULATE); #if MDBX_DPL_PREALLOC_FOR_RADIXSORT size >>= 1; #endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */ @@ -41,8 +38,7 @@ dpl_t *dpl_reserve(MDBX_txn *txn, size_t size) { tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - size_t bytes = - dpl_size2bytes((size < PAGELIST_LIMIT) ? size : PAGELIST_LIMIT); + size_t bytes = dpl_size2bytes((size < PAGELIST_LIMIT) ? 
size : PAGELIST_LIMIT); dpl_t *const dl = osal_realloc(txn->tw.dirtylist, bytes); if (likely(dl)) { #if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) @@ -59,16 +55,13 @@ int dpl_alloc(MDBX_txn *txn) { tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - const size_t wanna = (txn->env->options.dp_initial < txn->geo.upper) - ? txn->env->options.dp_initial - : txn->geo.upper; + const size_t wanna = (txn->env->options.dp_initial < txn->geo.upper) ? txn->env->options.dp_initial : txn->geo.upper; #if MDBX_FORCE_ASSERTIONS || MDBX_DEBUG if (txn->tw.dirtylist) /* обнуляем чтобы не сработал ассерт внутри dpl_reserve() */ txn->tw.dirtylist->sorted = txn->tw.dirtylist->length = 0; #endif /* asertions enabled */ - if (unlikely(!txn->tw.dirtylist || txn->tw.dirtylist->detent < wanna || - txn->tw.dirtylist->detent > wanna + wanna) && + if (unlikely(!txn->tw.dirtylist || txn->tw.dirtylist->detent < wanna || txn->tw.dirtylist->detent > wanna + wanna) && unlikely(!dpl_reserve(txn, wanna))) return MDBX_ENOMEM; @@ -77,8 +70,7 @@ int dpl_alloc(MDBX_txn *txn) { } #define MDBX_DPL_EXTRACT_KEY(ptr) ((ptr)->pgno) -RADIXSORT_IMPL(dp, dp_t, MDBX_DPL_EXTRACT_KEY, MDBX_DPL_PREALLOC_FOR_RADIXSORT, - 1) +RADIXSORT_IMPL(dp, dp_t, MDBX_DPL_EXTRACT_KEY, MDBX_DPL_PREALLOC_FOR_RADIXSORT, 1) #define DP_SORT_CMP(first, last) ((first).pgno < (last).pgno) SORT_IMPL(dp_sort, false, dp_t, DP_SORT_CMP) @@ -90,16 +82,13 @@ __hot __noinline dpl_t *dpl_sort_slowpath(const MDBX_txn *txn) { dpl_t *dl = txn->tw.dirtylist; assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); const size_t unsorted = dl->length - dl->sorted; - if (likely(unsorted < MDBX_RADIXSORT_THRESHOLD) || - unlikely(!dp_radixsort(dl->items + 1, dl->length))) { + if (likely(unsorted < MDBX_RADIXSORT_THRESHOLD) || unlikely(!dp_radixsort(dl->items + 1, dl->length))) { if (dl->sorted > unsorted / 4 + 4 && - 
(MDBX_DPL_PREALLOC_FOR_RADIXSORT || - dl->length + unsorted < dl->detent + dpl_gap_mergesort)) { + (MDBX_DPL_PREALLOC_FOR_RADIXSORT || dl->length + unsorted < dl->detent + dpl_gap_mergesort)) { dp_t *const sorted_begin = dl->items + 1; dp_t *const sorted_end = sorted_begin + dl->sorted; - dp_t *const end = dl->items + (MDBX_DPL_PREALLOC_FOR_RADIXSORT - ? dl->length + dl->length + 1 - : dl->detent + dpl_reserve_gap); + dp_t *const end = + dl->items + (MDBX_DPL_PREALLOC_FOR_RADIXSORT ? dl->length + dl->length + 1 : dl->detent + dpl_reserve_gap); dp_t *const tmp = end - unsorted; assert(dl->items + dl->length + 1 < tmp); /* copy unsorted to the end of allocated space and sort it */ @@ -120,19 +109,16 @@ __hot __noinline dpl_t *dpl_sort_slowpath(const MDBX_txn *txn) { #endif } while (likely(--w > l)); assert(r == tmp - 1); - assert(dl->items[0].pgno == 0 && - dl->items[dl->length + 1].pgno == P_INVALID); + assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); if (ASSERT_ENABLED()) for (size_t i = 0; i <= dl->length; ++i) assert(dl->items[i].pgno < dl->items[i + 1].pgno); } else { dp_sort(dl->items + 1, dl->items + dl->length + 1); - assert(dl->items[0].pgno == 0 && - dl->items[dl->length + 1].pgno == P_INVALID); + assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); } } else { - assert(dl->items[0].pgno == 0 && - dl->items[dl->length + 1].pgno == P_INVALID); + assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); } dl->sorted = dl->length; return dl; @@ -143,8 +129,7 @@ __hot __noinline dpl_t *dpl_sort_slowpath(const MDBX_txn *txn) { #define DP_SEARCH_CMP(dp, id) ((dp).pgno < (id)) SEARCH_IMPL(dp_bsearch, dp_t, pgno_t, DP_SEARCH_CMP) -__hot __noinline MDBX_INTERNAL size_t dpl_search(const MDBX_txn *txn, - pgno_t pgno) { +__hot __noinline MDBX_INTERNAL size_t dpl_search(const MDBX_txn *txn, pgno_t pgno) { tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); tASSERT(txn, (txn->flags & 
MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); @@ -166,10 +151,10 @@ __hot __noinline MDBX_INTERNAL size_t dpl_search(const MDBX_txn *txn, /* whole sorted cases */ break; -#define LINEAR_SEARCH_CASE(N) \ - case N: \ - if (dl->items[dl->length - N + 1].pgno == pgno) \ - return dl->length - N + 1; \ +#define LINEAR_SEARCH_CASE(N) \ + case N: \ + if (dl->items[dl->length - N + 1].pgno == pgno) \ + return dl->length - N + 1; \ __fallthrough /* use linear scan until the threshold */ @@ -193,8 +178,7 @@ const page_t *debug_dpl_find(const MDBX_txn *txn, const pgno_t pgno) { const dpl_t *dl = txn->tw.dirtylist; if (dl) { tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - assert(dl->items[0].pgno == 0 && - dl->items[dl->length + 1].pgno == P_INVALID); + assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); for (size_t i = dl->length; i > dl->sorted; --i) if (dl->items[i].pgno == pgno) return dl->items[i].ptr; @@ -220,13 +204,11 @@ void dpl_remove_ex(const MDBX_txn *txn, size_t i, size_t npages) { dl->pages_including_loose -= npages; dl->sorted -= dl->sorted >= i; dl->length -= 1; - memmove(dl->items + i, dl->items + i + 1, - (dl->length - i + 2) * sizeof(dl->items[0])); + memmove(dl->items + i, dl->items + i + 1, (dl->length - i + 2) * sizeof(dl->items[0])); assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); } -int __must_check_result dpl_append(MDBX_txn *txn, pgno_t pgno, page_t *page, - size_t npages) { +int __must_check_result dpl_append(MDBX_txn *txn, pgno_t pgno, page_t *page, size_t npages) { tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); const dp_t dp = {page, pgno, (pgno_t)npages}; @@ -237,8 +219,7 @@ int __must_check_result dpl_append(MDBX_txn *txn, pgno_t pgno, page_t *page, dpl_t *dl = txn->tw.dirtylist; tASSERT(txn, dl->length <= PAGELIST_LIMIT + MDBX_PNL_GRANULATE); - tASSERT(txn, dl->items[0].pgno == 0 && - 
dl->items[dl->length + 1].pgno == P_INVALID); + tASSERT(txn, dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); if (AUDIT_ENABLED()) { for (size_t i = dl->length; i > 0; --i) { assert(dl->items[i].pgno != dp.pgno); @@ -254,9 +235,7 @@ int __must_check_result dpl_append(MDBX_txn *txn, pgno_t pgno, page_t *page, ERROR("DPL is full (PAGELIST_LIMIT %zu)", PAGELIST_LIMIT); return MDBX_TXN_FULL; } - const size_t size = (dl->detent < MDBX_PNL_INITIAL * 42) - ? dl->detent + dl->detent - : dl->detent + dl->detent / 2; + const size_t size = (dl->detent < MDBX_PNL_INITIAL * 42) ? dl->detent + dl->detent : dl->detent + dl->detent / 2; dl = dpl_reserve(txn, size); if (unlikely(!dl)) return MDBX_ENOMEM; @@ -288,10 +267,7 @@ int __must_check_result dpl_append(MDBX_txn *txn, pgno_t pgno, page_t *page, const ptrdiff_t pivot = (ptrdiff_t)dl->length - dpl_insertion_threshold; #if MDBX_HAVE_CMOV const pgno_t pivot_pgno = - dl->items[(dl->length < dpl_insertion_threshold) - ? 0 - : dl->length - dpl_insertion_threshold] - .pgno; + dl->items[(dl->length < dpl_insertion_threshold) ? 
0 : dl->length - dpl_insertion_threshold].pgno; #endif /* MDBX_HAVE_CMOV */ /* copy the stub beyond the end */ @@ -310,9 +286,7 @@ int __must_check_result dpl_append(MDBX_txn *txn, pgno_t pgno, page_t *page, while (i >= dl->items + dl->sorted) { #if !defined(__GNUC__) /* пытаемся избежать вызова memmove() */ i[1] = *i; -#elif MDBX_WORDBITS == 64 && \ - (defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)) +#elif MDBX_WORDBITS == 64 && (defined(__SIZEOF_INT128__) || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)) STATIC_ASSERT(sizeof(dp) == sizeof(__uint128_t)); ((__uint128_t *)i)[1] = *(volatile __uint128_t *)i; #else @@ -347,9 +321,8 @@ __cold bool dpl_check(MDBX_txn *txn) { tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); - tASSERT(txn, txn->tw.dirtyroom + dl->length == - (txn->parent ? txn->parent->tw.dirtyroom - : txn->env->options.dp_limit)); + tASSERT(txn, + txn->tw.dirtyroom + dl->length == (txn->parent ? 
txn->parent->tw.dirtyroom : txn->env->options.dp_limit)); if (!AUDIT_ENABLED()) return true; @@ -389,16 +362,12 @@ __cold bool dpl_check(MDBX_txn *txn) { return false; } - const size_t rpa = - pnl_search(txn->tw.relist, dp->pgno, txn->geo.first_unallocated); - tASSERT(txn, rpa > MDBX_PNL_GETSIZE(txn->tw.relist) || - txn->tw.relist[rpa] != dp->pgno); - if (rpa <= MDBX_PNL_GETSIZE(txn->tw.relist) && - unlikely(txn->tw.relist[rpa] == dp->pgno)) + const size_t rpa = pnl_search(txn->tw.relist, dp->pgno, txn->geo.first_unallocated); + tASSERT(txn, rpa > MDBX_PNL_GETSIZE(txn->tw.relist) || txn->tw.relist[rpa] != dp->pgno); + if (rpa <= MDBX_PNL_GETSIZE(txn->tw.relist) && unlikely(txn->tw.relist[rpa] == dp->pgno)) return false; if (num > 1) { - const size_t rpb = pnl_search(txn->tw.relist, dp->pgno + num - 1, - txn->geo.first_unallocated); + const size_t rpb = pnl_search(txn->tw.relist, dp->pgno + num - 1, txn->geo.first_unallocated); tASSERT(txn, rpa == rpb); if (unlikely(rpa != rpb)) return false; @@ -432,8 +401,7 @@ __noinline void dpl_lru_reduce(MDBX_txn *txn) { txn->tw.dirtylru >>= 1; dpl_t *dl = txn->tw.dirtylist; for (size_t i = 1; i <= dl->length; ++i) { - size_t *const ptr = - ptr_disp(dl->items[i].ptr, -(ptrdiff_t)sizeof(size_t)); + size_t *const ptr = ptr_disp(dl->items[i].ptr, -(ptrdiff_t)sizeof(size_t)); *ptr >>= 1; } txn = txn->parent; @@ -444,8 +412,7 @@ void dpl_sift(MDBX_txn *const txn, pnl_t pl, const bool spilled) { tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); if (MDBX_PNL_GETSIZE(pl) && txn->tw.dirtylist->length) { - tASSERT(txn, pnl_check_allocated(pl, (size_t)txn->geo.first_unallocated - << spilled)); + tASSERT(txn, pnl_check_allocated(pl, (size_t)txn->geo.first_unallocated << spilled)); dpl_t *dl = dpl_sort(txn); /* Scanning in ascend order */ @@ -501,8 +468,7 @@ void dpl_sift(MDBX_txn *const txn, pnl_t pl, const bool spilled) { dl->sorted = dpl_setlen(dl, w - 1); 
txn->tw.dirtyroom += r - w; tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == - (txn->parent ? txn->parent->tw.dirtyroom - : txn->env->options.dp_limit)); + (txn->parent ? txn->parent->tw.dirtyroom : txn->env->options.dp_limit)); return; } } diff --git a/src/dpl.h b/src/dpl.h index 9d2f59c6..0059dc58 100644 --- a/src/dpl.h +++ b/src/dpl.h @@ -49,38 +49,32 @@ static inline dpl_t *dpl_sort(const MDBX_txn *txn) { dpl_t *dl = txn->tw.dirtylist; tASSERT(txn, dl->length <= PAGELIST_LIMIT); tASSERT(txn, dl->sorted <= dl->length); - tASSERT(txn, dl->items[0].pgno == 0 && - dl->items[dl->length + 1].pgno == P_INVALID); + tASSERT(txn, dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); return likely(dl->sorted == dl->length) ? dl : dpl_sort_slowpath(txn); } MDBX_INTERNAL __noinline size_t dpl_search(const MDBX_txn *txn, pgno_t pgno); -MDBX_MAYBE_UNUSED MDBX_INTERNAL const page_t * -debug_dpl_find(const MDBX_txn *txn, const pgno_t pgno); +MDBX_MAYBE_UNUSED MDBX_INTERNAL const page_t *debug_dpl_find(const MDBX_txn *txn, const pgno_t pgno); -MDBX_NOTHROW_PURE_FUNCTION static inline unsigned dpl_npages(const dpl_t *dl, - size_t i) { +MDBX_NOTHROW_PURE_FUNCTION static inline unsigned dpl_npages(const dpl_t *dl, size_t i) { assert(0 <= (intptr_t)i && i <= dl->length); unsigned n = dl->items[i].npages; assert(n == (is_largepage(dl->items[i].ptr) ? 
dl->items[i].ptr->pages : 1)); return n; } -MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t dpl_endpgno(const dpl_t *dl, - size_t i) { +MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t dpl_endpgno(const dpl_t *dl, size_t i) { return dpl_npages(dl, i) + dl->items[i].pgno; } -static inline bool dpl_intersect(const MDBX_txn *txn, pgno_t pgno, - size_t npages) { +static inline bool dpl_intersect(const MDBX_txn *txn, pgno_t pgno, size_t npages) { tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); dpl_t *dl = txn->tw.dirtylist; tASSERT(txn, dl->sorted == dl->length); - tASSERT(txn, dl->items[0].pgno == 0 && - dl->items[dl->length + 1].pgno == P_INVALID); + tASSERT(txn, dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); size_t const n = dpl_search(txn, pgno); tASSERT(txn, n >= 1 && n <= dl->length + 1); tASSERT(txn, pgno <= dl->items[n].pgno); @@ -92,8 +86,7 @@ static inline bool dpl_intersect(const MDBX_txn *txn, pgno_t pgno, bool check = false; for (size_t i = 1; i <= dl->length; ++i) { const page_t *const dp = dl->items[i].ptr; - if (!(dp->pgno /* begin */ >= /* end */ pgno + npages || - dpl_endpgno(dl, i) /* end */ <= /* begin */ pgno)) + if (!(dp->pgno /* begin */ >= /* end */ pgno + npages || dpl_endpgno(dl, i) /* end */ <= /* begin */ pgno)) check |= true; } tASSERT(txn, check == rc); @@ -101,8 +94,7 @@ static inline bool dpl_intersect(const MDBX_txn *txn, pgno_t pgno, return rc; } -MDBX_NOTHROW_PURE_FUNCTION static inline size_t dpl_exist(const MDBX_txn *txn, - pgno_t pgno) { +MDBX_NOTHROW_PURE_FUNCTION static inline size_t dpl_exist(const MDBX_txn *txn, pgno_t pgno) { tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); dpl_t *dl = txn->tw.dirtylist; size_t i = dpl_search(txn, pgno); @@ -116,13 +108,11 @@ static inline void dpl_remove(const MDBX_txn *txn, size_t i) { dpl_remove_ex(txn, i, dpl_npages(txn->tw.dirtylist, i)); } -MDBX_INTERNAL int 
__must_check_result dpl_append(MDBX_txn *txn, pgno_t pgno, - page_t *page, size_t npages); +MDBX_INTERNAL int __must_check_result dpl_append(MDBX_txn *txn, pgno_t pgno, page_t *page, size_t npages); MDBX_MAYBE_UNUSED MDBX_INTERNAL bool dpl_check(MDBX_txn *txn); -MDBX_NOTHROW_PURE_FUNCTION static inline uint32_t dpl_age(const MDBX_txn *txn, - size_t i) { +MDBX_NOTHROW_PURE_FUNCTION static inline uint32_t dpl_age(const MDBX_txn *txn, size_t i) { tASSERT(txn, (txn->flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0); const dpl_t *dl = txn->tw.dirtylist; assert((intptr_t)i > 0 && i <= dl->length); @@ -134,8 +124,7 @@ MDBX_INTERNAL void dpl_lru_reduce(MDBX_txn *txn); static inline uint32_t dpl_lru_turn(MDBX_txn *txn) { txn->tw.dirtylru += 1; - if (unlikely(txn->tw.dirtylru > UINT32_MAX / 3) && - (txn->flags & MDBX_WRITEMAP) == 0) + if (unlikely(txn->tw.dirtylru > UINT32_MAX / 3) && (txn->flags & MDBX_WRITEMAP) == 0) dpl_lru_reduce(txn); return txn->tw.dirtylru; } diff --git a/src/dxb.c b/src/dxb.c index 8d50895c..b85258bb 100644 --- a/src/dxb.c +++ b/src/dxb.c @@ -3,8 +3,7 @@ #include "internals.h" -__cold int dxb_read_header(MDBX_env *env, meta_t *dest, const int lck_exclusive, - const mdbx_mode_t mode_bits) { +__cold int dxb_read_header(MDBX_env *env, meta_t *dest, const int lck_exclusive, const mdbx_mode_t mode_bits) { memset(dest, 0, sizeof(meta_t)); int rc = osal_filesize(env->lazy_fd, &env->dxb_mmap.filesize); if (unlikely(rc != MDBX_SUCCESS)) @@ -19,20 +18,17 @@ __cold int dxb_read_header(MDBX_env *env, meta_t *dest, const int lck_exclusive, unsigned guess_pagesize = 0; for (unsigned loop_count = 0; loop_count < loop_limit; ++loop_count) { const unsigned meta_number = loop_count % NUM_METAS; - const unsigned offset = - (guess_pagesize ? guess_pagesize - : (loop_count > NUM_METAS) ? env->ps - : globals.sys_pagesize) * - meta_number; + const unsigned offset = (guess_pagesize ? guess_pagesize + : (loop_count > NUM_METAS) ? 
env->ps + : globals.sys_pagesize) * + meta_number; char buffer[MDBX_MIN_PAGESIZE]; unsigned retryleft = 42; while (1) { - TRACE("reading meta[%d]: offset %u, bytes %u, retry-left %u", meta_number, - offset, MDBX_MIN_PAGESIZE, retryleft); + TRACE("reading meta[%d]: offset %u, bytes %u, retry-left %u", meta_number, offset, MDBX_MIN_PAGESIZE, retryleft); int err = osal_pread(env->lazy_fd, buffer, MDBX_MIN_PAGESIZE, offset); - if (err == MDBX_ENODATA && offset == 0 && loop_count == 0 && - env->dxb_mmap.filesize == 0 && + if (err == MDBX_ENODATA && offset == 0 && loop_count == 0 && env->dxb_mmap.filesize == 0 && mode_bits /* non-zero for DB creation */ != 0) { NOTICE("read meta: empty file (%d, %s)", err, mdbx_strerror(err)); return err; @@ -42,15 +38,13 @@ __cold int dxb_read_header(MDBX_env *env, meta_t *dest, const int lck_exclusive, SleepEx(0, true); err = osal_pread(env->lazy_fd, buffer, MDBX_MIN_PAGESIZE, offset); if (err == ERROR_LOCK_VIOLATION && --retryleft) { - WARNING("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, - mdbx_strerror(err)); + WARNING("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, mdbx_strerror(err)); continue; } } #endif /* Windows */ if (err != MDBX_SUCCESS) { - ERROR("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, - mdbx_strerror(err)); + ERROR("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, mdbx_strerror(err)); return err; } @@ -61,15 +55,13 @@ __cold int dxb_read_header(MDBX_env *env, meta_t *dest, const int lck_exclusive, SleepEx(0, true); err = osal_pread(env->lazy_fd, again, MDBX_MIN_PAGESIZE, offset); if (err == ERROR_LOCK_VIOLATION && --retryleft) { - WARNING("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, - mdbx_strerror(err)); + WARNING("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, mdbx_strerror(err)); continue; } } #endif /* Windows */ if (err != MDBX_SUCCESS) { - ERROR("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, - mdbx_strerror(err)); + 
ERROR("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, mdbx_strerror(err)); return err; } @@ -94,13 +86,11 @@ __cold int dxb_read_header(MDBX_env *env, meta_t *dest, const int lck_exclusive, if (env->stuck_meta >= 0) latch = (meta_number == (unsigned)env->stuck_meta); else if (meta_bootid_match(meta)) - latch = meta_choice_recent( - meta->unsafe_txnid, SIGN_IS_STEADY(meta->unsafe_sign), - dest->unsafe_txnid, SIGN_IS_STEADY(dest->unsafe_sign)); + latch = meta_choice_recent(meta->unsafe_txnid, SIGN_IS_STEADY(meta->unsafe_sign), dest->unsafe_txnid, + SIGN_IS_STEADY(dest->unsafe_sign)); else - latch = meta_choice_steady( - meta->unsafe_txnid, SIGN_IS_STEADY(meta->unsafe_sign), - dest->unsafe_txnid, SIGN_IS_STEADY(dest->unsafe_sign)); + latch = meta_choice_steady(meta->unsafe_txnid, SIGN_IS_STEADY(meta->unsafe_sign), dest->unsafe_txnid, + SIGN_IS_STEADY(dest->unsafe_sign)); if (latch) { *dest = *meta; if (!lck_exclusive && !meta_is_steady(dest)) @@ -110,9 +100,7 @@ __cold int dxb_read_header(MDBX_env *env, meta_t *dest, const int lck_exclusive, } if (dest->pagesize == 0 || - (env->stuck_meta < 0 && - !(meta_is_steady(dest) || - meta_weak_acceptable(env, dest, lck_exclusive)))) { + (env->stuck_meta < 0 && !(meta_is_steady(dest) || meta_weak_acceptable(env, dest, lck_exclusive)))) { ERROR("%s", "no usable meta-pages, database is corrupted"); if (rc == MDBX_SUCCESS) { /* TODO: try to restore the database by fully checking b-tree structure @@ -125,8 +113,7 @@ __cold int dxb_read_header(MDBX_env *env, meta_t *dest, const int lck_exclusive, return MDBX_SUCCESS; } -__cold int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, - const pgno_t size_pgno, pgno_t limit_pgno, +__cold int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, const pgno_t size_pgno, pgno_t limit_pgno, const enum resize_mode mode) { /* Acquire guard to avoid collision between read and write txns * around geo_in_bytes and dxb_mmap */ @@ -164,14 +151,11 @@ __cold int dxb_resize(MDBX_env 
*const env, const pgno_t used_pgno, eASSERT(env, bytes2pgno(env, size_bytes) >= size_pgno); eASSERT(env, bytes2pgno(env, limit_bytes) >= limit_pgno); - unsigned mresize_flags = - env->flags & (MDBX_RDONLY | MDBX_WRITEMAP | MDBX_UTTERLY_NOSYNC); + unsigned mresize_flags = env->flags & (MDBX_RDONLY | MDBX_WRITEMAP | MDBX_UTTERLY_NOSYNC); if (mode >= impilict_shrink) mresize_flags |= txn_shrink_allowed; - if (limit_bytes == env->dxb_mmap.limit && - size_bytes == env->dxb_mmap.current && - size_bytes == env->dxb_mmap.filesize) + if (limit_bytes == env->dxb_mmap.limit && size_bytes == env->dxb_mmap.current && size_bytes == env->dxb_mmap.filesize) goto bailout; /* При использовании MDBX_NOSTICKYTHREADS с транзакциями могут работать любые @@ -179,8 +163,7 @@ __cold int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, * выполнить remap-действия требующие приостановки работающих с БД потоков. */ if ((env->flags & MDBX_NOSTICKYTHREADS) == 0) { #if defined(_WIN32) || defined(_WIN64) - if ((size_bytes < env->dxb_mmap.current && mode > implicit_grow) || - limit_bytes != env->dxb_mmap.limit) { + if ((size_bytes < env->dxb_mmap.current && mode > implicit_grow) || limit_bytes != env->dxb_mmap.limit) { /* 1) Windows allows only extending a read-write section, but not a * corresponding mapped view. Therefore in other cases we must suspend * the local threads for safe remap. @@ -198,9 +181,8 @@ __cold int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, ERROR("failed suspend-for-remap: errcode %d", rc); goto bailout; } - mresize_flags |= (mode < explicit_resize) - ? MDBX_MRESIZE_MAY_UNMAP - : MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE; + mresize_flags |= + (mode < explicit_resize) ? 
MDBX_MRESIZE_MAY_UNMAP : MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE; } #else /* Windows */ lck_t *const lck = env->lck_mmap.lck; @@ -214,12 +196,10 @@ __cold int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, } /* looking for readers from this process */ - const size_t snap_nreaders = - atomic_load32(&lck->rdt_length, mo_AcquireRelease); + const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease); eASSERT(env, mode == explicit_resize); for (size_t i = 0; i < snap_nreaders; ++i) { - if (lck->rdt[i].pid.weak == env->pid && - lck->rdt[i].tid.weak != osal_thread_self()) { + if (lck->rdt[i].pid.weak == env->pid && lck->rdt[i].tid.weak != osal_thread_self()) { /* the base address of the mapping can't be changed since * the other reader thread from this process exists. */ lck_rdt_unlock(env); @@ -233,17 +213,14 @@ __cold int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, } const pgno_t aligned_munlock_pgno = - (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) - ? 0 - : bytes2pgno(env, size_bytes); + (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) ? 0 : bytes2pgno(env, size_bytes); if (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) { mincore_clean_cache(env); if ((env->flags & MDBX_WRITEMAP) && env->lck->unsynced_pages.weak) { #if MDBX_ENABLE_PGOP_STAT env->lck->pgops.msync.weak += 1; #endif /* MDBX_ENABLE_PGOP_STAT */ - rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, used_pgno), - MDBX_SYNC_NONE); + rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, used_pgno), MDBX_SYNC_NONE); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } @@ -251,51 +228,40 @@ __cold int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, munlock_after(env, aligned_munlock_pgno, size_bytes); if (size_bytes < prev_size && mode > implicit_grow) { - NOTICE("resize-MADV_%s %u..%u", - (env->flags & MDBX_WRITEMAP) ? 
"REMOVE" : "DONTNEED", size_pgno, + NOTICE("resize-MADV_%s %u..%u", (env->flags & MDBX_WRITEMAP) ? "REMOVE" : "DONTNEED", size_pgno, bytes2pgno(env, prev_size)); - const uint32_t munlocks_before = - atomic_load32(&env->lck->mlcnt[1], mo_Relaxed); + const uint32_t munlocks_before = atomic_load32(&env->lck->mlcnt[1], mo_Relaxed); rc = MDBX_RESULT_TRUE; #if defined(MADV_REMOVE) if (env->flags & MDBX_WRITEMAP) - rc = madvise(ptr_disp(env->dxb_mmap.base, size_bytes), - prev_size - size_bytes, MADV_REMOVE) - ? ignore_enosys(errno) - : MDBX_SUCCESS; + rc = madvise(ptr_disp(env->dxb_mmap.base, size_bytes), prev_size - size_bytes, MADV_REMOVE) ? ignore_enosys(errno) + : MDBX_SUCCESS; #endif /* MADV_REMOVE */ #if defined(MADV_DONTNEED) if (rc == MDBX_RESULT_TRUE) - rc = madvise(ptr_disp(env->dxb_mmap.base, size_bytes), - prev_size - size_bytes, MADV_DONTNEED) + rc = madvise(ptr_disp(env->dxb_mmap.base, size_bytes), prev_size - size_bytes, MADV_DONTNEED) ? ignore_enosys(errno) : MDBX_SUCCESS; #elif defined(POSIX_MADV_DONTNEED) if (rc == MDBX_RESULT_TRUE) - rc = ignore_enosys(posix_madvise(ptr_disp(env->dxb_mmap.base, size_bytes), - prev_size - size_bytes, - POSIX_MADV_DONTNEED)); + rc = ignore_enosys( + posix_madvise(ptr_disp(env->dxb_mmap.base, size_bytes), prev_size - size_bytes, POSIX_MADV_DONTNEED)); #elif defined(POSIX_FADV_DONTNEED) if (rc == MDBX_RESULT_TRUE) - rc = ignore_enosys(posix_fadvise(env->lazy_fd, size_bytes, - prev_size - size_bytes, - POSIX_FADV_DONTNEED)); + rc = ignore_enosys(posix_fadvise(env->lazy_fd, size_bytes, prev_size - size_bytes, POSIX_FADV_DONTNEED)); #endif /* MADV_DONTNEED */ if (unlikely(MDBX_IS_ERROR(rc))) { - const uint32_t mlocks_after = - atomic_load32(&env->lck->mlcnt[0], mo_Relaxed); + const uint32_t mlocks_after = atomic_load32(&env->lck->mlcnt[0], mo_Relaxed); if (rc == MDBX_EINVAL) { - const int severity = - (mlocks_after - munlocks_before) ? MDBX_LOG_NOTICE : MDBX_LOG_WARN; + const int severity = (mlocks_after - munlocks_before) ? 
MDBX_LOG_NOTICE : MDBX_LOG_WARN; if (LOG_ENABLED(severity)) debug_log(severity, __func__, __LINE__, "%s-madvise: ignore EINVAL (%d) since some pages maybe " "locked (%u/%u mlcnt-processes)", "resize", rc, mlocks_after, munlocks_before); } else { - ERROR("%s-madvise(%s, %zu, +%zu), %u/%u mlcnt-processes, err %d", - "mresize", "DONTNEED", size_bytes, prev_size - size_bytes, - mlocks_after, munlocks_before, rc); + ERROR("%s-madvise(%s, %zu, +%zu), %u/%u mlcnt-processes, err %d", "mresize", "DONTNEED", size_bytes, + prev_size - size_bytes, mlocks_after, munlocks_before, rc); goto bailout; } } else @@ -314,10 +280,8 @@ __cold int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, eASSERT(env, size_bytes <= env->dxb_mmap.current); env->lck->discarded_tail.weak = size_pgno; const bool readahead = - !(env->flags & MDBX_NORDAHEAD) && - mdbx_is_readahead_reasonable(size_bytes, -(intptr_t)prev_size); - const bool force = limit_bytes != prev_limit || - env->dxb_mmap.base != prev_map + !(env->flags & MDBX_NORDAHEAD) && mdbx_is_readahead_reasonable(size_bytes, -(intptr_t)prev_size); + const bool force = limit_bytes != prev_limit || env->dxb_mmap.base != prev_map #if defined(_WIN32) || defined(_WIN64) || prev_size > size_bytes #endif /* Windows */ @@ -343,8 +307,7 @@ bailout: VALGRIND_DISCARD(env->valgrind_handle); env->valgrind_handle = 0; if (env->dxb_mmap.limit) - env->valgrind_handle = VALGRIND_CREATE_BLOCK( - env->dxb_mmap.base, env->dxb_mmap.limit, "mdbx"); + env->valgrind_handle = VALGRIND_CREATE_BLOCK(env->dxb_mmap.base, env->dxb_mmap.limit, "mdbx"); } #endif /* ENABLE_MEMCHECK */ } else { @@ -377,8 +340,7 @@ bailout: osal_free(suspended); } #else - if (env->lck_mmap.lck && - (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) != 0) + if (env->lck_mmap.lck && (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) != 0) lck_rdt_unlock(env); int err = osal_fastmutex_release(&env->remap_guard); #endif /* Windows */ @@ -397,10 +359,8 @@ void 
dxb_sanitize_tail(MDBX_env *env, MDBX_txn *txn) { if (txn) { /* transaction start */ if (env->poison_edge < txn->geo.first_unallocated) env->poison_edge = txn->geo.first_unallocated; - VALGRIND_MAKE_MEM_DEFINED(env->dxb_mmap.base, - pgno2bytes(env, txn->geo.first_unallocated)); - MDBX_ASAN_UNPOISON_MEMORY_REGION( - env->dxb_mmap.base, pgno2bytes(env, txn->geo.first_unallocated)); + VALGRIND_MAKE_MEM_DEFINED(env->dxb_mmap.base, pgno2bytes(env, txn->geo.first_unallocated)); + MDBX_ASAN_UNPOISON_MEMORY_REGION(env->dxb_mmap.base, pgno2bytes(env, txn->geo.first_unallocated)); /* don't touch more, it should be already poisoned */ } else { /* transaction end */ bool should_unlock = false; @@ -410,8 +370,7 @@ void dxb_sanitize_tail(MDBX_env *env, MDBX_txn *txn) { return; } else if (env->txn && env_txn0_owned(env)) { /* inside write-txn */ - last = meta_recent(env, &env->basal_txn->tw.troika) - .ptr_v->geometry.first_unallocated; + last = meta_recent(env, &env->basal_txn->tw.troika).ptr_v->geometry.first_unallocated; } else if (env->flags & MDBX_RDONLY) { /* read-only mode, no write-txn, no wlock mutex */ last = NUM_METAS; @@ -429,12 +388,8 @@ void dxb_sanitize_tail(MDBX_env *env, MDBX_txn *txn) { if (edge > last) { eASSERT(env, last >= NUM_METAS); env->poison_edge = last; - VALGRIND_MAKE_MEM_NOACCESS( - ptr_disp(env->dxb_mmap.base, pgno2bytes(env, last)), - pgno2bytes(env, edge - last)); - MDBX_ASAN_POISON_MEMORY_REGION( - ptr_disp(env->dxb_mmap.base, pgno2bytes(env, last)), - pgno2bytes(env, edge - last)); + VALGRIND_MAKE_MEM_NOACCESS(ptr_disp(env->dxb_mmap.base, pgno2bytes(env, last)), pgno2bytes(env, edge - last)); + MDBX_ASAN_POISON_MEMORY_REGION(ptr_disp(env->dxb_mmap.base, pgno2bytes(env, last)), pgno2bytes(env, edge - last)); } if (should_unlock) lck_txn_unlock(env); @@ -443,22 +398,16 @@ void dxb_sanitize_tail(MDBX_env *env, MDBX_txn *txn) { #endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ /* Turn on/off readahead. It's harmful when the DB is larger than RAM. 
*/ -__cold int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, - const bool enable, const bool force_whole) { +__cold int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, const bool enable, const bool force_whole) { eASSERT(env, edge >= NUM_METAS && edge <= MAX_PAGENO + 1); eASSERT(env, (enable & 1) == (enable != 0)); - const bool toggle = force_whole || - ((enable ^ env->lck->readahead_anchor) & 1) || - !env->lck->readahead_anchor; + const bool toggle = force_whole || ((enable ^ env->lck->readahead_anchor) & 1) || !env->lck->readahead_anchor; const pgno_t prev_edge = env->lck->readahead_anchor >> 1; const size_t limit = env->dxb_mmap.limit; - size_t offset = - toggle ? 0 - : pgno_align2os_bytes(env, (prev_edge < edge) ? prev_edge : edge); + size_t offset = toggle ? 0 : pgno_align2os_bytes(env, (prev_edge < edge) ? prev_edge : edge); offset = (offset < limit) ? offset : limit; - size_t length = - pgno_align2os_bytes(env, (prev_edge < edge) ? edge : prev_edge); + size_t length = pgno_align2os_bytes(env, (prev_edge < edge) ? edge : prev_edge); length = (length < limit) ? length : limit; length -= offset; @@ -466,8 +415,7 @@ __cold int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, if (length == 0) return MDBX_SUCCESS; - NOTICE("readahead %s %u..%u", enable ? "ON" : "OFF", bytes2pgno(env, offset), - bytes2pgno(env, offset + length)); + NOTICE("readahead %s %u..%u", enable ? "ON" : "OFF", bytes2pgno(env, offset), bytes2pgno(env, offset + length)); #if defined(F_RDAHEAD) if (toggle && unlikely(fcntl(env->lazy_fd, F_RDAHEAD, enable) == -1)) @@ -478,8 +426,7 @@ __cold int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, void *const ptr = ptr_disp(env->dxb_mmap.base, offset); if (enable) { #if defined(MADV_NORMAL) - err = - madvise(ptr, length, MADV_NORMAL) ? ignore_enosys(errno) : MDBX_SUCCESS; + err = madvise(ptr, length, MADV_NORMAL) ? 
ignore_enosys(errno) : MDBX_SUCCESS; if (unlikely(MDBX_IS_ERROR(err))) return err; #elif defined(POSIX_MADV_NORMAL) @@ -487,8 +434,7 @@ __cold int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, if (unlikely(MDBX_IS_ERROR(err))) return err; #elif defined(POSIX_FADV_NORMAL) && defined(POSIX_FADV_WILLNEED) - err = ignore_enosys( - posix_fadvise(env->lazy_fd, offset, length, POSIX_FADV_NORMAL)); + err = ignore_enosys(posix_fadvise(env->lazy_fd, offset, length, POSIX_FADV_NORMAL)); if (unlikely(MDBX_IS_ERROR(err))) return err; #elif defined(_WIN32) || defined(_WIN64) @@ -505,15 +451,10 @@ __cold int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, #if defined(F_RDADVISE) struct radvisory hint; hint.ra_offset = offset; - hint.ra_count = - unlikely(length > INT_MAX && sizeof(length) > sizeof(hint.ra_count)) - ? INT_MAX - : (int)length; - (void)/* Ignore ENOTTY for DB on the ram-disk and so on */ fcntl( - env->lazy_fd, F_RDADVISE, &hint); + hint.ra_count = unlikely(length > INT_MAX && sizeof(length) > sizeof(hint.ra_count)) ? INT_MAX : (int)length; + (void)/* Ignore ENOTTY for DB on the ram-disk and so on */ fcntl(env->lazy_fd, F_RDADVISE, &hint); #elif defined(MADV_WILLNEED) - err = madvise(ptr, length, MADV_WILLNEED) ? ignore_enosys(errno) - : MDBX_SUCCESS; + err = madvise(ptr, length, MADV_WILLNEED) ? 
ignore_enosys(errno) : MDBX_SUCCESS; if (unlikely(MDBX_IS_ERROR(err))) return err; #elif defined(POSIX_MADV_WILLNEED) @@ -528,8 +469,7 @@ __cold int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, (void)imports.PrefetchVirtualMemory(GetCurrentProcess(), 1, &hint, 0); } #elif defined(POSIX_FADV_WILLNEED) - err = ignore_enosys( - posix_fadvise(env->lazy_fd, offset, length, POSIX_FADV_WILLNEED)); + err = ignore_enosys(posix_fadvise(env->lazy_fd, offset, length, POSIX_FADV_WILLNEED)); if (unlikely(MDBX_IS_ERROR(err))) return err; #else @@ -539,8 +479,7 @@ __cold int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, } else { mincore_clean_cache(env); #if defined(MADV_RANDOM) - err = - madvise(ptr, length, MADV_RANDOM) ? ignore_enosys(errno) : MDBX_SUCCESS; + err = madvise(ptr, length, MADV_RANDOM) ? ignore_enosys(errno) : MDBX_SUCCESS; if (unlikely(MDBX_IS_ERROR(err))) return err; #elif defined(POSIX_MADV_RANDOM) @@ -548,8 +487,7 @@ __cold int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, if (unlikely(MDBX_IS_ERROR(err))) return err; #elif defined(POSIX_FADV_RANDOM) - err = ignore_enosys( - posix_fadvise(env->lazy_fd, offset, length, POSIX_FADV_RANDOM)); + err = ignore_enosys(posix_fadvise(env->lazy_fd, offset, length, POSIX_FADV_RANDOM)); if (unlikely(MDBX_IS_ERROR(err))) return err; #elif defined(_WIN32) || defined(_WIN64) @@ -564,15 +502,13 @@ __cold int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, return err; } -__cold int dxb_setup(MDBX_env *env, const int lck_rc, - const mdbx_mode_t mode_bits) { +__cold int dxb_setup(MDBX_env *env, const int lck_rc, const mdbx_mode_t mode_bits) { meta_t header; eASSERT(env, !(env->flags & ENV_ACTIVE)); int rc = MDBX_RESULT_FALSE; int err = dxb_read_header(env, &header, lck_rc, mode_bits); if (unlikely(err != MDBX_SUCCESS)) { - if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE || err != MDBX_ENODATA || - (env->flags & MDBX_RDONLY) != 0 || + if (lck_rc != /* lck exclusive */ 
MDBX_RESULT_TRUE || err != MDBX_ENODATA || (env->flags & MDBX_RDONLY) != 0 || /* recovery mode */ env->stuck_meta >= 0) return err; @@ -591,14 +527,11 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, return err; header = *meta_init_triplet(env, env->page_auxbuf); - err = osal_pwrite(env->lazy_fd, env->page_auxbuf, - env->ps * (size_t)NUM_METAS, 0); + err = osal_pwrite(env->lazy_fd, env->page_auxbuf, env->ps * (size_t)NUM_METAS, 0); if (unlikely(err != MDBX_SUCCESS)) return err; - err = osal_ftruncate(env->lazy_fd, env->dxb_mmap.filesize = - env->dxb_mmap.current = - env->geo_in_bytes.now); + err = osal_ftruncate(env->lazy_fd, env->dxb_mmap.filesize = env->dxb_mmap.current = env->geo_in_bytes.now); if (unlikely(err != MDBX_SUCCESS)) return err; @@ -609,19 +542,14 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, #endif } - VERBOSE("header: root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO - "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO " +%u -%u, txn_id %" PRIaTXN - ", %s", - header.trees.main.root, header.trees.gc.root, header.geometry.lower, - header.geometry.first_unallocated, header.geometry.now, - header.geometry.upper, pv2pages(header.geometry.grow_pv), - pv2pages(header.geometry.shrink_pv), - unaligned_peek_u64(4, header.txnid_a), durable_caption(&header)); + VERBOSE("header: root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO + " +%u -%u, txn_id %" PRIaTXN ", %s", + header.trees.main.root, header.trees.gc.root, header.geometry.lower, header.geometry.first_unallocated, + header.geometry.now, header.geometry.upper, pv2pages(header.geometry.grow_pv), + pv2pages(header.geometry.shrink_pv), unaligned_peek_u64(4, header.txnid_a), durable_caption(&header)); - if (unlikely((header.trees.gc.flags & DB_PERSISTENT_FLAGS) != - MDBX_INTEGERKEY)) { - ERROR("unexpected/invalid db-flags 0x%x for %s", header.trees.gc.flags, - "GC/FreeDB"); + if (unlikely((header.trees.gc.flags & DB_PERSISTENT_FLAGS) != MDBX_INTEGERKEY)) { + 
ERROR("unexpected/invalid db-flags 0x%x for %s", header.trees.gc.flags, "GC/FreeDB"); return MDBX_INCOMPATIBLE; } env->dbs_flags[FREE_DBI] = DB_VALID | MDBX_INTEGERKEY; @@ -629,8 +557,7 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, env->kvs[FREE_DBI].clc.k.lmax = env->kvs[FREE_DBI].clc.k.lmin = 8; env->kvs[FREE_DBI].clc.v.cmp = cmp_lenfast; env->kvs[FREE_DBI].clc.v.lmin = 4; - env->kvs[FREE_DBI].clc.v.lmax = - mdbx_env_get_maxvalsize_ex(env, MDBX_INTEGERKEY); + env->kvs[FREE_DBI].clc.v.lmax = mdbx_env_get_maxvalsize_ex(env, MDBX_INTEGERKEY); if (env->ps != header.pagesize) env_setup_pagesize(env, header.pagesize); @@ -641,18 +568,15 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, } const size_t used_bytes = pgno2bytes(env, header.geometry.first_unallocated); - const size_t used_aligned2os_bytes = - ceil_powerof2(used_bytes, globals.sys_pagesize); + const size_t used_aligned2os_bytes = ceil_powerof2(used_bytes, globals.sys_pagesize); if ((env->flags & MDBX_RDONLY) /* readonly */ || lck_rc != MDBX_RESULT_TRUE /* not exclusive */ || /* recovery mode */ env->stuck_meta >= 0) { /* use present params from db */ const size_t pagesize = header.pagesize; - err = mdbx_env_set_geometry( - env, header.geometry.lower * pagesize, header.geometry.now * pagesize, - header.geometry.upper * pagesize, - pv2pages(header.geometry.grow_pv) * pagesize, - pv2pages(header.geometry.shrink_pv) * pagesize, header.pagesize); + err = mdbx_env_set_geometry(env, header.geometry.lower * pagesize, header.geometry.now * pagesize, + header.geometry.upper * pagesize, pv2pages(header.geometry.grow_pv) * pagesize, + pv2pages(header.geometry.shrink_pv) * pagesize, header.pagesize); if (unlikely(err != MDBX_SUCCESS)) { ERROR("%s: err %d", "could not apply geometry from db", err); return (err == MDBX_EINVAL) ? 
MDBX_INCOMPATIBLE : err; @@ -668,24 +592,17 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, * - upper or lower limit changes * - shrink threshold or growth step * But ignore change just a 'now/current' size. */ - if (bytes_align2os_bytes(env, env->geo_in_bytes.upper) != - pgno2bytes(env, header.geometry.upper) || - bytes_align2os_bytes(env, env->geo_in_bytes.lower) != - pgno2bytes(env, header.geometry.lower) || - bytes_align2os_bytes(env, env->geo_in_bytes.shrink) != - pgno2bytes(env, pv2pages(header.geometry.shrink_pv)) || - bytes_align2os_bytes(env, env->geo_in_bytes.grow) != - pgno2bytes(env, pv2pages(header.geometry.grow_pv))) { + if (bytes_align2os_bytes(env, env->geo_in_bytes.upper) != pgno2bytes(env, header.geometry.upper) || + bytes_align2os_bytes(env, env->geo_in_bytes.lower) != pgno2bytes(env, header.geometry.lower) || + bytes_align2os_bytes(env, env->geo_in_bytes.shrink) != pgno2bytes(env, pv2pages(header.geometry.shrink_pv)) || + bytes_align2os_bytes(env, env->geo_in_bytes.grow) != pgno2bytes(env, pv2pages(header.geometry.grow_pv))) { if (env->geo_in_bytes.shrink && env->geo_in_bytes.now > used_bytes) /* pre-shrink if enabled */ - env->geo_in_bytes.now = used_bytes + env->geo_in_bytes.shrink - - used_bytes % env->geo_in_bytes.shrink; + env->geo_in_bytes.now = used_bytes + env->geo_in_bytes.shrink - used_bytes % env->geo_in_bytes.shrink; - err = mdbx_env_set_geometry( - env, env->geo_in_bytes.lower, env->geo_in_bytes.now, - env->geo_in_bytes.upper, env->geo_in_bytes.grow, - env->geo_in_bytes.shrink, header.pagesize); + err = mdbx_env_set_geometry(env, env->geo_in_bytes.lower, env->geo_in_bytes.now, env->geo_in_bytes.upper, + env->geo_in_bytes.grow, env->geo_in_bytes.shrink, header.pagesize); if (unlikely(err != MDBX_SUCCESS)) { ERROR("%s: err %d", "could not apply preconfigured db-geometry", err); return (err == MDBX_EINVAL) ? 
MDBX_INCOMPATIBLE : err; @@ -695,20 +612,14 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, header.geometry.now = bytes2pgno(env, env->geo_in_bytes.now); header.geometry.lower = bytes2pgno(env, env->geo_in_bytes.lower); header.geometry.upper = bytes2pgno(env, env->geo_in_bytes.upper); - header.geometry.grow_pv = - pages2pv(bytes2pgno(env, env->geo_in_bytes.grow)); - header.geometry.shrink_pv = - pages2pv(bytes2pgno(env, env->geo_in_bytes.shrink)); + header.geometry.grow_pv = pages2pv(bytes2pgno(env, env->geo_in_bytes.grow)); + header.geometry.shrink_pv = pages2pv(bytes2pgno(env, env->geo_in_bytes.shrink)); - VERBOSE("amended: root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO - "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO + VERBOSE("amended: root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO " +%u -%u, txn_id %" PRIaTXN ", %s", - header.trees.main.root, header.trees.gc.root, - header.geometry.lower, header.geometry.first_unallocated, - header.geometry.now, header.geometry.upper, - pv2pages(header.geometry.grow_pv), - pv2pages(header.geometry.shrink_pv), - unaligned_peek_u64(4, header.txnid_a), durable_caption(&header)); + header.trees.main.root, header.trees.gc.root, header.geometry.lower, header.geometry.first_unallocated, + header.geometry.now, header.geometry.upper, pv2pages(header.geometry.grow_pv), + pv2pages(header.geometry.shrink_pv), unaligned_peek_u64(4, header.txnid_a), durable_caption(&header)); } else { /* fetch back 'now/current' size, since it was ignored during comparison * and may differ. 
*/ @@ -722,31 +633,25 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, env->geo_in_bytes.lower = pgno2bytes(env, header.geometry.lower); env->geo_in_bytes.upper = pgno2bytes(env, header.geometry.upper); env->geo_in_bytes.grow = pgno2bytes(env, pv2pages(header.geometry.grow_pv)); - env->geo_in_bytes.shrink = - pgno2bytes(env, pv2pages(header.geometry.shrink_pv)); + env->geo_in_bytes.shrink = pgno2bytes(env, pv2pages(header.geometry.shrink_pv)); } - ENSURE(env, pgno_align2os_bytes(env, header.geometry.now) == - env->geo_in_bytes.now); + ENSURE(env, pgno_align2os_bytes(env, header.geometry.now) == env->geo_in_bytes.now); ENSURE(env, env->geo_in_bytes.now >= used_bytes); const uint64_t filesize_before = env->dxb_mmap.filesize; if (unlikely(filesize_before != env->geo_in_bytes.now)) { if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE) { - VERBOSE("filesize mismatch (expect %" PRIuPTR "b/%" PRIaPGNO - "p, have %" PRIu64 "b/%" PRIaPGNO "p), " + VERBOSE("filesize mismatch (expect %" PRIuPTR "b/%" PRIaPGNO "p, have %" PRIu64 "b/%" PRIaPGNO "p), " "assume other process working", - env->geo_in_bytes.now, bytes2pgno(env, env->geo_in_bytes.now), - filesize_before, bytes2pgno(env, (size_t)filesize_before)); - } else { - WARNING("filesize mismatch (expect %" PRIuSIZE "b/%" PRIaPGNO - "p, have %" PRIu64 "b/%" PRIaPGNO "p)", - env->geo_in_bytes.now, bytes2pgno(env, env->geo_in_bytes.now), - filesize_before, bytes2pgno(env, (size_t)filesize_before)); - if (filesize_before < used_bytes) { - ERROR("last-page beyond end-of-file (last %" PRIaPGNO - ", have %" PRIaPGNO ")", - header.geometry.first_unallocated, + env->geo_in_bytes.now, bytes2pgno(env, env->geo_in_bytes.now), filesize_before, bytes2pgno(env, (size_t)filesize_before)); + } else { + WARNING("filesize mismatch (expect %" PRIuSIZE "b/%" PRIaPGNO "p, have %" PRIu64 "b/%" PRIaPGNO "p)", + env->geo_in_bytes.now, bytes2pgno(env, env->geo_in_bytes.now), filesize_before, + bytes2pgno(env, (size_t)filesize_before)); + if 
(filesize_before < used_bytes) { + ERROR("last-page beyond end-of-file (last %" PRIaPGNO ", have %" PRIaPGNO ")", + header.geometry.first_unallocated, bytes2pgno(env, (size_t)filesize_before)); return MDBX_CORRUPTED; } @@ -757,65 +662,50 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, } WARNING("%s", "ignore filesize mismatch in readonly-mode"); } else { - VERBOSE("will resize datafile to %" PRIuSIZE " bytes, %" PRIaPGNO - " pages", - env->geo_in_bytes.now, bytes2pgno(env, env->geo_in_bytes.now)); + VERBOSE("will resize datafile to %" PRIuSIZE " bytes, %" PRIaPGNO " pages", env->geo_in_bytes.now, + bytes2pgno(env, env->geo_in_bytes.now)); } } } - VERBOSE("current boot-id %" PRIx64 "-%" PRIx64 " (%savailable)", - globals.bootid.x, globals.bootid.y, + VERBOSE("current boot-id %" PRIx64 "-%" PRIx64 " (%savailable)", globals.bootid.x, globals.bootid.y, (globals.bootid.x | globals.bootid.y) ? "" : "not-"); /* calculate readahead hint before mmap with zero redundant pages */ const bool readahead = - !(env->flags & MDBX_NORDAHEAD) && - mdbx_is_readahead_reasonable(used_bytes, 0) == MDBX_RESULT_TRUE; + !(env->flags & MDBX_NORDAHEAD) && mdbx_is_readahead_reasonable(used_bytes, 0) == MDBX_RESULT_TRUE; - err = osal_mmap(env->flags, &env->dxb_mmap, env->geo_in_bytes.now, - env->geo_in_bytes.upper, + err = osal_mmap(env->flags, &env->dxb_mmap, env->geo_in_bytes.now, env->geo_in_bytes.upper, (lck_rc && env->stuck_meta < 0) ? MMAP_OPTION_TRUNCATE : 0); if (unlikely(err != MDBX_SUCCESS)) return err; #if defined(MADV_DONTDUMP) - err = madvise(env->dxb_mmap.base, env->dxb_mmap.limit, MADV_DONTDUMP) - ? ignore_enosys(errno) - : MDBX_SUCCESS; + err = madvise(env->dxb_mmap.base, env->dxb_mmap.limit, MADV_DONTDUMP) ? 
ignore_enosys(errno) : MDBX_SUCCESS; if (unlikely(MDBX_IS_ERROR(err))) return err; #endif /* MADV_DONTDUMP */ #if defined(MADV_DODUMP) if (globals.runtime_flags & MDBX_DBG_DUMP) { const size_t meta_length_aligned2os = pgno_align2os_bytes(env, NUM_METAS); - err = madvise(env->dxb_mmap.base, meta_length_aligned2os, MADV_DODUMP) - ? ignore_enosys(errno) - : MDBX_SUCCESS; + err = madvise(env->dxb_mmap.base, meta_length_aligned2os, MADV_DODUMP) ? ignore_enosys(errno) : MDBX_SUCCESS; if (unlikely(MDBX_IS_ERROR(err))) return err; } #endif /* MADV_DODUMP */ #ifdef ENABLE_MEMCHECK - env->valgrind_handle = - VALGRIND_CREATE_BLOCK(env->dxb_mmap.base, env->dxb_mmap.limit, "mdbx"); + env->valgrind_handle = VALGRIND_CREATE_BLOCK(env->dxb_mmap.base, env->dxb_mmap.limit, "mdbx"); #endif /* ENABLE_MEMCHECK */ - eASSERT(env, used_bytes >= pgno2bytes(env, NUM_METAS) && - used_bytes <= env->dxb_mmap.limit); + eASSERT(env, used_bytes >= pgno2bytes(env, NUM_METAS) && used_bytes <= env->dxb_mmap.limit); #if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) - if (env->dxb_mmap.filesize > used_bytes && - env->dxb_mmap.filesize < env->dxb_mmap.limit) { - VALGRIND_MAKE_MEM_NOACCESS(ptr_disp(env->dxb_mmap.base, used_bytes), - env->dxb_mmap.filesize - used_bytes); - MDBX_ASAN_POISON_MEMORY_REGION(ptr_disp(env->dxb_mmap.base, used_bytes), - env->dxb_mmap.filesize - used_bytes); + if (env->dxb_mmap.filesize > used_bytes && env->dxb_mmap.filesize < env->dxb_mmap.limit) { + VALGRIND_MAKE_MEM_NOACCESS(ptr_disp(env->dxb_mmap.base, used_bytes), env->dxb_mmap.filesize - used_bytes); + MDBX_ASAN_POISON_MEMORY_REGION(ptr_disp(env->dxb_mmap.base, used_bytes), env->dxb_mmap.filesize - used_bytes); } env->poison_edge = - bytes2pgno(env, (env->dxb_mmap.filesize < env->dxb_mmap.limit) - ? env->dxb_mmap.filesize - : env->dxb_mmap.limit); + bytes2pgno(env, (env->dxb_mmap.filesize < env->dxb_mmap.limit) ? 
env->dxb_mmap.filesize : env->dxb_mmap.limit); #endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ troika_t troika = meta_tap(env); @@ -829,8 +719,7 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, meta_t const *const target = METAPAGE(env, env->stuck_meta); err = meta_validate_copy(env, target, &clone); if (unlikely(err != MDBX_SUCCESS)) { - ERROR("target meta[%u] is corrupted", - bytes2pgno(env, ptr_dist(data_page(target), env->dxb_mmap.base))); + ERROR("target meta[%u] is corrupted", bytes2pgno(env, ptr_dist(data_page(target), env->dxb_mmap.base))); meta_troika_dump(env, &troika); return MDBX_CORRUPTED; } @@ -872,9 +761,8 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, err = meta_validate_copy(env, prefer_steady.ptr_c, &clone); if (unlikely(err != MDBX_SUCCESS)) { ERROR("meta[%u] with %s txnid %" PRIaTXN " is corrupted, %s needed", - bytes2pgno(env, - ptr_dist(prefer_steady.ptr_c, env->dxb_mmap.base)), - "steady", prefer_steady.txnid, "manual recovery"); + bytes2pgno(env, ptr_dist(prefer_steady.ptr_c, env->dxb_mmap.base)), "steady", prefer_steady.txnid, + "manual recovery"); meta_troika_dump(env, &troika); return MDBX_CORRUPTED; } @@ -882,23 +770,17 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, break; } - const pgno_t pgno = - bytes2pgno(env, ptr_dist(recent.ptr_c, env->dxb_mmap.base)); - const bool last_valid = - meta_validate_copy(env, recent.ptr_c, &clone) == MDBX_SUCCESS; - eASSERT(env, - !prefer_steady.is_steady || recent.txnid != prefer_steady.txnid); + const pgno_t pgno = bytes2pgno(env, ptr_dist(recent.ptr_c, env->dxb_mmap.base)); + const bool last_valid = meta_validate_copy(env, recent.ptr_c, &clone) == MDBX_SUCCESS; + eASSERT(env, !prefer_steady.is_steady || recent.txnid != prefer_steady.txnid); if (unlikely(!last_valid)) { if (unlikely(!prefer_steady.is_steady)) { - ERROR("%s for open or automatic rollback, %s", - "there are no suitable meta-pages", + ERROR("%s for open or automatic rollback, %s", "there are no suitable 
meta-pages", "manual recovery is required"); meta_troika_dump(env, &troika); return MDBX_CORRUPTED; } - WARNING("meta[%u] with last txnid %" PRIaTXN - " is corrupted, rollback needed", - pgno, recent.txnid); + WARNING("meta[%u] with last txnid %" PRIaTXN " is corrupted, rollback needed", pgno, recent.txnid); meta_troika_dump(env, &troika); goto purge_meta_head; } @@ -907,15 +789,14 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, if (env->flags & MDBX_RDONLY) { ERROR("%s, but boot-id(%016" PRIx64 "-%016" PRIx64 ") is MATCH: " "rollback NOT needed, steady-sync NEEDED%s", - "opening after an unclean shutdown", globals.bootid.x, - globals.bootid.y, ", but unable in read-only mode"); + "opening after an unclean shutdown", globals.bootid.x, globals.bootid.y, + ", but unable in read-only mode"); meta_troika_dump(env, &troika); return MDBX_WANNA_RECOVERY; } WARNING("%s, but boot-id(%016" PRIx64 "-%016" PRIx64 ") is MATCH: " "rollback NOT needed, steady-sync NEEDED%s", - "opening after an unclean shutdown", globals.bootid.x, - globals.bootid.y, ""); + "opening after an unclean shutdown", globals.bootid.x, globals.bootid.y, ""); header = clone; env->lck->unsynced_pages.weak = header.geometry.first_unallocated; if (!env->lck->eoos_timestamp.weak) @@ -923,18 +804,14 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, break; } if (unlikely(!prefer_steady.is_steady)) { - ERROR("%s, but %s for automatic rollback: %s", - "opening after an unclean shutdown", - "there are no suitable meta-pages", - "manual recovery is required"); + ERROR("%s, but %s for automatic rollback: %s", "opening after an unclean shutdown", + "there are no suitable meta-pages", "manual recovery is required"); meta_troika_dump(env, &troika); return MDBX_CORRUPTED; } if (env->flags & MDBX_RDONLY) { - ERROR("%s and rollback needed: (from head %" PRIaTXN - " to steady %" PRIaTXN ")%s", - "opening after an unclean shutdown", recent.txnid, - prefer_steady.txnid, ", but unable in read-only mode"); + 
ERROR("%s and rollback needed: (from head %" PRIaTXN " to steady %" PRIaTXN ")%s", + "opening after an unclean shutdown", recent.txnid, prefer_steady.txnid, ", but unable in read-only mode"); meta_troika_dump(env, &troika); return MDBX_WANNA_RECOVERY; } @@ -942,15 +819,13 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, purge_meta_head: NOTICE("%s and doing automatic rollback: " "purge%s meta[%u] with%s txnid %" PRIaTXN, - "opening after an unclean shutdown", last_valid ? "" : " invalid", - pgno, last_valid ? " weak" : "", recent.txnid); + "opening after an unclean shutdown", last_valid ? "" : " invalid", pgno, last_valid ? " weak" : "", + recent.txnid); meta_troika_dump(env, &troika); ENSURE(env, prefer_steady.is_steady); - err = meta_override(env, pgno, 0, - last_valid ? recent.ptr_c : prefer_steady.ptr_c); + err = meta_override(env, pgno, 0, last_valid ? recent.ptr_c : prefer_steady.ptr_c); if (err) { - ERROR("rollback: overwrite meta[%u] with txnid %" PRIaTXN ", error %d", - pgno, recent.txnid, err); + ERROR("rollback: overwrite meta[%u] with txnid %" PRIaTXN ", error %d", pgno, recent.txnid, err); return err; } troika = meta_tap(env); @@ -961,17 +836,14 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, if (lck_rc == /* lck exclusive */ MDBX_RESULT_TRUE) { //-------------------------------------------------- shrink DB & update geo /* re-check size after mmap */ - if ((env->dxb_mmap.current & (globals.sys_pagesize - 1)) != 0 || - env->dxb_mmap.current < used_bytes) { - ERROR("unacceptable/unexpected datafile size %" PRIuPTR, - env->dxb_mmap.current); + if ((env->dxb_mmap.current & (globals.sys_pagesize - 1)) != 0 || env->dxb_mmap.current < used_bytes) { + ERROR("unacceptable/unexpected datafile size %" PRIuPTR, env->dxb_mmap.current); return MDBX_PROBLEM; } if (env->dxb_mmap.current != env->geo_in_bytes.now) { header.geometry.now = bytes2pgno(env, env->dxb_mmap.current); - NOTICE("need update meta-geo to filesize %" PRIuPTR " bytes, %" PRIaPGNO - 
" pages", - env->dxb_mmap.current, header.geometry.now); + NOTICE("need update meta-geo to filesize %" PRIuPTR " bytes, %" PRIaPGNO " pages", env->dxb_mmap.current, + header.geometry.now); } const meta_ptr_t recent = meta_recent(env, &troika); @@ -979,21 +851,15 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, header.geometry.grow_pv != recent.ptr_c->geometry.grow_pv || header.geometry.shrink_pv != recent.ptr_c->geometry.shrink_pv || header.geometry.lower != recent.ptr_c->geometry.lower || - header.geometry.upper != recent.ptr_c->geometry.upper || - header.geometry.now != recent.ptr_c->geometry.now) { + header.geometry.upper != recent.ptr_c->geometry.upper || header.geometry.now != recent.ptr_c->geometry.now) { if ((env->flags & MDBX_RDONLY) != 0 || /* recovery mode */ env->stuck_meta >= 0) { - WARNING("skipped update meta.geo in %s mode: from l%" PRIaPGNO - "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u, to l%" PRIaPGNO - "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u", - (env->stuck_meta < 0) ? "read-only" : "recovery", - recent.ptr_c->geometry.lower, recent.ptr_c->geometry.now, - recent.ptr_c->geometry.upper, - pv2pages(recent.ptr_c->geometry.shrink_pv), - pv2pages(recent.ptr_c->geometry.grow_pv), header.geometry.lower, - header.geometry.now, header.geometry.upper, - pv2pages(header.geometry.shrink_pv), - pv2pages(header.geometry.grow_pv)); + WARNING("skipped update meta.geo in %s mode: from l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO + "/s%u-g%u, to l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u", + (env->stuck_meta < 0) ? 
"read-only" : "recovery", recent.ptr_c->geometry.lower, + recent.ptr_c->geometry.now, recent.ptr_c->geometry.upper, pv2pages(recent.ptr_c->geometry.shrink_pv), + pv2pages(recent.ptr_c->geometry.grow_pv), header.geometry.lower, header.geometry.now, + header.geometry.upper, pv2pages(header.geometry.shrink_pv), pv2pages(header.geometry.grow_pv)); } else { const txnid_t next_txnid = safe64_txnid_next(recent.txnid); if (unlikely(next_txnid > MAX_TXNID)) { @@ -1001,57 +867,41 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, return MDBX_TXN_FULL; } NOTICE("updating meta.geo: " - "from l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO - "/s%u-g%u (txn#%" PRIaTXN "), " - "to l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO - "/s%u-g%u (txn#%" PRIaTXN ")", - recent.ptr_c->geometry.lower, recent.ptr_c->geometry.now, - recent.ptr_c->geometry.upper, - pv2pages(recent.ptr_c->geometry.shrink_pv), - pv2pages(recent.ptr_c->geometry.grow_pv), recent.txnid, - header.geometry.lower, header.geometry.now, - header.geometry.upper, pv2pages(header.geometry.shrink_pv), + "from l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u (txn#%" PRIaTXN "), " + "to l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u (txn#%" PRIaTXN ")", + recent.ptr_c->geometry.lower, recent.ptr_c->geometry.now, recent.ptr_c->geometry.upper, + pv2pages(recent.ptr_c->geometry.shrink_pv), pv2pages(recent.ptr_c->geometry.grow_pv), recent.txnid, + header.geometry.lower, header.geometry.now, header.geometry.upper, pv2pages(header.geometry.shrink_pv), pv2pages(header.geometry.grow_pv), next_txnid); ENSURE(env, header.unsafe_txnid == recent.txnid); meta_set_txnid(env, &header, next_txnid); - err = dxb_sync_locked(env, env->flags | txn_shrink_allowed, &header, - &troika); + err = dxb_sync_locked(env, env->flags | txn_shrink_allowed, &header, &troika); if (err) { ERROR("error %d, while updating meta.geo: " - "from l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO - "/s%u-g%u (txn#%" PRIaTXN "), " - "to l%" PRIaPGNO "-n%" PRIaPGNO 
"-u%" PRIaPGNO - "/s%u-g%u (txn#%" PRIaTXN ")", - err, recent.ptr_c->geometry.lower, recent.ptr_c->geometry.now, - recent.ptr_c->geometry.upper, - pv2pages(recent.ptr_c->geometry.shrink_pv), - pv2pages(recent.ptr_c->geometry.grow_pv), recent.txnid, - header.geometry.lower, header.geometry.now, - header.geometry.upper, pv2pages(header.geometry.shrink_pv), + "from l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u (txn#%" PRIaTXN "), " + "to l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u (txn#%" PRIaTXN ")", + err, recent.ptr_c->geometry.lower, recent.ptr_c->geometry.now, recent.ptr_c->geometry.upper, + pv2pages(recent.ptr_c->geometry.shrink_pv), pv2pages(recent.ptr_c->geometry.grow_pv), recent.txnid, + header.geometry.lower, header.geometry.now, header.geometry.upper, pv2pages(header.geometry.shrink_pv), pv2pages(header.geometry.grow_pv), header.unsafe_txnid); return err; } } } - atomic_store32(&env->lck->discarded_tail, - bytes2pgno(env, used_aligned2os_bytes), mo_Relaxed); + atomic_store32(&env->lck->discarded_tail, bytes2pgno(env, used_aligned2os_bytes), mo_Relaxed); if ((env->flags & MDBX_RDONLY) == 0 && env->stuck_meta < 0 && (globals.runtime_flags & MDBX_DBG_DONT_UPGRADE) == 0) { for (unsigned n = 0; n < NUM_METAS; ++n) { meta_t *const meta = METAPAGE(env, n); - if (unlikely(unaligned_peek_u64(4, &meta->magic_and_version) != - MDBX_DATA_MAGIC) || - (meta->dxbid.x | meta->dxbid.y) == 0 || - (meta->gc_flags & ~DB_PERSISTENT_FLAGS)) { - const txnid_t txnid = - meta_is_used(&troika, n) ? constmeta_txnid(meta) : 0; + if (unlikely(unaligned_peek_u64(4, &meta->magic_and_version) != MDBX_DATA_MAGIC) || + (meta->dxbid.x | meta->dxbid.y) == 0 || (meta->gc_flags & ~DB_PERSISTENT_FLAGS)) { + const txnid_t txnid = meta_is_used(&troika, n) ? constmeta_txnid(meta) : 0; NOTICE("%s %s" "meta[%u], txnid %" PRIaTXN, - "updating db-format/guid signature for", - meta_is_steady(meta) ? 
"stead-" : "weak-", n, txnid); + "updating db-format/guid signature for", meta_is_steady(meta) ? "stead-" : "weak-", n, txnid); err = meta_override(env, n, txnid, meta); if (unlikely(err != MDBX_SUCCESS) && /* Just ignore the MDBX_PROBLEM error, since here it is @@ -1059,8 +909,7 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, * meta-page that is invalid for current state of a DB, * e.g. after shrinking DB file */ err != MDBX_PROBLEM) { - ERROR("%s meta[%u], txnid %" PRIaTXN ", error %d", - "updating db-format signature for", n, txnid, err); + ERROR("%s meta[%u], txnid %" PRIaTXN ", error %d", "updating db-format signature for", n, txnid, err); return err; } troika = meta_tap(env); @@ -1074,11 +923,10 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, #if defined(MADV_REMOVE) if (lck_rc && (env->flags & MDBX_WRITEMAP) != 0 && /* not recovery mode */ env->stuck_meta < 0) { - NOTICE("open-MADV_%s %u..%u", "REMOVE (deallocate file space)", - env->lck->discarded_tail.weak, + NOTICE("open-MADV_%s %u..%u", "REMOVE (deallocate file space)", env->lck->discarded_tail.weak, bytes2pgno(env, env->dxb_mmap.current)); - err = madvise(ptr_disp(env->dxb_mmap.base, used_aligned2os_bytes), - env->dxb_mmap.current - used_aligned2os_bytes, MADV_REMOVE) + err = madvise(ptr_disp(env->dxb_mmap.base, used_aligned2os_bytes), env->dxb_mmap.current - used_aligned2os_bytes, + MADV_REMOVE) ? 
ignore_enosys(errno) : MDBX_SUCCESS; if (unlikely(MDBX_IS_ERROR(err))) @@ -1086,24 +934,21 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, } #endif /* MADV_REMOVE */ #if defined(MADV_DONTNEED) - NOTICE("open-MADV_%s %u..%u", "DONTNEED", env->lck->discarded_tail.weak, - bytes2pgno(env, env->dxb_mmap.current)); - err = madvise(ptr_disp(env->dxb_mmap.base, used_aligned2os_bytes), - env->dxb_mmap.current - used_aligned2os_bytes, MADV_DONTNEED) + NOTICE("open-MADV_%s %u..%u", "DONTNEED", env->lck->discarded_tail.weak, bytes2pgno(env, env->dxb_mmap.current)); + err = madvise(ptr_disp(env->dxb_mmap.base, used_aligned2os_bytes), env->dxb_mmap.current - used_aligned2os_bytes, + MADV_DONTNEED) ? ignore_enosys(errno) : MDBX_SUCCESS; if (unlikely(MDBX_IS_ERROR(err))) return err; #elif defined(POSIX_MADV_DONTNEED) - err = ignore_enosys(posix_madvise( - ptr_disp(env->dxb_mmap.base, used_aligned2os_bytes), - env->dxb_mmap.current - used_aligned2os_bytes, POSIX_MADV_DONTNEED)); + err = ignore_enosys(posix_madvise(ptr_disp(env->dxb_mmap.base, used_aligned2os_bytes), + env->dxb_mmap.current - used_aligned2os_bytes, POSIX_MADV_DONTNEED)); if (unlikely(MDBX_IS_ERROR(err))) return err; #elif defined(POSIX_FADV_DONTNEED) - err = ignore_enosys(posix_fadvise( - env->lazy_fd, used_aligned2os_bytes, - env->dxb_mmap.current - used_aligned2os_bytes, POSIX_FADV_DONTNEED)); + err = ignore_enosys(posix_fadvise(env->lazy_fd, used_aligned2os_bytes, + env->dxb_mmap.current - used_aligned2os_bytes, POSIX_FADV_DONTNEED)); if (unlikely(MDBX_IS_ERROR(err))) return err; #endif /* MADV_DONTNEED */ @@ -1116,8 +961,7 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, return rc; } -int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, - troika_t *const troika) { +int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, troika_t *const troika) { eASSERT(env, ((env->flags ^ flags) & MDBX_WRITEMAP) == 0); eASSERT(env, pending->trees.gc.flags == 
MDBX_INTEGERKEY); eASSERT(env, check_table_flags(pending->trees.main.flags)); @@ -1127,107 +971,82 @@ int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, const meta_ptr_t head = meta_recent(env, troika); int rc; - eASSERT(env, - pending < METAPAGE(env, 0) || pending > METAPAGE(env, NUM_METAS)); + eASSERT(env, pending < METAPAGE(env, 0) || pending > METAPAGE(env, NUM_METAS)); eASSERT(env, (env->flags & (MDBX_RDONLY | ENV_FATAL_ERROR)) == 0); eASSERT(env, pending->geometry.first_unallocated <= pending->geometry.now); if (flags & MDBX_SAFE_NOSYNC) { /* Check auto-sync conditions */ - const pgno_t autosync_threshold = - atomic_load32(&env->lck->autosync_threshold, mo_Relaxed); - const uint64_t autosync_period = - atomic_load64(&env->lck->autosync_period, mo_Relaxed); + const pgno_t autosync_threshold = atomic_load32(&env->lck->autosync_threshold, mo_Relaxed); + const uint64_t autosync_period = atomic_load64(&env->lck->autosync_period, mo_Relaxed); uint64_t eoos_timestamp; - if ((autosync_threshold && - atomic_load64(&env->lck->unsynced_pages, mo_Relaxed) >= - autosync_threshold) || - (autosync_period && - (eoos_timestamp = - atomic_load64(&env->lck->eoos_timestamp, mo_Relaxed)) && + if ((autosync_threshold && atomic_load64(&env->lck->unsynced_pages, mo_Relaxed) >= autosync_threshold) || + (autosync_period && (eoos_timestamp = atomic_load64(&env->lck->eoos_timestamp, mo_Relaxed)) && osal_monotime() - eoos_timestamp >= autosync_period)) flags &= MDBX_WRITEMAP | txn_shrink_allowed; /* force steady */ } pgno_t shrink = 0; if (flags & txn_shrink_allowed) { - const size_t prev_discarded_pgno = - atomic_load32(&env->lck->discarded_tail, mo_Relaxed); + const size_t prev_discarded_pgno = atomic_load32(&env->lck->discarded_tail, mo_Relaxed); if (prev_discarded_pgno < pending->geometry.first_unallocated) env->lck->discarded_tail.weak = pending->geometry.first_unallocated; - else if (prev_discarded_pgno >= - pending->geometry.first_unallocated + 
env->madv_threshold) { + else if (prev_discarded_pgno >= pending->geometry.first_unallocated + env->madv_threshold) { /* LY: check conditions to discard unused pages */ - const pgno_t largest_pgno = mvcc_snapshot_largest( - env, (head.ptr_c->geometry.first_unallocated > - pending->geometry.first_unallocated) - ? head.ptr_c->geometry.first_unallocated - : pending->geometry.first_unallocated); + const pgno_t largest_pgno = + mvcc_snapshot_largest(env, (head.ptr_c->geometry.first_unallocated > pending->geometry.first_unallocated) + ? head.ptr_c->geometry.first_unallocated + : pending->geometry.first_unallocated); eASSERT(env, largest_pgno >= NUM_METAS); #if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) const pgno_t edge = env->poison_edge; if (edge > largest_pgno) { env->poison_edge = largest_pgno; - VALGRIND_MAKE_MEM_NOACCESS( - ptr_disp(env->dxb_mmap.base, pgno2bytes(env, largest_pgno)), - pgno2bytes(env, edge - largest_pgno)); - MDBX_ASAN_POISON_MEMORY_REGION( - ptr_disp(env->dxb_mmap.base, pgno2bytes(env, largest_pgno)), - pgno2bytes(env, edge - largest_pgno)); + VALGRIND_MAKE_MEM_NOACCESS(ptr_disp(env->dxb_mmap.base, pgno2bytes(env, largest_pgno)), + pgno2bytes(env, edge - largest_pgno)); + MDBX_ASAN_POISON_MEMORY_REGION(ptr_disp(env->dxb_mmap.base, pgno2bytes(env, largest_pgno)), + pgno2bytes(env, edge - largest_pgno)); } #endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ #if defined(MADV_DONTNEED) || defined(POSIX_MADV_DONTNEED) const size_t discard_edge_pgno = pgno_align2os_pgno(env, largest_pgno); if (prev_discarded_pgno >= discard_edge_pgno + env->madv_threshold) { - const size_t prev_discarded_bytes = - pgno_align2os_bytes(env, prev_discarded_pgno); + const size_t prev_discarded_bytes = pgno_align2os_bytes(env, prev_discarded_pgno); const size_t discard_edge_bytes = pgno2bytes(env, discard_edge_pgno); /* из-за выравнивания prev_discarded_bytes и discard_edge_bytes * могут быть равны */ if (prev_discarded_bytes > discard_edge_bytes) { - 
NOTICE("shrink-MADV_%s %zu..%zu", "DONTNEED", discard_edge_pgno, - prev_discarded_pgno); - munlock_after(env, discard_edge_pgno, - bytes_align2os_bytes(env, env->dxb_mmap.current)); - const uint32_t munlocks_before = - atomic_load32(&env->lck->mlcnt[1], mo_Relaxed); + NOTICE("shrink-MADV_%s %zu..%zu", "DONTNEED", discard_edge_pgno, prev_discarded_pgno); + munlock_after(env, discard_edge_pgno, bytes_align2os_bytes(env, env->dxb_mmap.current)); + const uint32_t munlocks_before = atomic_load32(&env->lck->mlcnt[1], mo_Relaxed); #if defined(MADV_DONTNEED) int advise = MADV_DONTNEED; -#if defined(MADV_FREE) && \ - 0 /* MADV_FREE works for only anonymous vma at the moment */ - if ((env->flags & MDBX_WRITEMAP) && - global.linux_kernel_version > 0x04050000) +#if defined(MADV_FREE) && 0 /* MADV_FREE works for only anonymous vma at the moment */ + if ((env->flags & MDBX_WRITEMAP) && global.linux_kernel_version > 0x04050000) advise = MADV_FREE; #endif /* MADV_FREE */ - int err = madvise(ptr_disp(env->dxb_mmap.base, discard_edge_bytes), - prev_discarded_bytes - discard_edge_bytes, advise) + int err = madvise(ptr_disp(env->dxb_mmap.base, discard_edge_bytes), prev_discarded_bytes - discard_edge_bytes, + advise) ? ignore_enosys(errno) : MDBX_SUCCESS; #else - int err = ignore_enosys(posix_madvise( - ptr_disp(env->dxb_mmap.base, discard_edge_bytes), - prev_discarded_bytes - discard_edge_bytes, POSIX_MADV_DONTNEED)); + int err = ignore_enosys(posix_madvise(ptr_disp(env->dxb_mmap.base, discard_edge_bytes), + prev_discarded_bytes - discard_edge_bytes, POSIX_MADV_DONTNEED)); #endif if (unlikely(MDBX_IS_ERROR(err))) { - const uint32_t mlocks_after = - atomic_load32(&env->lck->mlcnt[0], mo_Relaxed); + const uint32_t mlocks_after = atomic_load32(&env->lck->mlcnt[0], mo_Relaxed); if (err == MDBX_EINVAL) { - const int severity = (mlocks_after - munlocks_before) - ? MDBX_LOG_NOTICE - : MDBX_LOG_WARN; + const int severity = (mlocks_after - munlocks_before) ? 
MDBX_LOG_NOTICE : MDBX_LOG_WARN; if (LOG_ENABLED(severity)) - debug_log( - severity, __func__, __LINE__, - "%s-madvise: ignore EINVAL (%d) since some pages maybe " - "locked (%u/%u mlcnt-processes)", - "shrink", err, mlocks_after, munlocks_before); + debug_log(severity, __func__, __LINE__, + "%s-madvise: ignore EINVAL (%d) since some pages maybe " + "locked (%u/%u mlcnt-processes)", + "shrink", err, mlocks_after, munlocks_before); } else { - ERROR("%s-madvise(%s, %zu, +%zu), %u/%u mlcnt-processes, err %d", - "shrink", "DONTNEED", discard_edge_bytes, - prev_discarded_bytes - discard_edge_bytes, mlocks_after, - munlocks_before, err); + ERROR("%s-madvise(%s, %zu, +%zu), %u/%u mlcnt-processes, err %d", "shrink", "DONTNEED", + discard_edge_bytes, prev_discarded_bytes - discard_edge_bytes, mlocks_after, munlocks_before, err); return err; } } else @@ -1239,23 +1058,15 @@ int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, /* LY: check conditions to shrink datafile */ const pgno_t backlog_gap = 3 + pending->trees.gc.height * 3; pgno_t shrink_step = 0; - if (pending->geometry.shrink_pv && - pending->geometry.now - pending->geometry.first_unallocated > - (shrink_step = pv2pages(pending->geometry.shrink_pv)) + - backlog_gap) { - if (pending->geometry.now > largest_pgno && - pending->geometry.now - largest_pgno > shrink_step + backlog_gap) { + if (pending->geometry.shrink_pv && pending->geometry.now - pending->geometry.first_unallocated > + (shrink_step = pv2pages(pending->geometry.shrink_pv)) + backlog_gap) { + if (pending->geometry.now > largest_pgno && pending->geometry.now - largest_pgno > shrink_step + backlog_gap) { const pgno_t aligner = - pending->geometry.grow_pv - ? /* grow_step */ pv2pages(pending->geometry.grow_pv) - : shrink_step; + pending->geometry.grow_pv ? 
/* grow_step */ pv2pages(pending->geometry.grow_pv) : shrink_step; const pgno_t with_backlog_gap = largest_pgno + backlog_gap; const pgno_t aligned = - pgno_align2os_pgno(env, (size_t)with_backlog_gap + aligner - - with_backlog_gap % aligner); - const pgno_t bottom = (aligned > pending->geometry.lower) - ? aligned - : pending->geometry.lower; + pgno_align2os_pgno(env, (size_t)with_backlog_gap + aligner - with_backlog_gap % aligner); + const pgno_t bottom = (aligned > pending->geometry.lower) ? aligned : pending->geometry.lower; if (pending->geometry.now > bottom) { if (TROIKA_HAVE_STEADY(troika)) /* force steady, but only if steady-checkpoint is present */ @@ -1264,8 +1075,7 @@ int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, pending->geometry.now = bottom; if (unlikely(head.txnid == pending->unsafe_txnid)) { const txnid_t txnid = safe64_txnid_next(pending->unsafe_txnid); - NOTICE("force-forward pending-txn %" PRIaTXN " -> %" PRIaTXN, - pending->unsafe_txnid, txnid); + NOTICE("force-forward pending-txn %" PRIaTXN " -> %" PRIaTXN, pending->unsafe_txnid, txnid); ENSURE(env, !env->basal_txn || !env->txn); if (unlikely(txnid > MAX_TXNID)) { rc = MDBX_TXN_FULL; @@ -1290,8 +1100,7 @@ int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, if ((flags & MDBX_SAFE_NOSYNC) == 0) { sync_op = 1; mode_bits = MDBX_SYNC_DATA; - if (pending->geometry.first_unallocated > - meta_prefer_steady(env, troika).ptr_c->geometry.now) + if (pending->geometry.first_unallocated > meta_prefer_steady(env, troika).ptr_c->geometry.now) mode_bits |= MDBX_SYNC_SIZE; if (flags & MDBX_NOMETASYNC) mode_bits |= MDBX_SYNC_IODQ; @@ -1303,10 +1112,7 @@ int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, #else (void)sync_op; #endif /* MDBX_ENABLE_PGOP_STAT */ - rc = osal_msync( - &env->dxb_mmap, 0, - pgno_align2os_bytes(env, pending->geometry.first_unallocated), - mode_bits); + rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, 
pending->geometry.first_unallocated), mode_bits); } else { #if MDBX_ENABLE_PGOP_STAT env->lck->pgops.fsync.weak += sync_op; @@ -1336,12 +1142,10 @@ int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, unaligned_poke_u64(4, pending->sign, DATASIGN_WEAK); } - const bool legal4overwrite = - head.txnid == pending->unsafe_txnid && - !memcmp(&head.ptr_c->trees, &pending->trees, sizeof(pending->trees)) && - !memcmp(&head.ptr_c->canary, &pending->canary, sizeof(pending->canary)) && - !memcmp(&head.ptr_c->geometry, &pending->geometry, - sizeof(pending->geometry)); + const bool legal4overwrite = head.txnid == pending->unsafe_txnid && + !memcmp(&head.ptr_c->trees, &pending->trees, sizeof(pending->trees)) && + !memcmp(&head.ptr_c->canary, &pending->canary, sizeof(pending->canary)) && + !memcmp(&head.ptr_c->geometry, &pending->geometry, sizeof(pending->geometry)); meta_t *target = nullptr; if (head.txnid == pending->unsafe_txnid) { ENSURE(env, legal4overwrite); @@ -1353,51 +1157,40 @@ int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, } } else { const unsigned troika_tail = troika->tail_and_flags & 3; - ENSURE(env, troika_tail < NUM_METAS && troika_tail != troika->recent && - troika_tail != troika->prefer_steady); + ENSURE(env, troika_tail < NUM_METAS && troika_tail != troika->recent && troika_tail != troika->prefer_steady); target = (meta_t *)meta_tail(env, troika).ptr_c; } /* LY: step#2 - update meta-page. 
*/ - DEBUG("writing meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO - ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO - " +%u -%u, txn_id %" PRIaTXN ", %s", - data_page(target)->pgno, pending->trees.main.root, - pending->trees.gc.root, pending->geometry.lower, - pending->geometry.first_unallocated, pending->geometry.now, - pending->geometry.upper, pv2pages(pending->geometry.grow_pv), - pv2pages(pending->geometry.shrink_pv), pending->unsafe_txnid, + DEBUG("writing meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO + "/%" PRIaPGNO " +%u -%u, txn_id %" PRIaTXN ", %s", + data_page(target)->pgno, pending->trees.main.root, pending->trees.gc.root, pending->geometry.lower, + pending->geometry.first_unallocated, pending->geometry.now, pending->geometry.upper, + pv2pages(pending->geometry.grow_pv), pv2pages(pending->geometry.shrink_pv), pending->unsafe_txnid, durable_caption(pending)); DEBUG("meta0: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO "/%" PRIaPGNO, (meta0 == head.ptr_c) ? "head" : (meta0 == target) ? "tail" : "stay", - durable_caption(meta0), constmeta_txnid(meta0), meta0->trees.main.root, - meta0->trees.gc.root); + durable_caption(meta0), constmeta_txnid(meta0), meta0->trees.main.root, meta0->trees.gc.root); DEBUG("meta1: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO "/%" PRIaPGNO, (meta1 == head.ptr_c) ? "head" : (meta1 == target) ? "tail" : "stay", - durable_caption(meta1), constmeta_txnid(meta1), meta1->trees.main.root, - meta1->trees.gc.root); + durable_caption(meta1), constmeta_txnid(meta1), meta1->trees.main.root, meta1->trees.gc.root); DEBUG("meta2: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO "/%" PRIaPGNO, (meta2 == head.ptr_c) ? "head" : (meta2 == target) ? 
"tail" : "stay", - durable_caption(meta2), constmeta_txnid(meta2), meta2->trees.main.root, - meta2->trees.gc.root); + durable_caption(meta2), constmeta_txnid(meta2), meta2->trees.main.root, meta2->trees.gc.root); - eASSERT(env, pending->unsafe_txnid != constmeta_txnid(meta0) || - (meta_is_steady(pending) && !meta_is_steady(meta0))); - eASSERT(env, pending->unsafe_txnid != constmeta_txnid(meta1) || - (meta_is_steady(pending) && !meta_is_steady(meta1))); - eASSERT(env, pending->unsafe_txnid != constmeta_txnid(meta2) || - (meta_is_steady(pending) && !meta_is_steady(meta2))); + eASSERT(env, pending->unsafe_txnid != constmeta_txnid(meta0) || (meta_is_steady(pending) && !meta_is_steady(meta0))); + eASSERT(env, pending->unsafe_txnid != constmeta_txnid(meta1) || (meta_is_steady(pending) && !meta_is_steady(meta1))); + eASSERT(env, pending->unsafe_txnid != constmeta_txnid(meta2) || (meta_is_steady(pending) && !meta_is_steady(meta2))); eASSERT(env, ((env->flags ^ flags) & MDBX_WRITEMAP) == 0); - ENSURE(env, target == head.ptr_c || - constmeta_txnid(target) < pending->unsafe_txnid); + ENSURE(env, target == head.ptr_c || constmeta_txnid(target) < pending->unsafe_txnid); if (flags & MDBX_WRITEMAP) { jitter4testing(true); if (likely(target != head.ptr_c)) { @@ -1430,8 +1223,7 @@ int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, } else { /* dangerous case (target == head), only sign could * me updated, check assertions once again */ - eASSERT(env, - legal4overwrite && !head.is_steady && meta_is_steady(pending)); + eASSERT(env, legal4overwrite && !head.is_steady && meta_is_steady(pending)); } memcpy(target->sign, pending->sign, 8); osal_flush_incoherent_cpu_writeback(); @@ -1443,19 +1235,15 @@ int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, env->lck->pgops.msync.weak += 1; #endif /* MDBX_ENABLE_PGOP_STAT */ rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS), - (flags & MDBX_NOMETASYNC) - ? 
MDBX_SYNC_NONE - : MDBX_SYNC_DATA | MDBX_SYNC_IODQ); + (flags & MDBX_NOMETASYNC) ? MDBX_SYNC_NONE : MDBX_SYNC_DATA | MDBX_SYNC_IODQ); } else { #if MDBX_ENABLE_PGOP_STAT env->lck->pgops.wops.weak += 1; #endif /* MDBX_ENABLE_PGOP_STAT */ const page_t *page = data_page(target); - rc = osal_pwrite(env->fd4meta, page, env->ps, - ptr_dist(page, env->dxb_mmap.base)); + rc = osal_pwrite(env->fd4meta, page, env->ps, ptr_dist(page, env->dxb_mmap.base)); if (likely(rc == MDBX_SUCCESS)) { - osal_flush_incoherent_mmap(target, sizeof(meta_t), - globals.sys_pagesize); + osal_flush_incoherent_mmap(target, sizeof(meta_t), globals.sys_pagesize); if ((flags & MDBX_NOMETASYNC) == 0 && env->fd4meta == env->lazy_fd) { #if MDBX_ENABLE_PGOP_STAT env->lck->pgops.fsync.weak += 1; @@ -1474,21 +1262,18 @@ int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, const meta_t undo_meta = *target; eASSERT(env, pending->trees.gc.flags == MDBX_INTEGERKEY); eASSERT(env, check_table_flags(pending->trees.main.flags)); - rc = osal_pwrite(env->fd4meta, pending, sizeof(meta_t), - ptr_dist(target, env->dxb_mmap.base)); + rc = osal_pwrite(env->fd4meta, pending, sizeof(meta_t), ptr_dist(target, env->dxb_mmap.base)); if (unlikely(rc != MDBX_SUCCESS)) { undo: DEBUG("%s", "write failed, disk error?"); /* On a failure, the pagecache still contains the new data. * Try write some old data back, to prevent it from being used. 
*/ - osal_pwrite(env->fd4meta, &undo_meta, sizeof(meta_t), - ptr_dist(target, env->dxb_mmap.base)); + osal_pwrite(env->fd4meta, &undo_meta, sizeof(meta_t), ptr_dist(target, env->dxb_mmap.base)); goto fail; } osal_flush_incoherent_mmap(target, sizeof(meta_t), globals.sys_pagesize); /* sync meta-pages */ - if ((flags & MDBX_NOMETASYNC) == 0 && env->fd4meta == env->lazy_fd && - !env->incore) { + if ((flags & MDBX_NOMETASYNC) == 0 && env->fd4meta == env->lazy_fd && !env->incore) { #if MDBX_ENABLE_PGOP_STAT env->lck->pgops.fsync.weak += 1; #endif /* MDBX_ENABLE_PGOP_STAT */ @@ -1500,23 +1285,18 @@ int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, uint64_t timestamp = 0; while ("workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269") { - rc = coherency_check_written( - env, pending->unsafe_txnid, target, - bytes2pgno(env, ptr_dist(target, env->dxb_mmap.base)), ×tamp); + rc = coherency_check_written(env, pending->unsafe_txnid, target, + bytes2pgno(env, ptr_dist(target, env->dxb_mmap.base)), ×tamp); if (likely(rc == MDBX_SUCCESS)) break; if (unlikely(rc != MDBX_RESULT_TRUE)) goto fail; } - const uint32_t sync_txnid_dist = - ((flags & MDBX_NOMETASYNC) == 0) ? 0 - : ((flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) - ? MDBX_NOMETASYNC_LAZY_FD - : MDBX_NOMETASYNC_LAZY_WRITEMAP; - env->lck->meta_sync_txnid.weak = - pending->txnid_a[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__].weak - - sync_txnid_dist; + const uint32_t sync_txnid_dist = ((flags & MDBX_NOMETASYNC) == 0) ? 0 + : ((flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) ? 
MDBX_NOMETASYNC_LAZY_FD + : MDBX_NOMETASYNC_LAZY_WRITEMAP; + env->lck->meta_sync_txnid.weak = pending->txnid_a[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__].weak - sync_txnid_dist; *troika = meta_tap(env); for (MDBX_txn *txn = env->basal_txn; txn; txn = txn->nested) @@ -1525,10 +1305,8 @@ int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, /* LY: shrink datafile if needed */ if (unlikely(shrink)) { - VERBOSE("shrink to %" PRIaPGNO " pages (-%" PRIaPGNO ")", - pending->geometry.now, shrink); - rc = dxb_resize(env, pending->geometry.first_unallocated, - pending->geometry.now, pending->geometry.upper, + VERBOSE("shrink to %" PRIaPGNO " pages (-%" PRIaPGNO ")", pending->geometry.now, shrink); + rc = dxb_resize(env, pending->geometry.first_unallocated, pending->geometry.now, pending->geometry.upper, impilict_shrink); if (rc != MDBX_SUCCESS && rc != MDBX_EPERM) goto fail; diff --git a/src/env-opts.c b/src/env-opts.c index e22d1a6a..c3bab5f2 100644 --- a/src/env-opts.c +++ b/src/env-opts.c @@ -6,17 +6,11 @@ __cold static unsigned default_rp_augment_limit(const MDBX_env *env) { const size_t timeframe = /* 16 секунд */ 16 << 16; const size_t remain_1sec = - (env->options.gc_time_limit < timeframe) - ? timeframe - (size_t)env->options.gc_time_limit - : 0; - const size_t minimum = (env->maxgc_large1page * 2 > MDBX_PNL_INITIAL) - ? env->maxgc_large1page * 2 - : MDBX_PNL_INITIAL; + (env->options.gc_time_limit < timeframe) ? timeframe - (size_t)env->options.gc_time_limit : 0; + const size_t minimum = (env->maxgc_large1page * 2 > MDBX_PNL_INITIAL) ? env->maxgc_large1page * 2 : MDBX_PNL_INITIAL; const size_t one_third = env->geo_in_bytes.now / 3 >> env->ps2ln; const size_t augment_limit = - (one_third > minimum) - ? minimum + (one_third - minimum) / timeframe * remain_1sec - : minimum; + (one_third > minimum) ? 
minimum + (one_third - minimum) / timeframe * remain_1sec : minimum; eASSERT(env, augment_limit < PAGELIST_LIMIT); return pnl_bytes2size(pnl_size2bytes(augment_limit)); } @@ -86,29 +80,23 @@ void env_options_adjust_defaults(MDBX_env *env) { const size_t basis = env->geo_in_bytes.now; /* TODO: use options? */ const unsigned factor = 9; - size_t threshold = (basis < ((size_t)65536 << factor)) - ? 65536 /* minimal threshold */ - : (basis > (MEGABYTE * 4 << factor)) - ? MEGABYTE * 4 /* maximal threshold */ - : basis >> factor; + size_t threshold = (basis < ((size_t)65536 << factor)) ? 65536 /* minimal threshold */ + : (basis > (MEGABYTE * 4 << factor)) ? MEGABYTE * 4 /* maximal threshold */ + : basis >> factor; threshold = - (threshold < env->geo_in_bytes.shrink || !env->geo_in_bytes.shrink) - ? threshold - : env->geo_in_bytes.shrink; + (threshold < env->geo_in_bytes.shrink || !env->geo_in_bytes.shrink) ? threshold : env->geo_in_bytes.shrink; env->madv_threshold = bytes2pgno(env, bytes_align2os_bytes(env, threshold)); } //------------------------------------------------------------------------------ -__cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, - uint64_t value) { +__cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, uint64_t value) { int err = check_env(env, false); if (unlikely(err != MDBX_SUCCESS)) return LOG_IFERR(err); - const bool lock_needed = - ((env->flags & ENV_ACTIVE) && env->basal_txn && !env_txn0_owned(env)); + const bool lock_needed = ((env->flags & ENV_ACTIVE) && env->basal_txn && !env_txn0_owned(env)); bool should_unlock = false; switch (option) { case MDBX_opt_sync_bytes: @@ -121,10 +109,8 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, if (unlikely(value > SIZE_MAX - 65536)) return LOG_IFERR(MDBX_EINVAL); value = bytes2pgno(env, (size_t)value + env->ps - 1); - if ((uint32_t)value != - atomic_load32(&env->lck->autosync_threshold, mo_AcquireRelease) && - 
atomic_store32(&env->lck->autosync_threshold, (uint32_t)value, - mo_Relaxed) + if ((uint32_t)value != atomic_load32(&env->lck->autosync_threshold, mo_AcquireRelease) && + atomic_store32(&env->lck->autosync_threshold, (uint32_t)value, mo_Relaxed) /* Дергаем sync(force=off) только если задано новое не-нулевое значение * и мы вне транзакции */ && lock_needed) { @@ -248,8 +234,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, err = MDBX_EPERM /* unable change during transaction */; else { const pgno_t value32 = (pgno_t)value; - if (option == MDBX_opt_txn_dp_initial && - env->options.dp_initial != value32) { + if (option == MDBX_opt_txn_dp_initial && env->options.dp_initial != value32) { env->options.dp_initial = value32; if (env->options.dp_limit < value32) { env->options.dp_limit = value32; @@ -308,8 +293,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, #if defined(_WIN32) || defined(_WIN64) /* позволяем "установить" значение по-умолчанию и совпадающее * с поведением соответствующим текущей установке MDBX_NOMETASYNC */ - if (value == /* default */ UINT64_MAX && - value != ((env->flags & MDBX_NOMETASYNC) ? 0 : UINT_MAX)) + if (value == /* default */ UINT64_MAX && value != ((env->flags & MDBX_NOMETASYNC) ? 
0 : UINT_MAX)) err = MDBX_EINVAL; #else if (value == /* default */ UINT64_MAX) @@ -335,8 +319,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, case MDBX_opt_prefer_waf_insteadof_balance: if (value == /* default */ UINT64_MAX) - env->options.prefer_waf_insteadof_balance = - default_prefer_waf_insteadof_balance(env); + env->options.prefer_waf_insteadof_balance = default_prefer_waf_insteadof_balance(env); else if (value > 1) err = MDBX_EINVAL; else @@ -400,8 +383,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, return LOG_IFERR(err); } -__cold int mdbx_env_get_option(const MDBX_env *env, const MDBX_option_t option, - uint64_t *pvalue) { +__cold int mdbx_env_get_option(const MDBX_env *env, const MDBX_option_t option, uint64_t *pvalue) { int err = check_env(env, false); if (unlikely(err != MDBX_SUCCESS)) return LOG_IFERR(err); @@ -412,15 +394,13 @@ __cold int mdbx_env_get_option(const MDBX_env *env, const MDBX_option_t option, case MDBX_opt_sync_bytes: if (unlikely(!(env->flags & ENV_ACTIVE))) return LOG_IFERR(MDBX_EPERM); - *pvalue = pgno2bytes( - env, atomic_load32(&env->lck->autosync_threshold, mo_Relaxed)); + *pvalue = pgno2bytes(env, atomic_load32(&env->lck->autosync_threshold, mo_Relaxed)); break; case MDBX_opt_sync_period: if (unlikely(!(env->flags & ENV_ACTIVE))) return LOG_IFERR(MDBX_EPERM); - *pvalue = osal_monotime_to_16dot16( - atomic_load64(&env->lck->autosync_period, mo_Relaxed)); + *pvalue = osal_monotime_to_16dot16(atomic_load64(&env->lck->autosync_period, mo_Relaxed)); break; case MDBX_opt_max_db: diff --git a/src/env.c b/src/env.c index c4902765..91af9413 100644 --- a/src/env.c +++ b/src/env.c @@ -4,17 +4,14 @@ #include "internals.h" bool env_txn0_owned(const MDBX_env *env) { - return (env->flags & MDBX_NOSTICKYTHREADS) - ? (env->basal_txn->owner != 0) - : (env->basal_txn->owner == osal_thread_self()); + return (env->flags & MDBX_NOSTICKYTHREADS) ? 
(env->basal_txn->owner != 0) + : (env->basal_txn->owner == osal_thread_self()); } int env_page_auxbuffer(MDBX_env *env) { - const int err = - env->page_auxbuf - ? MDBX_SUCCESS - : osal_memalign_alloc(globals.sys_pagesize, - env->ps * (size_t)NUM_METAS, &env->page_auxbuf); + const int err = env->page_auxbuf + ? MDBX_SUCCESS + : osal_memalign_alloc(globals.sys_pagesize, env->ps * (size_t)NUM_METAS, &env->page_auxbuf); if (likely(err == MDBX_SUCCESS)) { memset(env->page_auxbuf, -1, env->ps * (size_t)2); memset(ptr_disp(env->page_auxbuf, env->ps * (size_t)2), 0, env->ps); @@ -34,26 +31,19 @@ __cold unsigned env_setup_pagesize(MDBX_env *env, const size_t pagesize) { STATIC_ASSERT(MAX_GC1OVPAGE(MDBX_MIN_PAGESIZE) > 4); STATIC_ASSERT(MAX_GC1OVPAGE(MDBX_MAX_PAGESIZE) < PAGELIST_LIMIT); const intptr_t maxgc_ov1page = (pagesize - PAGEHDRSZ) / sizeof(pgno_t) - 1; - ENSURE(env, - maxgc_ov1page > 42 && maxgc_ov1page < (intptr_t)PAGELIST_LIMIT / 4); + ENSURE(env, maxgc_ov1page > 42 && maxgc_ov1page < (intptr_t)PAGELIST_LIMIT / 4); env->maxgc_large1page = (unsigned)maxgc_ov1page; - env->maxgc_per_branch = - (unsigned)((pagesize - PAGEHDRSZ) / - (sizeof(indx_t) + sizeof(node_t) + sizeof(txnid_t))); + env->maxgc_per_branch = (unsigned)((pagesize - PAGEHDRSZ) / (sizeof(indx_t) + sizeof(node_t) + sizeof(txnid_t))); - STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MIN_PAGESIZE) > - sizeof(tree_t) + NODESIZE + 42); + STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MIN_PAGESIZE) > sizeof(tree_t) + NODESIZE + 42); STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MAX_PAGESIZE) < UINT16_MAX); - STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MIN_PAGESIZE) >= - BRANCH_NODE_MAX(MDBX_MIN_PAGESIZE)); + STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MIN_PAGESIZE) >= BRANCH_NODE_MAX(MDBX_MIN_PAGESIZE)); STATIC_ASSERT(BRANCH_NODE_MAX(MDBX_MAX_PAGESIZE) > NODESIZE + 42); STATIC_ASSERT(BRANCH_NODE_MAX(MDBX_MAX_PAGESIZE) < UINT16_MAX); const intptr_t branch_nodemax = BRANCH_NODE_MAX(pagesize); const intptr_t leaf_nodemax = LEAF_NODE_MAX(pagesize); - ENSURE(env, 
branch_nodemax > (intptr_t)(NODESIZE + 42) && - branch_nodemax % 2 == 0 && - leaf_nodemax > (intptr_t)(sizeof(tree_t) + NODESIZE + 42) && - leaf_nodemax >= branch_nodemax && + ENSURE(env, branch_nodemax > (intptr_t)(NODESIZE + 42) && branch_nodemax % 2 == 0 && + leaf_nodemax > (intptr_t)(sizeof(tree_t) + NODESIZE + 42) && leaf_nodemax >= branch_nodemax && leaf_nodemax < (int)UINT16_MAX && leaf_nodemax % 2 == 0); env->leaf_nodemax = (uint16_t)leaf_nodemax; env->branch_nodemax = (uint16_t)branch_nodemax; @@ -71,18 +61,14 @@ __cold unsigned env_setup_pagesize(MDBX_env *env, const size_t pagesize) { if (unlikely(err != MDBX_SUCCESS)) ERROR("mdbx_get_sysraminfo(), rc %d", err); else { - size_t reasonable_dpl_limit = - (size_t)(total_ram_pages + avail_ram_pages) / 42; + size_t reasonable_dpl_limit = (size_t)(total_ram_pages + avail_ram_pages) / 42; if (pagesize > globals.sys_pagesize) reasonable_dpl_limit /= pagesize / globals.sys_pagesize; else if (pagesize < globals.sys_pagesize) reasonable_dpl_limit *= globals.sys_pagesize / pagesize; - reasonable_dpl_limit = (reasonable_dpl_limit < PAGELIST_LIMIT) - ? reasonable_dpl_limit - : PAGELIST_LIMIT; - reasonable_dpl_limit = (reasonable_dpl_limit > CURSOR_STACK_SIZE * 4) - ? reasonable_dpl_limit - : CURSOR_STACK_SIZE * 4; + reasonable_dpl_limit = (reasonable_dpl_limit < PAGELIST_LIMIT) ? reasonable_dpl_limit : PAGELIST_LIMIT; + reasonable_dpl_limit = + (reasonable_dpl_limit > CURSOR_STACK_SIZE * 4) ? reasonable_dpl_limit : CURSOR_STACK_SIZE * 4; env->options.dp_limit = (unsigned)reasonable_dpl_limit; } } @@ -108,46 +94,36 @@ retry:; goto bailout; } - const troika_t troika = - (txn0_owned | should_unlock) ? env->basal_txn->tw.troika : meta_tap(env); + const troika_t troika = (txn0_owned | should_unlock) ? 
env->basal_txn->tw.troika : meta_tap(env); const meta_ptr_t head = meta_recent(env, &troika); - const uint64_t unsynced_pages = - atomic_load64(&env->lck->unsynced_pages, mo_Relaxed); + const uint64_t unsynced_pages = atomic_load64(&env->lck->unsynced_pages, mo_Relaxed); if (unsynced_pages == 0) { - const uint32_t synched_meta_txnid_u32 = - atomic_load32(&env->lck->meta_sync_txnid, mo_Relaxed); + const uint32_t synched_meta_txnid_u32 = atomic_load32(&env->lck->meta_sync_txnid, mo_Relaxed); if (synched_meta_txnid_u32 == (uint32_t)head.txnid && head.is_steady) goto bailout; } if (should_unlock && (env->flags & MDBX_WRITEMAP) && - unlikely(head.ptr_c->geometry.first_unallocated > - bytes2pgno(env, env->dxb_mmap.current))) { + unlikely(head.ptr_c->geometry.first_unallocated > bytes2pgno(env, env->dxb_mmap.current))) { - if (unlikely(env->stuck_meta >= 0) && - troika.recent != (uint8_t)env->stuck_meta) { + if (unlikely(env->stuck_meta >= 0) && troika.recent != (uint8_t)env->stuck_meta) { NOTICE("skip %s since wagering meta-page (%u) is mispatch the recent " "meta-page (%u)", "sync datafile", env->stuck_meta, troika.recent); rc = MDBX_RESULT_TRUE; } else { - rc = dxb_resize(env, head.ptr_c->geometry.first_unallocated, - head.ptr_c->geometry.now, head.ptr_c->geometry.upper, + rc = dxb_resize(env, head.ptr_c->geometry.first_unallocated, head.ptr_c->geometry.now, head.ptr_c->geometry.upper, implicit_grow); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } } - const size_t autosync_threshold = - atomic_load32(&env->lck->autosync_threshold, mo_Relaxed); - const uint64_t autosync_period = - atomic_load64(&env->lck->autosync_period, mo_Relaxed); + const size_t autosync_threshold = atomic_load32(&env->lck->autosync_threshold, mo_Relaxed); + const uint64_t autosync_period = atomic_load64(&env->lck->autosync_period, mo_Relaxed); uint64_t eoos_timestamp; if (force || (autosync_threshold && unsynced_pages >= autosync_threshold) || - (autosync_period && - (eoos_timestamp = - 
atomic_load64(&env->lck->eoos_timestamp, mo_Relaxed)) && + (autosync_period && (eoos_timestamp = atomic_load64(&env->lck->eoos_timestamp, mo_Relaxed)) && osal_monotime() - eoos_timestamp >= autosync_period)) flags &= MDBX_WRITEMAP /* clear flags for full steady sync */; @@ -159,8 +135,7 @@ retry:; int err; /* pre-sync to avoid latency for writer */ - if (unsynced_pages > /* FIXME: define threshold */ 42 && - (flags & MDBX_SAFE_NOSYNC) == 0) { + if (unsynced_pages > /* FIXME: define threshold */ 42 && (flags & MDBX_SAFE_NOSYNC) == 0) { eASSERT(env, ((flags ^ env->flags) & MDBX_WRITEMAP) == 0); if (flags & MDBX_WRITEMAP) { /* Acquire guard to avoid collision with remap */ @@ -171,8 +146,7 @@ retry:; if (unlikely(err != MDBX_SUCCESS)) return err; #endif - const size_t usedbytes = - pgno_align2os_bytes(env, head.ptr_c->geometry.first_unallocated); + const size_t usedbytes = pgno_align2os_bytes(env, head.ptr_c->geometry.first_unallocated); err = osal_msync(&env->dxb_mmap, 0, usedbytes, MDBX_SYNC_DATA); #if defined(_WIN32) || defined(_WIN64) imports.srwl_ReleaseShared(&env->remap_guard); @@ -215,8 +189,7 @@ retry:; eASSERT(env, txn0_owned || should_unlock); eASSERT(env, !txn0_owned || (flags & txn_shrink_allowed) == 0); - if (!head.is_steady && unlikely(env->stuck_meta >= 0) && - troika.recent != (uint8_t)env->stuck_meta) { + if (!head.is_steady && unlikely(env->stuck_meta >= 0) && troika.recent != (uint8_t)env->stuck_meta) { NOTICE("skip %s since wagering meta-page (%u) is mispatch the recent " "meta-page (%u)", "sync datafile", env->stuck_meta, troika.recent); @@ -224,9 +197,8 @@ retry:; goto bailout; } if (!head.is_steady || ((flags & MDBX_SAFE_NOSYNC) == 0 && unsynced_pages)) { - DEBUG("meta-head %" PRIaPGNO ", %s, sync_pending %" PRIu64, - data_page(head.ptr_c)->pgno, durable_caption(head.ptr_c), - unsynced_pages); + DEBUG("meta-head %" PRIaPGNO ", %s, sync_pending %" PRIu64, data_page(head.ptr_c)->pgno, + durable_caption(head.ptr_c), unsynced_pages); meta_t meta = 
*head.ptr_c; rc = dxb_sync_locked(env, flags, &meta, &env->basal_txn->tw.troika); if (unlikely(rc != MDBX_SUCCESS)) @@ -235,8 +207,7 @@ retry:; /* LY: sync meta-pages if MDBX_NOMETASYNC enabled * and someone was not synced above. */ - if (atomic_load32(&env->lck->meta_sync_txnid, mo_Relaxed) != - (uint32_t)head.txnid) + if (atomic_load32(&env->lck->meta_sync_txnid, mo_Relaxed) != (uint32_t)head.txnid) rc = meta_sync(env, head); bailout: @@ -334,9 +305,8 @@ __cold int env_open(MDBX_env *env, mdbx_mode_t mode) { */ env->pid = osal_getpid(); - int rc = osal_openfile((env->flags & MDBX_RDONLY) ? MDBX_OPEN_DXB_READ - : MDBX_OPEN_DXB_LAZY, - env, env->pathname.dxb, &env->lazy_fd, mode); + int rc = osal_openfile((env->flags & MDBX_RDONLY) ? MDBX_OPEN_DXB_READ : MDBX_OPEN_DXB_LAZY, env, env->pathname.dxb, + &env->lazy_fd, mode); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -355,8 +325,7 @@ __cold int env_open(MDBX_env *env, mdbx_mode_t mode) { #if defined(_WIN32) || defined(_WIN64) eASSERT(env, env->ioring.overlapped_fd == 0); bool ior_direct = false; - if (!(env->flags & - (MDBX_RDONLY | MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_EXCLUSIVE))) { + if (!(env->flags & (MDBX_RDONLY | MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_EXCLUSIVE))) { if (MDBX_AVOID_MSYNC && (env->flags & MDBX_WRITEMAP)) { /* Запрошен режим MDBX_SYNC_DURABLE | MDBX_WRITEMAP при активной опции * MDBX_AVOID_MSYNC. 
@@ -383,8 +352,7 @@ __cold int env_open(MDBX_env *env, mdbx_mode_t mode) { int err = dxb_read_header(env, &header, MDBX_SUCCESS, true); if ((err == MDBX_SUCCESS && header.pagesize >= globals.sys_pagesize) || (err == MDBX_ENODATA && mode && env->ps >= globals.sys_pagesize && - osal_filesize(env->lazy_fd, &dxb_filesize) == MDBX_SUCCESS && - dxb_filesize == 0)) + osal_filesize(env->lazy_fd, &dxb_filesize) == MDBX_SUCCESS && dxb_filesize == 0)) /* Может быть коллизия, если два процесса пытаются одновременно создать * БД с разным размером страницы, который у одного меньше системной * страницы, а у другого НЕ меньше. Эта допустимая, но очень странная @@ -392,9 +360,8 @@ __cold int env_open(MDBX_env *env, mdbx_mode_t mode) { ior_direct = true; } - rc = osal_openfile(ior_direct ? MDBX_OPEN_DXB_OVERLAPPED_DIRECT - : MDBX_OPEN_DXB_OVERLAPPED, - env, env->pathname.dxb, &env->ioring.overlapped_fd, 0); + rc = osal_openfile(ior_direct ? MDBX_OPEN_DXB_OVERLAPPED_DIRECT : MDBX_OPEN_DXB_OVERLAPPED, env, env->pathname.dxb, + &env->ioring.overlapped_fd, 0); if (unlikely(rc != MDBX_SUCCESS)) return rc; env->dxb_lock_event = CreateEventW(nullptr, true, false, nullptr); @@ -410,8 +377,7 @@ __cold int env_open(MDBX_env *env, mdbx_mode_t mode) { return errno; mode = st.st_mode; } - mode = (/* inherit read permissions for group and others */ mode & - (S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) | + mode = (/* inherit read permissions for group and others */ mode & (S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) | /* always add read/write for owner */ S_IRUSR | S_IWUSR | ((mode & S_IRGRP) ? /* +write if readable by group */ S_IWGRP : 0) | ((mode & S_IROTH) ? 
/* +write if readable by others */ S_IWOTH : 0); @@ -428,8 +394,7 @@ __cold int env_open(MDBX_env *env, mdbx_mode_t mode) { | MDBX_EXCLUSIVE #endif /* !Windows */ ))) { - rc = osal_openfile(MDBX_OPEN_DXB_DSYNC, env, env->pathname.dxb, - &env->dsync_fd, 0); + rc = osal_openfile(MDBX_OPEN_DXB_DSYNC, env, env->pathname.dxb, &env->dsync_fd, 0); if (unlikely(MDBX_IS_ERROR(rc))) return rc; if (env->dsync_fd != INVALID_HANDLE_VALUE) { @@ -439,19 +404,14 @@ __cold int env_open(MDBX_env *env, mdbx_mode_t mode) { } } - const MDBX_env_flags_t lazy_flags = - MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC | MDBX_NOMETASYNC; - const MDBX_env_flags_t mode_flags = lazy_flags | MDBX_LIFORECLAIM | - MDBX_NORDAHEAD | MDBX_RDONLY | - MDBX_WRITEMAP; + const MDBX_env_flags_t lazy_flags = MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC | MDBX_NOMETASYNC; + const MDBX_env_flags_t mode_flags = lazy_flags | MDBX_LIFORECLAIM | MDBX_NORDAHEAD | MDBX_RDONLY | MDBX_WRITEMAP; lck_t *const lck = env->lck_mmap.lck; if (lck && lck_rc != MDBX_RESULT_TRUE && (env->flags & MDBX_RDONLY) == 0) { MDBX_env_flags_t snap_flags; - while ((snap_flags = atomic_load32(&lck->envmode, mo_AcquireRelease)) == - MDBX_RDONLY) { - if (atomic_cas32(&lck->envmode, MDBX_RDONLY, - (snap_flags = (env->flags & mode_flags)))) { + while ((snap_flags = atomic_load32(&lck->envmode, mo_AcquireRelease)) == MDBX_RDONLY) { + if (atomic_cas32(&lck->envmode, MDBX_RDONLY, (snap_flags = (env->flags & mode_flags)))) { /* The case: * - let's assume that for some reason the DB file is smaller * than it should be according to the geometry, @@ -471,12 +431,9 @@ __cold int env_open(MDBX_env *env, mdbx_mode_t mode) { if (env->flags & MDBX_ACCEDE) { /* Pickup current mode-flags (MDBX_LIFORECLAIM, MDBX_NORDAHEAD, etc). */ const MDBX_env_flags_t diff = - (snap_flags ^ env->flags) & - ((snap_flags & lazy_flags) ? mode_flags - : mode_flags & ~MDBX_WRITEMAP); + (snap_flags ^ env->flags) & ((snap_flags & lazy_flags) ? 
mode_flags : mode_flags & ~MDBX_WRITEMAP); env->flags ^= diff; - NOTICE("accede mode-flags: 0x%X, 0x%X -> 0x%X", diff, env->flags ^ diff, - env->flags); + NOTICE("accede mode-flags: 0x%X, 0x%X -> 0x%X", diff, env->flags ^ diff, env->flags); } /* Ранее упущенный не очевидный момент: При работе БД в режимах @@ -498,12 +455,10 @@ __cold int env_open(MDBX_env *env, mdbx_mode_t mode) { * В результате, требуется либо запретить совместную работу процессам с * разным MDBX_WRITEMAP в режиме отложенной записи, либо отслеживать такое * смешивание и блокировать steady-пометки - что контрпродуктивно. */ - const MDBX_env_flags_t rigorous_flags = - (snap_flags & lazy_flags) - ? MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC | MDBX_WRITEMAP - : MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC; - const MDBX_env_flags_t rigorous_diff = - (snap_flags ^ env->flags) & rigorous_flags; + const MDBX_env_flags_t rigorous_flags = (snap_flags & lazy_flags) + ? MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC | MDBX_WRITEMAP + : MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC; + const MDBX_env_flags_t rigorous_diff = (snap_flags ^ env->flags) & rigorous_flags; if (rigorous_diff) { ERROR("current mode/flags 0x%X incompatible with requested 0x%X, " "rigorous diff 0x%X", @@ -529,8 +484,7 @@ __cold int env_open(MDBX_env *env, mdbx_mode_t mode) { } if (unlikely(/* recovery mode */ env->stuck_meta >= 0) && - (lck_rc != /* exclusive */ MDBX_RESULT_TRUE || - (env->flags & MDBX_EXCLUSIVE) == 0)) { + (lck_rc != /* exclusive */ MDBX_RESULT_TRUE || (env->flags & MDBX_EXCLUSIVE) == 0)) { ERROR("%s", "recovery requires exclusive mode"); return MDBX_BUSY; } @@ -545,8 +499,7 @@ __cold int env_open(MDBX_env *env, mdbx_mode_t mode) { if (lck) { if (lck_rc == MDBX_RESULT_TRUE) { rc = lck_downgrade(env); - DEBUG("lck-downgrade-%s: rc %i", - (env->flags & MDBX_EXCLUSIVE) ? "partial" : "full", rc); + DEBUG("lck-downgrade-%s: rc %i", (env->flags & MDBX_EXCLUSIVE) ? 
"partial" : "full", rc); if (rc != MDBX_SUCCESS) return rc; } else { @@ -556,14 +509,13 @@ __cold int env_open(MDBX_env *env, mdbx_mode_t mode) { } } - rc = (env->flags & MDBX_RDONLY) - ? MDBX_SUCCESS - : osal_ioring_create(&env->ioring + rc = (env->flags & MDBX_RDONLY) ? MDBX_SUCCESS + : osal_ioring_create(&env->ioring #if defined(_WIN32) || defined(_WIN64) - , - ior_direct, env->ioring.overlapped_fd + , + ior_direct, env->ioring.overlapped_fd #endif /* Windows */ - ); + ); return rc; } @@ -606,8 +558,7 @@ __cold int env_close(MDBX_env *env, bool resurrect_after_fork) { } #if defined(_WIN32) || defined(_WIN64) - eASSERT(env, !env->ioring.overlapped_fd || - env->ioring.overlapped_fd == INVALID_HANDLE_VALUE); + eASSERT(env, !env->ioring.overlapped_fd || env->ioring.overlapped_fd == INVALID_HANDLE_VALUE); if (env->dxb_lock_event != INVALID_HANDLE_VALUE) { CloseHandle(env->dxb_lock_event); env->dxb_lock_event = INVALID_HANDLE_VALUE; diff --git a/src/essentials.h b/src/essentials.h index e6f42305..3331e524 100644 --- a/src/essentials.h +++ b/src/essentials.h @@ -110,27 +110,22 @@ extern struct libmdbx_imports imports; extern LIBMDBX_API const char *const mdbx_sourcery_anchor; #endif -#define MDBX_IS_ERROR(rc) \ - ((rc) != MDBX_RESULT_TRUE && (rc) != MDBX_RESULT_FALSE) +#define MDBX_IS_ERROR(rc) ((rc) != MDBX_RESULT_TRUE && (rc) != MDBX_RESULT_FALSE) /*----------------------------------------------------------------------------*/ -MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline pgno_t -int64pgno(int64_t i64) { +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline pgno_t int64pgno(int64_t i64) { if (likely(i64 >= (int64_t)MIN_PAGENO && i64 <= (int64_t)MAX_PAGENO + 1)) return (pgno_t)i64; return (i64 < (int64_t)MIN_PAGENO) ? 
MIN_PAGENO : MAX_PAGENO; } -MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline pgno_t -pgno_add(size_t base, size_t augend) { +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline pgno_t pgno_add(size_t base, size_t augend) { assert(base <= MAX_PAGENO + 1 && augend < MAX_PAGENO); return int64pgno((int64_t)base + (int64_t)augend); } -MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline pgno_t -pgno_sub(size_t base, size_t subtrahend) { - assert(base >= MIN_PAGENO && base <= MAX_PAGENO + 1 && - subtrahend < MAX_PAGENO); +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline pgno_t pgno_sub(size_t base, size_t subtrahend) { + assert(base >= MIN_PAGENO && base <= MAX_PAGENO + 1 && subtrahend < MAX_PAGENO); return int64pgno((int64_t)base - (int64_t)subtrahend); } diff --git a/src/gc-get.c b/src/gc-get.c index 9b9c0826..196001f9 100644 --- a/src/gc-get.c +++ b/src/gc-get.c @@ -52,8 +52,7 @@ static bool mincore_fetch(MDBX_env *const env, const size_t unit_begin) { env->lck->pgops.mincore.weak += 1; #endif /* MDBX_ENABLE_PGOP_STAT */ uint8_t *const vector = alloca(pages); - if (unlikely(mincore(ptr_disp(env->dxb_mmap.base, offset), length, - (void *)vector))) { + if (unlikely(mincore(ptr_disp(env->dxb_mmap.base, offset), length, (void *)vector))) { NOTICE("mincore(+%zu, %zu), err %d", offset, length, errno); return false; } @@ -79,14 +78,10 @@ static bool mincore_fetch(MDBX_env *const env, const size_t unit_begin) { } #endif /* MDBX_USE_MINCORE */ -MDBX_MAYBE_UNUSED static inline bool mincore_probe(MDBX_env *const env, - const pgno_t pgno) { +MDBX_MAYBE_UNUSED static inline bool mincore_probe(MDBX_env *const env, const pgno_t pgno) { #if MDBX_USE_MINCORE - const size_t offset_aligned = - floor_powerof2(pgno2bytes(env, pgno), globals.sys_pagesize); - const unsigned unit_log2 = (env->ps2ln > globals.sys_pagesize_ln2) - ? 
env->ps2ln - : globals.sys_pagesize_ln2; + const size_t offset_aligned = floor_powerof2(pgno2bytes(env, pgno), globals.sys_pagesize); + const unsigned unit_log2 = (env->ps2ln > globals.sys_pagesize_ln2) ? env->ps2ln : globals.sys_pagesize_ln2; const size_t unit_begin = offset_aligned >> unit_log2; eASSERT(env, (unit_begin << unit_log2) == offset_aligned); const ptrdiff_t dist = unit_begin - env->lck->mincore_cache.begin[0]; @@ -102,8 +97,7 @@ MDBX_MAYBE_UNUSED static inline bool mincore_probe(MDBX_env *const env, /*----------------------------------------------------------------------------*/ -MDBX_MAYBE_UNUSED __hot static pgno_t * -scan4seq_fallback(pgno_t *range, const size_t len, const size_t seq) { +MDBX_MAYBE_UNUSED __hot static pgno_t *scan4seq_fallback(pgno_t *range, const size_t len, const size_t seq) { assert(seq > 0 && len > seq); #if MDBX_PNL_ASCENDING assert(range[-1] == len); @@ -167,8 +161,7 @@ scan4seq_fallback(pgno_t *range, const size_t len, const size_t seq) { return nullptr; } -MDBX_MAYBE_UNUSED static const pgno_t *scan4range_checker(const pnl_t pnl, - const size_t seq) { +MDBX_MAYBE_UNUSED static const pgno_t *scan4range_checker(const pnl_t pnl, const size_t seq) { size_t begin = MDBX_PNL_ASCENDING ? 
1 : MDBX_PNL_GETSIZE(pnl); #if MDBX_PNL_ASCENDING while (seq <= MDBX_PNL_GETSIZE(pnl) - begin) { @@ -186,8 +179,7 @@ MDBX_MAYBE_UNUSED static const pgno_t *scan4range_checker(const pnl_t pnl, return nullptr; } -#if defined(_MSC_VER) && !defined(__builtin_clz) && \ - !__has_builtin(__builtin_clz) +#if defined(_MSC_VER) && !defined(__builtin_clz) && !__has_builtin(__builtin_clz) MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clz(uint32_t value) { unsigned long index; _BitScanReverse(&index, value); @@ -195,8 +187,7 @@ MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clz(uint32_t value) { } #endif /* _MSC_VER */ -#if defined(_MSC_VER) && !defined(__builtin_clzl) && \ - !__has_builtin(__builtin_clzl) +#if defined(_MSC_VER) && !defined(__builtin_clzl) && !__has_builtin(__builtin_clzl) MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clzl(size_t value) { unsigned long index; #ifdef _WIN64 @@ -213,8 +204,7 @@ MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clzl(size_t value) { #if !MDBX_PNL_ASCENDING -#if !defined(MDBX_ATTRIBUTE_TARGET) && \ - (__has_attribute(__target__) || __GNUC_PREREQ(5, 0)) +#if !defined(MDBX_ATTRIBUTE_TARGET) && (__has_attribute(__target__) || __GNUC_PREREQ(5, 0)) #define MDBX_ATTRIBUTE_TARGET(target) __attribute__((__target__(target))) #endif /* MDBX_ATTRIBUTE_TARGET */ @@ -223,9 +213,8 @@ MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clzl(size_t value) { * gcc/i686-buildroot-linux-gnu/12.2.0/include/xmmintrin.h:814:1: * error: inlining failed in call to 'always_inline' '_mm_movemask_ps': * target specific option mismatch */ -#if !defined(__FAST_MATH__) || !__FAST_MATH__ || !defined(__GNUC__) || \ - defined(__e2k__) || defined(__clang__) || defined(__amd64__) || \ - defined(__SSE2__) +#if !defined(__FAST_MATH__) || !__FAST_MATH__ || !defined(__GNUC__) || defined(__e2k__) || defined(__clang__) || \ + defined(__amd64__) || defined(__SSE2__) #define MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND 0 #else 
#define MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND 1 @@ -237,41 +226,36 @@ MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clzl(size_t value) { #elif (defined(_M_IX86_FP) && _M_IX86_FP >= 2) || defined(__amd64__) #define __SSE2__ #define MDBX_ATTRIBUTE_TARGET_SSE2 /* nope */ -#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \ - !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND +#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND #define MDBX_ATTRIBUTE_TARGET_SSE2 MDBX_ATTRIBUTE_TARGET("sse,sse2") #endif /* __SSE2__ */ #if defined(__AVX2__) #define MDBX_ATTRIBUTE_TARGET_AVX2 /* nope */ -#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \ - !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND +#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND #define MDBX_ATTRIBUTE_TARGET_AVX2 MDBX_ATTRIBUTE_TARGET("sse,sse2,avx,avx2") #endif /* __AVX2__ */ #if defined(MDBX_ATTRIBUTE_TARGET_AVX2) #if defined(__AVX512BW__) #define MDBX_ATTRIBUTE_TARGET_AVX512BW /* nope */ -#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \ - !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND && \ +#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND && \ (__GNUC_PREREQ(6, 0) || __CLANG_PREREQ(5, 0)) -#define MDBX_ATTRIBUTE_TARGET_AVX512BW \ - MDBX_ATTRIBUTE_TARGET("sse,sse2,avx,avx2,avx512bw") +#define MDBX_ATTRIBUTE_TARGET_AVX512BW MDBX_ATTRIBUTE_TARGET("sse,sse2,avx,avx2,avx512bw") #endif /* __AVX512BW__ */ #endif /* MDBX_ATTRIBUTE_TARGET_AVX2 for MDBX_ATTRIBUTE_TARGET_AVX512BW */ #ifdef MDBX_ATTRIBUTE_TARGET_SSE2 MDBX_ATTRIBUTE_TARGET_SSE2 static __always_inline unsigned -diffcmp2mask_sse2(const pgno_t *const ptr, const ptrdiff_t offset, - const __m128i pattern) { +diffcmp2mask_sse2(const pgno_t *const ptr, const ptrdiff_t offset, const __m128i pattern) { const __m128i f = _mm_loadu_si128((const __m128i *)ptr); const __m128i l = 
_mm_loadu_si128((const __m128i *)(ptr + offset)); const __m128i cmp = _mm_cmpeq_epi32(_mm_sub_epi32(f, l), pattern); return _mm_movemask_ps(*(const __m128 *)&cmp); } -MDBX_MAYBE_UNUSED __hot MDBX_ATTRIBUTE_TARGET_SSE2 static pgno_t * -scan4seq_sse2(pgno_t *range, const size_t len, const size_t seq) { +MDBX_MAYBE_UNUSED __hot MDBX_ATTRIBUTE_TARGET_SSE2 static pgno_t *scan4seq_sse2(pgno_t *range, const size_t len, + const size_t seq) { assert(seq > 0 && len > seq); #if MDBX_PNL_ASCENDING #error "FIXME: Not implemented" @@ -303,8 +287,7 @@ scan4seq_sse2(pgno_t *range, const size_t len, const size_t seq) { * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. */ #if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) const unsigned on_page_safe_mask = 0xff0 /* enough for '-15' bytes offset */; - if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && - !RUNNING_ON_VALGRIND) { + if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && !RUNNING_ON_VALGRIND) { const unsigned extra = (unsigned)(detent + 4 - range); assert(extra > 0 && extra < 4); mask = 0xF << extra; @@ -324,8 +307,7 @@ scan4seq_sse2(pgno_t *range, const size_t len, const size_t seq) { #ifdef MDBX_ATTRIBUTE_TARGET_AVX2 MDBX_ATTRIBUTE_TARGET_AVX2 static __always_inline unsigned -diffcmp2mask_avx2(const pgno_t *const ptr, const ptrdiff_t offset, - const __m256i pattern) { +diffcmp2mask_avx2(const pgno_t *const ptr, const ptrdiff_t offset, const __m256i pattern) { const __m256i f = _mm256_loadu_si256((const __m256i *)ptr); const __m256i l = _mm256_loadu_si256((const __m256i *)(ptr + offset)); const __m256i cmp = _mm256_cmpeq_epi32(_mm256_sub_epi32(f, l), pattern); @@ -333,16 +315,15 @@ diffcmp2mask_avx2(const pgno_t *const ptr, const ptrdiff_t offset, } MDBX_ATTRIBUTE_TARGET_AVX2 static __always_inline unsigned -diffcmp2mask_sse2avx(const pgno_t *const ptr, const ptrdiff_t offset, - const __m128i pattern) { +diffcmp2mask_sse2avx(const pgno_t *const ptr, const ptrdiff_t 
offset, const __m128i pattern) { const __m128i f = _mm_loadu_si128((const __m128i *)ptr); const __m128i l = _mm_loadu_si128((const __m128i *)(ptr + offset)); const __m128i cmp = _mm_cmpeq_epi32(_mm_sub_epi32(f, l), pattern); return _mm_movemask_ps(*(const __m128 *)&cmp); } -MDBX_MAYBE_UNUSED __hot MDBX_ATTRIBUTE_TARGET_AVX2 static pgno_t * -scan4seq_avx2(pgno_t *range, const size_t len, const size_t seq) { +MDBX_MAYBE_UNUSED __hot MDBX_ATTRIBUTE_TARGET_AVX2 static pgno_t *scan4seq_avx2(pgno_t *range, const size_t len, + const size_t seq) { assert(seq > 0 && len > seq); #if MDBX_PNL_ASCENDING #error "FIXME: Not implemented" @@ -374,8 +355,7 @@ scan4seq_avx2(pgno_t *range, const size_t len, const size_t seq) { * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. */ #if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) const unsigned on_page_safe_mask = 0xfe0 /* enough for '-31' bytes offset */; - if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && - !RUNNING_ON_VALGRIND) { + if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && !RUNNING_ON_VALGRIND) { const unsigned extra = (unsigned)(detent + 8 - range); assert(extra > 0 && extra < 8); mask = 0xFF << extra; @@ -402,15 +382,14 @@ scan4seq_avx2(pgno_t *range, const size_t len, const size_t seq) { #ifdef MDBX_ATTRIBUTE_TARGET_AVX512BW MDBX_ATTRIBUTE_TARGET_AVX512BW static __always_inline unsigned -diffcmp2mask_avx512bw(const pgno_t *const ptr, const ptrdiff_t offset, - const __m512i pattern) { +diffcmp2mask_avx512bw(const pgno_t *const ptr, const ptrdiff_t offset, const __m512i pattern) { const __m512i f = _mm512_loadu_si512((const __m512i *)ptr); const __m512i l = _mm512_loadu_si512((const __m512i *)(ptr + offset)); return _mm512_cmpeq_epi32_mask(_mm512_sub_epi32(f, l), pattern); } -MDBX_MAYBE_UNUSED __hot MDBX_ATTRIBUTE_TARGET_AVX512BW static pgno_t * -scan4seq_avx512bw(pgno_t *range, const size_t len, const size_t seq) { +MDBX_MAYBE_UNUSED __hot 
MDBX_ATTRIBUTE_TARGET_AVX512BW static pgno_t *scan4seq_avx512bw(pgno_t *range, const size_t len, + const size_t seq) { assert(seq > 0 && len > seq); #if MDBX_PNL_ASCENDING #error "FIXME: Not implemented" @@ -442,8 +421,7 @@ scan4seq_avx512bw(pgno_t *range, const size_t len, const size_t seq) { * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. */ #if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) const unsigned on_page_safe_mask = 0xfc0 /* enough for '-63' bytes offset */; - if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && - !RUNNING_ON_VALGRIND) { + if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && !RUNNING_ON_VALGRIND) { const unsigned extra = (unsigned)(detent + 16 - range); assert(extra > 0 && extra < 16); mask = 0xFFFF << extra; @@ -474,10 +452,8 @@ scan4seq_avx512bw(pgno_t *range, const size_t len, const size_t seq) { } #endif /* MDBX_ATTRIBUTE_TARGET_AVX512BW */ -#if (defined(__ARM_NEON) || defined(__ARM_NEON__)) && \ - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -static __always_inline size_t diffcmp2mask_neon(const pgno_t *const ptr, - const ptrdiff_t offset, +#if (defined(__ARM_NEON) || defined(__ARM_NEON__)) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +static __always_inline size_t diffcmp2mask_neon(const pgno_t *const ptr, const ptrdiff_t offset, const uint32x4_t pattern) { const uint32x4_t f = vld1q_u32(ptr); const uint32x4_t l = vld1q_u32(ptr + offset); @@ -485,12 +461,10 @@ static __always_inline size_t diffcmp2mask_neon(const pgno_t *const ptr, if (sizeof(size_t) > 7) return vget_lane_u64(vreinterpret_u64_u16(cmp), 0); else - return vget_lane_u32(vreinterpret_u32_u8(vmovn_u16(vcombine_u16(cmp, cmp))), - 0); + return vget_lane_u32(vreinterpret_u32_u8(vmovn_u16(vcombine_u16(cmp, cmp))), 0); } -__hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len, - const size_t seq) { +__hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len, const size_t seq) { assert(seq > 0 && len 
> seq); #if MDBX_PNL_ASCENDING #error "FIXME: Not implemented" @@ -522,8 +496,7 @@ __hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len, * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. */ #if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) const unsigned on_page_safe_mask = 0xff0 /* enough for '-15' bytes offset */; - if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && - !RUNNING_ON_VALGRIND) { + if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && !RUNNING_ON_VALGRIND) { const unsigned extra = (unsigned)(detent + 4 - range); assert(extra > 0 && extra < 4); mask = (~(size_t)0) << (extra * sizeof(size_t) * 2); @@ -548,8 +521,7 @@ __hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len, #define scan4seq_default scan4seq_avx2 #elif defined(__SSE2__) && defined(MDBX_ATTRIBUTE_TARGET_SSE2) #define scan4seq_default scan4seq_sse2 -#elif (defined(__ARM_NEON) || defined(__ARM_NEON__)) && \ - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +#elif (defined(__ARM_NEON) || defined(__ARM_NEON__)) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) #define scan4seq_default scan4seq_neon /* Choosing of another variants should be added here. */ #endif /* scan4seq_default */ @@ -570,17 +542,12 @@ __hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len, #else /* Selecting the most appropriate implementation at runtime, * depending on the available CPU features. 
*/ -static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len, - const size_t seq); -static pgno_t *(*scan4seq_impl)(pgno_t *range, const size_t len, - const size_t seq) = scan4seq_resolver; +static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len, const size_t seq); +static pgno_t *(*scan4seq_impl)(pgno_t *range, const size_t len, const size_t seq) = scan4seq_resolver; -static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len, - const size_t seq) { - pgno_t *(*choice)(pgno_t *range, const size_t len, const size_t seq) = - nullptr; -#if __has_builtin(__builtin_cpu_init) || defined(__BUILTIN_CPU_INIT__) || \ - __GNUC_PREREQ(4, 8) +static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len, const size_t seq) { + pgno_t *(*choice)(pgno_t *range, const size_t len, const size_t seq) = nullptr; +#if __has_builtin(__builtin_cpu_init) || defined(__BUILTIN_CPU_INIT__) || __GNUC_PREREQ(4, 8) __builtin_cpu_init(); #endif /* __builtin_cpu_init() */ #ifdef MDBX_ATTRIBUTE_TARGET_SSE2 @@ -607,12 +574,10 @@ static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len, #define ALLOC_SHOULD_SCAN 8 /* внутреннее состояние */ #define ALLOC_LIFO 16 /* внутреннее состояние */ -static inline bool is_gc_usable(MDBX_txn *txn, const MDBX_cursor *mc, - const uint8_t flags) { +static inline bool is_gc_usable(MDBX_txn *txn, const MDBX_cursor *mc, const uint8_t flags) { /* If txn is updating the GC, then the retired-list cannot play catch-up with * itself by growing while trying to save it. 
*/ - if (mc->tree == &txn->dbs[FREE_DBI] && !(flags & ALLOC_RESERVE) && - !(mc->flags & z_gcu_preparation)) + if (mc->tree == &txn->dbs[FREE_DBI] && !(flags & ALLOC_RESERVE) && !(mc->flags & z_gcu_preparation)) return false; /* avoid search inside empty tree and while tree is updating, @@ -690,8 +655,7 @@ __hot static pgno_t relist_get_single(MDBX_txn *txn) { #ifndef MDBX_ENABLE_SAVING_SEQUENCES #define MDBX_ENABLE_SAVING_SEQUENCES 0 #endif - if (MDBX_ENABLE_SAVING_SEQUENCES && unlikely(target[dir] == *target + 1) && - len > 2) { + if (MDBX_ENABLE_SAVING_SEQUENCES && unlikely(target[dir] == *target + 1) && len > 2) { /* Пытаемся пропускать последовательности при наличии одиночных элементов. * TODO: необходимо кэшировать пропускаемые последовательности * чтобы не сканировать список сначала при каждом выделении. */ @@ -719,8 +683,7 @@ __hot static pgno_t relist_get_single(MDBX_txn *txn) { #if MDBX_PNL_ASCENDING /* вырезаем элемент с перемещением хвоста */ MDBX_PNL_SETSIZE(txn->tw.relist, len - 1); - for (const pgno_t *const end = txn->tw.relist + len - 1; target <= end; - ++target) + for (const pgno_t *const end = txn->tw.relist + len - 1; target <= end; ++target) *target = target[1]; #else /* перемещать хвост не нужно, просто усекам список */ @@ -729,8 +692,7 @@ __hot static pgno_t relist_get_single(MDBX_txn *txn) { return pgno; } -__hot static pgno_t relist_get_sequence(MDBX_txn *txn, const size_t num, - uint8_t flags) { +__hot static pgno_t relist_get_sequence(MDBX_txn *txn, const size_t num, uint8_t flags) { const size_t len = MDBX_PNL_GETSIZE(txn->tw.relist); pgno_t *edge = MDBX_PNL_EDGE(txn->tw.relist); assert(len >= num && num > 1); @@ -754,8 +716,7 @@ __hot static pgno_t relist_get_sequence(MDBX_txn *txn, const size_t num, /* вырезаем найденную последовательность с перемещением хвоста */ MDBX_PNL_SETSIZE(txn->tw.relist, len - num); #if MDBX_PNL_ASCENDING - for (const pgno_t *const end = txn->tw.relist + len - num; target <= end; - ++target) + for (const 
pgno_t *const end = txn->tw.relist + len - num; target <= end; ++target) *target = target[num]; #else for (const pgno_t *const end = txn->tw.relist + len; ++target <= end;) @@ -766,16 +727,13 @@ __hot static pgno_t relist_get_sequence(MDBX_txn *txn, const size_t num, return 0; } -static inline pgr_t page_alloc_finalize(MDBX_env *const env, - MDBX_txn *const txn, - const MDBX_cursor *const mc, +static inline pgr_t page_alloc_finalize(MDBX_env *const env, MDBX_txn *const txn, const MDBX_cursor *const mc, const pgno_t pgno, const size_t num) { #if MDBX_ENABLE_PROFGC size_t majflt_before; const uint64_t cputime_before = osal_cputime(&majflt_before); - gc_prof_stat_t *const prof = (cursor_dbi(mc) == FREE_DBI) - ? &env->lck->pgops.gc_prof.self - : &env->lck->pgops.gc_prof.work; + gc_prof_stat_t *const prof = + (cursor_dbi(mc) == FREE_DBI) ? &env->lck->pgops.gc_prof.self : &env->lck->pgops.gc_prof.work; #else (void)mc; #endif /* MDBX_ENABLE_PROFGC */ @@ -811,8 +769,7 @@ static inline pgr_t page_alloc_finalize(MDBX_env *const env, * грязной I/O очереди. Из-за этого штраф за лишнюю запись может быть * сравним с избегаемым ненужным чтением. */ if (env->prefault_write_activated) { - void *const pattern = - ptr_disp(env->page_auxbuf, need_clean ? env->ps : env->ps * 2); + void *const pattern = ptr_disp(env->page_auxbuf, need_clean ? 
env->ps : env->ps * 2); size_t file_offset = pgno2bytes(env, pgno); if (likely(num == 1)) { if (!mincore_probe(env, pgno)) { @@ -831,8 +788,7 @@ static inline pgr_t page_alloc_finalize(MDBX_env *const env, iov[n].iov_len = env->ps; iov[n].iov_base = pattern; if (unlikely(++n == MDBX_AUXILARY_IOV_MAX)) { - osal_pwritev(env->lazy_fd, iov, MDBX_AUXILARY_IOV_MAX, - file_offset); + osal_pwritev(env->lazy_fd, iov, MDBX_AUXILARY_IOV_MAX, file_offset); #if MDBX_ENABLE_PGOP_STAT env->lck->pgops.prefault.weak += 1; #endif /* MDBX_ENABLE_PGOP_STAT */ @@ -873,8 +829,7 @@ static inline pgr_t page_alloc_finalize(MDBX_env *const env, ret.err = page_dirty(txn, ret.page, (pgno_t)num); bailout: - tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - - MDBX_ENABLE_REFUND)); + tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); #if MDBX_ENABLE_PROFGC size_t majflt_after; prof->xtime_cpu += osal_cputime(&majflt_after) - cputime_before; @@ -883,32 +838,25 @@ bailout: return ret; } -pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, - uint8_t flags) { +pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, uint8_t flags) { pgr_t ret; MDBX_txn *const txn = mc->txn; MDBX_env *const env = txn->env; #if MDBX_ENABLE_PROFGC - gc_prof_stat_t *const prof = (cursor_dbi(mc) == FREE_DBI) - ? &env->lck->pgops.gc_prof.self - : &env->lck->pgops.gc_prof.work; + gc_prof_stat_t *const prof = + (cursor_dbi(mc) == FREE_DBI) ? &env->lck->pgops.gc_prof.self : &env->lck->pgops.gc_prof.work; prof->spe_counter += 1; #endif /* MDBX_ENABLE_PROFGC */ eASSERT(env, num > 0 || (flags & ALLOC_RESERVE)); - eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - - MDBX_ENABLE_REFUND)); + eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); size_t newnext; - const uint64_t monotime_begin = - (MDBX_ENABLE_PROFGC || (num > 1 && env->options.gc_time_limit)) - ? 
osal_monotime() - : 0; + const uint64_t monotime_begin = (MDBX_ENABLE_PROFGC || (num > 1 && env->options.gc_time_limit)) ? osal_monotime() : 0; struct monotime_cache now_cache; - now_cache.expire_countdown = - 1 /* старт с 1 позволяет избавиться как от лишних системных вызовов когда - лимит времени задан нулевой или уже исчерпан, так и от подсчета - времени при не-достижении rp_augment_limit */ + now_cache.expire_countdown = 1 /* старт с 1 позволяет избавиться как от лишних системных вызовов когда + лимит времени задан нулевой или уже исчерпан, так и от подсчета + времени при не-достижении rp_augment_limit */ ; now_cache.value = monotime_begin; pgno_t pgno = 0; @@ -917,9 +865,8 @@ pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, prof->xpages += 1; #endif /* MDBX_ENABLE_PROFGC */ if (MDBX_PNL_GETSIZE(txn->tw.relist) >= num) { - eASSERT(env, - MDBX_PNL_LAST(txn->tw.relist) < txn->geo.first_unallocated && - MDBX_PNL_FIRST(txn->tw.relist) < txn->geo.first_unallocated); + eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->geo.first_unallocated && + MDBX_PNL_FIRST(txn->tw.relist) < txn->geo.first_unallocated); pgno = relist_get_sequence(txn, num, flags); if (likely(pgno)) goto done; @@ -936,16 +883,14 @@ pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, goto no_gc; } - eASSERT(env, - (flags & (ALLOC_COALESCE | ALLOC_LIFO | ALLOC_SHOULD_SCAN)) == 0); + eASSERT(env, (flags & (ALLOC_COALESCE | ALLOC_LIFO | ALLOC_SHOULD_SCAN)) == 0); flags += (env->flags & MDBX_LIFORECLAIM) ? ALLOC_LIFO : 0; if (/* Не коагулируем записи при подготовке резерва для обновления GC. * Иначе попытка увеличить резерв может приводить к необходимости ещё * большего резерва из-за увеличения списка переработанных страниц. 
*/ (flags & ALLOC_RESERVE) == 0) { - if (txn->dbs[FREE_DBI].branch_pages && - MDBX_PNL_GETSIZE(txn->tw.relist) < env->maxgc_large1page / 2) + if (txn->dbs[FREE_DBI].branch_pages && MDBX_PNL_GETSIZE(txn->tw.relist) < env->maxgc_large1page / 2) flags += ALLOC_COALESCE; } @@ -976,9 +921,7 @@ retry_gc_refresh_oldest:; txnid_t oldest = txn_snapshot_oldest(txn); retry_gc_have_oldest: if (unlikely(oldest >= txn->txnid)) { - ERROR("unexpected/invalid oldest-readed txnid %" PRIaTXN - " for current-txnid %" PRIaTXN, - oldest, txn->txnid); + ERROR("unexpected/invalid oldest-readed txnid %" PRIaTXN " for current-txnid %" PRIaTXN, oldest, txn->txnid); ret.err = MDBX_PROBLEM; goto fail; } @@ -1026,8 +969,7 @@ next_gc:; goto depleted_gc; } if (unlikely(key.iov_len != sizeof(txnid_t))) { - ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid GC key-length"); + ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC key-length"); ret.err = MDBX_CORRUPTED; goto fail; } @@ -1046,26 +988,21 @@ next_gc:; /* Reading next GC record */ MDBX_val data; page_t *const mp = gc->pg[gc->top]; - if (unlikely((ret.err = node_read(gc, page_node(mp, gc->ki[gc->top]), &data, - mp)) != MDBX_SUCCESS)) + if (unlikely((ret.err = node_read(gc, page_node(mp, gc->ki[gc->top]), &data, mp)) != MDBX_SUCCESS)) goto fail; pgno_t *gc_pnl = (pgno_t *)data.iov_base; - if (unlikely(data.iov_len % sizeof(pgno_t) || - data.iov_len < MDBX_PNL_SIZEOF(gc_pnl) || + if (unlikely(data.iov_len % sizeof(pgno_t) || data.iov_len < MDBX_PNL_SIZEOF(gc_pnl) || !pnl_check(gc_pnl, txn->geo.first_unallocated))) { - ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid GC value-length"); + ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC value-length"); ret.err = MDBX_CORRUPTED; goto fail; } const size_t gc_len = MDBX_PNL_GETSIZE(gc_pnl); - TRACE("gc-read: id #%" PRIaTXN " len %zu, re-list will %zu ", id, gc_len, - gc_len + MDBX_PNL_GETSIZE(txn->tw.relist)); + TRACE("gc-read: id #%" 
PRIaTXN " len %zu, re-list will %zu ", id, gc_len, gc_len + MDBX_PNL_GETSIZE(txn->tw.relist)); - if (unlikely(gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) >= - env->maxgc_large1page)) { + if (unlikely(gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) >= env->maxgc_large1page)) { /* Don't try to coalesce too much. */ if (flags & ALLOC_SHOULD_SCAN) { eASSERT(env, flags & ALLOC_COALESCE); @@ -1076,10 +1013,8 @@ next_gc:; #endif /* MDBX_ENABLE_PROFGC */ TRACE("clear %s %s", "ALLOC_COALESCE", "since got threshold"); if (MDBX_PNL_GETSIZE(txn->tw.relist) >= num) { - eASSERT(env, - MDBX_PNL_LAST(txn->tw.relist) < txn->geo.first_unallocated && - MDBX_PNL_FIRST(txn->tw.relist) < - txn->geo.first_unallocated); + eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->geo.first_unallocated && + MDBX_PNL_FIRST(txn->tw.relist) < txn->geo.first_unallocated); if (likely(num == 1)) { pgno = relist_get_single(txn); goto done; @@ -1090,25 +1025,19 @@ next_gc:; } flags -= ALLOC_COALESCE | ALLOC_SHOULD_SCAN; } - if (unlikely(/* list is too long already */ MDBX_PNL_GETSIZE( - txn->tw.relist) >= env->options.rp_augment_limit) && + if (unlikely(/* list is too long already */ MDBX_PNL_GETSIZE(txn->tw.relist) >= env->options.rp_augment_limit) && ((/* not a slot-request from gc-update */ num && - /* have enough unallocated space */ txn->geo.upper >= - txn->geo.first_unallocated + num && - monotime_since_cached(monotime_begin, &now_cache) + - txn->tw.gc.time_acc >= - env->options.gc_time_limit) || + /* have enough unallocated space */ txn->geo.upper >= txn->geo.first_unallocated + num && + monotime_since_cached(monotime_begin, &now_cache) + txn->tw.gc.time_acc >= env->options.gc_time_limit) || gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) >= PAGELIST_LIMIT)) { /* Stop reclaiming to avoid large/overflow the page list. 
This is a rare * case while search for a continuously multi-page region in a * large database, see https://libmdbx.dqdkfa.ru/dead-github/issues/123 */ NOTICE("stop reclaiming %s: %zu (current) + %zu " "(chunk) -> %zu, rp_augment_limit %u", - likely(gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) < PAGELIST_LIMIT) - ? "since rp_augment_limit was reached" - : "to avoid PNL overflow", - MDBX_PNL_GETSIZE(txn->tw.relist), gc_len, - gc_len + MDBX_PNL_GETSIZE(txn->tw.relist), + likely(gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) < PAGELIST_LIMIT) ? "since rp_augment_limit was reached" + : "to avoid PNL overflow", + MDBX_PNL_GETSIZE(txn->tw.relist), gc_len, gc_len + MDBX_PNL_GETSIZE(txn->tw.relist), env->options.rp_augment_limit); goto depleted_gc; } @@ -1128,9 +1057,7 @@ next_gc:; goto fail; if (LOG_ENABLED(MDBX_LOG_EXTRA)) { - DEBUG_EXTRA("readed GC-pnl txn %" PRIaTXN " root %" PRIaPGNO - " len %zu, PNL", - id, txn->dbs[FREE_DBI].root, gc_len); + DEBUG_EXTRA("readed GC-pnl txn %" PRIaTXN " root %" PRIaPGNO " len %zu, PNL", id, txn->dbs[FREE_DBI].root, gc_len); for (size_t i = gc_len; i; i--) DEBUG_EXTRA_PRINT(" %" PRIaPGNO, gc_pnl[i]); DEBUG_EXTRA_PRINT(", first_unallocated %u\n", txn->geo.first_unallocated); @@ -1141,33 +1068,27 @@ next_gc:; flags |= ALLOC_SHOULD_SCAN; if (AUDIT_ENABLED()) { if (unlikely(!pnl_check(txn->tw.relist, txn->geo.first_unallocated))) { - ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid txn retired-list"); + ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid txn retired-list"); ret.err = MDBX_CORRUPTED; goto fail; } } else { - eASSERT(env, - pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated)); + eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated)); } eASSERT(env, dpl_check(txn)); - eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.relist) == 0 || - MDBX_PNL_MOST(txn->tw.relist) < txn->geo.first_unallocated); + eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.relist) == 0 || MDBX_PNL_MOST(txn->tw.relist) < 
txn->geo.first_unallocated); if (MDBX_ENABLE_REFUND && MDBX_PNL_GETSIZE(txn->tw.relist) && - unlikely(MDBX_PNL_MOST(txn->tw.relist) == - txn->geo.first_unallocated - 1)) { + unlikely(MDBX_PNL_MOST(txn->tw.relist) == txn->geo.first_unallocated - 1)) { /* Refund suitable pages into "unallocated" space */ txn_refund(txn); } - eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - - MDBX_ENABLE_REFUND)); + eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); /* Done for a kick-reclaim mode, actually no page needed */ if (unlikely(num == 0)) { eASSERT(env, ret.err == MDBX_SUCCESS); - TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "early-exit for slot", id, - MDBX_PNL_GETSIZE(txn->tw.relist)); + TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "early-exit for slot", id, MDBX_PNL_GETSIZE(txn->tw.relist)); goto early_exit; } @@ -1175,8 +1096,7 @@ next_gc:; eASSERT(env, op == MDBX_PREV || op == MDBX_NEXT); if (flags & ALLOC_COALESCE) { - TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "coalesce-continue", id, - MDBX_PNL_GETSIZE(txn->tw.relist)); + TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "coalesce-continue", id, MDBX_PNL_GETSIZE(txn->tw.relist)); goto next_gc; } @@ -1184,9 +1104,8 @@ scan: eASSERT(env, flags & ALLOC_SHOULD_SCAN); eASSERT(env, num > 0); if (MDBX_PNL_GETSIZE(txn->tw.relist) >= num) { - eASSERT(env, - MDBX_PNL_LAST(txn->tw.relist) < txn->geo.first_unallocated && - MDBX_PNL_FIRST(txn->tw.relist) < txn->geo.first_unallocated); + eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->geo.first_unallocated && + MDBX_PNL_FIRST(txn->tw.relist) < txn->geo.first_unallocated); if (likely(num == 1)) { eASSERT(env, !(flags & ALLOC_RESERVE)); pgno = relist_get_single(txn); @@ -1198,14 +1117,12 @@ scan: } flags -= ALLOC_SHOULD_SCAN; if (ret.err == MDBX_SUCCESS) { - TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "continue-search", id, - MDBX_PNL_GETSIZE(txn->tw.relist)); + TRACE("%s: last id #%" PRIaTXN ", 
re-len %zu", "continue-search", id, MDBX_PNL_GETSIZE(txn->tw.relist)); goto next_gc; } depleted_gc: - TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "gc-depleted", id, - MDBX_PNL_GETSIZE(txn->tw.relist)); + TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "gc-depleted", id, MDBX_PNL_GETSIZE(txn->tw.relist)); ret.err = MDBX_NOTFOUND; if (flags & ALLOC_SHOULD_SCAN) goto scan; @@ -1226,16 +1143,11 @@ depleted_gc: /* Does reclaiming stopped at the last steady point? */ const meta_ptr_t recent = meta_recent(env, &txn->tw.troika); const meta_ptr_t prefer_steady = meta_prefer_steady(env, &txn->tw.troika); - if (recent.ptr_c != prefer_steady.ptr_c && prefer_steady.is_steady && - detent == prefer_steady.txnid + 1) { - DEBUG("gc-kick-steady: recent %" PRIaTXN "-%s, steady %" PRIaTXN - "-%s, detent %" PRIaTXN, - recent.txnid, durable_caption(recent.ptr_c), prefer_steady.txnid, - durable_caption(prefer_steady.ptr_c), detent); - const pgno_t autosync_threshold = - atomic_load32(&env->lck->autosync_threshold, mo_Relaxed); - const uint64_t autosync_period = - atomic_load64(&env->lck->autosync_period, mo_Relaxed); + if (recent.ptr_c != prefer_steady.ptr_c && prefer_steady.is_steady && detent == prefer_steady.txnid + 1) { + DEBUG("gc-kick-steady: recent %" PRIaTXN "-%s, steady %" PRIaTXN "-%s, detent %" PRIaTXN, recent.txnid, + durable_caption(recent.ptr_c), prefer_steady.txnid, durable_caption(prefer_steady.ptr_c), detent); + const pgno_t autosync_threshold = atomic_load32(&env->lck->autosync_threshold, mo_Relaxed); + const uint64_t autosync_period = atomic_load64(&env->lck->autosync_period, mo_Relaxed); uint64_t eoos_timestamp; /* wipe the last steady-point if one of: * - UTTERLY_NOSYNC mode AND auto-sync threshold is NOT specified @@ -1246,8 +1158,7 @@ depleted_gc: * - database is full (with the current file size) * AND auto-sync threshold it NOT specified */ if (F_ISSET(env->flags, MDBX_UTTERLY_NOSYNC) && - ((autosync_threshold | autosync_period) == 0 || - newnext >= 
prefer_steady.ptr_c->geometry.now)) { + ((autosync_threshold | autosync_period) == 0 || newnext >= prefer_steady.ptr_c->geometry.now)) { /* wipe steady checkpoint in MDBX_UTTERLY_NOSYNC mode * without any auto-sync threshold(s). */ #if MDBX_ENABLE_PROFGC @@ -1257,39 +1168,30 @@ depleted_gc: DEBUG("gc-wipe-steady, rc %d", ret.err); if (unlikely(ret.err != MDBX_SUCCESS)) goto fail; - eASSERT(env, prefer_steady.ptr_c != - meta_prefer_steady(env, &txn->tw.troika).ptr_c); + eASSERT(env, prefer_steady.ptr_c != meta_prefer_steady(env, &txn->tw.troika).ptr_c); goto retry_gc_refresh_oldest; } - if ((autosync_threshold && - atomic_load64(&env->lck->unsynced_pages, mo_Relaxed) >= - autosync_threshold) || - (autosync_period && - (eoos_timestamp = - atomic_load64(&env->lck->eoos_timestamp, mo_Relaxed)) && + if ((autosync_threshold && atomic_load64(&env->lck->unsynced_pages, mo_Relaxed) >= autosync_threshold) || + (autosync_period && (eoos_timestamp = atomic_load64(&env->lck->eoos_timestamp, mo_Relaxed)) && osal_monotime() - eoos_timestamp >= autosync_period) || newnext >= txn->geo.upper || - ((num == 0 || newnext >= txn->geo.end_pgno) && - (autosync_threshold | autosync_period) == 0)) { + ((num == 0 || newnext >= txn->geo.end_pgno) && (autosync_threshold | autosync_period) == 0)) { /* make steady checkpoint. 
*/ #if MDBX_ENABLE_PROFGC env->lck->pgops.gc_prof.flushes += 1; #endif /* MDBX_ENABLE_PROFGC */ meta_t meta = *recent.ptr_c; - ret.err = dxb_sync_locked(env, env->flags & MDBX_WRITEMAP, &meta, - &txn->tw.troika); + ret.err = dxb_sync_locked(env, env->flags & MDBX_WRITEMAP, &meta, &txn->tw.troika); DEBUG("gc-make-steady, rc %d", ret.err); eASSERT(env, ret.err != MDBX_RESULT_TRUE); if (unlikely(ret.err != MDBX_SUCCESS)) goto fail; - eASSERT(env, prefer_steady.ptr_c != - meta_prefer_steady(env, &txn->tw.troika).ptr_c); + eASSERT(env, prefer_steady.ptr_c != meta_prefer_steady(env, &txn->tw.troika).ptr_c); goto retry_gc_refresh_oldest; } } - if (unlikely(true == - atomic_load32(&env->lck->rdt_refresh_flag, mo_AcquireRelease))) { + if (unlikely(true == atomic_load32(&env->lck->rdt_refresh_flag, mo_AcquireRelease))) { oldest = txn_snapshot_oldest(txn); if (oldest >= detent) goto retry_gc_have_oldest; @@ -1315,8 +1217,7 @@ no_gc: #ifndef MDBX_ENABLE_BACKLOG_DEPLETED #define MDBX_ENABLE_BACKLOG_DEPLETED 0 #endif /* MDBX_ENABLE_BACKLOG_DEPLETED*/ - if (MDBX_ENABLE_BACKLOG_DEPLETED && - unlikely(!(txn->flags & txn_gc_drained))) { + if (MDBX_ENABLE_BACKLOG_DEPLETED && unlikely(!(txn->flags & txn_gc_drained))) { ret.err = MDBX_BACKLOG_DEPLETED; goto fail; } @@ -1338,20 +1239,16 @@ no_gc: eASSERT(env, newnext > txn->geo.end_pgno); const size_t grow_step = pv2pages(txn->geo.grow_pv); - size_t aligned = pgno_align2os_pgno( - env, (pgno_t)(newnext + grow_step - newnext % grow_step)); + size_t aligned = pgno_align2os_pgno(env, (pgno_t)(newnext + grow_step - newnext % grow_step)); if (aligned > txn->geo.upper) aligned = txn->geo.upper; eASSERT(env, aligned >= newnext); - VERBOSE("try growth datafile to %zu pages (+%zu)", aligned, - aligned - txn->geo.end_pgno); - ret.err = dxb_resize(env, txn->geo.first_unallocated, (pgno_t)aligned, - txn->geo.upper, implicit_grow); + VERBOSE("try growth datafile to %zu pages (+%zu)", aligned, aligned - txn->geo.end_pgno); + ret.err = dxb_resize(env, 
txn->geo.first_unallocated, (pgno_t)aligned, txn->geo.upper, implicit_grow); if (ret.err != MDBX_SUCCESS) { - ERROR("unable growth datafile to %zu pages (+%zu), errcode %d", aligned, - aligned - txn->geo.end_pgno, ret.err); + ERROR("unable growth datafile to %zu pages (+%zu), errcode %d", aligned, aligned - txn->geo.end_pgno, ret.err); goto fail; } env->txn->geo.end_pgno = (pgno_t)aligned; @@ -1363,26 +1260,20 @@ done: ret.err = MDBX_SUCCESS; if (likely((flags & ALLOC_RESERVE) == 0)) { if (pgno) { - eASSERT(env, - pgno + num <= txn->geo.first_unallocated && pgno >= NUM_METAS); - eASSERT(env, - pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - - MDBX_ENABLE_REFUND)); + eASSERT(env, pgno + num <= txn->geo.first_unallocated && pgno >= NUM_METAS); + eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); } else { pgno = txn->geo.first_unallocated; txn->geo.first_unallocated += (pgno_t)num; eASSERT(env, txn->geo.first_unallocated <= txn->geo.end_pgno); - eASSERT(env, - pgno >= NUM_METAS && pgno + num <= txn->geo.first_unallocated); + eASSERT(env, pgno >= NUM_METAS && pgno + num <= txn->geo.first_unallocated); } ret = page_alloc_finalize(env, txn, mc, pgno, num); if (unlikely(ret.err != MDBX_SUCCESS)) { fail: eASSERT(env, ret.err != MDBX_SUCCESS); - eASSERT(env, - pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - - MDBX_ENABLE_REFUND)); + eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); int level; const char *what; if (flags & ALLOC_RESERVE) { @@ -1398,12 +1289,9 @@ done: "unable alloc %zu %s, alloc-flags 0x%x, err %d, txn-flags " "0x%x, re-list-len %zu, loose-count %zu, gc: height %u, " "branch %zu, leaf %zu, large %zu, entries %zu\n", - num, what, flags, ret.err, txn->flags, - MDBX_PNL_GETSIZE(txn->tw.relist), txn->tw.loose_count, - txn->dbs[FREE_DBI].height, - (size_t)txn->dbs[FREE_DBI].branch_pages, - (size_t)txn->dbs[FREE_DBI].leaf_pages, - 
(size_t)txn->dbs[FREE_DBI].large_pages, + num, what, flags, ret.err, txn->flags, MDBX_PNL_GETSIZE(txn->tw.relist), txn->tw.loose_count, + txn->dbs[FREE_DBI].height, (size_t)txn->dbs[FREE_DBI].branch_pages, + (size_t)txn->dbs[FREE_DBI].leaf_pages, (size_t)txn->dbs[FREE_DBI].large_pages, (size_t)txn->dbs[FREE_DBI].items); ret.page = nullptr; } @@ -1411,8 +1299,7 @@ done: txn->tw.gc.time_acc += monotime_since_cached(monotime_begin, &now_cache); } else { early_exit: - DEBUG("return nullptr for %zu pages for ALLOC_%s, rc %d", num, - num ? "RESERVE" : "SLOT", ret.err); + DEBUG("return nullptr for %zu pages for ALLOC_%s, rc %d", num, num ? "RESERVE" : "SLOT", ret.err); ret.page = nullptr; } @@ -1425,8 +1312,7 @@ done: __hot pgr_t gc_alloc_single(const MDBX_cursor *const mc) { MDBX_txn *const txn = mc->txn; tASSERT(txn, mc->txn->flags & MDBX_TXN_DIRTY); - tASSERT(txn, - F_ISSET(*cursor_dbi_state(mc), DBI_LINDO | DBI_VALID | DBI_DIRTY)); + tASSERT(txn, F_ISSET(*cursor_dbi_state(mc), DBI_LINDO | DBI_VALID | DBI_DIRTY)); /* If there are any loose pages, just use them */ while (likely(txn->tw.loose_pages)) { @@ -1443,8 +1329,7 @@ __hot pgr_t gc_alloc_single(const MDBX_cursor *const mc) { VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *)); txn->tw.loose_pages = page_next(lp); txn->tw.loose_count--; - DEBUG_EXTRA("db %d use loose page %" PRIaPGNO, cursor_dbi_dbg(mc), - lp->pgno); + DEBUG_EXTRA("db %d use loose page %" PRIaPGNO, cursor_dbi_dbg(mc), lp->pgno); tASSERT(txn, lp->pgno < txn->geo.first_unallocated); tASSERT(txn, lp->pgno >= NUM_METAS); VALGRIND_MAKE_MEM_UNDEFINED(page_data(lp), page_space(txn->env)); diff --git a/src/gc-put.c b/src/gc-put.c index c8648830..04999c6a 100644 --- a/src/gc-put.c +++ b/src/gc-put.c @@ -11,9 +11,7 @@ MDBX_MAYBE_UNUSED static inline const char *dbg_prefix(const gcu_t *ctx) { return is_lifo(ctx->cursor.txn) ? 
" lifo" : " fifo"; } -static inline size_t backlog_size(MDBX_txn *txn) { - return MDBX_PNL_GETSIZE(txn->tw.relist) + txn->tw.loose_count; -} +static inline size_t backlog_size(MDBX_txn *txn) { return MDBX_PNL_GETSIZE(txn->tw.relist) + txn->tw.loose_count; } static int clean_stored_retired(MDBX_txn *txn, gcu_t *ctx) { int err = MDBX_SUCCESS; @@ -53,8 +51,7 @@ static int clean_stored_retired(MDBX_txn *txn, gcu_t *ctx) { } static int touch_gc(gcu_t *ctx) { - tASSERT(ctx->cursor.txn, is_pointed(&ctx->cursor) || - ctx->cursor.txn->dbs[FREE_DBI].leaf_pages == 0); + tASSERT(ctx->cursor.txn, is_pointed(&ctx->cursor) || ctx->cursor.txn->dbs[FREE_DBI].leaf_pages == 0); MDBX_val key, val; key.iov_base = val.iov_base = nullptr; key.iov_len = sizeof(txnid_t); @@ -70,24 +67,19 @@ static int touch_gc(gcu_t *ctx) { * during a deleting, when GC tree is unbalanced. */ static int prepare_backlog(MDBX_txn *txn, gcu_t *ctx) { const size_t for_cow = txn->dbs[FREE_DBI].height; - const size_t for_rebalance = - for_cow + 1 + - (txn->dbs[FREE_DBI].height + 1ul >= txn->dbs[FREE_DBI].branch_pages); + const size_t for_rebalance = for_cow + 1 + (txn->dbs[FREE_DBI].height + 1ul >= txn->dbs[FREE_DBI].branch_pages); size_t for_split = ctx->retired_stored == 0; tASSERT(txn, is_pointed(&ctx->cursor) || txn->dbs[FREE_DBI].leaf_pages == 0); - const intptr_t retired_left = - MDBX_PNL_SIZEOF(txn->tw.retired_pages) - ctx->retired_stored; + const intptr_t retired_left = MDBX_PNL_SIZEOF(txn->tw.retired_pages) - ctx->retired_stored; size_t for_relist = 0; if (MDBX_ENABLE_BIGFOOT && retired_left > 0) { - for_relist = (retired_left + txn->env->maxgc_large1page - 1) / - txn->env->maxgc_large1page; + for_relist = (retired_left + txn->env->maxgc_large1page - 1) / txn->env->maxgc_large1page; const size_t per_branch_page = txn->env->maxgc_per_branch; for (size_t entries = for_relist; entries > 1; for_split += entries) entries = (entries + per_branch_page - 1) / per_branch_page; } else if (!MDBX_ENABLE_BIGFOOT && 
retired_left != 0) { - for_relist = - largechunk_npages(txn->env, MDBX_PNL_SIZEOF(txn->tw.retired_pages)); + for_relist = largechunk_npages(txn->env, MDBX_PNL_SIZEOF(txn->tw.retired_pages)); } const size_t for_tree_before_touch = for_cow + for_rebalance + for_split; @@ -96,23 +88,20 @@ static int prepare_backlog(MDBX_txn *txn, gcu_t *ctx) { const size_t for_all_after_touch = for_relist + for_tree_after_touch; if (likely(for_relist < 2 && backlog_size(txn) > for_all_before_touch) && - (ctx->cursor.top < 0 || - is_modifable(txn, ctx->cursor.pg[ctx->cursor.top]))) + (ctx->cursor.top < 0 || is_modifable(txn, ctx->cursor.pg[ctx->cursor.top]))) return MDBX_SUCCESS; TRACE(">> retired-stored %zu, left %zi, backlog %zu, need %zu (4list %zu, " "4split %zu, " "4cow %zu, 4tree %zu)", - ctx->retired_stored, retired_left, backlog_size(txn), - for_all_before_touch, for_relist, for_split, for_cow, + ctx->retired_stored, retired_left, backlog_size(txn), for_all_before_touch, for_relist, for_split, for_cow, for_tree_before_touch); int err = touch_gc(ctx); TRACE("== after-touch, backlog %zu, err %d", backlog_size(txn), err); if (!MDBX_ENABLE_BIGFOOT && unlikely(for_relist > 1) && - MDBX_PNL_GETSIZE(txn->tw.retired_pages) != ctx->retired_stored && - err == MDBX_SUCCESS) { + MDBX_PNL_GETSIZE(txn->tw.retired_pages) != ctx->retired_stored && err == MDBX_SUCCESS) { if (unlikely(ctx->retired_stored)) { err = clean_stored_retired(txn, ctx); if (unlikely(err != MDBX_SUCCESS)) @@ -122,8 +111,7 @@ static int prepare_backlog(MDBX_txn *txn, gcu_t *ctx) { } err = gc_alloc_ex(&ctx->cursor, for_relist, ALLOC_RESERVE).err; TRACE("== after-4linear, backlog %zu, err %d", backlog_size(txn), err); - cASSERT(&ctx->cursor, - backlog_size(txn) >= for_relist || err != MDBX_SUCCESS); + cASSERT(&ctx->cursor, backlog_size(txn) >= for_relist || err != MDBX_SUCCESS); } while (backlog_size(txn) < for_all_after_touch && err == MDBX_SUCCESS) @@ -131,10 +119,8 @@ static int prepare_backlog(MDBX_txn *txn, gcu_t *ctx) 
{ TRACE("<< backlog %zu, err %d, gc: height %u, branch %zu, leaf %zu, large " "%zu, entries %zu", - backlog_size(txn), err, txn->dbs[FREE_DBI].height, - (size_t)txn->dbs[FREE_DBI].branch_pages, - (size_t)txn->dbs[FREE_DBI].leaf_pages, - (size_t)txn->dbs[FREE_DBI].large_pages, + backlog_size(txn), err, txn->dbs[FREE_DBI].height, (size_t)txn->dbs[FREE_DBI].branch_pages, + (size_t)txn->dbs[FREE_DBI].leaf_pages, (size_t)txn->dbs[FREE_DBI].large_pages, (size_t)txn->dbs[FREE_DBI].items); tASSERT(txn, err != MDBX_NOTFOUND || (txn->flags & txn_gc_drained) != 0); return (err != MDBX_NOTFOUND) ? err : MDBX_SUCCESS; @@ -164,12 +150,10 @@ static int gcu_loose(MDBX_txn *txn, gcu_t *ctx) { * though usually none are left at this point. * The pages themselves remain in dirtylist. */ if (unlikely(!txn->tw.gc.reclaimed && txn->tw.gc.last_reclaimed < 1)) { - TRACE("%s: try allocate gc-slot for %zu loose-pages", dbg_prefix(ctx), - txn->tw.loose_count); + TRACE("%s: try allocate gc-slot for %zu loose-pages", dbg_prefix(ctx), txn->tw.loose_count); int err = gc_alloc_ex(&ctx->cursor, 0, ALLOC_RESERVE).err; if (err == MDBX_SUCCESS) { - TRACE("%s: retry since gc-slot for %zu loose-pages available", - dbg_prefix(ctx), txn->tw.loose_count); + TRACE("%s: retry since gc-slot for %zu loose-pages available", dbg_prefix(ctx), txn->tw.loose_count); return MDBX_RESULT_TRUE; } @@ -183,15 +167,13 @@ static int gcu_loose(MDBX_txn *txn, gcu_t *ctx) { MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *)); VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *)); } - TRACE("%s: append %zu loose-pages to retired-pages", dbg_prefix(ctx), - txn->tw.loose_count); + TRACE("%s: append %zu loose-pages to retired-pages", dbg_prefix(ctx), txn->tw.loose_count); } else { /* Room for loose pages + temp PNL with same */ int err = pnl_need(&txn->tw.relist, 2 * txn->tw.loose_count + 2); if (unlikely(err != MDBX_SUCCESS)) return err; - pnl_t loose = txn->tw.relist + MDBX_PNL_ALLOCLEN(txn->tw.relist) - - 
txn->tw.loose_count - 1; + pnl_t loose = txn->tw.relist + MDBX_PNL_ALLOCLEN(txn->tw.relist) - txn->tw.loose_count - 1; size_t count = 0; for (page_t *lp = txn->tw.loose_pages; lp; lp = page_next(lp)) { tASSERT(txn, lp->flags == P_LOOSE); @@ -203,8 +185,7 @@ static int gcu_loose(MDBX_txn *txn, gcu_t *ctx) { MDBX_PNL_SETSIZE(loose, count); pnl_sort(loose, txn->geo.first_unallocated); pnl_merge(txn->tw.relist, loose); - TRACE("%s: append %zu loose-pages to reclaimed-pages", dbg_prefix(ctx), - txn->tw.loose_count); + TRACE("%s: append %zu loose-pages to reclaimed-pages", dbg_prefix(ctx), txn->tw.loose_count); } /* filter-out list of dirty-pages from loose-pages */ @@ -227,8 +208,7 @@ static int gcu_loose(MDBX_txn *txn, gcu_t *ctx) { page_shadow_release(txn->env, dp, 1); } } - TRACE("%s: filtered-out loose-pages from %zu -> %zu dirty-pages", - dbg_prefix(ctx), dl->length, w); + TRACE("%s: filtered-out loose-pages from %zu -> %zu dirty-pages", dbg_prefix(ctx), dl->length, w); tASSERT(txn, txn->tw.loose_count == dl->length - w); dl->sorted -= sorted_out; tASSERT(txn, dl->sorted <= w); @@ -236,8 +216,7 @@ static int gcu_loose(MDBX_txn *txn, gcu_t *ctx) { dl->pages_including_loose -= txn->tw.loose_count; txn->tw.dirtyroom += txn->tw.loose_count; tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == - (txn->parent ? txn->parent->tw.dirtyroom - : txn->env->options.dp_limit)); + (txn->parent ? 
txn->parent->tw.dirtyroom : txn->env->options.dp_limit)); } else { tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); } @@ -276,8 +255,8 @@ static int gcu_retired(MDBX_txn *txn, gcu_t *ctx) { if (unlikely(err != MDBX_SUCCESS)) return err; if (retired_pages_before != MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { - TRACE("%s: retired-list changed (%zu -> %zu), retry", dbg_prefix(ctx), - retired_pages_before, MDBX_PNL_GETSIZE(txn->tw.retired_pages)); + TRACE("%s: retired-list changed (%zu -> %zu), retry", dbg_prefix(ctx), retired_pages_before, + MDBX_PNL_GETSIZE(txn->tw.retired_pages)); break; } @@ -290,19 +269,16 @@ static int gcu_retired(MDBX_txn *txn, gcu_t *ctx) { if (unlikely(err != MDBX_SUCCESS)) return err; if (ctx->retired_stored >= MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { - TRACE("%s: retired-list changed (%zu -> %zu), retry", dbg_prefix(ctx), - retired_pages_before, MDBX_PNL_GETSIZE(txn->tw.retired_pages)); + TRACE("%s: retired-list changed (%zu -> %zu), retry", dbg_prefix(ctx), retired_pages_before, + MDBX_PNL_GETSIZE(txn->tw.retired_pages)); break; } } key.iov_len = sizeof(txnid_t); key.iov_base = &ctx->bigfoot; - const size_t left = - MDBX_PNL_GETSIZE(txn->tw.retired_pages) - ctx->retired_stored; + const size_t left = MDBX_PNL_GETSIZE(txn->tw.retired_pages) - ctx->retired_stored; const size_t chunk = - (left > txn->env->maxgc_large1page && ctx->bigfoot < MAX_TXNID) - ? txn->env->maxgc_large1page - : left; + (left > txn->env->maxgc_large1page && ctx->bigfoot < MAX_TXNID) ? txn->env->maxgc_large1page : left; data.iov_len = (chunk + 1) * sizeof(pgno_t); err = cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE); if (unlikely(err != MDBX_SUCCESS)) @@ -318,9 +294,7 @@ static int gcu_retired(MDBX_txn *txn, gcu_t *ctx) { #endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */ if (retired_pages_before == MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { - const size_t at = (is_lifo(txn) == MDBX_PNL_ASCENDING) - ? 
left - chunk - : ctx->retired_stored; + const size_t at = (is_lifo(txn) == MDBX_PNL_ASCENDING) ? left - chunk : ctx->retired_stored; pgno_t *const begin = txn->tw.retired_pages + at; /* MDBX_PNL_ASCENDING == false && LIFO == false: * - the larger pgno is at the beginning of retired list @@ -332,15 +306,11 @@ static int gcu_retired(MDBX_txn *txn, gcu_t *ctx) { *begin = (pgno_t)chunk; memcpy(data.iov_base, begin, data.iov_len); *begin = save; - TRACE("%s: put-retired/bigfoot @ %" PRIaTXN - " (slice #%u) #%zu [%zu..%zu] of %zu", - dbg_prefix(ctx), ctx->bigfoot, - (unsigned)(ctx->bigfoot - txn->txnid), chunk, at, at + chunk, - retired_pages_before); + TRACE("%s: put-retired/bigfoot @ %" PRIaTXN " (slice #%u) #%zu [%zu..%zu] of %zu", dbg_prefix(ctx), + ctx->bigfoot, (unsigned)(ctx->bigfoot - txn->txnid), chunk, at, at + chunk, retired_pages_before); } ctx->retired_stored += chunk; - } while (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages) && - (++ctx->bigfoot, true)); + } while (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages) && (++ctx->bigfoot, true)); } while (retired_pages_before != MDBX_PNL_GETSIZE(txn->tw.retired_pages)); #else /* Write to last page of GC */ @@ -369,13 +339,11 @@ static int gcu_retired(MDBX_txn *txn, gcu_t *ctx) { tASSERT(txn, data.iov_len == MDBX_PNL_SIZEOF(txn->tw.retired_pages)); memcpy(data.iov_base, txn->tw.retired_pages, data.iov_len); - TRACE("%s: put-retired #%zu @ %" PRIaTXN, dbg_prefix(ctx), - ctx->retired_stored, txn->txnid); + TRACE("%s: put-retired #%zu @ %" PRIaTXN, dbg_prefix(ctx), ctx->retired_stored, txn->txnid); #endif /* MDBX_ENABLE_BIGFOOT */ if (LOG_ENABLED(MDBX_LOG_EXTRA)) { size_t i = ctx->retired_stored; - DEBUG_EXTRA("txn %" PRIaTXN " root %" PRIaPGNO " num %zu, retired-PNL", - txn->txnid, txn->dbs[FREE_DBI].root, i); + DEBUG_EXTRA("txn %" PRIaTXN " root %" PRIaPGNO " num %zu, retired-PNL", txn->txnid, txn->dbs[FREE_DBI].root, i); for (; i; i--) DEBUG_EXTRA_PRINT(" %" PRIaPGNO, 
txn->tw.retired_pages[i]); DEBUG_EXTRA_PRINT("%s\n", "."); @@ -388,8 +356,7 @@ typedef struct gcu_rid_result { txnid_t rid; } rid_t; -static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, - const size_t left) { +static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, const size_t left) { rid_t r; if (is_lifo(txn)) { if (txn->tw.gc.reclaimed == nullptr) { @@ -400,8 +367,7 @@ static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, } } if (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) < txl_max && - left > (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot) * - txn->env->maxgc_large1page && + left > (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot) * txn->env->maxgc_large1page && !ctx->dense) { /* Hужен свободный для для сохранения списка страниц. */ bool need_cleanup = false; @@ -411,15 +377,11 @@ static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, r.err = gc_alloc_ex(&ctx->cursor, 0, ALLOC_RESERVE).err; snap_oldest = txn->env->lck->cached_oldest.weak; if (likely(r.err == MDBX_SUCCESS)) { - TRACE("%s: took @%" PRIaTXN " from GC", dbg_prefix(ctx), - MDBX_PNL_LAST(txn->tw.gc.reclaimed)); + TRACE("%s: took @%" PRIaTXN " from GC", dbg_prefix(ctx), MDBX_PNL_LAST(txn->tw.gc.reclaimed)); need_cleanup = true; } - } while (r.err == MDBX_SUCCESS && - MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) < txl_max && - left > - (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot) * - txn->env->maxgc_large1page); + } while (r.err == MDBX_SUCCESS && MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) < txl_max && + left > (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot) * txn->env->maxgc_large1page); if (likely(r.err == MDBX_SUCCESS)) { TRACE("%s: got enough from GC.", dbg_prefix(ctx)); @@ -443,20 +405,16 @@ static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, /* no reclaimable GC entries, * therefore no entries with ID < mdbx_find_oldest(txn) */ txn->tw.gc.last_reclaimed = ctx->rid = snap_oldest; - TRACE("%s: none recycled yet, set rid to 
@%" PRIaTXN, dbg_prefix(ctx), - ctx->rid); + TRACE("%s: none recycled yet, set rid to @%" PRIaTXN, dbg_prefix(ctx), ctx->rid); } /* В GC нет годных к переработке записей, * будем использовать свободные id в обратном порядке. */ while (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) < txl_max && - left > - (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot) * - txn->env->maxgc_large1page) { + left > (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot) * txn->env->maxgc_large1page) { if (unlikely(ctx->rid <= MIN_TXNID)) { ctx->dense = true; - if (unlikely(MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) <= - ctx->reused_slot)) { + if (unlikely(MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) <= ctx->reused_slot)) { NOTICE("** restart: reserve depleted (reused_gc_slot %zu >= " "gc.reclaimed %zu)", ctx->reused_slot, MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)); @@ -470,21 +428,16 @@ static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, MDBX_val key = {&ctx->rid, sizeof(ctx->rid)}, data; r.err = cursor_seek(&ctx->cursor, &key, &data, MDBX_SET_KEY).err; if (unlikely(r.err == MDBX_SUCCESS)) { - DEBUG("%s: GC's id %" PRIaTXN " is present, going to first", - dbg_prefix(ctx), ctx->rid); + DEBUG("%s: GC's id %" PRIaTXN " is present, going to first", dbg_prefix(ctx), ctx->rid); r.err = outer_first(&ctx->cursor, &key, nullptr); - if (unlikely(r.err != MDBX_SUCCESS || - key.iov_len != sizeof(txnid_t))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid GC-key size", (unsigned)key.iov_len); + if (unlikely(r.err != MDBX_SUCCESS || key.iov_len != sizeof(txnid_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC-key size", (unsigned)key.iov_len); r.err = MDBX_CORRUPTED; goto return_error; } const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base); if (unlikely(gc_first <= INITIAL_TXNID)) { - NOTICE("%s: no free GC's id(s) less than %" PRIaTXN - " (going dense-mode)", - dbg_prefix(ctx), ctx->rid); + NOTICE("%s: no free GC's id(s) less than %" 
PRIaTXN " (going dense-mode)", dbg_prefix(ctx), ctx->rid); ctx->dense = true; goto return_restart; } @@ -501,18 +454,15 @@ static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, * with less fragmentation. */ need_cleanup = true; else - ctx->cleaned_slot += - 1 /* mark cleanup is not needed for added slot. */; + ctx->cleaned_slot += 1 /* mark cleanup is not needed for added slot. */; - TRACE("%s: append @%" PRIaTXN - " to lifo-reclaimed, cleaned-gc-slot = %zu", - dbg_prefix(ctx), ctx->rid, ctx->cleaned_slot); + TRACE("%s: append @%" PRIaTXN " to lifo-reclaimed, cleaned-gc-slot = %zu", dbg_prefix(ctx), ctx->rid, + ctx->cleaned_slot); } if (need_cleanup) { if (ctx->cleaned_slot) { - TRACE("%s: restart to clear and re-create GC entries", - dbg_prefix(ctx)); + TRACE("%s: restart to clear and re-create GC entries", dbg_prefix(ctx)); goto return_restart; } goto return_continue; @@ -522,8 +472,7 @@ static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, const size_t i = MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot; tASSERT(txn, i > 0 && i <= MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)); r.rid = txn->tw.gc.reclaimed[i]; - TRACE("%s: take @%" PRIaTXN " from lifo-reclaimed[%zu]", dbg_prefix(ctx), - r.rid, i); + TRACE("%s: take @%" PRIaTXN " from lifo-reclaimed[%zu]", dbg_prefix(ctx), r.rid, i); } else { tASSERT(txn, txn->tw.gc.reclaimed == nullptr); if (unlikely(ctx->rid == 0)) { @@ -532,8 +481,7 @@ static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, r.err = outer_first(&ctx->cursor, &key, nullptr); if (likely(r.err == MDBX_SUCCESS)) { if (unlikely(key.iov_len != sizeof(txnid_t))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid GC-key size", (unsigned)key.iov_len); + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC-key size", (unsigned)key.iov_len); r.err = MDBX_CORRUPTED; goto return_error; } @@ -600,18 +548,15 @@ retry: ctx->loop += !(ctx->prev_first_unallocated > txn->geo.first_unallocated); TRACE(">> 
restart, loop %u", ctx->loop); - tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - - MDBX_ENABLE_REFUND)); + tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); tASSERT(txn, dpl_check(txn)); if (unlikely(/* paranoia */ ctx->loop > ((MDBX_DEBUG > 0) ? 12 : 42))) { - ERROR("txn #%" PRIaTXN " too more loops %u, bailout", txn->txnid, - ctx->loop); + ERROR("txn #%" PRIaTXN " too more loops %u, bailout", txn->txnid, ctx->loop); rc = MDBX_PROBLEM; goto bailout; } - if (unlikely(ctx->dense || - ctx->prev_first_unallocated > txn->geo.first_unallocated)) { + if (unlikely(ctx->dense || ctx->prev_first_unallocated > txn->geo.first_unallocated)) { rc = clean_stored_retired(txn, ctx); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; @@ -630,13 +575,10 @@ retry: /* Come back here after each Put() in case retired-list changed */ TRACE("%s", " >> continue"); - tASSERT(txn, - pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - - MDBX_ENABLE_REFUND)); + tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); MDBX_val key, data; if (is_lifo(txn)) { - if (ctx->cleaned_slot < - (txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0)) { + if (ctx->cleaned_slot < (txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0)) { ctx->reserved = 0; ctx->cleaned_slot = 0; ctx->reused_slot = 0; @@ -644,8 +586,7 @@ retry: /* LY: cleanup reclaimed records. 
*/ do { ctx->cleaned_id = txn->tw.gc.reclaimed[++ctx->cleaned_slot]; - tASSERT(txn, ctx->cleaned_slot > 0 && - ctx->cleaned_id <= env->lck->cached_oldest.weak); + tASSERT(txn, ctx->cleaned_slot > 0 && ctx->cleaned_id <= env->lck->cached_oldest.weak); key.iov_base = &ctx->cleaned_id; key.iov_len = sizeof(ctx->cleaned_id); rc = cursor_seek(&ctx->cursor, &key, nullptr, MDBX_SET).err; @@ -657,8 +598,7 @@ retry: if (unlikely(rc != MDBX_SUCCESS)) goto bailout; tASSERT(txn, ctx->cleaned_id <= env->lck->cached_oldest.weak); - TRACE("%s: cleanup-reclaimed-id [%zu]%" PRIaTXN, dbg_prefix(ctx), - ctx->cleaned_slot, ctx->cleaned_id); + TRACE("%s: cleanup-reclaimed-id [%zu]%" PRIaTXN, dbg_prefix(ctx), ctx->cleaned_slot, ctx->cleaned_id); tASSERT(txn, *txn->cursors == &ctx->cursor); rc = cursor_del(&ctx->cursor, 0); if (unlikely(rc != MDBX_SUCCESS)) @@ -668,8 +608,7 @@ retry: } } else { /* Удаляем оставшиеся вынутые из GC записи. */ - while (txn->tw.gc.last_reclaimed && - ctx->cleaned_id <= txn->tw.gc.last_reclaimed) { + while (txn->tw.gc.last_reclaimed && ctx->cleaned_id <= txn->tw.gc.last_reclaimed) { rc = outer_first(&ctx->cursor, &key, nullptr); if (rc == MDBX_NOTFOUND) { ctx->cleaned_id = txn->tw.gc.last_reclaimed + 1; @@ -680,10 +619,8 @@ retry: } if (unlikely(rc != MDBX_SUCCESS)) goto bailout; - if (!MDBX_DISABLE_VALIDATION && - unlikely(key.iov_len != sizeof(txnid_t))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid GC-key size", (unsigned)key.iov_len); + if (!MDBX_DISABLE_VALIDATION && unlikely(key.iov_len != sizeof(txnid_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC-key size", (unsigned)key.iov_len); rc = MDBX_CORRUPTED; goto bailout; } @@ -700,8 +637,7 @@ retry: goto bailout; tASSERT(txn, ctx->cleaned_id <= txn->tw.gc.last_reclaimed); tASSERT(txn, ctx->cleaned_id <= env->lck->cached_oldest.weak); - TRACE("%s: cleanup-reclaimed-id %" PRIaTXN, dbg_prefix(ctx), - ctx->cleaned_id); + TRACE("%s: cleanup-reclaimed-id %" 
PRIaTXN, dbg_prefix(ctx), ctx->cleaned_id); tASSERT(txn, *txn->cursors == &ctx->cursor); rc = cursor_del(&ctx->cursor, 0); if (unlikely(rc != MDBX_SUCCESS)) @@ -709,9 +645,7 @@ retry: } } - tASSERT(txn, - pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - - MDBX_ENABLE_REFUND)); + tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); tASSERT(txn, dpl_check(txn)); if (AUDIT_ENABLED()) { rc = audit_ex(txn, ctx->retired_stored, false); @@ -721,9 +655,7 @@ retry: /* return suitable into unallocated space */ if (txn_refund(txn)) { - tASSERT(txn, - pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - - MDBX_ENABLE_REFUND)); + tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); if (AUDIT_ENABLED()) { rc = audit_ex(txn, ctx->retired_stored, false); if (unlikely(rc != MDBX_SUCCESS)) @@ -743,10 +675,9 @@ retry: } if (unlikely(ctx->reserved > MDBX_PNL_GETSIZE(txn->tw.relist)) && - (ctx->loop < 5 || ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.relist) > - env->maxgc_large1page / 2)) { - TRACE("%s: reclaimed-list changed %zu -> %zu, retry", dbg_prefix(ctx), - ctx->amount, MDBX_PNL_GETSIZE(txn->tw.relist)); + (ctx->loop < 5 || ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.relist) > env->maxgc_large1page / 2)) { + TRACE("%s: reclaimed-list changed %zu -> %zu, retry", dbg_prefix(ctx), ctx->amount, + MDBX_PNL_GETSIZE(txn->tw.relist)); ctx->reserve_adj += ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.relist); goto retry; } @@ -760,9 +691,7 @@ retry: continue; } - tASSERT(txn, - pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - - MDBX_ENABLE_REFUND)); + tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); tASSERT(txn, txn->tw.loose_count == 0); TRACE("%s", " >> reserving"); @@ -776,8 +705,7 @@ retry: "lifo-reclaimed-slots %zu, " "reused-gc-slots %zu", dbg_prefix(ctx), ctx->amount, ctx->reserved, ctx->reserve_adj, left, 
- txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0, - ctx->reused_slot); + txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0, ctx->reused_slot); if (0 >= (intptr_t)left) break; @@ -795,59 +723,46 @@ retry: size_t chunk = left; if (unlikely(left > env->maxgc_large1page)) { - const size_t avail_gc_slots = - txn->tw.gc.reclaimed - ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot + 1 - : (ctx->rid < INT16_MAX) ? (size_t)ctx->rid - : INT16_MAX; + const size_t avail_gc_slots = txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot + 1 + : (ctx->rid < INT16_MAX) ? (size_t)ctx->rid + : INT16_MAX; if (likely(avail_gc_slots > 1)) { #if MDBX_ENABLE_BIGFOOT chunk = env->maxgc_large1page; - if (avail_gc_slots < INT16_MAX && - unlikely(left > env->maxgc_large1page * avail_gc_slots)) + if (avail_gc_slots < INT16_MAX && unlikely(left > env->maxgc_large1page * avail_gc_slots)) /* TODO: Можно смотреть последовательности какой длины есть в relist * и пробовать нарезать куски соответствующего размера. * Смысл в том, чтобы не дробить последовательности страниц, * а использовать целиком. */ - chunk = env->maxgc_large1page + - left / (env->maxgc_large1page * avail_gc_slots) * - env->maxgc_large1page; + chunk = env->maxgc_large1page + left / (env->maxgc_large1page * avail_gc_slots) * env->maxgc_large1page; #else if (chunk < env->maxgc_large1page * 2) chunk /= 2; else { const size_t prefer_max_scatter = 257; const size_t threshold = - env->maxgc_large1page * ((avail_gc_slots < prefer_max_scatter) - ? avail_gc_slots - : prefer_max_scatter); + env->maxgc_large1page * ((avail_gc_slots < prefer_max_scatter) ? 
avail_gc_slots : prefer_max_scatter); if (left < threshold) chunk = env->maxgc_large1page; else { const size_t tail = left - threshold + env->maxgc_large1page + 1; size_t span = 1; - size_t avail = ((pgno2bytes(env, span) - PAGEHDRSZ) / - sizeof(pgno_t)) /* - 1 + span */; + size_t avail = ((pgno2bytes(env, span) - PAGEHDRSZ) / sizeof(pgno_t)) /* - 1 + span */; if (tail > avail) { for (size_t i = ctx->amount - span; i > 0; --i) { if (MDBX_PNL_ASCENDING ? (txn->tw.relist[i] + span) - : (txn->tw.relist[i] - span) == - txn->tw.relist[i + span]) { + : (txn->tw.relist[i] - span) == txn->tw.relist[i + span]) { span += 1; - avail = - ((pgno2bytes(env, span) - PAGEHDRSZ) / sizeof(pgno_t)) - - 1 + span; + avail = ((pgno2bytes(env, span) - PAGEHDRSZ) / sizeof(pgno_t)) - 1 + span; if (avail >= tail) break; } } } - chunk = (avail >= tail) ? tail - span - : (avail_gc_slots > 3 && - ctx->reused_slot < prefer_max_scatter - 3) - ? avail - span - : tail; + chunk = (avail >= tail) ? tail - span + : (avail_gc_slots > 3 && ctx->reused_slot < prefer_max_scatter - 3) ? 
avail - span + : tail; } } #endif /* MDBX_ENABLE_BIGFOOT */ @@ -859,57 +774,43 @@ retry: "%" PRIaTXN, dbg_prefix(ctx), ctx->rid, ctx->reused_slot, reservation_gc_id); - TRACE("%s: chunk %zu, gc-per-ovpage %u", dbg_prefix(ctx), chunk, - env->maxgc_large1page); + TRACE("%s: chunk %zu, gc-per-ovpage %u", dbg_prefix(ctx), chunk, env->maxgc_large1page); tASSERT(txn, reservation_gc_id <= env->lck->cached_oldest.weak); if (unlikely(reservation_gc_id < MIN_TXNID || - reservation_gc_id > - atomic_load64(&env->lck->cached_oldest, mo_Relaxed))) { - ERROR("** internal error (reservation_gc_id %" PRIaTXN ")", - reservation_gc_id); + reservation_gc_id > atomic_load64(&env->lck->cached_oldest, mo_Relaxed))) { + ERROR("** internal error (reservation_gc_id %" PRIaTXN ")", reservation_gc_id); rc = MDBX_PROBLEM; goto bailout; } - tASSERT(txn, - reservation_gc_id >= MIN_TXNID && reservation_gc_id <= MAX_TXNID); + tASSERT(txn, reservation_gc_id >= MIN_TXNID && reservation_gc_id <= MAX_TXNID); key.iov_len = sizeof(reservation_gc_id); key.iov_base = (void *)&reservation_gc_id; data.iov_len = (chunk + 1) * sizeof(pgno_t); - TRACE("%s: reserve %zu [%zu...%zu) @%" PRIaTXN, dbg_prefix(ctx), chunk, - ctx->reserved + 1, ctx->reserved + chunk + 1, reservation_gc_id); + TRACE("%s: reserve %zu [%zu...%zu) @%" PRIaTXN, dbg_prefix(ctx), chunk, ctx->reserved + 1, + ctx->reserved + chunk + 1, reservation_gc_id); prepare_backlog(txn, ctx); rc = cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE | MDBX_NOOVERWRITE); - tASSERT(txn, - pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - - MDBX_ENABLE_REFUND)); + tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; zeroize_reserved(env, data); ctx->reserved += chunk; - TRACE("%s: reserved %zu (+%zu), continue", dbg_prefix(ctx), ctx->reserved, - chunk); + TRACE("%s: reserved %zu (+%zu), continue", dbg_prefix(ctx), ctx->reserved, chunk); continue; } - 
tASSERT( - txn, - ctx->cleaned_slot == - (txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0)); + tASSERT(txn, ctx->cleaned_slot == (txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0)); TRACE("%s", " >> filling"); /* Fill in the reserved records */ size_t excess_slots = 0; - ctx->fill_idx = - txn->tw.gc.reclaimed - ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot - : ctx->reused_slot; + ctx->fill_idx = txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot : ctx->reused_slot; rc = MDBX_SUCCESS; - tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - - MDBX_ENABLE_REFUND)); + tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); tASSERT(txn, dpl_check(txn)); if (ctx->amount) { MDBX_val key, data; @@ -930,20 +831,17 @@ retry: while (true) { txnid_t fill_gc_id; - TRACE("%s: left %zu of %zu", dbg_prefix(ctx), left, - MDBX_PNL_GETSIZE(txn->tw.relist)); + TRACE("%s: left %zu of %zu", dbg_prefix(ctx), left, MDBX_PNL_GETSIZE(txn->tw.relist)); if (txn->tw.gc.reclaimed == nullptr) { tASSERT(txn, is_lifo(txn) == 0); - fill_gc_id = - key.iov_base ? unaligned_peek_u64(4, key.iov_base) : MIN_TXNID; + fill_gc_id = key.iov_base ? unaligned_peek_u64(4, key.iov_base) : MIN_TXNID; if (ctx->fill_idx == 0 || fill_gc_id > txn->tw.gc.last_reclaimed) { if (!left) break; - NOTICE("** restart: reserve depleted (fill_idx %zu, fill_id %" PRIaTXN - " > last_reclaimed %" PRIaTXN ", left %zu", + NOTICE("** restart: reserve depleted (fill_idx %zu, fill_id %" PRIaTXN " > last_reclaimed %" PRIaTXN + ", left %zu", ctx->fill_idx, fill_gc_id, txn->tw.gc.last_reclaimed, left); - ctx->reserve_adj = - (ctx->reserve_adj > left) ? ctx->reserve_adj - left : 0; + ctx->reserve_adj = (ctx->reserve_adj > left) ? 
ctx->reserve_adj - left : 0; goto retry; } ctx->fill_idx -= 1; @@ -955,26 +853,20 @@ retry: NOTICE("** restart: reserve depleted (fill_idx %zu >= " "gc.reclaimed %zu, left %zu", ctx->fill_idx, MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed), left); - ctx->reserve_adj = - (ctx->reserve_adj > left) ? ctx->reserve_adj - left : 0; + ctx->reserve_adj = (ctx->reserve_adj > left) ? ctx->reserve_adj - left : 0; goto retry; } ctx->fill_idx += 1; fill_gc_id = txn->tw.gc.reclaimed[ctx->fill_idx]; - TRACE("%s: seek-reservation @%" PRIaTXN " at gc.reclaimed[%zu]", - dbg_prefix(ctx), fill_gc_id, ctx->fill_idx); + TRACE("%s: seek-reservation @%" PRIaTXN " at gc.reclaimed[%zu]", dbg_prefix(ctx), fill_gc_id, ctx->fill_idx); key.iov_base = &fill_gc_id; key.iov_len = sizeof(fill_gc_id); rc = cursor_seek(&ctx->cursor, &key, &data, MDBX_SET_KEY).err; if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } - tASSERT(txn, - ctx->cleaned_slot == (txn->tw.gc.reclaimed - ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - : 0)); - tASSERT(txn, - fill_gc_id > 0 && fill_gc_id <= env->lck->cached_oldest.weak); + tASSERT(txn, ctx->cleaned_slot == (txn->tw.gc.reclaimed ? 
MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0)); + tASSERT(txn, fill_gc_id > 0 && fill_gc_id <= env->lck->cached_oldest.weak); key.iov_base = &fill_gc_id; key.iov_len = sizeof(fill_gc_id); @@ -983,14 +875,12 @@ retry: if (unlikely(chunk > left)) { const size_t delta = chunk - left; excess += delta; - TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, dbg_prefix(ctx), chunk, - left, fill_gc_id); + TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, dbg_prefix(ctx), chunk, left, fill_gc_id); if (!left) { excess_slots += 1; goto next; } - if ((ctx->loop < 5 && delta > (ctx->loop / 2)) || - delta > env->maxgc_large1page) + if ((ctx->loop < 5 && delta > (ctx->loop / 2)) || delta > env->maxgc_large1page) data.iov_len = (left + 1) * sizeof(pgno_t); chunk = left; } @@ -999,29 +889,23 @@ retry: goto bailout; zeroize_reserved(env, data); - if (unlikely(txn->tw.loose_count || - ctx->amount != MDBX_PNL_GETSIZE(txn->tw.relist))) { - NOTICE("** restart: reclaimed-list changed (%zu -> %zu, loose +%zu)", - ctx->amount, MDBX_PNL_GETSIZE(txn->tw.relist), - txn->tw.loose_count); + if (unlikely(txn->tw.loose_count || ctx->amount != MDBX_PNL_GETSIZE(txn->tw.relist))) { + NOTICE("** restart: reclaimed-list changed (%zu -> %zu, loose +%zu)", ctx->amount, + MDBX_PNL_GETSIZE(txn->tw.relist), txn->tw.loose_count); if (ctx->loop < 5 || (ctx->loop > 10 && (ctx->loop & 1))) goto retry_clean_adj; goto retry; } - if (unlikely(txn->tw.gc.reclaimed - ? ctx->cleaned_slot < - MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - : ctx->cleaned_id < txn->tw.gc.last_reclaimed)) { + if (unlikely(txn->tw.gc.reclaimed ? 
ctx->cleaned_slot < MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) + : ctx->cleaned_id < txn->tw.gc.last_reclaimed)) { NOTICE("%s", "** restart: reclaimed-slots changed"); goto retry; } - if (unlikely(ctx->retired_stored != - MDBX_PNL_GETSIZE(txn->tw.retired_pages))) { - tASSERT(txn, - ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)); - NOTICE("** restart: retired-list growth (%zu -> %zu)", - ctx->retired_stored, MDBX_PNL_GETSIZE(txn->tw.retired_pages)); + if (unlikely(ctx->retired_stored != MDBX_PNL_GETSIZE(txn->tw.retired_pages))) { + tASSERT(txn, ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)); + NOTICE("** restart: retired-list growth (%zu -> %zu)", ctx->retired_stored, + MDBX_PNL_GETSIZE(txn->tw.retired_pages)); goto retry; } @@ -1030,9 +914,8 @@ retry: pgno_t *src = MDBX_PNL_BEGIN(txn->tw.relist) + left - chunk; memcpy(dst, src, chunk * sizeof(pgno_t)); pgno_t *from = src, *to = src + chunk; - TRACE("%s: fill %zu [ %zu:%" PRIaPGNO "...%zu:%" PRIaPGNO "] @%" PRIaTXN, - dbg_prefix(ctx), chunk, from - txn->tw.relist, from[0], - to - txn->tw.relist, to[-1], fill_gc_id); + TRACE("%s: fill %zu [ %zu:%" PRIaPGNO "...%zu:%" PRIaPGNO "] @%" PRIaTXN, dbg_prefix(ctx), chunk, + from - txn->tw.relist, from[0], to - txn->tw.relist, to[-1], fill_gc_id); left -= chunk; if (AUDIT_ENABLED()) { @@ -1063,16 +946,14 @@ retry: while (n >= env->maxgc_large1page) adj -= n /= env->maxgc_large1page; ctx->reserve_adj += adj; - TRACE("%s: extra %zu reserved space, adj +%zu (%zu)", dbg_prefix(ctx), - excess, adj, ctx->reserve_adj); + TRACE("%s: extra %zu reserved space, adj +%zu (%zu)", dbg_prefix(ctx), excess, adj, ctx->reserve_adj); } } tASSERT(txn, rc == MDBX_SUCCESS); - if (unlikely(txn->tw.loose_count != 0 || - ctx->amount != MDBX_PNL_GETSIZE(txn->tw.relist))) { - NOTICE("** restart: got %zu loose pages (reclaimed-list %zu -> %zu)", - txn->tw.loose_count, ctx->amount, MDBX_PNL_GETSIZE(txn->tw.relist)); + if (unlikely(txn->tw.loose_count != 0 || ctx->amount != 
MDBX_PNL_GETSIZE(txn->tw.relist))) { + NOTICE("** restart: got %zu loose pages (reclaimed-list %zu -> %zu)", txn->tw.loose_count, ctx->amount, + MDBX_PNL_GETSIZE(txn->tw.relist)); goto retry; } @@ -1080,14 +961,12 @@ retry: const bool will_retry = ctx->loop < 5 || excess_slots > 1; NOTICE("** %s: reserve excess (excess-slots %zu, filled-slot %zu, adj %zu, " "loop %u)", - will_retry ? "restart" : "ignore", excess_slots, ctx->fill_idx, - ctx->reserve_adj, ctx->loop); + will_retry ? "restart" : "ignore", excess_slots, ctx->fill_idx, ctx->reserve_adj, ctx->loop); if (will_retry) goto retry; } - tASSERT(txn, txn->tw.gc.reclaimed == nullptr || - ctx->cleaned_slot == MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)); + tASSERT(txn, txn->tw.gc.reclaimed == nullptr || ctx->cleaned_slot == MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)); bailout: txn->cursors[FREE_DBI] = ctx->cursor.next; diff --git a/src/gc.h b/src/gc.h index a8a68a24..b3ec23cf 100644 --- a/src/gc.h +++ b/src/gc.h @@ -34,8 +34,7 @@ static inline int gc_update_init(MDBX_txn *txn, gcu_t *ctx) { #define ALLOC_DEFAULT 0 #define ALLOC_RESERVE 1 #define ALLOC_UNIMPORTANT 2 -MDBX_INTERNAL pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, - uint8_t flags); +MDBX_INTERNAL pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, uint8_t flags); MDBX_INTERNAL pgr_t gc_alloc_single(const MDBX_cursor *const mc); MDBX_INTERNAL int gc_update(MDBX_txn *txn, gcu_t *ctx); diff --git a/src/global.c b/src/global.c index 88432580..69340319 100644 --- a/src/global.c +++ b/src/global.c @@ -122,8 +122,7 @@ extern void __gmon_start__(void) __attribute__((__weak__)); #endif /* ENABLE_GPROF */ MDBX_EXCLUDE_FOR_GPROF -__cold static __attribute__((__constructor__)) void -mdbx_global_constructor(void) { +__cold static __attribute__((__constructor__)) void mdbx_global_constructor(void) { #ifdef ENABLE_GPROF if (!&__gmon_start__) monstartup((uintptr_t)&_init, (uintptr_t)&_fini); @@ -154,9 +153,8 @@ mdbx_global_constructor(void) { * 
So, the REQUIREMENTS for this code: * 1. MUST detect WSL1 without false-negatives. * 2. DESIRABLE detect WSL2 but without the risk of violating the first. */ - globals.running_on_WSL1 = probe_for_WSL(buffer.version) == 1 || - probe_for_WSL(buffer.sysname) == 1 || - probe_for_WSL(buffer.release) == 1; + globals.running_on_WSL1 = + probe_for_WSL(buffer.version) == 1 || probe_for_WSL(buffer.sysname) == 1 || probe_for_WSL(buffer.release) == 1; } #endif /* Linux */ @@ -164,8 +162,7 @@ mdbx_global_constructor(void) { } MDBX_EXCLUDE_FOR_GPROF -__cold static __attribute__((__destructor__)) void -mdbx_global_destructor(void) { +__cold static __attribute__((__destructor__)) void mdbx_global_destructor(void) { mdbx_fini(); #ifdef ENABLE_GPROF if (!&__gmon_start__) @@ -180,13 +177,11 @@ mdbx_global_destructor(void) { struct libmdbx_globals globals; __cold static void mdbx_init(void) { - globals.runtime_flags = ((MDBX_DEBUG) > 0) * MDBX_DBG_ASSERT + - ((MDBX_DEBUG) > 1) * MDBX_DBG_AUDIT; + globals.runtime_flags = ((MDBX_DEBUG) > 0) * MDBX_DBG_ASSERT + ((MDBX_DEBUG) > 1) * MDBX_DBG_AUDIT; globals.loglevel = MDBX_LOG_FATAL; ENSURE(nullptr, osal_fastmutex_init(&globals.debug_lock) == 0); osal_ctor(); - assert(globals.sys_pagesize > 0 && - (globals.sys_pagesize & (globals.sys_pagesize - 1)) == 0); + assert(globals.sys_pagesize > 0 && (globals.sys_pagesize & (globals.sys_pagesize - 1)) == 0); rthc_ctor(); #if MDBX_DEBUG ENSURE(nullptr, troika_verify_fsm()); diff --git a/src/internals.h b/src/internals.h index 9d45fda6..2dca2d23 100644 --- a/src/internals.h +++ b/src/internals.h @@ -156,9 +156,8 @@ enum txn_flags { txn_shrink_allowed = UINT32_C(0x40000000), txn_parked = MDBX_TXN_PARKED, txn_gc_drained = 0x40 /* GC was depleted up to oldest reader */, - txn_state_flags = MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | - MDBX_TXN_SPILLS | MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID | - txn_gc_drained + txn_state_flags = MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | 
MDBX_TXN_SPILLS | MDBX_TXN_HAS_CHILD | + MDBX_TXN_INVALID | txn_gc_drained }; /* A database transaction. @@ -336,12 +335,9 @@ enum env_flags { /* Only a subset of the mdbx_env flags can be changed * at runtime. Changing other flags requires closing the * environment and re-opening it with the new flags. */ - ENV_CHANGEABLE_FLAGS = MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | - DEPRECATED_MAPASYNC | MDBX_NOMEMINIT | - DEPRECATED_COALESCE | MDBX_PAGEPERTURB | MDBX_ACCEDE | - MDBX_VALIDATION, - ENV_CHANGELESS_FLAGS = MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | - MDBX_NOSTICKYTHREADS | MDBX_NORDAHEAD | + ENV_CHANGEABLE_FLAGS = MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | DEPRECATED_MAPASYNC | MDBX_NOMEMINIT | + DEPRECATED_COALESCE | MDBX_PAGEPERTURB | MDBX_ACCEDE | MDBX_VALIDATION, + ENV_CHANGELESS_FLAGS = MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOSTICKYTHREADS | MDBX_NORDAHEAD | MDBX_LIFORECLAIM | MDBX_EXCLUSIVE, ENV_USABLE_FLAGS = ENV_CHANGEABLE_FLAGS | ENV_CHANGELESS_FLAGS }; @@ -368,8 +364,8 @@ struct MDBX_env { uint16_t subpage_reserve_prereq; uint16_t subpage_reserve_limit; atomic_pgno_t mlocked_pgno; - uint8_t ps2ln; /* log2 of DB page size */ - int8_t stuck_meta; /* recovery-only: target meta page or less that zero */ + uint8_t ps2ln; /* log2 of DB page size */ + int8_t stuck_meta; /* recovery-only: target meta page or less that zero */ uint16_t merge_threshold, merge_threshold_gc; /* pages emptier than this are candidates for merging */ unsigned max_readers; /* size of the reader table */ @@ -385,7 +381,7 @@ struct MDBX_env { kvx_t *kvs; /* array of auxiliary key-value properties */ uint8_t *__restrict dbs_flags; /* array of flags from tree_t.flags */ mdbx_atomic_uint32_t *dbi_seqs; /* array of dbi sequence numbers */ - unsigned maxgc_large1page; /* Number of pgno_t fit in a single large page */ + unsigned maxgc_large1page; /* Number of pgno_t fit in a single large page */ unsigned maxgc_per_branch; uint32_t registered_reader_pid; /* have liveness lock in reader 
table */ void *userctx; /* User-settable context */ @@ -492,9 +488,7 @@ struct MDBX_env { #endif /* ------------------------------------------------- stub for lck-less mode */ - mdbx_atomic_uint64_t - lckless_placeholder[(sizeof(lck_t) + MDBX_CACHELINE_SIZE - 1) / - sizeof(mdbx_atomic_uint64_t)]; + mdbx_atomic_uint64_t lckless_placeholder[(sizeof(lck_t) + MDBX_CACHELINE_SIZE - 1) / sizeof(mdbx_atomic_uint64_t)]; }; /*----------------------------------------------------------------------------*/ @@ -509,8 +503,8 @@ struct MDBX_env { #define DEFAULT_READERS 61 enum db_flags { - DB_PERSISTENT_FLAGS = MDBX_REVERSEKEY | MDBX_DUPSORT | MDBX_INTEGERKEY | - MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP, + DB_PERSISTENT_FLAGS = + MDBX_REVERSEKEY | MDBX_DUPSORT | MDBX_INTEGERKEY | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP, /* mdbx_dbi_open() flags */ DB_USABLE_FLAGS = DB_PERSISTENT_FLAGS | MDBX_CREATE | MDBX_DB_ACCEDE, @@ -524,27 +518,19 @@ enum db_flags { MDBX_MAYBE_UNUSED static void static_checks(void) { STATIC_ASSERT(MDBX_WORDBITS == sizeof(void *) * CHAR_BIT); STATIC_ASSERT(UINT64_C(0x80000000) == (uint32_t)ENV_FATAL_ERROR); - STATIC_ASSERT_MSG(INT16_MAX - CORE_DBS == MDBX_MAX_DBI, - "Oops, MDBX_MAX_DBI or CORE_DBS?"); + STATIC_ASSERT_MSG(INT16_MAX - CORE_DBS == MDBX_MAX_DBI, "Oops, MDBX_MAX_DBI or CORE_DBS?"); STATIC_ASSERT_MSG((unsigned)(MDBX_DB_ACCEDE | MDBX_CREATE) == - ((DB_USABLE_FLAGS | DB_INTERNAL_FLAGS) & - (ENV_USABLE_FLAGS | ENV_INTERNAL_FLAGS)), - "Oops, some flags overlapped or wrong"); - STATIC_ASSERT_MSG((DB_INTERNAL_FLAGS & DB_USABLE_FLAGS) == 0, - "Oops, some flags overlapped or wrong"); - STATIC_ASSERT_MSG((DB_PERSISTENT_FLAGS & ~DB_USABLE_FLAGS) == 0, + ((DB_USABLE_FLAGS | DB_INTERNAL_FLAGS) & (ENV_USABLE_FLAGS | ENV_INTERNAL_FLAGS)), "Oops, some flags overlapped or wrong"); + STATIC_ASSERT_MSG((DB_INTERNAL_FLAGS & DB_USABLE_FLAGS) == 0, "Oops, some flags overlapped or wrong"); + STATIC_ASSERT_MSG((DB_PERSISTENT_FLAGS & ~DB_USABLE_FLAGS) 
== 0, "Oops, some flags overlapped or wrong"); STATIC_ASSERT(DB_PERSISTENT_FLAGS <= UINT8_MAX); - STATIC_ASSERT_MSG((ENV_INTERNAL_FLAGS & ENV_USABLE_FLAGS) == 0, - "Oops, some flags overlapped or wrong"); + STATIC_ASSERT_MSG((ENV_INTERNAL_FLAGS & ENV_USABLE_FLAGS) == 0, "Oops, some flags overlapped or wrong"); - STATIC_ASSERT_MSG( - (txn_state_flags & (txn_rw_begin_flags | txn_ro_begin_flags)) == 0, - "Oops, some txn flags overlapped or wrong"); - STATIC_ASSERT_MSG( - ((txn_rw_begin_flags | txn_ro_begin_flags | txn_state_flags) & - txn_shrink_allowed) == 0, - "Oops, some txn flags overlapped or wrong"); + STATIC_ASSERT_MSG((txn_state_flags & (txn_rw_begin_flags | txn_ro_begin_flags)) == 0, + "Oops, some txn flags overlapped or wrong"); + STATIC_ASSERT_MSG(((txn_rw_begin_flags | txn_ro_begin_flags | txn_state_flags) & txn_shrink_allowed) == 0, + "Oops, some txn flags overlapped or wrong"); STATIC_ASSERT(sizeof(reader_slot_t) == 32); #if MDBX_LOCKING > 0 diff --git a/src/layout-dxb.h b/src/layout-dxb.h index 74d09166..ed2f261d 100644 --- a/src/layout-dxb.h +++ b/src/layout-dxb.h @@ -17,10 +17,8 @@ /* FROZEN: The version number for a database's datafile format. */ #define MDBX_DATA_VERSION 3 -#define MDBX_DATA_MAGIC \ - ((MDBX_MAGIC << 8) + MDBX_PNL_ASCENDING * 64 + MDBX_DATA_VERSION) -#define MDBX_DATA_MAGIC_LEGACY_COMPAT \ - ((MDBX_MAGIC << 8) + MDBX_PNL_ASCENDING * 64 + 2) +#define MDBX_DATA_MAGIC ((MDBX_MAGIC << 8) + MDBX_PNL_ASCENDING * 64 + MDBX_DATA_VERSION) +#define MDBX_DATA_MAGIC_LEGACY_COMPAT ((MDBX_MAGIC << 8) + MDBX_PNL_ASCENDING * 64 + 2) #define MDBX_DATA_MAGIC_LEGACY_DEVEL ((MDBX_MAGIC << 8) + 255) /* handle for the DB used to track free pages. 
*/ @@ -261,40 +259,30 @@ typedef enum node_flags { #pragma pack(pop) -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t -page_type(const page_t *mp) { - return mp->flags; -} +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t page_type(const page_t *mp) { return mp->flags; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t -page_type_compat(const page_t *mp) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t page_type_compat(const page_t *mp) { /* Drop legacy P_DIRTY flag for sub-pages for compatilibity, * for assertions only. */ - return unlikely(mp->flags & P_SUBP) ? mp->flags & ~(P_SUBP | P_LEGACY_DIRTY) - : mp->flags; + return unlikely(mp->flags & P_SUBP) ? mp->flags & ~(P_SUBP | P_LEGACY_DIRTY) : mp->flags; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -is_leaf(const page_t *mp) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_leaf(const page_t *mp) { return (mp->flags & P_LEAF) != 0; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -is_dupfix_leaf(const page_t *mp) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_dupfix_leaf(const page_t *mp) { return (mp->flags & P_DUPFIX) != 0; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -is_branch(const page_t *mp) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_branch(const page_t *mp) { return (mp->flags & P_BRANCH) != 0; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -is_largepage(const page_t *mp) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_largepage(const page_t *mp) { return (mp->flags & P_LARGE) != 0; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -is_subpage(const page_t *mp) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_subpage(const page_t *mp) { return (mp->flags & P_SUBP) != 0; } diff --git a/src/layout-lck.h b/src/layout-lck.h 
index b24b20c6..f4a2a368 100644 --- a/src/layout-lck.h +++ b/src/layout-lck.h @@ -19,8 +19,7 @@ typedef void osal_ipclock_t; #define MDBX_LCK_SIGN UINT32_C(0xF18D) typedef mdbx_pid_t osal_ipclock_t; -#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ - MDBX_LOCKING == MDBX_LOCKING_POSIX2008 +#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || MDBX_LOCKING == MDBX_LOCKING_POSIX2008 #define MDBX_LCK_SIGN UINT32_C(0x8017) typedef pthread_mutex_t osal_ipclock_t; @@ -64,19 +63,15 @@ typedef struct pgops { mdbx_atomic_uint64_t merge; /* Page merges */ mdbx_atomic_uint64_t spill; /* Quantity of spilled dirty pages */ mdbx_atomic_uint64_t unspill; /* Quantity of unspilled/reloaded pages */ - mdbx_atomic_uint64_t - wops; /* Number of explicit write operations (not a pages) to a disk */ - mdbx_atomic_uint64_t - msync; /* Number of explicit msync/flush-to-disk operations */ - mdbx_atomic_uint64_t - fsync; /* Number of explicit fsync/flush-to-disk operations */ + mdbx_atomic_uint64_t wops; /* Number of explicit write operations (not a pages) to a disk */ + mdbx_atomic_uint64_t msync; /* Number of explicit msync/flush-to-disk operations */ + mdbx_atomic_uint64_t fsync; /* Number of explicit fsync/flush-to-disk operations */ mdbx_atomic_uint64_t prefault; /* Number of prefault write operations */ mdbx_atomic_uint64_t mincore; /* Number of mincore() calls */ - mdbx_atomic_uint32_t - incoherence; /* number of https://libmdbx.dqdkfa.ru/dead-github/issues/269 - caught */ + mdbx_atomic_uint32_t incoherence; /* number of https://libmdbx.dqdkfa.ru/dead-github/issues/269 + caught */ mdbx_atomic_uint32_t reserved; /* Статистика для профилирования GC. @@ -202,8 +197,7 @@ typedef struct shared_lck { * i.e. for sync-polling in the MDBX_NOMETASYNC mode. 
*/ #define MDBX_NOMETASYNC_LAZY_UNK (UINT32_MAX / 3) #define MDBX_NOMETASYNC_LAZY_FD (MDBX_NOMETASYNC_LAZY_UNK + UINT32_MAX / 8) -#define MDBX_NOMETASYNC_LAZY_WRITEMAP \ - (MDBX_NOMETASYNC_LAZY_UNK - UINT32_MAX / 8) +#define MDBX_NOMETASYNC_LAZY_WRITEMAP (MDBX_NOMETASYNC_LAZY_UNK - UINT32_MAX / 8) mdbx_atomic_uint32_t meta_sync_txnid; /* Period for timed auto-sync feature, i.e. at the every steady checkpoint @@ -277,12 +271,10 @@ typedef struct shared_lck { reader_slot_t rdt[] /* dynamic size */; /* Lockfile format signature: version, features and field layout */ -#define MDBX_LOCK_FORMAT \ - (MDBX_LCK_SIGN * 27733 + (unsigned)sizeof(reader_slot_t) * 13 + \ - (unsigned)offsetof(reader_slot_t, snapshot_pages_used) * 251 + \ - (unsigned)offsetof(lck_t, cached_oldest) * 83 + \ - (unsigned)offsetof(lck_t, rdt_length) * 37 + \ - (unsigned)offsetof(lck_t, rdt) * 29) +#define MDBX_LOCK_FORMAT \ + (MDBX_LCK_SIGN * 27733 + (unsigned)sizeof(reader_slot_t) * 13 + \ + (unsigned)offsetof(reader_slot_t, snapshot_pages_used) * 251 + (unsigned)offsetof(lck_t, cached_oldest) * 83 + \ + (unsigned)offsetof(lck_t, rdt_length) * 37 + (unsigned)offsetof(lck_t, rdt) * 29) #endif /* FLEXIBLE_ARRAY_MEMBERS */ } lck_t; diff --git a/src/lck-posix.c b/src/lck-posix.c index 083817de..4cf83c46 100644 --- a/src/lck-posix.c +++ b/src/lck-posix.c @@ -71,11 +71,10 @@ __cold static void choice_fcntl(void) { assert(!op_setlk && !op_setlkw && !op_getlk); if ((globals.runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 #if defined(__linux__) || defined(__gnu_linux__) - && globals.linux_kernel_version > - 0x030f0000 /* OFD locks are available since 3.15, but engages here - only for 3.16 and later kernels (i.e. LTS) because - of reliability reasons */ -#endif /* linux */ + && globals.linux_kernel_version > 0x030f0000 /* OFD locks are available since 3.15, but engages here + only for 3.16 and later kernels (i.e. 
LTS) because + of reliability reasons */ +#endif /* linux */ ) { op_setlk = MDBX_F_OFD_SETLK; op_setlkw = MDBX_F_OFD_SETLKW; @@ -92,32 +91,25 @@ __cold static void choice_fcntl(void) { #define op_getlk MDBX_F_GETLK #endif /* MDBX_USE_OFDLOCKS */ -static int lck_op(const mdbx_filehandle_t fd, int cmd, const int lck, - const off_t offset, off_t len) { - STATIC_ASSERT(sizeof(off_t) >= sizeof(void *) && - sizeof(off_t) >= sizeof(size_t)); +static int lck_op(const mdbx_filehandle_t fd, int cmd, const int lck, const off_t offset, off_t len) { + STATIC_ASSERT(sizeof(off_t) >= sizeof(void *) && sizeof(off_t) >= sizeof(size_t)); #ifdef __ANDROID_API__ - STATIC_ASSERT_MSG((sizeof(off_t) * 8 == MDBX_WORDBITS), - "The bitness of system `off_t` type is mismatch. Please " - "fix build and/or NDK configuration."); + STATIC_ASSERT_MSG((sizeof(off_t) * 8 == MDBX_WORDBITS), "The bitness of system `off_t` type is mismatch. Please " + "fix build and/or NDK configuration."); #endif /* Android */ assert(offset >= 0 && len > 0); - assert((uint64_t)offset < (uint64_t)INT64_MAX && - (uint64_t)len < (uint64_t)INT64_MAX && + assert((uint64_t)offset < (uint64_t)INT64_MAX && (uint64_t)len < (uint64_t)INT64_MAX && (uint64_t)(offset + len) > (uint64_t)offset); - assert((uint64_t)offset < (uint64_t)OFF_T_MAX && - (uint64_t)len <= (uint64_t)OFF_T_MAX && + assert((uint64_t)offset < (uint64_t)OFF_T_MAX && (uint64_t)len <= (uint64_t)OFF_T_MAX && (uint64_t)(offset + len) <= (uint64_t)OFF_T_MAX); - assert((uint64_t)((off_t)((uint64_t)offset + (uint64_t)len)) == - ((uint64_t)offset + (uint64_t)len)); + assert((uint64_t)((off_t)((uint64_t)offset + (uint64_t)len)) == ((uint64_t)offset + (uint64_t)len)); jitter4testing(true); for (;;) { MDBX_STRUCT_FLOCK lock_op; - STATIC_ASSERT_MSG(sizeof(off_t) <= sizeof(lock_op.l_start) && - sizeof(off_t) <= sizeof(lock_op.l_len) && + STATIC_ASSERT_MSG(sizeof(off_t) <= sizeof(lock_op.l_start) && sizeof(off_t) <= sizeof(lock_op.l_len) && OFF_T_MAX == (off_t)OFF_T_MAX, 
"Support for large/64-bit-sized files is misconfigured " "for the target system and/or toolchain. " @@ -134,15 +126,13 @@ static int lck_op(const mdbx_filehandle_t fd, int cmd, const int lck, /* Checks reader by pid. Returns: * MDBX_RESULT_TRUE - if pid is live (reader holds a lock). * MDBX_RESULT_FALSE - if pid is dead (a lock could be placed). */ - return (lock_op.l_type == F_UNLCK) ? MDBX_RESULT_FALSE - : MDBX_RESULT_TRUE; + return (lock_op.l_type == F_UNLCK) ? MDBX_RESULT_FALSE : MDBX_RESULT_TRUE; } return MDBX_SUCCESS; } rc = errno; #if MDBX_USE_OFDLOCKS - if (rc == EINVAL && (cmd == MDBX_F_OFD_SETLK || cmd == MDBX_F_OFD_SETLKW || - cmd == MDBX_F_OFD_GETLK)) { + if (rc == EINVAL && (cmd == MDBX_F_OFD_SETLK || cmd == MDBX_F_OFD_SETLKW || cmd == MDBX_F_OFD_GETLK)) { /* fallback to non-OFD locks */ if (cmd == MDBX_F_OFD_SETLK) cmd = MDBX_F_SETLK; @@ -197,8 +187,7 @@ MDBX_INTERNAL int lck_rpid_check(MDBX_env *env, uint32_t pid) { MDBX_INTERNAL int lck_ipclock_stubinit(osal_ipclock_t *ipc) { #if MDBX_LOCKING == MDBX_LOCKING_POSIX1988 return sem_init(ipc, false, 1) ? errno : 0; -#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ - MDBX_LOCKING == MDBX_LOCKING_POSIX2008 +#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || MDBX_LOCKING == MDBX_LOCKING_POSIX2008 return pthread_mutex_init(ipc, nullptr); #else #error "FIXME" @@ -208,8 +197,7 @@ MDBX_INTERNAL int lck_ipclock_stubinit(osal_ipclock_t *ipc) { MDBX_INTERNAL int lck_ipclock_destroy(osal_ipclock_t *ipc) { #if MDBX_LOCKING == MDBX_LOCKING_POSIX1988 return sem_destroy(ipc) ? errno : 0; -#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ - MDBX_LOCKING == MDBX_LOCKING_POSIX2008 +#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || MDBX_LOCKING == MDBX_LOCKING_POSIX2008 return pthread_mutex_destroy(ipc); #else #error "FIXME" @@ -233,14 +221,12 @@ static int check_fstat(MDBX_env *env) { #else rc = EPERM; #endif - ERROR("%s %s, err %d", "DXB", - (st.st_nlink < 1) ? 
"file was removed" : "not a regular file", rc); + ERROR("%s %s, err %d", "DXB", (st.st_nlink < 1) ? "file was removed" : "not a regular file", rc); return rc; } if (st.st_size < (off_t)(MDBX_MIN_PAGESIZE * NUM_METAS)) { - VERBOSE("dxb-file is too short (%u), exclusive-lock needed", - (unsigned)st.st_size); + VERBOSE("dxb-file is too short (%u), exclusive-lock needed", (unsigned)st.st_size); rc = MDBX_RESULT_TRUE; } @@ -258,16 +244,14 @@ static int check_fstat(MDBX_env *env) { #else rc = EPERM; #endif - ERROR("%s %s, err %d", "LCK", - (st.st_nlink < 1) ? "file was removed" : "not a regular file", rc); + ERROR("%s %s, err %d", "LCK", (st.st_nlink < 1) ? "file was removed" : "not a regular file", rc); return rc; } /* Checking file size for detect the situation when we got the shared lock * immediately after lck_destroy(). */ if (st.st_size < (off_t)(sizeof(lck_t) + sizeof(reader_slot_t))) { - VERBOSE("lck-file is too short (%u), exclusive-lock needed", - (unsigned)st.st_size); + VERBOSE("lck-file is too short (%u), exclusive-lock needed", (unsigned)st.st_size); rc = MDBX_RESULT_TRUE; } @@ -298,8 +282,7 @@ __cold MDBX_INTERNAL int lck_seize(MDBX_env *env) { if (env->lck_mmap.fd == INVALID_HANDLE_VALUE) { /* LY: without-lck mode (e.g. exclusive or on read-only filesystem) */ - rc = lck_op(env->lazy_fd, op_setlk, - (env->flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX); + rc = lck_op(env->lazy_fd, op_setlk, (env->flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX); if (rc != MDBX_SUCCESS) { ERROR("%s, err %u", "without-lck", rc); eASSERT(env, MDBX_IS_ERROR(rc)); @@ -329,8 +312,7 @@ retry: return rc; continue_dxb_exclusive: - rc = lck_op(env->lazy_fd, op_setlk, - (env->flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX); + rc = lck_op(env->lazy_fd, op_setlk, (env->flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX); if (rc == MDBX_SUCCESS) return MDBX_RESULT_TRUE /* Done: return with exclusive locking. 
*/; @@ -339,16 +321,14 @@ retry: return err; /* the cause may be a collision with POSIX's file-lock recovery. */ - if (!(rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK || - rc == EDEADLK)) { + if (!(rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK || rc == EDEADLK)) { ERROR("%s, err %u", "dxb-exclusive", rc); eASSERT(env, MDBX_IS_ERROR(rc)); return rc; } /* Fallback to lck-shared */ - } else if (!(rc == EAGAIN || rc == EACCES || rc == EBUSY || - rc == EWOULDBLOCK || rc == EDEADLK)) { + } else if (!(rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK || rc == EDEADLK)) { ERROR("%s, err %u", "try-exclusive", rc); eASSERT(env, MDBX_IS_ERROR(rc)); return rc; @@ -384,16 +364,14 @@ retry: if (rc == MDBX_SUCCESS) goto continue_dxb_exclusive; - if (!(rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK || - rc == EDEADLK)) { + if (!(rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK || rc == EDEADLK)) { ERROR("%s, err %u", "try-exclusive", rc); eASSERT(env, MDBX_IS_ERROR(rc)); return rc; } /* Lock against another process operating in without-lck or exclusive mode. */ - rc = lck_op(env->lazy_fd, op_setlk, - (env->flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, env->pid, 1); + rc = lck_op(env->lazy_fd, op_setlk, (env->flags & MDBX_RDONLY) ? 
F_RDLCK : F_WRLCK, env->pid, 1); if (rc != MDBX_SUCCESS) { ERROR("%s, err %u", "lock-against-without-lck", rc); eASSERT(env, MDBX_IS_ERROR(rc)); @@ -413,8 +391,7 @@ MDBX_INTERNAL int lck_downgrade(MDBX_env *env) { if ((env->flags & MDBX_EXCLUSIVE) == 0) { rc = lck_op(env->lazy_fd, op_setlk, F_UNLCK, 0, env->pid); if (rc == MDBX_SUCCESS) - rc = lck_op(env->lazy_fd, op_setlk, F_UNLCK, env->pid + 1, - OFF_T_MAX - env->pid - 1); + rc = lck_op(env->lazy_fd, op_setlk, F_UNLCK, env->pid + 1, OFF_T_MAX - env->pid - 1); } if (rc == MDBX_SUCCESS) rc = lck_op(env->lck_mmap.fd, op_setlk, F_RDLCK, 0, 1); @@ -433,13 +410,10 @@ MDBX_INTERNAL int lck_upgrade(MDBX_env *env, bool dont_wait) { const int cmd = dont_wait ? op_setlk : op_setlkw; int rc = lck_op(env->lck_mmap.fd, cmd, F_WRLCK, 0, 1); if (rc == MDBX_SUCCESS && (env->flags & MDBX_EXCLUSIVE) == 0) { - rc = (env->pid > 1) ? lck_op(env->lazy_fd, cmd, F_WRLCK, 0, env->pid - 1) - : MDBX_SUCCESS; + rc = (env->pid > 1) ? lck_op(env->lazy_fd, cmd, F_WRLCK, 0, env->pid - 1) : MDBX_SUCCESS; if (rc == MDBX_SUCCESS) { - rc = lck_op(env->lazy_fd, cmd, F_WRLCK, env->pid + 1, - OFF_T_MAX - env->pid - 1); - if (rc != MDBX_SUCCESS && env->pid > 1 && - lck_op(env->lazy_fd, op_setlk, F_UNLCK, 0, env->pid - 1)) + rc = lck_op(env->lazy_fd, cmd, F_WRLCK, env->pid + 1, OFF_T_MAX - env->pid - 1); + if (rc != MDBX_SUCCESS && env->pid > 1 && lck_op(env->lazy_fd, op_setlk, F_UNLCK, 0, env->pid - 1)) rc = MDBX_PANIC; } if (rc != MDBX_SUCCESS && lck_op(env->lck_mmap.fd, op_setlk, F_RDLCK, 0, 1)) @@ -452,9 +426,7 @@ MDBX_INTERNAL int lck_upgrade(MDBX_env *env, bool dont_wait) { return rc; } -__cold MDBX_INTERNAL int lck_destroy(MDBX_env *env, - MDBX_env *inprocess_neighbor, - const uint32_t current_pid) { +__cold MDBX_INTERNAL int lck_destroy(MDBX_env *env, MDBX_env *inprocess_neighbor, const uint32_t current_pid) { eASSERT(env, osal_getpid() == current_pid); int rc = MDBX_SUCCESS; struct stat lck_info; @@ -464,9 +436,7 @@ __cold MDBX_INTERNAL int 
lck_destroy(MDBX_env *env, lck_op(env->lck_mmap.fd, op_setlk, F_WRLCK, 0, OFF_T_MAX) == 0 && /* if LCK was not removed */ fstat(env->lck_mmap.fd, &lck_info) == 0 && lck_info.st_nlink > 0 && - lck_op(env->lazy_fd, op_setlk, - (env->flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, - OFF_T_MAX) == 0) { + lck_op(env->lazy_fd, op_setlk, (env->flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX) == 0) { VERBOSE("%p got exclusive, drown ipc-locks", (void *)env); eASSERT(env, current_pid == env->pid); @@ -492,8 +462,7 @@ __cold MDBX_INTERNAL int lck_destroy(MDBX_env *env, if (current_pid != env->pid) { eASSERT(env, !inprocess_neighbor); - NOTICE("drown env %p after-fork pid %d -> %d", - __Wpedantic_format_voidptr(env), env->pid, current_pid); + NOTICE("drown env %p after-fork pid %d -> %d", __Wpedantic_format_voidptr(env), env->pid, current_pid); inprocess_neighbor = nullptr; } @@ -516,11 +485,8 @@ __cold MDBX_INTERNAL int lck_destroy(MDBX_env *env, env->lazy_fd = INVALID_HANDLE_VALUE; if (op_setlk == F_SETLK && inprocess_neighbor && rc == MDBX_SUCCESS) { /* restore file-lock */ - rc = lck_op(inprocess_neighbor->lazy_fd, F_SETLKW, - (inprocess_neighbor->flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, - (inprocess_neighbor->flags & MDBX_EXCLUSIVE) - ? 0 - : inprocess_neighbor->pid, + rc = lck_op(inprocess_neighbor->lazy_fd, F_SETLKW, (inprocess_neighbor->flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, + (inprocess_neighbor->flags & MDBX_EXCLUSIVE) ? 0 : inprocess_neighbor->pid, (inprocess_neighbor->flags & MDBX_EXCLUSIVE) ? 
OFF_T_MAX : 1); } } @@ -545,8 +511,7 @@ __cold MDBX_INTERNAL int lck_destroy(MDBX_env *env, /*---------------------------------------------------------------------------*/ -__cold MDBX_INTERNAL int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, - int global_uniqueness_flag) { +__cold MDBX_INTERNAL int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, int global_uniqueness_flag) { #if MDBX_LOCKING == MDBX_LOCKING_SYSV int semid = -1; /* don't initialize semaphores twice */ @@ -556,9 +521,7 @@ __cold MDBX_INTERNAL int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, if (fstat(env->lazy_fd, &st)) return errno; sysv_retry_create: - semid = semget(env->me_sysv_ipc.key, 2, - IPC_CREAT | IPC_EXCL | - (st.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO))); + semid = semget(env->me_sysv_ipc.key, 2, IPC_CREAT | IPC_EXCL | (st.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO))); if (unlikely(semid == -1)) { int err = errno; if (err != EEXIST) @@ -614,8 +577,7 @@ __cold MDBX_INTERNAL int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, } return MDBX_SUCCESS; -#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ - MDBX_LOCKING == MDBX_LOCKING_POSIX2008 +#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || MDBX_LOCKING == MDBX_LOCKING_POSIX2008 if (inprocess_neighbor) return MDBX_SUCCESS /* don't need any initialization for mutexes if LCK already opened/used inside current process */ @@ -653,8 +615,7 @@ __cold MDBX_INTERNAL int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, #if MDBX_LOCKING == MDBX_LOCKING_POSIX2008 #if defined(PTHREAD_MUTEX_ROBUST) || defined(pthread_mutexattr_setrobust) rc = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); -#elif defined(PTHREAD_MUTEX_ROBUST_NP) || \ - defined(pthread_mutexattr_setrobust_np) +#elif defined(PTHREAD_MUTEX_ROBUST_NP) || defined(pthread_mutexattr_setrobust_np) rc = pthread_mutexattr_setrobust_np(&ma, PTHREAD_MUTEX_ROBUST_NP); #elif _POSIX_THREAD_PROCESS_SHARED < 200809L rc = pthread_mutexattr_setrobust_np(&ma, 
PTHREAD_MUTEX_ROBUST_NP); @@ -665,8 +626,7 @@ __cold MDBX_INTERNAL int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, goto bailout; #endif /* MDBX_LOCKING == MDBX_LOCKING_POSIX2008 */ -#if defined(_POSIX_THREAD_PRIO_INHERIT) && _POSIX_THREAD_PRIO_INHERIT >= 0 && \ - !defined(MDBX_SAFE4QEMU) +#if defined(_POSIX_THREAD_PRIO_INHERIT) && _POSIX_THREAD_PRIO_INHERIT >= 0 && !defined(MDBX_SAFE4QEMU) rc = pthread_mutexattr_setprotocol(&ma, PTHREAD_PRIO_INHERIT); if (rc == ENOTSUP) rc = pthread_mutexattr_setprotocol(&ma, PTHREAD_PRIO_NONE); @@ -691,8 +651,7 @@ bailout: #endif /* MDBX_LOCKING > 0 */ } -__cold static int osal_ipclock_failed(MDBX_env *env, osal_ipclock_t *ipc, - const int err) { +__cold static int osal_ipclock_failed(MDBX_env *env, osal_ipclock_t *ipc, const int err) { int rc = err; #if MDBX_LOCKING == MDBX_LOCKING_POSIX2008 || MDBX_LOCKING == MDBX_LOCKING_SYSV @@ -712,8 +671,7 @@ __cold static int osal_ipclock_failed(MDBX_env *env, osal_ipclock_t *ipc, rc = MDBX_PANIC; } } - WARNING("%clock owner died, %s", (rlocked ? 'r' : 'w'), - (rc ? "this process' env is hosed" : "recovering")); + WARNING("%clock owner died, %s", (rlocked ? 'r' : 'w'), (rc ? "this process' env is hosed" : "recovering")); int check_rc = mvcc_cleanup_dead(env, rlocked, nullptr); check_rc = (check_rc == MDBX_SUCCESS) ? MDBX_RESULT_TRUE : check_rc; @@ -781,10 +739,8 @@ MDBX_INTERNAL int osal_check_tid4bionic(void) { } #endif /* __ANDROID_API__ || ANDROID) || BIONIC */ -static int osal_ipclock_lock(MDBX_env *env, osal_ipclock_t *ipc, - const bool dont_wait) { -#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ - MDBX_LOCKING == MDBX_LOCKING_POSIX2008 +static int osal_ipclock_lock(MDBX_env *env, osal_ipclock_t *ipc, const bool dont_wait) { +#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || MDBX_LOCKING == MDBX_LOCKING_POSIX2008 int rc = osal_check_tid4bionic(); if (likely(rc == 0)) rc = dont_wait ? 
pthread_mutex_trylock(ipc) : pthread_mutex_lock(ipc); @@ -800,9 +756,8 @@ static int osal_ipclock_lock(MDBX_env *env, osal_ipclock_t *ipc, } else if (sem_wait(ipc)) rc = errno; #elif MDBX_LOCKING == MDBX_LOCKING_SYSV - struct sembuf op = {.sem_num = (ipc != &env->lck->wrt_lock), - .sem_op = -1, - .sem_flg = dont_wait ? IPC_NOWAIT | SEM_UNDO : SEM_UNDO}; + struct sembuf op = { + .sem_num = (ipc != &env->lck->wrt_lock), .sem_op = -1, .sem_flg = dont_wait ? IPC_NOWAIT | SEM_UNDO : SEM_UNDO}; int rc; if (semop(env->me_sysv_ipc.semid, &op, 1)) { rc = errno; @@ -823,8 +778,7 @@ static int osal_ipclock_lock(MDBX_env *env, osal_ipclock_t *ipc, int osal_ipclock_unlock(MDBX_env *env, osal_ipclock_t *ipc) { int err = MDBX_ENOSYS; -#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ - MDBX_LOCKING == MDBX_LOCKING_POSIX2008 +#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || MDBX_LOCKING == MDBX_LOCKING_POSIX2008 err = pthread_mutex_unlock(ipc); #elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988 err = sem_post(ipc) ? errno : MDBX_SUCCESS; @@ -833,9 +787,7 @@ int osal_ipclock_unlock(MDBX_env *env, osal_ipclock_t *ipc) { err = EPERM; else { *ipc = 0; - struct sembuf op = {.sem_num = (ipc != &env->lck->wrt_lock), - .sem_op = 1, - .sem_flg = SEM_UNDO}; + struct sembuf op = {.sem_num = (ipc != &env->lck->wrt_lock), .sem_op = 1, .sem_flg = SEM_UNDO}; err = semop(env->me_sysv_ipc.semid, &op, 1) ? errno : MDBX_SUCCESS; } #else @@ -845,13 +797,9 @@ int osal_ipclock_unlock(MDBX_env *env, osal_ipclock_t *ipc) { if (unlikely(rc != MDBX_SUCCESS)) { const uint32_t current_pid = osal_getpid(); if (current_pid == env->pid || LOG_ENABLED(MDBX_LOG_NOTICE)) - debug_log((current_pid == env->pid) - ? MDBX_LOG_FATAL - : (rc = MDBX_SUCCESS, MDBX_LOG_NOTICE), - "ipc-unlock()", __LINE__, "failed: env %p, lck-%s %p, err %d\n", - __Wpedantic_format_voidptr(env), - (env->lck == env->lck_mmap.lck) ? "mmap" : "stub", - __Wpedantic_format_voidptr(env->lck), err); + debug_log((current_pid == env->pid) ? 
MDBX_LOG_FATAL : (rc = MDBX_SUCCESS, MDBX_LOG_NOTICE), "ipc-unlock()", + __LINE__, "failed: env %p, lck-%s %p, err %d\n", __Wpedantic_format_voidptr(env), + (env->lck == env->lck_mmap.lck) ? "mmap" : "stub", __Wpedantic_format_voidptr(env->lck), err); } return rc; } @@ -879,10 +827,9 @@ int lck_txn_lock(MDBX_env *env, bool dont_wait) { const int err = osal_ipclock_lock(env, &env->lck->wrt_lock, dont_wait); int rc = err; if (likely(!MDBX_IS_ERROR(err))) { - eASSERT(env, !env->basal_txn->owner || - err == /* если другой поток в этом-же процессе завершился - не освободив блокировку */ - MDBX_RESULT_TRUE); + eASSERT(env, !env->basal_txn->owner || err == /* если другой поток в этом-же процессе завершился + не освободив блокировку */ + MDBX_RESULT_TRUE); env->basal_txn->owner = osal_thread_self(); rc = MDBX_SUCCESS; } diff --git a/src/lck-windows.c b/src/lck-windows.c index fb8aa78e..10c362a6 100644 --- a/src/lck-windows.c +++ b/src/lck-windows.c @@ -16,10 +16,8 @@ #define LCK_WAITFOR 0 #define LCK_DONTWAIT LOCKFILE_FAIL_IMMEDIATELY -static int flock_with_event(HANDLE fd, HANDLE event, unsigned flags, - size_t offset, size_t bytes) { - TRACE("lock>>: fd %p, event %p, flags 0x%x offset %zu, bytes %zu >>", fd, - event, flags, offset, bytes); +static int flock_with_event(HANDLE fd, HANDLE event, unsigned flags, size_t offset, size_t bytes) { + TRACE("lock>>: fd %p, event %p, flags 0x%x offset %zu, bytes %zu >>", fd, event, flags, offset, bytes); OVERLAPPED ov; ov.Internal = 0; ov.InternalHigh = 0; @@ -27,8 +25,7 @@ static int flock_with_event(HANDLE fd, HANDLE event, unsigned flags, ov.Offset = (DWORD)offset; ov.OffsetHigh = HIGH_DWORD(offset); if (LockFileEx(fd, flags, 0, (DWORD)bytes, HIGH_DWORD(bytes), &ov)) { - TRACE("lock<<: fd %p, event %p, flags 0x%x offset %zu, bytes %zu << %s", fd, - event, flags, offset, bytes, "done"); + TRACE("lock<<: fd %p, event %p, flags 0x%x offset %zu, bytes %zu << %s", fd, event, flags, offset, bytes, "done"); return MDBX_SUCCESS; } @@ 
-36,37 +33,32 @@ static int flock_with_event(HANDLE fd, HANDLE event, unsigned flags, if (rc == ERROR_IO_PENDING) { if (event) { if (GetOverlappedResult(fd, &ov, &rc, true)) { - TRACE("lock<<: fd %p, event %p, flags 0x%x offset %zu, bytes %zu << %s", - fd, event, flags, offset, bytes, "overlapped-done"); + TRACE("lock<<: fd %p, event %p, flags 0x%x offset %zu, bytes %zu << %s", fd, event, flags, offset, bytes, + "overlapped-done"); return MDBX_SUCCESS; } rc = GetLastError(); } else CancelIo(fd); } - TRACE("lock<<: fd %p, event %p, flags 0x%x offset %zu, bytes %zu << err %d", - fd, event, flags, offset, bytes, (int)rc); + TRACE("lock<<: fd %p, event %p, flags 0x%x offset %zu, bytes %zu << err %d", fd, event, flags, offset, bytes, + (int)rc); return (int)rc; } -static inline int flock(HANDLE fd, unsigned flags, size_t offset, - size_t bytes) { +static inline int flock(HANDLE fd, unsigned flags, size_t offset, size_t bytes) { return flock_with_event(fd, 0, flags, offset, bytes); } -static inline int flock_data(const MDBX_env *env, unsigned flags, size_t offset, - size_t bytes) { - const HANDLE fd4data = - env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd; +static inline int flock_data(const MDBX_env *env, unsigned flags, size_t offset, size_t bytes) { + const HANDLE fd4data = env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd; return flock_with_event(fd4data, env->dxb_lock_event, flags, offset, bytes); } static int funlock(mdbx_filehandle_t fd, size_t offset, size_t bytes) { TRACE("unlock: fd %p, offset %zu, bytes %zu", fd, offset, bytes); - return UnlockFile(fd, (DWORD)offset, HIGH_DWORD(offset), (DWORD)bytes, - HIGH_DWORD(bytes)) - ? MDBX_SUCCESS - : (int)GetLastError(); + return UnlockFile(fd, (DWORD)offset, HIGH_DWORD(offset), (DWORD)bytes, HIGH_DWORD(bytes)) ? 
MDBX_SUCCESS + : (int)GetLastError(); } /*----------------------------------------------------------------------------*/ @@ -88,9 +80,7 @@ int lck_txn_lock(MDBX_env *env, bool dontwait) { } else { __try { EnterCriticalSection(&env->windowsbug_lock); - } - __except ((GetExceptionCode() == - 0xC0000194 /* STATUS_POSSIBLE_DEADLOCK / EXCEPTION_POSSIBLE_DEADLOCK */) + } __except ((GetExceptionCode() == 0xC0000194 /* STATUS_POSSIBLE_DEADLOCK / EXCEPTION_POSSIBLE_DEADLOCK */) ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) { return MDBX_EDEADLK; @@ -101,20 +91,15 @@ int lck_txn_lock(MDBX_env *env, bool dontwait) { if (env->flags & MDBX_EXCLUSIVE) goto done; - const HANDLE fd4data = - env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd; + const HANDLE fd4data = env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd; int rc = flock_with_event(fd4data, env->dxb_lock_event, - dontwait ? (LCK_EXCLUSIVE | LCK_DONTWAIT) - : (LCK_EXCLUSIVE | LCK_WAITFOR), - DXB_BODY); + dontwait ? (LCK_EXCLUSIVE | LCK_DONTWAIT) : (LCK_EXCLUSIVE | LCK_WAITFOR), DXB_BODY); if (rc == ERROR_LOCK_VIOLATION && dontwait) { SleepEx(0, true); - rc = flock_with_event(fd4data, env->dxb_lock_event, - LCK_EXCLUSIVE | LCK_DONTWAIT, DXB_BODY); + rc = flock_with_event(fd4data, env->dxb_lock_event, LCK_EXCLUSIVE | LCK_DONTWAIT, DXB_BODY); if (rc == ERROR_LOCK_VIOLATION) { SleepEx(0, true); - rc = flock_with_event(fd4data, env->dxb_lock_event, - LCK_EXCLUSIVE | LCK_DONTWAIT, DXB_BODY); + rc = flock_with_event(fd4data, env->dxb_lock_event, LCK_EXCLUSIVE | LCK_DONTWAIT, DXB_BODY); } } if (rc == MDBX_SUCCESS) { @@ -133,8 +118,7 @@ int lck_txn_lock(MDBX_env *env, bool dontwait) { void lck_txn_unlock(MDBX_env *env) { eASSERT(env, env->basal_txn->owner == osal_thread_self()); if ((env->flags & MDBX_EXCLUSIVE) == 0) { - const HANDLE fd4data = - env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd; + const HANDLE fd4data = env->ioring.overlapped_fd ? 
env->ioring.overlapped_fd : env->lazy_fd; int err = funlock(fd4data, DXB_BODY); if (err != MDBX_SUCCESS) mdbx_panic("%s failed: err %u", __func__, err); @@ -173,8 +157,7 @@ MDBX_INTERNAL int lck_rdt_lock(MDBX_env *env) { } MDBX_INTERNAL void lck_rdt_unlock(MDBX_env *env) { - if (env->lck_mmap.fd != INVALID_HANDLE_VALUE && - (env->flags & MDBX_EXCLUSIVE) == 0) { + if (env->lck_mmap.fd != INVALID_HANDLE_VALUE && (env->flags & MDBX_EXCLUSIVE) == 0) { /* transition from S-E (locked) to S-? (used), e.g. unlock upper-part */ int err = funlock(env->lck_mmap.fd, LCK_UPPER); if (err != MDBX_SUCCESS) @@ -184,22 +167,15 @@ MDBX_INTERNAL void lck_rdt_unlock(MDBX_env *env) { } MDBX_INTERNAL int osal_lockfile(mdbx_filehandle_t fd, bool wait) { - return flock( - fd, wait ? LCK_EXCLUSIVE | LCK_WAITFOR : LCK_EXCLUSIVE | LCK_DONTWAIT, 0, - DXB_MAXLEN); + return flock(fd, wait ? LCK_EXCLUSIVE | LCK_WAITFOR : LCK_EXCLUSIVE | LCK_DONTWAIT, 0, DXB_MAXLEN); } -static int suspend_and_append(mdbx_handle_array_t **array, - const DWORD ThreadId) { +static int suspend_and_append(mdbx_handle_array_t **array, const DWORD ThreadId) { const unsigned limit = (*array)->limit; if ((*array)->count == limit) { - mdbx_handle_array_t *const ptr = - osal_realloc((limit > ARRAY_LENGTH((*array)->handles)) - ? *array - : /* don't free initial array on the stack */ nullptr, - sizeof(mdbx_handle_array_t) + - sizeof(HANDLE) * (limit * (size_t)2 - - ARRAY_LENGTH((*array)->handles))); + mdbx_handle_array_t *const ptr = osal_realloc( + (limit > ARRAY_LENGTH((*array)->handles)) ? 
*array : /* don't free initial array on the stack */ nullptr, + sizeof(mdbx_handle_array_t) + sizeof(HANDLE) * (limit * (size_t)2 - ARRAY_LENGTH((*array)->handles))); if (!ptr) return MDBX_ENOMEM; if (limit == ARRAY_LENGTH((*array)->handles)) @@ -208,16 +184,15 @@ static int suspend_and_append(mdbx_handle_array_t **array, (*array)->limit = limit * 2; } - HANDLE hThread = OpenThread(THREAD_SUSPEND_RESUME | THREAD_QUERY_INFORMATION, - FALSE, ThreadId); + HANDLE hThread = OpenThread(THREAD_SUSPEND_RESUME | THREAD_QUERY_INFORMATION, FALSE, ThreadId); if (hThread == nullptr) return (int)GetLastError(); if (SuspendThread(hThread) == (DWORD)-1) { int err = (int)GetLastError(); DWORD ExitCode; - if (err == /* workaround for Win10 UCRT bug */ ERROR_ACCESS_DENIED || - !GetExitCodeThread(hThread, &ExitCode) || ExitCode != STILL_ACTIVE) + if (err == /* workaround for Win10 UCRT bug */ ERROR_ACCESS_DENIED || !GetExitCodeThread(hThread, &ExitCode) || + ExitCode != STILL_ACTIVE) err = MDBX_SUCCESS; CloseHandle(hThread); return err; @@ -227,21 +202,17 @@ static int suspend_and_append(mdbx_handle_array_t **array, return MDBX_SUCCESS; } -MDBX_INTERNAL int -osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) { +MDBX_INTERNAL int osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) { eASSERT(env, (env->flags & MDBX_NOSTICKYTHREADS) == 0); const uintptr_t CurrentTid = GetCurrentThreadId(); int rc; if (env->lck_mmap.lck) { /* Scan LCK for threads of the current process */ const reader_slot_t *const begin = env->lck_mmap.lck->rdt; - const reader_slot_t *const end = - begin + - atomic_load32(&env->lck_mmap.lck->rdt_length, mo_AcquireRelease); + const reader_slot_t *const end = begin + atomic_load32(&env->lck_mmap.lck->rdt_length, mo_AcquireRelease); const uintptr_t WriteTxnOwner = env->basal_txn ? 
env->basal_txn->owner : 0; for (const reader_slot_t *reader = begin; reader < end; ++reader) { - if (reader->pid.weak != env->pid || !reader->tid.weak || - reader->tid.weak >= MDBX_TID_TXN_OUSTED) { + if (reader->pid.weak != env->pid || !reader->tid.weak || reader->tid.weak >= MDBX_TID_TXN_OUSTED) { skip_lck: continue; } @@ -280,8 +251,7 @@ osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) { } do { - if (entry.th32OwnerProcessID != env->pid || - entry.th32ThreadID == CurrentTid) + if (entry.th32OwnerProcessID != env->pid || entry.th32ThreadID == CurrentTid) continue; rc = suspend_and_append(array, entry.th32ThreadID); @@ -306,8 +276,8 @@ MDBX_INTERNAL int osal_resume_threads_after_remap(mdbx_handle_array_t *array) { if (ResumeThread(hThread) == (DWORD)-1) { const int err = (int)GetLastError(); DWORD ExitCode; - if (err != /* workaround for Win10 UCRT bug */ ERROR_ACCESS_DENIED && - GetExitCodeThread(hThread, &ExitCode) && ExitCode == STILL_ACTIVE) + if (err != /* workaround for Win10 UCRT bug */ ERROR_ACCESS_DENIED && GetExitCodeThread(hThread, &ExitCode) && + ExitCode == STILL_ACTIVE) rc = err; } CloseHandle(hThread); @@ -358,35 +328,30 @@ static void lck_unlock(MDBX_env *env) { do err = funlock(env->lck_mmap.fd, LCK_LOWER); while (err == MDBX_SUCCESS); - assert(err == ERROR_NOT_LOCKED || - (globals.running_under_Wine && err == ERROR_LOCK_VIOLATION)); + assert(err == ERROR_NOT_LOCKED || (globals.running_under_Wine && err == ERROR_LOCK_VIOLATION)); SetLastError(ERROR_SUCCESS); do err = funlock(env->lck_mmap.fd, LCK_UPPER); while (err == MDBX_SUCCESS); - assert(err == ERROR_NOT_LOCKED || - (globals.running_under_Wine && err == ERROR_LOCK_VIOLATION)); + assert(err == ERROR_NOT_LOCKED || (globals.running_under_Wine && err == ERROR_LOCK_VIOLATION)); SetLastError(ERROR_SUCCESS); } - const HANDLE fd4data = - env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd; + const HANDLE fd4data = env->ioring.overlapped_fd ? 
env->ioring.overlapped_fd : env->lazy_fd; if (fd4data != INVALID_HANDLE_VALUE) { /* explicitly unlock to avoid latency for other processes (windows kernel * releases such locks via deferred queues) */ do err = funlock(fd4data, DXB_BODY); while (err == MDBX_SUCCESS); - assert(err == ERROR_NOT_LOCKED || - (globals.running_under_Wine && err == ERROR_LOCK_VIOLATION)); + assert(err == ERROR_NOT_LOCKED || (globals.running_under_Wine && err == ERROR_LOCK_VIOLATION)); SetLastError(ERROR_SUCCESS); do err = funlock(fd4data, DXB_WHOLE); while (err == MDBX_SUCCESS); - assert(err == ERROR_NOT_LOCKED || - (globals.running_under_Wine && err == ERROR_LOCK_VIOLATION)); + assert(err == ERROR_NOT_LOCKED || (globals.running_under_Wine && err == ERROR_LOCK_VIOLATION)); SetLastError(ERROR_SUCCESS); } } @@ -418,8 +383,7 @@ static int internal_seize_lck(HANDLE lfd) { /* 6) something went wrong, give up */ rc = funlock(lfd, LCK_UPPER); if (rc != MDBX_SUCCESS) - mdbx_panic("%s(%s) failed: err %u", __func__, "?-E(middle) >> ?-?(free)", - rc); + mdbx_panic("%s(%s) failed: err %u", __func__, "?-E(middle) >> ?-?(free)", rc); return rc; } @@ -435,16 +399,14 @@ static int internal_seize_lck(HANDLE lfd) { * transition to S-? (used) or ?-? (free) */ int err = funlock(lfd, LCK_UPPER); if (err != MDBX_SUCCESS) - mdbx_panic("%s(%s) failed: err %u", __func__, - "X-E(locked/middle) >> X-?(used/free)", err); + mdbx_panic("%s(%s) failed: err %u", __func__, "X-E(locked/middle) >> X-?(used/free)", err); /* 9) now on S-? (used, DONE) or ?-? (free, FAILURE) */ return rc; } MDBX_INTERNAL int lck_seize(MDBX_env *env) { - const HANDLE fd4data = - env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd; + const HANDLE fd4data = env->ioring.overlapped_fd ? 
env->ioring.overlapped_fd : env->lazy_fd; assert(fd4data != INVALID_HANDLE_VALUE); if (env->flags & MDBX_EXCLUSIVE) return MDBX_RESULT_TRUE /* nope since files were must be opened @@ -479,16 +441,14 @@ MDBX_INTERNAL int lck_seize(MDBX_env *env) { jitter4testing(false); err = funlock(fd4data, DXB_WHOLE); if (err != MDBX_SUCCESS) - mdbx_panic("%s(%s) failed: err %u", __func__, - "unlock-against-without-lck", err); + mdbx_panic("%s(%s) failed: err %u", __func__, "unlock-against-without-lck", err); } return rc; } MDBX_INTERNAL int lck_downgrade(MDBX_env *env) { - const HANDLE fd4data = - env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd; + const HANDLE fd4data = env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd; /* Transite from exclusive-write state (E-E) to used (S-?) */ assert(fd4data != INVALID_HANDLE_VALUE); assert(env->lck_mmap.fd != INVALID_HANDLE_VALUE); @@ -499,8 +459,7 @@ MDBX_INTERNAL int lck_downgrade(MDBX_env *env) { /* 1) now at E-E (exclusive-write), transition to ?_E (middle) */ int rc = funlock(env->lck_mmap.fd, LCK_LOWER); if (rc != MDBX_SUCCESS) - mdbx_panic("%s(%s) failed: err %u", __func__, - "E-E(exclusive-write) >> ?-E(middle)", rc); + mdbx_panic("%s(%s) failed: err %u", __func__, "E-E(exclusive-write) >> ?-E(middle)", rc); /* 2) now at ?-E (middle), transition to S-E (locked) */ rc = flock(env->lck_mmap.fd, LCK_SHARED | LCK_DONTWAIT, LCK_LOWER); @@ -513,8 +472,7 @@ MDBX_INTERNAL int lck_downgrade(MDBX_env *env) { /* 4) got S-E (locked), continue transition to S-? (used) */ rc = funlock(env->lck_mmap.fd, LCK_UPPER); if (rc != MDBX_SUCCESS) - mdbx_panic("%s(%s) failed: err %u", __func__, "S-E(locked) >> S-?(used)", - rc); + mdbx_panic("%s(%s) failed: err %u", __func__, "S-E(locked) >> S-?(used)", rc); return MDBX_SUCCESS /* 5) now at S-? (used), done */; } @@ -529,9 +487,7 @@ MDBX_INTERNAL int lck_upgrade(MDBX_env *env, bool dont_wait) { /* 1) now on S-? 
(used), try S-E (locked) */ jitter4testing(false); - int rc = flock(env->lck_mmap.fd, - dont_wait ? LCK_EXCLUSIVE | LCK_DONTWAIT : LCK_EXCLUSIVE, - LCK_UPPER); + int rc = flock(env->lck_mmap.fd, dont_wait ? LCK_EXCLUSIVE | LCK_DONTWAIT : LCK_EXCLUSIVE, LCK_UPPER); if (rc != MDBX_SUCCESS) { /* 2) something went wrong, give up */; VERBOSE("%s, err %u", "S-?(used) >> S-E(locked)", rc); @@ -541,14 +497,11 @@ MDBX_INTERNAL int lck_upgrade(MDBX_env *env, bool dont_wait) { /* 3) now on S-E (locked), transition to ?-E (middle) */ rc = funlock(env->lck_mmap.fd, LCK_LOWER); if (rc != MDBX_SUCCESS) - mdbx_panic("%s(%s) failed: err %u", __func__, "S-E(locked) >> ?-E(middle)", - rc); + mdbx_panic("%s(%s) failed: err %u", __func__, "S-E(locked) >> ?-E(middle)", rc); /* 4) now on ?-E (middle), try E-E (exclusive-write) */ jitter4testing(false); - rc = flock(env->lck_mmap.fd, - dont_wait ? LCK_EXCLUSIVE | LCK_DONTWAIT : LCK_EXCLUSIVE, - LCK_LOWER); + rc = flock(env->lck_mmap.fd, dont_wait ? LCK_EXCLUSIVE | LCK_DONTWAIT : LCK_EXCLUSIVE, LCK_LOWER); if (rc != MDBX_SUCCESS) { /* 5) something went wrong, give up */; VERBOSE("%s, err %u", "?-E(middle) >> E-E(exclusive-write)", rc); @@ -558,8 +511,7 @@ MDBX_INTERNAL int lck_upgrade(MDBX_env *env, bool dont_wait) { return MDBX_SUCCESS /* 6) now at E-E (exclusive-write), done */; } -MDBX_INTERNAL int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, - int global_uniqueness_flag) { +MDBX_INTERNAL int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, int global_uniqueness_flag) { (void)env; (void)inprocess_neighbor; (void)global_uniqueness_flag; @@ -568,12 +520,9 @@ MDBX_INTERNAL int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, TOKEN_PRIVILEGES privileges; privileges.PrivilegeCount = 1; privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES, - &token) || - !LookupPrivilegeValue(nullptr, SE_LOCK_MEMORY_NAME, - &privileges.Privileges[0].Luid) || - 
!AdjustTokenPrivileges(token, FALSE, &privileges, sizeof(privileges), - nullptr, nullptr) || + if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES, &token) || + !LookupPrivilegeValue(nullptr, SE_LOCK_MEMORY_NAME, &privileges.Privileges[0].Luid) || + !AdjustTokenPrivileges(token, FALSE, &privileges, sizeof(privileges), nullptr, nullptr) || GetLastError() != ERROR_SUCCESS) imports.SetFileIoOverlappedRange = nullptr; @@ -583,8 +532,7 @@ MDBX_INTERNAL int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, return MDBX_SUCCESS; } -MDBX_INTERNAL int lck_destroy(MDBX_env *env, MDBX_env *inprocess_neighbor, - const uint32_t current_pid) { +MDBX_INTERNAL int lck_destroy(MDBX_env *env, MDBX_env *inprocess_neighbor, const uint32_t current_pid) { (void)current_pid; /* LY: should unmap before releasing the locks to avoid race condition and * STATUS_USER_MAPPED_FILE/ERROR_USER_MAPPED_FILE */ @@ -593,8 +541,7 @@ MDBX_INTERNAL int lck_destroy(MDBX_env *env, MDBX_env *inprocess_neighbor, if (env->lck_mmap.lck) { const bool synced = env->lck_mmap.lck->unsynced_pages.weak == 0; osal_munmap(&env->lck_mmap); - if (synced && !inprocess_neighbor && - env->lck_mmap.fd != INVALID_HANDLE_VALUE && + if (synced && !inprocess_neighbor && env->lck_mmap.fd != INVALID_HANDLE_VALUE && lck_upgrade(env, true) == MDBX_SUCCESS) /* this will fail if LCK is used/mmapped by other process(es) */ osal_ftruncate(env->lck_mmap.fd, 0); diff --git a/src/lck.c b/src/lck.c index 291a257d..1c7c3811 100644 --- a/src/lck.c +++ b/src/lck.c @@ -15,14 +15,12 @@ __cold static int lck_setup_locked(MDBX_env *env) { if (env->lck_mmap.fd == INVALID_HANDLE_VALUE) { env->lck = lckless_stub(env); env->max_readers = UINT_MAX; - DEBUG("lck-setup:%s%s%s", " lck-less", - (env->flags & MDBX_RDONLY) ? " readonly" : "", + DEBUG("lck-setup:%s%s%s", " lck-less", (env->flags & MDBX_RDONLY) ? " readonly" : "", (lck_seize_rc == MDBX_RESULT_TRUE) ? 
" exclusive" : " cooperative"); return lck_seize_rc; } - DEBUG("lck-setup:%s%s%s", " with-lck", - (env->flags & MDBX_RDONLY) ? " readonly" : "", + DEBUG("lck-setup:%s%s%s", " with-lck", (env->flags & MDBX_RDONLY) ? " readonly" : "", (lck_seize_rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative"); MDBX_env *inprocess_neighbor = nullptr; @@ -30,8 +28,7 @@ __cold static int lck_setup_locked(MDBX_env *env) { if (unlikely(MDBX_IS_ERROR(err))) return err; if (inprocess_neighbor) { - if ((globals.runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 || - (inprocess_neighbor->flags & MDBX_EXCLUSIVE) != 0) + if ((globals.runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 || (inprocess_neighbor->flags & MDBX_EXCLUSIVE) != 0) return MDBX_BUSY; if (lck_seize_rc == MDBX_RESULT_TRUE) { err = lck_downgrade(env); @@ -47,52 +44,41 @@ __cold static int lck_setup_locked(MDBX_env *env) { return err; if (lck_seize_rc == MDBX_RESULT_TRUE) { - size = - ceil_powerof2(env->max_readers * sizeof(reader_slot_t) + sizeof(lck_t), - globals.sys_pagesize); + size = ceil_powerof2(env->max_readers * sizeof(reader_slot_t) + sizeof(lck_t), globals.sys_pagesize); jitter4testing(false); } else { if (env->flags & MDBX_EXCLUSIVE) return MDBX_BUSY; - if (size > INT_MAX || (size & (globals.sys_pagesize - 1)) != 0 || - size < globals.sys_pagesize) { + if (size > INT_MAX || (size & (globals.sys_pagesize - 1)) != 0 || size < globals.sys_pagesize) { ERROR("lck-file has invalid size %" PRIu64 " bytes", size); return MDBX_PROBLEM; } } - const size_t maxreaders = - ((size_t)size - sizeof(lck_t)) / sizeof(reader_slot_t); + const size_t maxreaders = ((size_t)size - sizeof(lck_t)) / sizeof(reader_slot_t); if (maxreaders < 4) { ERROR("lck-size too small (up to %" PRIuPTR " readers)", maxreaders); return MDBX_PROBLEM; } - env->max_readers = (maxreaders <= MDBX_READERS_LIMIT) - ? (unsigned)maxreaders - : (unsigned)MDBX_READERS_LIMIT; + env->max_readers = (maxreaders <= MDBX_READERS_LIMIT) ? 
(unsigned)maxreaders : (unsigned)MDBX_READERS_LIMIT; - err = osal_mmap((env->flags & MDBX_EXCLUSIVE) | MDBX_WRITEMAP, &env->lck_mmap, - (size_t)size, (size_t)size, - lck_seize_rc ? MMAP_OPTION_TRUNCATE | MMAP_OPTION_SEMAPHORE - : MMAP_OPTION_SEMAPHORE); + err = osal_mmap((env->flags & MDBX_EXCLUSIVE) | MDBX_WRITEMAP, &env->lck_mmap, (size_t)size, (size_t)size, + lck_seize_rc ? MMAP_OPTION_TRUNCATE | MMAP_OPTION_SEMAPHORE : MMAP_OPTION_SEMAPHORE); if (unlikely(err != MDBX_SUCCESS)) return err; #ifdef MADV_DODUMP - err = madvise(env->lck_mmap.lck, size, MADV_DODUMP) ? ignore_enosys(errno) - : MDBX_SUCCESS; + err = madvise(env->lck_mmap.lck, size, MADV_DODUMP) ? ignore_enosys(errno) : MDBX_SUCCESS; if (unlikely(MDBX_IS_ERROR(err))) return err; #endif /* MADV_DODUMP */ #ifdef MADV_WILLNEED - err = madvise(env->lck_mmap.lck, size, MADV_WILLNEED) ? ignore_enosys(errno) - : MDBX_SUCCESS; + err = madvise(env->lck_mmap.lck, size, MADV_WILLNEED) ? ignore_enosys(errno) : MDBX_SUCCESS; if (unlikely(MDBX_IS_ERROR(err))) return err; #elif defined(POSIX_MADV_WILLNEED) - err = ignore_enosys( - posix_madvise(env->lck_mmap.lck, size, POSIX_MADV_WILLNEED)); + err = ignore_enosys(posix_madvise(env->lck_mmap.lck, size, POSIX_MADV_WILLNEED)); if (unlikely(MDBX_IS_ERROR(err))) return err; #endif /* MADV_WILLNEED */ @@ -108,8 +94,7 @@ __cold static int lck_setup_locked(MDBX_env *env) { #if MDBX_ENABLE_PGOP_STAT lck->pgops.wops.weak = 1; #endif /* MDBX_ENABLE_PGOP_STAT */ - err = osal_msync(&env->lck_mmap, 0, (size_t)size, - MDBX_SYNC_DATA | MDBX_SYNC_SIZE); + err = osal_msync(&env->lck_mmap, 0, (size_t)size, MDBX_SYNC_DATA | MDBX_SYNC_SIZE); if (unlikely(err != MDBX_SUCCESS)) { ERROR("initial-%s for lck-file failed, err %d", "msync/fsync", err); eASSERT(env, MDBX_IS_ERROR(err)); @@ -118,17 +103,14 @@ __cold static int lck_setup_locked(MDBX_env *env) { } else { if (lck->magic_and_version != MDBX_LOCK_MAGIC) { const bool invalid = (lck->magic_and_version >> 8) != MDBX_MAGIC; - ERROR("lock 
region has %s", - invalid - ? "invalid magic" - : "incompatible version (only applications with nearly or the " - "same versions of libmdbx can share the same database)"); + ERROR("lock region has %s", invalid ? "invalid magic" + : "incompatible version (only applications with nearly or the " + "same versions of libmdbx can share the same database)"); return invalid ? MDBX_INVALID : MDBX_VERSION_MISMATCH; } if (lck->os_and_format != MDBX_LOCK_FORMAT) { - ERROR("lock region has os/format signature 0x%" PRIx32 - ", expected 0x%" PRIx32, - lck->os_and_format, MDBX_LOCK_FORMAT); + ERROR("lock region has os/format signature 0x%" PRIx32 ", expected 0x%" PRIx32, lck->os_and_format, + MDBX_LOCK_FORMAT); return MDBX_VERSION_MISMATCH; } } @@ -148,8 +130,7 @@ __cold int lck_setup(MDBX_env *env, mdbx_mode_t mode) { eASSERT(env, env->lazy_fd != INVALID_HANDLE_VALUE); eASSERT(env, env->lck_mmap.fd == INVALID_HANDLE_VALUE); - int err = osal_openfile(MDBX_OPEN_LCK, env, env->pathname.lck, - &env->lck_mmap.fd, mode); + int err = osal_openfile(MDBX_OPEN_LCK, env, env->pathname.lck, &env->lck_mmap.fd, mode); if (err != MDBX_SUCCESS) { switch (err) { default: @@ -186,6 +167,5 @@ __cold int lck_setup(MDBX_env *env, mdbx_mode_t mode) { } void mincore_clean_cache(const MDBX_env *const env) { - memset(env->lck->mincore_cache.begin, -1, - sizeof(env->lck->mincore_cache.begin)); + memset(env->lck->mincore_cache.begin, -1, sizeof(env->lck->mincore_cache.begin)); } diff --git a/src/lck.h b/src/lck.h index 00ef6189..eee38e15 100644 --- a/src/lck.h +++ b/src/lck.h @@ -23,8 +23,7 @@ MDBX_INTERNAL int lck_ipclock_destroy(osal_ipclock_t *ipc); /// MUST NOT initialize shared synchronization objects in memory-mapped /// LCK-file that are already in use. /// \return Error code or zero on success. 
-MDBX_INTERNAL int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, - int global_uniqueness_flag); +MDBX_INTERNAL int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, int global_uniqueness_flag); /// \brief Disconnects from shared interprocess objects and destructs /// synchronization objects linked with MDBX_env instance @@ -43,8 +42,7 @@ MDBX_INTERNAL int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, /// of other instances of MDBX_env within the current process, e.g. /// restore POSIX-fcntl locks after the closing of file descriptors. /// \return Error code (MDBX_PANIC) or zero on success. -MDBX_INTERNAL int lck_destroy(MDBX_env *env, MDBX_env *inprocess_neighbor, - const uint32_t current_pid); +MDBX_INTERNAL int lck_destroy(MDBX_env *env, MDBX_env *inprocess_neighbor, const uint32_t current_pid); /// \brief Connects to shared interprocess locking objects and tries to acquire /// the maximum lock level (shared if exclusive is not available) diff --git a/src/logging_and_debug.c b/src/logging_and_debug.c index 20d8419e..e09f4dd6 100644 --- a/src/logging_and_debug.c +++ b/src/logging_and_debug.c @@ -3,15 +3,13 @@ #include "internals.h" -__cold void debug_log_va(int level, const char *function, int line, - const char *fmt, va_list args) { +__cold void debug_log_va(int level, const char *function, int line, const char *fmt, va_list args) { ENSURE(nullptr, osal_fastmutex_acquire(&globals.debug_lock) == 0); if (globals.logger.ptr) { if (globals.logger_buffer == nullptr) globals.logger.fmt(level, function, line, fmt, args); else { - const int len = vsnprintf(globals.logger_buffer, - globals.logger_buffer_size, fmt, args); + const int len = vsnprintf(globals.logger_buffer, globals.logger_buffer_size, fmt, args); if (len > 0) globals.logger.nofmt(level, function, line, globals.logger_buffer, len); } @@ -51,8 +49,7 @@ __cold void debug_log_va(int level, const char *function, int line, ENSURE(nullptr, osal_fastmutex_release(&globals.debug_lock) == 0); } 
-__cold void debug_log(int level, const char *function, int line, - const char *fmt, ...) { +__cold void debug_log(int level, const char *function, int line, const char *fmt, ...) { va_list args; va_start(args, fmt); debug_log_va(level, function, line, fmt, args); @@ -62,18 +59,15 @@ __cold void debug_log(int level, const char *function, int line, __cold int log_error(const int err, const char *func, unsigned line) { assert(err != MDBX_SUCCESS); if (unlikely(globals.loglevel >= MDBX_LOG_DEBUG) && - (globals.loglevel >= MDBX_LOG_TRACE || - !(err == MDBX_RESULT_TRUE || err == MDBX_NOTFOUND))) { + (globals.loglevel >= MDBX_LOG_TRACE || !(err == MDBX_RESULT_TRUE || err == MDBX_NOTFOUND))) { char buf[256]; - debug_log(MDBX_LOG_ERROR, func, line, "error %d (%s)\n", err, - mdbx_strerror_r(err, buf, sizeof(buf))); + debug_log(MDBX_LOG_ERROR, func, line, "error %d (%s)\n", err, mdbx_strerror_r(err, buf, sizeof(buf))); } return err; } /* Dump a val in ascii or hexadecimal. */ -__cold const char *mdbx_dump_val(const MDBX_val *val, char *const buf, - const size_t bufsize) { +__cold const char *mdbx_dump_val(const MDBX_val *val, char *const buf, const size_t bufsize) { if (!val) return ""; if (!val->iov_len) @@ -97,9 +91,7 @@ __cold const char *mdbx_dump_val(const MDBX_val *val, char *const buf, } if (is_ascii) { - int len = - snprintf(buf, bufsize, "%.*s", - (val->iov_len > INT_MAX) ? INT_MAX : (int)val->iov_len, data); + int len = snprintf(buf, bufsize, "%.*s", (val->iov_len > INT_MAX) ? 
INT_MAX : (int)val->iov_len, data); assert(len > 0 && (size_t)len < bufsize); (void)len; } else { @@ -107,8 +99,7 @@ __cold const char *mdbx_dump_val(const MDBX_val *val, char *const buf, char *ptr = buf; *ptr++ = '<'; for (size_t i = 0; i < val->iov_len && ptr < detent; i++) { - const char hex[16] = {'0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; + const char hex[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; *ptr++ = hex[data[i] >> 4]; *ptr++ = hex[data[i] & 15]; } @@ -145,11 +136,8 @@ __cold const char *pagetype_caption(const uint8_t type, char buf4unknown[16]) { } __cold static const char *leafnode_type(node_t *n) { - static const char *const tp[2][2] = {{"", ": DB"}, - {": sub-page", ": sub-DB"}}; - return (node_flags(n) & N_BIG) - ? ": large page" - : tp[!!(node_flags(n) & N_DUP)][!!(node_flags(n) & N_TREE)]; + static const char *const tp[2][2] = {{"", ": DB"}, {": sub-page", ": sub-DB"}}; + return (node_flags(n) & N_BIG) ? ": large page" : tp[!!(node_flags(n) & N_DUP)][!!(node_flags(n) & N_TREE)]; } /* Display all the keys in the page. 
*/ @@ -181,8 +169,7 @@ __cold void page_list(page_t *mp) { VERBOSE("Overflow page %" PRIaPGNO " pages %u\n", pgno, mp->pages); return; case P_META: - VERBOSE("Meta-page %" PRIaPGNO " txnid %" PRIu64 "\n", pgno, - unaligned_peek_u64(4, page_meta(mp)->txnid_a)); + VERBOSE("Meta-page %" PRIaPGNO " txnid %" PRIu64 "\n", pgno, unaligned_peek_u64(4, page_meta(mp)->txnid_a)); return; default: VERBOSE("Bad page %" PRIaPGNO " flags 0x%X\n", pgno, mp->flags); @@ -193,8 +180,7 @@ __cold void page_list(page_t *mp) { VERBOSE("%s %" PRIaPGNO " numkeys %zu\n", type, pgno, nkeys); for (i = 0; i < nkeys; i++) { - if (is_dupfix_leaf( - mp)) { /* DUPFIX pages have no entries[] or node headers */ + if (is_dupfix_leaf(mp)) { /* DUPFIX pages have no entries[] or node headers */ key = page_dupfix_key(mp, i, nsize = mp->dupfix_ksize); total += nsize; VERBOSE("key %zu: nsize %zu, %s\n", i, nsize, DKEY(&key)); @@ -205,8 +191,7 @@ __cold void page_list(page_t *mp) { key.iov_base = node->payload; nsize = NODESIZE + key.iov_len; if (is_branch(mp)) { - VERBOSE("key %zu: page %" PRIaPGNO ", %s\n", i, node_pgno(node), - DKEY(&key)); + VERBOSE("key %zu: page %" PRIaPGNO ", %s\n", i, node_pgno(node), DKEY(&key)); total += nsize; } else { if (node_flags(node) & N_BIG) @@ -215,18 +200,15 @@ __cold void page_list(page_t *mp) { nsize += node_ds(node); total += nsize; nsize += sizeof(indx_t); - VERBOSE("key %zu: nsize %zu, %s%s\n", i, nsize, DKEY(&key), - leafnode_type(node)); + VERBOSE("key %zu: nsize %zu, %s%s\n", i, nsize, DKEY(&key), leafnode_type(node)); } total = EVEN_CEIL(total); } - VERBOSE("Total: header %u + contents %zu + unused %zu\n", - is_dupfix_leaf(mp) ? PAGEHDRSZ : PAGEHDRSZ + mp->lower, total, - page_room(mp)); + VERBOSE("Total: header %u + contents %zu + unused %zu\n", is_dupfix_leaf(mp) ? 
PAGEHDRSZ : PAGEHDRSZ + mp->lower, + total, page_room(mp)); } -__cold static int setup_debug(MDBX_log_level_t level, MDBX_debug_flags_t flags, - union logger_union logger, char *buffer, +__cold static int setup_debug(MDBX_log_level_t level, MDBX_debug_flags_t flags, union logger_union logger, char *buffer, size_t buffer_size) { ENSURE(nullptr, osal_fastmutex_acquire(&globals.debug_lock) == 0); @@ -239,8 +221,7 @@ __cold static int setup_debug(MDBX_log_level_t level, MDBX_debug_flags_t flags, #if MDBX_DEBUG MDBX_DBG_ASSERT | MDBX_DBG_AUDIT | MDBX_DBG_JITTER | #endif - MDBX_DBG_DUMP | MDBX_DBG_LEGACY_MULTIOPEN | MDBX_DBG_LEGACY_OVERLAP | - MDBX_DBG_DONT_UPGRADE; + MDBX_DBG_DUMP | MDBX_DBG_LEGACY_MULTIOPEN | MDBX_DBG_LEGACY_OVERLAP | MDBX_DBG_DONT_UPGRADE; globals.runtime_flags = (uint8_t)flags; } @@ -255,18 +236,14 @@ __cold static int setup_debug(MDBX_log_level_t level, MDBX_debug_flags_t flags, return rc; } -__cold int mdbx_setup_debug_nofmt(MDBX_log_level_t level, - MDBX_debug_flags_t flags, - MDBX_debug_func_nofmt *logger, char *buffer, - size_t buffer_size) { +__cold int mdbx_setup_debug_nofmt(MDBX_log_level_t level, MDBX_debug_flags_t flags, MDBX_debug_func_nofmt *logger, + char *buffer, size_t buffer_size) { union logger_union thunk; - thunk.nofmt = - (logger && buffer && buffer_size) ? logger : MDBX_LOGGER_NOFMT_DONTCHANGE; + thunk.nofmt = (logger && buffer && buffer_size) ? 
logger : MDBX_LOGGER_NOFMT_DONTCHANGE; return setup_debug(level, flags, thunk, buffer, buffer_size); } -__cold int mdbx_setup_debug(MDBX_log_level_t level, MDBX_debug_flags_t flags, - MDBX_debug_func *logger) { +__cold int mdbx_setup_debug(MDBX_log_level_t level, MDBX_debug_flags_t flags, MDBX_debug_func *logger) { union logger_union thunk; thunk.fmt = logger; return setup_debug(level, flags, thunk, nullptr, 0); diff --git a/src/logging_and_debug.h b/src/logging_and_debug.h index 4feac0b8..9382eafc 100644 --- a/src/logging_and_debug.h +++ b/src/logging_and_debug.h @@ -6,23 +6,17 @@ #include "essentials.h" #ifndef __Wpedantic_format_voidptr -MDBX_MAYBE_UNUSED static inline const void * -__Wpedantic_format_voidptr(const void *ptr) { - return ptr; -} +MDBX_MAYBE_UNUSED static inline const void *__Wpedantic_format_voidptr(const void *ptr) { return ptr; } #define __Wpedantic_format_voidptr(ARG) __Wpedantic_format_voidptr(ARG) #endif /* __Wpedantic_format_voidptr */ -MDBX_INTERNAL void MDBX_PRINTF_ARGS(4, 5) - debug_log(int level, const char *function, int line, const char *fmt, ...) - MDBX_PRINTF_ARGS(4, 5); -MDBX_INTERNAL void debug_log_va(int level, const char *function, int line, - const char *fmt, va_list args); +MDBX_INTERNAL void MDBX_PRINTF_ARGS(4, 5) debug_log(int level, const char *function, int line, const char *fmt, ...) 
+ MDBX_PRINTF_ARGS(4, 5); +MDBX_INTERNAL void debug_log_va(int level, const char *function, int line, const char *fmt, va_list args); #if MDBX_DEBUG #define LOG_ENABLED(LVL) unlikely(LVL <= globals.loglevel) -#define AUDIT_ENABLED() \ - unlikely((globals.runtime_flags & (unsigned)MDBX_DBG_AUDIT)) +#define AUDIT_ENABLED() unlikely((globals.runtime_flags & (unsigned)MDBX_DBG_AUDIT)) #else /* MDBX_DEBUG */ #define LOG_ENABLED(LVL) (LVL < MDBX_LOG_VERBOSE && LVL <= globals.loglevel) #define AUDIT_ENABLED() (0) @@ -31,91 +25,88 @@ MDBX_INTERNAL void debug_log_va(int level, const char *function, int line, #if MDBX_FORCE_ASSERTIONS #define ASSERT_ENABLED() (1) #elif MDBX_DEBUG -#define ASSERT_ENABLED() \ - likely((globals.runtime_flags & (unsigned)MDBX_DBG_ASSERT)) +#define ASSERT_ENABLED() likely((globals.runtime_flags & (unsigned)MDBX_DBG_ASSERT)) #else #define ASSERT_ENABLED() (0) #endif /* ASSERT_ENABLED() */ -#define DEBUG_EXTRA(fmt, ...) \ - do { \ - if (LOG_ENABLED(MDBX_LOG_EXTRA)) \ - debug_log(MDBX_LOG_EXTRA, __func__, __LINE__, fmt, __VA_ARGS__); \ +#define DEBUG_EXTRA(fmt, ...) \ + do { \ + if (LOG_ENABLED(MDBX_LOG_EXTRA)) \ + debug_log(MDBX_LOG_EXTRA, __func__, __LINE__, fmt, __VA_ARGS__); \ } while (0) -#define DEBUG_EXTRA_PRINT(fmt, ...) \ - do { \ - if (LOG_ENABLED(MDBX_LOG_EXTRA)) \ - debug_log(MDBX_LOG_EXTRA, nullptr, 0, fmt, __VA_ARGS__); \ +#define DEBUG_EXTRA_PRINT(fmt, ...) \ + do { \ + if (LOG_ENABLED(MDBX_LOG_EXTRA)) \ + debug_log(MDBX_LOG_EXTRA, nullptr, 0, fmt, __VA_ARGS__); \ } while (0) -#define TRACE(fmt, ...) \ - do { \ - if (LOG_ENABLED(MDBX_LOG_TRACE)) \ - debug_log(MDBX_LOG_TRACE, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ +#define TRACE(fmt, ...) \ + do { \ + if (LOG_ENABLED(MDBX_LOG_TRACE)) \ + debug_log(MDBX_LOG_TRACE, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ } while (0) -#define DEBUG(fmt, ...) 
\ - do { \ - if (LOG_ENABLED(MDBX_LOG_DEBUG)) \ - debug_log(MDBX_LOG_DEBUG, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ +#define DEBUG(fmt, ...) \ + do { \ + if (LOG_ENABLED(MDBX_LOG_DEBUG)) \ + debug_log(MDBX_LOG_DEBUG, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ } while (0) -#define VERBOSE(fmt, ...) \ - do { \ - if (LOG_ENABLED(MDBX_LOG_VERBOSE)) \ - debug_log(MDBX_LOG_VERBOSE, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ +#define VERBOSE(fmt, ...) \ + do { \ + if (LOG_ENABLED(MDBX_LOG_VERBOSE)) \ + debug_log(MDBX_LOG_VERBOSE, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ } while (0) -#define NOTICE(fmt, ...) \ - do { \ - if (LOG_ENABLED(MDBX_LOG_NOTICE)) \ - debug_log(MDBX_LOG_NOTICE, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ +#define NOTICE(fmt, ...) \ + do { \ + if (LOG_ENABLED(MDBX_LOG_NOTICE)) \ + debug_log(MDBX_LOG_NOTICE, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ } while (0) -#define WARNING(fmt, ...) \ - do { \ - if (LOG_ENABLED(MDBX_LOG_WARN)) \ - debug_log(MDBX_LOG_WARN, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ +#define WARNING(fmt, ...) \ + do { \ + if (LOG_ENABLED(MDBX_LOG_WARN)) \ + debug_log(MDBX_LOG_WARN, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ } while (0) -#undef ERROR /* wingdi.h \ +#undef ERROR /* wingdi.h \ Yeah, morons from M$ put such definition to the public header. */ -#define ERROR(fmt, ...) \ - do { \ - if (LOG_ENABLED(MDBX_LOG_ERROR)) \ - debug_log(MDBX_LOG_ERROR, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ +#define ERROR(fmt, ...) \ + do { \ + if (LOG_ENABLED(MDBX_LOG_ERROR)) \ + debug_log(MDBX_LOG_ERROR, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ } while (0) -#define FATAL(fmt, ...) \ - debug_log(MDBX_LOG_FATAL, __func__, __LINE__, fmt "\n", __VA_ARGS__); +#define FATAL(fmt, ...) 
debug_log(MDBX_LOG_FATAL, __func__, __LINE__, fmt "\n", __VA_ARGS__); #if MDBX_DEBUG #define ASSERT_FAIL(env, msg, func, line) mdbx_assert_fail(env, msg, func, line) #else /* MDBX_DEBUG */ -MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func, - unsigned line); -#define ASSERT_FAIL(env, msg, func, line) \ - do { \ - (void)(env); \ - assert_fail(msg, func, line); \ +MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func, unsigned line); +#define ASSERT_FAIL(env, msg, func, line) \ + do { \ + (void)(env); \ + assert_fail(msg, func, line); \ } while (0) #endif /* MDBX_DEBUG */ -#define ENSURE_MSG(env, expr, msg) \ - do { \ - if (unlikely(!(expr))) \ - ASSERT_FAIL(env, msg, __func__, __LINE__); \ +#define ENSURE_MSG(env, expr, msg) \ + do { \ + if (unlikely(!(expr))) \ + ASSERT_FAIL(env, msg, __func__, __LINE__); \ } while (0) #define ENSURE(env, expr) ENSURE_MSG(env, expr, #expr) /* assert(3) variant in environment context */ -#define eASSERT(env, expr) \ - do { \ - if (ASSERT_ENABLED()) \ - ENSURE(env, expr); \ +#define eASSERT(env, expr) \ + do { \ + if (ASSERT_ENABLED()) \ + ENSURE(env, expr); \ } while (0) /* assert(3) variant in cursor context */ @@ -140,14 +131,12 @@ MDBX_MAYBE_UNUSED static inline void jitter4testing(bool tiny) { MDBX_MAYBE_UNUSED MDBX_INTERNAL void page_list(page_t *mp); -MDBX_INTERNAL const char *pagetype_caption(const uint8_t type, - char buf4unknown[16]); +MDBX_INTERNAL const char *pagetype_caption(const uint8_t type, char buf4unknown[16]); /* Key size which fits in a DKBUF (debug key buffer). 
*/ #define DKBUF_MAX 127 #define DKBUF char dbg_kbuf[DKBUF_MAX * 4 + 2] #define DKEY(x) mdbx_dump_val(x, dbg_kbuf, DKBUF_MAX * 2 + 1) -#define DVAL(x) \ - mdbx_dump_val(x, dbg_kbuf + DKBUF_MAX * 2 + 1, DKBUF_MAX * 2 + 1) +#define DVAL(x) mdbx_dump_val(x, dbg_kbuf + DKBUF_MAX * 2 + 1, DKBUF_MAX * 2 + 1) #if MDBX_DEBUG #define DKBUF_DEBUG DKBUF @@ -161,8 +150,7 @@ MDBX_INTERNAL const char *pagetype_caption(const uint8_t type, MDBX_INTERNAL int log_error(const int err, const char *func, unsigned line); -MDBX_MAYBE_UNUSED static inline int -log_if_error(const int err, const char *func, unsigned line) { +MDBX_MAYBE_UNUSED static inline int log_if_error(const int err, const char *func, unsigned line) { if (likely(err == MDBX_SUCCESS)) return err; int rc = log_error(err, func, line); diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 7c83aa8b..17ca4718 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -187,21 +187,19 @@ __cold bug::~bug() noexcept {} throw bug(what_and_where); } -#define RAISE_BUG(line, condition, function, file) \ - do { \ - static MDBX_CXX11_CONSTEXPR_VAR trouble_location bug(line, condition, \ - function, file); \ - raise_bug(bug); \ +#define RAISE_BUG(line, condition, function, file) \ + do { \ + static MDBX_CXX11_CONSTEXPR_VAR trouble_location bug(line, condition, function, file); \ + raise_bug(bug); \ } while (0) -#define ENSURE(condition) \ - do \ - if (MDBX_UNLIKELY(!(condition))) \ - MDBX_CXX20_UNLIKELY RAISE_BUG(__LINE__, #condition, __func__, __FILE__); \ +#define ENSURE(condition) \ + do \ + if (MDBX_UNLIKELY(!(condition))) \ + MDBX_CXX20_UNLIKELY RAISE_BUG(__LINE__, #condition, __func__, __FILE__); \ while (0) -#define NOT_IMPLEMENTED() \ - RAISE_BUG(__LINE__, "not_implemented", __func__, __FILE__); +#define NOT_IMPLEMENTED() RAISE_BUG(__LINE__, "not_implemented", __func__, __FILE__); #endif /* Unused*/ @@ -226,14 +224,12 @@ struct line_wrapper { } }; -template -struct temp_buffer { +template struct temp_buffer { TYPE inplace[(INPLACE_BYTES + 
sizeof(TYPE) - 1) / sizeof(TYPE)]; const size_t size; TYPE *const area; temp_buffer(size_t bytes) - : size((bytes + sizeof(TYPE) - 1) / sizeof(TYPE)), - area((bytes > sizeof(inplace)) ? new TYPE[size] : inplace) { + : size((bytes + sizeof(TYPE) - 1) / sizeof(TYPE)), area((bytes > sizeof(inplace)) ? new TYPE[size] : inplace) { memset(area, 0, sizeof(TYPE) * size); } ~temp_buffer() { @@ -265,8 +261,7 @@ struct temp_buffer { namespace mdbx { [[noreturn]] __cold void throw_max_length_exceeded() { - throw std::length_error( - "mdbx:: Exceeded the maximal length of data/slice/buffer."); + throw std::length_error("mdbx:: Exceeded the maximal length of data/slice/buffer."); } [[noreturn]] __cold void throw_too_small_target_buffer() { @@ -279,38 +274,31 @@ namespace mdbx { } [[noreturn]] __cold void throw_allocators_mismatch() { - throw std::logic_error( - "mdbx:: An allocators mismatch, so an object could not be transferred " - "into an incompatible memory allocation scheme."); + throw std::logic_error("mdbx:: An allocators mismatch, so an object could not be transferred " + "into an incompatible memory allocation scheme."); } [[noreturn]] __cold void throw_incomparable_cursors() { - throw std::logic_error( - "mdbx:: incomparable and/or invalid cursors to compare positions."); + throw std::logic_error("mdbx:: incomparable and/or invalid cursors to compare positions."); } -[[noreturn]] __cold void throw_bad_value_size() { - throw bad_value_size(MDBX_BAD_VALSIZE); -} +[[noreturn]] __cold void throw_bad_value_size() { throw bad_value_size(MDBX_BAD_VALSIZE); } -__cold exception::exception(const ::mdbx::error &error) noexcept - : base(error.what()), error_(error) {} +__cold exception::exception(const ::mdbx::error &error) noexcept : base(error.what()), error_(error) {} __cold exception::~exception() noexcept {} static std::atomic_int fatal_countdown; -__cold fatal::fatal(const ::mdbx::error &error) noexcept : base(error) { - ++fatal_countdown; -} +__cold fatal::fatal(const 
::mdbx::error &error) noexcept : base(error) { ++fatal_countdown; } __cold fatal::~fatal() noexcept { if (--fatal_countdown == 0) std::terminate(); } -#define DEFINE_EXCEPTION(NAME) \ - __cold NAME::NAME(const ::mdbx::error &rc) : exception(rc) {} \ +#define DEFINE_EXCEPTION(NAME) \ + __cold NAME::NAME(const ::mdbx::error &rc) : exception(rc) {} \ __cold NAME::~NAME() noexcept {} DEFINE_EXCEPTION(bad_map_id) @@ -352,8 +340,8 @@ __cold const char *error::what() const noexcept { return mdbx_liberr2str(code()); switch (code()) { -#define ERROR_CASE(CODE) \ - case CODE: \ +#define ERROR_CASE(CODE) \ + case CODE: \ return MDBX_STRINGIFY(CODE) ERROR_CASE(MDBX_ENODATA); ERROR_CASE(MDBX_EINVAL); @@ -379,8 +367,7 @@ __cold std::string error::message() const { return std::string(msg ? msg : "unknown"); } -[[noreturn]] __cold void error::panic(const char *context, - const char *func) const noexcept { +[[noreturn]] __cold void error::panic(const char *context, const char *func) const noexcept { assert(code() != MDBX_SUCCESS); ::mdbx_panic("mdbx::%s.%s(): \"%s\" (%d)", context, func, what(), code()); std::terminate(); @@ -397,8 +384,8 @@ __cold void error::throw_exception() const { throw std::logic_error("MDBX_SUCCESS (MDBX_RESULT_FALSE)"); case MDBX_RESULT_TRUE: throw std::logic_error("MDBX_RESULT_TRUE"); -#define CASE_EXCEPTION(NAME, CODE) \ - case CODE: \ +#define CASE_EXCEPTION(NAME, CODE) \ + case CODE: \ throw NAME(code()) CASE_EXCEPTION(bad_map_id, MDBX_BAD_DBI); CASE_EXCEPTION(bad_transaction, MDBX_BAD_TXN); @@ -702,27 +689,23 @@ char *to_hex::write_bytes(char *__restrict const dest, size_t dest_size) const { return out; } -char *from_hex::write_bytes(char *__restrict const dest, - size_t dest_size) const { +char *from_hex::write_bytes(char *__restrict const dest, size_t dest_size) const { if (MDBX_UNLIKELY(source.length() % 2 && !ignore_spaces)) - MDBX_CXX20_UNLIKELY throw std::domain_error( - "mdbx::from_hex:: odd length of hexadecimal string"); + MDBX_CXX20_UNLIKELY 
throw std::domain_error("mdbx::from_hex:: odd length of hexadecimal string"); if (MDBX_UNLIKELY(envisage_result_length() > dest_size)) MDBX_CXX20_UNLIKELY throw_too_small_target_buffer(); auto ptr = dest; auto src = source.byte_ptr(); for (auto left = source.length(); left > 0;) { - if (MDBX_UNLIKELY(*src <= ' ') && - MDBX_LIKELY(ignore_spaces && isspace(*src))) { + if (MDBX_UNLIKELY(*src <= ' ') && MDBX_LIKELY(ignore_spaces && isspace(*src))) { ++src; --left; continue; } if (MDBX_UNLIKELY(left < 1 || !isxdigit(src[0]) || !isxdigit(src[1]))) - MDBX_CXX20_UNLIKELY throw std::domain_error( - "mdbx::from_hex:: invalid hexadecimal string"); + MDBX_CXX20_UNLIKELY throw std::domain_error("mdbx::from_hex:: invalid hexadecimal string"); int8_t hi = src[0]; hi = (hi | 0x20) - 'a'; @@ -747,8 +730,7 @@ bool from_hex::is_erroneous() const noexcept { bool got = false; auto src = source.byte_ptr(); for (auto left = source.length(); left > 0;) { - if (MDBX_UNLIKELY(*src <= ' ') && - MDBX_LIKELY(ignore_spaces && isspace(*src))) { + if (MDBX_UNLIKELY(*src <= ' ') && MDBX_LIKELY(ignore_spaces && isspace(*src))) { ++src; --left; continue; @@ -780,25 +762,21 @@ using b58_uint = uint_fast32_t; #endif struct b58_buffer : public temp_buffer { - b58_buffer(size_t bytes, size_t estimation_ratio_numerator, - size_t estimation_ratio_denominator, size_t extra = 0) - : temp_buffer((/* пересчитываем по указанной пропорции */ - bytes = (bytes * estimation_ratio_numerator + - estimation_ratio_denominator - 1) / - estimation_ratio_denominator, - /* учитываем резервный старший байт в каждом слове */ - ((bytes + sizeof(b58_uint) - 2) / (sizeof(b58_uint) - 1) * - sizeof(b58_uint) + - extra) * - sizeof(b58_uint))) {} + b58_buffer(size_t bytes, size_t estimation_ratio_numerator, size_t estimation_ratio_denominator, size_t extra = 0) + : temp_buffer( + (/* пересчитываем по указанной пропорции */ + bytes = + (bytes * estimation_ratio_numerator + estimation_ratio_denominator - 1) / 
estimation_ratio_denominator, + /* учитываем резервный старший байт в каждом слове */ + ((bytes + sizeof(b58_uint) - 2) / (sizeof(b58_uint) - 1) * sizeof(b58_uint) + extra) * sizeof(b58_uint))) { + } }; static byte b58_8to11(b58_uint &v) noexcept { - static const char b58_alphabet[58] = { - '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', - 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', - 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', - 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'}; + static const char b58_alphabet[58] = {'1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', + 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', + 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'}; const auto i = size_t(v % 58); v /= 58; @@ -807,9 +785,8 @@ static byte b58_8to11(b58_uint &v) noexcept { static slice b58_encode(b58_buffer &buf, const byte *begin, const byte *end) { auto high = buf.end(); - const auto modulo = - b58_uint((sizeof(b58_uint) > 4) ? UINT64_C(0x1A636A90B07A00) /* 58^9 */ - : UINT32_C(0xACAD10) /* 58^4 */); + const auto modulo = b58_uint((sizeof(b58_uint) > 4) ? 
UINT64_C(0x1A636A90B07A00) /* 58^9 */ + : UINT32_C(0xACAD10) /* 58^4 */); static_assert(sizeof(modulo) == 4 || sizeof(modulo) == 8, "WTF?"); while (begin < end) { b58_uint carry = *begin++; @@ -855,8 +832,7 @@ static slice b58_encode(b58_buffer &buf, const byte *begin, const byte *end) { return slice(output, ptr); } -char *to_base58::write_bytes(char *__restrict const dest, - size_t dest_size) const { +char *to_base58::write_bytes(char *__restrict const dest, size_t dest_size) const { if (MDBX_UNLIKELY(envisage_result_length() > dest_size)) MDBX_CXX20_UNLIKELY throw_too_small_target_buffer(); @@ -927,8 +903,7 @@ const signed char b58_map[256] = { IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL // f0 }; -static slice b58_decode(b58_buffer &buf, const byte *begin, const byte *end, - bool ignore_spaces) { +static slice b58_decode(b58_buffer &buf, const byte *begin, const byte *end, bool ignore_spaces) { auto high = buf.end(); while (begin < end) { const auto c = b58_map[*begin++]; @@ -969,8 +944,7 @@ static slice b58_decode(b58_buffer &buf, const byte *begin, const byte *end, return slice(output, ptr); } -char *from_base58::write_bytes(char *__restrict const dest, - size_t dest_size) const { +char *from_base58::write_bytes(char *__restrict const dest, size_t dest_size) const { if (MDBX_UNLIKELY(envisage_result_length() > dest_size)) MDBX_CXX20_UNLIKELY throw_too_small_target_buffer(); @@ -996,8 +970,7 @@ bool from_base58::is_erroneous() const noexcept { auto begin = source.byte_ptr(); auto const end = source.end_byte_ptr(); while (begin < end) { - if (MDBX_UNLIKELY(b58_map[*begin] < 0 && - !(ignore_spaces && isspace(*begin)))) + if (MDBX_UNLIKELY(b58_map[*begin] < 0 && !(ignore_spaces && isspace(*begin)))) return true; ++begin; } @@ -1006,22 +979,18 @@ bool from_base58::is_erroneous() const noexcept { //------------------------------------------------------------------------------ -static inline void b64_3to4(const byte x, const byte y, const byte z, - 
char *__restrict dest) noexcept { - static const byte alphabet[64] = { - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', - 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', - 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'}; +static inline void b64_3to4(const byte x, const byte y, const byte z, char *__restrict dest) noexcept { + static const byte alphabet[64] = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', + 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', + 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'}; dest[0] = alphabet[(x & 0xfc) >> 2]; dest[1] = alphabet[((x & 0x03) << 4) + ((y & 0xf0) >> 4)]; dest[2] = alphabet[((y & 0x0f) << 2) + ((z & 0xc0) >> 6)]; dest[3] = alphabet[z & 0x3f]; } -char *to_base64::write_bytes(char *__restrict const dest, - size_t dest_size) const { +char *to_base64::write_bytes(char *__restrict const dest, size_t dest_size) const { if (MDBX_UNLIKELY(envisage_result_length() > dest_size)) MDBX_CXX20_UNLIKELY throw_too_small_target_buffer(); @@ -1115,8 +1084,7 @@ static const signed char b64_map[256] = { IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL // f0 }; -static inline signed char b64_4to3(signed char a, signed char b, signed char c, - signed char d, +static inline signed char b64_4to3(signed char a, signed char b, signed char c, signed char d, char *__restrict dest) noexcept { dest[0] = byte((a << 2) + ((b & 0x30) >> 4)); dest[1] = byte(((b & 0xf) << 4) + ((c & 0x3c) >> 2)); @@ -1124,19 +1092,16 @@ static inline signed char b64_4to3(signed char a, signed char b, signed char c, return a | b | c | d; } -char *from_base64::write_bytes(char *__restrict const dest, - 
size_t dest_size) const { +char *from_base64::write_bytes(char *__restrict const dest, size_t dest_size) const { if (MDBX_UNLIKELY(source.length() % 4 && !ignore_spaces)) - MDBX_CXX20_UNLIKELY throw std::domain_error( - "mdbx::from_base64:: odd length of base64 string"); + MDBX_CXX20_UNLIKELY throw std::domain_error("mdbx::from_base64:: odd length of base64 string"); if (MDBX_UNLIKELY(envisage_result_length() > dest_size)) MDBX_CXX20_UNLIKELY throw_too_small_target_buffer(); auto ptr = dest; auto src = source.byte_ptr(); for (auto left = source.length(); left > 0;) { - if (MDBX_UNLIKELY(*src <= ' ') && - MDBX_LIKELY(ignore_spaces && isspace(*src))) { + if (MDBX_UNLIKELY(*src <= ' ') && MDBX_LIKELY(ignore_spaces && isspace(*src))) { ++src; --left; continue; @@ -1147,8 +1112,7 @@ char *from_base64::write_bytes(char *__restrict const dest, bailout: throw std::domain_error("mdbx::from_base64:: invalid base64 string"); } - const signed char a = b64_map[src[0]], b = b64_map[src[1]], - c = b64_map[src[2]], d = b64_map[src[3]]; + const signed char a = b64_map[src[0]], b = b64_map[src[1]], c = b64_map[src[2]], d = b64_map[src[3]]; if (MDBX_UNLIKELY(b64_4to3(a, b, c, d, ptr) < 0)) { if (left == 4 && (a | b) >= 0 && d == EQ) { if (c >= 0) { @@ -1177,8 +1141,7 @@ bool from_base64::is_erroneous() const noexcept { bool got = false; auto src = source.byte_ptr(); for (auto left = source.length(); left > 0;) { - if (MDBX_UNLIKELY(*src <= ' ') && - MDBX_LIKELY(ignore_spaces && isspace(*src))) { + if (MDBX_UNLIKELY(*src <= ' ') && MDBX_LIKELY(ignore_spaces && isspace(*src))) { ++src; --left; continue; @@ -1186,8 +1149,7 @@ bool from_base64::is_erroneous() const noexcept { if (MDBX_UNLIKELY(left < 3)) MDBX_CXX20_UNLIKELY return false; - const signed char a = b64_map[src[0]], b = b64_map[src[1]], - c = b64_map[src[2]], d = b64_map[src[3]]; + const signed char a = b64_map[src[0]], b = b64_map[src[1]], c = b64_map[src[2]], d = b64_map[src[3]]; if (MDBX_UNLIKELY((a | b | c | d) < 0)) 
MDBX_CXX20_UNLIKELY { if (left == 4 && (a | b) >= 0 && d == EQ && (c >= 0 || c == d)) @@ -1205,8 +1167,7 @@ bool from_base64::is_erroneous() const noexcept { template class LIBMDBX_API_TYPE buffer; -#if defined(__cpp_lib_memory_resource) && \ - __cpp_lib_memory_resource >= 201603L && _GLIBCXX_USE_CXX11_ABI +#if defined(__cpp_lib_memory_resource) && __cpp_lib_memory_resource >= 201603L && _GLIBCXX_USE_CXX11_ABI template class LIBMDBX_API_TYPE buffer; #endif /* __cpp_lib_memory_resource >= 201603L */ @@ -1225,8 +1186,7 @@ static inline MDBX_env_flags_t mode2flags(env::mode mode) { } } -__cold MDBX_env_flags_t -env::operate_parameters::make_flags(bool accede, bool use_subdirectory) const { +__cold MDBX_env_flags_t env::operate_parameters::make_flags(bool accede, bool use_subdirectory) const { MDBX_env_flags_t flags = mode2flags(mode); if (accede) flags |= MDBX_ACCEDE; @@ -1252,8 +1212,7 @@ env::operate_parameters::make_flags(bool accede, bool use_subdirectory) const { flags |= MDBX_LIFORECLAIM; switch (durability) { default: - MDBX_CXX20_UNLIKELY throw std::invalid_argument( - "db::durability is invalid"); + MDBX_CXX20_UNLIKELY throw std::invalid_argument("db::durability is invalid"); case env::durability::robust_synchronous: break; case env::durability::half_synchronous_weak_last: @@ -1271,16 +1230,13 @@ env::operate_parameters::make_flags(bool accede, bool use_subdirectory) const { return flags; } -env::mode -env::operate_parameters::mode_from_flags(MDBX_env_flags_t flags) noexcept { +env::mode env::operate_parameters::mode_from_flags(MDBX_env_flags_t flags) noexcept { if (flags & MDBX_RDONLY) return env::mode::readonly; - return (flags & MDBX_WRITEMAP) ? env::mode::write_mapped_io - : env::mode::write_file_io; + return (flags & MDBX_WRITEMAP) ? 
env::mode::write_mapped_io : env::mode::write_file_io; } -env::durability env::operate_parameters::durability_from_flags( - MDBX_env_flags_t flags) noexcept { +env::durability env::operate_parameters::durability_from_flags(MDBX_env_flags_t flags) noexcept { if ((flags & MDBX_UTTERLY_NOSYNC) == MDBX_UTTERLY_NOSYNC) return env::durability::whole_fragile; if (flags & MDBX_SAFE_NOSYNC) @@ -1291,71 +1247,51 @@ env::durability env::operate_parameters::durability_from_flags( } env::reclaiming_options::reclaiming_options(MDBX_env_flags_t flags) noexcept - : lifo((flags & MDBX_LIFORECLAIM) ? true : false), - coalesce((flags & MDBX_COALESCE) ? true : false) {} + : lifo((flags & MDBX_LIFORECLAIM) ? true : false), coalesce((flags & MDBX_COALESCE) ? true : false) {} env::operate_options::operate_options(MDBX_env_flags_t flags) noexcept - : no_sticky_threads(((flags & (MDBX_NOSTICKYTHREADS | MDBX_EXCLUSIVE)) == - MDBX_NOSTICKYTHREADS) - ? true - : false), - nested_write_transactions((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) ? false - : true), - exclusive((flags & MDBX_EXCLUSIVE) ? true : false), - disable_readahead((flags & MDBX_NORDAHEAD) ? true : false), + : no_sticky_threads(((flags & (MDBX_NOSTICKYTHREADS | MDBX_EXCLUSIVE)) == MDBX_NOSTICKYTHREADS) ? true : false), + nested_write_transactions((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) ? false : true), + exclusive((flags & MDBX_EXCLUSIVE) ? true : false), disable_readahead((flags & MDBX_NORDAHEAD) ? true : false), disable_clear_memory((flags & MDBX_NOMEMINIT) ? 
true : false) {} -bool env::is_pristine() const { - return get_stat().ms_mod_txnid == 0 && - get_info().mi_recent_txnid == INITIAL_TXNID; -} +bool env::is_pristine() const { return get_stat().ms_mod_txnid == 0 && get_info().mi_recent_txnid == INITIAL_TXNID; } bool env::is_empty() const { return get_stat().ms_leaf_pages == 0; } __cold env &env::copy(filehandle fd, bool compactify, bool force_dynamic_size) { - error::success_or_throw( - ::mdbx_env_copy2fd(handle_, fd, - (compactify ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS) | - (force_dynamic_size ? MDBX_CP_FORCE_DYNAMIC_SIZE - : MDBX_CP_DEFAULTS))); + error::success_or_throw(::mdbx_env_copy2fd(handle_, fd, + (compactify ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS) | + (force_dynamic_size ? MDBX_CP_FORCE_DYNAMIC_SIZE : MDBX_CP_DEFAULTS))); return *this; } -__cold env &env::copy(const char *destination, bool compactify, - bool force_dynamic_size) { - error::success_or_throw( - ::mdbx_env_copy(handle_, destination, - (compactify ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS) | - (force_dynamic_size ? MDBX_CP_FORCE_DYNAMIC_SIZE - : MDBX_CP_DEFAULTS))); +__cold env &env::copy(const char *destination, bool compactify, bool force_dynamic_size) { + error::success_or_throw(::mdbx_env_copy(handle_, destination, + (compactify ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS) | + (force_dynamic_size ? MDBX_CP_FORCE_DYNAMIC_SIZE : MDBX_CP_DEFAULTS))); return *this; } -__cold env &env::copy(const ::std::string &destination, bool compactify, - bool force_dynamic_size) { +__cold env &env::copy(const ::std::string &destination, bool compactify, bool force_dynamic_size) { return copy(destination.c_str(), compactify, force_dynamic_size); } #if defined(_WIN32) || defined(_WIN64) -__cold env &env::copy(const wchar_t *destination, bool compactify, - bool force_dynamic_size) { - error::success_or_throw( - ::mdbx_env_copyW(handle_, destination, - (compactify ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS) | - (force_dynamic_size ? 
MDBX_CP_FORCE_DYNAMIC_SIZE - : MDBX_CP_DEFAULTS))); +__cold env &env::copy(const wchar_t *destination, bool compactify, bool force_dynamic_size) { + error::success_or_throw(::mdbx_env_copyW(handle_, destination, + (compactify ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS) | + (force_dynamic_size ? MDBX_CP_FORCE_DYNAMIC_SIZE : MDBX_CP_DEFAULTS))); return *this; } -env &env::copy(const ::std::wstring &destination, bool compactify, - bool force_dynamic_size) { +env &env::copy(const ::std::wstring &destination, bool compactify, bool force_dynamic_size) { return copy(destination.c_str(), compactify, force_dynamic_size); } #endif /* Windows */ #ifdef MDBX_STD_FILESYSTEM_PATH -__cold env &env::copy(const MDBX_STD_FILESYSTEM_PATH &destination, - bool compactify, bool force_dynamic_size) { +__cold env &env::copy(const MDBX_STD_FILESYSTEM_PATH &destination, bool compactify, bool force_dynamic_size) { return copy(destination.native(), compactify, force_dynamic_size); } #endif /* MDBX_STD_FILESYSTEM_PATH */ @@ -1375,8 +1311,7 @@ __cold path env::get_path() const { } __cold bool env::remove(const char *pathname, const remove_mode mode) { - return !error::boolean_or_throw( - ::mdbx_env_delete(pathname, MDBX_env_delete_mode_t(mode))); + return !error::boolean_or_throw(::mdbx_env_delete(pathname, MDBX_env_delete_mode_t(mode))); } __cold bool env::remove(const ::std::string &pathname, const remove_mode mode) { @@ -1385,19 +1320,16 @@ __cold bool env::remove(const ::std::string &pathname, const remove_mode mode) { #if defined(_WIN32) || defined(_WIN64) __cold bool env::remove(const wchar_t *pathname, const remove_mode mode) { - return !error::boolean_or_throw( - ::mdbx_env_deleteW(pathname, MDBX_env_delete_mode_t(mode))); + return !error::boolean_or_throw(::mdbx_env_deleteW(pathname, MDBX_env_delete_mode_t(mode))); } -__cold bool env::remove(const ::std::wstring &pathname, - const remove_mode mode) { +__cold bool env::remove(const ::std::wstring &pathname, const remove_mode mode) { return 
remove(pathname.c_str(), mode); } #endif /* Windows */ #ifdef MDBX_STD_FILESYSTEM_PATH -__cold bool env::remove(const MDBX_STD_FILESYSTEM_PATH &pathname, - const remove_mode mode) { +__cold bool env::remove(const MDBX_STD_FILESYSTEM_PATH &pathname, const remove_mode mode) { return remove(pathname.native(), mode); } #endif /* MDBX_STD_FILESYSTEM_PATH */ @@ -1413,13 +1345,11 @@ static inline MDBX_env *create_env() { __cold env_managed::~env_managed() noexcept { if (MDBX_UNLIKELY(handle_)) - MDBX_CXX20_UNLIKELY error::success_or_panic( - ::mdbx_env_close(handle_), "mdbx::~env()", "mdbx_env_close"); + MDBX_CXX20_UNLIKELY error::success_or_panic(::mdbx_env_close(handle_), "mdbx::~env()", "mdbx_env_close"); } __cold void env_managed::close(bool dont_sync) { - const error rc = - static_cast(::mdbx_env_close_ex(handle_, dont_sync)); + const error rc = static_cast(::mdbx_env_close_ex(handle_, dont_sync)); switch (rc.code()) { case MDBX_EBADSIGN: MDBX_CXX20_UNLIKELY handle_ = nullptr; @@ -1438,87 +1368,69 @@ __cold void env_managed::setup(unsigned max_maps, unsigned max_readers) { error::success_or_throw(::mdbx_env_set_maxdbs(handle_, max_maps)); } -__cold env_managed::env_managed(const char *pathname, - const operate_parameters &op, bool accede) +__cold env_managed::env_managed(const char *pathname, const operate_parameters &op, bool accede) : env_managed(create_env()) { setup(op.max_maps, op.max_readers); - error::success_or_throw( - ::mdbx_env_open(handle_, pathname, op.make_flags(accede), 0)); + error::success_or_throw(::mdbx_env_open(handle_, pathname, op.make_flags(accede), 0)); - if (op.options.nested_write_transactions && - !get_options().nested_write_transactions) + if (op.options.nested_write_transactions && !get_options().nested_write_transactions) MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_INCOMPATIBLE); } -__cold env_managed::env_managed(const char *pathname, - const env_managed::create_parameters &cp, +__cold env_managed::env_managed(const char *pathname, 
const env_managed::create_parameters &cp, const env::operate_parameters &op, bool accede) : env_managed(create_env()) { setup(op.max_maps, op.max_readers); set_geometry(cp.geometry); - error::success_or_throw(::mdbx_env_open( - handle_, pathname, op.make_flags(accede, cp.use_subdirectory), - cp.file_mode_bits)); + error::success_or_throw( + ::mdbx_env_open(handle_, pathname, op.make_flags(accede, cp.use_subdirectory), cp.file_mode_bits)); - if (op.options.nested_write_transactions && - !get_options().nested_write_transactions) + if (op.options.nested_write_transactions && !get_options().nested_write_transactions) MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_INCOMPATIBLE); } -__cold env_managed::env_managed(const ::std::string &pathname, - const operate_parameters &op, bool accede) +__cold env_managed::env_managed(const ::std::string &pathname, const operate_parameters &op, bool accede) : env_managed(pathname.c_str(), op, accede) {} -__cold env_managed::env_managed(const ::std::string &pathname, - const env_managed::create_parameters &cp, +__cold env_managed::env_managed(const ::std::string &pathname, const env_managed::create_parameters &cp, const env::operate_parameters &op, bool accede) : env_managed(pathname.c_str(), cp, op, accede) {} #if defined(_WIN32) || defined(_WIN64) -__cold env_managed::env_managed(const wchar_t *pathname, - const operate_parameters &op, bool accede) +__cold env_managed::env_managed(const wchar_t *pathname, const operate_parameters &op, bool accede) : env_managed(create_env()) { setup(op.max_maps, op.max_readers); - error::success_or_throw( - ::mdbx_env_openW(handle_, pathname, op.make_flags(accede), 0)); + error::success_or_throw(::mdbx_env_openW(handle_, pathname, op.make_flags(accede), 0)); - if (op.options.nested_write_transactions && - !get_options().nested_write_transactions) + if (op.options.nested_write_transactions && !get_options().nested_write_transactions) MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_INCOMPATIBLE); } 
-__cold env_managed::env_managed(const wchar_t *pathname, - const env_managed::create_parameters &cp, +__cold env_managed::env_managed(const wchar_t *pathname, const env_managed::create_parameters &cp, const env::operate_parameters &op, bool accede) : env_managed(create_env()) { setup(op.max_maps, op.max_readers); set_geometry(cp.geometry); - error::success_or_throw(::mdbx_env_openW( - handle_, pathname, op.make_flags(accede, cp.use_subdirectory), - cp.file_mode_bits)); + error::success_or_throw( + ::mdbx_env_openW(handle_, pathname, op.make_flags(accede, cp.use_subdirectory), cp.file_mode_bits)); - if (op.options.nested_write_transactions && - !get_options().nested_write_transactions) + if (op.options.nested_write_transactions && !get_options().nested_write_transactions) MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_INCOMPATIBLE); } -__cold env_managed::env_managed(const ::std::wstring &pathname, - const operate_parameters &op, bool accede) +__cold env_managed::env_managed(const ::std::wstring &pathname, const operate_parameters &op, bool accede) : env_managed(pathname.c_str(), op, accede) {} -__cold env_managed::env_managed(const ::std::wstring &pathname, - const env_managed::create_parameters &cp, +__cold env_managed::env_managed(const ::std::wstring &pathname, const env_managed::create_parameters &cp, const env::operate_parameters &op, bool accede) : env_managed(pathname.c_str(), cp, op, accede) {} #endif /* Windows */ #ifdef MDBX_STD_FILESYSTEM_PATH -__cold env_managed::env_managed(const MDBX_STD_FILESYSTEM_PATH &pathname, - const operate_parameters &op, bool accede) +__cold env_managed::env_managed(const MDBX_STD_FILESYSTEM_PATH &pathname, const operate_parameters &op, bool accede) : env_managed(pathname.native(), op, accede) {} -__cold env_managed::env_managed(const MDBX_STD_FILESYSTEM_PATH &pathname, - const env_managed::create_parameters &cp, +__cold env_managed::env_managed(const MDBX_STD_FILESYSTEM_PATH &pathname, const env_managed::create_parameters 
&cp, const env::operate_parameters &op, bool accede) : env_managed(pathname.native(), cp, op, accede) {} #endif /* MDBX_STD_FILESYSTEM_PATH */ @@ -1528,16 +1440,14 @@ __cold env_managed::env_managed(const MDBX_STD_FILESYSTEM_PATH &pathname, txn_managed txn::start_nested() { MDBX_txn *nested; error::throw_on_nullptr(handle_, MDBX_BAD_TXN); - error::success_or_throw(::mdbx_txn_begin(mdbx_txn_env(handle_), handle_, - MDBX_TXN_READWRITE, &nested)); + error::success_or_throw(::mdbx_txn_begin(mdbx_txn_env(handle_), handle_, MDBX_TXN_READWRITE, &nested)); assert(nested != nullptr); return txn_managed(nested); } txn_managed::~txn_managed() noexcept { if (MDBX_UNLIKELY(handle_)) - MDBX_CXX20_UNLIKELY error::success_or_panic(::mdbx_txn_abort(handle_), - "mdbx::~txn", "mdbx_txn_abort"); + MDBX_CXX20_UNLIKELY error::success_or_panic(::mdbx_txn_abort(handle_), "mdbx::~txn", "mdbx_txn_abort"); } void txn_managed::abort() { @@ -1557,8 +1467,7 @@ void txn_managed::commit() { } void txn_managed::commit(commit_latency *latency) { - const error err = - static_cast(::mdbx_txn_commit_ex(handle_, latency)); + const error err = static_cast(::mdbx_txn_commit_ex(handle_, latency)); if (MDBX_LIKELY(err.code() != MDBX_THREAD_MISMATCH)) MDBX_CXX20_LIKELY handle_ = nullptr; if (MDBX_UNLIKELY(err.code() != MDBX_SUCCESS)) @@ -1568,8 +1477,7 @@ void txn_managed::commit(commit_latency *latency) { void txn_managed::commit_embark_read() { auto env = this->env(); commit(); - error::success_or_throw( - ::mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &handle_)); + error::success_or_throw(::mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &handle_)); } //------------------------------------------------------------------------------ @@ -1608,8 +1516,7 @@ __cold bool txn::clear_map(const char *name, bool throw_if_absent) { } } -__cold bool txn::rename_map(const char *old_name, const char *new_name, - bool throw_if_absent) { +__cold bool txn::rename_map(const char *old_name, const char *new_name, bool 
throw_if_absent) { map_handle map; const int err = ::mdbx_dbi_open(handle_, old_name, MDBX_DB_ACCEDE, &map.dbi); switch (err) { @@ -1660,9 +1567,7 @@ __cold bool txn::clear_map(const ::mdbx::slice &name, bool throw_if_absent) { } } -__cold bool txn::rename_map(const ::mdbx::slice &old_name, - const ::mdbx::slice &new_name, - bool throw_if_absent) { +__cold bool txn::rename_map(const ::mdbx::slice &old_name, const ::mdbx::slice &new_name, bool throw_if_absent) { map_handle map; const int err = ::mdbx_dbi_open2(handle_, old_name, MDBX_DB_ACCEDE, &map.dbi); switch (err) { @@ -1679,11 +1584,8 @@ __cold bool txn::rename_map(const ::mdbx::slice &old_name, } } -__cold bool txn::rename_map(const ::std::string &old_name, - const ::std::string &new_name, - bool throw_if_absent) { - return rename_map(::mdbx::slice(old_name), ::mdbx::slice(new_name), - throw_if_absent); +__cold bool txn::rename_map(const ::std::string &old_name, const ::std::string &new_name, bool throw_if_absent) { + return rename_map(::mdbx::slice(old_name), ::mdbx::slice(new_name), throw_if_absent); } //------------------------------------------------------------------------------ @@ -1723,12 +1625,10 @@ __cold ::std::ostream &operator<<(::std::ostream &out, const pair &it) { } __cold ::std::ostream &operator<<(::std::ostream &out, const pair_result &it) { - return out << "{" << (it.done ? "done: " : "non-done: ") << it.key << " => " - << it.value << "}"; + return out << "{" << (it.done ? "done: " : "non-done: ") << it.key << " => " << it.value << "}"; } -__cold ::std::ostream &operator<<(::std::ostream &out, - const ::mdbx::env::geometry::size &it) { +__cold ::std::ostream &operator<<(::std::ostream &out, const ::mdbx::env::geometry::size &it) { switch (it.bytes) { case ::mdbx::env::geometry::default_value: return out << "default"; @@ -1738,8 +1638,7 @@ __cold ::std::ostream &operator<<(::std::ostream &out, return out << "maximal"; } - const auto bytes = (it.bytes < 0) ? 
out << "-", - size_t(-it.bytes) : size_t(it.bytes); + const auto bytes = (it.bytes < 0) ? out << "-", size_t(-it.bytes) : size_t(it.bytes); struct { size_t one; const char *suffix; @@ -1769,8 +1668,7 @@ __cold ::std::ostream &operator<<(::std::ostream &out, return out; } -__cold ::std::ostream &operator<<(::std::ostream &out, - const env::geometry &it) { +__cold ::std::ostream &operator<<(::std::ostream &out, const env::geometry &it) { return // out << "\tlower " << env::geometry::size(it.size_lower) // << ",\n\tnow " << env::geometry::size(it.size_now) // @@ -1780,8 +1678,7 @@ __cold ::std::ostream &operator<<(::std::ostream &out, << ",\n\tpagesize " << env::geometry::size(it.pagesize) << "\n"; } -__cold ::std::ostream &operator<<(::std::ostream &out, - const env::operate_parameters &it) { +__cold ::std::ostream &operator<<(::std::ostream &out, const env::operate_parameters &it) { return out << "{\n" // << "\tmax_maps " << it.max_maps // << ",\n\tmax_readers " << it.max_readers // @@ -1805,8 +1702,7 @@ __cold ::std::ostream &operator<<(::std::ostream &out, const env::mode &it) { } } -__cold ::std::ostream &operator<<(::std::ostream &out, - const env::durability &it) { +__cold ::std::ostream &operator<<(::std::ostream &out, const env::durability &it) { switch (it) { case env::durability::robust_synchronous: return out << "robust_synchronous"; @@ -1821,16 +1717,14 @@ __cold ::std::ostream &operator<<(::std::ostream &out, } } -__cold ::std::ostream &operator<<(::std::ostream &out, - const env::reclaiming_options &it) { +__cold ::std::ostream &operator<<(::std::ostream &out, const env::reclaiming_options &it) { return out << "{" // << "lifo: " << (it.lifo ? "yes" : "no") // << ", coalesce: " << (it.coalesce ? 
"yes" : "no") // << "}"; } -__cold ::std::ostream &operator<<(::std::ostream &out, - const env::operate_options &it) { +__cold ::std::ostream &operator<<(::std::ostream &out, const env::operate_options &it) { static const char comma[] = ", "; const char *delimiter = ""; out << "{"; @@ -1859,8 +1753,7 @@ __cold ::std::ostream &operator<<(::std::ostream &out, return out << "}"; } -__cold ::std::ostream &operator<<(::std::ostream &out, - const env_managed::create_parameters &it) { +__cold ::std::ostream &operator<<(::std::ostream &out, const env_managed::create_parameters &it) { return out << "{\n" // << "\tfile_mode " << std::oct << it.file_mode_bits << std::dec // << ",\n\tsubdirectory " << (it.use_subdirectory ? "yes" : "no") // @@ -1868,8 +1761,7 @@ __cold ::std::ostream &operator<<(::std::ostream &out, << it.geometry << "}"; } -__cold ::std::ostream &operator<<(::std::ostream &out, - const MDBX_log_level_t &it) { +__cold ::std::ostream &operator<<(::std::ostream &out, const MDBX_log_level_t &it) { switch (it) { case MDBX_LOG_FATAL: return out << "LOG_FATAL"; @@ -1894,8 +1786,7 @@ __cold ::std::ostream &operator<<(::std::ostream &out, } } -__cold ::std::ostream &operator<<(::std::ostream &out, - const MDBX_debug_flags_t &it) { +__cold ::std::ostream &operator<<(::std::ostream &out, const MDBX_debug_flags_t &it) { if (it == MDBX_DBG_DONTCHANGE) return out << "DBG_DONTCHANGE"; @@ -1931,8 +1822,7 @@ __cold ::std::ostream &operator<<(::std::ostream &out, return out << "}"; } -__cold ::std::ostream &operator<<(::std::ostream &out, - const ::mdbx::error &err) { +__cold ::std::ostream &operator<<(::std::ostream &out, const ::mdbx::error &err) { return out << err.what() << " (" << long(err.code()) << ")"; } diff --git a/src/meta.c b/src/meta.c index b45d71c1..cbdea2d0 100644 --- a/src/meta.c +++ b/src/meta.c @@ -9,15 +9,11 @@ typedef struct meta_snap { } meta_snap_t; static inline txnid_t fetch_txnid(const volatile mdbx_atomic_uint32_t *ptr) { -#if (defined(__amd64__) || 
defined(__e2k__)) && !defined(ENABLE_UBSAN) && \ - MDBX_UNALIGNED_OK >= 8 - return atomic_load64((const volatile mdbx_atomic_uint64_t *)ptr, - mo_AcquireRelease); +#if (defined(__amd64__) || defined(__e2k__)) && !defined(ENABLE_UBSAN) && MDBX_UNALIGNED_OK >= 8 + return atomic_load64((const volatile mdbx_atomic_uint64_t *)ptr, mo_AcquireRelease); #else - const uint32_t l = atomic_load32( - &ptr[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], mo_AcquireRelease); - const uint32_t h = atomic_load32( - &ptr[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], mo_AcquireRelease); + const uint32_t l = atomic_load32(&ptr[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], mo_AcquireRelease); + const uint32_t h = atomic_load32(&ptr[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], mo_AcquireRelease); return (uint64_t)h << 32 | l; #endif } @@ -33,9 +29,7 @@ static inline meta_snap_t meta_snap(const volatile meta_t *meta) { return r; } -txnid_t meta_txnid(const volatile meta_t *meta) { - return meta_snap(meta).txnid; -} +txnid_t meta_txnid(const volatile meta_t *meta) { return meta_snap(meta).txnid; } meta_ptr_t meta_ptr(const MDBX_env *env, unsigned n) { eASSERT(env, n < NUM_METAS); @@ -46,16 +40,13 @@ meta_ptr_t meta_ptr(const MDBX_env *env, unsigned n) { return r; } -static uint8_t meta_cmp2pack(uint8_t c01, uint8_t c02, uint8_t c12, bool s0, - bool s1, bool s2) { +static uint8_t meta_cmp2pack(uint8_t c01, uint8_t c02, uint8_t c12, bool s0, bool s1, bool s2) { assert(c01 < 3 && c02 < 3 && c12 < 3); /* assert(s0 < 2 && s1 < 2 && s2 < 2); */ - const uint8_t recent = meta_cmp2recent(c01, s0, s1) - ? (meta_cmp2recent(c02, s0, s2) ? 0 : 2) - : (meta_cmp2recent(c12, s1, s2) ? 1 : 2); - const uint8_t prefer_steady = meta_cmp2steady(c01, s0, s1) - ? (meta_cmp2steady(c02, s0, s2) ? 0 : 2) - : (meta_cmp2steady(c12, s1, s2) ? 1 : 2); + const uint8_t recent = + meta_cmp2recent(c01, s0, s1) ? (meta_cmp2recent(c02, s0, s2) ? 0 : 2) : (meta_cmp2recent(c12, s1, s2) ? 
1 : 2); + const uint8_t prefer_steady = + meta_cmp2steady(c01, s0, s1) ? (meta_cmp2steady(c02, s0, s2) ? 0 : 2) : (meta_cmp2steady(c12, s1, s2) ? 1 : 2); uint8_t tail; if (recent == 0) @@ -65,10 +56,8 @@ static uint8_t meta_cmp2pack(uint8_t c01, uint8_t c02, uint8_t c12, bool s0, else tail = meta_cmp2steady(c01, s0, s1) ? 1 : 0; - const bool valid = - c01 != 1 || s0 != s1 || c02 != 1 || s0 != s2 || c12 != 1 || s1 != s2; - const bool strict = (c01 != 1 || s0 != s1) && (c02 != 1 || s0 != s2) && - (c12 != 1 || s1 != s2); + const bool valid = c01 != 1 || s0 != s1 || c02 != 1 || s0 != s2 || c12 != 1 || s1 != s2; + const bool strict = (c01 != 1 || s0 != s1) && (c02 != 1 || s0 != s2) && (c12 != 1 || s1 != s2); return tail | recent << 2 | prefer_steady << 4 | strict << 6 | valid << 7; } @@ -82,21 +71,16 @@ static inline void meta_troika_unpack(troika_t *troika, const uint8_t packed) { } static const uint8_t troika_fsm_map[2 * 2 * 2 * 3 * 3 * 3] = { - 232, 201, 216, 216, 232, 233, 232, 232, 168, 201, 216, 152, 168, 233, 232, - 168, 233, 201, 216, 201, 233, 233, 232, 233, 168, 201, 152, 216, 232, 169, - 232, 168, 168, 193, 152, 152, 168, 169, 232, 168, 169, 193, 152, 194, 233, - 169, 232, 169, 232, 201, 216, 216, 232, 201, 232, 232, 168, 193, 216, 152, - 168, 193, 232, 168, 193, 193, 210, 194, 225, 193, 225, 193, 168, 137, 212, - 214, 232, 233, 168, 168, 168, 137, 212, 150, 168, 233, 168, 168, 169, 137, - 216, 201, 233, 233, 168, 169, 168, 137, 148, 214, 232, 169, 168, 168, 40, - 129, 148, 150, 168, 169, 168, 40, 169, 129, 152, 194, 233, 169, 168, 169, - 168, 137, 214, 214, 232, 201, 168, 168, 168, 129, 214, 150, 168, 193, 168, - 168, 129, 129, 210, 194, 225, 193, 161, 129, 212, 198, 212, 214, 228, 228, - 212, 212, 148, 201, 212, 150, 164, 233, 212, 148, 233, 201, 216, 201, 233, - 233, 216, 233, 148, 198, 148, 214, 228, 164, 212, 148, 148, 194, 148, 150, - 164, 169, 212, 148, 169, 194, 152, 194, 233, 169, 216, 169, 214, 198, 214, - 214, 228, 198, 212, 214, 150, 194, 214, 
150, 164, 193, 212, 150, 194, 194, - 210, 194, 225, 193, 210, 194}; + 232, 201, 216, 216, 232, 233, 232, 232, 168, 201, 216, 152, 168, 233, 232, 168, 233, 201, 216, 201, 233, 233, + 232, 233, 168, 201, 152, 216, 232, 169, 232, 168, 168, 193, 152, 152, 168, 169, 232, 168, 169, 193, 152, 194, + 233, 169, 232, 169, 232, 201, 216, 216, 232, 201, 232, 232, 168, 193, 216, 152, 168, 193, 232, 168, 193, 193, + 210, 194, 225, 193, 225, 193, 168, 137, 212, 214, 232, 233, 168, 168, 168, 137, 212, 150, 168, 233, 168, 168, + 169, 137, 216, 201, 233, 233, 168, 169, 168, 137, 148, 214, 232, 169, 168, 168, 40, 129, 148, 150, 168, 169, + 168, 40, 169, 129, 152, 194, 233, 169, 168, 169, 168, 137, 214, 214, 232, 201, 168, 168, 168, 129, 214, 150, + 168, 193, 168, 168, 129, 129, 210, 194, 225, 193, 161, 129, 212, 198, 212, 214, 228, 228, 212, 212, 148, 201, + 212, 150, 164, 233, 212, 148, 233, 201, 216, 201, 233, 233, 216, 233, 148, 198, 148, 214, 228, 164, 212, 148, + 148, 194, 148, 150, 164, 169, 212, 148, 169, 194, 152, 194, 233, 169, 216, 169, 214, 198, 214, 214, 228, 198, + 212, 214, 150, 194, 214, 150, 164, 193, 212, 150, 194, 194, 210, 194, 225, 193, 210, 194}; __cold bool troika_verify_fsm(void) { bool ok = true; @@ -117,12 +101,10 @@ __cold bool troika_verify_fsm(void) { const bool strict = TROIKA_STRICT_VALID(&troika); const bool valid = TROIKA_VALID(&troika); - const uint8_t recent_chk = meta_cmp2recent(c01, s0, s1) - ? (meta_cmp2recent(c02, s0, s2) ? 0 : 2) - : (meta_cmp2recent(c12, s1, s2) ? 1 : 2); + const uint8_t recent_chk = + meta_cmp2recent(c01, s0, s1) ? (meta_cmp2recent(c02, s0, s2) ? 0 : 2) : (meta_cmp2recent(c12, s1, s2) ? 1 : 2); const uint8_t prefer_steady_chk = - meta_cmp2steady(c01, s0, s1) ? (meta_cmp2steady(c02, s0, s2) ? 0 : 2) - : (meta_cmp2steady(c12, s1, s2) ? 1 : 2); + meta_cmp2steady(c01, s0, s1) ? (meta_cmp2steady(c02, s0, s2) ? 0 : 2) : (meta_cmp2steady(c12, s1, s2) ? 
1 : 2); uint8_t tail_chk; if (recent_chk == 0) @@ -132,20 +114,16 @@ __cold bool troika_verify_fsm(void) { else tail_chk = meta_cmp2steady(c01, s0, s1) ? 1 : 0; - const bool valid_chk = - c01 != 1 || s0 != s1 || c02 != 1 || s0 != s2 || c12 != 1 || s1 != s2; - const bool strict_chk = (c01 != 1 || s0 != s1) && (c02 != 1 || s0 != s2) && - (c12 != 1 || s1 != s2); + const bool valid_chk = c01 != 1 || s0 != s1 || c02 != 1 || s0 != s2 || c12 != 1 || s1 != s2; + const bool strict_chk = (c01 != 1 || s0 != s1) && (c02 != 1 || s0 != s2) && (c12 != 1 || s1 != s2); assert(troika.recent == recent_chk); assert(troika.prefer_steady == prefer_steady_chk); assert(tail == tail_chk); assert(valid == valid_chk); assert(strict == strict_chk); assert(troika_fsm_map[troika.fsm] == packed); - if (troika.recent != recent_chk || - troika.prefer_steady != prefer_steady_chk || tail != tail_chk || - valid != valid_chk || strict != strict_chk || - troika_fsm_map[troika.fsm] != packed) { + if (troika.recent != recent_chk || troika.prefer_steady != prefer_steady_chk || tail != tail_chk || + valid != valid_chk || strict != strict_chk || troika_fsm_map[troika.fsm] != packed) { ok = false; } } @@ -181,27 +159,24 @@ txnid_t recent_committed_txnid(const MDBX_env *env) { static inline bool meta_eq(const troika_t *troika, size_t a, size_t b) { assert(a < NUM_METAS && b < NUM_METAS); - return troika->txnid[a] == troika->txnid[b] && - (((troika->fsm >> a) ^ (troika->fsm >> b)) & 1) == 0 && + return troika->txnid[a] == troika->txnid[b] && (((troika->fsm >> a) ^ (troika->fsm >> b)) & 1) == 0 && troika->txnid[a]; } unsigned meta_eq_mask(const troika_t *troika) { - return meta_eq(troika, 0, 1) | meta_eq(troika, 1, 2) << 1 | - meta_eq(troika, 2, 0) << 2; + return meta_eq(troika, 0, 1) | meta_eq(troika, 1, 2) << 1 | meta_eq(troika, 2, 0) << 2; } __hot bool meta_should_retry(const MDBX_env *env, troika_t *troika) { const troika_t prev = *troika; *troika = meta_tap(env); - return prev.fsm != troika->fsm || 
prev.txnid[0] != troika->txnid[0] || - prev.txnid[1] != troika->txnid[1] || prev.txnid[2] != troika->txnid[2]; + return prev.fsm != troika->fsm || prev.txnid[0] != troika->txnid[0] || prev.txnid[1] != troika->txnid[1] || + prev.txnid[2] != troika->txnid[2]; } const char *durable_caption(const meta_t *const meta) { if (meta_is_steady(meta)) - return (meta_sign_get(meta) == meta_sign_calculate(meta)) ? "Steady" - : "Tainted"; + return (meta_sign_get(meta) == meta_sign_calculate(meta)) ? "Steady" : "Tainted"; return "Weak"; } @@ -214,20 +189,16 @@ __cold void meta_troika_dump(const MDBX_env *env, const troika_t *troika) { "base=%d-%" PRIaTXN ".%c, " "tail=%d-%" PRIaTXN ".%c, " "valid %c, strict %c", - troika->txnid[0], (troika->fsm & 1) ? 's' : 'w', troika->txnid[1], - (troika->fsm & 2) ? 's' : 'w', troika->txnid[2], - (troika->fsm & 4) ? 's' : 'w', troika->fsm, troika->recent, - recent.txnid, recent.is_steady ? 's' : 'w', troika->prefer_steady, - prefer_steady.txnid, prefer_steady.is_steady ? 's' : 'w', - troika->tail_and_flags % NUM_METAS, tail.txnid, - tail.is_steady ? 's' : 'w', TROIKA_VALID(troika) ? 'Y' : 'N', + troika->txnid[0], (troika->fsm & 1) ? 's' : 'w', troika->txnid[1], (troika->fsm & 2) ? 's' : 'w', + troika->txnid[2], (troika->fsm & 4) ? 's' : 'w', troika->fsm, troika->recent, recent.txnid, + recent.is_steady ? 's' : 'w', troika->prefer_steady, prefer_steady.txnid, prefer_steady.is_steady ? 's' : 'w', + troika->tail_and_flags % NUM_METAS, tail.txnid, tail.is_steady ? 's' : 'w', TROIKA_VALID(troika) ? 'Y' : 'N', TROIKA_STRICT_VALID(troika) ? 
'Y' : 'N'); } /*----------------------------------------------------------------------------*/ -static int meta_unsteady(MDBX_env *env, const txnid_t inclusive_upto, - const pgno_t pgno) { +static int meta_unsteady(MDBX_env *env, const txnid_t inclusive_upto, const pgno_t pgno) { meta_t *const meta = METAPAGE(env, pgno); const txnid_t txnid = constmeta_txnid(meta); if (!meta_is_steady(meta) || txnid > inclusive_upto) @@ -236,8 +207,7 @@ static int meta_unsteady(MDBX_env *env, const txnid_t inclusive_upto, WARNING("wipe txn #%" PRIaTXN ", meta %" PRIaPGNO, txnid, pgno); const uint64_t wipe = DATASIGN_NONE; const void *ptr = &wipe; - size_t bytes = sizeof(meta->sign), - offset = ptr_dist(&meta->sign, env->dxb_mmap.base); + size_t bytes = sizeof(meta->sign), offset = ptr_dist(&meta->sign, env->dxb_mmap.base); if (env->flags & MDBX_WRITEMAP) { unaligned_poke_u64(4, meta->sign, wipe); osal_flush_incoherent_cpu_writeback(); @@ -265,8 +235,7 @@ __cold int meta_wipe_steady(MDBX_env *env, txnid_t inclusive_upto) { if (err == MDBX_RESULT_TRUE) { err = MDBX_SUCCESS; if (!MDBX_AVOID_MSYNC && (env->flags & MDBX_WRITEMAP)) { - err = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS), - MDBX_SYNC_DATA | MDBX_SYNC_IODQ); + err = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS), MDBX_SYNC_DATA | MDBX_SYNC_IODQ); #if MDBX_ENABLE_PGOP_STAT env->lck->pgops.msync.weak += 1; #endif /* MDBX_ENABLE_PGOP_STAT */ @@ -278,8 +247,7 @@ __cold int meta_wipe_steady(MDBX_env *env, txnid_t inclusive_upto) { } } - osal_flush_incoherent_mmap(env->dxb_mmap.base, pgno2bytes(env, NUM_METAS), - globals.sys_pagesize); + osal_flush_incoherent_mmap(env->dxb_mmap.base, pgno2bytes(env, NUM_METAS), globals.sys_pagesize); /* force oldest refresh */ atomic_store32(&env->lck->rdt_refresh_flag, true, mo_Relaxed); @@ -291,8 +259,7 @@ __cold int meta_wipe_steady(MDBX_env *env, txnid_t inclusive_upto) { } int meta_sync(const MDBX_env *env, const meta_ptr_t head) { - eASSERT(env, 
atomic_load32(&env->lck->meta_sync_txnid, mo_Relaxed) != - (uint32_t)head.txnid); + eASSERT(env, atomic_load32(&env->lck->meta_sync_txnid, mo_Relaxed) != (uint32_t)head.txnid); /* Функция может вызываться (в том числе) при (env->flags & * MDBX_NOMETASYNC) == 0 и env->fd4meta == env->dsync_fd, например если * предыдущая транзакция была выполненна с флагом MDBX_NOMETASYNC. */ @@ -300,8 +267,7 @@ int meta_sync(const MDBX_env *env, const meta_ptr_t head) { int rc = MDBX_RESULT_TRUE; if (env->flags & MDBX_WRITEMAP) { if (!MDBX_AVOID_MSYNC) { - rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS), - MDBX_SYNC_DATA | MDBX_SYNC_IODQ); + rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS), MDBX_SYNC_DATA | MDBX_SYNC_IODQ); #if MDBX_ENABLE_PGOP_STAT env->lck->pgops.msync.weak += 1; #endif /* MDBX_ENABLE_PGOP_STAT */ @@ -310,8 +276,7 @@ int meta_sync(const MDBX_env *env, const meta_ptr_t head) { env->lck->pgops.wops.weak += 1; #endif /* MDBX_ENABLE_PGOP_STAT */ const page_t *page = data_page(head.ptr_c); - rc = osal_pwrite(env->fd4meta, page, env->ps, - ptr_dist(page, env->dxb_mmap.base)); + rc = osal_pwrite(env->fd4meta, page, env->ps, ptr_dist(page, env->dxb_mmap.base)); if (likely(rc == MDBX_SUCCESS) && env->fd4meta == env->lazy_fd) { rc = osal_fsync(env->lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); @@ -332,8 +297,7 @@ int meta_sync(const MDBX_env *env, const meta_ptr_t head) { return rc; } -__cold static page_t *meta_model(const MDBX_env *env, page_t *model, size_t num, - const bin128_t *guid) { +__cold static page_t *meta_model(const MDBX_env *env, page_t *model, size_t num, const bin128_t *guid) { ENSURE(env, is_powerof2(env->ps)); ENSURE(env, env->ps >= MDBX_MIN_PAGESIZE); ENSURE(env, env->ps <= MDBX_MAX_PAGESIZE); @@ -350,10 +314,8 @@ __cold static page_t *meta_model(const MDBX_env *env, page_t *model, size_t num, model_meta->geometry.lower = bytes2pgno(env, env->geo_in_bytes.lower); model_meta->geometry.upper = bytes2pgno(env, 
env->geo_in_bytes.upper); - model_meta->geometry.grow_pv = - pages2pv(bytes2pgno(env, env->geo_in_bytes.grow)); - model_meta->geometry.shrink_pv = - pages2pv(bytes2pgno(env, env->geo_in_bytes.shrink)); + model_meta->geometry.grow_pv = pages2pv(bytes2pgno(env, env->geo_in_bytes.grow)); + model_meta->geometry.shrink_pv = pages2pv(bytes2pgno(env, env->geo_in_bytes.shrink)); model_meta->geometry.now = bytes2pgno(env, env->geo_in_bytes.now); model_meta->geometry.first_unallocated = NUM_METAS; @@ -362,12 +324,9 @@ __cold static page_t *meta_model(const MDBX_env *env, page_t *model, size_t num, ENSURE(env, model_meta->geometry.now >= model_meta->geometry.lower); ENSURE(env, model_meta->geometry.now <= model_meta->geometry.upper); ENSURE(env, model_meta->geometry.first_unallocated >= MIN_PAGENO); - ENSURE(env, - model_meta->geometry.first_unallocated <= model_meta->geometry.now); - ENSURE(env, model_meta->geometry.grow_pv == - pages2pv(pv2pages(model_meta->geometry.grow_pv))); - ENSURE(env, model_meta->geometry.shrink_pv == - pages2pv(pv2pages(model_meta->geometry.shrink_pv))); + ENSURE(env, model_meta->geometry.first_unallocated <= model_meta->geometry.now); + ENSURE(env, model_meta->geometry.grow_pv == pages2pv(pv2pages(model_meta->geometry.grow_pv))); + ENSURE(env, model_meta->geometry.shrink_pv == pages2pv(pv2pages(model_meta->geometry.shrink_pv))); model_meta->pagesize = env->ps; model_meta->trees.gc.flags = MDBX_INTEGERKEY; @@ -389,12 +348,9 @@ __cold meta_t *meta_init_triplet(const MDBX_env *env, void *buffer) { return page_meta(page2); } -__cold int __must_check_result meta_override(MDBX_env *env, size_t target, - txnid_t txnid, - const meta_t *shape) { +__cold int __must_check_result meta_override(MDBX_env *env, size_t target, txnid_t txnid, const meta_t *shape) { page_t *const page = env->page_auxbuf; - meta_model(env, page, target, - &((target == 0 && shape) ? shape : METAPAGE(env, 0))->dxbid); + meta_model(env, page, target, &((target == 0 && shape) ? 
shape : METAPAGE(env, 0))->dxbid); meta_t *const model = page_meta(page); meta_set_txnid(env, model, txnid); if (txnid) @@ -407,21 +363,18 @@ __cold int __must_check_result meta_override(MDBX_env *env, size_t target, return MDBX_PROBLEM; } if (globals.runtime_flags & MDBX_DBG_DONT_UPGRADE) - memcpy(&model->magic_and_version, &shape->magic_and_version, - sizeof(model->magic_and_version)); + memcpy(&model->magic_and_version, &shape->magic_and_version, sizeof(model->magic_and_version)); model->reserve16 = shape->reserve16; model->validator_id = shape->validator_id; model->extra_pagehdr = shape->extra_pagehdr; memcpy(&model->geometry, &shape->geometry, sizeof(model->geometry)); memcpy(&model->trees, &shape->trees, sizeof(model->trees)); memcpy(&model->canary, &shape->canary, sizeof(model->canary)); - memcpy(&model->pages_retired, &shape->pages_retired, - sizeof(model->pages_retired)); + memcpy(&model->pages_retired, &shape->pages_retired, sizeof(model->pages_retired)); if (txnid) { if ((!model->trees.gc.mod_txnid && model->trees.gc.root != P_INVALID) || (!model->trees.main.mod_txnid && model->trees.main.root != P_INVALID)) - memcpy(&model->magic_and_version, &shape->magic_and_version, - sizeof(model->magic_and_version)); + memcpy(&model->magic_and_version, &shape->magic_and_version, sizeof(model->magic_and_version)); if (unlikely(!coherency_check_meta(env, model, false))) { ERROR("bailout overriding meta-%zu since model failed " "FreeDB/MainDB %s-check for txnid #%" PRIaTXN, @@ -452,8 +405,7 @@ __cold int __must_check_result meta_override(MDBX_env *env, size_t target, #if MDBX_ENABLE_PGOP_STAT env->lck->pgops.msync.weak += 1; #endif /* MDBX_ENABLE_PGOP_STAT */ - rc = osal_msync(&env->dxb_mmap, 0, - pgno_align2os_bytes(env, model->geometry.first_unallocated), + rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, model->geometry.first_unallocated), MDBX_SYNC_DATA | MDBX_SYNC_IODQ); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -465,8 +417,7 @@ __cold int 
__must_check_result meta_override(MDBX_env *env, size_t target, #if MDBX_ENABLE_PGOP_STAT env->lck->pgops.msync.weak += 1; #endif /* MDBX_ENABLE_PGOP_STAT */ - rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, target + 1), - MDBX_SYNC_DATA | MDBX_SYNC_IODQ); + rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, target + 1), MDBX_SYNC_DATA | MDBX_SYNC_IODQ); } else { #if MDBX_ENABLE_PGOP_STAT env->lck->pgops.wops.weak += 1; @@ -478,28 +429,20 @@ __cold int __must_check_result meta_override(MDBX_env *env, size_t target, #endif /* MDBX_ENABLE_PGOP_STAT */ rc = osal_fsync(env->lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); } - osal_flush_incoherent_mmap(env->dxb_mmap.base, pgno2bytes(env, NUM_METAS), - globals.sys_pagesize); + osal_flush_incoherent_mmap(env->dxb_mmap.base, pgno2bytes(env, NUM_METAS), globals.sys_pagesize); } eASSERT(env, (!env->txn && (env->flags & ENV_ACTIVE) == 0) || - (env->stuck_meta == (int)target && - (env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == - MDBX_EXCLUSIVE)); + (env->stuck_meta == (int)target && (env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == MDBX_EXCLUSIVE)); return rc; } -__cold int meta_validate(MDBX_env *env, meta_t *const meta, - const page_t *const page, const unsigned meta_number, +__cold int meta_validate(MDBX_env *env, meta_t *const meta, const page_t *const page, const unsigned meta_number, unsigned *guess_pagesize) { - const uint64_t magic_and_version = - unaligned_peek_u64(4, &meta->magic_and_version); - if (unlikely(magic_and_version != MDBX_DATA_MAGIC && - magic_and_version != MDBX_DATA_MAGIC_LEGACY_COMPAT && + const uint64_t magic_and_version = unaligned_peek_u64(4, &meta->magic_and_version); + if (unlikely(magic_and_version != MDBX_DATA_MAGIC && magic_and_version != MDBX_DATA_MAGIC_LEGACY_COMPAT && magic_and_version != MDBX_DATA_MAGIC_LEGACY_DEVEL)) { - ERROR("meta[%u] has invalid magic/version %" PRIx64, meta_number, - magic_and_version); - return ((magic_and_version >> 8) != MDBX_MAGIC) ? 
MDBX_INVALID - : MDBX_VERSION_MISMATCH; + ERROR("meta[%u] has invalid magic/version %" PRIx64, meta_number, magic_and_version); + return ((magic_and_version >> 8) != MDBX_MAGIC) ? MDBX_INVALID : MDBX_VERSION_MISMATCH; } if (unlikely(page->pgno != meta_number)) { @@ -512,11 +455,9 @@ __cold int meta_validate(MDBX_env *env, meta_t *const meta, return MDBX_INVALID; } - if (unlikely(!is_powerof2(meta->pagesize) || - meta->pagesize < MDBX_MIN_PAGESIZE || + if (unlikely(!is_powerof2(meta->pagesize) || meta->pagesize < MDBX_MIN_PAGESIZE || meta->pagesize > MDBX_MAX_PAGESIZE)) { - WARNING("meta[%u] has invalid pagesize (%u), skip it", meta_number, - meta->pagesize); + WARNING("meta[%u] has invalid pagesize (%u), skip it", meta_number, meta->pagesize); return is_powerof2(meta->pagesize) ? MDBX_VERSION_MISMATCH : MDBX_INVALID; } @@ -535,81 +476,63 @@ __cold int meta_validate(MDBX_env *env, meta_t *const meta, const uint64_t sign = meta_sign_get(meta); const uint64_t sign_stready = meta_sign_calculate(meta); if (SIGN_IS_STEADY(sign) && unlikely(sign != sign_stready)) { - WARNING("meta[%u] has invalid steady-checksum (0x%" PRIx64 " != 0x%" PRIx64 - "), skip it", - meta_number, sign, sign_stready); + WARNING("meta[%u] has invalid steady-checksum (0x%" PRIx64 " != 0x%" PRIx64 "), skip it", meta_number, sign, + sign_stready); return MDBX_RESULT_TRUE; } if (unlikely(meta->trees.gc.flags != MDBX_INTEGERKEY) && - ((meta->trees.gc.flags & DB_PERSISTENT_FLAGS) != MDBX_INTEGERKEY || - magic_and_version == MDBX_DATA_MAGIC)) { - WARNING("meta[%u] has invalid %s flags 0x%x, skip it", meta_number, - "GC/FreeDB", meta->trees.gc.flags); + ((meta->trees.gc.flags & DB_PERSISTENT_FLAGS) != MDBX_INTEGERKEY || magic_and_version == MDBX_DATA_MAGIC)) { + WARNING("meta[%u] has invalid %s flags 0x%x, skip it", meta_number, "GC/FreeDB", meta->trees.gc.flags); return MDBX_INCOMPATIBLE; } if (unlikely(!check_table_flags(meta->trees.main.flags))) { - WARNING("meta[%u] has invalid %s flags 0x%x, skip it", 
meta_number, - "MainDB", meta->trees.main.flags); + WARNING("meta[%u] has invalid %s flags 0x%x, skip it", meta_number, "MainDB", meta->trees.main.flags); return MDBX_INCOMPATIBLE; } - DEBUG("checking meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO - ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO - " +%u -%u, txn_id %" PRIaTXN ", %s", - page->pgno, meta->trees.main.root, meta->trees.gc.root, - meta->geometry.lower, meta->geometry.first_unallocated, - meta->geometry.now, meta->geometry.upper, - pv2pages(meta->geometry.grow_pv), pv2pages(meta->geometry.shrink_pv), + DEBUG("checking meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO + "/%" PRIaPGNO " +%u -%u, txn_id %" PRIaTXN ", %s", + page->pgno, meta->trees.main.root, meta->trees.gc.root, meta->geometry.lower, meta->geometry.first_unallocated, + meta->geometry.now, meta->geometry.upper, pv2pages(meta->geometry.grow_pv), pv2pages(meta->geometry.shrink_pv), txnid, durable_caption(meta)); if (unlikely(txnid < MIN_TXNID || txnid > MAX_TXNID)) { - WARNING("meta[%u] has invalid txnid %" PRIaTXN ", skip it", meta_number, - txnid); + WARNING("meta[%u] has invalid txnid %" PRIaTXN ", skip it", meta_number, txnid); return MDBX_RESULT_TRUE; } - if (unlikely(meta->geometry.lower < MIN_PAGENO || - meta->geometry.lower > MAX_PAGENO + 1)) { - WARNING("meta[%u] has invalid min-pages (%" PRIaPGNO "), skip it", - meta_number, meta->geometry.lower); + if (unlikely(meta->geometry.lower < MIN_PAGENO || meta->geometry.lower > MAX_PAGENO + 1)) { + WARNING("meta[%u] has invalid min-pages (%" PRIaPGNO "), skip it", meta_number, meta->geometry.lower); return MDBX_INVALID; } - if (unlikely(meta->geometry.upper < MIN_PAGENO || - meta->geometry.upper > MAX_PAGENO + 1 || + if (unlikely(meta->geometry.upper < MIN_PAGENO || meta->geometry.upper > MAX_PAGENO + 1 || meta->geometry.upper < meta->geometry.lower)) { - WARNING("meta[%u] has invalid max-pages (%" PRIaPGNO "), skip it", - 
meta_number, meta->geometry.upper); + WARNING("meta[%u] has invalid max-pages (%" PRIaPGNO "), skip it", meta_number, meta->geometry.upper); return MDBX_INVALID; } - if (unlikely(meta->geometry.first_unallocated < MIN_PAGENO || - meta->geometry.first_unallocated - 1 > MAX_PAGENO)) { - WARNING("meta[%u] has invalid next-pageno (%" PRIaPGNO "), skip it", - meta_number, meta->geometry.first_unallocated); + if (unlikely(meta->geometry.first_unallocated < MIN_PAGENO || meta->geometry.first_unallocated - 1 > MAX_PAGENO)) { + WARNING("meta[%u] has invalid next-pageno (%" PRIaPGNO "), skip it", meta_number, meta->geometry.first_unallocated); return MDBX_CORRUPTED; } - const uint64_t used_bytes = - meta->geometry.first_unallocated * (uint64_t)meta->pagesize; + const uint64_t used_bytes = meta->geometry.first_unallocated * (uint64_t)meta->pagesize; if (unlikely(used_bytes > env->dxb_mmap.filesize)) { /* Here could be a race with DB-shrinking performed by other process */ int err = osal_filesize(env->lazy_fd, &env->dxb_mmap.filesize); if (unlikely(err != MDBX_SUCCESS)) return err; if (unlikely(used_bytes > env->dxb_mmap.filesize)) { - WARNING("meta[%u] used-bytes (%" PRIu64 ") beyond filesize (%" PRIu64 - "), skip it", - meta_number, used_bytes, env->dxb_mmap.filesize); + WARNING("meta[%u] used-bytes (%" PRIu64 ") beyond filesize (%" PRIu64 "), skip it", meta_number, used_bytes, + env->dxb_mmap.filesize); return MDBX_CORRUPTED; } } - if (unlikely(meta->geometry.first_unallocated - 1 > MAX_PAGENO || - used_bytes > MAX_MAPSIZE)) { - WARNING("meta[%u] has too large used-space (%" PRIu64 "), skip it", - meta_number, used_bytes); + if (unlikely(meta->geometry.first_unallocated - 1 > MAX_PAGENO || used_bytes > MAX_MAPSIZE)) { + WARNING("meta[%u] has too large used-space (%" PRIu64 "), skip it", meta_number, used_bytes); return MDBX_TOO_LARGE; } @@ -617,13 +540,10 @@ __cold int meta_validate(MDBX_env *env, meta_t *const meta, uint64_t mapsize_min = geo_lower * 
(uint64_t)meta->pagesize; STATIC_ASSERT(MAX_MAPSIZE < PTRDIFF_MAX - MDBX_MAX_PAGESIZE); STATIC_ASSERT(MIN_MAPSIZE < MAX_MAPSIZE); - STATIC_ASSERT((uint64_t)(MAX_PAGENO + 1) * MDBX_MIN_PAGESIZE % (4ul << 20) == - 0); + STATIC_ASSERT((uint64_t)(MAX_PAGENO + 1) * MDBX_MIN_PAGESIZE % (4ul << 20) == 0); if (unlikely(mapsize_min < MIN_MAPSIZE || mapsize_min > MAX_MAPSIZE)) { - if (MAX_MAPSIZE != MAX_MAPSIZE64 && mapsize_min > MAX_MAPSIZE && - mapsize_min <= MAX_MAPSIZE64) { - eASSERT(env, meta->geometry.first_unallocated - 1 <= MAX_PAGENO && - used_bytes <= MAX_MAPSIZE); + if (MAX_MAPSIZE != MAX_MAPSIZE64 && mapsize_min > MAX_MAPSIZE && mapsize_min <= MAX_MAPSIZE64) { + eASSERT(env, meta->geometry.first_unallocated - 1 <= MAX_PAGENO && used_bytes <= MAX_MAPSIZE); WARNING("meta[%u] has too large min-mapsize (%" PRIu64 "), " "but size of used space still acceptable (%" PRIu64 ")", meta_number, mapsize_min, used_bytes); @@ -632,14 +552,12 @@ __cold int meta_validate(MDBX_env *env, meta_t *const meta, geo_lower = MAX_PAGENO + 1; mapsize_min = geo_lower * (uint64_t)meta->pagesize; } - WARNING("meta[%u] consider get-%s pageno is %" PRIaPGNO - " instead of wrong %" PRIaPGNO + WARNING("meta[%u] consider get-%s pageno is %" PRIaPGNO " instead of wrong %" PRIaPGNO ", will be corrected on next commit(s)", meta_number, "lower", geo_lower, meta->geometry.lower); meta->geometry.lower = geo_lower; } else { - WARNING("meta[%u] has invalid min-mapsize (%" PRIu64 "), skip it", - meta_number, mapsize_min); + WARNING("meta[%u] has invalid min-mapsize (%" PRIu64 "), skip it", meta_number, mapsize_min); return MDBX_VERSION_MISMATCH; } } @@ -648,17 +566,13 @@ __cold int meta_validate(MDBX_env *env, meta_t *const meta, uint64_t mapsize_max = geo_upper * (uint64_t)meta->pagesize; STATIC_ASSERT(MIN_MAPSIZE < MAX_MAPSIZE); if (unlikely(mapsize_max > MAX_MAPSIZE || - (MAX_PAGENO + 1) < - ceil_powerof2((size_t)mapsize_max, globals.sys_pagesize) / - (size_t)meta->pagesize)) { + (MAX_PAGENO + 1) < 
ceil_powerof2((size_t)mapsize_max, globals.sys_pagesize) / (size_t)meta->pagesize)) { if (mapsize_max > MAX_MAPSIZE64) { - WARNING("meta[%u] has invalid max-mapsize (%" PRIu64 "), skip it", - meta_number, mapsize_max); + WARNING("meta[%u] has invalid max-mapsize (%" PRIu64 "), skip it", meta_number, mapsize_max); return MDBX_VERSION_MISMATCH; } /* allow to open large DB from a 32-bit environment */ - eASSERT(env, meta->geometry.first_unallocated - 1 <= MAX_PAGENO && - used_bytes <= MAX_MAPSIZE); + eASSERT(env, meta->geometry.first_unallocated - 1 <= MAX_PAGENO && used_bytes <= MAX_MAPSIZE); WARNING("meta[%u] has too large max-mapsize (%" PRIu64 "), " "but size of used space still acceptable (%" PRIu64 ")", meta_number, mapsize_max, used_bytes); @@ -667,8 +581,7 @@ __cold int meta_validate(MDBX_env *env, meta_t *const meta, geo_upper = MAX_PAGENO + 1; mapsize_max = geo_upper * (uint64_t)meta->pagesize; } - WARNING("meta[%u] consider get-%s pageno is %" PRIaPGNO - " instead of wrong %" PRIaPGNO + WARNING("meta[%u] consider get-%s pageno is %" PRIaPGNO " instead of wrong %" PRIaPGNO ", will be corrected on next commit(s)", meta_number, "upper", geo_upper, meta->geometry.upper); meta->geometry.upper = geo_upper; @@ -688,14 +601,12 @@ __cold int meta_validate(MDBX_env *env, meta_t *const meta, geo_now = geo_upper; if (unlikely(meta->geometry.first_unallocated > geo_now)) { - WARNING("meta[%u] next-pageno (%" PRIaPGNO - ") is beyond end-pgno (%" PRIaPGNO "), skip it", - meta_number, meta->geometry.first_unallocated, geo_now); + WARNING("meta[%u] next-pageno (%" PRIaPGNO ") is beyond end-pgno (%" PRIaPGNO "), skip it", meta_number, + meta->geometry.first_unallocated, geo_now); return MDBX_CORRUPTED; } if (meta->geometry.now != geo_now) { - WARNING("meta[%u] consider geo-%s pageno is %" PRIaPGNO - " instead of wrong %" PRIaPGNO + WARNING("meta[%u] consider geo-%s pageno is %" PRIaPGNO " instead of wrong %" PRIaPGNO ", will be corrected on next commit(s)", meta_number, 
"now", geo_now, meta->geometry.now); meta->geometry.now = geo_now; @@ -703,43 +614,36 @@ __cold int meta_validate(MDBX_env *env, meta_t *const meta, /* GC */ if (meta->trees.gc.root == P_INVALID) { - if (unlikely(meta->trees.gc.branch_pages || meta->trees.gc.height || - meta->trees.gc.items || meta->trees.gc.leaf_pages || - meta->trees.gc.large_pages)) { + if (unlikely(meta->trees.gc.branch_pages || meta->trees.gc.height || meta->trees.gc.items || + meta->trees.gc.leaf_pages || meta->trees.gc.large_pages)) { WARNING("meta[%u] has false-empty %s, skip it", meta_number, "GC"); return MDBX_CORRUPTED; } - } else if (unlikely(meta->trees.gc.root >= - meta->geometry.first_unallocated)) { - WARNING("meta[%u] has invalid %s-root %" PRIaPGNO ", skip it", meta_number, - "GC", meta->trees.gc.root); + } else if (unlikely(meta->trees.gc.root >= meta->geometry.first_unallocated)) { + WARNING("meta[%u] has invalid %s-root %" PRIaPGNO ", skip it", meta_number, "GC", meta->trees.gc.root); return MDBX_CORRUPTED; } /* MainDB */ if (meta->trees.main.root == P_INVALID) { - if (unlikely(meta->trees.main.branch_pages || meta->trees.main.height || - meta->trees.main.items || meta->trees.main.leaf_pages || - meta->trees.main.large_pages)) { + if (unlikely(meta->trees.main.branch_pages || meta->trees.main.height || meta->trees.main.items || + meta->trees.main.leaf_pages || meta->trees.main.large_pages)) { WARNING("meta[%u] has false-empty %s", meta_number, "MainDB"); return MDBX_CORRUPTED; } - } else if (unlikely(meta->trees.main.root >= - meta->geometry.first_unallocated)) { - WARNING("meta[%u] has invalid %s-root %" PRIaPGNO ", skip it", meta_number, - "MainDB", meta->trees.main.root); + } else if (unlikely(meta->trees.main.root >= meta->geometry.first_unallocated)) { + WARNING("meta[%u] has invalid %s-root %" PRIaPGNO ", skip it", meta_number, "MainDB", meta->trees.main.root); return MDBX_CORRUPTED; } if (unlikely(meta->trees.gc.mod_txnid > txnid)) { - WARNING("meta[%u] has wrong 
mod_txnid %" PRIaTXN " for %s, skip it", - meta_number, meta->trees.gc.mod_txnid, "GC"); + WARNING("meta[%u] has wrong mod_txnid %" PRIaTXN " for %s, skip it", meta_number, meta->trees.gc.mod_txnid, "GC"); return MDBX_CORRUPTED; } if (unlikely(meta->trees.main.mod_txnid > txnid)) { - WARNING("meta[%u] has wrong mod_txnid %" PRIaTXN " for %s, skip it", - meta_number, meta->trees.main.mod_txnid, "MainDB"); + WARNING("meta[%u] has wrong mod_txnid %" PRIaTXN " for %s, skip it", meta_number, meta->trees.main.mod_txnid, + "MainDB"); return MDBX_CORRUPTED; } @@ -748,7 +652,5 @@ __cold int meta_validate(MDBX_env *env, meta_t *const meta, __cold int meta_validate_copy(MDBX_env *env, const meta_t *meta, meta_t *dest) { *dest = *meta; - return meta_validate(env, dest, data_page(meta), - bytes2pgno(env, ptr_dist(meta, env->dxb_mmap.base)), - nullptr); + return meta_validate(env, dest, data_page(meta), bytes2pgno(env, ptr_dist(meta, env->dxb_mmap.base)), nullptr); } diff --git a/src/meta.h b/src/meta.h index 706061c1..899f1d5a 100644 --- a/src/meta.h +++ b/src/meta.h @@ -16,17 +16,11 @@ static inline uint64_t meta_sign_calculate(const meta_t *meta) { return (sign > DATASIGN_WEAK) ? 
sign : ~sign; } -static inline uint64_t meta_sign_get(const volatile meta_t *meta) { - return unaligned_peek_u64_volatile(4, meta->sign); -} +static inline uint64_t meta_sign_get(const volatile meta_t *meta) { return unaligned_peek_u64_volatile(4, meta->sign); } -static inline void meta_sign_as_steady(meta_t *meta) { - unaligned_poke_u64(4, meta->sign, meta_sign_calculate(meta)); -} +static inline void meta_sign_as_steady(meta_t *meta) { unaligned_poke_u64(4, meta->sign, meta_sign_calculate(meta)); } -static inline bool meta_is_steady(const volatile meta_t *meta) { - return SIGN_IS_STEADY(meta_sign_get(meta)); -} +static inline bool meta_is_steady(const volatile meta_t *meta) { return SIGN_IS_STEADY(meta_sign_get(meta)); } MDBX_INTERNAL troika_t meta_tap(const MDBX_env *env); MDBX_INTERNAL unsigned meta_eq_mask(const troika_t *troika); @@ -48,14 +42,12 @@ MDBX_INTERNAL txnid_t recent_committed_txnid(const MDBX_env *env); MDBX_INTERNAL int meta_sync(const MDBX_env *env, const meta_ptr_t head); MDBX_INTERNAL const char *durable_caption(const meta_t *const meta); -MDBX_INTERNAL void meta_troika_dump(const MDBX_env *env, - const troika_t *troika); +MDBX_INTERNAL void meta_troika_dump(const MDBX_env *env, const troika_t *troika); #define METAPAGE(env, n) page_meta(pgno2page(env, n)) #define METAPAGE_END(env) METAPAGE(env, NUM_METAS) -static inline meta_ptr_t meta_recent(const MDBX_env *env, - const troika_t *troika) { +static inline meta_ptr_t meta_recent(const MDBX_env *env, const troika_t *troika) { meta_ptr_t r; r.txnid = troika->txnid[troika->recent]; r.ptr_v = METAPAGE(env, troika->recent); @@ -63,8 +55,7 @@ static inline meta_ptr_t meta_recent(const MDBX_env *env, return r; } -static inline meta_ptr_t meta_prefer_steady(const MDBX_env *env, - const troika_t *troika) { +static inline meta_ptr_t meta_prefer_steady(const MDBX_env *env, const troika_t *troika) { meta_ptr_t r; r.txnid = troika->txnid[troika->prefer_steady]; r.ptr_v = METAPAGE(env, 
troika->prefer_steady); @@ -72,8 +63,7 @@ static inline meta_ptr_t meta_prefer_steady(const MDBX_env *env, return r; } -static inline meta_ptr_t meta_tail(const MDBX_env *env, - const troika_t *troika) { +static inline meta_ptr_t meta_tail(const MDBX_env *env, const troika_t *troika) { const uint8_t tail = troika->tail_and_flags & 3; MDBX_ANALYSIS_ASSUME(tail < NUM_METAS); meta_ptr_t r; @@ -89,72 +79,53 @@ static inline bool meta_is_used(const troika_t *troika, unsigned n) { static inline bool meta_bootid_match(const meta_t *meta) { - return memcmp(&meta->bootid, &globals.bootid, 16) == 0 && - (globals.bootid.x | globals.bootid.y) != 0; + return memcmp(&meta->bootid, &globals.bootid, 16) == 0 && (globals.bootid.x | globals.bootid.y) != 0; } -static inline bool meta_weak_acceptable(const MDBX_env *env, const meta_t *meta, - const int lck_exclusive) { +static inline bool meta_weak_acceptable(const MDBX_env *env, const meta_t *meta, const int lck_exclusive) { return lck_exclusive ? /* exclusive lock */ meta_bootid_match(meta) - : /* db already opened */ env->lck_mmap.lck && - (env->lck_mmap.lck->envmode.weak & MDBX_RDONLY) == 0; + : /* db already opened */ env->lck_mmap.lck && (env->lck_mmap.lck->envmode.weak & MDBX_RDONLY) == 0; } -MDBX_NOTHROW_PURE_FUNCTION static inline txnid_t -constmeta_txnid(const meta_t *meta) { +MDBX_NOTHROW_PURE_FUNCTION static inline txnid_t constmeta_txnid(const meta_t *meta) { const txnid_t a = unaligned_peek_u64(4, &meta->txnid_a); const txnid_t b = unaligned_peek_u64(4, &meta->txnid_b); return likely(a == b) ? 
a : 0; } -static inline void meta_update_begin(const MDBX_env *env, meta_t *meta, - txnid_t txnid) { +static inline void meta_update_begin(const MDBX_env *env, meta_t *meta, txnid_t txnid) { eASSERT(env, meta >= METAPAGE(env, 0) && meta < METAPAGE_END(env)); - eASSERT(env, unaligned_peek_u64(4, meta->txnid_a) < txnid && - unaligned_peek_u64(4, meta->txnid_b) < txnid); + eASSERT(env, unaligned_peek_u64(4, meta->txnid_a) < txnid && unaligned_peek_u64(4, meta->txnid_b) < txnid); (void)env; -#if (defined(__amd64__) || defined(__e2k__)) && !defined(ENABLE_UBSAN) && \ - MDBX_UNALIGNED_OK >= 8 +#if (defined(__amd64__) || defined(__e2k__)) && !defined(ENABLE_UBSAN) && MDBX_UNALIGNED_OK >= 8 atomic_store64((mdbx_atomic_uint64_t *)&meta->txnid_b, 0, mo_AcquireRelease); - atomic_store64((mdbx_atomic_uint64_t *)&meta->txnid_a, txnid, - mo_AcquireRelease); + atomic_store64((mdbx_atomic_uint64_t *)&meta->txnid_a, txnid, mo_AcquireRelease); #else - atomic_store32(&meta->txnid_b[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], 0, - mo_AcquireRelease); - atomic_store32(&meta->txnid_b[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], 0, - mo_AcquireRelease); - atomic_store32(&meta->txnid_a[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], - (uint32_t)txnid, mo_AcquireRelease); - atomic_store32(&meta->txnid_a[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], - (uint32_t)(txnid >> 32), mo_AcquireRelease); + atomic_store32(&meta->txnid_b[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], 0, mo_AcquireRelease); + atomic_store32(&meta->txnid_b[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], 0, mo_AcquireRelease); + atomic_store32(&meta->txnid_a[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], (uint32_t)txnid, mo_AcquireRelease); + atomic_store32(&meta->txnid_a[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], (uint32_t)(txnid >> 32), mo_AcquireRelease); #endif } -static inline void meta_update_end(const MDBX_env *env, meta_t *meta, - txnid_t txnid) { +static inline void meta_update_end(const MDBX_env *env, meta_t *meta, txnid_t 
txnid) { eASSERT(env, meta >= METAPAGE(env, 0) && meta < METAPAGE_END(env)); eASSERT(env, unaligned_peek_u64(4, meta->txnid_a) == txnid); eASSERT(env, unaligned_peek_u64(4, meta->txnid_b) < txnid); (void)env; jitter4testing(true); memcpy(&meta->bootid, &globals.bootid, 16); -#if (defined(__amd64__) || defined(__e2k__)) && !defined(ENABLE_UBSAN) && \ - MDBX_UNALIGNED_OK >= 8 - atomic_store64((mdbx_atomic_uint64_t *)&meta->txnid_b, txnid, - mo_AcquireRelease); +#if (defined(__amd64__) || defined(__e2k__)) && !defined(ENABLE_UBSAN) && MDBX_UNALIGNED_OK >= 8 + atomic_store64((mdbx_atomic_uint64_t *)&meta->txnid_b, txnid, mo_AcquireRelease); #else - atomic_store32(&meta->txnid_b[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], - (uint32_t)txnid, mo_AcquireRelease); - atomic_store32(&meta->txnid_b[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], - (uint32_t)(txnid >> 32), mo_AcquireRelease); + atomic_store32(&meta->txnid_b[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], (uint32_t)txnid, mo_AcquireRelease); + atomic_store32(&meta->txnid_b[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], (uint32_t)(txnid >> 32), mo_AcquireRelease); #endif } -static inline void meta_set_txnid(const MDBX_env *env, meta_t *meta, - const txnid_t txnid) { - eASSERT(env, !env->dxb_mmap.base || meta < METAPAGE(env, 0) || - meta >= METAPAGE_END(env)); +static inline void meta_set_txnid(const MDBX_env *env, meta_t *meta, const txnid_t txnid) { + eASSERT(env, !env->dxb_mmap.base || meta < METAPAGE(env, 0) || meta >= METAPAGE_END(env)); (void)env; /* update inconsistently since this function used ONLY for filling meta-image * for writing, but not the actual meta-page */ @@ -167,42 +138,31 @@ static inline uint8_t meta_cmp2int(txnid_t a, txnid_t b, uint8_t s) { return unlikely(a == b) ? 1 * s : (a > b) ? 
2 * s : 0 * s; } -static inline uint8_t meta_cmp2recent(uint8_t ab_cmp2int, bool a_steady, - bool b_steady) { +static inline uint8_t meta_cmp2recent(uint8_t ab_cmp2int, bool a_steady, bool b_steady) { assert(ab_cmp2int < 3 /* && a_steady< 2 && b_steady < 2 */); return ab_cmp2int > 1 || (ab_cmp2int == 1 && a_steady > b_steady); } -static inline uint8_t meta_cmp2steady(uint8_t ab_cmp2int, bool a_steady, - bool b_steady) { +static inline uint8_t meta_cmp2steady(uint8_t ab_cmp2int, bool a_steady, bool b_steady) { assert(ab_cmp2int < 3 /* && a_steady< 2 && b_steady < 2 */); return a_steady > b_steady || (a_steady == b_steady && ab_cmp2int > 1); } -static inline bool meta_choice_recent(txnid_t a_txnid, bool a_steady, - txnid_t b_txnid, bool b_steady) { +static inline bool meta_choice_recent(txnid_t a_txnid, bool a_steady, txnid_t b_txnid, bool b_steady) { return meta_cmp2recent(meta_cmp2int(a_txnid, b_txnid, 1), a_steady, b_steady); } -static inline bool meta_choice_steady(txnid_t a_txnid, bool a_steady, - txnid_t b_txnid, bool b_steady) { +static inline bool meta_choice_steady(txnid_t a_txnid, bool a_steady, txnid_t b_txnid, bool b_steady) { return meta_cmp2steady(meta_cmp2int(a_txnid, b_txnid, 1), a_steady, b_steady); } MDBX_INTERNAL meta_t *meta_init_triplet(const MDBX_env *env, void *buffer); -MDBX_INTERNAL int meta_validate(MDBX_env *env, meta_t *const meta, - const page_t *const page, - const unsigned meta_number, +MDBX_INTERNAL int meta_validate(MDBX_env *env, meta_t *const meta, const page_t *const page, const unsigned meta_number, unsigned *guess_pagesize); -MDBX_INTERNAL int __must_check_result meta_validate_copy(MDBX_env *env, - const meta_t *meta, - meta_t *dest); +MDBX_INTERNAL int __must_check_result meta_validate_copy(MDBX_env *env, const meta_t *meta, meta_t *dest); -MDBX_INTERNAL int __must_check_result meta_override(MDBX_env *env, - size_t target, - txnid_t txnid, - const meta_t *shape); +MDBX_INTERNAL int __must_check_result meta_override(MDBX_env 
*env, size_t target, txnid_t txnid, const meta_t *shape); MDBX_INTERNAL int meta_wipe_steady(MDBX_env *env, txnid_t inclusive_upto); diff --git a/src/misc.c b/src/misc.c index f1a58014..ef3a172f 100644 --- a/src/misc.c +++ b/src/misc.c @@ -14,11 +14,9 @@ __cold int mdbx_is_readahead_reasonable(size_t volume, intptr_t redundancy) { const int log2page = log2n_powerof2(pagesize); const intptr_t volume_pages = (volume + pagesize - 1) >> log2page; - const intptr_t redundancy_pages = - (redundancy < 0) ? -(intptr_t)((-redundancy + pagesize - 1) >> log2page) - : (intptr_t)(redundancy + pagesize - 1) >> log2page; - if (volume_pages >= total_ram_pages || - volume_pages + redundancy_pages >= total_ram_pages) + const intptr_t redundancy_pages = (redundancy < 0) ? -(intptr_t)((-redundancy + pagesize - 1) >> log2page) + : (intptr_t)(redundancy + pagesize - 1) >> log2page; + if (volume_pages >= total_ram_pages || volume_pages + redundancy_pages >= total_ram_pages) return MDBX_RESULT_FALSE; intptr_t avail_ram_pages; @@ -26,13 +24,10 @@ __cold int mdbx_is_readahead_reasonable(size_t volume, intptr_t redundancy) { if (unlikely(err != MDBX_SUCCESS)) return LOG_IFERR(err); - return (volume_pages + redundancy_pages >= avail_ram_pages) - ? MDBX_RESULT_FALSE - : MDBX_RESULT_TRUE; + return (volume_pages + redundancy_pages >= avail_ram_pages) ? 
MDBX_RESULT_FALSE : MDBX_RESULT_TRUE; } -int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, - uint64_t increment) { +int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, uint64_t increment) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) { bailout: @@ -111,30 +106,23 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, return MDBX_SUCCESS; } -int mdbx_cmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, - const MDBX_val *b) { +int mdbx_cmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, const MDBX_val *b) { eASSERT(nullptr, txn->signature == txn_signature); tASSERT(txn, (dbi_state(txn, dbi) & DBI_VALID) && !dbi_changed(txn, dbi)); - tASSERT(txn, - dbi < txn->env->n_dbi && (txn->env->dbs_flags[dbi] & DB_VALID) != 0); + tASSERT(txn, dbi < txn->env->n_dbi && (txn->env->dbs_flags[dbi] & DB_VALID) != 0); return txn->env->kvs[dbi].clc.k.cmp(a, b); } -int mdbx_dcmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, - const MDBX_val *b) { +int mdbx_dcmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, const MDBX_val *b) { eASSERT(nullptr, txn->signature == txn_signature); tASSERT(txn, (dbi_state(txn, dbi) & DBI_VALID) && !dbi_changed(txn, dbi)); tASSERT(txn, dbi < txn->env->n_dbi && (txn->env->dbs_flags[dbi] & DB_VALID)); return txn->env->kvs[dbi].clc.v.cmp(a, b); } -__cold MDBX_cmp_func *mdbx_get_keycmp(MDBX_db_flags_t flags) { - return builtin_keycmp(flags); -} +__cold MDBX_cmp_func *mdbx_get_keycmp(MDBX_db_flags_t flags) { return builtin_keycmp(flags); } -__cold MDBX_cmp_func *mdbx_get_datacmp(MDBX_db_flags_t flags) { - return builtin_datacmp(flags); -} +__cold MDBX_cmp_func *mdbx_get_datacmp(MDBX_db_flags_t flags) { return builtin_datacmp(flags); } /*----------------------------------------------------------------------------*/ @@ -227,10 +215,8 @@ __cold const char *mdbx_strerror_r(int errnum, char *buf, size_t buflen) { const char *msg = mdbx_liberr2str(errnum); if 
(!msg && buflen > 0 && buflen < INT_MAX) { #if defined(_WIN32) || defined(_WIN64) - DWORD size = FormatMessageA( - FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, nullptr, - errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buf, (DWORD)buflen, - nullptr); + DWORD size = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, nullptr, errnum, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buf, (DWORD)buflen, nullptr); while (size && buf[size - 1] <= ' ') --size; buf[size] = 0; @@ -284,10 +270,8 @@ __cold const char *mdbx_strerror(int errnum) { const char *mdbx_strerror_r_ANSI2OEM(int errnum, char *buf, size_t buflen) { const char *msg = mdbx_liberr2str(errnum); if (!msg && buflen > 0 && buflen < INT_MAX) { - DWORD size = FormatMessageA( - FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, nullptr, - errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buf, (DWORD)buflen, - nullptr); + DWORD size = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, nullptr, errnum, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buf, (DWORD)buflen, nullptr); while (size && buf[size - 1] <= ' ') --size; buf[size] = 0; diff --git a/src/mvcc-readers.c b/src/mvcc-readers.c index f342599f..d51576da 100644 --- a/src/mvcc-readers.c +++ b/src/mvcc-readers.c @@ -45,8 +45,7 @@ bsr_t mvcc_bind_slot(MDBX_env *env) { result.err = mvcc_cleanup_dead(env, true, nullptr); if (result.err != MDBX_RESULT_TRUE) { lck_rdt_unlock(env); - result.err = - (result.err == MDBX_SUCCESS) ? MDBX_READERS_FULL : result.err; + result.err = (result.err == MDBX_SUCCESS) ? MDBX_READERS_FULL : result.err; return result; } } @@ -61,8 +60,7 @@ bsr_t mvcc_bind_slot(MDBX_env *env) { safe64_reset(&result.rslot->txnid, true); if (slot == nreaders) env->lck->rdt_length.weak = (uint32_t)++nreaders; - result.rslot->tid.weak = - (env->flags & MDBX_NOSTICKYTHREADS) ? 0 : osal_thread_self(); + result.rslot->tid.weak = (env->flags & MDBX_NOSTICKYTHREADS) ? 
0 : osal_thread_self(); atomic_store32(&result.rslot->pid, env->pid, mo_AcquireRelease); lck_rdt_unlock(env); @@ -84,17 +82,14 @@ __hot txnid_t mvcc_shapshot_oldest(MDBX_env *const env, const txnid_t steady) { return env->lck->cached_oldest.weak = steady; } - const txnid_t prev_oldest = - atomic_load64(&lck->cached_oldest, mo_AcquireRelease); + const txnid_t prev_oldest = atomic_load64(&lck->cached_oldest, mo_AcquireRelease); eASSERT(env, steady >= prev_oldest); txnid_t new_oldest = prev_oldest; - while (nothing_changed != - atomic_load32(&lck->rdt_refresh_flag, mo_AcquireRelease)) { + while (nothing_changed != atomic_load32(&lck->rdt_refresh_flag, mo_AcquireRelease)) { lck->rdt_refresh_flag.weak = nothing_changed; jitter4testing(false); - const size_t snap_nreaders = - atomic_load32(&lck->rdt_length, mo_AcquireRelease); + const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease); new_oldest = steady; for (size_t i = 0; i < snap_nreaders; ++i) { @@ -105,11 +100,9 @@ __hot txnid_t mvcc_shapshot_oldest(MDBX_env *const env, const txnid_t steady) { const txnid_t rtxn = safe64_read(&lck->rdt[i].txnid); if (unlikely(rtxn < prev_oldest)) { - if (unlikely(nothing_changed == atomic_load32(&lck->rdt_refresh_flag, - mo_AcquireRelease)) && + if (unlikely(nothing_changed == atomic_load32(&lck->rdt_refresh_flag, mo_AcquireRelease)) && safe64_reset_compare(&lck->rdt[i].txnid, rtxn)) { - NOTICE("kick stuck reader[%zu of %zu].pid_%u %" PRIaTXN - " < prev-oldest %" PRIaTXN ", steady-txn %" PRIaTXN, + NOTICE("kick stuck reader[%zu of %zu].pid_%u %" PRIaTXN " < prev-oldest %" PRIaTXN ", steady-txn %" PRIaTXN, i, snap_nreaders, pid, rtxn, prev_oldest, steady); } continue; @@ -135,17 +128,13 @@ pgno_t mvcc_snapshot_largest(const MDBX_env *env, pgno_t last_used_page) { lck_t *const lck = env->lck_mmap.lck; if (likely(lck != nullptr /* check for exclusive without-lck mode */)) { retry:; - const size_t snap_nreaders = - atomic_load32(&lck->rdt_length, 
mo_AcquireRelease); + const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease); for (size_t i = 0; i < snap_nreaders; ++i) { if (atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease)) { /* jitter4testing(true); */ - const pgno_t snap_pages = - atomic_load32(&lck->rdt[i].snapshot_pages_used, mo_Relaxed); + const pgno_t snap_pages = atomic_load32(&lck->rdt[i].snapshot_pages_used, mo_Relaxed); const txnid_t snap_txnid = safe64_read(&lck->rdt[i].txnid); - if (unlikely(snap_pages != - atomic_load32(&lck->rdt[i].snapshot_pages_used, - mo_AcquireRelease) || + if (unlikely(snap_pages != atomic_load32(&lck->rdt[i].snapshot_pages_used, mo_AcquireRelease) || snap_txnid != safe64_read(&lck->rdt[i].txnid))) goto retry; if (last_used_page < snap_pages && snap_txnid <= env->basal_txn->txnid) @@ -161,18 +150,14 @@ pgno_t mvcc_snapshot_largest(const MDBX_env *env, pgno_t last_used_page) { pgno_t mvcc_largest_this(MDBX_env *env, pgno_t largest) { lck_t *const lck = env->lck_mmap.lck; if (likely(lck != nullptr /* exclusive mode */)) { - const size_t snap_nreaders = - atomic_load32(&lck->rdt_length, mo_AcquireRelease); + const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease); for (size_t i = 0; i < snap_nreaders; ++i) { retry: if (atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease) == env->pid) { /* jitter4testing(true); */ - const pgno_t snap_pages = - atomic_load32(&lck->rdt[i].snapshot_pages_used, mo_Relaxed); + const pgno_t snap_pages = atomic_load32(&lck->rdt[i].snapshot_pages_used, mo_Relaxed); const txnid_t snap_txnid = safe64_read(&lck->rdt[i].txnid); - if (unlikely(snap_pages != - atomic_load32(&lck->rdt[i].snapshot_pages_used, - mo_AcquireRelease) || + if (unlikely(snap_pages != atomic_load32(&lck->rdt[i].snapshot_pages_used, mo_AcquireRelease) || snap_txnid != safe64_read(&lck->rdt[i].txnid))) goto retry; if (largest < snap_pages && @@ -219,8 +204,7 @@ static bool pid_insert(uint32_t *list, uint32_t pid) { return true; } -__cold 
MDBX_INTERNAL int mvcc_cleanup_dead(MDBX_env *env, int rdt_locked, - int *dead) { +__cold MDBX_INTERNAL int mvcc_cleanup_dead(MDBX_env *env, int rdt_locked, int *dead) { int rc = check_env(env, true); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -234,13 +218,11 @@ __cold MDBX_INTERNAL int mvcc_cleanup_dead(MDBX_env *env, int rdt_locked, return MDBX_SUCCESS; } - const size_t snap_nreaders = - atomic_load32(&lck->rdt_length, mo_AcquireRelease); + const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease); uint32_t pidsbuf_onstask[142]; - uint32_t *const pids = - (snap_nreaders < ARRAY_LENGTH(pidsbuf_onstask)) - ? pidsbuf_onstask - : osal_malloc((snap_nreaders + 1) * sizeof(uint32_t)); + uint32_t *const pids = (snap_nreaders < ARRAY_LENGTH(pidsbuf_onstask)) + ? pidsbuf_onstask + : osal_malloc((snap_nreaders + 1) * sizeof(uint32_t)); if (unlikely(!pids)) return MDBX_ENOMEM; @@ -296,8 +278,7 @@ __cold MDBX_INTERNAL int mvcc_cleanup_dead(MDBX_env *env, int rdt_locked, /* clean it */ for (size_t ii = i; ii < snap_nreaders; ii++) { if (lck->rdt[ii].pid.weak == pid) { - DEBUG("clear stale reader pid %" PRIuPTR " txn %" PRIaTXN, (size_t)pid, - lck->rdt[ii].txnid.weak); + DEBUG("clear stale reader pid %" PRIuPTR " txn %" PRIaTXN, (size_t)pid, lck->rdt[ii].txnid.weak); atomic_store32(&lck->rdt[ii].pid, 0, mo_Relaxed); atomic_store32(&lck->rdt_refresh_flag, true, mo_AcquireRelease); count++; @@ -321,11 +302,9 @@ __cold MDBX_INTERNAL int mvcc_cleanup_dead(MDBX_env *env, int rdt_locked, int txn_park(MDBX_txn *txn, bool autounpark) { reader_slot_t *const rslot = txn->to.reader; - tASSERT(txn, (txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_RDONLY | - MDBX_TXN_PARKED)) == MDBX_TXN_RDONLY); + tASSERT(txn, (txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) == MDBX_TXN_RDONLY); tASSERT(txn, txn->to.reader->tid.weak < MDBX_TID_TXN_OUSTED); - if (unlikely((txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_RDONLY | - MDBX_TXN_PARKED)) != MDBX_TXN_RDONLY)) 
+ if (unlikely((txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) != MDBX_TXN_RDONLY)) return MDBX_BAD_TXN; const uint32_t pid = atomic_load32(&rslot->pid, mo_Relaxed); @@ -344,14 +323,12 @@ int txn_park(MDBX_txn *txn, bool autounpark) { atomic_store64(&rslot->tid, MDBX_TID_TXN_PARKED, mo_AcquireRelease); atomic_store32(&txn->env->lck->rdt_refresh_flag, true, mo_Relaxed); - txn->flags += - autounpark ? MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK : MDBX_TXN_PARKED; + txn->flags += autounpark ? MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK : MDBX_TXN_PARKED; return MDBX_SUCCESS; } int txn_unpark(MDBX_txn *txn) { - if (unlikely((txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD | - MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) != + if (unlikely((txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD | MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) != (MDBX_TXN_RDONLY | MDBX_TXN_PARKED))) return MDBX_BAD_TXN; @@ -363,14 +340,11 @@ int txn_unpark(MDBX_txn *txn) { ERROR("unexpected pid %u%s%u", pid, " != expected ", txn->env->pid); return MDBX_PROBLEM; } - if (unlikely(tid == MDBX_TID_TXN_OUSTED || - txnid >= SAFE64_INVALID_THRESHOLD)) + if (unlikely(tid == MDBX_TID_TXN_OUSTED || txnid >= SAFE64_INVALID_THRESHOLD)) break; if (unlikely(tid != MDBX_TID_TXN_PARKED || txnid != txn->txnid)) { - ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%" PRIx64 - " and/or txn-id %" PRIaTXN "%s%" PRIaTXN, - tid, " != must ", MDBX_TID_TXN_OUSTED, txnid, " != must ", - txn->txnid); + ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%" PRIx64 " and/or txn-id %" PRIaTXN "%s%" PRIaTXN, tid, " != must ", + MDBX_TID_TXN_OUSTED, txnid, " != must ", txn->txnid); break; } if (unlikely((txn->flags & MDBX_TXN_ERROR))) @@ -380,12 +354,9 @@ int txn_unpark(MDBX_txn *txn) { if (unlikely(!atomic_cas64(&rslot->tid, MDBX_TID_TXN_PARKED, txn->owner))) continue; #else - atomic_store32(&rslot->tid.high, (uint32_t)((uint64_t)txn->owner >> 32), - mo_Relaxed); - if (unlikely(!atomic_cas32(&rslot->tid.low, 
(uint32_t)MDBX_TID_TXN_PARKED, - (uint32_t)txn->owner))) { - atomic_store32(&rslot->tid.high, (uint32_t)(MDBX_TID_TXN_PARKED >> 32), - mo_AcquireRelease); + atomic_store32(&rslot->tid.high, (uint32_t)((uint64_t)txn->owner >> 32), mo_Relaxed); + if (unlikely(!atomic_cas32(&rslot->tid.low, (uint32_t)MDBX_TID_TXN_PARKED, (uint32_t)txn->owner))) { + atomic_store32(&rslot->tid.high, (uint32_t)(MDBX_TID_TXN_PARKED >> 32), mo_AcquireRelease); continue; } #endif @@ -413,8 +384,7 @@ __cold txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler) { bool notify_eof_of_loop = false; int retry = 0; do { - const txnid_t steady = - env->txn->tw.troika.txnid[env->txn->tw.troika.prefer_steady]; + const txnid_t steady = env->txn->tw.troika.txnid[env->txn->tw.troika.prefer_steady]; env->lck->rdt_refresh_flag.weak = /* force refresh */ true; oldest = mvcc_shapshot_oldest(env, steady); eASSERT(env, oldest < env->basal_txn->txnid); @@ -435,8 +405,7 @@ __cold txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler) { reader_slot_t *const rslot = &lck->rdt[i]; txnid_t rtxn = safe64_read(&rslot->txnid); retry: - if (rtxn == straggler && - (pid = atomic_load32(&rslot->pid, mo_AcquireRelease)) != 0) { + if (rtxn == straggler && (pid = atomic_load32(&rslot->pid, mo_AcquireRelease)) != 0) { const uint64_t tid = safe64_read(&rslot->tid); if (tid == MDBX_TID_TXN_PARKED) { /* Читающая транзакция была помечена владельцем как "припаркованная", @@ -454,25 +423,21 @@ __cold txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler) { */ bool ousted = #if MDBX_64BIT_CAS - atomic_cas64(&rslot->tid, MDBX_TID_TXN_PARKED, - MDBX_TID_TXN_OUSTED); + atomic_cas64(&rslot->tid, MDBX_TID_TXN_PARKED, MDBX_TID_TXN_OUSTED); #else - atomic_cas32(&rslot->tid.low, (uint32_t)MDBX_TID_TXN_PARKED, - (uint32_t)MDBX_TID_TXN_OUSTED); + atomic_cas32(&rslot->tid.low, (uint32_t)MDBX_TID_TXN_PARKED, (uint32_t)MDBX_TID_TXN_OUSTED); #endif if (likely(ousted)) { ousted = 
safe64_reset_compare(&rslot->txnid, rtxn); - NOTICE("ousted-%s parked read-txn %" PRIaTXN - ", pid %u, tid 0x%" PRIx64, - ousted ? "complete" : "half", rtxn, pid, tid); + NOTICE("ousted-%s parked read-txn %" PRIaTXN ", pid %u, tid 0x%" PRIx64, ousted ? "complete" : "half", rtxn, + pid, tid); eASSERT(env, ousted || safe64_read(&rslot->txnid) > straggler); continue; } rtxn = safe64_read(&rslot->txnid); goto retry; } - hold_retired = - atomic_load64(&lck->rdt[i].snapshot_pages_retired, mo_Relaxed); + hold_retired = atomic_load64(&lck->rdt[i].snapshot_pages_retired, mo_Relaxed); stucked = rslot; } } @@ -487,15 +452,10 @@ __cold txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler) { const meta_ptr_t head = meta_recent(env, &env->txn->tw.troika); const txnid_t gap = (head.txnid - straggler) / xMDBX_TXNID_STEP; - const uint64_t head_retired = - unaligned_peek_u64(4, head.ptr_c->pages_retired); - const size_t space = - (head_retired > hold_retired) - ? pgno2bytes(env, (pgno_t)(head_retired - hold_retired)) - : 0; - int rc = - callback(env, env->txn, pid, (mdbx_tid_t)((intptr_t)tid), straggler, - (gap < UINT_MAX) ? (unsigned)gap : UINT_MAX, space, retry); + const uint64_t head_retired = unaligned_peek_u64(4, head.ptr_c->pages_retired); + const size_t space = (head_retired > hold_retired) ? pgno2bytes(env, (pgno_t)(head_retired - hold_retired)) : 0; + int rc = callback(env, env->txn, pid, (mdbx_tid_t)((intptr_t)tid), straggler, + (gap < UINT_MAX) ? (unsigned)gap : UINT_MAX, space, retry); if (rc < 0) /* hsr returned error and/or agree MDBX_MAP_FULL error */ break; @@ -523,10 +483,8 @@ __cold txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler) { /* notify end of hsr-loop */ const txnid_t turn = oldest - straggler; if (turn) - NOTICE("hsr-kick: done turn %" PRIaTXN " -> %" PRIaTXN " +%" PRIaTXN, - straggler, oldest, turn); - callback(env, env->txn, 0, 0, straggler, - (turn < UINT_MAX) ? 
(unsigned)turn : UINT_MAX, 0, -retry); + NOTICE("hsr-kick: done turn %" PRIaTXN " -> %" PRIaTXN " +%" PRIaTXN, straggler, oldest, turn); + callback(env, env->txn, 0, 0, straggler, (turn < UINT_MAX) ? (unsigned)turn : UINT_MAX, 0, -retry); } return oldest; } diff --git a/src/node.c b/src/node.c index c8588fd0..934bad65 100644 --- a/src/node.c +++ b/src/node.c @@ -5,15 +5,13 @@ #include "internals.h" -__hot int __must_check_result node_add_dupfix(MDBX_cursor *mc, size_t indx, - const MDBX_val *key) { +__hot int __must_check_result node_add_dupfix(MDBX_cursor *mc, size_t indx, const MDBX_val *key) { page_t *mp = mc->pg[mc->top]; MDBX_ANALYSIS_ASSUME(key != nullptr); DKBUF_DEBUG; DEBUG("add to leaf2-%spage %" PRIaPGNO " index %zi, " " key size %" PRIuPTR " [%s]", - is_subpage(mp) ? "sub-" : "", mp->pgno, indx, key ? key->iov_len : 0, - DKEY_DEBUG(key)); + is_subpage(mp) ? "sub-" : "", mp->pgno, indx, key ? key->iov_len : 0, DKEY_DEBUG(key)); cASSERT(mc, key); cASSERT(mc, page_type_compat(mp) == (P_LEAF | P_DUPFIX)); @@ -45,14 +43,11 @@ __hot int __must_check_result node_add_dupfix(MDBX_cursor *mc, size_t indx, return MDBX_SUCCESS; } -int __must_check_result node_add_branch(MDBX_cursor *mc, size_t indx, - const MDBX_val *key, pgno_t pgno) { +int __must_check_result node_add_branch(MDBX_cursor *mc, size_t indx, const MDBX_val *key, pgno_t pgno) { page_t *mp = mc->pg[mc->top]; DKBUF_DEBUG; - DEBUG("add to branch-%spage %" PRIaPGNO " index %zi, node-pgno %" PRIaPGNO - " key size %" PRIuPTR " [%s]", - is_subpage(mp) ? "sub-" : "", mp->pgno, indx, pgno, - key ? key->iov_len : 0, DKEY_DEBUG(key)); + DEBUG("add to branch-%spage %" PRIaPGNO " index %zi, node-pgno %" PRIaPGNO " key size %" PRIuPTR " [%s]", + is_subpage(mp) ? "sub-" : "", mp->pgno, indx, pgno, key ? 
key->iov_len : 0, DKEY_DEBUG(key)); cASSERT(mc, page_type(mp) == P_BRANCH); STATIC_ASSERT(NODESIZE % 2 == 0); @@ -87,17 +82,15 @@ int __must_check_result node_add_branch(MDBX_cursor *mc, size_t indx, return MDBX_SUCCESS; } -__hot int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx, - const MDBX_val *key, MDBX_val *data, +__hot int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx, const MDBX_val *key, MDBX_val *data, unsigned flags) { MDBX_ANALYSIS_ASSUME(key != nullptr); MDBX_ANALYSIS_ASSUME(data != nullptr); page_t *mp = mc->pg[mc->top]; DKBUF_DEBUG; - DEBUG("add to leaf-%spage %" PRIaPGNO " index %zi, data size %" PRIuPTR - " key size %" PRIuPTR " [%s]", - is_subpage(mp) ? "sub-" : "", mp->pgno, indx, data ? data->iov_len : 0, - key ? key->iov_len : 0, DKEY_DEBUG(key)); + DEBUG("add to leaf-%spage %" PRIaPGNO " index %zi, data size %" PRIuPTR " key size %" PRIuPTR " [%s]", + is_subpage(mp) ? "sub-" : "", mp->pgno, indx, data ? data->iov_len : 0, key ? key->iov_len : 0, + DKEY_DEBUG(key)); cASSERT(mc, key != nullptr && data != nullptr); cASSERT(mc, page_type_compat(mp) == P_LEAF); page_t *largepage = nullptr; @@ -106,19 +99,16 @@ __hot int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx, if (unlikely(flags & N_BIG)) { /* Data already on large/overflow page. */ STATIC_ASSERT(sizeof(pgno_t) % 2 == 0); - node_bytes = - node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t); + node_bytes = node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t); cASSERT(mc, page_room(mp) >= node_bytes); } else if (unlikely(node_size(key, data) > mc->txn->env->leaf_nodemax)) { /* Put data on large/overflow page. 
*/ if (unlikely(mc->tree->flags & MDBX_DUPSORT)) { - ERROR("Unexpected target %s flags 0x%x for large data-item", "dupsort-db", - mc->tree->flags); + ERROR("Unexpected target %s flags 0x%x for large data-item", "dupsort-db", mc->tree->flags); return MDBX_PROBLEM; } if (unlikely(flags & (N_DUP | N_TREE))) { - ERROR("Unexpected target %s flags 0x%x for large data-item", "node", - flags); + ERROR("Unexpected target %s flags 0x%x for large data-item", "node", flags); return MDBX_PROBLEM; } cASSERT(mc, page_room(mp) >= leaf_size(mc->txn->env, key, data)); @@ -127,12 +117,10 @@ __hot int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx, if (unlikely(npr.err != MDBX_SUCCESS)) return npr.err; largepage = npr.page; - DEBUG("allocated %u large/overflow page(s) %" PRIaPGNO "for %" PRIuPTR - " data bytes", - largepage->pages, largepage->pgno, data->iov_len); + DEBUG("allocated %u large/overflow page(s) %" PRIaPGNO "for %" PRIuPTR " data bytes", largepage->pages, + largepage->pgno, data->iov_len); flags |= N_BIG; - node_bytes = - node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t); + node_bytes = node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t); cASSERT(mc, node_bytes == leaf_size(mc->txn->env, key, data)); } else { cASSERT(mc, page_room(mp) >= leaf_size(mc->txn->env, key, data)); @@ -186,8 +174,7 @@ __hot void node_del(MDBX_cursor *mc, size_t ksize) { const size_t hole = mc->ki[mc->top]; const size_t nkeys = page_numkeys(mp); - DEBUG("delete node %zu on %s page %" PRIaPGNO, hole, - is_leaf(mp) ? "leaf" : "branch", mp->pgno); + DEBUG("delete node %zu on %s page %" PRIaPGNO, hole, is_leaf(mp) ? "leaf" : "branch", mp->pgno); cASSERT(mc, hole < nkeys); if (is_dupfix_leaf(mp)) { @@ -215,9 +202,7 @@ __hot void node_del(MDBX_cursor *mc, size_t ksize) { size_t r, w; for (r = w = 0; r < nkeys; r++) if (r != hole) - mp->entries[w++] = (mp->entries[r] < hole_offset) - ? 
mp->entries[r] + (indx_t)hole_size - : mp->entries[r]; + mp->entries[w++] = (mp->entries[r] < hole_offset) ? mp->entries[r] + (indx_t)hole_size : mp->entries[r]; void *const base = ptr_disp(mp, mp->upper + PAGEHDRSZ); memmove(ptr_disp(base, hole_size), base, hole_offset - mp->upper); @@ -236,14 +221,12 @@ __hot void node_del(MDBX_cursor *mc, size_t ksize) { } } -__noinline int node_read_bigdata(MDBX_cursor *mc, const node_t *node, - MDBX_val *data, const page_t *mp) { +__noinline int node_read_bigdata(MDBX_cursor *mc, const node_t *node, MDBX_val *data, const page_t *mp) { cASSERT(mc, node_flags(node) == N_BIG && data->iov_len == node_ds(node)); pgr_t lp = page_get_large(mc, node_largedata_pgno(node), mp->txnid); if (unlikely((lp.err != MDBX_SUCCESS))) { - DEBUG("read large/overflow page %" PRIaPGNO " failed", - node_largedata_pgno(node)); + DEBUG("read large/overflow page %" PRIaPGNO " failed", node_largedata_pgno(node)); return lp.err; } @@ -254,9 +237,7 @@ __noinline int node_read_bigdata(MDBX_cursor *mc, const node_t *node, const size_t dsize = data->iov_len; const unsigned npages = largechunk_npages(env, dsize); if (unlikely(lp.page->pages < npages)) - return bad_page(lp.page, - "too less n-pages %u for bigdata-node (%zu bytes)", - lp.page->pages, dsize); + return bad_page(lp.page, "too less n-pages %u for bigdata-node (%zu bytes)", lp.page->pages, dsize); } return MDBX_SUCCESS; } @@ -265,8 +246,7 @@ node_t *node_shrink(page_t *mp, size_t indx, node_t *node) { assert(node == page_node(mp, indx)); page_t *sp = (page_t *)node_data(node); assert(is_subpage(sp) && page_numkeys(sp) > 0); - const size_t delta = - EVEN_FLOOR(page_room(sp) /* avoid the node uneven-sized */); + const size_t delta = EVEN_FLOOR(page_room(sp) /* avoid the node uneven-sized */); if (unlikely(delta) == 0) return node; @@ -303,15 +283,13 @@ node_t *node_shrink(page_t *mp, size_t indx, node_t *node) { return ptr_disp(node, delta); } -__hot struct node_search_result node_search(MDBX_cursor 
*mc, - const MDBX_val *key) { +__hot struct node_search_result node_search(MDBX_cursor *mc, const MDBX_val *key) { page_t *mp = mc->pg[mc->top]; const intptr_t nkeys = page_numkeys(mp); DKBUF_DEBUG; - DEBUG("searching %zu keys in %s %spage %" PRIaPGNO, nkeys, - is_leaf(mp) ? "leaf" : "branch", is_subpage(mp) ? "sub-" : "", - mp->pgno); + DEBUG("searching %zu keys in %s %spage %" PRIaPGNO, nkeys, is_leaf(mp) ? "leaf" : "branch", + is_subpage(mp) ? "sub-" : "", mp->pgno); struct node_search_result ret; ret.exact = false; @@ -333,8 +311,7 @@ __hot struct node_search_result node_search(MDBX_cursor *mc, do { i = (low + high) >> 1; nodekey.iov_base = page_dupfix_ptr(mp, i, nodekey.iov_len); - cASSERT(mc, ptr_disp(mp, mc->txn->env->ps) >= - ptr_disp(nodekey.iov_base, nodekey.iov_len)); + cASSERT(mc, ptr_disp(mp, mc->txn->env->ps) >= ptr_disp(nodekey.iov_base, nodekey.iov_len)); int cr = cmp(key, &nodekey); DEBUG("found leaf index %zu [%s], rc = %i", i, DKEY_DEBUG(&nodekey), cr); if (cr > 0) @@ -349,10 +326,8 @@ __hot struct node_search_result node_search(MDBX_cursor *mc, /* store the key index */ mc->ki[mc->top] = (indx_t)i; - ret.node = - (i < nkeys) - ? /* fake for DUPFIX */ (node_t *)(intptr_t)-1 - : /* There is no entry larger or equal to the key. */ nullptr; + ret.node = (i < nkeys) ? /* fake for DUPFIX */ (node_t *)(intptr_t)-1 + : /* There is no entry larger or equal to the key. 
*/ nullptr; return ret; } @@ -367,14 +342,12 @@ __hot struct node_search_result node_search(MDBX_cursor *mc, node = page_node(mp, i); nodekey.iov_len = node_ks(node); nodekey.iov_base = node_key(node); - cASSERT(mc, ptr_disp(mp, mc->txn->env->ps) >= - ptr_disp(nodekey.iov_base, nodekey.iov_len)); + cASSERT(mc, ptr_disp(mp, mc->txn->env->ps) >= ptr_disp(nodekey.iov_base, nodekey.iov_len)); int cr = cmp(key, &nodekey); if (is_leaf(mp)) DEBUG("found leaf index %zu [%s], rc = %i", i, DKEY_DEBUG(&nodekey), cr); else - DEBUG("found branch index %zu [%s -> %" PRIaPGNO "], rc = %i", i, - DKEY_DEBUG(&nodekey), node_pgno(node), cr); + DEBUG("found branch index %zu [%s -> %" PRIaPGNO "], rc = %i", i, DKEY_DEBUG(&nodekey), node_pgno(node), cr); if (cr > 0) low = ++i; else if (cr < 0) @@ -387,8 +360,6 @@ __hot struct node_search_result node_search(MDBX_cursor *mc, /* store the key index */ mc->ki[mc->top] = (indx_t)i; - ret.node = (i < nkeys) - ? page_node(mp, i) - : /* There is no entry larger or equal to the key. */ nullptr; + ret.node = (i < nkeys) ? page_node(mp, i) : /* There is no entry larger or equal to the key. 
*/ nullptr; return ret; } diff --git a/src/node.h b/src/node.h index 5411aeed..50de16e1 100644 --- a/src/node.h +++ b/src/node.h @@ -9,8 +9,7 @@ #define NODE_ADD_FLAGS (N_DUP | N_TREE | MDBX_RESERVE | MDBX_APPEND) /* Get the page number pointed to by a branch node */ -MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t -node_pgno(const node_t *const __restrict node) { +MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t node_pgno(const node_t *const __restrict node) { pgno_t pgno = UNALIGNED_PEEK_32(node, node_t, child_pgno); return pgno; } @@ -23,8 +22,7 @@ static inline void node_set_pgno(node_t *const __restrict node, pgno_t pgno) { } /* Get the size of the data in a leaf node */ -MDBX_NOTHROW_PURE_FUNCTION static inline size_t -node_ds(const node_t *const __restrict node) { +MDBX_NOTHROW_PURE_FUNCTION static inline size_t node_ds(const node_t *const __restrict node) { return UNALIGNED_PEEK_32(node, node_t, dsize); } @@ -35,8 +33,7 @@ static inline void node_set_ds(node_t *const __restrict node, size_t size) { } /* The size of a key in a node */ -MDBX_NOTHROW_PURE_FUNCTION static inline size_t -node_ks(const node_t *const __restrict node) { +MDBX_NOTHROW_PURE_FUNCTION static inline size_t node_ks(const node_t *const __restrict node) { return UNALIGNED_PEEK_16(node, node_t, ksize); } @@ -46,54 +43,42 @@ static inline void node_set_ks(node_t *const __restrict node, size_t size) { UNALIGNED_POKE_16(node, node_t, ksize, (uint16_t)size); } -MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t -node_flags(const node_t *const __restrict node) { +MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t node_flags(const node_t *const __restrict node) { return UNALIGNED_PEEK_8(node, node_t, flags); } -static inline void node_set_flags(node_t *const __restrict node, - uint8_t flags) { +static inline void node_set_flags(node_t *const __restrict node, uint8_t flags) { UNALIGNED_POKE_8(node, node_t, flags, flags); } /* Address of the key for the node */ -MDBX_NOTHROW_PURE_FUNCTION static inline 
void * -node_key(const node_t *const __restrict node) { +MDBX_NOTHROW_PURE_FUNCTION static inline void *node_key(const node_t *const __restrict node) { return ptr_disp(node, NODESIZE); } /* Address of the data for a node */ -MDBX_NOTHROW_PURE_FUNCTION static inline void * -node_data(const node_t *const __restrict node) { +MDBX_NOTHROW_PURE_FUNCTION static inline void *node_data(const node_t *const __restrict node) { return ptr_disp(node_key(node), node_ks(node)); } /* Size of a node in a leaf page with a given key and data. * This is node header plus key plus data size. */ -MDBX_NOTHROW_CONST_FUNCTION static inline size_t -node_size_len(const size_t key_len, const size_t value_len) { +MDBX_NOTHROW_CONST_FUNCTION static inline size_t node_size_len(const size_t key_len, const size_t value_len) { return NODESIZE + EVEN_CEIL(key_len + value_len); } -MDBX_NOTHROW_PURE_FUNCTION static inline size_t -node_size(const MDBX_val *key, const MDBX_val *value) { +MDBX_NOTHROW_PURE_FUNCTION static inline size_t node_size(const MDBX_val *key, const MDBX_val *value) { return node_size_len(key ? key->iov_len : 0, value ? 
value->iov_len : 0); } -MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t -node_largedata_pgno(const node_t *const __restrict node) { +MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t node_largedata_pgno(const node_t *const __restrict node) { assert(node_flags(node) & N_BIG); return peek_pgno(node_data(node)); } -MDBX_INTERNAL int __must_check_result node_read_bigdata(MDBX_cursor *mc, - const node_t *node, - MDBX_val *data, +MDBX_INTERNAL int __must_check_result node_read_bigdata(MDBX_cursor *mc, const node_t *node, MDBX_val *data, const page_t *mp); -static inline int __must_check_result node_read(MDBX_cursor *mc, - const node_t *node, - MDBX_val *data, - const page_t *mp) { +static inline int __must_check_result node_read(MDBX_cursor *mc, const node_t *node, MDBX_val *data, const page_t *mp) { data->iov_len = node_ds(node); data->iov_base = node_data(node); if (likely(node_flags(node) != N_BIG)) @@ -105,20 +90,12 @@ static inline int __must_check_result node_read(MDBX_cursor *mc, MDBX_INTERNAL nsr_t node_search(MDBX_cursor *mc, const MDBX_val *key); -MDBX_INTERNAL int __must_check_result node_add_branch(MDBX_cursor *mc, - size_t indx, - const MDBX_val *key, - pgno_t pgno); +MDBX_INTERNAL int __must_check_result node_add_branch(MDBX_cursor *mc, size_t indx, const MDBX_val *key, pgno_t pgno); -MDBX_INTERNAL int __must_check_result node_add_leaf(MDBX_cursor *mc, - size_t indx, - const MDBX_val *key, - MDBX_val *data, +MDBX_INTERNAL int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx, const MDBX_val *key, MDBX_val *data, unsigned flags); -MDBX_INTERNAL int __must_check_result node_add_dupfix(MDBX_cursor *mc, - size_t indx, - const MDBX_val *key); +MDBX_INTERNAL int __must_check_result node_add_dupfix(MDBX_cursor *mc, size_t indx, const MDBX_val *key); MDBX_INTERNAL void node_del(MDBX_cursor *mc, size_t ksize); diff --git a/src/options.h b/src/options.h index acfccfb1..1a80cd20 100644 --- a/src/options.h +++ b/src/options.h @@ -66,8 +66,7 @@ /** Does a 
system have battery-backed Real-Time Clock or just a fake. */ #ifndef MDBX_TRUST_RTC -#if defined(__linux__) || defined(__gnu_linux__) || defined(__NetBSD__) || \ - defined(__OpenBSD__) +#if defined(__linux__) || defined(__gnu_linux__) || defined(__NetBSD__) || defined(__OpenBSD__) #define MDBX_TRUST_RTC 0 /* a lot of embedded systems have a fake RTC */ #else #define MDBX_TRUST_RTC 1 @@ -131,15 +130,13 @@ #ifndef MDBX_PNL_PREALLOC_FOR_RADIXSORT #define MDBX_PNL_PREALLOC_FOR_RADIXSORT 1 -#elif !(MDBX_PNL_PREALLOC_FOR_RADIXSORT == 0 || \ - MDBX_PNL_PREALLOC_FOR_RADIXSORT == 1) +#elif !(MDBX_PNL_PREALLOC_FOR_RADIXSORT == 0 || MDBX_PNL_PREALLOC_FOR_RADIXSORT == 1) #error MDBX_PNL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1 #endif /* MDBX_PNL_PREALLOC_FOR_RADIXSORT */ #ifndef MDBX_DPL_PREALLOC_FOR_RADIXSORT #define MDBX_DPL_PREALLOC_FOR_RADIXSORT 1 -#elif !(MDBX_DPL_PREALLOC_FOR_RADIXSORT == 0 || \ - MDBX_DPL_PREALLOC_FOR_RADIXSORT == 1) +#elif !(MDBX_DPL_PREALLOC_FOR_RADIXSORT == 0 || MDBX_DPL_PREALLOC_FOR_RADIXSORT == 1) #error MDBX_DPL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1 #endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */ @@ -204,8 +201,7 @@ /** Size of buffer used during copying a environment/database file. 
*/ #ifndef MDBX_ENVCOPY_WRITEBUF #define MDBX_ENVCOPY_WRITEBUF 1048576u -#elif MDBX_ENVCOPY_WRITEBUF < 65536u || MDBX_ENVCOPY_WRITEBUF > 1073741824u || \ - MDBX_ENVCOPY_WRITEBUF % 65536u +#elif MDBX_ENVCOPY_WRITEBUF < 65536u || MDBX_ENVCOPY_WRITEBUF > 1073741824u || MDBX_ENVCOPY_WRITEBUF % 65536u #error MDBX_ENVCOPY_WRITEBUF must be defined in range 65536..1073741824 and be multiple of 65536 #endif /* MDBX_ENVCOPY_WRITEBUF */ @@ -224,8 +220,7 @@ #else #define MDBX_ASSUME_MALLOC_OVERHEAD (sizeof(void *) * 2u) #endif -#elif MDBX_ASSUME_MALLOC_OVERHEAD < 0 || MDBX_ASSUME_MALLOC_OVERHEAD > 64 || \ - MDBX_ASSUME_MALLOC_OVERHEAD % 4 +#elif MDBX_ASSUME_MALLOC_OVERHEAD < 0 || MDBX_ASSUME_MALLOC_OVERHEAD > 64 || MDBX_ASSUME_MALLOC_OVERHEAD % 4 #error MDBX_ASSUME_MALLOC_OVERHEAD must be defined in range 0..64 and be multiple of 4 #endif /* MDBX_ASSUME_MALLOC_OVERHEAD */ @@ -252,15 +247,13 @@ #define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0 #elif defined(__e2k__) #define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0 -#elif __has_builtin(__builtin_cpu_supports) || \ - defined(__BUILTIN_CPU_SUPPORTS__) || \ +#elif __has_builtin(__builtin_cpu_supports) || defined(__BUILTIN_CPU_SUPPORTS__) || \ (defined(__ia32__) && __GNUC_PREREQ(4, 8) && __GLIBC_PREREQ(2, 23)) #define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 1 #else #define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0 #endif -#elif !(MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 0 || \ - MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 1) +#elif !(MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 0 || MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 1) #error MDBX_HAVE_BUILTIN_CPU_SUPPORTS must be defined as 0 or 1 #endif /* MDBX_HAVE_BUILTIN_CPU_SUPPORTS */ @@ -286,19 +279,15 @@ #define MDBX_LOCKING MDBX_LOCKING_WIN32FILES #else #ifndef MDBX_LOCKING -#if defined(_POSIX_THREAD_PROCESS_SHARED) && \ - _POSIX_THREAD_PROCESS_SHARED >= 200112L && !defined(__FreeBSD__) +#if defined(_POSIX_THREAD_PROCESS_SHARED) && _POSIX_THREAD_PROCESS_SHARED >= 200112L && !defined(__FreeBSD__) /* Some platforms define the EOWNERDEAD error code even 
though they * don't support Robust Mutexes. If doubt compile with -MDBX_LOCKING=2001. */ -#if defined(EOWNERDEAD) && _POSIX_THREAD_PROCESS_SHARED >= 200809L && \ - ((defined(_POSIX_THREAD_ROBUST_PRIO_INHERIT) && \ - _POSIX_THREAD_ROBUST_PRIO_INHERIT > 0) || \ - (defined(_POSIX_THREAD_ROBUST_PRIO_PROTECT) && \ - _POSIX_THREAD_ROBUST_PRIO_PROTECT > 0) || \ - defined(PTHREAD_MUTEX_ROBUST) || defined(PTHREAD_MUTEX_ROBUST_NP)) && \ - (!defined(__GLIBC__) || \ - __GLIBC_PREREQ(2, 10) /* troubles with Robust mutexes before 2.10 */) +#if defined(EOWNERDEAD) && _POSIX_THREAD_PROCESS_SHARED >= 200809L && \ + ((defined(_POSIX_THREAD_ROBUST_PRIO_INHERIT) && _POSIX_THREAD_ROBUST_PRIO_INHERIT > 0) || \ + (defined(_POSIX_THREAD_ROBUST_PRIO_PROTECT) && _POSIX_THREAD_ROBUST_PRIO_PROTECT > 0) || \ + defined(PTHREAD_MUTEX_ROBUST) || defined(PTHREAD_MUTEX_ROBUST_NP)) && \ + (!defined(__GLIBC__) || __GLIBC_PREREQ(2, 10) /* troubles with Robust mutexes before 2.10 */) #define MDBX_LOCKING MDBX_LOCKING_POSIX2008 #else #define MDBX_LOCKING MDBX_LOCKING_POSIX2001 @@ -316,12 +305,9 @@ /** Advanced: Using POSIX OFD-locks (autodetection by default). */ #ifndef MDBX_USE_OFDLOCKS -#if ((defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && \ - defined(F_OFD_GETLK)) || \ - (defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && \ - defined(F_OFD_GETLK64))) && \ - !defined(MDBX_SAFE4QEMU) && \ - !defined(__sun) /* OFD-lock are broken on Solaris */ +#if ((defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && defined(F_OFD_GETLK)) || \ + (defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && defined(F_OFD_GETLK64))) && \ + !defined(MDBX_SAFE4QEMU) && !defined(__sun) /* OFD-lock are broken on Solaris */ #define MDBX_USE_OFDLOCKS 1 #else #define MDBX_USE_OFDLOCKS 0 @@ -335,8 +321,7 @@ /** Advanced: Using sendfile() syscall (autodetection by default). 
*/ #ifndef MDBX_USE_SENDFILE -#if ((defined(__linux__) || defined(__gnu_linux__)) && \ - !defined(__ANDROID_API__)) || \ +#if ((defined(__linux__) || defined(__gnu_linux__)) && !defined(__ANDROID_API__)) || \ (defined(__ANDROID_API__) && __ANDROID_API__ >= 21) #define MDBX_USE_SENDFILE 1 #else @@ -360,14 +345,12 @@ //------------------------------------------------------------------------------ #ifndef MDBX_CPU_WRITEBACK_INCOHERENT -#if defined(__ia32__) || defined(__e2k__) || defined(__hppa) || \ - defined(__hppa__) || defined(DOXYGEN) +#if defined(__ia32__) || defined(__e2k__) || defined(__hppa) || defined(__hppa__) || defined(DOXYGEN) #define MDBX_CPU_WRITEBACK_INCOHERENT 0 #else #define MDBX_CPU_WRITEBACK_INCOHERENT 1 #endif -#elif !(MDBX_CPU_WRITEBACK_INCOHERENT == 0 || \ - MDBX_CPU_WRITEBACK_INCOHERENT == 1) +#elif !(MDBX_CPU_WRITEBACK_INCOHERENT == 0 || MDBX_CPU_WRITEBACK_INCOHERENT == 1) #error MDBX_CPU_WRITEBACK_INCOHERENT must be defined as 0 or 1 #endif /* MDBX_CPU_WRITEBACK_INCOHERENT */ @@ -377,31 +360,27 @@ #else #define MDBX_MMAP_INCOHERENT_FILE_WRITE 0 #endif -#elif !(MDBX_MMAP_INCOHERENT_FILE_WRITE == 0 || \ - MDBX_MMAP_INCOHERENT_FILE_WRITE == 1) +#elif !(MDBX_MMAP_INCOHERENT_FILE_WRITE == 0 || MDBX_MMAP_INCOHERENT_FILE_WRITE == 1) #error MDBX_MMAP_INCOHERENT_FILE_WRITE must be defined as 0 or 1 #endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ #ifndef MDBX_MMAP_INCOHERENT_CPU_CACHE -#if defined(__mips) || defined(__mips__) || defined(__mips64) || \ - defined(__mips64__) || defined(_M_MRX000) || defined(_MIPS_) || \ - defined(__MWERKS__) || defined(__sgi) +#if defined(__mips) || defined(__mips__) || defined(__mips64) || defined(__mips64__) || defined(_M_MRX000) || \ + defined(_MIPS_) || defined(__MWERKS__) || defined(__sgi) /* MIPS has cache coherency issues. */ #define MDBX_MMAP_INCOHERENT_CPU_CACHE 1 #else /* LY: assume no relevant mmap/dcache issues. 
*/ #define MDBX_MMAP_INCOHERENT_CPU_CACHE 0 #endif -#elif !(MDBX_MMAP_INCOHERENT_CPU_CACHE == 0 || \ - MDBX_MMAP_INCOHERENT_CPU_CACHE == 1) +#elif !(MDBX_MMAP_INCOHERENT_CPU_CACHE == 0 || MDBX_MMAP_INCOHERENT_CPU_CACHE == 1) #error MDBX_MMAP_INCOHERENT_CPU_CACHE must be defined as 0 or 1 #endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */ /** Assume system needs explicit syscall to sync/flush/write modified mapped * memory. */ #ifndef MDBX_MMAP_NEEDS_JOLT -#if MDBX_MMAP_INCOHERENT_FILE_WRITE || MDBX_MMAP_INCOHERENT_CPU_CACHE || \ - !(defined(__linux__) || defined(__gnu_linux__)) +#if MDBX_MMAP_INCOHERENT_FILE_WRITE || MDBX_MMAP_INCOHERENT_CPU_CACHE || !(defined(__linux__) || defined(__gnu_linux__)) #define MDBX_MMAP_NEEDS_JOLT 1 #else #define MDBX_MMAP_NEEDS_JOLT 0 @@ -456,8 +435,7 @@ #endif /* MDBX_64BIT_CAS */ #ifndef MDBX_UNALIGNED_OK -#if defined(__ALIGNED__) || defined(__SANITIZE_UNDEFINED__) || \ - defined(ENABLE_UBSAN) +#if defined(__ALIGNED__) || defined(__SANITIZE_UNDEFINED__) || defined(ENABLE_UBSAN) #define MDBX_UNALIGNED_OK 0 /* no unaligned access allowed */ #elif defined(__ARM_FEATURE_UNALIGNED) #define MDBX_UNALIGNED_OK 4 /* ok unaligned for 32-bit words */ diff --git a/src/osal.c b/src/osal.c index caff1ad2..cdef5712 100644 --- a/src/osal.c +++ b/src/osal.c @@ -39,8 +39,7 @@ static int ntstatus2errcode(NTSTATUS status) { ov.Internal = status; /* Zap: '_Param_(1)' could be '0' */ MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(6387); - return GetOverlappedResult(nullptr, &ov, &dummy, FALSE) ? MDBX_SUCCESS - : (int)GetLastError(); + return GetOverlappedResult(nullptr, &ov, &dummy, FALSE) ? MDBX_SUCCESS : (int)GetLastError(); } /* We use native NT APIs to setup the memory map, so that we can @@ -51,11 +50,10 @@ static int ntstatus2errcode(NTSTATUS status) { * declare them here. Using these APIs also means we must link to * ntdll.dll, which is not linked by default in user code. 
*/ -extern NTSTATUS NTAPI NtCreateSection( - OUT PHANDLE SectionHandle, IN ACCESS_MASK DesiredAccess, - IN OPTIONAL POBJECT_ATTRIBUTES ObjectAttributes, - IN OPTIONAL PLARGE_INTEGER MaximumSize, IN ULONG SectionPageProtection, - IN ULONG AllocationAttributes, IN OPTIONAL HANDLE FileHandle); +extern NTSTATUS NTAPI NtCreateSection(OUT PHANDLE SectionHandle, IN ACCESS_MASK DesiredAccess, + IN OPTIONAL POBJECT_ATTRIBUTES ObjectAttributes, + IN OPTIONAL PLARGE_INTEGER MaximumSize, IN ULONG SectionPageProtection, + IN ULONG AllocationAttributes, IN OPTIONAL HANDLE FileHandle); typedef struct _SECTION_BASIC_INFORMATION { ULONG Unknown; @@ -63,27 +61,22 @@ typedef struct _SECTION_BASIC_INFORMATION { LARGE_INTEGER SectionSize; } SECTION_BASIC_INFORMATION, *PSECTION_BASIC_INFORMATION; -extern NTSTATUS NTAPI NtMapViewOfSection( - IN HANDLE SectionHandle, IN HANDLE ProcessHandle, IN OUT PVOID *BaseAddress, - IN ULONG_PTR ZeroBits, IN SIZE_T CommitSize, - IN OUT OPTIONAL PLARGE_INTEGER SectionOffset, IN OUT PSIZE_T ViewSize, - IN SECTION_INHERIT InheritDisposition, IN ULONG AllocationType, - IN ULONG Win32Protect); +extern NTSTATUS NTAPI NtMapViewOfSection(IN HANDLE SectionHandle, IN HANDLE ProcessHandle, IN OUT PVOID *BaseAddress, + IN ULONG_PTR ZeroBits, IN SIZE_T CommitSize, + IN OUT OPTIONAL PLARGE_INTEGER SectionOffset, IN OUT PSIZE_T ViewSize, + IN SECTION_INHERIT InheritDisposition, IN ULONG AllocationType, + IN ULONG Win32Protect); -extern NTSTATUS NTAPI NtUnmapViewOfSection(IN HANDLE ProcessHandle, - IN OPTIONAL PVOID BaseAddress); +extern NTSTATUS NTAPI NtUnmapViewOfSection(IN HANDLE ProcessHandle, IN OPTIONAL PVOID BaseAddress); /* Zap: Inconsistent annotation for 'NtClose'... 
*/ MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(28251) extern NTSTATUS NTAPI NtClose(HANDLE Handle); -extern NTSTATUS NTAPI NtAllocateVirtualMemory( - IN HANDLE ProcessHandle, IN OUT PVOID *BaseAddress, IN ULONG_PTR ZeroBits, - IN OUT PSIZE_T RegionSize, IN ULONG AllocationType, IN ULONG Protect); +extern NTSTATUS NTAPI NtAllocateVirtualMemory(IN HANDLE ProcessHandle, IN OUT PVOID *BaseAddress, IN ULONG_PTR ZeroBits, + IN OUT PSIZE_T RegionSize, IN ULONG AllocationType, IN ULONG Protect); -extern NTSTATUS NTAPI NtFreeVirtualMemory(IN HANDLE ProcessHandle, - IN PVOID *BaseAddress, - IN OUT PSIZE_T RegionSize, +extern NTSTATUS NTAPI NtFreeVirtualMemory(IN HANDLE ProcessHandle, IN PVOID *BaseAddress, IN OUT PSIZE_T RegionSize, IN ULONG FreeType); #ifndef WOF_CURRENT_VERSION @@ -127,8 +120,7 @@ typedef struct _FILE_PROVIDER_EXTERNAL_INFO_V1 { #endif #ifndef FSCTL_GET_EXTERNAL_BACKING -#define FSCTL_GET_EXTERNAL_BACKING \ - CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 196, METHOD_BUFFERED, FILE_ANY_ACCESS) +#define FSCTL_GET_EXTERNAL_BACKING CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 196, METHOD_BUFFERED, FILE_ANY_ACCESS) #endif #ifndef ERROR_NOT_CAPABLE @@ -140,29 +132,23 @@ typedef struct _FILE_PROVIDER_EXTERNAL_INFO_V1 { /*----------------------------------------------------------------------------*/ #if defined(__ANDROID_API__) -__extern_C void __assert2(const char *file, int line, const char *function, - const char *msg) __noreturn; -#define __assert_fail(assertion, file, line, function) \ - __assert2(file, line, function, assertion) +__extern_C void __assert2(const char *file, int line, const char *function, const char *msg) __noreturn; +#define __assert_fail(assertion, file, line, function) __assert2(file, line, function, assertion) #elif defined(__UCLIBC__) -MDBX_NORETURN __extern_C void __assert(const char *, const char *, unsigned, - const char *) +MDBX_NORETURN __extern_C void __assert(const char *, const char *, unsigned, const char *) #ifdef __THROW __THROW #else __nothrow #endif /* 
__THROW */ ; -#define __assert_fail(assertion, file, line, function) \ - __assert(assertion, file, line, function) +#define __assert_fail(assertion, file, line, function) __assert(assertion, file, line, function) -#elif _POSIX_C_SOURCE > 200212 && \ - /* workaround for avoid musl libc wrong prototype */ ( \ - defined(__GLIBC__) || defined(__GNU_LIBRARY__)) +#elif _POSIX_C_SOURCE > 200212 && \ + /* workaround for avoid musl libc wrong prototype */ (defined(__GLIBC__) || defined(__GNU_LIBRARY__)) /* Prototype should match libc runtime. ISO POSIX (2003) & LSB 1.x-3.x */ -MDBX_NORETURN __extern_C void __assert_fail(const char *assertion, - const char *file, unsigned line, +MDBX_NORETURN __extern_C void __assert_fail(const char *assertion, const char *file, unsigned line, const char *function) #ifdef __THROW __THROW @@ -172,8 +158,7 @@ MDBX_NORETURN __extern_C void __assert_fail(const char *assertion, ; #elif defined(__APPLE__) || defined(__MACH__) -__extern_C void __assert_rtn(const char *function, const char *file, int line, - const char *assertion) /* __nothrow */ +__extern_C void __assert_rtn(const char *function, const char *file, int line, const char *assertion) /* __nothrow */ #ifdef __dead2 __dead2 #else @@ -184,30 +169,20 @@ __extern_C void __assert_rtn(const char *function, const char *file, int line, #endif /* __disable_tail_calls */ ; -#define __assert_fail(assertion, file, line, function) \ - __assert_rtn(function, file, line, assertion) +#define __assert_fail(assertion, file, line, function) __assert_rtn(function, file, line, assertion) #elif defined(__sun) || defined(__SVR4) || defined(__svr4__) -MDBX_NORETURN __extern_C void __assert_c99(const char *assection, - const char *file, int line, - const char *function); -#define __assert_fail(assertion, file, line, function) \ - __assert_c99(assertion, file, line, function) +MDBX_NORETURN __extern_C void __assert_c99(const char *assection, const char *file, int line, const char *function); +#define 
__assert_fail(assertion, file, line, function) __assert_c99(assertion, file, line, function) #elif defined(__OpenBSD__) -__extern_C __dead void __assert2(const char *file, int line, - const char *function, +__extern_C __dead void __assert2(const char *file, int line, const char *function, const char *assertion) /* __nothrow */; -#define __assert_fail(assertion, file, line, function) \ - __assert2(file, line, function, assertion) +#define __assert_fail(assertion, file, line, function) __assert2(file, line, function, assertion) #elif defined(__NetBSD__) -__extern_C __dead void __assert13(const char *file, int line, - const char *function, +__extern_C __dead void __assert13(const char *file, int line, const char *function, const char *assertion) /* __nothrow */; -#define __assert_fail(assertion, file, line, function) \ - __assert13(file, line, function, assertion) -#elif defined(__FreeBSD__) || defined(__BSD__) || defined(__bsdi__) || \ - defined(__DragonFly__) -__extern_C void __assert(const char *function, const char *file, int line, - const char *assertion) /* __nothrow */ +#define __assert_fail(assertion, file, line, function) __assert13(file, line, function, assertion) +#elif defined(__FreeBSD__) || defined(__BSD__) || defined(__bsdi__) || defined(__DragonFly__) +__extern_C void __assert(const char *function, const char *file, int line, const char *assertion) /* __nothrow */ #ifdef __dead2 __dead2 #else @@ -217,13 +192,11 @@ __extern_C void __assert(const char *function, const char *file, int line, __disable_tail_calls #endif /* __disable_tail_calls */ ; -#define __assert_fail(assertion, file, line, function) \ - __assert(function, file, line, assertion) +#define __assert_fail(assertion, file, line, function) __assert(function, file, line, assertion) #endif /* __assert_fail */ -__cold void mdbx_assert_fail(const MDBX_env *env, const char *msg, - const char *func, unsigned line) { +__cold void mdbx_assert_fail(const MDBX_env *env, const char *msg, const char 
*func, unsigned line) { #if MDBX_DEBUG if (env && env->assert_func) env->assert_func(env, msg, func, line); @@ -232,8 +205,7 @@ __cold void mdbx_assert_fail(const MDBX_env *env, const char *msg, assert_fail(msg, func, line); } -MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func, - unsigned line) { +MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func, unsigned line) { #endif /* MDBX_DEBUG */ if (globals.logger.ptr) @@ -241,8 +213,7 @@ MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func, else { #if defined(_WIN32) || defined(_WIN64) char *message = nullptr; - const int num = osal_asprintf(&message, "\r\nMDBX-ASSERTION: %s, %s:%u", - msg, func ? func : "unknown", line); + const int num = osal_asprintf(&message, "\r\nMDBX-ASSERTION: %s, %s:%u", msg, func ? func : "unknown", line); if (num < 1 || !message) message = ""; OutputDebugStringA(message); @@ -254,8 +225,7 @@ MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func, while (1) { #if defined(_WIN32) || defined(_WIN64) #if !MDBX_WITHOUT_MSVC_CRT && defined(_DEBUG) - _CrtDbgReport(_CRT_ASSERT, func ? func : "unknown", line, "libmdbx", - "assertion failed: %s", msg); + _CrtDbgReport(_CRT_ASSERT, func ? func : "unknown", line, "libmdbx", "assertion failed: %s", msg); #else if (IsDebuggerPresent()) DebugBreak(); @@ -275,9 +245,7 @@ __cold void mdbx_panic(const char *fmt, ...) { const int num = osal_vasprintf(&message, fmt, ap); va_end(ap); const char *const const_message = - unlikely(num < 1 || !message) - ? "" - : message; + unlikely(num < 1 || !message) ? "" : message; if (globals.logger.ptr) debug_log(MDBX_LOG_FATAL, "panic", 0, "%s", const_message); @@ -285,8 +253,7 @@ __cold void mdbx_panic(const char *fmt, ...) 
{ while (1) { #if defined(_WIN32) || defined(_WIN64) #if !MDBX_WITHOUT_MSVC_CRT && defined(_DEBUG) - _CrtDbgReport(_CRT_ASSERT, "mdbx.c", 0, "libmdbx", "panic: %s", - const_message); + _CrtDbgReport(_CRT_ASSERT, "mdbx.c", 0, "libmdbx", "panic: %s", const_message); #else OutputDebugStringA("\r\nMDBX-PANIC: "); OutputDebugStringA(const_message); @@ -346,19 +313,16 @@ MDBX_INTERNAL int osal_asprintf(char **strp, const char *fmt, ...) { #endif /* osal_asprintf */ #ifndef osal_memalign_alloc -MDBX_INTERNAL int osal_memalign_alloc(size_t alignment, size_t bytes, - void **result) { +MDBX_INTERNAL int osal_memalign_alloc(size_t alignment, size_t bytes, void **result) { assert(is_powerof2(alignment) && alignment >= sizeof(void *)); #if defined(_WIN32) || defined(_WIN64) (void)alignment; - *result = - VirtualAlloc(nullptr, bytes, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + *result = VirtualAlloc(nullptr, bytes, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); return *result ? MDBX_SUCCESS : MDBX_ENOMEM /* ERROR_OUTOFMEMORY */; #elif defined(_ISOC11_SOURCE) *result = aligned_alloc(alignment, ceil_powerof2(bytes, alignment)); return *result ? 
MDBX_SUCCESS : errno; -#elif _POSIX_VERSION >= 200112L && \ - (!defined(__ANDROID_API__) || __ANDROID_API__ >= 17) +#elif _POSIX_VERSION >= 200112L && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 17) *result = nullptr; return posix_memalign(result, alignment, bytes); #elif __GLIBC_PREREQ(2, 16) || __STDC_VERSION__ >= 201112L @@ -474,8 +438,7 @@ MDBX_INTERNAL int osal_condpair_signal(osal_condpair_t *condpair, bool part) { MDBX_INTERNAL int osal_condpair_wait(osal_condpair_t *condpair, bool part) { #if defined(_WIN32) || defined(_WIN64) - DWORD code = SignalObjectAndWait(condpair->mutex, condpair->event[part], - INFINITE, FALSE); + DWORD code = SignalObjectAndWait(condpair->mutex, condpair->event[part], INFINITE, FALSE); if (code == WAIT_OBJECT_0) { code = WaitForSingleObject(condpair->mutex, INFINITE); if (code == WAIT_OBJECT_0) @@ -521,11 +484,9 @@ MDBX_INTERNAL int osal_fastmutex_acquire(osal_fastmutex_t *fastmutex) { #if defined(_WIN32) || defined(_WIN64) __try { EnterCriticalSection(fastmutex); - } __except ( - (GetExceptionCode() == - 0xC0000194 /* STATUS_POSSIBLE_DEADLOCK / EXCEPTION_POSSIBLE_DEADLOCK */) - ? EXCEPTION_EXECUTE_HANDLER - : EXCEPTION_CONTINUE_SEARCH) { + } __except ((GetExceptionCode() == 0xC0000194 /* STATUS_POSSIBLE_DEADLOCK / EXCEPTION_POSSIBLE_DEADLOCK */) + ? 
EXCEPTION_EXECUTE_HANDLER + : EXCEPTION_CONTINUE_SEARCH) { return MDBX_EDEADLK; } return MDBX_SUCCESS; @@ -548,8 +509,7 @@ MDBX_INTERNAL int osal_fastmutex_release(osal_fastmutex_t *fastmutex) { #if defined(_WIN32) || defined(_WIN64) MDBX_INTERNAL int osal_mb2w(const char *const src, wchar_t **const pdst) { - const size_t dst_wlen = MultiByteToWideChar( - CP_THREAD_ACP, MB_ERR_INVALID_CHARS, src, -1, nullptr, 0); + const size_t dst_wlen = MultiByteToWideChar(CP_THREAD_ACP, MB_ERR_INVALID_CHARS, src, -1, nullptr, 0); wchar_t *dst = *pdst; int rc = ERROR_INVALID_NAME; if (unlikely(dst_wlen < 2 || dst_wlen > /* MAX_PATH */ INT16_MAX)) @@ -561,9 +521,7 @@ MDBX_INTERNAL int osal_mb2w(const char *const src, wchar_t **const pdst) { goto bailout; *pdst = dst; - if (likely(dst_wlen == (size_t)MultiByteToWideChar(CP_THREAD_ACP, - MB_ERR_INVALID_CHARS, src, - -1, dst, (int)dst_wlen))) + if (likely(dst_wlen == (size_t)MultiByteToWideChar(CP_THREAD_ACP, MB_ERR_INVALID_CHARS, src, -1, dst, (int)dst_wlen))) return MDBX_SUCCESS; rc = ERROR_INVALID_NAME; @@ -623,8 +581,7 @@ static size_t osal_iov_max; MDBX_INTERNAL int osal_ioring_create(osal_ioring_t *ior #if defined(_WIN32) || defined(_WIN64) , - bool enable_direct, - mdbx_filehandle_t overlapped_fd + bool enable_direct, mdbx_filehandle_t overlapped_fd #endif /* Windows */ ) { memset(ior, 0, sizeof(osal_ioring_t)); @@ -649,9 +606,8 @@ MDBX_INTERNAL int osal_ioring_create(osal_ioring_t *ior static inline size_t ior_offset(const ior_item_t *item) { #if defined(_WIN32) || defined(_WIN64) - return item->ov.Offset | (size_t)((sizeof(size_t) > sizeof(item->ov.Offset)) - ? (uint64_t)item->ov.OffsetHigh << 32 - : 0); + return item->ov.Offset | + (size_t)((sizeof(size_t) > sizeof(item->ov.Offset)) ? 
(uint64_t)item->ov.OffsetHigh << 32 : 0); #else return item->offset; #endif /* !Windows */ @@ -660,9 +616,7 @@ static inline size_t ior_offset(const ior_item_t *item) { static inline ior_item_t *ior_next(ior_item_t *item, size_t sgvcnt) { #if defined(ior_sgv_element) assert(sgvcnt > 0); - return (ior_item_t *)ptr_disp(item, sizeof(ior_item_t) - - sizeof(ior_sgv_element) + - sizeof(ior_sgv_element) * sgvcnt); + return (ior_item_t *)ptr_disp(item, sizeof(ior_item_t) - sizeof(ior_sgv_element) + sizeof(ior_sgv_element) * sgvcnt); #else assert(sgvcnt == 1); (void)sgvcnt; @@ -670,17 +624,14 @@ static inline ior_item_t *ior_next(ior_item_t *item, size_t sgvcnt) { #endif } -MDBX_INTERNAL int osal_ioring_add(osal_ioring_t *ior, const size_t offset, - void *data, const size_t bytes) { +MDBX_INTERNAL int osal_ioring_add(osal_ioring_t *ior, const size_t offset, void *data, const size_t bytes) { assert(bytes && data); assert(bytes % MDBX_MIN_PAGESIZE == 0 && bytes <= MAX_WRITE); - assert(offset % MDBX_MIN_PAGESIZE == 0 && - offset + (uint64_t)bytes <= MAX_MAPSIZE); + assert(offset % MDBX_MIN_PAGESIZE == 0 && offset + (uint64_t)bytes <= MAX_MAPSIZE); #if defined(_WIN32) || defined(_WIN64) const unsigned segments = (unsigned)(bytes >> ior->pagesize_ln2); - const bool use_gather = - ior->direct && ior->overlapped_fd && ior->slots_left >= segments; + const bool use_gather = ior->direct && ior->overlapped_fd && ior->slots_left >= segments; #endif /* Windows */ ior_item_t *item = ior->pool; @@ -690,8 +641,7 @@ MDBX_INTERNAL int osal_ioring_add(osal_ioring_t *ior, const size_t offset, likely(ior_last_bytes(ior, item) + bytes <= MAX_WRITE)) { #if defined(_WIN32) || defined(_WIN64) if (use_gather && - ((bytes | (uintptr_t)data | ior->last_bytes | - (uintptr_t)(uint64_t)item->sgv[0].Buffer) & + ((bytes | (uintptr_t)data | ior->last_bytes | (uintptr_t)(uint64_t)item->sgv[0].Buffer) & ior_alignment_mask) == 0 && ior->last_sgvcnt + (size_t)segments < OSAL_IOV_MAX) { 
assert(ior->overlapped_fd); @@ -708,8 +658,7 @@ MDBX_INTERNAL int osal_ioring_add(osal_ioring_t *ior, const size_t offset, assert((item->single.iov_len & ior_WriteFile_flag) == 0); return MDBX_SUCCESS; } - const void *end = ptr_disp(item->single.iov_base, - item->single.iov_len - ior_WriteFile_flag); + const void *end = ptr_disp(item->single.iov_base, item->single.iov_len - ior_WriteFile_flag); if (unlikely(end == data)) { assert((item->single.iov_len & ior_WriteFile_flag) != 0); item->single.iov_len += bytes; @@ -717,8 +666,7 @@ MDBX_INTERNAL int osal_ioring_add(osal_ioring_t *ior, const size_t offset, } #elif MDBX_HAVE_PWRITEV assert((int)item->sgvcnt > 0); - const void *end = ptr_disp(item->sgv[item->sgvcnt - 1].iov_base, - item->sgv[item->sgvcnt - 1].iov_len); + const void *end = ptr_disp(item->sgv[item->sgvcnt - 1].iov_base, item->sgv[item->sgvcnt - 1].iov_len); if (unlikely(end == data)) { item->sgv[item->sgvcnt - 1].iov_len += bytes; ior->last_bytes += bytes; @@ -754,8 +702,7 @@ MDBX_INTERNAL int osal_ioring_add(osal_ioring_t *ior, const size_t offset, item->ov.Offset = (DWORD)offset; item->ov.OffsetHigh = HIGH_DWORD(offset); item->ov.hEvent = 0; - if (!use_gather || ((bytes | (uintptr_t)(data)) & ior_alignment_mask) != 0 || - segments > OSAL_IOV_MAX) { + if (!use_gather || ((bytes | (uintptr_t)(data)) & ior_alignment_mask) != 0 || segments > OSAL_IOV_MAX) { /* WriteFile() */ item->single.iov_base = data; item->single.iov_len = bytes + ior_WriteFile_flag; @@ -790,9 +737,7 @@ MDBX_INTERNAL int osal_ioring_add(osal_ioring_t *ior, const size_t offset, } MDBX_INTERNAL void osal_ioring_walk(osal_ioring_t *ior, iov_ctx_t *ctx, - void (*callback)(iov_ctx_t *ctx, - size_t offset, void *data, - size_t bytes)) { + void (*callback)(iov_ctx_t *ctx, size_t offset, void *data, size_t bytes)) { for (ior_item_t *item = ior->pool; item <= ior->last;) { #if defined(_WIN32) || defined(_WIN64) size_t offset = ior_offset(item); @@ -833,14 +778,12 @@ MDBX_INTERNAL void 
osal_ioring_walk(osal_ioring_t *ior, iov_ctx_t *ctx, } } -MDBX_INTERNAL osal_ioring_write_result_t -osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { +MDBX_INTERNAL osal_ioring_write_result_t osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { osal_ioring_write_result_t r = {MDBX_SUCCESS, 0}; #if defined(_WIN32) || defined(_WIN64) - HANDLE *const end_wait_for = - ior->event_pool + ior->allocated + - /* был выделен один дополнительный элемент для async_done */ 1; + HANDLE *const end_wait_for = ior->event_pool + ior->allocated + + /* был выделен один дополнительный элемент для async_done */ 1; HANDLE *wait_for = end_wait_for; LONG async_started = 0; for (ior_item_t *item = ior->pool; item <= ior->last;) { @@ -867,8 +810,7 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { return r; } if (WriteFileGather(fd, item->sgv, (DWORD)bytes, nullptr, &item->ov)) { - assert(item->ov.Internal == 0 && - WaitForSingleObject(item->ov.hEvent, 0) == WAIT_OBJECT_0); + assert(item->ov.Internal == 0 && WaitForSingleObject(item->ov.hEvent, 0) == WAIT_OBJECT_0); ior_put_event(ior, item->ov.hEvent); item->ov.hEvent = 0; } else { @@ -877,9 +819,8 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { void *data = Ptr64ToPtr(item->sgv[0].Buffer); ERROR("%s: fd %p, item %p (%zu), addr %p pgno %u, bytes %zu," " offset %" PRId64 ", err %d", - "WriteFileGather", fd, __Wpedantic_format_voidptr(item), - item - ior->pool, data, ((page_t *)data)->pgno, bytes, - item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), r.err); + "WriteFileGather", fd, __Wpedantic_format_voidptr(item), item - ior->pool, data, ((page_t *)data)->pgno, + bytes, item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), r.err); goto bailout_rc; } assert(wait_for > ior->event_pool + ior->event_stack); @@ -889,8 +830,7 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { assert(bytes < MAX_WRITE); retry: item->ov.hEvent = ior; - if (WriteFileEx(fd, 
item->single.iov_base, (DWORD)bytes, &item->ov, - ior_wocr)) { + if (WriteFileEx(fd, item->single.iov_base, (DWORD)bytes, &item->ov, ior_wocr)) { async_started += 1; } else { r.err = (int)GetLastError(); @@ -898,21 +838,18 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { default: ERROR("%s: fd %p, item %p (%zu), addr %p pgno %u, bytes %zu," " offset %" PRId64 ", err %d", - "WriteFileEx", fd, __Wpedantic_format_voidptr(item), - item - ior->pool, item->single.iov_base, - ((page_t *)item->single.iov_base)->pgno, bytes, - item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), r.err); + "WriteFileEx", fd, __Wpedantic_format_voidptr(item), item - ior->pool, item->single.iov_base, + ((page_t *)item->single.iov_base)->pgno, bytes, item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), + r.err); goto bailout_rc; case ERROR_NOT_FOUND: case ERROR_USER_MAPPED_FILE: case ERROR_LOCK_VIOLATION: WARNING("%s: fd %p, item %p (%zu), addr %p pgno %u, bytes %zu," " offset %" PRId64 ", err %d", - "WriteFileEx", fd, __Wpedantic_format_voidptr(item), - item - ior->pool, item->single.iov_base, + "WriteFileEx", fd, __Wpedantic_format_voidptr(item), item - ior->pool, item->single.iov_base, ((page_t *)item->single.iov_base)->pgno, bytes, - item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), - r.err); + item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), r.err); SleepEx(0, true); goto retry; case ERROR_INVALID_USER_BUFFER: @@ -927,15 +864,13 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { } else { assert(bytes < MAX_WRITE); DWORD written = 0; - if (!WriteFile(fd, item->single.iov_base, (DWORD)bytes, &written, - &item->ov)) { + if (!WriteFile(fd, item->single.iov_base, (DWORD)bytes, &written, &item->ov)) { r.err = (int)GetLastError(); ERROR("%s: fd %p, item %p (%zu), addr %p pgno %u, bytes %zu," " offset %" PRId64 ", err %d", - "WriteFile", fd, __Wpedantic_format_voidptr(item), - item - ior->pool, item->single.iov_base, - ((page_t 
*)item->single.iov_base)->pgno, bytes, - item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), r.err); + "WriteFile", fd, __Wpedantic_format_voidptr(item), item - ior->pool, item->single.iov_base, + ((page_t *)item->single.iov_base)->pgno, bytes, item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), + r.err); goto bailout_rc; } else if (unlikely(written != bytes)) { r.err = ERROR_WRITE_FAULT; @@ -945,8 +880,7 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { item = ior_next(item, i); } - assert(ior->async_waiting > ior->async_completed && - ior->async_waiting == INT_MAX); + assert(ior->async_waiting > ior->async_completed && ior->async_waiting == INT_MAX); ior->async_waiting = async_started; if (async_started > ior->async_completed && end_wait_for == wait_for) { assert(wait_for > ior->event_pool + ior->event_stack); @@ -963,18 +897,15 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { * WaitForMultipleObjectsEx(), но тогда это проблемы на стороне M$. */ DWORD madness; do - madness = WaitForMultipleObjectsEx((pending_count < MAXIMUM_WAIT_OBJECTS) - ? (DWORD)pending_count - : MAXIMUM_WAIT_OBJECTS, - wait_for, true, - /* сутки */ 86400000ul, true); + madness = WaitForMultipleObjectsEx( + (pending_count < MAXIMUM_WAIT_OBJECTS) ? 
(DWORD)pending_count : MAXIMUM_WAIT_OBJECTS, wait_for, true, + /* сутки */ 86400000ul, true); while (madness == WAIT_IO_COMPLETION); STATIC_ASSERT(WAIT_OBJECT_0 == 0); if (/* madness >= WAIT_OBJECT_0 && */ madness < WAIT_OBJECT_0 + MAXIMUM_WAIT_OBJECTS) r.err = MDBX_SUCCESS; - else if (madness >= WAIT_ABANDONED_0 && - madness < WAIT_ABANDONED_0 + MAXIMUM_WAIT_OBJECTS) { + else if (madness >= WAIT_ABANDONED_0 && madness < WAIT_ABANDONED_0 + MAXIMUM_WAIT_OBJECTS) { r.err = ERROR_ABANDONED_WAIT_0; goto bailout_rc; } else if (madness == WAIT_TIMEOUT) { @@ -1003,9 +934,8 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { if (unlikely(!GetOverlappedResult(fd, &item->ov, &written, true))) { ERROR("%s: item %p (%zu), addr %p pgno %u, bytes %zu," " offset %" PRId64 ", err %d", - "GetOverlappedResult", __Wpedantic_format_voidptr(item), - item - ior->pool, data, ((page_t *)data)->pgno, bytes, - item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), + "GetOverlappedResult", __Wpedantic_format_voidptr(item), item - ior->pool, data, + ((page_t *)data)->pgno, bytes, item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), (int)GetLastError()); goto bailout_geterr; } @@ -1019,15 +949,12 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { if (unlikely(item->ov.Internal != MDBX_SUCCESS)) { DWORD written = 0; r.err = (int)item->ov.Internal; - if ((r.err & 0x80000000) && - GetOverlappedResult(nullptr, &item->ov, &written, true)) + if ((r.err & 0x80000000) && GetOverlappedResult(nullptr, &item->ov, &written, true)) r.err = (int)GetLastError(); ERROR("%s: item %p (%zu), addr %p pgno %u, bytes %zu," " offset %" PRId64 ", err %d", - "Result", __Wpedantic_format_voidptr(item), item - ior->pool, - data, ((page_t *)data)->pgno, bytes, - item->ov.Offset + ((uint64_t)item->ov.OffsetHigh << 32), - (int)GetLastError()); + "Result", __Wpedantic_format_voidptr(item), item - ior->pool, data, ((page_t *)data)->pgno, bytes, + item->ov.Offset + 
((uint64_t)item->ov.OffsetHigh << 32), (int)GetLastError()); goto bailout_rc; } if (unlikely(item->ov.InternalHigh != bytes)) { @@ -1043,14 +970,12 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { assert(ior->async_waiting == ior->async_completed); #else - STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), - "libmdbx requires 64-bit file I/O on 64-bit systems"); + STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), "libmdbx requires 64-bit file I/O on 64-bit systems"); for (ior_item_t *item = ior->pool; item <= ior->last;) { #if MDBX_HAVE_PWRITEV assert(item->sgvcnt > 0); if (item->sgvcnt == 1) - r.err = osal_pwrite(fd, item->sgv[0].iov_base, item->sgv[0].iov_len, - item->offset); + r.err = osal_pwrite(fd, item->sgv[0].iov_base, item->sgv[0].iov_len, item->offset); else r.err = osal_pwritev(fd, item->sgv, item->sgvcnt, item->offset); @@ -1058,8 +983,7 @@ osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd) { item = ior_next(item, item->sgvcnt); #else - r.err = osal_pwrite(fd, item->single.iov_base, item->single.iov_len, - item->offset); + r.err = osal_pwrite(fd, item->single.iov_base, item->single.iov_len, item->offset); item = ior_next(item, 1); #endif r.wops += 1; @@ -1122,12 +1046,9 @@ MDBX_INTERNAL int osal_ioring_resize(osal_ioring_t *ior, size_t items) { #if defined(_WIN32) || defined(_WIN64) if (ior->state & IOR_STATE_LOCKED) return MDBX_SUCCESS; - const bool useSetFileIoOverlappedRange = - ior->overlapped_fd && imports.SetFileIoOverlappedRange && items > 42; + const bool useSetFileIoOverlappedRange = ior->overlapped_fd && imports.SetFileIoOverlappedRange && items > 42; const size_t ceiling = - useSetFileIoOverlappedRange - ? ((items < 65536 / 2 / sizeof(ior_item_t)) ? 65536 : 65536 * 4) - : 1024; + useSetFileIoOverlappedRange ? ((items < 65536 / 2 / sizeof(ior_item_t)) ? 
65536 : 65536 * 4) : 1024; const size_t bytes = ceil_powerof2(sizeof(ior_item_t) * items, ceiling); items = bytes / sizeof(ior_item_t); #endif /* Windows */ @@ -1137,9 +1058,7 @@ MDBX_INTERNAL int osal_ioring_resize(osal_ioring_t *ior, size_t items) { if (items < ior->allocated) ior_cleanup(ior, items); #if defined(_WIN32) || defined(_WIN64) - void *ptr = osal_realloc( - ior->event_pool, - (items + /* extra for waiting the async_done */ 1) * sizeof(HANDLE)); + void *ptr = osal_realloc(ior->event_pool, (items + /* extra for waiting the async_done */ 1) * sizeof(HANDLE)); if (unlikely(!ptr)) return MDBX_ENOMEM; ior->event_pool = ptr; @@ -1159,14 +1078,12 @@ MDBX_INTERNAL int osal_ioring_resize(osal_ioring_t *ior, size_t items) { ior->pool = ptr; if (items > ior->allocated) - memset(ior->pool + ior->allocated, 0, - sizeof(ior_item_t) * (items - ior->allocated)); + memset(ior->pool + ior->allocated, 0, sizeof(ior_item_t) * (items - ior->allocated)); ior->allocated = (unsigned)items; ior->boundary = ptr_disp(ior->pool, ior->allocated); #if defined(_WIN32) || defined(_WIN64) if (useSetFileIoOverlappedRange) { - if (imports.SetFileIoOverlappedRange(ior->overlapped_fd, ptr, - (ULONG)bytes)) + if (imports.SetFileIoOverlappedRange(ior->overlapped_fd, ptr, (ULONG)bytes)) ior->state += IOR_STATE_LOCKED; else return GetLastError(); @@ -1218,9 +1135,7 @@ MDBX_INTERNAL int osal_fileexists(const pathchar_t *pathname) { if (GetFileAttributesW(pathname) != INVALID_FILE_ATTRIBUTES) return MDBX_RESULT_TRUE; int err = GetLastError(); - return (err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND) - ? MDBX_RESULT_FALSE - : err; + return (err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND) ? 
MDBX_RESULT_FALSE : err; #else if (access(pathname, F_OK) == 0) return MDBX_RESULT_TRUE; @@ -1239,8 +1154,7 @@ MDBX_INTERNAL pathchar_t *osal_fileext(const pathchar_t *pathname, size_t len) { return (pathchar_t *)ext; } -MDBX_INTERNAL bool osal_pathequal(const pathchar_t *l, const pathchar_t *r, - size_t len) { +MDBX_INTERNAL bool osal_pathequal(const pathchar_t *l, const pathchar_t *r, size_t len) { #if defined(_WIN32) || defined(_WIN64) for (size_t i = 0; i < len; ++i) { pathchar_t a = l[i]; @@ -1256,19 +1170,15 @@ MDBX_INTERNAL bool osal_pathequal(const pathchar_t *l, const pathchar_t *r, #endif } -MDBX_INTERNAL int osal_openfile(const enum osal_openfile_purpose purpose, - const MDBX_env *env, const pathchar_t *pathname, - mdbx_filehandle_t *fd, - mdbx_mode_t unix_mode_bits) { +MDBX_INTERNAL int osal_openfile(const enum osal_openfile_purpose purpose, const MDBX_env *env, + const pathchar_t *pathname, mdbx_filehandle_t *fd, mdbx_mode_t unix_mode_bits) { *fd = INVALID_HANDLE_VALUE; #if defined(_WIN32) || defined(_WIN64) DWORD CreationDisposition = unix_mode_bits ? OPEN_ALWAYS : OPEN_EXISTING; - DWORD FlagsAndAttributes = - FILE_FLAG_POSIX_SEMANTICS | FILE_ATTRIBUTE_NOT_CONTENT_INDEXED; + DWORD FlagsAndAttributes = FILE_FLAG_POSIX_SEMANTICS | FILE_ATTRIBUTE_NOT_CONTENT_INDEXED; DWORD DesiredAccess = FILE_READ_ATTRIBUTES; - DWORD ShareMode = - (env->flags & MDBX_EXCLUSIVE) ? 0 : (FILE_SHARE_READ | FILE_SHARE_WRITE); + DWORD ShareMode = (env->flags & MDBX_EXCLUSIVE) ? 
0 : (FILE_SHARE_READ | FILE_SHARE_WRITE); switch (purpose) { default: @@ -1309,18 +1219,15 @@ MDBX_INTERNAL int osal_openfile(const enum osal_openfile_purpose purpose, case MDBX_OPEN_DELETE: CreationDisposition = OPEN_EXISTING; ShareMode |= FILE_SHARE_DELETE; - DesiredAccess = - FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES | DELETE | SYNCHRONIZE; + DesiredAccess = FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES | DELETE | SYNCHRONIZE; break; } - *fd = CreateFileW(pathname, DesiredAccess, ShareMode, nullptr, - CreationDisposition, FlagsAndAttributes, nullptr); + *fd = CreateFileW(pathname, DesiredAccess, ShareMode, nullptr, CreationDisposition, FlagsAndAttributes, nullptr); if (*fd == INVALID_HANDLE_VALUE) { int err = (int)GetLastError(); if (err == ERROR_ACCESS_DENIED && purpose == MDBX_OPEN_LCK) { - if (GetFileAttributesW(pathname) == INVALID_FILE_ATTRIBUTES && - GetLastError() == ERROR_FILE_NOT_FOUND) + if (GetFileAttributesW(pathname) == INVALID_FILE_ATTRIBUTES && GetLastError() == ERROR_FILE_NOT_FOUND) err = ERROR_FILE_NOT_FOUND; } return err; @@ -1334,9 +1241,8 @@ MDBX_INTERNAL int osal_openfile(const enum osal_openfile_purpose purpose, return err; } const DWORD AttributesDiff = - (info.dwFileAttributes ^ FlagsAndAttributes) & - (FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_NOT_CONTENT_INDEXED | - FILE_ATTRIBUTE_TEMPORARY | FILE_ATTRIBUTE_COMPRESSED); + (info.dwFileAttributes ^ FlagsAndAttributes) & (FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_NOT_CONTENT_INDEXED | + FILE_ATTRIBUTE_TEMPORARY | FILE_ATTRIBUTE_COMPRESSED); if (AttributesDiff) (void)SetFileAttributesW(pathname, info.dwFileAttributes ^ AttributesDiff); @@ -1372,8 +1278,7 @@ MDBX_INTERNAL int osal_openfile(const enum osal_openfile_purpose purpose, break; } - const bool direct_nocache_for_copy = - env->ps >= globals.sys_pagesize && purpose == MDBX_OPEN_COPY; + const bool direct_nocache_for_copy = env->ps >= globals.sys_pagesize && purpose == MDBX_OPEN_COPY; if (direct_nocache_for_copy) { #if defined(O_DIRECT) 
flags |= O_DIRECT; @@ -1392,18 +1297,15 @@ MDBX_INTERNAL int osal_openfile(const enum osal_openfile_purpose purpose, int stub_fd0 = -1, stub_fd1 = -1, stub_fd2 = -1; static const char dev_null[] = "/dev/null"; if (!is_valid_fd(STDIN_FILENO)) { - WARNING("STD%s_FILENO/%d is invalid, open %s for temporary stub", "IN", - STDIN_FILENO, dev_null); + WARNING("STD%s_FILENO/%d is invalid, open %s for temporary stub", "IN", STDIN_FILENO, dev_null); stub_fd0 = open(dev_null, O_RDONLY | O_NOCTTY); } if (!is_valid_fd(STDOUT_FILENO)) { - WARNING("STD%s_FILENO/%d is invalid, open %s for temporary stub", "OUT", - STDOUT_FILENO, dev_null); + WARNING("STD%s_FILENO/%d is invalid, open %s for temporary stub", "OUT", STDOUT_FILENO, dev_null); stub_fd1 = open(dev_null, O_WRONLY | O_NOCTTY); } if (!is_valid_fd(STDERR_FILENO)) { - WARNING("STD%s_FILENO/%d is invalid, open %s for temporary stub", "ERR", - STDERR_FILENO, dev_null); + WARNING("STD%s_FILENO/%d is invalid, open %s for temporary stub", "ERR", STDERR_FILENO, dev_null); stub_fd2 = open(dev_null, O_WRONLY | O_NOCTTY); } #else @@ -1412,8 +1314,7 @@ MDBX_INTERNAL int osal_openfile(const enum osal_openfile_purpose purpose, *fd = open(pathname, flags, unix_mode_bits); #if defined(O_DIRECT) - if (*fd < 0 && (flags & O_DIRECT) && - (errno == EINVAL || errno == EAFNOSUPPORT)) { + if (*fd < 0 && (flags & O_DIRECT) && (errno == EINVAL || errno == EAFNOSUPPORT)) { flags &= ~(O_DIRECT | O_EXCL); *fd = open(pathname, flags, unix_mode_bits); } @@ -1428,20 +1329,17 @@ MDBX_INTERNAL int osal_openfile(const enum osal_openfile_purpose purpose, /* Safeguard for https://libmdbx.dqdkfa.ru/dead-github/issues/144 */ #if STDIN_FILENO == 0 && STDOUT_FILENO == 1 && STDERR_FILENO == 2 if (*fd == STDIN_FILENO) { - WARNING("Got STD%s_FILENO/%d, avoid using it by dup(fd)", "IN", - STDIN_FILENO); + WARNING("Got STD%s_FILENO/%d, avoid using it by dup(fd)", "IN", STDIN_FILENO); assert(stub_fd0 == -1); *fd = dup(stub_fd0 = *fd); } if (*fd == STDOUT_FILENO) { - 
WARNING("Got STD%s_FILENO/%d, avoid using it by dup(fd)", "OUT", - STDOUT_FILENO); + WARNING("Got STD%s_FILENO/%d, avoid using it by dup(fd)", "OUT", STDOUT_FILENO); assert(stub_fd1 == -1); *fd = dup(stub_fd1 = *fd); } if (*fd == STDERR_FILENO) { - WARNING("Got STD%s_FILENO/%d, avoid using it by dup(fd)", "ERR", - STDERR_FILENO); + WARNING("Got STD%s_FILENO/%d, avoid using it by dup(fd)", "ERR", STDERR_FILENO); assert(stub_fd2 == -1); *fd = dup(stub_fd2 = *fd); } @@ -1490,8 +1388,7 @@ MDBX_INTERNAL int osal_closefile(mdbx_filehandle_t fd) { #endif } -MDBX_INTERNAL int osal_pread(mdbx_filehandle_t fd, void *buf, size_t bytes, - uint64_t offset) { +MDBX_INTERNAL int osal_pread(mdbx_filehandle_t fd, void *buf, size_t bytes, uint64_t offset) { if (bytes > MAX_WRITE) return MDBX_EINVAL; #if defined(_WIN32) || defined(_WIN64) @@ -1506,8 +1403,7 @@ MDBX_INTERNAL int osal_pread(mdbx_filehandle_t fd, void *buf, size_t bytes, return (rc == MDBX_SUCCESS) ? /* paranoia */ ERROR_READ_FAULT : rc; } #else - STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), - "libmdbx requires 64-bit file I/O on 64-bit systems"); + STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), "libmdbx requires 64-bit file I/O on 64-bit systems"); intptr_t read = pread(fd, buf, bytes, offset); if (read < 0) { int rc = errno; @@ -1517,8 +1413,7 @@ MDBX_INTERNAL int osal_pread(mdbx_filehandle_t fd, void *buf, size_t bytes, return (bytes == (size_t)read) ? MDBX_SUCCESS : MDBX_ENODATA; } -MDBX_INTERNAL int osal_pwrite(mdbx_filehandle_t fd, const void *buf, - size_t bytes, uint64_t offset) { +MDBX_INTERNAL int osal_pwrite(mdbx_filehandle_t fd, const void *buf, size_t bytes, uint64_t offset) { while (true) { #if defined(_WIN32) || defined(_WIN64) OVERLAPPED ov; @@ -1527,17 +1422,13 @@ MDBX_INTERNAL int osal_pwrite(mdbx_filehandle_t fd, const void *buf, ov.OffsetHigh = HIGH_DWORD(offset); DWORD written; - if (unlikely(!WriteFile( - fd, buf, likely(bytes <= MAX_WRITE) ? 
(DWORD)bytes : MAX_WRITE, - &written, &ov))) + if (unlikely(!WriteFile(fd, buf, likely(bytes <= MAX_WRITE) ? (DWORD)bytes : MAX_WRITE, &written, &ov))) return (int)GetLastError(); if (likely(bytes == written)) return MDBX_SUCCESS; #else - STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), - "libmdbx requires 64-bit file I/O on 64-bit systems"); - const intptr_t written = - pwrite(fd, buf, likely(bytes <= MAX_WRITE) ? bytes : MAX_WRITE, offset); + STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), "libmdbx requires 64-bit file I/O on 64-bit systems"); + const intptr_t written = pwrite(fd, buf, likely(bytes <= MAX_WRITE) ? bytes : MAX_WRITE, offset); if (likely(bytes == (size_t)written)) return MDBX_SUCCESS; if (written < 0) { @@ -1553,22 +1444,17 @@ MDBX_INTERNAL int osal_pwrite(mdbx_filehandle_t fd, const void *buf, } } -MDBX_INTERNAL int osal_write(mdbx_filehandle_t fd, const void *buf, - size_t bytes) { +MDBX_INTERNAL int osal_write(mdbx_filehandle_t fd, const void *buf, size_t bytes) { while (true) { #if defined(_WIN32) || defined(_WIN64) DWORD written; - if (unlikely(!WriteFile( - fd, buf, likely(bytes <= MAX_WRITE) ? (DWORD)bytes : MAX_WRITE, - &written, nullptr))) + if (unlikely(!WriteFile(fd, buf, likely(bytes <= MAX_WRITE) ? (DWORD)bytes : MAX_WRITE, &written, nullptr))) return (int)GetLastError(); if (likely(bytes == written)) return MDBX_SUCCESS; #else - STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), - "libmdbx requires 64-bit file I/O on 64-bit systems"); - const intptr_t written = - write(fd, buf, likely(bytes <= MAX_WRITE) ? bytes : MAX_WRITE); + STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), "libmdbx requires 64-bit file I/O on 64-bit systems"); + const intptr_t written = write(fd, buf, likely(bytes <= MAX_WRITE) ? 
bytes : MAX_WRITE); if (likely(bytes == (size_t)written)) return MDBX_SUCCESS; if (written < 0) { @@ -1583,8 +1469,7 @@ MDBX_INTERNAL int osal_write(mdbx_filehandle_t fd, const void *buf, } } -int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov, size_t sgvcnt, - uint64_t offset) { +int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov, size_t sgvcnt, uint64_t offset) { size_t expected = 0; for (size_t i = 0; i < sgvcnt; ++i) expected += iov[i].iov_len; @@ -1597,14 +1482,12 @@ int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov, size_t sgvcnt, written += iov[i].iov_len; offset += iov[i].iov_len; } - return (expected == written) ? MDBX_SUCCESS - : MDBX_EIO /* ERROR_WRITE_FAULT */; + return (expected == written) ? MDBX_SUCCESS : MDBX_EIO /* ERROR_WRITE_FAULT */; #else int rc; intptr_t written; do { - STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), - "libmdbx requires 64-bit file I/O on 64-bit systems"); + STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), "libmdbx requires 64-bit file I/O on 64-bit systems"); written = pwritev(fd, iov, sgvcnt, offset); if (likely(expected == (size_t)written)) return MDBX_SUCCESS; @@ -1614,16 +1497,14 @@ int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov, size_t sgvcnt, #endif } -MDBX_INTERNAL int osal_fsync(mdbx_filehandle_t fd, - enum osal_syncmode_bits mode_bits) { +MDBX_INTERNAL int osal_fsync(mdbx_filehandle_t fd, enum osal_syncmode_bits mode_bits) { #if defined(_WIN32) || defined(_WIN64) if ((mode_bits & (MDBX_SYNC_DATA | MDBX_SYNC_IODQ)) && !FlushFileBuffers(fd)) return (int)GetLastError(); return MDBX_SUCCESS; #else -#if defined(__APPLE__) && \ - MDBX_APPLE_SPEED_INSTEADOF_DURABILITY == MDBX_OSX_WANNA_DURABILITY +#if defined(__APPLE__) && MDBX_APPLE_SPEED_INSTEADOF_DURABILITY == MDBX_OSX_WANNA_DURABILITY if (mode_bits & MDBX_SYNC_IODQ) return likely(fcntl(fd, F_FULLFSYNC) != -1) ? 
MDBX_SUCCESS : errno; #endif /* MacOS */ @@ -1670,8 +1551,7 @@ int osal_filesize(mdbx_filehandle_t fd, uint64_t *length) { #else struct stat st; - STATIC_ASSERT_MSG(sizeof(off_t) <= sizeof(uint64_t), - "libmdbx requires 64-bit file I/O on 64-bit systems"); + STATIC_ASSERT_MSG(sizeof(off_t) <= sizeof(uint64_t), "libmdbx requires 64-bit file I/O on 64-bit systems"); if (fstat(fd, &st)) return errno; @@ -1716,21 +1596,16 @@ MDBX_INTERNAL int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length) { if (imports.SetFileInformationByHandle) { FILE_END_OF_FILE_INFO EndOfFileInfo; EndOfFileInfo.EndOfFile.QuadPart = length; - return imports.SetFileInformationByHandle(fd, FileEndOfFileInfo, - &EndOfFileInfo, - sizeof(FILE_END_OF_FILE_INFO)) + return imports.SetFileInformationByHandle(fd, FileEndOfFileInfo, &EndOfFileInfo, sizeof(FILE_END_OF_FILE_INFO)) ? MDBX_SUCCESS : (int)GetLastError(); } else { LARGE_INTEGER li; li.QuadPart = length; - return (SetFilePointerEx(fd, li, nullptr, FILE_BEGIN) && SetEndOfFile(fd)) - ? MDBX_SUCCESS - : (int)GetLastError(); + return (SetFilePointerEx(fd, li, nullptr, FILE_BEGIN) && SetEndOfFile(fd)) ? MDBX_SUCCESS : (int)GetLastError(); } #else - STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), - "libmdbx requires 64-bit file I/O on 64-bit systems"); + STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), "libmdbx requires 64-bit file I/O on 64-bit systems"); return ftruncate(fd, length) == 0 ? MDBX_SUCCESS : errno; #endif } @@ -1739,21 +1614,17 @@ MDBX_INTERNAL int osal_fseek(mdbx_filehandle_t fd, uint64_t pos) { #if defined(_WIN32) || defined(_WIN64) LARGE_INTEGER li; li.QuadPart = pos; - return SetFilePointerEx(fd, li, nullptr, FILE_BEGIN) ? MDBX_SUCCESS - : (int)GetLastError(); + return SetFilePointerEx(fd, li, nullptr, FILE_BEGIN) ? 
MDBX_SUCCESS : (int)GetLastError(); #else - STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), - "libmdbx requires 64-bit file I/O on 64-bit systems"); + STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), "libmdbx requires 64-bit file I/O on 64-bit systems"); return (lseek(fd, pos, SEEK_SET) < 0) ? errno : MDBX_SUCCESS; #endif } /*----------------------------------------------------------------------------*/ -MDBX_INTERNAL int -osal_thread_create(osal_thread_t *thread, - THREAD_RESULT(THREAD_CALL *start_routine)(void *), - void *arg) { +MDBX_INTERNAL int osal_thread_create(osal_thread_t *thread, THREAD_RESULT(THREAD_CALL *start_routine)(void *), + void *arg) { #if defined(_WIN32) || defined(_WIN64) *thread = CreateThread(nullptr, 0, start_routine, arg, 0, nullptr); return *thread ? MDBX_SUCCESS : (int)GetLastError(); @@ -1774,8 +1645,7 @@ MDBX_INTERNAL int osal_thread_join(osal_thread_t thread) { /*----------------------------------------------------------------------------*/ -MDBX_INTERNAL int osal_msync(const osal_mmap_t *map, size_t offset, - size_t length, enum osal_syncmode_bits mode_bits) { +MDBX_INTERNAL int osal_msync(const osal_mmap_t *map, size_t offset, size_t length, enum osal_syncmode_bits mode_bits) { if (!MDBX_MMAP_NEEDS_JOLT && mode_bits == MDBX_SYNC_NONE) return MDBX_SUCCESS; @@ -1783,8 +1653,7 @@ MDBX_INTERNAL int osal_msync(const osal_mmap_t *map, size_t offset, #if defined(_WIN32) || defined(_WIN64) if (!FlushViewOfFile(ptr, length)) return (int)GetLastError(); - if ((mode_bits & (MDBX_SYNC_DATA | MDBX_SYNC_IODQ)) && - !FlushFileBuffers(map->fd)) + if ((mode_bits & (MDBX_SYNC_DATA | MDBX_SYNC_IODQ)) && !FlushFileBuffers(map->fd)) return (int)GetLastError(); #else #if defined(__linux__) || defined(__gnu_linux__) @@ -1807,16 +1676,14 @@ MDBX_INTERNAL int osal_msync(const osal_mmap_t *map, size_t offset, return MDBX_SUCCESS; } -MDBX_INTERNAL int osal_check_fs_rdonly(mdbx_filehandle_t handle, - const pathchar_t *pathname, int err) { +MDBX_INTERNAL 
int osal_check_fs_rdonly(mdbx_filehandle_t handle, const pathchar_t *pathname, int err) { #if defined(_WIN32) || defined(_WIN64) (void)pathname; (void)err; if (!imports.GetVolumeInformationByHandleW) return MDBX_ENOSYS; DWORD unused, flags; - if (!imports.GetVolumeInformationByHandleW(handle, nullptr, 0, nullptr, - &unused, &flags, nullptr, 0)) + if (!imports.GetVolumeInformationByHandleW(handle, nullptr, 0, nullptr, &unused, &flags, nullptr, 0)) return (int)GetLastError(); if ((flags & FILE_READ_ONLY_VOLUME) == 0) return MDBX_EACCESS; @@ -1858,9 +1725,8 @@ MDBX_INTERNAL int osal_check_fs_incore(mdbx_filehandle_t handle) { return MDBX_RESULT_TRUE; } -#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ - defined(__BSD__) || defined(__bsdi__) || defined(__DragonFly__) || \ - defined(__APPLE__) || defined(__MACH__) || defined(MFSNAMELEN) || \ +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__BSD__) || defined(__bsdi__) || \ + defined(__DragonFly__) || defined(__APPLE__) || defined(__MACH__) || defined(MFSNAMELEN) || \ defined(MFSTYPENAMELEN) || defined(VFS_NAMELEN) const char *const name = statfs_info.f_fstypename; const size_t name_len = sizeof(statfs_info.f_fstypename); @@ -1869,9 +1735,7 @@ MDBX_INTERNAL int osal_check_fs_incore(mdbx_filehandle_t handle) { const size_t name_len = 0; #endif if (name_len) { - if (strncasecmp("tmpfs", name, 6) == 0 || - strncasecmp("mfs", name, 4) == 0 || - strncasecmp("ramfs", name, 6) == 0 || + if (strncasecmp("tmpfs", name, 6) == 0 || strncasecmp("mfs", name, 4) == 0 || strncasecmp("ramfs", name, 6) == 0 || strncasecmp("romfs", name, 6) == 0) return MDBX_RESULT_TRUE; } @@ -1890,14 +1754,11 @@ static int osal_check_fs_local(mdbx_filehandle_t handle, int flags) { if (imports.GetFileInformationByHandleEx) { FILE_REMOTE_PROTOCOL_INFO RemoteProtocolInfo; - if (imports.GetFileInformationByHandleEx(handle, FileRemoteProtocolInfo, - &RemoteProtocolInfo, + if 
(imports.GetFileInformationByHandleEx(handle, FileRemoteProtocolInfo, &RemoteProtocolInfo, sizeof(RemoteProtocolInfo))) { - if ((RemoteProtocolInfo.Flags & REMOTE_PROTOCOL_INFO_FLAG_OFFLINE) && - !(flags & MDBX_RDONLY)) + if ((RemoteProtocolInfo.Flags & REMOTE_PROTOCOL_INFO_FLAG_OFFLINE) && !(flags & MDBX_RDONLY)) return ERROR_FILE_OFFLINE; - if (!(RemoteProtocolInfo.Flags & REMOTE_PROTOCOL_INFO_FLAG_LOOPBACK) && - !(flags & MDBX_EXCLUSIVE)) + if (!(RemoteProtocolInfo.Flags & REMOTE_PROTOCOL_INFO_FLAG_LOOPBACK) && !(flags & MDBX_EXCLUSIVE)) return ERROR_REMOTE_STORAGE_MEDIA_ERROR; } } @@ -1913,46 +1774,37 @@ static int osal_check_fs_local(mdbx_filehandle_t handle, int flags) { size_t reserved_for_microsoft_madness[42]; } GetExternalBacking_OutputBuffer; IO_STATUS_BLOCK StatusBlock; - rc = imports.NtFsControlFile(handle, nullptr, nullptr, nullptr, - &StatusBlock, FSCTL_GET_EXTERNAL_BACKING, - nullptr, 0, &GetExternalBacking_OutputBuffer, - sizeof(GetExternalBacking_OutputBuffer)); + rc = imports.NtFsControlFile(handle, nullptr, nullptr, nullptr, &StatusBlock, FSCTL_GET_EXTERNAL_BACKING, nullptr, + 0, &GetExternalBacking_OutputBuffer, sizeof(GetExternalBacking_OutputBuffer)); if (NT_SUCCESS(rc)) { if (!(flags & MDBX_EXCLUSIVE)) return ERROR_REMOTE_STORAGE_MEDIA_ERROR; - } else if (rc != STATUS_OBJECT_NOT_EXTERNALLY_BACKED && - rc != STATUS_INVALID_DEVICE_REQUEST && + } else if (rc != STATUS_OBJECT_NOT_EXTERNALLY_BACKED && rc != STATUS_INVALID_DEVICE_REQUEST && rc != STATUS_NOT_SUPPORTED) return ntstatus2errcode(rc); } - if (imports.GetVolumeInformationByHandleW && - imports.GetFinalPathNameByHandleW) { + if (imports.GetVolumeInformationByHandleW && imports.GetFinalPathNameByHandleW) { WCHAR *PathBuffer = osal_malloc(sizeof(WCHAR) * INT16_MAX); if (!PathBuffer) return MDBX_ENOMEM; int rc = MDBX_SUCCESS; DWORD VolumeSerialNumber, FileSystemFlags; - if (!imports.GetVolumeInformationByHandleW(handle, PathBuffer, INT16_MAX, - &VolumeSerialNumber, nullptr, + if 
(!imports.GetVolumeInformationByHandleW(handle, PathBuffer, INT16_MAX, &VolumeSerialNumber, nullptr, &FileSystemFlags, nullptr, 0)) { rc = (int)GetLastError(); goto bailout; } if ((flags & MDBX_RDONLY) == 0) { - if (FileSystemFlags & - (FILE_SEQUENTIAL_WRITE_ONCE | FILE_READ_ONLY_VOLUME | - FILE_VOLUME_IS_COMPRESSED)) { + if (FileSystemFlags & (FILE_SEQUENTIAL_WRITE_ONCE | FILE_READ_ONLY_VOLUME | FILE_VOLUME_IS_COMPRESSED)) { rc = ERROR_REMOTE_STORAGE_MEDIA_ERROR; goto bailout; } } - if (imports.GetFinalPathNameByHandleW(handle, PathBuffer, INT16_MAX, - FILE_NAME_NORMALIZED | - VOLUME_NAME_NT)) { + if (imports.GetFinalPathNameByHandleW(handle, PathBuffer, INT16_MAX, FILE_NAME_NORMALIZED | VOLUME_NAME_NT)) { if (_wcsnicmp(PathBuffer, L"\\Device\\Mup\\", 12) == 0) { if (!(flags & MDBX_EXCLUSIVE)) { rc = ERROR_REMOTE_STORAGE_MEDIA_ERROR; @@ -1961,18 +1813,14 @@ static int osal_check_fs_local(mdbx_filehandle_t handle, int flags) { } } - if (F_ISSET(flags, MDBX_RDONLY | MDBX_EXCLUSIVE) && - (FileSystemFlags & FILE_READ_ONLY_VOLUME)) { + if (F_ISSET(flags, MDBX_RDONLY | MDBX_EXCLUSIVE) && (FileSystemFlags & FILE_READ_ONLY_VOLUME)) { /* without-LCK (exclusive readonly) mode for DB on a read-only volume */ goto bailout; } - if (imports.GetFinalPathNameByHandleW(handle, PathBuffer, INT16_MAX, - FILE_NAME_NORMALIZED | - VOLUME_NAME_DOS)) { + if (imports.GetFinalPathNameByHandleW(handle, PathBuffer, INT16_MAX, FILE_NAME_NORMALIZED | VOLUME_NAME_DOS)) { UINT DriveType = GetDriveTypeW(PathBuffer); - if (DriveType == DRIVE_NO_ROOT_DIR && - _wcsnicmp(PathBuffer, L"\\\\?\\", 4) == 0 && + if (DriveType == DRIVE_NO_ROOT_DIR && _wcsnicmp(PathBuffer, L"\\\\?\\", 4) == 0 && _wcsnicmp(PathBuffer + 5, L":\\", 2) == 0) { PathBuffer[7] = 0; DriveType = GetDriveTypeW(PathBuffer + 4); @@ -2027,8 +1875,7 @@ static int osal_check_fs_local(mdbx_filehandle_t handle, int flags) { const unsigned type = 0; const char *const name = statvfs_info.f_basetype; const size_t name_len = 
sizeof(statvfs_info.f_basetype); -#elif defined(__sun) || defined(__SVR4) || defined(__svr4__) || \ - defined(ST_FSTYPSZ) || defined(_ST_FSTYPSZ) +#elif defined(__sun) || defined(__SVR4) || defined(__svr4__) || defined(ST_FSTYPSZ) || defined(_ST_FSTYPSZ) const unsigned type = 0; struct stat st; if (fstat(handle, &st)) @@ -2047,9 +1894,8 @@ static int osal_check_fs_local(mdbx_filehandle_t handle, int flags) { #if defined(MNT_LOCAL) || defined(MNT_EXPORTED) const unsigned long mnt_flags = statfs_info.f_flags; #endif /* MNT_LOCAL || MNT_EXPORTED */ -#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ - defined(__BSD__) || defined(__bsdi__) || defined(__DragonFly__) || \ - defined(__APPLE__) || defined(__MACH__) || defined(MFSNAMELEN) || \ +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__BSD__) || defined(__bsdi__) || \ + defined(__DragonFly__) || defined(__APPLE__) || defined(__MACH__) || defined(MFSNAMELEN) || \ defined(MFSTYPENAMELEN) || defined(VFS_NAMELEN) const char *const name = statfs_info.f_fstypename; const size_t name_len = sizeof(statfs_info.f_fstypename); @@ -2074,12 +1920,11 @@ static int osal_check_fs_local(mdbx_filehandle_t handle, int flags) { mounted = setmntent("/etc/mtab", "r"); if (mounted) { const struct mntent *ent; -#if defined(_BSD_SOURCE) || defined(_SVID_SOURCE) || defined(__BIONIC__) || \ +#if defined(_BSD_SOURCE) || defined(_SVID_SOURCE) || defined(__BIONIC__) || \ (defined(_DEFAULT_SOURCE) && __GLIBC_PREREQ(2, 19)) struct mntent entbuf; const bool should_copy = false; - while (nullptr != - (ent = getmntent_r(mounted, &entbuf, pathbuf, sizeof(pathbuf)))) + while (nullptr != (ent = getmntent_r(mounted, &entbuf, pathbuf, sizeof(pathbuf)))) #else const bool should_copy = true; while (nullptr != (ent = getmntent(mounted))) @@ -2088,8 +1933,7 @@ static int osal_check_fs_local(mdbx_filehandle_t handle, int flags) { struct stat mnt; if (!stat(ent->mnt_dir, &mnt) && mnt.st_dev == 
st.st_dev) { if (should_copy) { - name = - strncpy(pathbuf, ent->mnt_fsname, name_len = sizeof(pathbuf) - 1); + name = strncpy(pathbuf, ent->mnt_fsname, name_len = sizeof(pathbuf) - 1); pathbuf[name_len] = 0; } else { name = ent->mnt_fsname; @@ -2104,17 +1948,13 @@ static int osal_check_fs_local(mdbx_filehandle_t handle, int flags) { #endif if (name_len) { - if (((name_len > 2 && strncasecmp("nfs", name, 3) == 0) || - strncasecmp("cifs", name, name_len) == 0 || - strncasecmp("ncpfs", name, name_len) == 0 || - strncasecmp("smbfs", name, name_len) == 0 || + if (((name_len > 2 && strncasecmp("nfs", name, 3) == 0) || strncasecmp("cifs", name, name_len) == 0 || + strncasecmp("ncpfs", name, name_len) == 0 || strncasecmp("smbfs", name, name_len) == 0 || strcasecmp("9P" /* WSL2 */, name) == 0 || - ((name_len > 3 && strncasecmp("fuse", name, 4) == 0) && - strncasecmp("fuseblk", name, name_len) != 0)) && + ((name_len > 3 && strncasecmp("fuse", name, 4) == 0) && strncasecmp("fuseblk", name, name_len) != 0)) && !(flags & MDBX_EXCLUSIVE)) return MDBX_EREMOTE; - if (strcasecmp("ftp", name) == 0 || strcasecmp("http", name) == 0 || - strcasecmp("sshfs", name) == 0) + if (strcasecmp("ftp", name) == 0 || strcasecmp("http", name) == 0 || strcasecmp("sshfs", name) == 0) return MDBX_EREMOTE; } @@ -2166,18 +2006,15 @@ static int check_mmap_limit(const size_t limit) { if (should_check) { intptr_t pagesize, total_ram_pages, avail_ram_pages; - int err = - mdbx_get_sysraminfo(&pagesize, &total_ram_pages, &avail_ram_pages); + int err = mdbx_get_sysraminfo(&pagesize, &total_ram_pages, &avail_ram_pages); if (unlikely(err != MDBX_SUCCESS)) return err; const int log2page = log2n_powerof2(pagesize); - if ((limit >> (log2page + 7)) > (size_t)total_ram_pages || - (limit >> (log2page + 6)) > (size_t)avail_ram_pages) { + if ((limit >> (log2page + 7)) > (size_t)total_ram_pages || (limit >> (log2page + 6)) > (size_t)avail_ram_pages) { ERROR("%s (%zu pages) is too large for available (%zu pages) or 
total " "(%zu pages) system RAM", - "database upper size limit", limit >> log2page, avail_ram_pages, - total_ram_pages); + "database upper size limit", limit >> log2page, avail_ram_pages, total_ram_pages); return MDBX_TOO_LARGE; } } @@ -2185,8 +2022,8 @@ static int check_mmap_limit(const size_t limit) { return MDBX_SUCCESS; } -MDBX_INTERNAL int osal_mmap(const int flags, osal_mmap_t *map, size_t size, - const size_t limit, const unsigned options) { +MDBX_INTERNAL int osal_mmap(const int flags, osal_mmap_t *map, size_t size, const size_t limit, + const unsigned options) { assert(size <= limit); map->limit = 0; map->current = 0; @@ -2220,8 +2057,7 @@ MDBX_INTERNAL int osal_mmap(const int flags, osal_mmap_t *map, size_t size, return err; #if defined(_WIN32) || defined(_WIN64) if (map->filesize < size) { - WARNING("file size (%zu) less than requested for mapping (%zu)", - (size_t)map->filesize, size); + WARNING("file size (%zu) less than requested for mapping (%zu)", (size_t)map->filesize, size); size = (size_t)map->filesize; } #else @@ -2235,10 +2071,8 @@ MDBX_INTERNAL int osal_mmap(const int flags, osal_mmap_t *map, size_t size, err = NtCreateSection(&map->section, /* DesiredAccess */ (flags & MDBX_WRITEMAP) - ? SECTION_QUERY | SECTION_MAP_READ | - SECTION_EXTEND_SIZE | SECTION_MAP_WRITE - : SECTION_QUERY | SECTION_MAP_READ | - SECTION_EXTEND_SIZE, + ? SECTION_QUERY | SECTION_MAP_READ | SECTION_EXTEND_SIZE | SECTION_MAP_WRITE + : SECTION_QUERY | SECTION_MAP_READ | SECTION_EXTEND_SIZE, /* ObjectAttributes */ nullptr, /* MaximumSize (InitialSize) */ &SectionSize, /* SectionPageProtection */ @@ -2247,18 +2081,15 @@ MDBX_INTERNAL int osal_mmap(const int flags, osal_mmap_t *map, size_t size, if (!NT_SUCCESS(err)) return ntstatus2errcode(err); - SIZE_T ViewSize = (flags & MDBX_RDONLY) ? 0 - : globals.running_under_Wine ? 
size - : limit; - err = NtMapViewOfSection( - map->section, GetCurrentProcess(), &map->base, - /* ZeroBits */ 0, - /* CommitSize */ 0, - /* SectionOffset */ nullptr, &ViewSize, - /* InheritDisposition */ ViewUnmap, - /* AllocationType */ (flags & MDBX_RDONLY) ? 0 : MEM_RESERVE, - /* Win32Protect */ - (flags & MDBX_WRITEMAP) ? PAGE_READWRITE : PAGE_READONLY); + SIZE_T ViewSize = (flags & MDBX_RDONLY) ? 0 : globals.running_under_Wine ? size : limit; + err = NtMapViewOfSection(map->section, GetCurrentProcess(), &map->base, + /* ZeroBits */ 0, + /* CommitSize */ 0, + /* SectionOffset */ nullptr, &ViewSize, + /* InheritDisposition */ ViewUnmap, + /* AllocationType */ (flags & MDBX_RDONLY) ? 0 : MEM_RESERVE, + /* Win32Protect */ + (flags & MDBX_WRITEMAP) ? PAGE_READWRITE : PAGE_READONLY); if (!NT_SUCCESS(err)) { NtClose(map->section); map->section = 0; @@ -2296,13 +2127,9 @@ MDBX_INTERNAL int osal_mmap(const int flags, osal_mmap_t *map, size_t size, #define MAP_NORESERVE 0 #endif - map->base = mmap(nullptr, limit, - (flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ, - MAP_SHARED | MAP_FILE | MAP_NORESERVE | - (F_ISSET(flags, MDBX_UTTERLY_NOSYNC) ? MAP_NOSYNC : 0) | - ((options & MMAP_OPTION_SEMAPHORE) - ? MAP_HASSEMAPHORE | MAP_NOSYNC - : MAP_CONCEAL), + map->base = mmap(nullptr, limit, (flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ, + MAP_SHARED | MAP_FILE | MAP_NORESERVE | (F_ISSET(flags, MDBX_UTTERLY_NOSYNC) ? MAP_NOSYNC : 0) | + ((options & MMAP_OPTION_SEMAPHORE) ? MAP_HASSEMAPHORE | MAP_NOSYNC : MAP_CONCEAL), map->fd, 0); if (unlikely(map->base == MAP_FAILED)) { @@ -2334,9 +2161,8 @@ MDBX_INTERNAL int osal_munmap(osal_mmap_t *map) { /* Unpoisoning is required for ASAN to avoid false-positive diagnostic * when this memory will re-used by malloc or another mmapping. * See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203 */ - MDBX_ASAN_UNPOISON_MEMORY_REGION( - map->base, (map->filesize && map->filesize < map->limit) ? 
map->filesize - : map->limit); + MDBX_ASAN_UNPOISON_MEMORY_REGION(map->base, + (map->filesize && map->filesize < map->limit) ? map->filesize : map->limit); #if defined(_WIN32) || defined(_WIN64) if (map->section) NtClose(map->section); @@ -2356,11 +2182,9 @@ MDBX_INTERNAL int osal_munmap(osal_mmap_t *map) { return MDBX_SUCCESS; } -MDBX_INTERNAL int osal_mresize(const int flags, osal_mmap_t *map, size_t size, - size_t limit) { +MDBX_INTERNAL int osal_mresize(const int flags, osal_mmap_t *map, size_t size, size_t limit) { int rc = osal_filesize(map->fd, &map->filesize); - VERBOSE("flags 0x%x, size %zu, limit %zu, filesize %" PRIu64, flags, size, - limit, map->filesize); + VERBOSE("flags 0x%x, size %zu, limit %zu, filesize %" PRIu64, flags, size, limit, map->filesize); assert(size <= limit); if (rc != MDBX_SUCCESS) { map->filesize = 0; @@ -2400,15 +2224,13 @@ MDBX_INTERNAL int osal_mresize(const int flags, osal_mmap_t *map, size_t size, /* check ability of address space for growth before unmap */ PVOID BaseAddress = (PBYTE)map->base + map->limit; SIZE_T RegionSize = limit - map->limit; - status = NtAllocateVirtualMemory(GetCurrentProcess(), &BaseAddress, 0, - &RegionSize, MEM_RESERVE, PAGE_NOACCESS); + status = NtAllocateVirtualMemory(GetCurrentProcess(), &BaseAddress, 0, &RegionSize, MEM_RESERVE, PAGE_NOACCESS); if (status == (NTSTATUS) /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018) return MDBX_UNABLE_EXTEND_MAPSIZE; if (!NT_SUCCESS(status)) return ntstatus2errcode(status); - status = NtFreeVirtualMemory(GetCurrentProcess(), &BaseAddress, &RegionSize, - MEM_RELEASE); + status = NtFreeVirtualMemory(GetCurrentProcess(), &BaseAddress, &RegionSize, MEM_RELEASE); if (!NT_SUCCESS(status)) return ntstatus2errcode(status); } @@ -2443,8 +2265,7 @@ MDBX_INTERNAL int osal_mresize(const int flags, osal_mmap_t *map, size_t size, map->current = map->limit = 0; if (ReservedAddress) { ReservedSize = 0; - status = NtFreeVirtualMemory(GetCurrentProcess(), &ReservedAddress, - 
&ReservedSize, MEM_RELEASE); + status = NtFreeVirtualMemory(GetCurrentProcess(), &ReservedAddress, &ReservedSize, MEM_RELEASE); assert(NT_SUCCESS(status)); (void)status; } @@ -2455,8 +2276,7 @@ retry_file_and_section: /* resizing of the file may take a while, * therefore we reserve address space to avoid occupy it by other threads */ ReservedAddress = map->base; - status = NtAllocateVirtualMemory(GetCurrentProcess(), &ReservedAddress, 0, - &ReservedSize, MEM_RESERVE, PAGE_NOACCESS); + status = NtAllocateVirtualMemory(GetCurrentProcess(), &ReservedAddress, 0, &ReservedSize, MEM_RESERVE, PAGE_NOACCESS); if (!NT_SUCCESS(status)) { ReservedAddress = nullptr; if (status != (NTSTATUS) /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018) @@ -2476,18 +2296,16 @@ retry_file_and_section: } SectionSize.QuadPart = size; - status = NtCreateSection( - &map->section, - /* DesiredAccess */ - (flags & MDBX_WRITEMAP) - ? SECTION_QUERY | SECTION_MAP_READ | SECTION_EXTEND_SIZE | - SECTION_MAP_WRITE - : SECTION_QUERY | SECTION_MAP_READ | SECTION_EXTEND_SIZE, - /* ObjectAttributes */ nullptr, - /* MaximumSize (InitialSize) */ &SectionSize, - /* SectionPageProtection */ - (flags & MDBX_RDONLY) ? PAGE_READONLY : PAGE_READWRITE, - /* AllocationAttributes */ SEC_RESERVE, map->fd); + status = NtCreateSection(&map->section, + /* DesiredAccess */ + (flags & MDBX_WRITEMAP) + ? SECTION_QUERY | SECTION_MAP_READ | SECTION_EXTEND_SIZE | SECTION_MAP_WRITE + : SECTION_QUERY | SECTION_MAP_READ | SECTION_EXTEND_SIZE, + /* ObjectAttributes */ nullptr, + /* MaximumSize (InitialSize) */ &SectionSize, + /* SectionPageProtection */ + (flags & MDBX_RDONLY) ? 
PAGE_READONLY : PAGE_READWRITE, + /* AllocationAttributes */ SEC_RESERVE, map->fd); if (!NT_SUCCESS(status)) goto bailout_ntstatus; @@ -2495,8 +2313,7 @@ retry_file_and_section: if (ReservedAddress) { /* release reserved address space */ ReservedSize = 0; - status = NtFreeVirtualMemory(GetCurrentProcess(), &ReservedAddress, - &ReservedSize, MEM_RELEASE); + status = NtFreeVirtualMemory(GetCurrentProcess(), &ReservedAddress, &ReservedSize, MEM_RELEASE); ReservedAddress = nullptr; if (!NT_SUCCESS(status)) goto bailout_ntstatus; @@ -2504,19 +2321,18 @@ retry_file_and_section: retry_mapview:; SIZE_T ViewSize = (flags & MDBX_RDONLY) ? size : limit; - status = NtMapViewOfSection( - map->section, GetCurrentProcess(), &map->base, - /* ZeroBits */ 0, - /* CommitSize */ 0, - /* SectionOffset */ nullptr, &ViewSize, - /* InheritDisposition */ ViewUnmap, - /* AllocationType */ (flags & MDBX_RDONLY) ? 0 : MEM_RESERVE, - /* Win32Protect */ - (flags & MDBX_WRITEMAP) ? PAGE_READWRITE : PAGE_READONLY); + status = NtMapViewOfSection(map->section, GetCurrentProcess(), &map->base, + /* ZeroBits */ 0, + /* CommitSize */ 0, + /* SectionOffset */ nullptr, &ViewSize, + /* InheritDisposition */ ViewUnmap, + /* AllocationType */ (flags & MDBX_RDONLY) ? 0 : MEM_RESERVE, + /* Win32Protect */ + (flags & MDBX_WRITEMAP) ? PAGE_READWRITE : PAGE_READONLY); if (!NT_SUCCESS(status)) { - if (status == (NTSTATUS) /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018 && - map->base && (flags & MDBX_MRESIZE_MAY_MOVE) != 0) { + if (status == (NTSTATUS) /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018 && map->base && + (flags & MDBX_MRESIZE_MAY_MOVE) != 0) { /* try remap at another base address */ map->base = nullptr; goto retry_mapview; @@ -2550,8 +2366,7 @@ retry_mapview:; rc = MDBX_EPERM; map->current = (map->filesize > limit) ? 
limit : (size_t)map->filesize; } else { - if (size > map->filesize || - (size < map->filesize && (flags & txn_shrink_allowed))) { + if (size > map->filesize || (size < map->filesize && (flags & txn_shrink_allowed))) { rc = osal_ftruncate(map->fd, size); VERBOSE("ftruncate %zu, err %d", size, rc); if (rc != MDBX_SUCCESS) @@ -2566,9 +2381,8 @@ retry_mapview:; * this region and (therefore) do not need the help of ASAN. * - this allows us to clear the mask only within the file size * when closing the mapping. */ - MDBX_ASAN_UNPOISON_MEMORY_REGION( - ptr_disp(map->base, size), - ((map->current < map->limit) ? map->current : map->limit) - size); + MDBX_ASAN_UNPOISON_MEMORY_REGION(ptr_disp(map->base, size), + ((map->current < map->limit) ? map->current : map->limit) - size); } map->current = (size < map->limit) ? size : map->limit; } @@ -2617,15 +2431,13 @@ retry_mapview:; #endif /* Linux & _GNU_SOURCE */ const unsigned mmap_flags = - MAP_CONCEAL | MAP_SHARED | MAP_FILE | MAP_NORESERVE | - (F_ISSET(flags, MDBX_UTTERLY_NOSYNC) ? MAP_NOSYNC : 0); - const unsigned mmap_prot = - (flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ; + MAP_CONCEAL | MAP_SHARED | MAP_FILE | MAP_NORESERVE | (F_ISSET(flags, MDBX_UTTERLY_NOSYNC) ? MAP_NOSYNC : 0); + const unsigned mmap_prot = (flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ; if (ptr == MAP_FAILED) { /* Try to mmap additional space beyond the end of mapping. */ - ptr = mmap(ptr_disp(map->base, map->limit), limit - map->limit, mmap_prot, - mmap_flags | MAP_FIXED_NOREPLACE, map->fd, map->limit); + ptr = mmap(ptr_disp(map->base, map->limit), limit - map->limit, mmap_prot, mmap_flags | MAP_FIXED_NOREPLACE, + map->fd, map->limit); if (ptr == ptr_disp(map->base, map->limit)) /* успешно прилепили отображение в конец */ ptr = map->base; @@ -2668,33 +2480,25 @@ retry_mapview:; // coverity[pass_freed_arg : FALSE] ptr = mmap(map->base, limit, mmap_prot, - (flags & MDBX_MRESIZE_MAY_MOVE) - ? 
mmap_flags - : mmap_flags | (MAP_FIXED_NOREPLACE ? MAP_FIXED_NOREPLACE - : MAP_FIXED), + (flags & MDBX_MRESIZE_MAY_MOVE) ? mmap_flags + : mmap_flags | (MAP_FIXED_NOREPLACE ? MAP_FIXED_NOREPLACE : MAP_FIXED), map->fd, 0); - if (MAP_FIXED_NOREPLACE != 0 && MAP_FIXED_NOREPLACE != MAP_FIXED && - unlikely(ptr == MAP_FAILED) && !(flags & MDBX_MRESIZE_MAY_MOVE) && - errno == /* kernel don't support MAP_FIXED_NOREPLACE */ EINVAL) + if (MAP_FIXED_NOREPLACE != 0 && MAP_FIXED_NOREPLACE != MAP_FIXED && unlikely(ptr == MAP_FAILED) && + !(flags & MDBX_MRESIZE_MAY_MOVE) && errno == /* kernel don't support MAP_FIXED_NOREPLACE */ EINVAL) // coverity[pass_freed_arg : FALSE] - ptr = - mmap(map->base, limit, mmap_prot, mmap_flags | MAP_FIXED, map->fd, 0); + ptr = mmap(map->base, limit, mmap_prot, mmap_flags | MAP_FIXED, map->fd, 0); if (unlikely(ptr == MAP_FAILED)) { /* try to restore prev mapping */ // coverity[pass_freed_arg : FALSE] ptr = mmap(map->base, map->limit, mmap_prot, - (flags & MDBX_MRESIZE_MAY_MOVE) - ? mmap_flags - : mmap_flags | (MAP_FIXED_NOREPLACE ? MAP_FIXED_NOREPLACE - : MAP_FIXED), + (flags & MDBX_MRESIZE_MAY_MOVE) ? mmap_flags + : mmap_flags | (MAP_FIXED_NOREPLACE ? 
MAP_FIXED_NOREPLACE : MAP_FIXED), map->fd, 0); - if (MAP_FIXED_NOREPLACE != 0 && MAP_FIXED_NOREPLACE != MAP_FIXED && - unlikely(ptr == MAP_FAILED) && !(flags & MDBX_MRESIZE_MAY_MOVE) && - errno == /* kernel don't support MAP_FIXED_NOREPLACE */ EINVAL) + if (MAP_FIXED_NOREPLACE != 0 && MAP_FIXED_NOREPLACE != MAP_FIXED && unlikely(ptr == MAP_FAILED) && + !(flags & MDBX_MRESIZE_MAY_MOVE) && errno == /* kernel don't support MAP_FIXED_NOREPLACE */ EINVAL) // coverity[pass_freed_arg : FALSE] - ptr = mmap(map->base, map->limit, mmap_prot, mmap_flags | MAP_FIXED, - map->fd, 0); + ptr = mmap(map->base, map->limit, mmap_prot, mmap_flags | MAP_FIXED, map->fd, 0); if (unlikely(ptr == MAP_FAILED)) { VALGRIND_MAKE_MEM_NOACCESS(map->base, map->current); /* Unpoisoning is required for ASAN to avoid false-positive diagnostic @@ -2702,8 +2506,7 @@ retry_mapview:; * See * https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203 */ - MDBX_ASAN_UNPOISON_MEMORY_REGION( - map->base, (map->current < map->limit) ? map->current : map->limit); + MDBX_ASAN_UNPOISON_MEMORY_REGION(map->base, (map->current < map->limit) ? map->current : map->limit); map->limit = 0; map->current = 0; map->base = nullptr; @@ -2723,8 +2526,7 @@ retry_mapview:; * See * https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203 */ - MDBX_ASAN_UNPOISON_MEMORY_REGION( - map->base, (map->current < map->limit) ? map->current : map->limit); + MDBX_ASAN_UNPOISON_MEMORY_REGION(map->base, (map->current < map->limit) ? 
map->current : map->limit); VALGRIND_MAKE_MEM_DEFINED(ptr, map->current); MDBX_ASAN_UNPOISON_MEMORY_REGION(ptr, map->current); @@ -2747,10 +2549,8 @@ retry_mapview:; /* Zap: Redundant code */ MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(6287); - assert(rc != MDBX_SUCCESS || - (map->base != nullptr && map->base != MAP_FAILED && - map->current == size && map->limit == limit && - map->filesize >= size)); + assert(rc != MDBX_SUCCESS || (map->base != nullptr && map->base != MAP_FAILED && map->current == size && + map->limit == limit && map->filesize >= size)); return rc; } @@ -2758,8 +2558,7 @@ retry_mapview:; __cold MDBX_INTERNAL void osal_jitter(bool tiny) { for (;;) { -#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ - defined(__x86_64__) +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__) unsigned salt = 5296013u * (unsigned)__rdtsc(); salt ^= salt >> 11; salt *= 25810541u; @@ -2782,9 +2581,8 @@ __cold MDBX_INTERNAL void osal_jitter(bool tiny) { timer = CreateWaitableTimer(NULL, TRUE, NULL); LARGE_INTEGER ft; - ft.QuadPart = - coin * (int64_t)-10; // Convert to 100 nanosecond interval, - // negative value indicates relative time. + ft.QuadPart = coin * (int64_t)-10; // Convert to 100 nanosecond interval, + // negative value indicates relative time. SetWaitableTimer(timer, &ft, 0, NULL, NULL, 0); WaitForSingleObject(timer, INFINITE); // CloseHandle(timer); @@ -2874,10 +2672,7 @@ MDBX_INTERNAL uint64_t osal_cputime(size_t *optional_page_faults) { #if defined(_WIN32) || defined(_WIN64) if (optional_page_faults) { PROCESS_MEMORY_COUNTERS pmc; - *optional_page_faults = - GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc)) - ? pmc.PageFaultCount - : 0; + *optional_page_faults = GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc)) ? 
pmc.PageFaultCount : 0; } FILETIME unused, usermode; if (GetThreadTimes(GetCurrentThread(), @@ -2905,8 +2700,7 @@ MDBX_INTERNAL uint64_t osal_cputime(size_t *optional_page_faults) { if (getrusage(RUSAGE_THREAD, &usage) == 0) { if (optional_page_faults) *optional_page_faults = usage.ru_majflt; - return usage.ru_utime.tv_sec * UINT64_C(1000000000) + - usage.ru_utime.tv_usec * 1000u; + return usage.ru_utime.tv_sec * UINT64_C(1000000000) + usage.ru_utime.tv_usec * 1000u; } if (optional_page_faults) *optional_page_faults = 0; @@ -3016,41 +2810,33 @@ __cold static uint64_t windows_bootime(void) { return 0; } -__cold static LSTATUS mdbx_RegGetValue(HKEY hKey, LPCSTR lpSubKey, - LPCSTR lpValue, PVOID pvData, - LPDWORD pcbData) { +__cold static LSTATUS mdbx_RegGetValue(HKEY hKey, LPCSTR lpSubKey, LPCSTR lpValue, PVOID pvData, LPDWORD pcbData) { LSTATUS rc; if (!imports.RegGetValueA) { /* an old Windows 2000/XP */ HKEY hSubKey; rc = RegOpenKeyA(hKey, lpSubKey, &hSubKey); if (rc == ERROR_SUCCESS) { - rc = - RegQueryValueExA(hSubKey, lpValue, nullptr, nullptr, pvData, pcbData); + rc = RegQueryValueExA(hSubKey, lpValue, nullptr, nullptr, pvData, pcbData); RegCloseKey(hSubKey); } return rc; } - rc = imports.RegGetValueA(hKey, lpSubKey, lpValue, RRF_RT_ANY, nullptr, + rc = imports.RegGetValueA(hKey, lpSubKey, lpValue, RRF_RT_ANY, nullptr, pvData, pcbData); + if (rc != ERROR_FILE_NOT_FOUND) + return rc; + + rc = imports.RegGetValueA(hKey, lpSubKey, lpValue, RRF_RT_ANY | 0x00010000 /* RRF_SUBKEY_WOW6464KEY */, nullptr, pvData, pcbData); if (rc != ERROR_FILE_NOT_FOUND) return rc; - - rc = imports.RegGetValueA(hKey, lpSubKey, lpValue, - RRF_RT_ANY | 0x00010000 /* RRF_SUBKEY_WOW6464KEY */, - nullptr, pvData, pcbData); - if (rc != ERROR_FILE_NOT_FOUND) - return rc; - return imports.RegGetValueA(hKey, lpSubKey, lpValue, - RRF_RT_ANY | - 0x00020000 /* RRF_SUBKEY_WOW6432KEY */, - nullptr, pvData, pcbData); + return imports.RegGetValueA(hKey, lpSubKey, lpValue, RRF_RT_ANY | 0x00020000 /* 
RRF_SUBKEY_WOW6432KEY */, nullptr, + pvData, pcbData); } #endif -__cold MDBX_MAYBE_UNUSED static bool -bootid_parse_uuid(bin128_t *s, const void *p, const size_t n) { +__cold MDBX_MAYBE_UNUSED static bool bootid_parse_uuid(bin128_t *s, const void *p, const size_t n) { if (n > 31) { unsigned bits = 0; for (unsigned i = 0; i < n; ++i) /* try parse an UUID in text form */ { @@ -3096,10 +2882,7 @@ __cold static bool proc_read_uuid(const char *path, bin128_t *target) { if (fd != -1) { struct statfs fs; char buf[42]; - const ssize_t len = - (fstatfs(fd, &fs) == 0 && fs.f_type == /* procfs */ 0x9FA0) - ? read(fd, buf, sizeof(buf)) - : -1; + const ssize_t len = (fstatfs(fd, &fs) == 0 && fs.f_type == /* procfs */ 0x9FA0) ? read(fd, buf, sizeof(buf)) : -1; const int err = close(fd); assert(err == 0); (void)err; @@ -3123,12 +2906,10 @@ __cold static bin128_t osal_bootid(void) { { char buf[42]; size_t len = sizeof(buf); - if (!sysctlbyname("kern.bootsessionuuid", buf, &len, nullptr, 0) && - bootid_parse_uuid(&uuid, buf, len)) + if (!sysctlbyname("kern.bootsessionuuid", buf, &len, nullptr, 0) && bootid_parse_uuid(&uuid, buf, len)) return uuid; -#if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && \ - __MAC_OS_X_VERSION_MIN_REQUIRED > 1050 +#if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED > 1050 uuid_t hostuuid; struct timespec wait = {0, 1000000000u / 42}; if (!gethostuuid(hostuuid, &wait)) @@ -3137,8 +2918,7 @@ __cold static bin128_t osal_bootid(void) { struct timeval boottime; len = sizeof(boottime); - if (!sysctlbyname("kern.boottime", &boottime, &len, nullptr, 0) && - len == sizeof(boottime) && boottime.tv_sec) + if (!sysctlbyname("kern.boottime", &boottime, &len, nullptr, 0) && len == sizeof(boottime) && boottime.tv_sec) got_boottime = true; } #endif /* Apple/Darwin */ @@ -3162,83 +2942,70 @@ __cold static bin128_t osal_bootid(void) { char DigitalProductId[248]; } buf; - static const char HKLM_MicrosoftCryptography[] = - 
"SOFTWARE\\Microsoft\\Cryptography"; + static const char HKLM_MicrosoftCryptography[] = "SOFTWARE\\Microsoft\\Cryptography"; DWORD len = sizeof(buf); /* Windows is madness and must die */ - if (mdbx_RegGetValue(HKEY_LOCAL_MACHINE, HKLM_MicrosoftCryptography, - "MachineGuid", &buf.MachineGuid, - &len) == ERROR_SUCCESS && + if (mdbx_RegGetValue(HKEY_LOCAL_MACHINE, HKLM_MicrosoftCryptography, "MachineGuid", &buf.MachineGuid, &len) == + ERROR_SUCCESS && len < sizeof(buf)) got_machineid = bootid_parse_uuid(&uuid, &buf.MachineGuid, len); if (!got_machineid) { /* again, Windows is madness */ - static const char HKLM_WindowsNT[] = - "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion"; - static const char HKLM_WindowsNT_DPK[] = - "SOFTWARE\\Microsoft\\Windows " - "NT\\CurrentVersion\\DefaultProductKey"; - static const char HKLM_WindowsNT_DPK2[] = - "SOFTWARE\\Microsoft\\Windows " - "NT\\CurrentVersion\\DefaultProductKey2"; + static const char HKLM_WindowsNT[] = "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion"; + static const char HKLM_WindowsNT_DPK[] = "SOFTWARE\\Microsoft\\Windows " + "NT\\CurrentVersion\\DefaultProductKey"; + static const char HKLM_WindowsNT_DPK2[] = "SOFTWARE\\Microsoft\\Windows " + "NT\\CurrentVersion\\DefaultProductKey2"; len = sizeof(buf); - if (mdbx_RegGetValue(HKEY_LOCAL_MACHINE, HKLM_WindowsNT, - "DigitalProductId", &buf.DigitalProductId, - &len) == ERROR_SUCCESS && + if (mdbx_RegGetValue(HKEY_LOCAL_MACHINE, HKLM_WindowsNT, "DigitalProductId", &buf.DigitalProductId, &len) == + ERROR_SUCCESS && len > 42 && len < sizeof(buf)) { bootid_collect(&uuid, &buf.DigitalProductId, len); got_machineid = true; } len = sizeof(buf); - if (mdbx_RegGetValue(HKEY_LOCAL_MACHINE, HKLM_WindowsNT_DPK, - "DigitalProductId", &buf.DigitalProductId, - &len) == ERROR_SUCCESS && + if (mdbx_RegGetValue(HKEY_LOCAL_MACHINE, HKLM_WindowsNT_DPK, "DigitalProductId", &buf.DigitalProductId, &len) == + ERROR_SUCCESS && len > 42 && len < sizeof(buf)) { bootid_collect(&uuid, 
&buf.DigitalProductId, len); got_machineid = true; } len = sizeof(buf); - if (mdbx_RegGetValue(HKEY_LOCAL_MACHINE, HKLM_WindowsNT_DPK2, - "DigitalProductId", &buf.DigitalProductId, - &len) == ERROR_SUCCESS && + if (mdbx_RegGetValue(HKEY_LOCAL_MACHINE, HKLM_WindowsNT_DPK2, "DigitalProductId", &buf.DigitalProductId, &len) == + ERROR_SUCCESS && len > 42 && len < sizeof(buf)) { bootid_collect(&uuid, &buf.DigitalProductId, len); got_machineid = true; } } - static const char HKLM_PrefetcherParams[] = - "SYSTEM\\CurrentControlSet\\Control\\Session Manager\\Memory " - "Management\\PrefetchParameters"; + static const char HKLM_PrefetcherParams[] = "SYSTEM\\CurrentControlSet\\Control\\Session Manager\\Memory " + "Management\\PrefetchParameters"; len = sizeof(buf); - if (mdbx_RegGetValue(HKEY_LOCAL_MACHINE, HKLM_PrefetcherParams, "BootId", - &buf.BootId, &len) == ERROR_SUCCESS && + if (mdbx_RegGetValue(HKEY_LOCAL_MACHINE, HKLM_PrefetcherParams, "BootId", &buf.BootId, &len) == ERROR_SUCCESS && len > 1 && len < sizeof(buf)) { bootid_collect(&uuid, &buf.BootId, len); got_bootseq = true; } len = sizeof(buf); - if (mdbx_RegGetValue(HKEY_LOCAL_MACHINE, HKLM_PrefetcherParams, "BaseTime", - &buf.BaseTime, &len) == ERROR_SUCCESS && + if (mdbx_RegGetValue(HKEY_LOCAL_MACHINE, HKLM_PrefetcherParams, "BaseTime", &buf.BaseTime, &len) == ERROR_SUCCESS && len >= sizeof(buf.BaseTime) && buf.BaseTime) { bootid_collect(&uuid, &buf.BaseTime, len); got_boottime = true; } /* BootTime from SYSTEM_TIMEOFDAY_INFORMATION */ - NTSTATUS status = NtQuerySystemInformation( - 0x03 /* SystemTmeOfDayInformation */, &buf.SysTimeOfDayInfo, - sizeof(buf.SysTimeOfDayInfo), &len); + NTSTATUS status = NtQuerySystemInformation(0x03 /* SystemTmeOfDayInformation */, &buf.SysTimeOfDayInfo, + sizeof(buf.SysTimeOfDayInfo), &len); if (NT_SUCCESS(status) && len >= offsetof(union buf, SysTimeOfDayInfoHacked.BootTimeBias) + sizeof(buf.SysTimeOfDayInfoHacked.BootTimeBias) && buf.SysTimeOfDayInfoHacked.BootTime.QuadPart) { 
const uint64_t UnbiasedBootTime = - buf.SysTimeOfDayInfoHacked.BootTime.QuadPart - - buf.SysTimeOfDayInfoHacked.BootTimeBias; + buf.SysTimeOfDayInfoHacked.BootTime.QuadPart - buf.SysTimeOfDayInfoHacked.BootTimeBias; if (UnbiasedBootTime) { bootid_collect(&uuid, &UnbiasedBootTime, sizeof(UnbiasedBootTime)); got_boottime = true; @@ -3384,13 +3151,11 @@ __cold static bin128_t osal_bootid(void) { if (0x1CCCCCC > now.dwHighDateTime) #else struct timespec mono, real; - if (clock_gettime(CLOCK_MONOTONIC, &mono) || - clock_gettime(CLOCK_REALTIME, &real) || + if (clock_gettime(CLOCK_MONOTONIC, &mono) || clock_gettime(CLOCK_REALTIME, &real) || /* wrong time, RTC is mad or absent */ 1555555555l > real.tv_sec || /* seems no adjustment by RTC/NTP, i.e. a fake time */ - real.tv_sec < mono.tv_sec || 1234567890l > real.tv_sec - mono.tv_sec || - (real.tv_sec - mono.tv_sec) % 900u == 0) + real.tv_sec < mono.tv_sec || 1234567890l > real.tv_sec - mono.tv_sec || (real.tv_sec - mono.tv_sec) % 900u == 0) #endif goto lack; } @@ -3398,8 +3163,7 @@ __cold static bin128_t osal_bootid(void) { return uuid; } -__cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, - intptr_t *avail_pages) { +__cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, intptr_t *avail_pages) { if (!page_size && !total_pages && !avail_pages) return LOG_IFERR(MDBX_EINVAL); if (total_pages) @@ -3437,8 +3201,7 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, if (total_ram_Kb == -1) return LOG_IFERR(errno); const intptr_t total_ram_pages = (total_ram_Kb << 10) >> log2page; -#elif defined(HW_USERMEM) || defined(HW_PHYSMEM64) || defined(HW_MEMSIZE) || \ - defined(HW_PHYSMEM) +#elif defined(HW_USERMEM) || defined(HW_PHYSMEM64) || defined(HW_MEMSIZE) || defined(HW_PHYSMEM) size_t ram, len = sizeof(ram); static const int mib[] = {CTL_HW, #if defined(HW_USERMEM) @@ -3480,8 +3243,7 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, 
mach_msg_type_number_t count = HOST_VM_INFO_COUNT; vm_statistics_data_t vmstat; mach_port_t mport = mach_host_self(); - kern_return_t kerr = host_statistics(mach_host_self(), HOST_VM_INFO, - (host_info_t)&vmstat, &count); + kern_return_t kerr = host_statistics(mach_host_self(), HOST_VM_INFO, (host_info_t)&vmstat, &count); mach_port_deallocate(mach_task_self(), mport); if (unlikely(kerr != KERN_SUCCESS)) return LOG_IFERR(MDBX_ENOSYS); @@ -3525,9 +3287,8 @@ __cold int mdbx_get_sysraminfo(intptr_t *page_size, intptr_t *total_pages, #ifdef __IPHONE_OS_VERSION_MIN_REQUIRED #include -#elif __GLIBC_PREREQ(2, 25) || defined(__FreeBSD__) || defined(__NetBSD__) || \ - defined(__BSD__) || defined(__bsdi__) || defined(__DragonFly__) || \ - defined(__APPLE__) || __has_include() +#elif __GLIBC_PREREQ(2, 25) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__BSD__) || defined(__bsdi__) || \ + defined(__DragonFly__) || defined(__APPLE__) || __has_include() #include #endif /* sys/random.h */ @@ -3558,23 +3319,19 @@ MDBX_INTERNAL bin128_t osal_guid(const MDBX_env *env) { #endif /* FreeBSD */ #if defined(_WIN32) || defined(_WIN64) - if (imports.CoCreateGuid && imports.CoCreateGuid(&uuid) == 0 && - check_uuid(uuid)) + if (imports.CoCreateGuid && imports.CoCreateGuid(&uuid) == 0 && check_uuid(uuid)) return uuid; HCRYPTPROV hCryptProv = 0; - if (CryptAcquireContextW(&hCryptProv, nullptr, nullptr, PROV_RSA_FULL, - CRYPT_VERIFYCONTEXT | CRYPT_SILENT)) { - const BOOL ok = - CryptGenRandom(hCryptProv, sizeof(uuid), (unsigned char *)&uuid); + if (CryptAcquireContextW(&hCryptProv, nullptr, nullptr, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT | CRYPT_SILENT)) { + const BOOL ok = CryptGenRandom(hCryptProv, sizeof(uuid), (unsigned char *)&uuid); CryptReleaseContext(hCryptProv, 0); if (ok && check_uuid(uuid)) return uuid; } #elif defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && defined(__IPHONE_8_0) #if __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0 - if (CCRandomGenerateBytes(&uuid, sizeof(uuid)) 
== kCCSuccess && - check_uuid(uuid)) + if (CCRandomGenerateBytes(&uuid, sizeof(uuid)) == kCCSuccess && check_uuid(uuid)) return uuid; #endif /* iOS >= 8.x */ #else @@ -3587,14 +3344,13 @@ MDBX_INTERNAL bin128_t osal_guid(const MDBX_env *env) { if (len == sizeof(uuid) && check_uuid(uuid)) return uuid; } -#if (__GLIBC_PREREQ(2, 25) || defined(__FreeBSD__) || defined(__NetBSD__) || \ - defined(__BSD__) || defined(__bsdi__) || defined(__DragonFly__)) && \ +#if (__GLIBC_PREREQ(2, 25) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__BSD__) || defined(__bsdi__) || \ + defined(__DragonFly__)) && \ !defined(__APPLE__) && !defined(__ANDROID_API__) if (getrandom(&uuid, sizeof(uuid), 0) == sizeof(uuid) && check_uuid(uuid)) return uuid; -#elif defined(__OpenBSD__) || (defined(__sun) && defined(__SVR4)) || \ - (defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && \ - __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200) +#elif defined(__OpenBSD__) || (defined(__sun) && defined(__SVR4)) || \ + (defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200) if (getentropy(&uuid, sizeof(uuid)) == 0 && check_uuid(uuid)) return uuid; #endif /* getrandom() / getentropy() */ @@ -3633,13 +3389,11 @@ void osal_ctor(void) { #else globals.sys_pagesize = sysconf(_SC_PAGE_SIZE); globals.sys_allocation_granularity = (MDBX_WORDBITS > 32) ? 65536 : 4096; - globals.sys_allocation_granularity = - (globals.sys_allocation_granularity > globals.sys_pagesize) - ? globals.sys_allocation_granularity - : globals.sys_pagesize; + globals.sys_allocation_granularity = (globals.sys_allocation_granularity > globals.sys_pagesize) + ? 
globals.sys_allocation_granularity + : globals.sys_pagesize; #endif - assert(globals.sys_pagesize > 0 && - (globals.sys_pagesize & (globals.sys_pagesize - 1)) == 0); + assert(globals.sys_pagesize > 0 && (globals.sys_pagesize & (globals.sys_pagesize - 1)) == 0); assert(globals.sys_allocation_granularity >= globals.sys_pagesize && globals.sys_allocation_granularity % globals.sys_pagesize == 0); globals.sys_pagesize_ln2 = log2n_powerof2(globals.sys_pagesize); @@ -3659,12 +3413,10 @@ void osal_ctor(void) { uint32_t proba = UINT32_MAX; while (true) { - unsigned time_conversion_checkup = - osal_monotime_to_16dot16(osal_16dot16_to_monotime(proba)); + unsigned time_conversion_checkup = osal_monotime_to_16dot16(osal_16dot16_to_monotime(proba)); unsigned one_more = (proba < UINT32_MAX) ? proba + 1 : proba; unsigned one_less = (proba > 0) ? proba - 1 : proba; - ENSURE(nullptr, time_conversion_checkup >= one_less && - time_conversion_checkup <= one_more); + ENSURE(nullptr, time_conversion_checkup >= one_less && time_conversion_checkup <= one_more); if (proba == 0) break; proba >>= 1; diff --git a/src/osal.h b/src/osal.h index 813baa9a..5a049a24 100644 --- a/src/osal.h +++ b/src/osal.h @@ -12,9 +12,8 @@ #if __has_include() #include -#elif defined(__mips) || defined(__mips__) || defined(__mips64) || \ - defined(__mips64__) || defined(_M_MRX000) || defined(_MIPS_) || \ - defined(__MWERKS__) || defined(__sgi) +#elif defined(__mips) || defined(__mips__) || defined(__mips64) || defined(__mips64__) || defined(_M_MRX000) || \ + defined(_MIPS_) || defined(__MWERKS__) || defined(__sgi) /* MIPS should have explicit cache control */ #include #endif @@ -28,11 +27,9 @@ MDBX_MAYBE_UNUSED static inline void osal_compiler_barrier(void) { __memory_barrier(); #elif defined(__SUNPRO_C) || defined(__sun) || defined(sun) __compiler_barrier(); -#elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) && \ - (defined(HP_IA64) || defined(__ia64)) +#elif (defined(_HPUX_SOURCE) || 
defined(__hpux) || defined(__HP_aCC)) && (defined(HP_IA64) || defined(__ia64)) _Asm_sched_fence(/* LY: no-arg meaning 'all expect ALU', e.g. 0x3D3D */); -#elif defined(_AIX) || defined(__ppc__) || defined(__powerpc__) || \ - defined(__ppc64__) || defined(__powerpc64__) +#elif defined(_AIX) || defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__) __fence(); #else #error "Could not guess the kind of compiler, please report to us." @@ -60,11 +57,9 @@ MDBX_MAYBE_UNUSED static inline void osal_memory_barrier(void) { #endif #elif defined(__SUNPRO_C) || defined(__sun) || defined(sun) __machine_rw_barrier(); -#elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) && \ - (defined(HP_IA64) || defined(__ia64)) +#elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) && (defined(HP_IA64) || defined(__ia64)) _Asm_mf(); -#elif defined(_AIX) || defined(__ppc__) || defined(__powerpc__) || \ - defined(__ppc64__) || defined(__powerpc64__) +#elif defined(_AIX) || defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__) __lwsync(); #else #error "Could not guess the kind of compiler, please report to us." @@ -101,9 +96,7 @@ typedef CRITICAL_SECTION osal_fastmutex_t; #if MDBX_WITHOUT_MSVC_CRT #ifndef osal_malloc -static inline void *osal_malloc(size_t bytes) { - return HeapAlloc(GetProcessHeap(), 0, bytes); -} +static inline void *osal_malloc(size_t bytes) { return HeapAlloc(GetProcessHeap(), 0, bytes); } #endif /* osal_malloc */ #ifndef osal_calloc @@ -114,8 +107,7 @@ static inline void *osal_calloc(size_t nelem, size_t size) { #ifndef osal_realloc static inline void *osal_realloc(void *ptr, size_t bytes) { - return ptr ? HeapReAlloc(GetProcessHeap(), 0, ptr, bytes) - : HeapAlloc(GetProcessHeap(), 0, bytes); + return ptr ? 
HeapReAlloc(GetProcessHeap(), 0, ptr, bytes) : HeapAlloc(GetProcessHeap(), 0, bytes); } #endif /* osal_realloc */ @@ -208,7 +200,7 @@ typedef struct osal_mmap { #elif defined(__APPLE__) || defined(__MACH__) || defined(_DARWIN_C_SOURCE) -#if defined(MAC_OS_X_VERSION_MIN_REQUIRED) && defined(MAC_OS_VERSION_11_0) && \ +#if defined(MAC_OS_X_VERSION_MIN_REQUIRED) && defined(MAC_OS_VERSION_11_0) && \ MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 /* FIXME: add checks for IOS versions, etc */ #define MDBX_HAVE_PWRITEV 1 @@ -279,39 +271,29 @@ typedef struct osal_ioring { MDBX_INTERNAL int osal_ioring_create(osal_ioring_t * #if defined(_WIN32) || defined(_WIN64) , - bool enable_direct, - mdbx_filehandle_t overlapped_fd + bool enable_direct, mdbx_filehandle_t overlapped_fd #endif /* Windows */ ); MDBX_INTERNAL int osal_ioring_resize(osal_ioring_t *, size_t items); MDBX_INTERNAL void osal_ioring_destroy(osal_ioring_t *); MDBX_INTERNAL void osal_ioring_reset(osal_ioring_t *); -MDBX_INTERNAL int osal_ioring_add(osal_ioring_t *ctx, const size_t offset, - void *data, const size_t bytes); +MDBX_INTERNAL int osal_ioring_add(osal_ioring_t *ctx, const size_t offset, void *data, const size_t bytes); typedef struct osal_ioring_write_result { int err; unsigned wops; } osal_ioring_write_result_t; -MDBX_INTERNAL osal_ioring_write_result_t -osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd); +MDBX_INTERNAL osal_ioring_write_result_t osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd); MDBX_INTERNAL void osal_ioring_walk(osal_ioring_t *ior, iov_ctx_t *ctx, - void (*callback)(iov_ctx_t *ctx, - size_t offset, void *data, - size_t bytes)); + void (*callback)(iov_ctx_t *ctx, size_t offset, void *data, size_t bytes)); -MDBX_MAYBE_UNUSED static inline unsigned -osal_ioring_left(const osal_ioring_t *ior) { - return ior->slots_left; -} +MDBX_MAYBE_UNUSED static inline unsigned osal_ioring_left(const osal_ioring_t *ior) { return ior->slots_left; } -MDBX_MAYBE_UNUSED static 
inline unsigned -osal_ioring_used(const osal_ioring_t *ior) { +MDBX_MAYBE_UNUSED static inline unsigned osal_ioring_used(const osal_ioring_t *ior) { return ior->allocated - ior->slots_left; } -MDBX_MAYBE_UNUSED static inline int -osal_ioring_prepare(osal_ioring_t *ior, size_t items, size_t bytes) { +MDBX_MAYBE_UNUSED static inline int osal_ioring_prepare(osal_ioring_t *ior, size_t items, size_t bytes) { items = (items > 32) ? items : 32; #if defined(_WIN32) || defined(_WIN64) if (ior->direct) { @@ -330,13 +312,11 @@ osal_ioring_prepare(osal_ioring_t *ior, size_t items, size_t bytes) { /*----------------------------------------------------------------------------*/ /* libc compatibility stuff */ -#if (!defined(__GLIBC__) && __GLIBC_PREREQ(2, 1)) && \ - (defined(_GNU_SOURCE) || defined(_BSD_SOURCE)) +#if (!defined(__GLIBC__) && __GLIBC_PREREQ(2, 1)) && (defined(_GNU_SOURCE) || defined(_BSD_SOURCE)) #define osal_asprintf asprintf #define osal_vasprintf vasprintf #else -MDBX_MAYBE_UNUSED MDBX_INTERNAL - MDBX_PRINTF_ARGS(2, 3) int osal_asprintf(char **strp, const char *fmt, ...); +MDBX_MAYBE_UNUSED MDBX_INTERNAL MDBX_PRINTF_ARGS(2, 3) int osal_asprintf(char **strp, const char *fmt, ...); MDBX_INTERNAL int osal_vasprintf(char **strp, const char *fmt, va_list ap); #endif @@ -358,14 +338,12 @@ MDBX_MAYBE_UNUSED MDBX_INTERNAL void osal_jitter(bool tiny); #else #define MAX_WRITE UINT32_C(0x3f000000) -#if defined(F_GETLK64) && defined(F_SETLK64) && defined(F_SETLKW64) && \ - !defined(__ANDROID_API__) +#if defined(F_GETLK64) && defined(F_SETLK64) && defined(F_SETLKW64) && !defined(__ANDROID_API__) #define MDBX_F_SETLK F_SETLK64 #define MDBX_F_SETLKW F_SETLKW64 #define MDBX_F_GETLK F_GETLK64 -#if (__GLIBC_PREREQ(2, 28) && \ - (defined(__USE_LARGEFILE64) || defined(__LARGEFILE64_SOURCE) || \ - defined(_USE_LARGEFILE64) || defined(_LARGEFILE64_SOURCE))) || \ +#if (__GLIBC_PREREQ(2, 28) && (defined(__USE_LARGEFILE64) || defined(__LARGEFILE64_SOURCE) || \ + 
defined(_USE_LARGEFILE64) || defined(_LARGEFILE64_SOURCE))) || \ defined(fcntl64) #define MDBX_FCNTL fcntl64 #else @@ -383,8 +361,7 @@ MDBX_MAYBE_UNUSED MDBX_INTERNAL void osal_jitter(bool tiny); #define MDBX_STRUCT_FLOCK struct flock #endif /* MDBX_F_SETLK, MDBX_F_SETLKW, MDBX_F_GETLK */ -#if defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && \ - defined(F_OFD_GETLK64) && !defined(__ANDROID_API__) +#if defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && defined(F_OFD_GETLK64) && !defined(__ANDROID_API__) #define MDBX_F_OFD_SETLK F_OFD_SETLK64 #define MDBX_F_OFD_SETLKW F_OFD_SETLKW64 #define MDBX_F_OFD_GETLK F_OFD_GETLK64 @@ -393,8 +370,7 @@ MDBX_MAYBE_UNUSED MDBX_INTERNAL void osal_jitter(bool tiny); #define MDBX_F_OFD_SETLKW F_OFD_SETLKW #define MDBX_F_OFD_GETLK F_OFD_GETLK #ifndef OFF_T_MAX -#define OFF_T_MAX \ - (((sizeof(off_t) > 4) ? INT64_MAX : INT32_MAX) & ~(size_t)0xFffff) +#define OFF_T_MAX (((sizeof(off_t) > 4) ? INT64_MAX : INT32_MAX) & ~(size_t)0xFffff) #endif /* OFF_T_MAX */ #endif /* MDBX_F_OFD_SETLK64, MDBX_F_OFD_SETLKW64, MDBX_F_OFD_GETLK64 */ @@ -414,8 +390,7 @@ MDBX_MAYBE_UNUSED static inline int osal_get_errno(void) { } #ifndef osal_memalign_alloc -MDBX_INTERNAL int osal_memalign_alloc(size_t alignment, size_t bytes, - void **result); +MDBX_INTERNAL int osal_memalign_alloc(size_t alignment, size_t bytes, void **result); #endif #ifndef osal_memalign_free MDBX_INTERNAL void osal_memalign_free(void *ptr); @@ -433,19 +408,13 @@ MDBX_INTERNAL int osal_fastmutex_acquire(osal_fastmutex_t *fastmutex); MDBX_INTERNAL int osal_fastmutex_release(osal_fastmutex_t *fastmutex); MDBX_INTERNAL int osal_fastmutex_destroy(osal_fastmutex_t *fastmutex); -MDBX_INTERNAL int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov, - size_t sgvcnt, uint64_t offset); -MDBX_INTERNAL int osal_pread(mdbx_filehandle_t fd, void *buf, size_t count, - uint64_t offset); -MDBX_INTERNAL int osal_pwrite(mdbx_filehandle_t fd, const void *buf, - size_t count, uint64_t offset); 
-MDBX_INTERNAL int osal_write(mdbx_filehandle_t fd, const void *buf, - size_t count); +MDBX_INTERNAL int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov, size_t sgvcnt, uint64_t offset); +MDBX_INTERNAL int osal_pread(mdbx_filehandle_t fd, void *buf, size_t count, uint64_t offset); +MDBX_INTERNAL int osal_pwrite(mdbx_filehandle_t fd, const void *buf, size_t count, uint64_t offset); +MDBX_INTERNAL int osal_write(mdbx_filehandle_t fd, const void *buf, size_t count); -MDBX_INTERNAL int -osal_thread_create(osal_thread_t *thread, - THREAD_RESULT(THREAD_CALL *start_routine)(void *), - void *arg); +MDBX_INTERNAL int osal_thread_create(osal_thread_t *thread, THREAD_RESULT(THREAD_CALL *start_routine)(void *), + void *arg); MDBX_INTERNAL int osal_thread_join(osal_thread_t thread); enum osal_syncmode_bits { @@ -456,8 +425,7 @@ enum osal_syncmode_bits { MDBX_SYNC_IODQ = 8 }; -MDBX_INTERNAL int osal_fsync(mdbx_filehandle_t fd, - const enum osal_syncmode_bits mode_bits); +MDBX_INTERNAL int osal_fsync(mdbx_filehandle_t fd, const enum osal_syncmode_bits mode_bits); MDBX_INTERNAL int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length); MDBX_INTERNAL int osal_fseek(mdbx_filehandle_t fd, uint64_t pos); MDBX_INTERNAL int osal_filesize(mdbx_filehandle_t fd, uint64_t *length); @@ -483,14 +451,11 @@ MDBX_MAYBE_UNUSED static inline bool osal_isdirsep(pathchar_t c) { c == '/'; } -MDBX_INTERNAL bool osal_pathequal(const pathchar_t *l, const pathchar_t *r, - size_t len); +MDBX_INTERNAL bool osal_pathequal(const pathchar_t *l, const pathchar_t *r, size_t len); MDBX_INTERNAL pathchar_t *osal_fileext(const pathchar_t *pathname, size_t len); MDBX_INTERNAL int osal_fileexists(const pathchar_t *pathname); -MDBX_INTERNAL int osal_openfile(const enum osal_openfile_purpose purpose, - const MDBX_env *env, const pathchar_t *pathname, - mdbx_filehandle_t *fd, - mdbx_mode_t unix_mode_bits); +MDBX_INTERNAL int osal_openfile(const enum osal_openfile_purpose purpose, const MDBX_env *env, + const 
pathchar_t *pathname, mdbx_filehandle_t *fd, mdbx_mode_t unix_mode_bits); MDBX_INTERNAL int osal_closefile(mdbx_filehandle_t fd); MDBX_INTERNAL int osal_removefile(const pathchar_t *pathname); MDBX_INTERNAL int osal_removedirectory(const pathchar_t *pathname); @@ -499,26 +464,21 @@ MDBX_INTERNAL int osal_lockfile(mdbx_filehandle_t fd, bool wait); #define MMAP_OPTION_TRUNCATE 1 #define MMAP_OPTION_SEMAPHORE 2 -MDBX_INTERNAL int osal_mmap(const int flags, osal_mmap_t *map, size_t size, - const size_t limit, const unsigned options); +MDBX_INTERNAL int osal_mmap(const int flags, osal_mmap_t *map, size_t size, const size_t limit, const unsigned options); MDBX_INTERNAL int osal_munmap(osal_mmap_t *map); #define MDBX_MRESIZE_MAY_MOVE 0x00000100 #define MDBX_MRESIZE_MAY_UNMAP 0x00000200 -MDBX_INTERNAL int osal_mresize(const int flags, osal_mmap_t *map, size_t size, - size_t limit); +MDBX_INTERNAL int osal_mresize(const int flags, osal_mmap_t *map, size_t size, size_t limit); #if defined(_WIN32) || defined(_WIN64) typedef struct { unsigned limit, count; HANDLE handles[31]; } mdbx_handle_array_t; -MDBX_INTERNAL int -osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array); +MDBX_INTERNAL int osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array); MDBX_INTERNAL int osal_resume_threads_after_remap(mdbx_handle_array_t *array); #endif /* Windows */ -MDBX_INTERNAL int osal_msync(const osal_mmap_t *map, size_t offset, - size_t length, enum osal_syncmode_bits mode_bits); -MDBX_INTERNAL int osal_check_fs_rdonly(mdbx_filehandle_t handle, - const pathchar_t *pathname, int err); +MDBX_INTERNAL int osal_msync(const osal_mmap_t *map, size_t offset, size_t length, enum osal_syncmode_bits mode_bits); +MDBX_INTERNAL int osal_check_fs_rdonly(mdbx_filehandle_t handle, const pathchar_t *pathname, int err); MDBX_INTERNAL int osal_check_fs_incore(mdbx_filehandle_t handle); MDBX_MAYBE_UNUSED static inline uint32_t osal_getpid(void) { @@ -549,8 +509,7 @@ 
MDBX_INTERNAL int osal_check_tid4bionic(void); static inline int osal_check_tid4bionic(void) { return 0; } #endif /* __ANDROID_API__ || ANDROID) || BIONIC */ -MDBX_MAYBE_UNUSED static inline int -osal_pthread_mutex_lock(pthread_mutex_t *mutex) { +MDBX_MAYBE_UNUSED static inline int osal_pthread_mutex_lock(pthread_mutex_t *mutex) { int err = osal_check_tid4bionic(); return unlikely(err) ? err : pthread_mutex_lock(mutex); } @@ -561,8 +520,7 @@ MDBX_INTERNAL uint64_t osal_cputime(size_t *optional_page_faults); MDBX_INTERNAL uint64_t osal_16dot16_to_monotime(uint32_t seconds_16dot16); MDBX_INTERNAL uint32_t osal_monotime_to_16dot16(uint64_t monotime); -MDBX_MAYBE_UNUSED static inline uint32_t -osal_monotime_to_16dot16_noUnderflow(uint64_t monotime) { +MDBX_MAYBE_UNUSED static inline uint32_t osal_monotime_to_16dot16_noUnderflow(uint64_t monotime) { uint32_t seconds_16dot16 = osal_monotime_to_16dot16(monotime); return seconds_16dot16 ? seconds_16dot16 : /* fix underflow */ (monotime > 0); } @@ -589,10 +547,8 @@ MDBX_INTERNAL bin128_t osal_guid(const MDBX_env *); /*----------------------------------------------------------------------------*/ -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint64_t -osal_bswap64(uint64_t v) { -#if __GNUC_PREREQ(4, 4) || __CLANG_PREREQ(4, 0) || \ - __has_builtin(__builtin_bswap64) +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint64_t osal_bswap64(uint64_t v) { +#if __GNUC_PREREQ(4, 4) || __CLANG_PREREQ(4, 0) || __has_builtin(__builtin_bswap64) return __builtin_bswap64(v); #elif defined(_MSC_VER) && !defined(__clang__) return _byteswap_uint64(v); @@ -601,19 +557,14 @@ osal_bswap64(uint64_t v) { #elif defined(bswap_64) return bswap_64(v); #else - return v << 56 | v >> 56 | ((v << 40) & UINT64_C(0x00ff000000000000)) | - ((v << 24) & UINT64_C(0x0000ff0000000000)) | - ((v << 8) & UINT64_C(0x000000ff00000000)) | - ((v >> 8) & UINT64_C(0x00000000ff000000)) | - ((v >> 24) & UINT64_C(0x0000000000ff0000)) | - ((v >> 40) & 
UINT64_C(0x000000000000ff00)); + return v << 56 | v >> 56 | ((v << 40) & UINT64_C(0x00ff000000000000)) | ((v << 24) & UINT64_C(0x0000ff0000000000)) | + ((v << 8) & UINT64_C(0x000000ff00000000)) | ((v >> 8) & UINT64_C(0x00000000ff000000)) | + ((v >> 24) & UINT64_C(0x0000000000ff0000)) | ((v >> 40) & UINT64_C(0x000000000000ff00)); #endif } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint32_t -osal_bswap32(uint32_t v) { -#if __GNUC_PREREQ(4, 4) || __CLANG_PREREQ(4, 0) || \ - __has_builtin(__builtin_bswap32) +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint32_t osal_bswap32(uint32_t v) { +#if __GNUC_PREREQ(4, 4) || __CLANG_PREREQ(4, 0) || __has_builtin(__builtin_bswap32) return __builtin_bswap32(v); #elif defined(_MSC_VER) && !defined(__clang__) return _byteswap_ulong(v); @@ -622,7 +573,6 @@ osal_bswap32(uint32_t v) { #elif defined(bswap_32) return bswap_32(v); #else - return v << 24 | v >> 24 | ((v << 8) & UINT32_C(0x00ff0000)) | - ((v >> 8) & UINT32_C(0x0000ff00)); + return v << 24 | v >> 24 | ((v << 8) & UINT32_C(0x00ff0000)) | ((v >> 8) & UINT32_C(0x0000ff00)); #endif } diff --git a/src/page-get.c b/src/page-get.c index 57f2e177..a07768a7 100644 --- a/src/page-get.c +++ b/src/page-get.c @@ -3,17 +3,14 @@ #include "internals.h" -__cold int MDBX_PRINTF_ARGS(2, 3) - bad_page(const page_t *mp, const char *fmt, ...) { +__cold int MDBX_PRINTF_ARGS(2, 3) bad_page(const page_t *mp, const char *fmt, ...) 
{ if (LOG_ENABLED(MDBX_LOG_ERROR)) { static const page_t *prev; if (prev != mp) { char buf4unknown[16]; prev = mp; - debug_log(MDBX_LOG_ERROR, "badpage", 0, - "corrupted %s-page #%u, mod-txnid %" PRIaTXN "\n", - pagetype_caption(page_type(mp), buf4unknown), mp->pgno, - mp->txnid); + debug_log(MDBX_LOG_ERROR, "badpage", 0, "corrupted %s-page #%u, mod-txnid %" PRIaTXN "\n", + pagetype_caption(page_type(mp), buf4unknown), mp->pgno, mp->txnid); } va_list args; @@ -24,17 +21,14 @@ __cold int MDBX_PRINTF_ARGS(2, 3) return MDBX_CORRUPTED; } -__cold void MDBX_PRINTF_ARGS(2, 3) - poor_page(const page_t *mp, const char *fmt, ...) { +__cold void MDBX_PRINTF_ARGS(2, 3) poor_page(const page_t *mp, const char *fmt, ...) { if (LOG_ENABLED(MDBX_LOG_NOTICE)) { static const page_t *prev; if (prev != mp) { char buf4unknown[16]; prev = mp; - debug_log(MDBX_LOG_NOTICE, "poorpage", 0, - "suboptimal %s-page #%u, mod-txnid %" PRIaTXN "\n", - pagetype_caption(page_type(mp), buf4unknown), mp->pgno, - mp->txnid); + debug_log(MDBX_LOG_NOTICE, "poorpage", 0, "suboptimal %s-page #%u, mod-txnid %" PRIaTXN "\n", + pagetype_caption(page_type(mp), buf4unknown), mp->pgno, mp->txnid); } va_list args; @@ -63,21 +57,17 @@ __cold int page_check(const MDBX_cursor *const mc, const page_t *const mp) { const ptrdiff_t offset = ptr_dist(mp, env->dxb_mmap.base); unsigned flags_mask = P_ILL_BITS; unsigned flags_expected = 0; - if (offset < 0 || - offset > (ptrdiff_t)(pgno2bytes(env, mc->txn->geo.first_unallocated) - - ((mp->flags & P_SUBP) ? PAGEHDRSZ + 1 : env->ps))) { + if (offset < 0 || offset > (ptrdiff_t)(pgno2bytes(env, mc->txn->geo.first_unallocated) - + ((mp->flags & P_SUBP) ? PAGEHDRSZ + 1 : env->ps))) { /* should be dirty page without MDBX_WRITEMAP, or a subpage of. 
*/ flags_mask -= P_SUBP; - if ((env->flags & MDBX_WRITEMAP) != 0 || - (!is_shadowed(mc->txn, mp) && !(mp->flags & P_SUBP))) - rc = bad_page(mp, "invalid page-address %p, offset %zi\n", - __Wpedantic_format_voidptr(mp), offset); + if ((env->flags & MDBX_WRITEMAP) != 0 || (!is_shadowed(mc->txn, mp) && !(mp->flags & P_SUBP))) + rc = bad_page(mp, "invalid page-address %p, offset %zi\n", __Wpedantic_format_voidptr(mp), offset); } else if (offset & (env->ps - 1)) flags_expected = P_SUBP; if (unlikely((mp->flags & flags_mask) != flags_expected)) - rc = bad_page(mp, "unknown/extra page-flags (have 0x%x, expect 0x%x)\n", - mp->flags & flags_mask, flags_expected); + rc = bad_page(mp, "unknown/extra page-flags (have 0x%x, expect 0x%x)\n", mp->flags & flags_mask, flags_expected); cASSERT(mc, (mc->checking & z_dupfix) == 0 || (mc->flags & z_inner) != 0); const uint8_t type = page_type(mp); @@ -86,82 +76,62 @@ __cold int page_check(const MDBX_cursor *const mc, const page_t *const mp) { return bad_page(mp, "invalid type (%u)\n", type); case P_LARGE: if (unlikely(mc->flags & z_inner)) - rc = bad_page(mp, "unexpected %s-page for %s (db-flags 0x%x)\n", "large", - "nested dupsort tree", mc->tree->flags); + rc = bad_page(mp, "unexpected %s-page for %s (db-flags 0x%x)\n", "large", "nested dupsort tree", mc->tree->flags); const pgno_t npages = mp->pages; if (unlikely(npages < 1 || npages >= MAX_PAGENO / 2)) rc = bad_page(mp, "invalid n-pages (%u) for large-page\n", npages); if (unlikely(mp->pgno + npages > mc->txn->geo.first_unallocated)) - rc = bad_page( - mp, "end of large-page beyond (%u) allocated space (%u next-pgno)\n", - mp->pgno + npages, mc->txn->geo.first_unallocated); + rc = bad_page(mp, "end of large-page beyond (%u) allocated space (%u next-pgno)\n", mp->pgno + npages, + mc->txn->geo.first_unallocated); return rc; //-------------------------- end of large/overflow page handling case P_LEAF | P_SUBP: if (unlikely(mc->tree->height != 1)) - rc = bad_page(mp, "unexpected 
%s-page for %s (db-flags 0x%x)\n", - "leaf-sub", "nested dupsort db", mc->tree->flags); + rc = + bad_page(mp, "unexpected %s-page for %s (db-flags 0x%x)\n", "leaf-sub", "nested dupsort db", mc->tree->flags); /* fall through */ __fallthrough; case P_LEAF: if (unlikely((mc->checking & z_dupfix) != 0)) - rc = bad_page(mp, - "unexpected leaf-page for dupfix subtree (db-lags 0x%x)\n", - mc->tree->flags); + rc = bad_page(mp, "unexpected leaf-page for dupfix subtree (db-lags 0x%x)\n", mc->tree->flags); break; case P_LEAF | P_DUPFIX | P_SUBP: if (unlikely(mc->tree->height != 1)) - rc = bad_page(mp, "unexpected %s-page for %s (db-flags 0x%x)\n", - "leaf2-sub", "nested dupsort db", mc->tree->flags); + rc = bad_page(mp, "unexpected %s-page for %s (db-flags 0x%x)\n", "leaf2-sub", "nested dupsort db", + mc->tree->flags); /* fall through */ __fallthrough; case P_LEAF | P_DUPFIX: if (unlikely((mc->checking & z_dupfix) == 0)) - rc = bad_page( - mp, - "unexpected leaf2-page for non-dupfix (sub)tree (db-flags 0x%x)\n", - mc->tree->flags); + rc = bad_page(mp, "unexpected leaf2-page for non-dupfix (sub)tree (db-flags 0x%x)\n", mc->tree->flags); break; case P_BRANCH: break; } - if (unlikely(mp->upper < mp->lower || (mp->lower & 1) || - PAGEHDRSZ + mp->upper > env->ps)) - rc = bad_page(mp, "invalid page lower(%u)/upper(%u) with limit %zu\n", - mp->lower, mp->upper, page_space(env)); + if (unlikely(mp->upper < mp->lower || (mp->lower & 1) || PAGEHDRSZ + mp->upper > env->ps)) + rc = bad_page(mp, "invalid page lower(%u)/upper(%u) with limit %zu\n", mp->lower, mp->upper, page_space(env)); const char *const end_of_page = ptr_disp(mp, env->ps); const size_t nkeys = page_numkeys(mp); STATIC_ASSERT(P_BRANCH == 1); if (unlikely(nkeys <= (uint8_t)(mp->flags & P_BRANCH))) { if ((!(mc->flags & z_inner) || mc->tree->items) && - (!(mc->checking & z_updating) || - !(is_modifable(mc->txn, mp) || (mp->flags & P_SUBP)))) - rc = - bad_page(mp, "%s-page nkeys (%zu) < %u\n", - is_branch(mp) ? 
"branch" : "leaf", nkeys, 1 + is_branch(mp)); + (!(mc->checking & z_updating) || !(is_modifable(mc->txn, mp) || (mp->flags & P_SUBP)))) + rc = bad_page(mp, "%s-page nkeys (%zu) < %u\n", is_branch(mp) ? "branch" : "leaf", nkeys, 1 + is_branch(mp)); } const size_t ksize_max = keysize_max(env->ps, 0); const size_t leaf2_ksize = mp->dupfix_ksize; if (is_dupfix_leaf(mp)) { - if (unlikely((mc->flags & z_inner) == 0 || - (mc->tree->flags & MDBX_DUPFIXED) == 0)) - rc = bad_page(mp, "unexpected leaf2-page (db-flags 0x%x)\n", - mc->tree->flags); + if (unlikely((mc->flags & z_inner) == 0 || (mc->tree->flags & MDBX_DUPFIXED) == 0)) + rc = bad_page(mp, "unexpected leaf2-page (db-flags 0x%x)\n", mc->tree->flags); else if (unlikely(leaf2_ksize != mc->tree->dupfix_size)) rc = bad_page(mp, "invalid leaf2_ksize %zu\n", leaf2_ksize); else if (unlikely(((leaf2_ksize & nkeys) ^ mp->upper) & 1)) - rc = bad_page( - mp, "invalid page upper (%u) for nkeys %zu with leaf2-length %zu\n", - mp->upper, nkeys, leaf2_ksize); + rc = bad_page(mp, "invalid page upper (%u) for nkeys %zu with leaf2-length %zu\n", mp->upper, nkeys, leaf2_ksize); } else { - if (unlikely((mp->upper & 1) || - PAGEHDRSZ + mp->upper + nkeys * sizeof(node_t) + nkeys - 1 > - env->ps)) - rc = - bad_page(mp, "invalid page upper (%u) for nkeys %zu with limit %zu\n", - mp->upper, nkeys, page_space(env)); + if (unlikely((mp->upper & 1) || PAGEHDRSZ + mp->upper + nkeys * sizeof(node_t) + nkeys - 1 > env->ps)) + rc = bad_page(mp, "invalid page upper (%u) for nkeys %zu with limit %zu\n", mp->upper, nkeys, page_space(env)); } MDBX_val here, prev = {0, 0}; @@ -170,17 +140,14 @@ __cold int page_check(const MDBX_cursor *const mc, const page_t *const mp) { if (is_dupfix_leaf(mp)) { const char *const key = page_dupfix_ptr(mp, i, mc->tree->dupfix_size); if (unlikely(end_of_page < key + leaf2_ksize)) { - rc = bad_page(mp, "leaf2-item beyond (%zu) page-end\n", - key + leaf2_ksize - end_of_page); + rc = bad_page(mp, "leaf2-item beyond (%zu) 
page-end\n", key + leaf2_ksize - end_of_page); continue; } if (unlikely(leaf2_ksize != mc->clc->k.lmin)) { - if (unlikely(leaf2_ksize < mc->clc->k.lmin || - leaf2_ksize > mc->clc->k.lmax)) - rc = bad_page(mp, - "leaf2-item size (%zu) <> min/max length (%zu/%zu)\n", - leaf2_ksize, mc->clc->k.lmin, mc->clc->k.lmax); + if (unlikely(leaf2_ksize < mc->clc->k.lmin || leaf2_ksize > mc->clc->k.lmax)) + rc = bad_page(mp, "leaf2-item size (%zu) <> min/max length (%zu/%zu)\n", leaf2_ksize, mc->clc->k.lmin, + mc->clc->k.lmax); else mc->clc->k.lmin = mc->clc->k.lmax = leaf2_ksize; } @@ -188,16 +155,14 @@ __cold int page_check(const MDBX_cursor *const mc, const page_t *const mp) { here.iov_base = (void *)key; here.iov_len = leaf2_ksize; if (prev.iov_base && unlikely(mc->clc->k.cmp(&prev, &here) >= 0)) - rc = bad_page(mp, "leaf2-item #%zu wrong order (%s >= %s)\n", i, - DKEY(&prev), DVAL(&here)); + rc = bad_page(mp, "leaf2-item #%zu wrong order (%s >= %s)\n", i, DKEY(&prev), DVAL(&here)); prev = here; } } else { const node_t *const node = page_node(mp, i); const char *const node_end = ptr_disp(node, NODESIZE); if (unlikely(node_end > end_of_page)) { - rc = bad_page(mp, "node[%zu] (%zu) beyond page-end\n", i, - node_end - end_of_page); + rc = bad_page(mp, "node[%zu] (%zu) beyond page-end\n", i, node_end - end_of_page); continue; } const size_t ksize = node_ks(node); @@ -205,44 +170,36 @@ __cold int page_check(const MDBX_cursor *const mc, const page_t *const mp) { rc = bad_page(mp, "node[%zu] too long key (%zu)\n", i, ksize); const char *const key = node_key(node); if (unlikely(end_of_page < key + ksize)) { - rc = bad_page(mp, "node[%zu] key (%zu) beyond page-end\n", i, - key + ksize - end_of_page); + rc = bad_page(mp, "node[%zu] key (%zu) beyond page-end\n", i, key + ksize - end_of_page); continue; } if ((is_leaf(mp) || i > 0)) { if (unlikely(ksize < mc->clc->k.lmin || ksize > mc->clc->k.lmax)) - rc = bad_page( - mp, "node[%zu] key size (%zu) <> min/max key-length (%zu/%zu)\n", - 
i, ksize, mc->clc->k.lmin, mc->clc->k.lmax); + rc = bad_page(mp, "node[%zu] key size (%zu) <> min/max key-length (%zu/%zu)\n", i, ksize, mc->clc->k.lmin, + mc->clc->k.lmax); if ((mc->checking & z_ignord) == 0) { here.iov_base = (void *)key; here.iov_len = ksize; if (prev.iov_base && unlikely(mc->clc->k.cmp(&prev, &here) >= 0)) - rc = bad_page(mp, "node[%zu] key wrong order (%s >= %s)\n", i, - DKEY(&prev), DVAL(&here)); + rc = bad_page(mp, "node[%zu] key wrong order (%s >= %s)\n", i, DKEY(&prev), DVAL(&here)); prev = here; } } if (is_branch(mp)) { if ((mc->checking & z_updating) == 0 && i == 0 && unlikely(ksize != 0)) - rc = bad_page(mp, "branch-node[%zu] wrong 0-node key-length (%zu)\n", - i, ksize); + rc = bad_page(mp, "branch-node[%zu] wrong 0-node key-length (%zu)\n", i, ksize); const pgno_t ref = node_pgno(node); - if (unlikely(ref < MIN_PAGENO) || - (unlikely(ref >= mc->txn->geo.first_unallocated) && - (unlikely(ref >= mc->txn->geo.now) || - !(mc->checking & z_retiring)))) + if (unlikely(ref < MIN_PAGENO) || (unlikely(ref >= mc->txn->geo.first_unallocated) && + (unlikely(ref >= mc->txn->geo.now) || !(mc->checking & z_retiring)))) rc = bad_page(mp, "branch-node[%zu] wrong pgno (%u)\n", i, ref); if (unlikely(node_flags(node))) - rc = bad_page(mp, "branch-node[%zu] wrong flags (%u)\n", i, - node_flags(node)); + rc = bad_page(mp, "branch-node[%zu] wrong flags (%u)\n", i, node_flags(node)); continue; } switch (node_flags(node)) { default: - rc = - bad_page(mp, "invalid node[%zu] flags (%u)\n", i, node_flags(node)); + rc = bad_page(mp, "invalid node[%zu] flags (%u)\n", i, node_flags(node)); break; case N_BIG /* data on large-page */: case 0 /* usual */: @@ -256,46 +213,36 @@ __cold int page_check(const MDBX_cursor *const mc, const page_t *const mp) { const char *const data = node_data(node); if (node_flags(node) & N_BIG) { if (unlikely(end_of_page < data + sizeof(pgno_t))) { - rc = bad_page( - mp, "node-%s(%zu of %zu, %zu bytes) beyond (%zu) page-end\n", - 
"bigdata-pgno", i, nkeys, dsize, data + dsize - end_of_page); + rc = bad_page(mp, "node-%s(%zu of %zu, %zu bytes) beyond (%zu) page-end\n", "bigdata-pgno", i, nkeys, dsize, + data + dsize - end_of_page); continue; } if (unlikely(dsize <= v_clc.lmin || dsize > v_clc.lmax)) - rc = bad_page( - mp, - "big-node data size (%zu) <> min/max value-length (%zu/%zu)\n", - dsize, v_clc.lmin, v_clc.lmax); - if (unlikely(node_size_len(node_ks(node), dsize) <= - mc->txn->env->leaf_nodemax) && + rc = bad_page(mp, "big-node data size (%zu) <> min/max value-length (%zu/%zu)\n", dsize, v_clc.lmin, + v_clc.lmax); + if (unlikely(node_size_len(node_ks(node), dsize) <= mc->txn->env->leaf_nodemax) && mc->tree != &mc->txn->dbs[FREE_DBI]) poor_page(mp, "too small data (%zu bytes) for bigdata-node", dsize); if ((mc->checking & z_retiring) == 0) { - const pgr_t lp = - page_get_large(mc, node_largedata_pgno(node), mp->txnid); + const pgr_t lp = page_get_large(mc, node_largedata_pgno(node), mp->txnid); if (unlikely(lp.err != MDBX_SUCCESS)) return lp.err; cASSERT(mc, page_type(lp.page) == P_LARGE); const unsigned npages = largechunk_npages(env, dsize); if (unlikely(lp.page->pages != npages)) { if (lp.page->pages < npages) - rc = bad_page(lp.page, - "too less n-pages %u for bigdata-node (%zu bytes)", - lp.page->pages, dsize); + rc = bad_page(lp.page, "too less n-pages %u for bigdata-node (%zu bytes)", lp.page->pages, dsize); else if (mc->tree != &mc->txn->dbs[FREE_DBI]) - poor_page(lp.page, - "extra n-pages %u for bigdata-node (%zu bytes)", - lp.page->pages, dsize); + poor_page(lp.page, "extra n-pages %u for bigdata-node (%zu bytes)", lp.page->pages, dsize); } } continue; } if (unlikely(end_of_page < data + dsize)) { - rc = bad_page(mp, - "node-%s(%zu of %zu, %zu bytes) beyond (%zu) page-end\n", - "data", i, nkeys, dsize, data + dsize - end_of_page); + rc = bad_page(mp, "node-%s(%zu of %zu, %zu bytes) beyond (%zu) page-end\n", "data", i, nkeys, dsize, + data + dsize - end_of_page); continue; } @@ 
-305,9 +252,7 @@ __cold int page_check(const MDBX_cursor *const mc, const page_t *const mp) { continue; case 0 /* usual */: if (unlikely(dsize < v_clc.lmin || dsize > v_clc.lmax)) { - rc = bad_page( - mp, "node-data size (%zu) <> min/max value-length (%zu/%zu)\n", - dsize, v_clc.lmin, v_clc.lmax); + rc = bad_page(mp, "node-data size (%zu) <> min/max value-length (%zu/%zu)\n", dsize, v_clc.lmin, v_clc.lmax); continue; } break; @@ -319,15 +264,13 @@ __cold int page_check(const MDBX_cursor *const mc, const page_t *const mp) { break; case N_TREE | N_DUP /* dupsorted sub-tree */: if (unlikely(dsize != sizeof(tree_t))) { - rc = bad_page(mp, "invalid nested-db record size (%zu, expect %zu)\n", - dsize, sizeof(tree_t)); + rc = bad_page(mp, "invalid nested-db record size (%zu, expect %zu)\n", dsize, sizeof(tree_t)); continue; } break; case N_DUP /* short sub-page */: if (unlikely(dsize <= PAGEHDRSZ)) { - rc = bad_page(mp, "invalid nested/sub-page record size (%zu)\n", - dsize); + rc = bad_page(mp, "invalid nested/sub-page record size (%zu)\n", dsize); continue; } else { const page_t *const sp = (page_t *)data; @@ -337,28 +280,23 @@ __cold int page_check(const MDBX_cursor *const mc, const page_t *const mp) { case P_LEAF | P_DUPFIX | P_SUBP: break; default: - rc = bad_page(mp, "invalid nested/sub-page flags (0x%02x)\n", - sp->flags); + rc = bad_page(mp, "invalid nested/sub-page flags (0x%02x)\n", sp->flags); continue; } const char *const end_of_subpage = data + dsize; const intptr_t nsubkeys = page_numkeys(sp); - if (unlikely(nsubkeys == 0) && !(mc->checking & z_updating) && - mc->tree->items) - rc = bad_page(mp, "no keys on a %s-page\n", - is_dupfix_leaf(sp) ? "leaf2-sub" : "leaf-sub"); + if (unlikely(nsubkeys == 0) && !(mc->checking & z_updating) && mc->tree->items) + rc = bad_page(mp, "no keys on a %s-page\n", is_dupfix_leaf(sp) ? 
"leaf2-sub" : "leaf-sub"); MDBX_val sub_here, sub_prev = {0, 0}; for (int ii = 0; ii < nsubkeys; ii++) { if (is_dupfix_leaf(sp)) { /* DUPFIX pages have no entries[] or node headers */ const size_t sub_ksize = sp->dupfix_ksize; - const char *const sub_key = - page_dupfix_ptr(sp, ii, mc->tree->dupfix_size); + const char *const sub_key = page_dupfix_ptr(sp, ii, mc->tree->dupfix_size); if (unlikely(end_of_subpage < sub_key + sub_ksize)) { - rc = bad_page(mp, "nested-leaf2-key beyond (%zu) nested-page\n", - sub_key + sub_ksize - end_of_subpage); + rc = bad_page(mp, "nested-leaf2-key beyond (%zu) nested-page\n", sub_key + sub_ksize - end_of_subpage); continue; } @@ -374,24 +312,20 @@ __cold int page_check(const MDBX_cursor *const mc, const page_t *const mp) { if ((mc->checking & z_ignord) == 0) { sub_here.iov_base = (void *)sub_key; sub_here.iov_len = sub_ksize; - if (sub_prev.iov_base && - unlikely(v_clc.cmp(&sub_prev, &sub_here) >= 0)) - rc = bad_page(mp, - "nested-leaf2-key #%u wrong order (%s >= %s)\n", - ii, DKEY(&sub_prev), DVAL(&sub_here)); + if (sub_prev.iov_base && unlikely(v_clc.cmp(&sub_prev, &sub_here) >= 0)) + rc = bad_page(mp, "nested-leaf2-key #%u wrong order (%s >= %s)\n", ii, DKEY(&sub_prev), + DVAL(&sub_here)); sub_prev = sub_here; } } else { const node_t *const sub_node = page_node(sp, ii); const char *const sub_node_end = ptr_disp(sub_node, NODESIZE); if (unlikely(sub_node_end > end_of_subpage)) { - rc = bad_page(mp, "nested-node beyond (%zu) nested-page\n", - end_of_subpage - sub_node_end); + rc = bad_page(mp, "nested-node beyond (%zu) nested-page\n", end_of_subpage - sub_node_end); continue; } if (unlikely(node_flags(sub_node) != 0)) - rc = bad_page(mp, "nested-node invalid flags (%u)\n", - node_flags(sub_node)); + rc = bad_page(mp, "nested-node invalid flags (%u)\n", node_flags(sub_node)); const size_t sub_ksize = node_ks(sub_node); const char *const sub_key = node_key(sub_node); @@ -406,19 +340,15 @@ __cold int page_check(const MDBX_cursor *const 
mc, const page_t *const mp) { if ((mc->checking & z_ignord) == 0) { sub_here.iov_base = (void *)sub_key; sub_here.iov_len = sub_ksize; - if (sub_prev.iov_base && - unlikely(v_clc.cmp(&sub_prev, &sub_here) >= 0)) - rc = bad_page(mp, - "nested-node-key #%u wrong order (%s >= %s)\n", - ii, DKEY(&sub_prev), DVAL(&sub_here)); + if (sub_prev.iov_base && unlikely(v_clc.cmp(&sub_prev, &sub_here) >= 0)) + rc = bad_page(mp, "nested-node-key #%u wrong order (%s >= %s)\n", ii, DKEY(&sub_prev), + DVAL(&sub_here)); sub_prev = sub_here; } if (unlikely(sub_dsize != 0)) - rc = bad_page(mp, "nested-node non-empty data size (%zu)\n", - sub_dsize); + rc = bad_page(mp, "nested-node non-empty data size (%zu)\n", sub_dsize); if (unlikely(end_of_subpage < sub_key + sub_ksize)) - rc = bad_page(mp, "nested-node-key beyond (%zu) nested-page\n", - sub_key + sub_ksize - end_of_subpage); + rc = bad_page(mp, "nested-node-key beyond (%zu) nested-page\n", sub_key + sub_ksize - end_of_subpage); } } } @@ -429,9 +359,7 @@ __cold int page_check(const MDBX_cursor *const mc, const page_t *const mp) { return rc; } -static __always_inline int check_page_header(const uint16_t ILL, - const page_t *page, - MDBX_txn *const txn, +static __always_inline int check_page_header(const uint16_t ILL, const page_t *page, MDBX_txn *const txn, const txnid_t front) { if (unlikely(page->flags & ILL)) { if (ILL == P_ILL_BITS || (page->flags & P_ILL_BITS)) @@ -439,59 +367,44 @@ static __always_inline int check_page_header(const uint16_t ILL, else if (ILL & P_LARGE) { assert((ILL & (P_BRANCH | P_LEAF | P_DUPFIX)) == 0); assert(page->flags & (P_BRANCH | P_LEAF | P_DUPFIX)); - return bad_page(page, "unexpected %s instead of %s (%u)\n", - "large/overflow", "branch/leaf/leaf2", page->flags); + return bad_page(page, "unexpected %s instead of %s (%u)\n", "large/overflow", "branch/leaf/leaf2", page->flags); } else if (ILL & (P_BRANCH | P_LEAF | P_DUPFIX)) { assert((ILL & P_BRANCH) && (ILL & P_LEAF) && (ILL & P_DUPFIX)); 
assert(page->flags & (P_BRANCH | P_LEAF | P_DUPFIX)); - return bad_page(page, "unexpected %s instead of %s (%u)\n", - "branch/leaf/leaf2", "large/overflow", page->flags); + return bad_page(page, "unexpected %s instead of %s (%u)\n", "branch/leaf/leaf2", "large/overflow", page->flags); } else { assert(false); } } - if (unlikely(page->txnid > front) && - unlikely(page->txnid > txn->front_txnid || front < txn->txnid)) - return bad_page( - page, - "invalid page' txnid (%" PRIaTXN ") for %s' txnid (%" PRIaTXN ")\n", - page->txnid, - (front == txn->front_txnid && front != txn->txnid) ? "front-txn" - : "parent-page", - front); + if (unlikely(page->txnid > front) && unlikely(page->txnid > txn->front_txnid || front < txn->txnid)) + return bad_page(page, "invalid page' txnid (%" PRIaTXN ") for %s' txnid (%" PRIaTXN ")\n", page->txnid, + (front == txn->front_txnid && front != txn->txnid) ? "front-txn" : "parent-page", front); - if (((ILL & P_LARGE) || !is_largepage(page)) && - (ILL & (P_BRANCH | P_LEAF | P_DUPFIX)) == 0) { + if (((ILL & P_LARGE) || !is_largepage(page)) && (ILL & (P_BRANCH | P_LEAF | P_DUPFIX)) == 0) { /* Контроль четности page->upper тут либо приводит к ложным ошибкам, * либо слишком дорог по количеству операций. Заковырка в том, что upper * может быть нечетным на DUPFIX-страницах, при нечетном количестве * элементов нечетной длины. Поэтому четность page->upper здесь не * проверяется, но соответствующие полные проверки есть в page_check(). 
*/ - if (unlikely(page->upper < page->lower || (page->lower & 1) || - PAGEHDRSZ + page->upper > txn->env->ps)) - return bad_page(page, - "invalid page' lower(%u)/upper(%u) with limit %zu\n", - page->lower, page->upper, page_space(txn->env)); + if (unlikely(page->upper < page->lower || (page->lower & 1) || PAGEHDRSZ + page->upper > txn->env->ps)) + return bad_page(page, "invalid page' lower(%u)/upper(%u) with limit %zu\n", page->lower, page->upper, + page_space(txn->env)); } else if ((ILL & P_LARGE) == 0) { const pgno_t npages = page->pages; if (unlikely(npages < 1) || unlikely(npages >= MAX_PAGENO / 2)) return bad_page(page, "invalid n-pages (%u) for large-page\n", npages); if (unlikely(page->pgno + npages > txn->geo.first_unallocated)) - return bad_page( - page, - "end of large-page beyond (%u) allocated space (%u next-pgno)\n", - page->pgno + npages, txn->geo.first_unallocated); + return bad_page(page, "end of large-page beyond (%u) allocated space (%u next-pgno)\n", page->pgno + npages, + txn->geo.first_unallocated); } else { assert(false); } return MDBX_SUCCESS; } -__cold static __noinline pgr_t check_page_complete(const uint16_t ILL, - page_t *page, - const MDBX_cursor *const mc, +__cold static __noinline pgr_t check_page_complete(const uint16_t ILL, page_t *page, const MDBX_cursor *const mc, const txnid_t front) { pgr_t r = {page, check_page_header(ILL, page, mc->txn, front)}; if (likely(r.err == MDBX_SUCCESS)) @@ -501,9 +414,7 @@ __cold static __noinline pgr_t check_page_complete(const uint16_t ILL, return r; } -static __always_inline pgr_t page_get_inline(const uint16_t ILL, - const MDBX_cursor *const mc, - const pgno_t pgno, +static __always_inline pgr_t page_get_inline(const uint16_t ILL, const MDBX_cursor *const mc, const pgno_t pgno, const txnid_t front) { MDBX_txn *const txn = mc->txn; tASSERT(txn, front <= txn->front_txnid); @@ -527,8 +438,7 @@ static __always_inline pgr_t page_get_inline(const uint16_t ILL, * because the dirty list got full. 
Bring this page * back in from the map (but don't unspill it here, * leave that unless page_touch happens again). */ - if (unlikely(spiller->flags & MDBX_TXN_SPILLS) && - spill_search(spiller, pgno)) + if (unlikely(spiller->flags & MDBX_TXN_SPILLS) && spill_search(spiller, pgno)) break; const size_t i = dpl_search(spiller, pgno); @@ -543,9 +453,7 @@ static __always_inline pgr_t page_get_inline(const uint16_t ILL, } if (unlikely(r.page->pgno != pgno)) { - r.err = bad_page( - r.page, "pgno mismatch (%" PRIaPGNO ") != expected (%" PRIaPGNO ")\n", - r.page->pgno, pgno); + r.err = bad_page(r.page, "pgno mismatch (%" PRIaPGNO ") != expected (%" PRIaPGNO ")\n", r.page->pgno, pgno); goto bailout; } @@ -562,18 +470,14 @@ static __always_inline pgr_t page_get_inline(const uint16_t ILL, return r; } -pgr_t page_get_any(const MDBX_cursor *const mc, const pgno_t pgno, - const txnid_t front) { +pgr_t page_get_any(const MDBX_cursor *const mc, const pgno_t pgno, const txnid_t front) { return page_get_inline(P_ILL_BITS, mc, pgno, front); } -__hot pgr_t page_get_three(const MDBX_cursor *const mc, const pgno_t pgno, - const txnid_t front) { +__hot pgr_t page_get_three(const MDBX_cursor *const mc, const pgno_t pgno, const txnid_t front) { return page_get_inline(P_ILL_BITS | P_LARGE, mc, pgno, front); } -pgr_t page_get_large(const MDBX_cursor *const mc, const pgno_t pgno, - const txnid_t front) { - return page_get_inline(P_ILL_BITS | P_BRANCH | P_LEAF | P_DUPFIX, mc, pgno, - front); +pgr_t page_get_large(const MDBX_cursor *const mc, const pgno_t pgno, const txnid_t front) { + return page_get_inline(P_ILL_BITS | P_BRANCH | P_LEAF | P_DUPFIX, mc, pgno, front); } diff --git a/src/page-iov.c b/src/page-iov.c index 700ff5d0..b6686017 100644 --- a/src/page-iov.c +++ b/src/page-iov.c @@ -3,17 +3,14 @@ #include "internals.h" -int iov_init(MDBX_txn *const txn, iov_ctx_t *ctx, size_t items, size_t npages, - mdbx_filehandle_t fd, bool check_coherence) { +int iov_init(MDBX_txn *const txn, iov_ctx_t 
*ctx, size_t items, size_t npages, mdbx_filehandle_t fd, + bool check_coherence) { ctx->env = txn->env; ctx->ior = &txn->env->ioring; ctx->fd = fd; ctx->coherency_timestamp = - (check_coherence || txn->env->lck->pgops.incoherence.weak) - ? 0 - : UINT64_MAX /* не выполнять сверку */; - ctx->err = osal_ioring_prepare(ctx->ior, items, - pgno_align2os_bytes(txn->env, npages)); + (check_coherence || txn->env->lck->pgops.incoherence.weak) ? 0 : UINT64_MAX /* не выполнять сверку */; + ctx->err = osal_ioring_prepare(ctx->ior, items, pgno_align2os_bytes(txn->env, npages)); if (likely(ctx->err == MDBX_SUCCESS)) { #if MDBX_NEED_WRITTEN_RANGE ctx->flush_begin = MAX_PAGENO; @@ -24,8 +21,7 @@ int iov_init(MDBX_txn *const txn, iov_ctx_t *ctx, size_t items, size_t npages, return ctx->err; } -static void iov_callback4dirtypages(iov_ctx_t *ctx, size_t offset, void *data, - size_t bytes) { +static void iov_callback4dirtypages(iov_ctx_t *ctx, size_t offset, void *data, size_t bytes) { MDBX_env *const env = ctx->env; eASSERT(env, (env->flags & MDBX_WRITEMAP) == 0); @@ -89,19 +85,15 @@ static void iov_callback4dirtypages(iov_ctx_t *ctx, size_t offset, void *data, #ifndef MDBX_FORCE_CHECK_MMAP_COHERENCY #define MDBX_FORCE_CHECK_MMAP_COHERENCY 0 #endif /* MDBX_FORCE_CHECK_MMAP_COHERENCY */ - if ((MDBX_FORCE_CHECK_MMAP_COHERENCY || - ctx->coherency_timestamp != UINT64_MAX) && + if ((MDBX_FORCE_CHECK_MMAP_COHERENCY || ctx->coherency_timestamp != UINT64_MAX) && unlikely(memcmp(wp, rp, bytes))) { ctx->coherency_timestamp = 0; env->lck->pgops.incoherence.weak = - (env->lck->pgops.incoherence.weak >= INT32_MAX) - ? INT32_MAX - : env->lck->pgops.incoherence.weak + 1; + (env->lck->pgops.incoherence.weak >= INT32_MAX) ? 
INT32_MAX : env->lck->pgops.incoherence.weak + 1; WARNING("catch delayed/non-arrived page %" PRIaPGNO " %s", wp->pgno, "(workaround for incoherent flaw of unified page/buffer cache)"); do - if (coherency_timeout(&ctx->coherency_timestamp, wp->pgno, env) != - MDBX_RESULT_TRUE) { + if (coherency_timeout(&ctx->coherency_timestamp, wp->pgno, env) != MDBX_RESULT_TRUE) { ctx->err = MDBX_PROBLEM; break; } @@ -160,8 +152,7 @@ int iov_page(MDBX_txn *txn, iov_ctx_t *ctx, page_t *dp, size_t npages) { #if MDBX_AVOID_MSYNC doit:; #endif /* MDBX_AVOID_MSYNC */ - int err = osal_ioring_add(ctx->ior, pgno2bytes(env, dp->pgno), dp, - pgno2bytes(env, npages)); + int err = osal_ioring_add(ctx->ior, pgno2bytes(env, dp->pgno), dp, pgno2bytes(env, npages)); if (unlikely(err != MDBX_SUCCESS)) { ctx->err = err; if (unlikely(err != MDBX_RESULT_TRUE)) { @@ -171,8 +162,7 @@ int iov_page(MDBX_txn *txn, iov_ctx_t *ctx, page_t *dp, size_t npages) { err = iov_write(ctx); tASSERT(txn, iov_empty(ctx)); if (likely(err == MDBX_SUCCESS)) { - err = osal_ioring_add(ctx->ior, pgno2bytes(env, dp->pgno), dp, - pgno2bytes(env, npages)); + err = osal_ioring_add(ctx->ior, pgno2bytes(env, dp->pgno), dp, pgno2bytes(env, npages)); if (unlikely(err != MDBX_SUCCESS)) { iov_complete(ctx); return ctx->err = err; @@ -188,11 +178,8 @@ int iov_page(MDBX_txn *txn, iov_ctx_t *ctx, page_t *dp, size_t npages) { } #if MDBX_NEED_WRITTEN_RANGE - ctx->flush_begin = - (ctx->flush_begin < dp->pgno) ? ctx->flush_begin : dp->pgno; - ctx->flush_end = (ctx->flush_end > dp->pgno + (pgno_t)npages) - ? ctx->flush_end - : dp->pgno + (pgno_t)npages; + ctx->flush_begin = (ctx->flush_begin < dp->pgno) ? ctx->flush_begin : dp->pgno; + ctx->flush_end = (ctx->flush_end > dp->pgno + (pgno_t)npages) ? 
ctx->flush_end : dp->pgno + (pgno_t)npages; #endif /* MDBX_NEED_WRITTEN_RANGE */ return MDBX_SUCCESS; } diff --git a/src/page-iov.h b/src/page-iov.h index 397f6fbe..bf367fef 100644 --- a/src/page-iov.h +++ b/src/page-iov.h @@ -24,15 +24,11 @@ struct iov_ctx { uint64_t coherency_timestamp; }; -MDBX_INTERNAL __must_check_result int -iov_init(MDBX_txn *const txn, iov_ctx_t *ctx, size_t items, size_t npages, - mdbx_filehandle_t fd, bool check_coherence); +MDBX_INTERNAL __must_check_result int iov_init(MDBX_txn *const txn, iov_ctx_t *ctx, size_t items, size_t npages, + mdbx_filehandle_t fd, bool check_coherence); -static inline bool iov_empty(const iov_ctx_t *ctx) { - return osal_ioring_used(ctx->ior) == 0; -} +static inline bool iov_empty(const iov_ctx_t *ctx) { return osal_ioring_used(ctx->ior) == 0; } -MDBX_INTERNAL __must_check_result int iov_page(MDBX_txn *txn, iov_ctx_t *ctx, - page_t *dp, size_t npages); +MDBX_INTERNAL __must_check_result int iov_page(MDBX_txn *txn, iov_ctx_t *ctx, page_t *dp, size_t npages); MDBX_INTERNAL __must_check_result int iov_write(iov_ctx_t *ctx); diff --git a/src/page-ops.c b/src/page-ops.c index d07cde07..6ca5f332 100644 --- a/src/page-ops.c +++ b/src/page-ops.c @@ -42,13 +42,11 @@ pgr_t page_new(MDBX_cursor *mc, const unsigned flags) { } pgr_t page_new_large(MDBX_cursor *mc, const size_t npages) { - pgr_t ret = likely(npages == 1) ? gc_alloc_single(mc) - : gc_alloc_ex(mc, npages, ALLOC_DEFAULT); + pgr_t ret = likely(npages == 1) ? 
gc_alloc_single(mc) : gc_alloc_ex(mc, npages, ALLOC_DEFAULT); if (unlikely(ret.err != MDBX_SUCCESS)) return ret; - DEBUG("dbi %zu allocated new large-page %" PRIaPGNO ", num %zu", - cursor_dbi(mc), ret.page->pgno, npages); + DEBUG("dbi %zu allocated new large-page %" PRIaPGNO ", num %zu", cursor_dbi(mc), ret.page->pgno, npages); ret.page->flags = P_LARGE; cASSERT(mc, *cursor_dbi_state(mc) & DBI_DIRTY); cASSERT(mc, mc->txn->flags & MDBX_TXN_DIRTY); @@ -62,8 +60,7 @@ pgr_t page_new_large(MDBX_cursor *mc, const size_t npages) { return ret; } -__hot void page_copy(page_t *const dst, const page_t *const src, - const size_t size) { +__hot void page_copy(page_t *const dst, const page_t *const src, const size_t size) { STATIC_ASSERT(UINT16_MAX > MDBX_MAX_PAGESIZE - PAGEHDRSZ); STATIC_ASSERT(MDBX_MIN_PAGESIZE > PAGEHDRSZ + NODESIZE * 4); void *copy_dst = dst; @@ -94,17 +91,14 @@ __hot void page_copy(page_t *const dst, const page_t *const src, bailout: if (src->flags & P_DUPFIX) - bad_page(src, "%s addr %p, n-keys %zu, ksize %u", - "invalid/corrupted source page", __Wpedantic_format_voidptr(src), + bad_page(src, "%s addr %p, n-keys %zu, ksize %u", "invalid/corrupted source page", __Wpedantic_format_voidptr(src), page_numkeys(src), src->dupfix_ksize); else - bad_page(src, "%s addr %p, upper %u", "invalid/corrupted source page", - __Wpedantic_format_voidptr(src), src->upper); + bad_page(src, "%s addr %p, upper %u", "invalid/corrupted source page", __Wpedantic_format_voidptr(src), src->upper); memset(dst, -1, size); } -__cold pgr_t __must_check_result page_unspill(MDBX_txn *const txn, - const page_t *const mp) { +__cold pgr_t __must_check_result page_unspill(MDBX_txn *const txn, const page_t *const mp) { VERBOSE("unspill page %" PRIaPGNO, mp->pgno); tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0); tASSERT(txn, is_spilled(txn, mp)); @@ -139,13 +133,11 @@ __cold pgr_t __must_check_result page_unspill(MDBX_txn *const txn, ret.page->flags |= (scan == txn) ? 
0 : P_SPILLED; ret.err = MDBX_SUCCESS; return ret; - } while (likely((scan = scan->parent) != nullptr && - (scan->flags & MDBX_TXN_SPILLS) != 0)); - ERROR("Page %" PRIaPGNO " mod-txnid %" PRIaTXN - " not found in the spill-list(s), current txn %" PRIaTXN + } while (likely((scan = scan->parent) != nullptr && (scan->flags & MDBX_TXN_SPILLS) != 0)); + ERROR("Page %" PRIaPGNO " mod-txnid %" PRIaTXN " not found in the spill-list(s), current txn %" PRIaTXN " front %" PRIaTXN ", root txn %" PRIaTXN " front %" PRIaTXN, - mp->pgno, mp->txnid, txn->txnid, txn->front_txnid, - txn->env->basal_txn->txnid, txn->env->basal_txn->front_txnid); + mp->pgno, mp->txnid, txn->txnid, txn->front_txnid, txn->env->basal_txn->txnid, + txn->env->basal_txn->front_txnid); ret.err = MDBX_PROBLEM; ret.page = nullptr; return ret; @@ -157,8 +149,7 @@ __hot int page_touch_modifable(MDBX_txn *txn, const page_t *const mp) { tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); const size_t n = dpl_search(txn, mp->pgno); - if (MDBX_AVOID_MSYNC && - unlikely(txn->tw.dirtylist->items[n].pgno != mp->pgno)) { + if (MDBX_AVOID_MSYNC && unlikely(txn->tw.dirtylist->items[n].pgno != mp->pgno)) { tASSERT(txn, (txn->flags & MDBX_WRITEMAP)); tASSERT(txn, n > 0 && n <= txn->tw.dirtylist->length + 1); VERBOSE("unspill page %" PRIaPGNO, mp->pgno); @@ -169,18 +160,15 @@ __hot int page_touch_modifable(MDBX_txn *txn, const page_t *const mp) { } tASSERT(txn, n > 0 && n <= txn->tw.dirtylist->length); - tASSERT(txn, txn->tw.dirtylist->items[n].pgno == mp->pgno && - txn->tw.dirtylist->items[n].ptr == mp); + tASSERT(txn, txn->tw.dirtylist->items[n].pgno == mp->pgno && txn->tw.dirtylist->items[n].ptr == mp); if (!MDBX_AVOID_MSYNC || (txn->flags & MDBX_WRITEMAP) == 0) { - size_t *const ptr = - ptr_disp(txn->tw.dirtylist->items[n].ptr, -(ptrdiff_t)sizeof(size_t)); + size_t *const ptr = ptr_disp(txn->tw.dirtylist->items[n].ptr, -(ptrdiff_t)sizeof(size_t)); *ptr = txn->tw.dirtylru; } return MDBX_SUCCESS; } -__hot 
int page_touch_unmodifable(MDBX_txn *txn, MDBX_cursor *mc, - const page_t *const mp) { +__hot int page_touch_unmodifable(MDBX_txn *txn, MDBX_cursor *mc, const page_t *const mp) { tASSERT(txn, !is_modifable(txn, mp) && !is_largepage(mp)); if (is_subpage(mp)) { ((page_t *)mp)->txnid = txn->front_txnid; @@ -201,8 +189,7 @@ __hot int page_touch_unmodifable(MDBX_txn *txn, MDBX_cursor *mc, goto fail; const pgno_t pgno = np->pgno; - DEBUG("touched db %d page %" PRIaPGNO " -> %" PRIaPGNO, cursor_dbi_dbg(mc), - mp->pgno, pgno); + DEBUG("touched db %d page %" PRIaPGNO " -> %" PRIaPGNO, cursor_dbi_dbg(mc), mp->pgno, pgno); tASSERT(txn, mp->pgno != pgno); pnl_append_prereserved(txn->tw.retired_pages, mp->pgno); /* Update the parent page, if any, to point to the new page */ @@ -233,17 +220,14 @@ __hot int page_touch_unmodifable(MDBX_txn *txn, MDBX_cursor *mc, if (unlikely(!txn->parent)) { ERROR("Unexpected not frozen/modifiable/spilled but shadowed %s " "page %" PRIaPGNO " mod-txnid %" PRIaTXN "," - " without parent transaction, current txn %" PRIaTXN - " front %" PRIaTXN, - is_branch(mp) ? "branch" : "leaf", mp->pgno, mp->txnid, - mc->txn->txnid, mc->txn->front_txnid); + " without parent transaction, current txn %" PRIaTXN " front %" PRIaTXN, + is_branch(mp) ? 
"branch" : "leaf", mp->pgno, mp->txnid, mc->txn->txnid, mc->txn->front_txnid); rc = MDBX_PROBLEM; goto fail; } DEBUG("clone db %d page %" PRIaPGNO, cursor_dbi_dbg(mc), mp->pgno); - tASSERT(txn, - txn->tw.dirtylist->length <= PAGELIST_LIMIT + MDBX_PNL_GRANULATE); + tASSERT(txn, txn->tw.dirtylist->length <= PAGELIST_LIMIT + MDBX_PNL_GRANULATE); /* No - copy it */ np = page_shadow_alloc(txn, 1); if (unlikely(!np)) { @@ -299,8 +283,7 @@ page_t *page_shadow_alloc(MDBX_txn *txn, size_t num) { if (likely(num == 1 && np)) { eASSERT(env, env->shadow_reserve_len > 0); MDBX_ASAN_UNPOISON_MEMORY_REGION(np, size); - VALGRIND_MEMPOOL_ALLOC(env, ptr_disp(np, -(ptrdiff_t)sizeof(size_t)), - size + sizeof(size_t)); + VALGRIND_MEMPOOL_ALLOC(env, ptr_disp(np, -(ptrdiff_t)sizeof(size_t)), size + sizeof(size_t)); VALGRIND_MAKE_MEM_DEFINED(&page_next(np), sizeof(page_t *)); env->shadow_reserve = page_next(np); env->shadow_reserve_len -= 1; @@ -338,8 +321,7 @@ void page_shadow_release(MDBX_env *env, page_t *dp, size_t npages) { MDBX_ASAN_UNPOISON_MEMORY_REGION(dp, pgno2bytes(env, npages)); if (unlikely(env->flags & MDBX_PAGEPERTURB)) memset(dp, -1, pgno2bytes(env, npages)); - if (likely(npages == 1 && - env->shadow_reserve_len < env->options.dp_reserve_limit)) { + if (likely(npages == 1 && env->shadow_reserve_len < env->options.dp_reserve_limit)) { MDBX_ASAN_POISON_MEMORY_REGION(dp, env->ps); MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(dp), sizeof(page_t *)); page_next(dp) = env->shadow_reserve; @@ -354,8 +336,7 @@ void page_shadow_release(MDBX_env *env, page_t *dp, size_t npages) { } } -__cold static void page_kill(MDBX_txn *txn, page_t *mp, pgno_t pgno, - size_t npages) { +__cold static void page_kill(MDBX_txn *txn, page_t *mp, pgno_t pgno, size_t npages) { MDBX_env *const env = txn->env; DEBUG("kill %zu page(s) %" PRIaPGNO, npages, pgno); eASSERT(env, pgno >= NUM_METAS && npages); @@ -391,8 +372,7 @@ static inline bool suitable4loose(const MDBX_txn *txn, pgno_t pgno) { return 
txn->tw.loose_count < txn->env->options.dp_loose_limit && (!MDBX_ENABLE_REFUND || /* skip pages near to the end in favor of compactification */ - txn->geo.first_unallocated > - pgno + txn->env->options.dp_loose_limit || + txn->geo.first_unallocated > pgno + txn->env->options.dp_loose_limit || txn->geo.first_unallocated <= txn->env->options.dp_loose_limit); } @@ -404,8 +384,7 @@ static inline bool suitable4loose(const MDBX_txn *txn, pgno_t pgno) { * * If the page wasn't dirtied in this txn, just add it * to this txn's free list. */ -int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno, - page_t *mp /* maybe null */, +int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno, page_t *mp /* maybe null */, unsigned pageflags /* maybe unknown/zero */) { int rc; MDBX_txn *const txn = mc->txn; @@ -423,13 +402,7 @@ int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno, * So for flexibility and avoid extra internal dependencies we just * fallback to reading if dirty list was not allocated yet. */ size_t di = 0, si = 0, npages = 1; - enum page_status { - unknown, - frozen, - spilled, - shadowed, - modifable - } status = unknown; + enum page_status { unknown, frozen, spilled, shadowed, modifable } status = unknown; if (unlikely(!mp)) { if (ASSERT_ENABLED() && pageflags) { @@ -437,8 +410,7 @@ int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno, check = page_get_any(mc, pgno, txn->front_txnid); if (unlikely(check.err != MDBX_SUCCESS)) return check.err; - tASSERT(txn, ((unsigned)check.page->flags & ~P_SPILLED) == - (pageflags & ~P_FROZEN)); + tASSERT(txn, ((unsigned)check.page->flags & ~P_SPILLED) == (pageflags & ~P_FROZEN)); tASSERT(txn, !(pageflags & P_FROZEN) || is_frozen(txn, check.page)); } if (pageflags & P_FROZEN) { @@ -540,8 +512,7 @@ status_done: /* Возврат страниц в нераспределенный "хвост" БД. * Содержимое страниц не уничтожается, а для вложенных транзакций граница * нераспределенного "хвоста" БД сдвигается только при их коммите. 
*/ - if (MDBX_ENABLE_REFUND && - unlikely(pgno + npages == txn->geo.first_unallocated)) { + if (MDBX_ENABLE_REFUND && unlikely(pgno + npages == txn->geo.first_unallocated)) { const char *kind = nullptr; if (status == modifable) { /* Страница испачкана в этой транзакции, но до этого могла быть @@ -589,8 +560,7 @@ status_done: if (status == modifable) { /* Dirty page from this transaction */ /* If suitable we can reuse it through loose list */ - if (likely(npages == 1 && suitable4loose(txn, pgno)) && - (di || !txn->tw.dirtylist)) { + if (likely(npages == 1 && suitable4loose(txn, pgno)) && (di || !txn->tw.dirtylist)) { DEBUG("loosen dirty page %" PRIaPGNO, pgno); if (MDBX_DEBUG != 0 || unlikely(txn->env->flags & MDBX_PAGEPERTURB)) memset(page_data(mp), -1, txn->env->ps - PAGEHDRSZ); @@ -600,9 +570,7 @@ status_done: txn->tw.loose_pages = mp; txn->tw.loose_count++; #if MDBX_ENABLE_REFUND - txn->tw.loose_refund_wl = (pgno + 2 > txn->tw.loose_refund_wl) - ? pgno + 2 - : txn->tw.loose_refund_wl; + txn->tw.loose_refund_wl = (pgno + 2 > txn->tw.loose_refund_wl) ? pgno + 2 : txn->tw.loose_refund_wl; #endif /* MDBX_ENABLE_REFUND */ VALGRIND_MAKE_MEM_NOACCESS(page_data(mp), txn->env->ps - PAGEHDRSZ); MDBX_ASAN_POISON_MEMORY_REGION(page_data(mp), txn->env->ps - PAGEHDRSZ); @@ -617,9 +585,7 @@ status_done: * в том числе, позже выгружена и затем снова загружена и изменена. 
* В обоих случаях её нельзя затирать на диске и помечать недоступной * в asan и/или valgrind */ - for (MDBX_txn *parent = txn->parent; - parent && (parent->flags & MDBX_TXN_SPILLS); - parent = parent->parent) { + for (MDBX_txn *parent = txn->parent; parent && (parent->flags & MDBX_TXN_SPILLS); parent = parent->parent) { if (spill_intersect(parent, pgno, npages)) goto skip_invalidate; if (dpl_intersect(parent, pgno, npages)) @@ -631,11 +597,8 @@ status_done: #endif page_kill(txn, mp, pgno, npages); if ((txn->flags & MDBX_WRITEMAP) == 0) { - VALGRIND_MAKE_MEM_NOACCESS(page_data(pgno2page(txn->env, pgno)), - pgno2bytes(txn->env, npages) - PAGEHDRSZ); - MDBX_ASAN_POISON_MEMORY_REGION(page_data(pgno2page(txn->env, pgno)), - pgno2bytes(txn->env, npages) - - PAGEHDRSZ); + VALGRIND_MAKE_MEM_NOACCESS(page_data(pgno2page(txn->env, pgno)), pgno2bytes(txn->env, npages) - PAGEHDRSZ); + MDBX_ASAN_POISON_MEMORY_REGION(page_data(pgno2page(txn->env, pgno)), pgno2bytes(txn->env, npages) - PAGEHDRSZ); } } skip_invalidate: @@ -646,9 +609,7 @@ status_done: reclaim: DEBUG("reclaim %zu %s page %" PRIaPGNO, npages, "dirty", pgno); rc = pnl_insert_span(&txn->tw.relist, pgno, npages); - tASSERT(txn, - pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - - MDBX_ENABLE_REFUND)); + tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); tASSERT(txn, dpl_check(txn)); return rc; } @@ -675,8 +636,7 @@ status_done: if (ASSERT_ENABLED()) { const page_t *parent_dp = nullptr; /* Check parent(s)'s dirty lists. 
*/ - for (MDBX_txn *parent = txn->parent; parent && !parent_dp; - parent = parent->parent) { + for (MDBX_txn *parent = txn->parent; parent && !parent_dp; parent = parent->parent) { tASSERT(txn, !spill_search(parent, pgno)); parent_dp = debug_dpl_find(parent, pgno); } @@ -697,8 +657,7 @@ status_done: goto retire; } -__hot int __must_check_result page_dirty(MDBX_txn *txn, page_t *mp, - size_t npages) { +__hot int __must_check_result page_dirty(MDBX_txn *txn, page_t *mp, size_t npages) { tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); mp->txnid = txn->front_txnid; if (!txn->tw.dirtylist) { @@ -756,37 +715,27 @@ void recalculate_subpage_thresholds(MDBX_env *env) { size_t whole = env->leaf_nodemax - NODESIZE; env->subpage_limit = (whole * env->options.subpage.limit + 32767) >> 16; whole = env->subpage_limit; - env->subpage_reserve_limit = - (whole * env->options.subpage.reserve_limit + 32767) >> 16; + env->subpage_reserve_limit = (whole * env->options.subpage.reserve_limit + 32767) >> 16; eASSERT(env, env->leaf_nodemax >= env->subpage_limit + NODESIZE); eASSERT(env, env->subpage_limit >= env->subpage_reserve_limit); whole = env->leaf_nodemax; - env->subpage_room_threshold = - (whole * env->options.subpage.room_threshold + 32767) >> 16; - env->subpage_reserve_prereq = - (whole * env->options.subpage.reserve_prereq + 32767) >> 16; - if (env->subpage_room_threshold + env->subpage_reserve_limit > - (intptr_t)page_space(env)) + env->subpage_room_threshold = (whole * env->options.subpage.room_threshold + 32767) >> 16; + env->subpage_reserve_prereq = (whole * env->options.subpage.reserve_prereq + 32767) >> 16; + if (env->subpage_room_threshold + env->subpage_reserve_limit > (intptr_t)page_space(env)) env->subpage_reserve_prereq = page_space(env); - else if (env->subpage_reserve_prereq < - env->subpage_room_threshold + env->subpage_reserve_limit) - env->subpage_reserve_prereq = - env->subpage_room_threshold + env->subpage_reserve_limit; - eASSERT(env, 
env->subpage_reserve_prereq > - env->subpage_room_threshold + env->subpage_reserve_limit); + else if (env->subpage_reserve_prereq < env->subpage_room_threshold + env->subpage_reserve_limit) + env->subpage_reserve_prereq = env->subpage_room_threshold + env->subpage_reserve_limit; + eASSERT(env, env->subpage_reserve_prereq > env->subpage_room_threshold + env->subpage_reserve_limit); } -size_t page_subleaf2_reserve(const MDBX_env *env, size_t host_page_room, - size_t subpage_len, size_t item_len) { +size_t page_subleaf2_reserve(const MDBX_env *env, size_t host_page_room, size_t subpage_len, size_t item_len) { eASSERT(env, (subpage_len & 1) == 0); eASSERT(env, env->leaf_nodemax >= env->subpage_limit + NODESIZE); size_t reserve = 0; - for (size_t n = 0; - n < 5 && reserve + item_len <= env->subpage_reserve_limit && - EVEN_CEIL(subpage_len + item_len) <= env->subpage_limit && - host_page_room >= - env->subpage_reserve_prereq + EVEN_CEIL(subpage_len + item_len); + for (size_t n = 0; n < 5 && reserve + item_len <= env->subpage_reserve_limit && + EVEN_CEIL(subpage_len + item_len) <= env->subpage_limit && + host_page_room >= env->subpage_reserve_prereq + EVEN_CEIL(subpage_len + item_len); ++n) { subpage_len += item_len; reserve += item_len; diff --git a/src/page-ops.h b/src/page-ops.h index 63cdd0b5..bb4ebd16 100644 --- a/src/page-ops.h +++ b/src/page-ops.h @@ -5,9 +5,7 @@ #include "essentials.h" -MDBX_INTERNAL int __must_check_result tree_search_finalize(MDBX_cursor *mc, - const MDBX_val *key, - int flags); +MDBX_INTERNAL int __must_check_result tree_search_finalize(MDBX_cursor *mc, const MDBX_val *key, int flags); MDBX_INTERNAL int tree_search_lowest(MDBX_cursor *mc); enum page_search_flags { @@ -16,64 +14,47 @@ enum page_search_flags { Z_FIRST = 4, Z_LAST = 8, }; -MDBX_INTERNAL int __must_check_result tree_search(MDBX_cursor *mc, - const MDBX_val *key, - int flags); +MDBX_INTERNAL int __must_check_result tree_search(MDBX_cursor *mc, const MDBX_val *key, int flags); 
#define MDBX_SPLIT_REPLACE MDBX_APPENDDUP /* newkey is not new */ -MDBX_INTERNAL int __must_check_result page_split(MDBX_cursor *mc, - const MDBX_val *const newkey, - MDBX_val *const newdata, - pgno_t newpgno, - const unsigned naf); +MDBX_INTERNAL int __must_check_result page_split(MDBX_cursor *mc, const MDBX_val *const newkey, MDBX_val *const newdata, + pgno_t newpgno, const unsigned naf); /*----------------------------------------------------------------------------*/ -MDBX_INTERNAL int MDBX_PRINTF_ARGS(2, 3) - bad_page(const page_t *mp, const char *fmt, ...); +MDBX_INTERNAL int MDBX_PRINTF_ARGS(2, 3) bad_page(const page_t *mp, const char *fmt, ...); -MDBX_INTERNAL void MDBX_PRINTF_ARGS(2, 3) - poor_page(const page_t *mp, const char *fmt, ...); +MDBX_INTERNAL void MDBX_PRINTF_ARGS(2, 3) poor_page(const page_t *mp, const char *fmt, ...); -MDBX_NOTHROW_PURE_FUNCTION static inline bool is_frozen(const MDBX_txn *txn, - const page_t *mp) { +MDBX_NOTHROW_PURE_FUNCTION static inline bool is_frozen(const MDBX_txn *txn, const page_t *mp) { return mp->txnid < txn->txnid; } -MDBX_NOTHROW_PURE_FUNCTION static inline bool is_spilled(const MDBX_txn *txn, - const page_t *mp) { +MDBX_NOTHROW_PURE_FUNCTION static inline bool is_spilled(const MDBX_txn *txn, const page_t *mp) { return mp->txnid == txn->txnid; } -MDBX_NOTHROW_PURE_FUNCTION static inline bool is_shadowed(const MDBX_txn *txn, - const page_t *mp) { +MDBX_NOTHROW_PURE_FUNCTION static inline bool is_shadowed(const MDBX_txn *txn, const page_t *mp) { return mp->txnid > txn->txnid; } -MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool -is_correct(const MDBX_txn *txn, const page_t *mp) { +MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_correct(const MDBX_txn *txn, const page_t *mp) { return mp->txnid <= txn->front_txnid; } -MDBX_NOTHROW_PURE_FUNCTION static inline bool is_modifable(const MDBX_txn *txn, - const page_t *mp) { +MDBX_NOTHROW_PURE_FUNCTION static inline bool is_modifable(const 
MDBX_txn *txn, const page_t *mp) { return mp->txnid == txn->front_txnid; } -MDBX_INTERNAL int __must_check_result page_check(const MDBX_cursor *const mc, - const page_t *const mp); +MDBX_INTERNAL int __must_check_result page_check(const MDBX_cursor *const mc, const page_t *const mp); -MDBX_INTERNAL pgr_t page_get_any(const MDBX_cursor *const mc, const pgno_t pgno, - const txnid_t front); +MDBX_INTERNAL pgr_t page_get_any(const MDBX_cursor *const mc, const pgno_t pgno, const txnid_t front); -MDBX_INTERNAL pgr_t page_get_three(const MDBX_cursor *const mc, - const pgno_t pgno, const txnid_t front); +MDBX_INTERNAL pgr_t page_get_three(const MDBX_cursor *const mc, const pgno_t pgno, const txnid_t front); -MDBX_INTERNAL pgr_t page_get_large(const MDBX_cursor *const mc, - const pgno_t pgno, const txnid_t front); +MDBX_INTERNAL pgr_t page_get_large(const MDBX_cursor *const mc, const pgno_t pgno, const txnid_t front); -static inline int __must_check_result page_get(const MDBX_cursor *mc, - const pgno_t pgno, page_t **mp, +static inline int __must_check_result page_get(const MDBX_cursor *mc, const pgno_t pgno, page_t **mp, const txnid_t front) { pgr_t ret = page_get_three(mc, pgno, front); *mp = ret.page; @@ -82,21 +63,18 @@ static inline int __must_check_result page_get(const MDBX_cursor *mc, /*----------------------------------------------------------------------------*/ -MDBX_INTERNAL int __must_check_result page_dirty(MDBX_txn *txn, page_t *mp, - size_t npages); +MDBX_INTERNAL int __must_check_result page_dirty(MDBX_txn *txn, page_t *mp, size_t npages); MDBX_INTERNAL pgr_t page_new(MDBX_cursor *mc, const unsigned flags); MDBX_INTERNAL pgr_t page_new_large(MDBX_cursor *mc, const size_t npages); MDBX_INTERNAL int page_touch_modifable(MDBX_txn *txn, const page_t *const mp); -MDBX_INTERNAL int page_touch_unmodifable(MDBX_txn *txn, MDBX_cursor *mc, - const page_t *const mp); +MDBX_INTERNAL int page_touch_unmodifable(MDBX_txn *txn, MDBX_cursor *mc, const page_t *const mp); 
static inline int page_touch(MDBX_cursor *mc) { page_t *const mp = mc->pg[mc->top]; MDBX_txn *txn = mc->txn; tASSERT(txn, mc->txn->flags & MDBX_TXN_DIRTY); - tASSERT(txn, - F_ISSET(*cursor_dbi_state(mc), DBI_LINDO | DBI_VALID | DBI_DIRTY)); + tASSERT(txn, F_ISSET(*cursor_dbi_state(mc), DBI_LINDO | DBI_VALID | DBI_DIRTY)); tASSERT(txn, !is_largepage(mp)); if (ASSERT_ENABLED()) { if (mc->flags & z_inner) { @@ -119,40 +97,31 @@ static inline int page_touch(MDBX_cursor *mc) { return page_touch_unmodifable(txn, mc, mp); } -MDBX_INTERNAL void page_copy(page_t *const dst, const page_t *const src, - const size_t size); -MDBX_INTERNAL pgr_t __must_check_result page_unspill(MDBX_txn *const txn, - const page_t *const mp); +MDBX_INTERNAL void page_copy(page_t *const dst, const page_t *const src, const size_t size); +MDBX_INTERNAL pgr_t __must_check_result page_unspill(MDBX_txn *const txn, const page_t *const mp); MDBX_INTERNAL page_t *page_shadow_alloc(MDBX_txn *txn, size_t num); -MDBX_INTERNAL void page_shadow_release(MDBX_env *env, page_t *dp, - size_t npages); +MDBX_INTERNAL void page_shadow_release(MDBX_env *env, page_t *dp, size_t npages); -MDBX_INTERNAL int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno, - page_t *mp /* maybe null */, +MDBX_INTERNAL int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno, page_t *mp /* maybe null */, unsigned pageflags /* maybe unknown/zero */); -static inline int page_retire(MDBX_cursor *mc, page_t *mp) { - return page_retire_ex(mc, mp->pgno, mp, mp->flags); -} +static inline int page_retire(MDBX_cursor *mc, page_t *mp) { return page_retire_ex(mc, mp->pgno, mp, mp->flags); } -static inline void page_wash(MDBX_txn *txn, size_t di, page_t *const mp, - const size_t npages) { +static inline void page_wash(MDBX_txn *txn, size_t di, page_t *const mp, const size_t npages) { tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); mp->txnid = INVALID_TXNID; mp->flags = P_BAD; if (txn->tw.dirtylist) { tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || 
MDBX_AVOID_MSYNC); - tASSERT(txn, - MDBX_AVOID_MSYNC || (di && txn->tw.dirtylist->items[di].ptr == mp)); + tASSERT(txn, MDBX_AVOID_MSYNC || (di && txn->tw.dirtylist->items[di].ptr == mp)); if (!MDBX_AVOID_MSYNC || di) { dpl_remove_ex(txn, di, npages); txn->tw.dirtyroom++; tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == - (txn->parent ? txn->parent->tw.dirtyroom - : txn->env->options.dp_limit)); + (txn->parent ? txn->parent->tw.dirtyroom : txn->env->options.dp_limit)); if (!MDBX_AVOID_MSYNC || !(txn->flags & MDBX_WRITEMAP)) { page_shadow_release(txn->env, mp, npages); return; @@ -160,20 +129,14 @@ static inline void page_wash(MDBX_txn *txn, size_t di, page_t *const mp, } } else { tASSERT(txn, (txn->flags & MDBX_WRITEMAP) && !MDBX_AVOID_MSYNC && !di); - txn->tw.writemap_dirty_npages -= (txn->tw.writemap_dirty_npages > npages) - ? npages - : txn->tw.writemap_dirty_npages; + txn->tw.writemap_dirty_npages -= (txn->tw.writemap_dirty_npages > npages) ? npages : txn->tw.writemap_dirty_npages; } VALGRIND_MAKE_MEM_UNDEFINED(mp, PAGEHDRSZ); - VALGRIND_MAKE_MEM_NOACCESS(page_data(mp), - pgno2bytes(txn->env, npages) - PAGEHDRSZ); - MDBX_ASAN_POISON_MEMORY_REGION(page_data(mp), - pgno2bytes(txn->env, npages) - PAGEHDRSZ); + VALGRIND_MAKE_MEM_NOACCESS(page_data(mp), pgno2bytes(txn->env, npages) - PAGEHDRSZ); + MDBX_ASAN_POISON_MEMORY_REGION(page_data(mp), pgno2bytes(txn->env, npages) - PAGEHDRSZ); } -MDBX_INTERNAL size_t page_subleaf2_reserve(const MDBX_env *env, - size_t host_page_room, - size_t subpage_len, size_t item_len); +MDBX_INTERNAL size_t page_subleaf2_reserve(const MDBX_env *env, size_t host_page_room, size_t subpage_len, + size_t item_len); -#define page_next(mp) \ - (*(page_t **)ptr_disp((mp)->entries, sizeof(void *) - sizeof(uint32_t))) +#define page_next(mp) (*(page_t **)ptr_disp((mp)->entries, sizeof(void *) - sizeof(uint32_t))) diff --git a/src/page-search.c b/src/page-search.c index 127e9ba0..c19a9274 100644 --- a/src/page-search.c +++ 
b/src/page-search.c @@ -54,14 +54,11 @@ __hot int tree_search(MDBX_cursor *mc, const MDBX_val *key, int flags) { cASSERT(mc, root >= NUM_METAS && root < mc->txn->geo.first_unallocated); if (mc->top < 0 || mc->pg[0]->pgno != root) { txnid_t pp_txnid = mc->tree->mod_txnid; - pp_txnid = /* tree->mod_txnid maybe zero in a legacy DB */ pp_txnid - ? pp_txnid - : mc->txn->txnid; + pp_txnid = /* tree->mod_txnid maybe zero in a legacy DB */ pp_txnid ? pp_txnid : mc->txn->txnid; if ((mc->txn->flags & MDBX_TXN_RDONLY) == 0) { MDBX_txn *scan = mc->txn; do - if ((scan->flags & MDBX_TXN_DIRTY) && - (dbi == MAIN_DBI || (scan->dbi_state[dbi] & DBI_DIRTY))) { + if ((scan->flags & MDBX_TXN_DIRTY) && (dbi == MAIN_DBI || (scan->dbi_state[dbi] & DBI_DIRTY))) { /* После коммита вложенных тразакций может быть mod_txnid > front */ pp_txnid = scan->front_txnid; break; @@ -75,8 +72,7 @@ __hot int tree_search(MDBX_cursor *mc, const MDBX_val *key, int flags) { mc->top = 0; mc->ki[0] = (flags & Z_LAST) ? page_numkeys(mc->pg[0]) - 1 : 0; - DEBUG("db %d root page %" PRIaPGNO " has flags 0x%X", cursor_dbi_dbg(mc), - root, mc->pg[0]->flags); + DEBUG("db %d root page %" PRIaPGNO " has flags 0x%X", cursor_dbi_dbg(mc), root, mc->pg[0]->flags); if (flags & Z_MODIFY) { err = page_touch(mc); @@ -90,8 +86,7 @@ __hot int tree_search(MDBX_cursor *mc, const MDBX_val *key, int flags) { return tree_search_finalize(mc, key, flags); } -__hot __noinline int tree_search_finalize(MDBX_cursor *mc, const MDBX_val *key, - int flags) { +__hot __noinline int tree_search_finalize(MDBX_cursor *mc, const MDBX_val *key, int flags) { cASSERT(mc, !is_poor(mc)); DKBUF_DEBUG; int err; @@ -128,16 +123,14 @@ __hot __noinline int tree_search_finalize(MDBX_cursor *mc, const MDBX_val *key, } if (!MDBX_DISABLE_VALIDATION && unlikely(!check_leaf_type(mc, mp))) { - ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", - mp->pgno, mp->flags); + ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", 
mp->pgno, mp->flags); err = MDBX_CORRUPTED; bailout: be_poor(mc); return err; } - DEBUG("found leaf page %" PRIaPGNO " for key [%s]", mp->pgno, - DKEY_DEBUG(key)); + DEBUG("found leaf page %" PRIaPGNO " for key [%s]", mp->pgno, DKEY_DEBUG(key)); /* Логически верно, но (в текущем понимании) нет необходимости. Однако, стоит ещё по-проверять/по-тестировать. Возможно есть сценарий, в котором очистка флагов всё-таки требуется. diff --git a/src/pnl.c b/src/pnl.c index e8825c6d..d40fe7e5 100644 --- a/src/pnl.c +++ b/src/pnl.c @@ -25,14 +25,11 @@ MDBX_INTERNAL void pnl_free(pnl_t pnl) { MDBX_INTERNAL void pnl_shrink(pnl_t __restrict *__restrict ppnl) { assert(pnl_bytes2size(pnl_size2bytes(MDBX_PNL_INITIAL)) >= MDBX_PNL_INITIAL && - pnl_bytes2size(pnl_size2bytes(MDBX_PNL_INITIAL)) < - MDBX_PNL_INITIAL * 3 / 2); - assert(MDBX_PNL_GETSIZE(*ppnl) <= PAGELIST_LIMIT && - MDBX_PNL_ALLOCLEN(*ppnl) >= MDBX_PNL_GETSIZE(*ppnl)); + pnl_bytes2size(pnl_size2bytes(MDBX_PNL_INITIAL)) < MDBX_PNL_INITIAL * 3 / 2); + assert(MDBX_PNL_GETSIZE(*ppnl) <= PAGELIST_LIMIT && MDBX_PNL_ALLOCLEN(*ppnl) >= MDBX_PNL_GETSIZE(*ppnl)); MDBX_PNL_SETSIZE(*ppnl, 0); if (unlikely(MDBX_PNL_ALLOCLEN(*ppnl) > - MDBX_PNL_INITIAL * (MDBX_PNL_PREALLOC_FOR_RADIXSORT ? 8 : 4) - - MDBX_CACHELINE_SIZE / sizeof(pgno_t))) { + MDBX_PNL_INITIAL * (MDBX_PNL_PREALLOC_FOR_RADIXSORT ? 
8 : 4) - MDBX_CACHELINE_SIZE / sizeof(pgno_t))) { size_t bytes = pnl_size2bytes(MDBX_PNL_INITIAL * 2); pnl_t pnl = osal_realloc(*ppnl - 1, bytes); if (likely(pnl)) { @@ -45,11 +42,9 @@ MDBX_INTERNAL void pnl_shrink(pnl_t __restrict *__restrict ppnl) { } } -MDBX_INTERNAL int pnl_reserve(pnl_t __restrict *__restrict ppnl, - const size_t wanna) { +MDBX_INTERNAL int pnl_reserve(pnl_t __restrict *__restrict ppnl, const size_t wanna) { const size_t allocated = MDBX_PNL_ALLOCLEN(*ppnl); - assert(MDBX_PNL_GETSIZE(*ppnl) <= PAGELIST_LIMIT && - MDBX_PNL_ALLOCLEN(*ppnl) >= MDBX_PNL_GETSIZE(*ppnl)); + assert(MDBX_PNL_GETSIZE(*ppnl) <= PAGELIST_LIMIT && MDBX_PNL_ALLOCLEN(*ppnl) >= MDBX_PNL_GETSIZE(*ppnl)); if (likely(allocated >= wanna)) return MDBX_SUCCESS; @@ -58,9 +53,7 @@ MDBX_INTERNAL int pnl_reserve(pnl_t __restrict *__restrict ppnl, return MDBX_TXN_FULL; } - const size_t size = (wanna + wanna - allocated < PAGELIST_LIMIT) - ? wanna + wanna - allocated - : PAGELIST_LIMIT; + const size_t size = (wanna + wanna - allocated < PAGELIST_LIMIT) ? 
wanna + wanna - allocated : PAGELIST_LIMIT; size_t bytes = pnl_size2bytes(size); pnl_t pnl = osal_realloc(*ppnl - 1, bytes); if (likely(pnl)) { @@ -75,8 +68,8 @@ MDBX_INTERNAL int pnl_reserve(pnl_t __restrict *__restrict ppnl, return MDBX_ENOMEM; } -static __always_inline int __must_check_result pnl_append_stepped( - unsigned step, __restrict pnl_t *ppnl, pgno_t pgno, size_t n) { +static __always_inline int __must_check_result pnl_append_stepped(unsigned step, __restrict pnl_t *ppnl, pgno_t pgno, + size_t n) { assert(n > 0); int rc = pnl_need(ppnl, n); if (unlikely(rc != MDBX_SUCCESS)) @@ -106,18 +99,15 @@ static __always_inline int __must_check_result pnl_append_stepped( return MDBX_SUCCESS; } -__hot MDBX_INTERNAL int __must_check_result -spill_append_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n) { +__hot MDBX_INTERNAL int __must_check_result spill_append_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n) { return pnl_append_stepped(2, ppnl, pgno << 1, n); } -__hot MDBX_INTERNAL int __must_check_result -pnl_append_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n) { +__hot MDBX_INTERNAL int __must_check_result pnl_append_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n) { return pnl_append_stepped(1, ppnl, pgno, n); } -__hot MDBX_INTERNAL int __must_check_result -pnl_insert_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n) { +__hot MDBX_INTERNAL int __must_check_result pnl_insert_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n) { assert(n > 0); int rc = pnl_need(ppnl, n); if (unlikely(rc != MDBX_SUCCESS)) @@ -135,8 +125,7 @@ pnl_insert_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n) { return MDBX_SUCCESS; } -__hot __noinline MDBX_INTERNAL bool pnl_check(const const_pnl_t pnl, - const size_t limit) { +__hot __noinline MDBX_INTERNAL bool pnl_check(const const_pnl_t pnl, const size_t limit) { assert(limit >= MIN_PAGENO - MDBX_ENABLE_REFUND); if (likely(MDBX_PNL_GETSIZE(pnl))) { if (unlikely(MDBX_PNL_GETSIZE(pnl) > PAGELIST_LIMIT)) @@ -146,8 +135,7 
@@ __hot __noinline MDBX_INTERNAL bool pnl_check(const const_pnl_t pnl, if (unlikely(MDBX_PNL_MOST(pnl) >= limit)) return false; - if ((!MDBX_DISABLE_VALIDATION || AUDIT_ENABLED()) && - likely(MDBX_PNL_GETSIZE(pnl) > 1)) { + if ((!MDBX_DISABLE_VALIDATION || AUDIT_ENABLED()) && likely(MDBX_PNL_GETSIZE(pnl) > 1)) { const pgno_t *scan = MDBX_PNL_BEGIN(pnl); const pgno_t *const end = MDBX_PNL_END(pnl); pgno_t prev = *scan++; @@ -161,10 +149,9 @@ __hot __noinline MDBX_INTERNAL bool pnl_check(const const_pnl_t pnl, return true; } -static __always_inline void -pnl_merge_inner(pgno_t *__restrict dst, const pgno_t *__restrict src_a, - const pgno_t *__restrict src_b, - const pgno_t *__restrict const src_b_detent) { +static __always_inline void pnl_merge_inner(pgno_t *__restrict dst, const pgno_t *__restrict src_a, + const pgno_t *__restrict src_b, + const pgno_t *__restrict const src_b_detent) { do { #if MDBX_HAVE_CMOV const bool flag = MDBX_PNL_ORDERED(*src_b, *src_a); @@ -203,14 +190,11 @@ __hot MDBX_INTERNAL size_t pnl_merge(pnl_t dst, const pnl_t src) { total += src_len; if (!MDBX_DEBUG && total < (MDBX_HAVE_CMOV ? 21 : 12)) goto avoid_call_libc_for_short_cases; - if (dst_len == 0 || - MDBX_PNL_ORDERED(MDBX_PNL_LAST(dst), MDBX_PNL_FIRST(src))) + if (dst_len == 0 || MDBX_PNL_ORDERED(MDBX_PNL_LAST(dst), MDBX_PNL_FIRST(src))) memcpy(MDBX_PNL_END(dst), MDBX_PNL_BEGIN(src), src_len * sizeof(pgno_t)); else if (MDBX_PNL_ORDERED(MDBX_PNL_LAST(src), MDBX_PNL_FIRST(dst))) { - memmove(MDBX_PNL_BEGIN(dst) + src_len, MDBX_PNL_BEGIN(dst), - dst_len * sizeof(pgno_t)); - memcpy(MDBX_PNL_BEGIN(dst), MDBX_PNL_BEGIN(src), - src_len * sizeof(pgno_t)); + memmove(MDBX_PNL_BEGIN(dst) + src_len, MDBX_PNL_BEGIN(dst), dst_len * sizeof(pgno_t)); + memcpy(MDBX_PNL_BEGIN(dst), MDBX_PNL_BEGIN(src), src_len * sizeof(pgno_t)); } else { avoid_call_libc_for_short_cases: dst[0] = /* the detent */ (MDBX_PNL_ASCENDING ? 
0 : P_INVALID); @@ -227,8 +211,7 @@ __hot MDBX_INTERNAL size_t pnl_merge(pnl_t dst, const pnl_t src) { #else #define MDBX_PNL_EXTRACT_KEY(ptr) (P_INVALID - *(ptr)) #endif -RADIXSORT_IMPL(pgno, pgno_t, MDBX_PNL_EXTRACT_KEY, - MDBX_PNL_PREALLOC_FOR_RADIXSORT, 0) +RADIXSORT_IMPL(pgno, pgno_t, MDBX_PNL_EXTRACT_KEY, MDBX_PNL_PREALLOC_FOR_RADIXSORT, 0) SORT_IMPL(pgno_sort, false, pgno_t, MDBX_PNL_ORDERED) @@ -240,8 +223,7 @@ __hot __noinline MDBX_INTERNAL void pnl_sort_nochk(pnl_t pnl) { SEARCH_IMPL(pgno_bsearch, pgno_t, pgno_t, MDBX_PNL_ORDERED) -__hot __noinline MDBX_INTERNAL size_t pnl_search_nochk(const pnl_t pnl, - pgno_t pgno) { +__hot __noinline MDBX_INTERNAL size_t pnl_search_nochk(const pnl_t pnl, pgno_t pgno) { const pgno_t *begin = MDBX_PNL_BEGIN(pnl); const pgno_t *it = pgno_bsearch(begin, MDBX_PNL_GETSIZE(pnl), pgno); const pgno_t *end = begin + MDBX_PNL_GETSIZE(pnl); diff --git a/src/pnl.h b/src/pnl.h index 8995b54d..ba033775 100644 --- a/src/pnl.h +++ b/src/pnl.h @@ -26,16 +26,15 @@ typedef const pgno_t *const_pnl_t; #define MDBX_PNL_GRANULATE_LOG2 10 #define MDBX_PNL_GRANULATE (1 << MDBX_PNL_GRANULATE_LOG2) -#define MDBX_PNL_INITIAL \ - (MDBX_PNL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t)) +#define MDBX_PNL_INITIAL (MDBX_PNL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t)) #define MDBX_PNL_ALLOCLEN(pl) ((pl)[-1]) #define MDBX_PNL_GETSIZE(pl) ((size_t)((pl)[0])) -#define MDBX_PNL_SETSIZE(pl, size) \ - do { \ - const size_t __size = size; \ - assert(__size < INT_MAX); \ - (pl)[0] = (pgno_t)__size; \ +#define MDBX_PNL_SETSIZE(pl, size) \ + do { \ + const size_t __size = size; \ + assert(__size < INT_MAX); \ + (pl)[0] = (pgno_t)__size; \ } while (0) #define MDBX_PNL_FIRST(pl) ((pl)[1]) #define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_GETSIZE(pl)]) @@ -62,13 +61,10 @@ MDBX_MAYBE_UNUSED static inline size_t pnl_size2bytes(size_t size) { size += size; #endif /* MDBX_PNL_PREALLOC_FOR_RADIXSORT */ STATIC_ASSERT(MDBX_ASSUME_MALLOC_OVERHEAD + 
- (PAGELIST_LIMIT * (MDBX_PNL_PREALLOC_FOR_RADIXSORT + 1) + - MDBX_PNL_GRANULATE + 3) * - sizeof(pgno_t) < + (PAGELIST_LIMIT * (MDBX_PNL_PREALLOC_FOR_RADIXSORT + 1) + MDBX_PNL_GRANULATE + 3) * sizeof(pgno_t) < SIZE_MAX / 4 * 3); size_t bytes = - ceil_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(pgno_t) * (size + 3), - MDBX_PNL_GRANULATE * sizeof(pgno_t)) - + ceil_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(pgno_t) * (size + 3), MDBX_PNL_GRANULATE * sizeof(pgno_t)) - MDBX_ASSUME_MALLOC_OVERHEAD; return bytes; } @@ -87,21 +83,16 @@ MDBX_INTERNAL pnl_t pnl_alloc(size_t size); MDBX_INTERNAL void pnl_free(pnl_t pnl); -MDBX_INTERNAL int pnl_reserve(pnl_t __restrict *__restrict ppnl, - const size_t wanna); +MDBX_INTERNAL int pnl_reserve(pnl_t __restrict *__restrict ppnl, const size_t wanna); -MDBX_MAYBE_UNUSED static inline int __must_check_result -pnl_need(pnl_t __restrict *__restrict ppnl, size_t num) { - assert(MDBX_PNL_GETSIZE(*ppnl) <= PAGELIST_LIMIT && - MDBX_PNL_ALLOCLEN(*ppnl) >= MDBX_PNL_GETSIZE(*ppnl)); +MDBX_MAYBE_UNUSED static inline int __must_check_result pnl_need(pnl_t __restrict *__restrict ppnl, size_t num) { + assert(MDBX_PNL_GETSIZE(*ppnl) <= PAGELIST_LIMIT && MDBX_PNL_ALLOCLEN(*ppnl) >= MDBX_PNL_GETSIZE(*ppnl)); assert(num <= PAGELIST_LIMIT); const size_t wanna = MDBX_PNL_GETSIZE(*ppnl) + num; - return likely(MDBX_PNL_ALLOCLEN(*ppnl) >= wanna) ? MDBX_SUCCESS - : pnl_reserve(ppnl, wanna); + return likely(MDBX_PNL_ALLOCLEN(*ppnl) >= wanna) ? 
MDBX_SUCCESS : pnl_reserve(ppnl, wanna); } -MDBX_MAYBE_UNUSED static inline void -pnl_append_prereserved(__restrict pnl_t pnl, pgno_t pgno) { +MDBX_MAYBE_UNUSED static inline void pnl_append_prereserved(__restrict pnl_t pnl, pgno_t pgno) { assert(MDBX_PNL_GETSIZE(pnl) < MDBX_PNL_ALLOCLEN(pnl)); if (AUDIT_ENABLED()) { for (size_t i = MDBX_PNL_GETSIZE(pnl); i > 0; --i) @@ -113,14 +104,11 @@ pnl_append_prereserved(__restrict pnl_t pnl, pgno_t pgno) { MDBX_INTERNAL void pnl_shrink(pnl_t __restrict *__restrict ppnl); -MDBX_INTERNAL int __must_check_result spill_append_span(__restrict pnl_t *ppnl, - pgno_t pgno, size_t n); +MDBX_INTERNAL int __must_check_result spill_append_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n); -MDBX_INTERNAL int __must_check_result pnl_append_span(__restrict pnl_t *ppnl, - pgno_t pgno, size_t n); +MDBX_INTERNAL int __must_check_result pnl_append_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n); -MDBX_INTERNAL int __must_check_result pnl_insert_span(__restrict pnl_t *ppnl, - pgno_t pgno, size_t n); +MDBX_INTERNAL int __must_check_result pnl_insert_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n); MDBX_INTERNAL size_t pnl_search_nochk(const pnl_t pnl, pgno_t pgno); @@ -128,10 +116,8 @@ MDBX_INTERNAL void pnl_sort_nochk(pnl_t pnl); MDBX_INTERNAL bool pnl_check(const const_pnl_t pnl, const size_t limit); -MDBX_MAYBE_UNUSED static inline bool pnl_check_allocated(const const_pnl_t pnl, - const size_t limit) { - return pnl == nullptr || (MDBX_PNL_ALLOCLEN(pnl) >= MDBX_PNL_GETSIZE(pnl) && - pnl_check(pnl, limit)); +MDBX_MAYBE_UNUSED static inline bool pnl_check_allocated(const const_pnl_t pnl, const size_t limit) { + return pnl == nullptr || (MDBX_PNL_ALLOCLEN(pnl) >= MDBX_PNL_GETSIZE(pnl) && pnl_check(pnl, limit)); } MDBX_MAYBE_UNUSED static inline void pnl_sort(pnl_t pnl, size_t limit4check) { @@ -140,8 +126,7 @@ MDBX_MAYBE_UNUSED static inline void pnl_sort(pnl_t pnl, size_t limit4check) { (void)limit4check; } -MDBX_MAYBE_UNUSED static 
inline size_t pnl_search(const pnl_t pnl, pgno_t pgno, - size_t limit) { +MDBX_MAYBE_UNUSED static inline size_t pnl_search(const pnl_t pnl, pgno_t pgno, size_t limit) { assert(pnl_check_allocated(pnl, limit)); if (MDBX_HAVE_CMOV) { /* cmov-ускоренный бинарный поиск может читать (но не использовать) один diff --git a/src/preface.h b/src/preface.h index 8bffbf6e..962c7ae7 100644 --- a/src/preface.h +++ b/src/preface.h @@ -4,8 +4,7 @@ #pragma once /* Undefine the NDEBUG if debugging is enforced by MDBX_DEBUG */ -#if (defined(MDBX_DEBUG) && MDBX_DEBUG > 0) || \ - (defined(MDBX_FORCE_ASSERTIONS) && MDBX_FORCE_ASSERTIONS) +#if (defined(MDBX_DEBUG) && MDBX_DEBUG > 0) || (defined(MDBX_FORCE_ASSERTIONS) && MDBX_FORCE_ASSERTIONS) #undef NDEBUG #ifndef MDBX_DEBUG /* Чтобы избежать включения отладки только из-за включения assert-проверок */ @@ -29,8 +28,7 @@ #endif /* MDBX_DISABLE_GNU_SOURCE */ /* Should be defined before any includes */ -#if !defined(_FILE_OFFSET_BITS) && !defined(__ANDROID_API__) && \ - !defined(ANDROID) +#if !defined(_FILE_OFFSET_BITS) && !defined(__ANDROID_API__) && !defined(ANDROID) #define _FILE_OFFSET_BITS 64 #endif /* _FILE_OFFSET_BITS */ @@ -38,8 +36,7 @@ #define _DARWIN_C_SOURCE #endif /* _DARWIN_C_SOURCE */ -#if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) && \ - !defined(__USE_MINGW_ANSI_STDIO) +#if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) && !defined(__USE_MINGW_ANSI_STDIO) #define __USE_MINGW_ANSI_STDIO 1 #endif /* MinGW */ @@ -56,8 +53,7 @@ #define UNICODE #endif /* UNICODE */ -#if !defined(_NO_CRT_STDIO_INLINE) && MDBX_BUILD_SHARED_LIBRARY && \ - !defined(xMDBX_TOOLS) && MDBX_WITHOUT_MSVC_CRT +#if !defined(_NO_CRT_STDIO_INLINE) && MDBX_BUILD_SHARED_LIBRARY && !defined(xMDBX_TOOLS) && MDBX_WITHOUT_MSVC_CRT #define _NO_CRT_STDIO_INLINE #endif /* _NO_CRT_STDIO_INLINE */ @@ -72,8 +68,7 @@ #endif /* NOMINMAX */ /* Workaround for modern libstdc++ with CLANG < 4.x */ -#if defined(__SIZEOF_INT128__) 
&& !defined(__GLIBCXX_TYPE_INT_N_0) && \ - defined(__clang__) && __clang_major__ < 4 +#if defined(__SIZEOF_INT128__) && !defined(__GLIBCXX_TYPE_INT_N_0) && defined(__clang__) && __clang_major__ < 4 #define __GLIBCXX_BITSIZE_INT_N_0 128 #define __GLIBCXX_TYPE_INT_N_0 __int128 #endif /* Workaround for modern libstdc++ with CLANG < 4.x */ @@ -107,8 +102,7 @@ * and how to and where you can obtain the latest "Visual Studio 2015" build * with all fixes. */ -#error \ - "At least \"Microsoft C/C++ Compiler\" version 19.00.24234 (Visual Studio 2015 Update 3) is required." +#error "At least \"Microsoft C/C++ Compiler\" version 19.00.24234 (Visual Studio 2015 Update 3) is required." #endif #if _MSC_VER > 1800 #pragma warning(disable : 4464) /* relative include path contains '..' */ @@ -117,9 +111,8 @@ #pragma warning(disable : 5045) /* will insert Spectre mitigation... */ #endif #if _MSC_VER > 1914 -#pragma warning( \ - disable : 5105) /* winbase.h(9531): warning C5105: macro expansion \ - producing 'defined' has undefined behavior */ +#pragma warning(disable : 5105) /* winbase.h(9531): warning C5105: macro expansion \ + producing 'defined' has undefined behavior */ #endif #if _MSC_VER < 1920 /* avoid "error C2219: syntax error: type qualifier must be after '*'" */ @@ -127,33 +120,32 @@ #endif #if _MSC_VER > 1930 #pragma warning(disable : 6235) /* is always a constant */ -#pragma warning(disable : 6237) /* is never evaluated and might \ +#pragma warning(disable : 6237) /* is never evaluated and might \ have side effects */ #endif #pragma warning(disable : 4710) /* 'xyz': function not inlined */ -#pragma warning(disable : 4711) /* function 'xyz' selected for automatic \ +#pragma warning(disable : 4711) /* function 'xyz' selected for automatic \ inline expansion */ -#pragma warning(disable : 4201) /* nonstandard extension used: nameless \ +#pragma warning(disable : 4201) /* nonstandard extension used: nameless \ struct/union */ #pragma warning(disable : 4702) /* unreachable 
code */ #pragma warning(disable : 4706) /* assignment within conditional expression */ #pragma warning(disable : 4127) /* conditional expression is constant */ -#pragma warning(disable : 4324) /* 'xyz': structure was padded due to \ +#pragma warning(disable : 4324) /* 'xyz': structure was padded due to \ alignment specifier */ #pragma warning(disable : 4310) /* cast truncates constant value */ -#pragma warning(disable : 4820) /* bytes padding added after data member for \ +#pragma warning(disable : 4820) /* bytes padding added after data member for \ alignment */ -#pragma warning(disable : 4548) /* expression before comma has no effect; \ +#pragma warning(disable : 4548) /* expression before comma has no effect; \ expected expression with side - effect */ -#pragma warning(disable : 4366) /* the result of the unary '&' operator may be \ +#pragma warning(disable : 4366) /* the result of the unary '&' operator may be \ unaligned */ -#pragma warning(disable : 4200) /* nonstandard extension used: zero-sized \ +#pragma warning(disable : 4200) /* nonstandard extension used: zero-sized \ array in struct/union */ -#pragma warning(disable : 4204) /* nonstandard extension used: non-constant \ +#pragma warning(disable : 4204) /* nonstandard extension used: non-constant \ aggregate initializer */ -#pragma warning( \ - disable : 4505) /* unreferenced local function has been removed */ -#endif /* _MSC_VER (warnings) */ +#pragma warning(disable : 4505) /* unreferenced local function has been removed */ +#endif /* _MSC_VER (warnings) */ #if defined(__GNUC__) && __GNUC__ < 9 #pragma GCC diagnostic ignored "-Wattributes" @@ -166,12 +158,12 @@ #ifdef _MSC_VER #pragma warning(push, 1) -#pragma warning(disable : 4548) /* expression before comma has no effect; \ +#pragma warning(disable : 4548) /* expression before comma has no effect; \ expected expression with side - effect */ -#pragma warning(disable : 4530) /* C++ exception handler used, but unwind \ +#pragma warning(disable : 4530) 
/* C++ exception handler used, but unwind \ * semantics are not enabled. Specify /EHsc */ -#pragma warning(disable : 4577) /* 'noexcept' used with no exception handling \ - * mode specified; termination on exception is \ +#pragma warning(disable : 4577) /* 'noexcept' used with no exception handling \ + * mode specified; termination on exception is \ * not guaranteed. Specify /EHsc */ #endif /* _MSC_VER (warnings) */ @@ -232,8 +224,7 @@ #ifndef __GNUC_PREREQ #if defined(__GNUC__) && defined(__GNUC_MINOR__) -#define __GNUC_PREREQ(maj, min) \ - ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +#define __GNUC_PREREQ(maj, min) ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) #else #define __GNUC_PREREQ(maj, min) (0) #endif @@ -241,8 +232,7 @@ #ifndef __CLANG_PREREQ #ifdef __clang__ -#define __CLANG_PREREQ(maj, min) \ - ((__clang_major__ << 16) + __clang_minor__ >= ((maj) << 16) + (min)) +#define __CLANG_PREREQ(maj, min) ((__clang_major__ << 16) + __clang_minor__ >= ((maj) << 16) + (min)) #else #define __CLANG_PREREQ(maj, min) (0) #endif @@ -250,8 +240,7 @@ #ifndef __GLIBC_PREREQ #if defined(__GLIBC__) && defined(__GLIBC_MINOR__) -#define __GLIBC_PREREQ(maj, min) \ - ((__GLIBC__ << 16) + __GLIBC_MINOR__ >= ((maj) << 16) + (min)) +#define __GLIBC_PREREQ(maj, min) ((__GLIBC__ << 16) + __GLIBC_MINOR__ >= ((maj) << 16) + (min)) #else #define __GLIBC_PREREQ(maj, min) (0) #endif @@ -261,8 +250,7 @@ /* pre-requirements */ #if (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF -#error \ - "Sanity checking failed: Two's complement, reasonably sized integer types" +#error "Sanity checking failed: Two's complement, reasonably sized integer types" #endif #ifndef SSIZE_MAX @@ -294,8 +282,7 @@ #endif #ifdef __SANITIZE_THREAD__ -#warning \ - "libmdbx don't compatible with ThreadSanitizer, you will get a lot of false-positive issues." +#warning "libmdbx don't compatible with ThreadSanitizer, you will get a lot of false-positive issues." 
#endif /* __SANITIZE_THREAD__ */ /*----------------------------------------------------------------------------*/ @@ -327,8 +314,7 @@ #endif #endif /* __extern_C */ -#if !defined(nullptr) && !defined(__cplusplus) || \ - (__cplusplus < 201103L && !defined(_MSC_VER)) +#if !defined(nullptr) && !defined(__cplusplus) || (__cplusplus < 201103L && !defined(_MSC_VER)) #define nullptr NULL #endif @@ -340,9 +326,8 @@ #endif #endif /* Apple OSX & iOS */ -#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ - defined(__BSD__) || defined(__bsdi__) || defined(__DragonFly__) || \ - defined(__APPLE__) || defined(__MACH__) +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__BSD__) || defined(__bsdi__) || \ + defined(__DragonFly__) || defined(__APPLE__) || defined(__MACH__) #include #include #include @@ -359,8 +344,7 @@ #endif #else #include -#if !(defined(__sun) || defined(__SVR4) || defined(__svr4__) || \ - defined(_WIN32) || defined(_WIN64)) +#if !(defined(__sun) || defined(__SVR4) || defined(__svr4__) || defined(_WIN32) || defined(_WIN64)) #include #endif /* !Solaris */ #endif /* !xBSD */ @@ -469,43 +453,38 @@ __extern_C key_t ftok(const char *, int); /*----------------------------------------------------------------------------*/ /* Byteorder */ -#if defined(i386) || defined(__386) || defined(__i386) || defined(__i386__) || \ - defined(i486) || defined(__i486) || defined(__i486__) || defined(i586) || \ - defined(__i586) || defined(__i586__) || defined(i686) || \ - defined(__i686) || defined(__i686__) || defined(_M_IX86) || \ - defined(_X86_) || defined(__THW_INTEL__) || defined(__I86__) || \ - defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__) || \ - defined(__amd64__) || defined(__amd64) || defined(_M_X64) || \ - defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__) +#if defined(i386) || defined(__386) || defined(__i386) || defined(__i386__) || defined(i486) || defined(__i486) || \ + defined(__i486__) 
|| defined(i586) || defined(__i586) || defined(__i586__) || defined(i686) || defined(__i686) || \ + defined(__i686__) || defined(_M_IX86) || defined(_X86_) || defined(__THW_INTEL__) || defined(__I86__) || \ + defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__) || defined(__amd64__) || defined(__amd64) || \ + defined(_M_X64) || defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__) #ifndef __ia32__ /* LY: define neutral __ia32__ for x86 and x86-64 */ #define __ia32__ 1 #endif /* __ia32__ */ -#if !defined(__amd64__) && \ - (defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || \ - defined(_M_X64) || defined(_M_AMD64)) +#if !defined(__amd64__) && \ + (defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64) || defined(_M_AMD64)) /* LY: define trusty __amd64__ for all AMD64/x86-64 arch */ #define __amd64__ 1 #endif /* __amd64__ */ #endif /* all x86 */ -#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) || \ - !defined(__ORDER_BIG_ENDIAN__) +#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) || !defined(__ORDER_BIG_ENDIAN__) -#if defined(__GLIBC__) || defined(__GNU_LIBRARY__) || \ - defined(__ANDROID_API__) || defined(HAVE_ENDIAN_H) || __has_include() +#if defined(__GLIBC__) || defined(__GNU_LIBRARY__) || defined(__ANDROID_API__) || defined(HAVE_ENDIAN_H) || \ + __has_include() #include -#elif defined(__APPLE__) || defined(__MACH__) || defined(__OpenBSD__) || \ - defined(HAVE_MACHINE_ENDIAN_H) || __has_include() +#elif defined(__APPLE__) || defined(__MACH__) || defined(__OpenBSD__) || defined(HAVE_MACHINE_ENDIAN_H) || \ + __has_include() #include #elif defined(HAVE_SYS_ISA_DEFS_H) || __has_include() #include -#elif (defined(HAVE_SYS_TYPES_H) && defined(HAVE_SYS_ENDIAN_H)) || \ +#elif (defined(HAVE_SYS_TYPES_H) && defined(HAVE_SYS_ENDIAN_H)) || \ (__has_include() && __has_include()) #include #include -#elif defined(__bsdi__) || defined(__DragonFly__) || defined(__FreeBSD__) || \ - 
defined(__NetBSD__) || defined(HAVE_SYS_PARAM_H) || __has_include() +#elif defined(__bsdi__) || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || \ + defined(HAVE_SYS_PARAM_H) || __has_include() #include #endif /* OS */ @@ -521,27 +500,19 @@ __extern_C key_t ftok(const char *, int); #define __ORDER_LITTLE_ENDIAN__ 1234 #define __ORDER_BIG_ENDIAN__ 4321 -#if defined(__LITTLE_ENDIAN__) || \ - (defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)) || \ - defined(__ARMEL__) || defined(__THUMBEL__) || defined(__AARCH64EL__) || \ - defined(__MIPSEL__) || defined(_MIPSEL) || defined(__MIPSEL) || \ - defined(_M_ARM) || defined(_M_ARM64) || defined(__e2k__) || \ - defined(__elbrus_4c__) || defined(__elbrus_8c__) || defined(__bfin__) || \ - defined(__BFIN__) || defined(__ia64__) || defined(_IA64) || \ - defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || \ - defined(__itanium__) || defined(__ia32__) || defined(__CYGWIN__) || \ - defined(_WIN64) || defined(_WIN32) || defined(__TOS_WIN__) || \ - defined(__WINDOWS__) +#if defined(__LITTLE_ENDIAN__) || (defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)) || defined(__ARMEL__) || \ + defined(__THUMBEL__) || defined(__AARCH64EL__) || defined(__MIPSEL__) || defined(_MIPSEL) || defined(__MIPSEL) || \ + defined(_M_ARM) || defined(_M_ARM64) || defined(__e2k__) || defined(__elbrus_4c__) || defined(__elbrus_8c__) || \ + defined(__bfin__) || defined(__BFIN__) || defined(__ia64__) || defined(_IA64) || defined(__IA64__) || \ + defined(__ia64) || defined(_M_IA64) || defined(__itanium__) || defined(__ia32__) || defined(__CYGWIN__) || \ + defined(_WIN64) || defined(_WIN32) || defined(__TOS_WIN__) || defined(__WINDOWS__) #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ -#elif defined(__BIG_ENDIAN__) || \ - (defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)) || \ - defined(__ARMEB__) || defined(__THUMBEB__) || defined(__AARCH64EB__) || \ - defined(__MIPSEB__) || defined(_MIPSEB) || defined(__MIPSEB) || \ - 
defined(__m68k__) || defined(M68000) || defined(__hppa__) || \ - defined(__hppa) || defined(__HPPA__) || defined(__sparc__) || \ - defined(__sparc) || defined(__370__) || defined(__THW_370__) || \ - defined(__s390__) || defined(__s390x__) || defined(__SYSC_ZARCH__) +#elif defined(__BIG_ENDIAN__) || (defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)) || defined(__ARMEB__) || \ + defined(__THUMBEB__) || defined(__AARCH64EB__) || defined(__MIPSEB__) || defined(_MIPSEB) || defined(__MIPSEB) || \ + defined(__m68k__) || defined(M68000) || defined(__hppa__) || defined(__hppa) || defined(__HPPA__) || \ + defined(__sparc__) || defined(__sparc) || defined(__370__) || defined(__THW_370__) || defined(__s390__) || \ + defined(__s390x__) || defined(__SYSC_ZARCH__) #define __BYTE_ORDER__ __ORDER_BIG_ENDIAN__ #else @@ -561,17 +532,14 @@ __extern_C key_t ftok(const char *, int); #define MDBX_HAVE_CMOV 1 #elif defined(__thumb__) || defined(__thumb) || defined(__TARGET_ARCH_THUMB) #define MDBX_HAVE_CMOV 0 -#elif defined(_M_ARM) || defined(_M_ARM64) || defined(__aarch64__) || \ - defined(__aarch64) || defined(__arm__) || defined(__arm) || \ - defined(__CC_ARM) +#elif defined(_M_ARM) || defined(_M_ARM64) || defined(__aarch64__) || defined(__aarch64) || defined(__arm__) || \ + defined(__arm) || defined(__CC_ARM) #define MDBX_HAVE_CMOV 1 -#elif (defined(__riscv__) || defined(__riscv64)) && \ - (defined(__riscv_b) || defined(__riscv_bitmanip)) +#elif (defined(__riscv__) || defined(__riscv64)) && (defined(__riscv_b) || defined(__riscv_bitmanip)) #define MDBX_HAVE_CMOV 1 -#elif defined(i686) || defined(__i686) || defined(__i686__) || \ - (defined(_M_IX86) && _M_IX86 > 600) || defined(__x86_64) || \ - defined(__x86_64__) || defined(__amd64__) || defined(__amd64) || \ - defined(_M_X64) || defined(_M_AMD64) +#elif defined(i686) || defined(__i686) || defined(__i686__) || (defined(_M_IX86) && _M_IX86 > 600) || \ + defined(__x86_64) || defined(__x86_64__) || defined(__amd64__) || defined(__amd64) 
|| defined(_M_X64) || \ + defined(_M_AMD64) #define MDBX_HAVE_CMOV 1 #else #define MDBX_HAVE_CMOV 0 @@ -597,8 +565,7 @@ __extern_C key_t ftok(const char *, int); #endif #elif defined(__SUNPRO_C) || defined(__sun) || defined(sun) #include -#elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) && \ - (defined(HP_IA64) || defined(__ia64)) +#elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) && (defined(HP_IA64) || defined(__ia64)) #include #elif defined(__IBMC__) && defined(__powerpc) #include @@ -620,29 +587,26 @@ __extern_C key_t ftok(const char *, int); #endif /* Compiler */ #if !defined(__noop) && !defined(_MSC_VER) -#define __noop \ - do { \ +#define __noop \ + do { \ } while (0) #endif /* __noop */ -#if defined(__fallthrough) && \ - (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) +#if defined(__fallthrough) && (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) #undef __fallthrough #endif /* __fallthrough workaround for MinGW */ #ifndef __fallthrough -#if defined(__cplusplus) && (__has_cpp_attribute(fallthrough) && \ - (!defined(__clang__) || __clang__ > 4)) || \ +#if defined(__cplusplus) && (__has_cpp_attribute(fallthrough) && (!defined(__clang__) || __clang__ > 4)) || \ __cplusplus >= 201703L #define __fallthrough [[fallthrough]] #elif __GNUC_PREREQ(8, 0) && defined(__cplusplus) && __cplusplus >= 201103L #define __fallthrough [[fallthrough]] -#elif __GNUC_PREREQ(7, 0) && \ - (!defined(__LCC__) || (__LCC__ == 124 && __LCC_MINOR__ >= 12) || \ - (__LCC__ == 125 && __LCC_MINOR__ >= 5) || (__LCC__ >= 126)) +#elif __GNUC_PREREQ(7, 0) && (!defined(__LCC__) || (__LCC__ == 124 && __LCC_MINOR__ >= 12) || \ + (__LCC__ == 125 && __LCC_MINOR__ >= 5) || (__LCC__ >= 126)) #define __fallthrough __attribute__((__fallthrough__)) -#elif defined(__clang__) && defined(__cplusplus) && __cplusplus >= 201103L && \ - __has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough") +#elif defined(__clang__) 
&& defined(__cplusplus) && __cplusplus >= 201103L && __has_feature(cxx_attributes) && \ + __has_warning("-Wimplicit-fallthrough") #define __fallthrough [[clang::fallthrough]] #else #define __fallthrough @@ -655,8 +619,8 @@ __extern_C key_t ftok(const char *, int); #elif defined(_MSC_VER) #define __unreachable() __assume(0) #else -#define __unreachable() \ - do { \ +#define __unreachable() \ + do { \ } while (1) #endif #endif /* __unreachable */ @@ -665,9 +629,9 @@ __extern_C key_t ftok(const char *, int); #if defined(__GNUC__) || defined(__clang__) || __has_builtin(__builtin_prefetch) #define __prefetch(ptr) __builtin_prefetch(ptr) #else -#define __prefetch(ptr) \ - do { \ - (void)(ptr); \ +#define __prefetch(ptr) \ + do { \ + (void)(ptr); \ } while (0) #endif #endif /* __prefetch */ @@ -677,8 +641,7 @@ __extern_C key_t ftok(const char *, int); #endif /* offsetof */ #ifndef container_of -#define container_of(ptr, type, member) \ - ((type *)((char *)(ptr) - offsetof(type, member))) +#define container_of(ptr, type, member) ((type *)((char *)(ptr) - offsetof(type, member))) #endif /* container_of */ /*----------------------------------------------------------------------------*/ @@ -750,8 +713,7 @@ __extern_C key_t ftok(const char *, int); #ifndef __hot #if defined(__OPTIMIZE__) -#if defined(__clang__) && !__has_attribute(__hot__) && \ - __has_attribute(__section__) && \ +#if defined(__clang__) && !__has_attribute(__hot__) && __has_attribute(__section__) && \ (defined(__linux__) || defined(__gnu_linux__)) /* just put frequently used functions in separate section */ #define __hot __attribute__((__section__("text.hot"))) __optimize("O3") @@ -767,8 +729,7 @@ __extern_C key_t ftok(const char *, int); #ifndef __cold #if defined(__OPTIMIZE__) -#if defined(__clang__) && !__has_attribute(__cold__) && \ - __has_attribute(__section__) && \ +#if defined(__clang__) && !__has_attribute(__cold__) && __has_attribute(__section__) && \ (defined(__linux__) || defined(__gnu_linux__)) /* 
just put infrequently used functions in separate section */ #define __cold __attribute__((__section__("text.unlikely"))) __optimize("Os") @@ -791,8 +752,7 @@ __extern_C key_t ftok(const char *, int); #endif /* __flatten */ #ifndef likely -#if (defined(__GNUC__) || __has_builtin(__builtin_expect)) && \ - !defined(__COVERITY__) +#if (defined(__GNUC__) || __has_builtin(__builtin_expect)) && !defined(__COVERITY__) #define likely(cond) __builtin_expect(!!(cond), 1) #else #define likely(x) (!!(x)) @@ -800,8 +760,7 @@ __extern_C key_t ftok(const char *, int); #endif /* likely */ #ifndef unlikely -#if (defined(__GNUC__) || __has_builtin(__builtin_expect)) && \ - !defined(__COVERITY__) +#if (defined(__GNUC__) || __has_builtin(__builtin_expect)) && !defined(__COVERITY__) #define unlikely(cond) __builtin_expect(!!(cond), 0) #else #define unlikely(x) (!!(x)) @@ -821,8 +780,7 @@ __extern_C key_t ftok(const char *, int); #define MDBX_WEAK_IMPORT_ATTRIBUTE WEAK_IMPORT_ATTRIBUTE #elif __has_attribute(__weak__) && __has_attribute(__weak_import__) #define MDBX_WEAK_IMPORT_ATTRIBUTE __attribute__((__weak__, __weak_import__)) -#elif __has_attribute(__weak__) || \ - (defined(__GNUC__) && __GNUC__ >= 4 && defined(__ELF__)) +#elif __has_attribute(__weak__) || (defined(__GNUC__) && __GNUC__ >= 4 && defined(__ELF__)) #define MDBX_WEAK_IMPORT_ATTRIBUTE __attribute__((__weak__)) #else #define MDBX_WEAK_IMPORT_ATTRIBUTE @@ -835,9 +793,7 @@ __extern_C key_t ftok(const char *, int); #ifndef MDBX_EXCLUDE_FOR_GPROF #ifdef ENABLE_GPROF -#define MDBX_EXCLUDE_FOR_GPROF \ - __attribute__((__no_instrument_function__, \ - __no_profile_instrument_function__)) +#define MDBX_EXCLUDE_FOR_GPROF __attribute__((__no_instrument_function__, __no_profile_instrument_function__)) #else #define MDBX_EXCLUDE_FOR_GPROF #endif /* ENABLE_GPROF */ @@ -846,10 +802,9 @@ __extern_C key_t ftok(const char *, int); /*----------------------------------------------------------------------------*/ #ifndef expect_with_probability 
-#if defined(__builtin_expect_with_probability) || \ - __has_builtin(__builtin_expect_with_probability) || __GNUC_PREREQ(9, 0) -#define expect_with_probability(expr, value, prob) \ - __builtin_expect_with_probability(expr, value, prob) +#if defined(__builtin_expect_with_probability) || __has_builtin(__builtin_expect_with_probability) || \ + __GNUC_PREREQ(9, 0) +#define expect_with_probability(expr, value, prob) __builtin_expect_with_probability(expr, value, prob) #else #define expect_with_probability(expr, value, prob) (expr) #endif @@ -866,11 +821,9 @@ __extern_C key_t ftok(const char *, int); #if MDBX_GOOFY_MSVC_STATIC_ANALYZER || (defined(_MSC_VER) && _MSC_VER > 1919) #define MDBX_ANALYSIS_ASSUME(expr) __analysis_assume(expr) #ifdef _PREFAST_ -#define MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(warn_id) \ - __pragma(prefast(suppress : warn_id)) +#define MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(warn_id) __pragma(prefast(suppress : warn_id)) #else -#define MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(warn_id) \ - __pragma(warning(suppress : warn_id)) +#define MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(warn_id) __pragma(warning(suppress : warn_id)) #endif #else #define MDBX_ANALYSIS_ASSUME(expr) assert(expr) @@ -878,8 +831,7 @@ __extern_C key_t ftok(const char *, int); #endif /* MDBX_GOOFY_MSVC_STATIC_ANALYZER */ #ifndef FLEXIBLE_ARRAY_MEMBERS -#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ - (!defined(__cplusplus) && defined(_MSC_VER)) +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (!defined(__cplusplus) && defined(_MSC_VER)) #define FLEXIBLE_ARRAY_MEMBERS 1 #else #define FLEXIBLE_ARRAY_MEMBERS 0 @@ -938,8 +890,7 @@ template char (&__ArraySizeHelper(T (&array)[N]))[N]; #define CONCAT(a, b) a##b #define XCONCAT(a, b) CONCAT(a, b) -#define MDBX_TETRAD(a, b, c, d) \ - ((uint32_t)(a) << 24 | (uint32_t)(b) << 16 | (uint32_t)(c) << 8 | (d)) +#define MDBX_TETRAD(a, b, c, d) ((uint32_t)(a) << 24 | (uint32_t)(b) << 16 | (uint32_t)(c) << 8 | (d)) #define 
MDBX_STRING_TETRAD(str) MDBX_TETRAD(str[0], str[1], str[2], str[3]) @@ -953,14 +904,13 @@ template char (&__ArraySizeHelper(T (&array)[N]))[N]; #elif defined(_MSC_VER) #include #define STATIC_ASSERT_MSG(expr, msg) _STATIC_ASSERT(expr) -#elif (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \ - __has_feature(c_static_assert) +#elif (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || __has_feature(c_static_assert) #define STATIC_ASSERT_MSG(expr, msg) _Static_assert(expr, msg) #else -#define STATIC_ASSERT_MSG(expr, msg) \ - switch (0) { \ - case 0: \ - case (expr):; \ +#define STATIC_ASSERT_MSG(expr, msg) \ + switch (0) { \ + case 0: \ + case (expr):; \ } #endif #endif /* STATIC_ASSERT */ diff --git a/src/proto.h b/src/proto.h index 28562eb2..d4cc67f4 100644 --- a/src/proto.h +++ b/src/proto.h @@ -8,39 +8,26 @@ /* Internal prototypes */ /* audit.c */ -MDBX_INTERNAL int audit_ex(MDBX_txn *txn, size_t retired_stored, - bool dont_filter_gc); +MDBX_INTERNAL int audit_ex(MDBX_txn *txn, size_t retired_stored, bool dont_filter_gc); /* mvcc-readers.c */ MDBX_INTERNAL bsr_t mvcc_bind_slot(MDBX_env *env); -MDBX_MAYBE_UNUSED MDBX_INTERNAL pgno_t mvcc_largest_this(MDBX_env *env, - pgno_t largest); -MDBX_INTERNAL txnid_t mvcc_shapshot_oldest(MDBX_env *const env, - const txnid_t steady); -MDBX_INTERNAL pgno_t mvcc_snapshot_largest(const MDBX_env *env, - pgno_t last_used_page); -MDBX_INTERNAL txnid_t mvcc_kick_laggards(MDBX_env *env, - const txnid_t straggler); +MDBX_MAYBE_UNUSED MDBX_INTERNAL pgno_t mvcc_largest_this(MDBX_env *env, pgno_t largest); +MDBX_INTERNAL txnid_t mvcc_shapshot_oldest(MDBX_env *const env, const txnid_t steady); +MDBX_INTERNAL pgno_t mvcc_snapshot_largest(const MDBX_env *env, pgno_t last_used_page); +MDBX_INTERNAL txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler); MDBX_INTERNAL int mvcc_cleanup_dead(MDBX_env *env, int rlocked, int *dead); MDBX_INTERNAL txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t laggard); /* 
dxb.c */ -MDBX_INTERNAL int dxb_setup(MDBX_env *env, const int lck_rc, - const mdbx_mode_t mode_bits); -MDBX_INTERNAL int __must_check_result -dxb_read_header(MDBX_env *env, meta_t *meta, const int lck_exclusive, - const mdbx_mode_t mode_bits); +MDBX_INTERNAL int dxb_setup(MDBX_env *env, const int lck_rc, const mdbx_mode_t mode_bits); +MDBX_INTERNAL int __must_check_result dxb_read_header(MDBX_env *env, meta_t *meta, const int lck_exclusive, + const mdbx_mode_t mode_bits); enum resize_mode { implicit_grow, impilict_shrink, explicit_resize }; -MDBX_INTERNAL int __must_check_result dxb_resize(MDBX_env *const env, - const pgno_t used_pgno, - const pgno_t size_pgno, - pgno_t limit_pgno, - const enum resize_mode mode); -MDBX_INTERNAL int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, - const bool enable, const bool force_whole); -MDBX_INTERNAL int __must_check_result dxb_sync_locked(MDBX_env *env, - unsigned flags, - meta_t *const pending, +MDBX_INTERNAL int __must_check_result dxb_resize(MDBX_env *const env, const pgno_t used_pgno, const pgno_t size_pgno, + pgno_t limit_pgno, const enum resize_mode mode); +MDBX_INTERNAL int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, const bool enable, const bool force_whole); +MDBX_INTERNAL int __must_check_result dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, troika_t *const troika); #if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) MDBX_INTERNAL void dxb_sanitize_tail(MDBX_env *env, MDBX_txn *txn); @@ -60,9 +47,8 @@ MDBX_INTERNAL int txn_park(MDBX_txn *txn, bool autounpark); MDBX_INTERNAL int txn_unpark(MDBX_txn *txn); MDBX_INTERNAL int txn_check_badbits_parked(const MDBX_txn *txn, int bad_bits); -#define TXN_END_NAMES \ - {"committed", "empty-commit", "abort", "reset", \ - "fail-begin", "fail-beginchild", "ousted", nullptr} +#define TXN_END_NAMES \ + {"committed", "empty-commit", "abort", "reset", "fail-begin", "fail-beginchild", "ousted", nullptr} enum { /* txn_end 
operation number, for logging */ TXN_END_COMMITTED, @@ -84,8 +70,7 @@ MDBX_INTERNAL int txn_write(MDBX_txn *txn, iov_ctx_t *ctx); /* env.c */ MDBX_INTERNAL int env_open(MDBX_env *env, mdbx_mode_t mode); -MDBX_INTERNAL int env_info(const MDBX_env *env, const MDBX_txn *txn, - MDBX_envinfo *out, size_t bytes, troika_t *troika); +MDBX_INTERNAL int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, size_t bytes, troika_t *troika); MDBX_INTERNAL int env_sync(MDBX_env *env, bool force, bool nonblock); MDBX_INTERNAL int env_close(MDBX_env *env, bool resurrect_after_fork); MDBX_INTERNAL bool env_txn0_owned(const MDBX_env *env); @@ -97,27 +82,17 @@ MDBX_INTERNAL unsigned env_setup_pagesize(MDBX_env *env, const size_t pagesize); /* tree.c */ MDBX_INTERNAL int tree_drop(MDBX_cursor *mc, const bool may_have_tables); MDBX_INTERNAL int __must_check_result tree_rebalance(MDBX_cursor *mc); -MDBX_INTERNAL int __must_check_result tree_propagate_key(MDBX_cursor *mc, - const MDBX_val *key); +MDBX_INTERNAL int __must_check_result tree_propagate_key(MDBX_cursor *mc, const MDBX_val *key); MDBX_INTERNAL void recalculate_merge_thresholds(MDBX_env *env); MDBX_INTERNAL void recalculate_subpage_thresholds(MDBX_env *env); /* table.c */ MDBX_INTERNAL int __must_check_result tbl_fetch(MDBX_txn *txn, size_t dbi); -MDBX_INTERNAL int __must_check_result tbl_setup(const MDBX_env *env, - kvx_t *const kvx, - const tree_t *const db); +MDBX_INTERNAL int __must_check_result tbl_setup(const MDBX_env *env, kvx_t *const kvx, const tree_t *const db); /* coherency.c */ -MDBX_INTERNAL bool coherency_check_meta(const MDBX_env *env, - const volatile meta_t *meta, - bool report); -MDBX_INTERNAL int coherency_fetch_head(MDBX_txn *txn, const meta_ptr_t head, - uint64_t *timestamp); -MDBX_INTERNAL int coherency_check_written(const MDBX_env *env, - const txnid_t txnid, - const volatile meta_t *meta, - const intptr_t pgno, - uint64_t *timestamp); -MDBX_INTERNAL int coherency_timeout(uint64_t 
*timestamp, intptr_t pgno, - const MDBX_env *env); +MDBX_INTERNAL bool coherency_check_meta(const MDBX_env *env, const volatile meta_t *meta, bool report); +MDBX_INTERNAL int coherency_fetch_head(MDBX_txn *txn, const meta_ptr_t head, uint64_t *timestamp); +MDBX_INTERNAL int coherency_check_written(const MDBX_env *env, const txnid_t txnid, const volatile meta_t *meta, + const intptr_t pgno, uint64_t *timestamp); +MDBX_INTERNAL int coherency_timeout(uint64_t *timestamp, intptr_t pgno, const MDBX_env *env); diff --git a/src/range-estimate.c b/src/range-estimate.c index 2deb3905..ea093088 100644 --- a/src/range-estimate.c +++ b/src/range-estimate.c @@ -10,20 +10,17 @@ typedef struct diff_result { } diff_t; /* calculates: r = x - y */ -__hot static int cursor_diff(const MDBX_cursor *const __restrict x, - const MDBX_cursor *const __restrict y, +__hot static int cursor_diff(const MDBX_cursor *const __restrict x, const MDBX_cursor *const __restrict y, diff_t *const __restrict r) { r->diff = 0; r->level = 0; r->root_nkeys = 0; if (unlikely(x->signature != cur_signature_live)) - return (x->signature == cur_signature_ready4dispose) ? MDBX_EINVAL - : MDBX_EBADSIGN; + return (x->signature == cur_signature_ready4dispose) ? MDBX_EINVAL : MDBX_EBADSIGN; if (unlikely(y->signature != cur_signature_live)) - return (y->signature == cur_signature_ready4dispose) ? MDBX_EINVAL - : MDBX_EBADSIGN; + return (y->signature == cur_signature_ready4dispose) ? 
MDBX_EINVAL : MDBX_EBADSIGN; int rc = check_txn(x->txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -86,8 +83,7 @@ __hot static int cursor_diff(const MDBX_cursor *const __restrict x, return MDBX_SUCCESS; } -__hot static ptrdiff_t estimate(const tree_t *tree, - diff_t *const __restrict dr) { +__hot static ptrdiff_t estimate(const tree_t *tree, diff_t *const __restrict dr) { /* root: branch-page => scale = leaf-factor * branch-factor^(N-1) * level-1: branch-page(s) => scale = leaf-factor * branch-factor^2 * level-2: branch-page(s) => scale = leaf-factor * branch-factor @@ -98,8 +94,7 @@ __hot static ptrdiff_t estimate(const tree_t *tree, if (btree_power < 0) return dr->diff; - ptrdiff_t estimated = - (ptrdiff_t)tree->items * dr->diff / (ptrdiff_t)tree->leaf_pages; + ptrdiff_t estimated = (ptrdiff_t)tree->items * dr->diff / (ptrdiff_t)tree->leaf_pages; if (btree_power == 0) return estimated; @@ -112,9 +107,7 @@ __hot static ptrdiff_t estimate(const tree_t *tree, total(branch_entries) = leaf_pages + branch_pages - 1 (root page) */ const size_t log2_fixedpoint = sizeof(size_t) - 1; const size_t half = UINT64_C(1) << (log2_fixedpoint - 1); - const size_t factor = - ((tree->leaf_pages + tree->branch_pages - 1) << log2_fixedpoint) / - tree->branch_pages; + const size_t factor = ((tree->leaf_pages + tree->branch_pages - 1) << log2_fixedpoint) / tree->branch_pages; while (1) { switch ((size_t)btree_power) { default: { @@ -149,11 +142,8 @@ __hot static ptrdiff_t estimate(const tree_t *tree, } } -__hot int mdbx_estimate_distance(const MDBX_cursor *first, - const MDBX_cursor *last, - ptrdiff_t *distance_items) { - if (unlikely(first == nullptr || last == nullptr || - distance_items == nullptr)) +__hot int mdbx_estimate_distance(const MDBX_cursor *first, const MDBX_cursor *last, ptrdiff_t *distance_items) { + if (unlikely(first == nullptr || last == nullptr || distance_items == nullptr)) return LOG_IFERR(MDBX_EINVAL); *distance_items = 0; @@ -177,17 +167,14 @@ __hot 
int mdbx_estimate_distance(const MDBX_cursor *first, return MDBX_SUCCESS; } -__hot int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, - MDBX_val *data, MDBX_cursor_op move_op, +__hot int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, MDBX_cursor_op move_op, ptrdiff_t *distance_items) { - if (unlikely(cursor == nullptr || distance_items == nullptr || - move_op == MDBX_GET_CURRENT || move_op == MDBX_GET_MULTIPLE)) + if (unlikely(cursor == nullptr || distance_items == nullptr || move_op == MDBX_GET_CURRENT || + move_op == MDBX_GET_MULTIPLE)) return LOG_IFERR(MDBX_EINVAL); if (unlikely(cursor->signature != cur_signature_live)) - return LOG_IFERR((cursor->signature == cur_signature_ready4dispose) - ? MDBX_EINVAL - : MDBX_EBADSIGN); + return LOG_IFERR((cursor->signature == cur_signature_ready4dispose) ? MDBX_EINVAL : MDBX_EBADSIGN); int rc = check_txn(cursor->txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -209,8 +196,7 @@ __hot int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val stub_data; if (data == nullptr) { - const unsigned mask = - 1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | 1 << MDBX_SET_KEY; + const unsigned mask = 1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | 1 << MDBX_SET_KEY; if (unlikely(mask & (1 << move_op))) return LOG_IFERR(MDBX_EINVAL); stub_data.iov_base = nullptr; @@ -220,9 +206,8 @@ __hot int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val stub_key; if (key == nullptr) { - const unsigned mask = 1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | - 1 << MDBX_SET_KEY | 1 << MDBX_SET | - 1 << MDBX_SET_RANGE; + const unsigned mask = + 1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | 1 << MDBX_SET_KEY | 1 << MDBX_SET | 1 << MDBX_SET_RANGE; if (unlikely(mask & (1 << move_op))) return LOG_IFERR(MDBX_EINVAL); stub_key.iov_base = nullptr; @@ -232,8 +217,7 @@ __hot int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, next.outer.signature = 
cur_signature_live; rc = cursor_ops(&next.outer, key, data, move_op); - if (unlikely(rc != MDBX_SUCCESS && - (rc != MDBX_NOTFOUND || !is_pointed(&next.outer)))) + if (unlikely(rc != MDBX_SUCCESS && (rc != MDBX_NOTFOUND || !is_pointed(&next.outer)))) return LOG_IFERR(rc); if (move_op == MDBX_LAST) { @@ -243,11 +227,8 @@ __hot int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, return mdbx_estimate_distance(cursor, &next.outer, distance_items); } -__hot int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, - const MDBX_val *begin_key, - const MDBX_val *begin_data, - const MDBX_val *end_key, const MDBX_val *end_data, - ptrdiff_t *size_items) { +__hot int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *begin_key, const MDBX_val *begin_data, + const MDBX_val *end_key, const MDBX_val *end_data, ptrdiff_t *size_items) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); @@ -255,8 +236,7 @@ __hot int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, if (unlikely(!size_items)) return LOG_IFERR(MDBX_EINVAL); - if (unlikely(begin_data && - (begin_key == nullptr || begin_key == MDBX_EPSILON))) + if (unlikely(begin_data && (begin_key == nullptr || begin_key == MDBX_EPSILON))) return LOG_IFERR(MDBX_EINVAL); if (unlikely(end_data && (end_key == nullptr || end_key == MDBX_EPSILON))) @@ -285,20 +265,14 @@ __hot int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, rc = outer_first(&begin.outer, nullptr, nullptr); if (unlikely(end_key == MDBX_EPSILON)) { /* LY: FIRST..+epsilon case */ - return LOG_IFERR( - (rc == MDBX_SUCCESS) - ? mdbx_cursor_count(&begin.outer, (size_t *)size_items) - : rc); + return LOG_IFERR((rc == MDBX_SUCCESS) ? 
mdbx_cursor_count(&begin.outer, (size_t *)size_items) : rc); } } else { if (unlikely(begin_key == MDBX_EPSILON)) { if (end_key == nullptr) { /* LY: -epsilon..LAST case */ rc = outer_last(&begin.outer, nullptr, nullptr); - return LOG_IFERR( - (rc == MDBX_SUCCESS) - ? mdbx_cursor_count(&begin.outer, (size_t *)size_items) - : rc); + return LOG_IFERR((rc == MDBX_SUCCESS) ? mdbx_cursor_count(&begin.outer, (size_t *)size_items) : rc); } /* LY: -epsilon..value case */ assert(end_key != MDBX_EPSILON); @@ -309,22 +283,19 @@ __hot int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, end_key = begin_key; } if (end_key && !begin_data && !end_data && - (begin_key == end_key || - begin.outer.clc->k.cmp(begin_key, end_key) == 0)) { + (begin_key == end_key || begin.outer.clc->k.cmp(begin_key, end_key) == 0)) { /* LY: single key case */ - rc = cursor_seek(&begin.outer, (MDBX_val *)begin_key, nullptr, MDBX_SET) - .err; + rc = cursor_seek(&begin.outer, (MDBX_val *)begin_key, nullptr, MDBX_SET).err; if (unlikely(rc != MDBX_SUCCESS)) { *size_items = 0; return LOG_IFERR((rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc); } *size_items = 1; if (inner_pointed(&begin.outer)) - *size_items = - (sizeof(*size_items) >= sizeof(begin.inner.nested_tree.items) || - begin.inner.nested_tree.items <= PTRDIFF_MAX) - ? (size_t)begin.inner.nested_tree.items - : PTRDIFF_MAX; + *size_items = (sizeof(*size_items) >= sizeof(begin.inner.nested_tree.items) || + begin.inner.nested_tree.items <= PTRDIFF_MAX) + ? 
(size_t)begin.inner.nested_tree.items + : PTRDIFF_MAX; return MDBX_SUCCESS; } else { @@ -332,9 +303,7 @@ __hot int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val proxy_data = {nullptr, 0}; if (begin_data) proxy_data = *begin_data; - rc = LOG_IFERR(cursor_seek(&begin.outer, &proxy_key, &proxy_data, - MDBX_SET_LOWERBOUND) - .err); + rc = LOG_IFERR(cursor_seek(&begin.outer, &proxy_key, &proxy_data, MDBX_SET_LOWERBOUND).err); } } @@ -356,8 +325,7 @@ __hot int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val proxy_data = {nullptr, 0}; if (end_data) proxy_data = *end_data; - rc = cursor_seek(&end.outer, &proxy_key, &proxy_data, MDBX_SET_LOWERBOUND) - .err; + rc = cursor_seek(&end.outer, &proxy_key, &proxy_data, MDBX_SET_LOWERBOUND).err; } if (unlikely(rc != MDBX_SUCCESS)) { if (rc != MDBX_NOTFOUND || !is_pointed(&end.outer)) @@ -367,10 +335,9 @@ __hot int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, rc = mdbx_estimate_distance(&begin.outer, &end.outer, size_items); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); - assert(*size_items >= -(ptrdiff_t)begin.outer.tree->items && - *size_items <= (ptrdiff_t)begin.outer.tree->items); + assert(*size_items >= -(ptrdiff_t)begin.outer.tree->items && *size_items <= (ptrdiff_t)begin.outer.tree->items); -#if 0 /* LY: Was decided to returns as-is (i.e. negative) the estimation \ +#if 0 /* LY: Was decided to returns as-is (i.e. negative) the estimation \ * results for an inverted ranges. 
*/ /* Commit 8ddfd1f34ad7cf7a3c4aa75d2e248ca7e639ed63 diff --git a/src/refund.c b/src/refund.c index 3742e569..2d1ef607 100644 --- a/src/refund.c +++ b/src/refund.c @@ -8,8 +8,7 @@ static void refund_reclaimed(MDBX_txn *txn) { /* Scanning in descend order */ pgno_t first_unallocated = txn->geo.first_unallocated; const pnl_t pnl = txn->tw.relist; - tASSERT(txn, - MDBX_PNL_GETSIZE(pnl) && MDBX_PNL_MOST(pnl) == first_unallocated - 1); + tASSERT(txn, MDBX_PNL_GETSIZE(pnl) && MDBX_PNL_MOST(pnl) == first_unallocated - 1); #if MDBX_PNL_ASCENDING size_t i = MDBX_PNL_GETSIZE(pnl); tASSERT(txn, pnl[i] == first_unallocated - 1); @@ -26,12 +25,10 @@ static void refund_reclaimed(MDBX_txn *txn) { for (size_t move = 0; move < len; ++move) pnl[1 + move] = pnl[i + move]; #endif - VERBOSE("refunded %" PRIaPGNO " pages: %" PRIaPGNO " -> %" PRIaPGNO, - txn->geo.first_unallocated - first_unallocated, + VERBOSE("refunded %" PRIaPGNO " pages: %" PRIaPGNO " -> %" PRIaPGNO, txn->geo.first_unallocated - first_unallocated, txn->geo.first_unallocated, first_unallocated); txn->geo.first_unallocated = first_unallocated; - tASSERT(txn, - pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - 1)); + tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - 1)); } static void refund_loose(MDBX_txn *txn) { @@ -58,18 +55,14 @@ static void refund_loose(MDBX_txn *txn) { } /* Collect loose-pages which may be refunded. */ - tASSERT(txn, - txn->geo.first_unallocated >= MIN_PAGENO + txn->tw.loose_count); + tASSERT(txn, txn->geo.first_unallocated >= MIN_PAGENO + txn->tw.loose_count); pgno_t most = MIN_PAGENO; size_t w = 0; for (const page_t *lp = txn->tw.loose_pages; lp; lp = page_next(lp)) { tASSERT(txn, lp->flags == P_LOOSE); tASSERT(txn, txn->geo.first_unallocated > lp->pgno); - if (likely(txn->geo.first_unallocated - txn->tw.loose_count <= - lp->pgno)) { - tASSERT(txn, - w < ((suitable == onstack) ? 
pnl_bytes2size(sizeof(onstack)) - : MDBX_PNL_ALLOCLEN(suitable))); + if (likely(txn->geo.first_unallocated - txn->tw.loose_count <= lp->pgno)) { + tASSERT(txn, w < ((suitable == onstack) ? pnl_bytes2size(sizeof(onstack)) : MDBX_PNL_ALLOCLEN(suitable))); suitable[++w] = lp->pgno; most = (lp->pgno > most) ? lp->pgno : most; } @@ -84,10 +77,8 @@ static void refund_loose(MDBX_txn *txn) { /* Scanning in descend order */ const intptr_t step = MDBX_PNL_ASCENDING ? -1 : 1; - const intptr_t begin = - MDBX_PNL_ASCENDING ? MDBX_PNL_GETSIZE(suitable) : 1; - const intptr_t end = - MDBX_PNL_ASCENDING ? 0 : MDBX_PNL_GETSIZE(suitable) + 1; + const intptr_t begin = MDBX_PNL_ASCENDING ? MDBX_PNL_GETSIZE(suitable) : 1; + const intptr_t end = MDBX_PNL_ASCENDING ? 0 : MDBX_PNL_GETSIZE(suitable) + 1; tASSERT(txn, suitable[begin] >= suitable[end - step]); tASSERT(txn, most == suitable[begin]); @@ -97,8 +88,7 @@ static void refund_loose(MDBX_txn *txn) { most -= 1; } const size_t refunded = txn->geo.first_unallocated - most; - DEBUG("refund-suitable %zu pages %" PRIaPGNO " -> %" PRIaPGNO, refunded, - most, txn->geo.first_unallocated); + DEBUG("refund-suitable %zu pages %" PRIaPGNO " -> %" PRIaPGNO, refunded, most, txn->geo.first_unallocated); txn->geo.first_unallocated = most; txn->tw.loose_count -= refunded; if (dl) { @@ -126,22 +116,19 @@ static void refund_loose(MDBX_txn *txn) { } dpl_setlen(dl, w); tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == - (txn->parent ? txn->parent->tw.dirtyroom - : txn->env->options.dp_limit)); + (txn->parent ? txn->parent->tw.dirtyroom : txn->env->options.dp_limit)); } goto unlink_loose; } } else { /* Dirtylist is mostly sorted, just refund loose pages at the end. */ dpl_sort(txn); - tASSERT(txn, - dl->length < 2 || dl->items[1].pgno < dl->items[dl->length].pgno); + tASSERT(txn, dl->length < 2 || dl->items[1].pgno < dl->items[dl->length].pgno); tASSERT(txn, dl->sorted == dl->length); /* Scan dirtylist tail-forward and cutoff suitable pages. 
*/ size_t n; - for (n = dl->length; dl->items[n].pgno == txn->geo.first_unallocated - 1 && - dl->items[n].ptr->flags == P_LOOSE; + for (n = dl->length; dl->items[n].pgno == txn->geo.first_unallocated - 1 && dl->items[n].ptr->flags == P_LOOSE; --n) { tASSERT(txn, n > 0); page_t *dp = dl->items[n].ptr; @@ -158,8 +145,7 @@ static void refund_loose(MDBX_txn *txn) { txn->tw.dirtyroom += refunded; dl->pages_including_loose -= refunded; tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == - (txn->parent ? txn->parent->tw.dirtyroom - : txn->env->options.dp_limit)); + (txn->parent ? txn->parent->tw.dirtyroom : txn->env->options.dp_limit)); /* Filter-out loose chain & dispose refunded pages. */ unlink_loose: @@ -188,18 +174,15 @@ static void refund_loose(MDBX_txn *txn) { bool txn_refund(MDBX_txn *txn) { const pgno_t before = txn->geo.first_unallocated; - if (txn->tw.loose_pages && - txn->tw.loose_refund_wl > txn->geo.first_unallocated) + if (txn->tw.loose_pages && txn->tw.loose_refund_wl > txn->geo.first_unallocated) refund_loose(txn); while (true) { - if (MDBX_PNL_GETSIZE(txn->tw.relist) == 0 || - MDBX_PNL_MOST(txn->tw.relist) != txn->geo.first_unallocated - 1) + if (MDBX_PNL_GETSIZE(txn->tw.relist) == 0 || MDBX_PNL_MOST(txn->tw.relist) != txn->geo.first_unallocated - 1) break; refund_reclaimed(txn); - if (!txn->tw.loose_pages || - txn->tw.loose_refund_wl <= txn->geo.first_unallocated) + if (!txn->tw.loose_pages || txn->tw.loose_refund_wl <= txn->geo.first_unallocated) break; const pgno_t memo = txn->geo.first_unallocated; diff --git a/src/sort.h b/src/sort.h index 3169e317..824c6bf5 100644 --- a/src/sort.h +++ b/src/sort.h @@ -14,21 +14,21 @@ * Thanks to John M. Gamble for the http://pages.ripco.net/~jgamble/nw.html */ #if MDBX_HAVE_CMOV -#define SORT_CMP_SWAP(TYPE, CMP, a, b) \ - do { \ - const TYPE swap_tmp = (a); \ - const bool swap_cmp = expect_with_probability(CMP(swap_tmp, b), 0, .5); \ - (a) = swap_cmp ? swap_tmp : b; \ - (b) = swap_cmp ? 
b : swap_tmp; \ +#define SORT_CMP_SWAP(TYPE, CMP, a, b) \ + do { \ + const TYPE swap_tmp = (a); \ + const bool swap_cmp = expect_with_probability(CMP(swap_tmp, b), 0, .5); \ + (a) = swap_cmp ? swap_tmp : b; \ + (b) = swap_cmp ? b : swap_tmp; \ } while (0) #else -#define SORT_CMP_SWAP(TYPE, CMP, a, b) \ - do \ - if (expect_with_probability(!CMP(a, b), 0, .5)) { \ - const TYPE swap_tmp = (a); \ - (a) = (b); \ - (b) = swap_tmp; \ - } \ +#define SORT_CMP_SWAP(TYPE, CMP, a, b) \ + do \ + if (expect_with_probability(!CMP(a, b), 0, .5)) { \ + const TYPE swap_tmp = (a); \ + (a) = (b); \ + (b) = swap_tmp; \ + } \ while (0) #endif @@ -42,11 +42,11 @@ // [[1,2]] // [[0,2]] // [[0,1]] -#define SORT_NETWORK_3(TYPE, CMP, begin) \ - do { \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ +#define SORT_NETWORK_3(TYPE, CMP, begin) \ + do { \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ } while (0) // 5 comparators, 3 parallel operations @@ -61,13 +61,13 @@ // [[0,1],[2,3]] // [[0,2],[1,3]] // [[1,2]] -#define SORT_NETWORK_4(TYPE, CMP, begin) \ - do { \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ +#define SORT_NETWORK_4(TYPE, CMP, begin) \ + do { \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ } while (0) // 9 comparators, 5 parallel operations @@ -86,17 +86,17 @@ // [[2,4],[0,1]] // [[2,3],[1,4]] // [[1,2],[3,4]] -#define SORT_NETWORK_5(TYPE, CMP, begin) \ - do { \ - 
SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[4]); \ +#define SORT_NETWORK_5(TYPE, CMP, begin) \ + do { \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[4]); \ } while (0) // 12 comparators, 6 parallel operations @@ -118,20 +118,20 @@ // [[0,3],[1,4]] // [[2,4],[1,3]] // [[2,3]] -#define SORT_NETWORK_6(TYPE, CMP, begin) \ - do { \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[5]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[5]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[5]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ +#define SORT_NETWORK_6(TYPE, CMP, begin) \ + do { \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[5]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[5]); \ + SORT_CMP_SWAP(TYPE, 
CMP, begin[0], begin[1]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[5]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ } while (0) // 16 comparators, 6 parallel operations @@ -155,24 +155,24 @@ // [[2,3],[4,5]] // [[1,4],[3,6]] // [[1,2],[3,4],[5,6]] -#define SORT_NETWORK_7(TYPE, CMP, begin) \ - do { \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[5]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[6]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[6]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[5]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[5]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[6]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[5], begin[6]); \ +#define SORT_NETWORK_7(TYPE, CMP, begin) \ + do { \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[5]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[6]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[6]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[5]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[5]); \ + SORT_CMP_SWAP(TYPE, CMP, 
begin[1], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[6]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[5], begin[6]); \ } while (0) // 19 comparators, 6 parallel operations @@ -198,237 +198,236 @@ // [[2,3],[4,5]] // [[1,4],[3,6]] // [[1,2],[3,4],[5,6]] -#define SORT_NETWORK_8(TYPE, CMP, begin) \ - do { \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[5]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[6]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[7]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[6]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[5], begin[7]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[5]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[6], begin[7]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[5]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[6]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[4]); \ - SORT_CMP_SWAP(TYPE, CMP, begin[5], begin[6]); \ +#define SORT_NETWORK_8(TYPE, CMP, begin) \ + do { \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[5]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[6]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[7]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[6]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[5], begin[7]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[5]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[6], 
begin[7]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[5]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[6]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[4]); \ + SORT_CMP_SWAP(TYPE, CMP, begin[5], begin[6]); \ } while (0) -#define SORT_INNER(TYPE, CMP, begin, end, len) \ - switch (len) { \ - default: \ - assert(false); \ - __unreachable(); \ - case 0: \ - case 1: \ - break; \ - case 2: \ - SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ - break; \ - case 3: \ - SORT_NETWORK_3(TYPE, CMP, begin); \ - break; \ - case 4: \ - SORT_NETWORK_4(TYPE, CMP, begin); \ - break; \ - case 5: \ - SORT_NETWORK_5(TYPE, CMP, begin); \ - break; \ - case 6: \ - SORT_NETWORK_6(TYPE, CMP, begin); \ - break; \ - case 7: \ - SORT_NETWORK_7(TYPE, CMP, begin); \ - break; \ - case 8: \ - SORT_NETWORK_8(TYPE, CMP, begin); \ - break; \ +#define SORT_INNER(TYPE, CMP, begin, end, len) \ + switch (len) { \ + default: \ + assert(false); \ + __unreachable(); \ + case 0: \ + case 1: \ + break; \ + case 2: \ + SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]); \ + break; \ + case 3: \ + SORT_NETWORK_3(TYPE, CMP, begin); \ + break; \ + case 4: \ + SORT_NETWORK_4(TYPE, CMP, begin); \ + break; \ + case 5: \ + SORT_NETWORK_5(TYPE, CMP, begin); \ + break; \ + case 6: \ + SORT_NETWORK_6(TYPE, CMP, begin); \ + break; \ + case 7: \ + SORT_NETWORK_7(TYPE, CMP, begin); \ + break; \ + case 8: \ + SORT_NETWORK_8(TYPE, CMP, begin); \ + break; \ } -#define SORT_SWAP(TYPE, a, b) \ - do { \ - const TYPE swap_tmp = (a); \ - (a) = (b); \ - (b) = swap_tmp; \ +#define SORT_SWAP(TYPE, a, b) \ + do { \ + const TYPE swap_tmp = (a); \ + (a) = (b); \ + (b) = swap_tmp; \ } while (0) -#define SORT_PUSH(low, high) \ - do { \ - top->lo = (low); \ - top->hi = (high); \ - ++top; \ +#define SORT_PUSH(low, high) \ + do { \ + top->lo = (low); \ + top->hi = (high); \ + ++top; \ } while (0) 
-#define SORT_POP(low, high) \ - do { \ - --top; \ - low = top->lo; \ - high = top->hi; \ +#define SORT_POP(low, high) \ + do { \ + --top; \ + low = top->lo; \ + high = top->hi; \ } while (0) -#define SORT_IMPL(NAME, EXPECT_LOW_CARDINALITY_OR_PRESORTED, TYPE, CMP) \ - \ - static inline bool NAME##_is_sorted(const TYPE *first, const TYPE *last) { \ - while (++first <= last) \ - if (expect_with_probability(CMP(first[0], first[-1]), 1, .1)) \ - return false; \ - return true; \ - } \ - \ - typedef struct { \ - TYPE *lo, *hi; \ - } NAME##_stack; \ - \ - __hot static void NAME(TYPE *const __restrict begin, \ - TYPE *const __restrict end) { \ - NAME##_stack stack[sizeof(size_t) * CHAR_BIT], *__restrict top = stack; \ - \ - TYPE *__restrict hi = end - 1; \ - TYPE *__restrict lo = begin; \ - while (true) { \ - const ptrdiff_t len = hi - lo; \ - if (len < 8) { \ - SORT_INNER(TYPE, CMP, lo, hi + 1, len + 1); \ - if (unlikely(top == stack)) \ - break; \ - SORT_POP(lo, hi); \ - continue; \ - } \ - \ - TYPE *__restrict mid = lo + (len >> 1); \ - SORT_CMP_SWAP(TYPE, CMP, *lo, *mid); \ - SORT_CMP_SWAP(TYPE, CMP, *mid, *hi); \ - SORT_CMP_SWAP(TYPE, CMP, *lo, *mid); \ - \ - TYPE *right = hi - 1; \ - TYPE *left = lo + 1; \ - while (1) { \ - while (expect_with_probability(CMP(*left, *mid), 0, .5)) \ - ++left; \ - while (expect_with_probability(CMP(*mid, *right), 0, .5)) \ - --right; \ - if (unlikely(left > right)) { \ - if (EXPECT_LOW_CARDINALITY_OR_PRESORTED) { \ - if (NAME##_is_sorted(lo, right)) \ - lo = right + 1; \ - if (NAME##_is_sorted(left, hi)) \ - hi = left; \ - } \ - break; \ - } \ - SORT_SWAP(TYPE, *left, *right); \ - mid = (mid == left) ? right : (mid == right) ? 
left : mid; \ - ++left; \ - --right; \ - } \ - \ - if (right - lo > hi - left) { \ - SORT_PUSH(lo, right); \ - lo = left; \ - } else { \ - SORT_PUSH(left, hi); \ - hi = right; \ - } \ - } \ - \ - if (AUDIT_ENABLED()) { \ - for (TYPE *scan = begin + 1; scan < end; ++scan) \ - assert(CMP(scan[-1], scan[0])); \ - } \ +#define SORT_IMPL(NAME, EXPECT_LOW_CARDINALITY_OR_PRESORTED, TYPE, CMP) \ + \ + static inline bool NAME##_is_sorted(const TYPE *first, const TYPE *last) { \ + while (++first <= last) \ + if (expect_with_probability(CMP(first[0], first[-1]), 1, .1)) \ + return false; \ + return true; \ + } \ + \ + typedef struct { \ + TYPE *lo, *hi; \ + } NAME##_stack; \ + \ + __hot static void NAME(TYPE *const __restrict begin, TYPE *const __restrict end) { \ + NAME##_stack stack[sizeof(size_t) * CHAR_BIT], *__restrict top = stack; \ + \ + TYPE *__restrict hi = end - 1; \ + TYPE *__restrict lo = begin; \ + while (true) { \ + const ptrdiff_t len = hi - lo; \ + if (len < 8) { \ + SORT_INNER(TYPE, CMP, lo, hi + 1, len + 1); \ + if (unlikely(top == stack)) \ + break; \ + SORT_POP(lo, hi); \ + continue; \ + } \ + \ + TYPE *__restrict mid = lo + (len >> 1); \ + SORT_CMP_SWAP(TYPE, CMP, *lo, *mid); \ + SORT_CMP_SWAP(TYPE, CMP, *mid, *hi); \ + SORT_CMP_SWAP(TYPE, CMP, *lo, *mid); \ + \ + TYPE *right = hi - 1; \ + TYPE *left = lo + 1; \ + while (1) { \ + while (expect_with_probability(CMP(*left, *mid), 0, .5)) \ + ++left; \ + while (expect_with_probability(CMP(*mid, *right), 0, .5)) \ + --right; \ + if (unlikely(left > right)) { \ + if (EXPECT_LOW_CARDINALITY_OR_PRESORTED) { \ + if (NAME##_is_sorted(lo, right)) \ + lo = right + 1; \ + if (NAME##_is_sorted(left, hi)) \ + hi = left; \ + } \ + break; \ + } \ + SORT_SWAP(TYPE, *left, *right); \ + mid = (mid == left) ? right : (mid == right) ? 
left : mid; \ + ++left; \ + --right; \ + } \ + \ + if (right - lo > hi - left) { \ + SORT_PUSH(lo, right); \ + lo = left; \ + } else { \ + SORT_PUSH(left, hi); \ + hi = right; \ + } \ + } \ + \ + if (AUDIT_ENABLED()) { \ + for (TYPE *scan = begin + 1; scan < end; ++scan) \ + assert(CMP(scan[-1], scan[0])); \ + } \ } /*------------------------------------------------------------------------------ * LY: radix sort for large chunks */ -#define RADIXSORT_IMPL(NAME, TYPE, EXTRACT_KEY, BUFFER_PREALLOCATED, END_GAP) \ - \ - __hot static bool NAME##_radixsort(TYPE *const begin, const size_t length) { \ - TYPE *tmp; \ - if (BUFFER_PREALLOCATED) { \ - tmp = begin + length + END_GAP; \ - /* memset(tmp, 0xDeadBeef, sizeof(TYPE) * length); */ \ - } else { \ - tmp = osal_malloc(sizeof(TYPE) * length); \ - if (unlikely(!tmp)) \ - return false; \ - } \ - \ - size_t key_shift = 0, key_diff_mask; \ - do { \ - struct { \ - pgno_t a[256], b[256]; \ - } counters; \ - memset(&counters, 0, sizeof(counters)); \ - \ - key_diff_mask = 0; \ - size_t prev_key = EXTRACT_KEY(begin) >> key_shift; \ - TYPE *r = begin, *end = begin + length; \ - do { \ - const size_t key = EXTRACT_KEY(r) >> key_shift; \ - counters.a[key & 255]++; \ - counters.b[(key >> 8) & 255]++; \ - key_diff_mask |= prev_key ^ key; \ - prev_key = key; \ - } while (++r != end); \ - \ - pgno_t ta = 0, tb = 0; \ - for (size_t i = 0; i < 256; ++i) { \ - const pgno_t ia = counters.a[i]; \ - counters.a[i] = ta; \ - ta += ia; \ - const pgno_t ib = counters.b[i]; \ - counters.b[i] = tb; \ - tb += ib; \ - } \ - \ - r = begin; \ - do { \ - const size_t key = EXTRACT_KEY(r) >> key_shift; \ - tmp[counters.a[key & 255]++] = *r; \ - } while (++r != end); \ - \ - if (unlikely(key_diff_mask < 256)) { \ - memcpy(begin, tmp, ptr_dist(end, begin)); \ - break; \ - } \ - end = (r = tmp) + length; \ - do { \ - const size_t key = EXTRACT_KEY(r) >> key_shift; \ - begin[counters.b[(key >> 8) & 255]++] = *r; \ - } while (++r != end); \ - \ - key_shift 
+= 16; \ - } while (key_diff_mask >> 16); \ - \ - if (!(BUFFER_PREALLOCATED)) \ - osal_free(tmp); \ - return true; \ +#define RADIXSORT_IMPL(NAME, TYPE, EXTRACT_KEY, BUFFER_PREALLOCATED, END_GAP) \ + \ + __hot static bool NAME##_radixsort(TYPE *const begin, const size_t length) { \ + TYPE *tmp; \ + if (BUFFER_PREALLOCATED) { \ + tmp = begin + length + END_GAP; \ + /* memset(tmp, 0xDeadBeef, sizeof(TYPE) * length); */ \ + } else { \ + tmp = osal_malloc(sizeof(TYPE) * length); \ + if (unlikely(!tmp)) \ + return false; \ + } \ + \ + size_t key_shift = 0, key_diff_mask; \ + do { \ + struct { \ + pgno_t a[256], b[256]; \ + } counters; \ + memset(&counters, 0, sizeof(counters)); \ + \ + key_diff_mask = 0; \ + size_t prev_key = EXTRACT_KEY(begin) >> key_shift; \ + TYPE *r = begin, *end = begin + length; \ + do { \ + const size_t key = EXTRACT_KEY(r) >> key_shift; \ + counters.a[key & 255]++; \ + counters.b[(key >> 8) & 255]++; \ + key_diff_mask |= prev_key ^ key; \ + prev_key = key; \ + } while (++r != end); \ + \ + pgno_t ta = 0, tb = 0; \ + for (size_t i = 0; i < 256; ++i) { \ + const pgno_t ia = counters.a[i]; \ + counters.a[i] = ta; \ + ta += ia; \ + const pgno_t ib = counters.b[i]; \ + counters.b[i] = tb; \ + tb += ib; \ + } \ + \ + r = begin; \ + do { \ + const size_t key = EXTRACT_KEY(r) >> key_shift; \ + tmp[counters.a[key & 255]++] = *r; \ + } while (++r != end); \ + \ + if (unlikely(key_diff_mask < 256)) { \ + memcpy(begin, tmp, ptr_dist(end, begin)); \ + break; \ + } \ + end = (r = tmp) + length; \ + do { \ + const size_t key = EXTRACT_KEY(r) >> key_shift; \ + begin[counters.b[(key >> 8) & 255]++] = *r; \ + } while (++r != end); \ + \ + key_shift += 16; \ + } while (key_diff_mask >> 16); \ + \ + if (!(BUFFER_PREALLOCATED)) \ + osal_free(tmp); \ + return true; \ } /*------------------------------------------------------------------------------ * LY: Binary search */ #if defined(__clang__) && __clang_major__ > 4 && defined(__ia32__) -#define 
WORKAROUND_FOR_CLANG_OPTIMIZER_BUG(size, flag) \ - do \ - __asm __volatile("" \ - : "+r"(size) \ - : "r" /* the `b` constraint is more suitable here, but \ - cause CLANG to allocate and push/pop an one more \ - register, so using the `r` which avoids this. */ \ - (flag)); \ +#define WORKAROUND_FOR_CLANG_OPTIMIZER_BUG(size, flag) \ + do \ + __asm __volatile("" \ + : "+r"(size) \ + : "r" /* the `b` constraint is more suitable here, but \ + cause CLANG to allocate and push/pop an one more \ + register, so using the `r` which avoids this. */ \ + (flag)); \ while (0) #else -#define WORKAROUND_FOR_CLANG_OPTIMIZER_BUG(size, flag) \ - do { \ - /* nope for non-clang or non-x86 */; \ +#define WORKAROUND_FOR_CLANG_OPTIMIZER_BUG(size, flag) \ + do { \ + /* nope for non-clang or non-x86 */; \ } while (0) #endif /* Workaround for CLANG */ diff --git a/src/spill.c b/src/spill.c index 0a02ad52..261adb78 100644 --- a/src/spill.c +++ b/src/spill.c @@ -4,33 +4,25 @@ #include "internals.h" void spill_remove(MDBX_txn *txn, size_t idx, size_t npages) { - tASSERT(txn, idx > 0 && idx <= MDBX_PNL_GETSIZE(txn->tw.spilled.list) && - txn->tw.spilled.least_removed > 0); - txn->tw.spilled.least_removed = (idx < txn->tw.spilled.least_removed) - ? idx - : txn->tw.spilled.least_removed; + tASSERT(txn, idx > 0 && idx <= MDBX_PNL_GETSIZE(txn->tw.spilled.list) && txn->tw.spilled.least_removed > 0); + txn->tw.spilled.least_removed = (idx < txn->tw.spilled.least_removed) ? 
idx : txn->tw.spilled.least_removed; txn->tw.spilled.list[idx] |= 1; MDBX_PNL_SETSIZE(txn->tw.spilled.list, - MDBX_PNL_GETSIZE(txn->tw.spilled.list) - - (idx == MDBX_PNL_GETSIZE(txn->tw.spilled.list))); + MDBX_PNL_GETSIZE(txn->tw.spilled.list) - (idx == MDBX_PNL_GETSIZE(txn->tw.spilled.list))); while (unlikely(npages > 1)) { const pgno_t pgno = (txn->tw.spilled.list[idx] >> 1) + 1; if (MDBX_PNL_ASCENDING) { - if (++idx > MDBX_PNL_GETSIZE(txn->tw.spilled.list) || - (txn->tw.spilled.list[idx] >> 1) != pgno) + if (++idx > MDBX_PNL_GETSIZE(txn->tw.spilled.list) || (txn->tw.spilled.list[idx] >> 1) != pgno) return; } else { if (--idx < 1 || (txn->tw.spilled.list[idx] >> 1) != pgno) return; - txn->tw.spilled.least_removed = (idx < txn->tw.spilled.least_removed) - ? idx - : txn->tw.spilled.least_removed; + txn->tw.spilled.least_removed = (idx < txn->tw.spilled.least_removed) ? idx : txn->tw.spilled.least_removed; } txn->tw.spilled.list[idx] |= 1; MDBX_PNL_SETSIZE(txn->tw.spilled.list, - MDBX_PNL_GETSIZE(txn->tw.spilled.list) - - (idx == MDBX_PNL_GETSIZE(txn->tw.spilled.list))); + MDBX_PNL_GETSIZE(txn->tw.spilled.list) - (idx == MDBX_PNL_GETSIZE(txn->tw.spilled.list))); --npages; } } @@ -57,8 +49,7 @@ pnl_t spill_purge(MDBX_txn *txn) { /*----------------------------------------------------------------------------*/ -static int spill_page(MDBX_txn *txn, iov_ctx_t *ctx, page_t *dp, - const size_t npages) { +static int spill_page(MDBX_txn *txn, iov_ctx_t *ctx, page_t *dp, const size_t npages) { tASSERT(txn, !(txn->flags & MDBX_WRITEMAP)); #if MDBX_ENABLE_PGOP_STAT txn->env->lck->pgops.spill.weak += npages; @@ -72,8 +63,7 @@ static int spill_page(MDBX_txn *txn, iov_ctx_t *ctx, page_t *dp, /* Set unspillable LRU-label for dirty pages watched by txn. * Returns the number of pages marked as unspillable. 
*/ -static size_t spill_cursor_keep(const MDBX_txn *const txn, - const MDBX_cursor *mc) { +static size_t spill_cursor_keep(const MDBX_txn *const txn, const MDBX_cursor *mc) { tASSERT(txn, (txn->flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0); size_t keep = 0; while (!is_poor(mc)) { @@ -87,8 +77,7 @@ static size_t spill_cursor_keep(const MDBX_txn *const txn, size_t const n = dpl_search(txn, mp->pgno); if (txn->tw.dirtylist->items[n].pgno == mp->pgno && /* не считаем дважды */ dpl_age(txn, n)) { - size_t *const ptr = ptr_disp(txn->tw.dirtylist->items[n].ptr, - -(ptrdiff_t)sizeof(size_t)); + size_t *const ptr = ptr_disp(txn->tw.dirtylist->items[n].ptr, -(ptrdiff_t)sizeof(size_t)); *ptr = txn->tw.dirtylru; tASSERT(txn, dpl_age(txn, n) == 0); ++keep; @@ -112,8 +101,7 @@ static size_t spill_txn_keep(MDBX_txn *txn, MDBX_cursor *m0) { size_t keep = m0 ? spill_cursor_keep(txn, m0) : 0; TXN_FOREACH_DBI_ALL(txn, dbi) { - if (F_ISSET(txn->dbi_state[dbi], DBI_DIRTY | DBI_VALID) && - txn->dbs[dbi].root != P_INVALID) + if (F_ISSET(txn->dbi_state[dbi], DBI_DIRTY | DBI_VALID) && txn->dbs[dbi].root != P_INVALID) for (MDBX_cursor *mc = txn->cursors[dbi]; mc; mc = mc->next) if (mc != m0) keep += spill_cursor_keep(txn, mc); @@ -126,8 +114,7 @@ static size_t spill_txn_keep(MDBX_txn *txn, MDBX_cursor *m0) { * 0 = should be spilled; * ... * > 255 = must not be spilled. */ -MDBX_NOTHROW_PURE_FUNCTION static unsigned -spill_prio(const MDBX_txn *txn, const size_t i, const uint32_t reciprocal) { +MDBX_NOTHROW_PURE_FUNCTION static unsigned spill_prio(const MDBX_txn *txn, const size_t i, const uint32_t reciprocal) { dpl_t *const dl = txn->tw.dirtylist; const uint32_t age = dpl_age(txn, i); const size_t npages = dpl_npages(dl, i); @@ -139,8 +126,7 @@ spill_prio(const MDBX_txn *txn, const size_t i, const uint32_t reciprocal) { page_t *const dp = dl->items[i].ptr; if (dp->flags & (P_LOOSE | P_SPILLED)) { - DEBUG("skip %s %zu page %" PRIaPGNO, - (dp->flags & P_LOOSE) ? 
"loose" : "parent-spilled", npages, pgno); + DEBUG("skip %s %zu page %" PRIaPGNO, (dp->flags & P_LOOSE) ? "loose" : "parent-spilled", npages, pgno); return 256; } @@ -175,67 +161,49 @@ spill_prio(const MDBX_txn *txn, const size_t i, const uint32_t reciprocal) { return prio = (unsigned)factor; } -static size_t spill_gate(const MDBX_env *env, intptr_t part, - const size_t total) { - const intptr_t spill_min = - env->options.spill_min_denominator - ? (total + env->options.spill_min_denominator - 1) / - env->options.spill_min_denominator - : 1; +static size_t spill_gate(const MDBX_env *env, intptr_t part, const size_t total) { + const intptr_t spill_min = env->options.spill_min_denominator + ? (total + env->options.spill_min_denominator - 1) / env->options.spill_min_denominator + : 1; const intptr_t spill_max = - total - (env->options.spill_max_denominator - ? total / env->options.spill_max_denominator - : 0); + total - (env->options.spill_max_denominator ? total / env->options.spill_max_denominator : 0); part = (part < spill_max) ? part : spill_max; part = (part > spill_min) ? part : spill_min; eASSERT(env, part >= 0 && (size_t)part <= total); return (size_t)part; } -__cold int spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, - const intptr_t wanna_spill_entries, - const intptr_t wanna_spill_npages, - const size_t need) { +__cold int spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, const intptr_t wanna_spill_entries, + const intptr_t wanna_spill_npages, const size_t need) { tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); int rc = MDBX_SUCCESS; if (unlikely(txn->tw.loose_count >= - (txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose - : txn->tw.writemap_dirty_npages))) + (txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose : txn->tw.writemap_dirty_npages))) goto done; - const size_t dirty_entries = - txn->tw.dirtylist ? (txn->tw.dirtylist->length - txn->tw.loose_count) : 1; + const size_t dirty_entries = txn->tw.dirtylist ? 
(txn->tw.dirtylist->length - txn->tw.loose_count) : 1; const size_t dirty_npages = - (txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose - : txn->tw.writemap_dirty_npages) - + (txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose : txn->tw.writemap_dirty_npages) - txn->tw.loose_count; - const size_t need_spill_entries = - spill_gate(txn->env, wanna_spill_entries, dirty_entries); - const size_t need_spill_npages = - spill_gate(txn->env, wanna_spill_npages, dirty_npages); + const size_t need_spill_entries = spill_gate(txn->env, wanna_spill_entries, dirty_entries); + const size_t need_spill_npages = spill_gate(txn->env, wanna_spill_npages, dirty_npages); - const size_t need_spill = (need_spill_entries > need_spill_npages) - ? need_spill_entries - : need_spill_npages; + const size_t need_spill = (need_spill_entries > need_spill_npages) ? need_spill_entries : need_spill_npages; if (!need_spill) goto done; if (txn->flags & MDBX_WRITEMAP) { - NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "msync", - dirty_entries, dirty_npages); + NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "msync", dirty_entries, dirty_npages); const MDBX_env *env = txn->env; tASSERT(txn, txn->tw.spilled.list == nullptr); - rc = osal_msync(&txn->env->dxb_mmap, 0, - pgno_align2os_bytes(env, txn->geo.first_unallocated), - MDBX_SYNC_KICK); + rc = osal_msync(&txn->env->dxb_mmap, 0, pgno_align2os_bytes(env, txn->geo.first_unallocated), MDBX_SYNC_KICK); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; #if MDBX_AVOID_MSYNC MDBX_ANALYSIS_ASSUME(txn->tw.dirtylist != nullptr); tASSERT(txn, dpl_check(txn)); - env->lck->unsynced_pages.weak += - txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count; + env->lck->unsynced_pages.weak += txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count; dpl_clear(txn->tw.dirtylist); txn->tw.dirtyroom = env->options.dp_limit - txn->tw.loose_count; for (page_t *lp = txn->tw.loose_pages; lp != nullptr; lp = page_next(lp)) { @@ 
-256,12 +224,10 @@ __cold int spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, goto done; } - NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "write", - need_spill_entries, need_spill_npages); + NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "write", need_spill_entries, need_spill_npages); MDBX_ANALYSIS_ASSUME(txn->tw.dirtylist != nullptr); tASSERT(txn, txn->tw.dirtylist->length - txn->tw.loose_count >= 1); - tASSERT(txn, txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count >= - need_spill_npages); + tASSERT(txn, txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count >= need_spill_npages); if (!txn->tw.spilled.list) { txn->tw.spilled.least_removed = INT_MAX; txn->tw.spilled.list = pnl_alloc(need_spill); @@ -338,10 +304,8 @@ __cold int spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, for (size_t i = 1; i <= dl->length; ++i) { const unsigned prio = spill_prio(txn, i, reciprocal); size_t *const ptr = ptr_disp(dl->items[i].ptr, -(ptrdiff_t)sizeof(size_t)); - TRACE("page %" PRIaPGNO - ", lru %zu, is_multi %c, npages %u, age %u of %u, prio %u", - dl->items[i].pgno, *ptr, (dl->items[i].npages > 1) ? 'Y' : 'N', - dpl_npages(dl, i), dpl_age(txn, i), age_max, prio); + TRACE("page %" PRIaPGNO ", lru %zu, is_multi %c, npages %u, age %u of %u, prio %u", dl->items[i].pgno, *ptr, + (dl->items[i].npages > 1) ? 
'Y' : 'N', dpl_npages(dl, i), dpl_age(txn, i), age_max, prio); if (prio < 256) { radix_entries[prio] += 1; spillable_entries += 1; @@ -354,20 +318,16 @@ __cold int spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, tASSERT(txn, spillable_npages >= spillable_entries); pgno_t spilled_entries = 0, spilled_npages = 0; if (likely(spillable_entries > 0)) { - size_t prio2spill = 0, prio2adjacent = 128, - amount_entries = radix_entries[0], amount_npages = radix_npages[0]; + size_t prio2spill = 0, prio2adjacent = 128, amount_entries = radix_entries[0], amount_npages = radix_npages[0]; for (size_t i = 1; i < 256; i++) { - if (amount_entries < need_spill_entries || - amount_npages < need_spill_npages) { + if (amount_entries < need_spill_entries || amount_npages < need_spill_npages) { prio2spill = i; prio2adjacent = i + (257 - i) / 2; amount_entries += radix_entries[i]; amount_npages += radix_npages[i]; - } else if (amount_entries + amount_entries < - spillable_entries + need_spill_entries + } else if (amount_entries + amount_entries < spillable_entries + need_spill_entries /* РАВНОЗНАЧНО: amount - need_spill < spillable - amount */ - || amount_npages + amount_npages < - spillable_npages + need_spill_npages) { + || amount_npages + amount_npages < spillable_npages + need_spill_npages) { prio2adjacent = i; amount_entries += radix_entries[i]; amount_npages += radix_npages[i]; @@ -377,44 +337,38 @@ __cold int spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, VERBOSE("prio2spill %zu, prio2adjacent %zu, spillable %zu/%zu," " wanna-spill %zu/%zu, amount %zu/%zu", - prio2spill, prio2adjacent, spillable_entries, spillable_npages, - need_spill_entries, need_spill_npages, amount_entries, - amount_npages); + prio2spill, prio2adjacent, spillable_entries, spillable_npages, need_spill_entries, need_spill_npages, + amount_entries, amount_npages); tASSERT(txn, prio2spill < prio2adjacent && prio2adjacent <= 256); iov_ctx_t ctx; - rc = iov_init( - txn, &ctx, amount_entries, 
amount_npages, + rc = iov_init(txn, &ctx, amount_entries, amount_npages, #if defined(_WIN32) || defined(_WIN64) - txn->env->ioring.overlapped_fd ? txn->env->ioring.overlapped_fd : + txn->env->ioring.overlapped_fd ? txn->env->ioring.overlapped_fd : #endif - txn->env->lazy_fd, - true); + txn->env->lazy_fd, + true); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; size_t r = 0, w = 0; pgno_t last = 0; - while (r < dl->length && (spilled_entries < need_spill_entries || - spilled_npages < need_spill_npages)) { + while (r < dl->length && (spilled_entries < need_spill_entries || spilled_npages < need_spill_npages)) { dl->items[++w] = dl->items[++r]; unsigned prio = spill_prio(txn, w, reciprocal); - if (prio > prio2spill && - (prio >= prio2adjacent || last != dl->items[w].pgno)) + if (prio > prio2spill && (prio >= prio2adjacent || last != dl->items[w].pgno)) continue; const size_t e = w; last = dpl_endpgno(dl, w); - while (--w && dpl_endpgno(dl, w) == dl->items[w + 1].pgno && - spill_prio(txn, w, reciprocal) < prio2adjacent) + while (--w && dpl_endpgno(dl, w) == dl->items[w + 1].pgno && spill_prio(txn, w, reciprocal) < prio2adjacent) ; for (size_t i = w; ++i <= e;) { const unsigned npages = dpl_npages(dl, i); prio = spill_prio(txn, i, reciprocal); - DEBUG("%sspill[%zu] %u page %" PRIaPGNO " (age %d, prio %u)", - (prio > prio2spill) ? "co-" : "", i, npages, dl->items[i].pgno, - dpl_age(txn, i), prio); + DEBUG("%sspill[%zu] %u page %" PRIaPGNO " (age %d, prio %u)", (prio > prio2spill) ? 
"co-" : "", i, npages, + dl->items[i].pgno, dpl_age(txn, i), prio); tASSERT(txn, prio < 256); ++spilled_entries; spilled_npages += npages; @@ -424,8 +378,7 @@ __cold int spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, } } - VERBOSE("spilled entries %u, spilled npages %u", spilled_entries, - spilled_npages); + VERBOSE("spilled entries %u, spilled npages %u", spilled_entries, spilled_npages); tASSERT(txn, spillable_entries == 0 || spilled_entries > 0); tASSERT(txn, spilled_npages >= spilled_entries); @@ -449,16 +402,14 @@ __cold int spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, txn->env->lck->unsynced_pages.weak += spilled_npages; pnl_sort(txn->tw.spilled.list, (size_t)txn->geo.first_unallocated << 1); txn->flags |= MDBX_TXN_SPILLS; - NOTICE("spilled %u dirty-entries, %u dirty-npages, now have %zu dirty-room", - spilled_entries, spilled_npages, txn->tw.dirtyroom); + NOTICE("spilled %u dirty-entries, %u dirty-npages, now have %zu dirty-room", spilled_entries, spilled_npages, + txn->tw.dirtyroom); } else { tASSERT(txn, rc == MDBX_SUCCESS); for (size_t i = 1; i <= dl->length; ++i) { page_t *dp = dl->items[i].ptr; - VERBOSE( - "unspillable[%zu]: pgno %u, npages %u, flags 0x%04X, age %u, prio %u", - i, dp->pgno, dpl_npages(dl, i), dp->flags, dpl_age(txn, i), - spill_prio(txn, i, reciprocal)); + VERBOSE("unspillable[%zu]: pgno %u, npages %u, flags 0x%04X, age %u, prio %u", i, dp->pgno, dpl_npages(dl, i), + dp->flags, dpl_age(txn, i), spill_prio(txn, i, reciprocal)); } } @@ -468,17 +419,13 @@ __cold int spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, "needed %zu, spillable %zu; " "spilled %u dirty-entries, now have %zu dirty-room", dl->length + spilled_entries, dl->length, - (txn->parent && txn->parent->tw.dirtylist) - ? (intptr_t)txn->parent->tw.dirtylist->length - : -1, - txn->tw.loose_count, need, spillable_entries, spilled_entries, - txn->tw.dirtyroom); + (txn->parent && txn->parent->tw.dirtylist) ? 
(intptr_t)txn->parent->tw.dirtylist->length : -1, + txn->tw.loose_count, need, spillable_entries, spilled_entries, txn->tw.dirtyroom); ENSURE(txn->env, txn->tw.loose_count + txn->tw.dirtyroom > need / 2); #endif /* xMDBX_DEBUG_SPILLING */ done: - return likely(txn->tw.dirtyroom + txn->tw.loose_count > - ((need > CURSOR_STACK_SIZE) ? CURSOR_STACK_SIZE : need)) + return likely(txn->tw.dirtyroom + txn->tw.loose_count > ((need > CURSOR_STACK_SIZE) ? CURSOR_STACK_SIZE : need)) ? MDBX_SUCCESS : MDBX_TXN_FULL; } diff --git a/src/spill.h b/src/spill.h index f4c427dd..32a8c9b2 100644 --- a/src/spill.h +++ b/src/spill.h @@ -7,10 +7,8 @@ MDBX_INTERNAL void spill_remove(MDBX_txn *txn, size_t idx, size_t npages); MDBX_INTERNAL pnl_t spill_purge(MDBX_txn *txn); -MDBX_INTERNAL int spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, - const intptr_t wanna_spill_entries, - const intptr_t wanna_spill_npages, - const size_t need); +MDBX_INTERNAL int spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, const intptr_t wanna_spill_entries, + const intptr_t wanna_spill_npages, const size_t need); /*----------------------------------------------------------------------------*/ static inline size_t spill_search(const MDBX_txn *txn, pgno_t pgno) { @@ -23,8 +21,7 @@ static inline size_t spill_search(const MDBX_txn *txn, pgno_t pgno) { return (n <= MDBX_PNL_GETSIZE(pnl) && pnl[n] == pgno) ? n : 0; } -static inline bool spill_intersect(const MDBX_txn *txn, pgno_t pgno, - size_t npages) { +static inline bool spill_intersect(const MDBX_txn *txn, pgno_t pgno, size_t npages) { const pnl_t pnl = txn->tw.spilled.list; if (likely(!pnl)) return false; @@ -32,23 +29,18 @@ static inline bool spill_intersect(const MDBX_txn *txn, pgno_t pgno, if (LOG_ENABLED(MDBX_LOG_EXTRA)) { DEBUG_EXTRA("PNL len %zu [", len); for (size_t i = 1; i <= len; ++i) - DEBUG_EXTRA_PRINT(" %li", (pnl[i] & 1) ? -(long)(pnl[i] >> 1) - : (long)(pnl[i] >> 1)); + DEBUG_EXTRA_PRINT(" %li", (pnl[i] & 1) ? 
-(long)(pnl[i] >> 1) : (long)(pnl[i] >> 1)); DEBUG_EXTRA_PRINT("%s\n", "]"); } const pgno_t spilled_range_begin = pgno << 1; const pgno_t spilled_range_last = ((pgno + (pgno_t)npages) << 1) - 1; #if MDBX_PNL_ASCENDING - const size_t n = - pnl_search(pnl, spilled_range_begin, (size_t)(MAX_PAGENO + 1) << 1); - tASSERT(txn, n && (n == MDBX_PNL_GETSIZE(pnl) + 1 || - spilled_range_begin <= pnl[n])); + const size_t n = pnl_search(pnl, spilled_range_begin, (size_t)(MAX_PAGENO + 1) << 1); + tASSERT(txn, n && (n == MDBX_PNL_GETSIZE(pnl) + 1 || spilled_range_begin <= pnl[n])); const bool rc = n <= MDBX_PNL_GETSIZE(pnl) && pnl[n] <= spilled_range_last; #else - const size_t n = - pnl_search(pnl, spilled_range_last, (size_t)MAX_PAGENO + MAX_PAGENO + 1); - tASSERT(txn, n && (n == MDBX_PNL_GETSIZE(pnl) + 1 || - spilled_range_last >= pnl[n])); + const size_t n = pnl_search(pnl, spilled_range_last, (size_t)MAX_PAGENO + MAX_PAGENO + 1); + tASSERT(txn, n && (n == MDBX_PNL_GETSIZE(pnl) + 1 || spilled_range_last >= pnl[n])); const bool rc = n <= MDBX_PNL_GETSIZE(pnl) && pnl[n] >= spilled_range_begin; #endif if (ASSERT_ENABLED()) { @@ -60,17 +52,13 @@ static inline bool spill_intersect(const MDBX_txn *txn, pgno_t pgno, return rc; } -static inline int txn_spill(MDBX_txn *const txn, MDBX_cursor *const m0, - const size_t need) { +static inline int txn_spill(MDBX_txn *const txn, MDBX_cursor *const m0, const size_t need) { tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); tASSERT(txn, !m0 || cursor_is_tracked(m0)); - const intptr_t wanna_spill_entries = - txn->tw.dirtylist ? (need - txn->tw.dirtyroom - txn->tw.loose_count) : 0; + const intptr_t wanna_spill_entries = txn->tw.dirtylist ? (need - txn->tw.dirtyroom - txn->tw.loose_count) : 0; const intptr_t wanna_spill_npages = - need + - (txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose - : txn->tw.writemap_dirty_npages) - + need + (txn->tw.dirtylist ? 
txn->tw.dirtylist->pages_including_loose : txn->tw.writemap_dirty_npages) - txn->tw.loose_count - txn->env->options.dp_limit; /* production mode */ diff --git a/src/table.c b/src/table.c index c37f72a5..c9a1e980 100644 --- a/src/table.c +++ b/src/table.c @@ -19,11 +19,8 @@ int tbl_setup(const MDBX_env *env, kvx_t *const kvx, const tree_t *const db) { kvx->clc.v.lmax = env_valsize_max(env, db->flags); if ((db->flags & (MDBX_DUPFIXED | MDBX_INTEGERDUP)) != 0 && db->dupfix_size) { - if (!MDBX_DISABLE_VALIDATION && - unlikely(db->dupfix_size < kvx->clc.v.lmin || - db->dupfix_size > kvx->clc.v.lmax)) { - ERROR("db.dupfix_size (%u) <> min/max value-length (%zu/%zu)", - db->dupfix_size, kvx->clc.v.lmin, kvx->clc.v.lmax); + if (!MDBX_DISABLE_VALIDATION && unlikely(db->dupfix_size < kvx->clc.v.lmin || db->dupfix_size > kvx->clc.v.lmax)) { + ERROR("db.dupfix_size (%u) <> min/max value-length (%zu/%zu)", db->dupfix_size, kvx->clc.v.lmin, kvx->clc.v.lmax); return MDBX_CORRUPTED; } kvx->clc.v.lmin = kvx->clc.v.lmax = db->dupfix_size; @@ -41,10 +38,8 @@ int tbl_fetch(MDBX_txn *txn, size_t dbi) { rc = tree_search(&couple.outer, &kvx->name, 0); if (unlikely(rc != MDBX_SUCCESS)) { bailout: - NOTICE("dbi %zu refs to inaccessible table `%*s` for txn %" PRIaTXN - " (err %d)", - dbi, (int)kvx->name.iov_len, (const char *)kvx->name.iov_base, - txn->txnid, rc); + NOTICE("dbi %zu refs to inaccessible table `%*s` for txn %" PRIaTXN " (err %d)", dbi, (int)kvx->name.iov_len, + (const char *)kvx->name.iov_base, txn->txnid, rc); return (rc == MDBX_NOTFOUND) ? 
MDBX_BAD_DBI : rc; } @@ -55,21 +50,18 @@ int tbl_fetch(MDBX_txn *txn, size_t dbi) { goto bailout; } if (unlikely((node_flags(nsr.node) & (N_DUP | N_TREE)) != N_TREE)) { - NOTICE("dbi %zu refs to not a named table `%*s` for txn %" PRIaTXN " (%s)", - dbi, (int)kvx->name.iov_len, (const char *)kvx->name.iov_base, - txn->txnid, "wrong flags"); + NOTICE("dbi %zu refs to not a named table `%*s` for txn %" PRIaTXN " (%s)", dbi, (int)kvx->name.iov_len, + (const char *)kvx->name.iov_base, txn->txnid, "wrong flags"); return MDBX_INCOMPATIBLE; /* not a named DB */ } - rc = node_read(&couple.outer, nsr.node, &data, - couple.outer.pg[couple.outer.top]); + rc = node_read(&couple.outer, nsr.node, &data, couple.outer.pg[couple.outer.top]); if (unlikely(rc != MDBX_SUCCESS)) return rc; if (unlikely(data.iov_len != sizeof(tree_t))) { - NOTICE("dbi %zu refs to not a named table `%*s` for txn %" PRIaTXN " (%s)", - dbi, (int)kvx->name.iov_len, (const char *)kvx->name.iov_base, - txn->txnid, "wrong rec-size"); + NOTICE("dbi %zu refs to not a named table `%*s` for txn %" PRIaTXN " (%s)", dbi, (int)kvx->name.iov_len, + (const char *)kvx->name.iov_base, txn->txnid, "wrong rec-size"); return MDBX_INCOMPATIBLE; /* not a named DB */ } @@ -80,8 +72,8 @@ int tbl_fetch(MDBX_txn *txn, size_t dbi) { if (unlikely((db->flags & DB_PERSISTENT_FLAGS) != flags)) { NOTICE("dbi %zu refs to the re-created table `%*s` for txn %" PRIaTXN " with different flags (present 0x%X != wanna 0x%X)", - dbi, (int)kvx->name.iov_len, (const char *)kvx->name.iov_base, - txn->txnid, db->flags & DB_PERSISTENT_FLAGS, flags); + dbi, (int)kvx->name.iov_len, (const char *)kvx->name.iov_base, txn->txnid, db->flags & DB_PERSISTENT_FLAGS, + flags); return MDBX_INCOMPATIBLE; } @@ -90,8 +82,7 @@ int tbl_fetch(MDBX_txn *txn, size_t dbi) { const txnid_t pp_txnid = couple.outer.pg[couple.outer.top]->txnid; tASSERT(txn, txn->front_txnid >= pp_txnid); if (unlikely(db->mod_txnid > pp_txnid)) { - ERROR("db.mod_txnid (%" PRIaTXN ") > 
page-txnid (%" PRIaTXN ")", - db->mod_txnid, pp_txnid); + ERROR("db.mod_txnid (%" PRIaTXN ") > page-txnid (%" PRIaTXN ")", db->mod_txnid, pp_txnid); return MDBX_CORRUPTED; } #endif /* !MDBX_DISABLE_VALIDATION */ diff --git a/src/tls.c b/src/tls.c index cdfdda3e..7590c65f 100644 --- a/src/tls.c +++ b/src/tls.c @@ -29,21 +29,17 @@ static int uniq_peek(const osal_mmap_t *pending, osal_mmap_t *scan) { bait = 0 /* hush MSVC warning */; rc = osal_msync(scan, 0, sizeof(lck_t), MDBX_SYNC_DATA); if (rc == MDBX_SUCCESS) - rc = osal_pread(pending->fd, &bait, sizeof(scan_lck->bait_uniqueness), - offsetof(lck_t, bait_uniqueness)); + rc = osal_pread(pending->fd, &bait, sizeof(scan_lck->bait_uniqueness), offsetof(lck_t, bait_uniqueness)); } - if (likely(rc == MDBX_SUCCESS) && - bait == atomic_load64(&scan_lck->bait_uniqueness, mo_AcquireRelease)) + if (likely(rc == MDBX_SUCCESS) && bait == atomic_load64(&scan_lck->bait_uniqueness, mo_AcquireRelease)) rc = MDBX_RESULT_TRUE; - TRACE("uniq-peek: %s, bait 0x%016" PRIx64 ",%s rc %d", - pending_lck ? "mem" : "file", bait, + TRACE("uniq-peek: %s, bait 0x%016" PRIx64 ",%s rc %d", pending_lck ? "mem" : "file", bait, (rc == MDBX_RESULT_TRUE) ? " found," : (rc ? 
" FAILED," : ""), rc); return rc; } -static int uniq_poke(const osal_mmap_t *pending, osal_mmap_t *scan, - uint64_t *abra) { +static int uniq_poke(const osal_mmap_t *pending, osal_mmap_t *scan, uint64_t *abra) { if (*abra == 0) { const uintptr_t tid = osal_thread_self(); uintptr_t uit = 0; @@ -51,9 +47,7 @@ static int uniq_poke(const osal_mmap_t *pending, osal_mmap_t *scan, *abra = rrxmrrxmsx_0(osal_monotime() + UINT64_C(5873865991930747) * uit); } const uint64_t cadabra = - rrxmrrxmsx_0(*abra + UINT64_C(7680760450171793) * (unsigned)osal_getpid()) - << 24 | - *abra >> 40; + rrxmrrxmsx_0(*abra + UINT64_C(7680760450171793) * (unsigned)osal_getpid()) << 24 | *abra >> 40; lck_t *const scan_lck = scan->lck; atomic_store64(&scan_lck->bait_uniqueness, cadabra, mo_AcquireRelease); *abra = *abra * UINT64_C(6364136223846793005) + 1; @@ -67,14 +61,12 @@ __cold int rthc_uniq_check(const osal_mmap_t *pending, MDBX_env **found) { MDBX_env *const scan = rthc_table[i].env; if (!scan->lck_mmap.lck || &scan->lck_mmap == pending) continue; - int err = - atomic_load64(&scan->lck_mmap.lck->bait_uniqueness, mo_AcquireRelease) - ? uniq_peek(pending, &scan->lck_mmap) - : uniq_poke(pending, &scan->lck_mmap, &salt); + int err = atomic_load64(&scan->lck_mmap.lck->bait_uniqueness, mo_AcquireRelease) + ? uniq_peek(pending, &scan->lck_mmap) + : uniq_poke(pending, &scan->lck_mmap, &salt); if (err == MDBX_ENODATA) { uint64_t length = 0; - if (likely(osal_filesize(pending->fd, &length) == MDBX_SUCCESS && - length == 0)) { + if (likely(osal_filesize(pending->fd, &length) == MDBX_SUCCESS && length == 0)) { /* LY: skip checking since LCK-file is empty, i.e. just created. 
*/ DEBUG("%s", "unique (new/empty lck)"); return MDBX_SUCCESS; @@ -114,8 +106,7 @@ static osal_thread_key_t rthc_key; static mdbx_atomic_uint32_t rthc_pending; static inline uint64_t rthc_signature(const void *addr, uint8_t kind) { - uint64_t salt = osal_thread_self() * UINT64_C(0xA2F0EEC059629A17) ^ - UINT64_C(0x01E07C6FDB596497) * (uintptr_t)(addr); + uint64_t salt = osal_thread_self() * UINT64_C(0xA2F0EEC059629A17) ^ UINT64_C(0x01E07C6FDB596497) * (uintptr_t)(addr); #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ return salt << 8 | kind; #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ @@ -128,45 +119,36 @@ static inline uint64_t rthc_signature(const void *addr, uint8_t kind) { #define MDBX_THREAD_RTHC_REGISTERED(addr) rthc_signature(addr, 0x0D) #define MDBX_THREAD_RTHC_COUNTED(addr) rthc_signature(addr, 0xC0) static __thread uint64_t rthc_thread_state -#if __has_attribute(tls_model) && \ - (defined(__PIC__) || defined(__pic__) || MDBX_BUILD_SHARED_LIBRARY) +#if __has_attribute(tls_model) && (defined(__PIC__) || defined(__pic__) || MDBX_BUILD_SHARED_LIBRARY) __attribute__((tls_model("local-dynamic"))) #endif ; -#if defined(__APPLE__) && defined(__SANITIZE_ADDRESS__) && \ - !defined(MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS) +#if defined(__APPLE__) && defined(__SANITIZE_ADDRESS__) && !defined(MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS) /* Avoid ASAN-trap due the target TLS-variable feed by Darwin's tlv_free() */ -#define MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS \ - __attribute__((__no_sanitize_address__, __noinline__)) +#define MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS __attribute__((__no_sanitize_address__, __noinline__)) #else #define MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS inline #endif -MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS static uint64_t rthc_read(const void *rthc) { - return *(volatile uint64_t *)rthc; -} +MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS static uint64_t rthc_read(const void *rthc) { return *(volatile uint64_t *)rthc; } -MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS static uint64_t 
-rthc_compare_and_clean(const void *rthc, const uint64_t signature) { +MDBX_ATTRIBUTE_NO_SANITIZE_ADDRESS static uint64_t rthc_compare_and_clean(const void *rthc, const uint64_t signature) { #if MDBX_64BIT_CAS return atomic_cas64((mdbx_atomic_uint64_t *)rthc, signature, 0); #elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ return atomic_cas32((mdbx_atomic_uint32_t *)rthc, (uint32_t)signature, 0); #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return atomic_cas32((mdbx_atomic_uint32_t *)rthc, (uint32_t)(signature >> 32), - 0); + return atomic_cas32((mdbx_atomic_uint32_t *)rthc, (uint32_t)(signature >> 32), 0); #else #error "FIXME: Unsupported byte order" #endif } -static inline int rthc_atexit(void (*dtor)(void *), void *obj, - void *dso_symbol) { +static inline int rthc_atexit(void (*dtor)(void *), void *obj, void *dso_symbol) { #ifndef MDBX_HAVE_CXA_THREAD_ATEXIT_IMPL -#if defined(LIBCXXABI_HAS_CXA_THREAD_ATEXIT_IMPL) || \ - defined(HAVE___CXA_THREAD_ATEXIT_IMPL) || __GLIBC_PREREQ(2, 18) || \ - defined(BIONIC) +#if defined(LIBCXXABI_HAS_CXA_THREAD_ATEXIT_IMPL) || defined(HAVE___CXA_THREAD_ATEXIT_IMPL) || \ + __GLIBC_PREREQ(2, 18) || defined(BIONIC) #define MDBX_HAVE_CXA_THREAD_ATEXIT_IMPL 1 #else #define MDBX_HAVE_CXA_THREAD_ATEXIT_IMPL 0 @@ -174,11 +156,9 @@ static inline int rthc_atexit(void (*dtor)(void *), void *obj, #endif /* MDBX_HAVE_CXA_THREAD_ATEXIT_IMPL */ #ifndef MDBX_HAVE_CXA_THREAD_ATEXIT -#if defined(LIBCXXABI_HAS_CXA_THREAD_ATEXIT) || \ - defined(HAVE___CXA_THREAD_ATEXIT) +#if defined(LIBCXXABI_HAS_CXA_THREAD_ATEXIT) || defined(HAVE___CXA_THREAD_ATEXIT) #define MDBX_HAVE_CXA_THREAD_ATEXIT 1 -#elif !MDBX_HAVE_CXA_THREAD_ATEXIT_IMPL && \ - (defined(__linux__) || defined(__gnu_linux__)) +#elif !MDBX_HAVE_CXA_THREAD_ATEXIT_IMPL && (defined(__linux__) || defined(__gnu_linux__)) #define MDBX_HAVE_CXA_THREAD_ATEXIT 1 #else #define MDBX_HAVE_CXA_THREAD_ATEXIT 0 @@ -190,13 +170,11 @@ static inline int rthc_atexit(void (*dtor)(void *), void *obj, #define 
__cxa_thread_atexit __cxa_thread_atexit_impl #endif #if MDBX_HAVE_CXA_THREAD_ATEXIT || defined(__cxa_thread_atexit) - extern int __cxa_thread_atexit(void (*dtor)(void *), void *obj, - void *dso_symbol) MDBX_WEAK_IMPORT_ATTRIBUTE; + extern int __cxa_thread_atexit(void (*dtor)(void *), void *obj, void *dso_symbol) MDBX_WEAK_IMPORT_ATTRIBUTE; if (&__cxa_thread_atexit) rc = __cxa_thread_atexit(dtor, obj, dso_symbol); #elif defined(__APPLE__) || defined(_DARWIN_C_SOURCE) - extern void _tlv_atexit(void (*termfunc)(void *objAddr), void *objAddr) - MDBX_WEAK_IMPORT_ATTRIBUTE; + extern void _tlv_atexit(void (*termfunc)(void *objAddr), void *objAddr) MDBX_WEAK_IMPORT_ATTRIBUTE; if (&_tlv_atexit) { (void)dso_symbol; _tlv_atexit(dtor, obj); @@ -250,8 +228,7 @@ static inline int thread_key_create(osal_thread_key_t *key) { #else rc = pthread_key_create(key, nullptr); #endif - TRACE("&key = %p, value %" PRIuPTR ", rc %d", __Wpedantic_format_voidptr(key), - (uintptr_t)*key, rc); + TRACE("&key = %p, value %" PRIuPTR ", rc %d", __Wpedantic_format_voidptr(key), (uintptr_t)*key, rc); return rc; } @@ -259,21 +236,17 @@ void thread_rthc_set(osal_thread_key_t key, const void *value) { #if defined(_WIN32) || defined(_WIN64) ENSURE(nullptr, TlsSetValue(key, (void *)value)); #else - const uint64_t sign_registered = - MDBX_THREAD_RTHC_REGISTERED(&rthc_thread_state); + const uint64_t sign_registered = MDBX_THREAD_RTHC_REGISTERED(&rthc_thread_state); const uint64_t sign_counted = MDBX_THREAD_RTHC_COUNTED(&rthc_thread_state); - if (value && unlikely(rthc_thread_state != sign_registered && - rthc_thread_state != sign_counted)) { + if (value && unlikely(rthc_thread_state != sign_registered && rthc_thread_state != sign_counted)) { rthc_thread_state = sign_registered; TRACE("thread registered 0x%" PRIxPTR, osal_thread_self()); - if (rthc_atexit(rthc_thread_dtor, &rthc_thread_state, - (void *)&mdbx_version /* dso_anchor */)) { + if (rthc_atexit(rthc_thread_dtor, &rthc_thread_state, (void 
*)&mdbx_version /* dso_anchor */)) { ENSURE(nullptr, pthread_setspecific(rthc_key, &rthc_thread_state) == 0); rthc_thread_state = sign_counted; const unsigned count_before = atomic_add32(&rthc_pending, 1); ENSURE(nullptr, count_before < INT_MAX); - NOTICE("fallback to pthreads' tsd, key %" PRIuPTR ", count %u", - (uintptr_t)rthc_key, count_before); + NOTICE("fallback to pthreads' tsd, key %" PRIuPTR ", count %u", (uintptr_t)rthc_key, count_before); (void)count_before; } } @@ -286,11 +259,9 @@ __cold void rthc_thread_dtor(void *rthc) { rthc_lock(); const uint32_t current_pid = osal_getpid(); #if defined(_WIN32) || defined(_WIN64) - TRACE(">> pid %d, thread 0x%" PRIxPTR ", module %p", current_pid, - osal_thread_self(), rthc); + TRACE(">> pid %d, thread 0x%" PRIxPTR ", module %p", current_pid, osal_thread_self(), rthc); #else - TRACE(">> pid %d, thread 0x%" PRIxPTR ", rthc %p", current_pid, - osal_thread_self(), rthc); + TRACE(">> pid %d, thread 0x%" PRIxPTR ", rthc %p", current_pid, osal_thread_self(), rthc); #endif for (size_t i = 0; i < rthc_count; ++i) { @@ -306,22 +277,18 @@ __cold void rthc_thread_dtor(void *rthc) { continue; #if !defined(_WIN32) && !defined(_WIN64) if (pthread_setspecific(env->me_txkey, nullptr) != 0) { - TRACE("== thread 0x%" PRIxPTR - ", rthc %p: ignore race with tsd-key deletion", - osal_thread_self(), __Wpedantic_format_voidptr(reader)); + TRACE("== thread 0x%" PRIxPTR ", rthc %p: ignore race with tsd-key deletion", osal_thread_self(), + __Wpedantic_format_voidptr(reader)); continue /* ignore race with tsd-key deletion by mdbx_env_close() */; } #endif - TRACE("== thread 0x%" PRIxPTR - ", rthc %p, [%zi], %p ... %p (%+i), rtch-pid %i, " + TRACE("== thread 0x%" PRIxPTR ", rthc %p, [%zi], %p ... 
%p (%+i), rtch-pid %i, " "current-pid %i", - osal_thread_self(), __Wpedantic_format_voidptr(reader), i, - __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end), - (int)(reader - begin), reader->pid.weak, current_pid); + osal_thread_self(), __Wpedantic_format_voidptr(reader), i, __Wpedantic_format_voidptr(begin), + __Wpedantic_format_voidptr(end), (int)(reader - begin), reader->pid.weak, current_pid); if (atomic_load32(&reader->pid, mo_Relaxed) == current_pid) { - TRACE("==== thread 0x%" PRIxPTR ", rthc %p, cleanup", osal_thread_self(), - __Wpedantic_format_voidptr(reader)); + TRACE("==== thread 0x%" PRIxPTR ", rthc %p, cleanup", osal_thread_self(), __Wpedantic_format_voidptr(reader)); (void)atomic_cas32(&reader->pid, current_pid, 0); atomic_store32(&env->lck->rdt_refresh_flag, true, mo_Relaxed); } @@ -334,26 +301,20 @@ __cold void rthc_thread_dtor(void *rthc) { const uint64_t sign_registered = MDBX_THREAD_RTHC_REGISTERED(rthc); const uint64_t sign_counted = MDBX_THREAD_RTHC_COUNTED(rthc); const uint64_t state = rthc_read(rthc); - if (state == sign_registered && - rthc_compare_and_clean(rthc, sign_registered)) { - TRACE("== thread 0x%" PRIxPTR - ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), rthc, osal_getpid(), "registered", state); - } else if (state == sign_counted && - rthc_compare_and_clean(rthc, sign_counted)) { - TRACE("== thread 0x%" PRIxPTR - ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), rthc, osal_getpid(), "counted", state); + if (state == sign_registered && rthc_compare_and_clean(rthc, sign_registered)) { + TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", osal_thread_self(), rthc, + osal_getpid(), "registered", state); + } else if (state == sign_counted && rthc_compare_and_clean(rthc, sign_counted)) { + TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", osal_thread_self(), rthc, + osal_getpid(), "counted", 
state); ENSURE(nullptr, atomic_sub32(&rthc_pending, 1) > 0); } else { - WARNING("thread 0x%" PRIxPTR - ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), rthc, osal_getpid(), "wrong", state); + WARNING("thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", osal_thread_self(), rthc, + osal_getpid(), "wrong", state); } if (atomic_load32(&rthc_pending, mo_AcquireRelease) == 0) { - TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, wake", osal_thread_self(), - rthc, osal_getpid()); + TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, wake", osal_thread_self(), rthc, osal_getpid()); ENSURE(nullptr, pthread_cond_broadcast(&rthc_cond) == 0); } @@ -367,8 +328,7 @@ __cold void rthc_thread_dtor(void *rthc) { } __cold int rthc_register(MDBX_env *const env) { - TRACE(">> env %p, rthc_count %u, rthc_limit %u", - __Wpedantic_format_voidptr(env), rthc_count, rthc_limit); + TRACE(">> env %p, rthc_count %u, rthc_limit %u", __Wpedantic_format_voidptr(env), rthc_count, rthc_limit); int rc = MDBX_SUCCESS; for (size_t i = 0; i < rthc_count; ++i) @@ -380,8 +340,7 @@ __cold int rthc_register(MDBX_env *const env) { env->me_txkey = 0; if (unlikely(rthc_count == rthc_limit)) { rthc_entry_t *new_table = - osal_realloc((rthc_table == rthc_table_static) ? nullptr : rthc_table, - sizeof(rthc_entry_t) * rthc_limit * 2); + osal_realloc((rthc_table == rthc_table_static) ? 
nullptr : rthc_table, sizeof(rthc_entry_t) * rthc_limit * 2); if (unlikely(new_table == nullptr)) { rc = MDBX_ENOMEM; goto bailout; @@ -400,14 +359,12 @@ __cold int rthc_register(MDBX_env *const env) { } rthc_table[rthc_count].env = env; - TRACE("== [%i] = env %p, key %" PRIuPTR, rthc_count, - __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey); + TRACE("== [%i] = env %p, key %" PRIuPTR, rthc_count, __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey); ++rthc_count; bailout: - TRACE("<< env %p, key %" PRIuPTR ", rthc_count %u, rthc_limit %u, rc %d", - __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey, rthc_count, - rthc_limit, rc); + TRACE("<< env %p, key %" PRIuPTR ", rthc_count %u, rthc_limit %u, rc %d", __Wpedantic_format_voidptr(env), + (uintptr_t)env->me_txkey, rthc_count, rthc_limit, rc); return rc; } @@ -418,10 +375,8 @@ __cold static int rthc_drown(MDBX_env *const env) { if (likely(env->lck_mmap.lck && current_pid == env->pid)) { reader_slot_t *const begin = &env->lck_mmap.lck->rdt[0]; reader_slot_t *const end = &env->lck_mmap.lck->rdt[env->max_readers]; - TRACE("== %s env %p pid %d, readers %p ...%p, current-pid %d", - (current_pid == env->pid) ? "cleanup" : "skip", - __Wpedantic_format_voidptr(env), env->pid, - __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end), + TRACE("== %s env %p pid %d, readers %p ...%p, current-pid %d", (current_pid == env->pid) ? 
"cleanup" : "skip", + __Wpedantic_format_voidptr(env), env->pid, __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end), current_pid); bool cleaned = false; for (reader_slot_t *r = begin; r < end; ++r) { @@ -434,8 +389,7 @@ __cold static int rthc_drown(MDBX_env *const env) { if (cleaned) atomic_store32(&env->lck_mmap.lck->rdt_refresh_flag, true, mo_Relaxed); rc = rthc_uniq_check(&env->lck_mmap, &inprocess_neighbor); - if (!inprocess_neighbor && env->registered_reader_pid && - env->lck_mmap.fd != INVALID_HANDLE_VALUE) { + if (!inprocess_neighbor && env->registered_reader_pid && env->lck_mmap.fd != INVALID_HANDLE_VALUE) { int err = lck_rpid_clear(env); rc = rc ? rc : err; } @@ -446,9 +400,8 @@ __cold static int rthc_drown(MDBX_env *const env) { } __cold int rthc_remove(MDBX_env *const env) { - TRACE(">>> env %p, key %zu, rthc_count %u, rthc_limit %u", - __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey, rthc_count, - rthc_limit); + TRACE(">>> env %p, key %zu, rthc_count %u, rthc_limit %u", __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey, + rthc_count, rthc_limit); int rc = MDBX_SUCCESS; if (likely(env->pid)) @@ -469,9 +422,8 @@ __cold int rthc_remove(MDBX_env *const env) { } } - TRACE("<<< %p, key %zu, rthc_count %u, rthc_limit %u", - __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey, rthc_count, - rthc_limit); + TRACE("<<< %p, key %zu, rthc_count %u, rthc_limit %u", __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey, + rthc_count, rthc_limit); return rc; } @@ -508,8 +460,8 @@ __cold void rthc_ctor(void) { #else ENSURE(nullptr, pthread_atfork(nullptr, nullptr, rthc_afterfork) == 0); ENSURE(nullptr, pthread_key_create(&rthc_key, rthc_thread_dtor) == 0); - TRACE("pid %d, &mdbx_rthc_key = %p, value 0x%x", osal_getpid(), - __Wpedantic_format_voidptr(&rthc_key), (unsigned)rthc_key); + TRACE("pid %d, &mdbx_rthc_key = %p, value 0x%x", osal_getpid(), __Wpedantic_format_voidptr(&rthc_key), + (unsigned)rthc_key); #endif } @@ -517,33 
+469,23 @@ __cold void rthc_dtor(const uint32_t current_pid) { rthc_lock(); #if !defined(_WIN32) && !defined(_WIN64) uint64_t *rthc = pthread_getspecific(rthc_key); - TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status 0x%08" PRIx64 - ", left %d", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, - rthc ? rthc_read(rthc) : ~UINT64_C(0), + TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status 0x%08" PRIx64 ", left %d", osal_thread_self(), + __Wpedantic_format_voidptr(rthc), current_pid, rthc ? rthc_read(rthc) : ~UINT64_C(0), atomic_load32(&rthc_pending, mo_Relaxed)); if (rthc) { const uint64_t sign_registered = MDBX_THREAD_RTHC_REGISTERED(rthc); const uint64_t sign_counted = MDBX_THREAD_RTHC_COUNTED(rthc); const uint64_t state = rthc_read(rthc); - if (state == sign_registered && - rthc_compare_and_clean(rthc, sign_registered)) { - TRACE("== thread 0x%" PRIxPTR - ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, - "registered", state); - } else if (state == sign_counted && - rthc_compare_and_clean(rthc, sign_counted)) { - TRACE("== thread 0x%" PRIxPTR - ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, - "counted", state); + if (state == sign_registered && rthc_compare_and_clean(rthc, sign_registered)) { + TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", osal_thread_self(), + __Wpedantic_format_voidptr(rthc), current_pid, "registered", state); + } else if (state == sign_counted && rthc_compare_and_clean(rthc, sign_counted)) { + TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", osal_thread_self(), + __Wpedantic_format_voidptr(rthc), current_pid, "counted", state); ENSURE(nullptr, atomic_sub32(&rthc_pending, 1) > 0); } else { - WARNING("thread 0x%" PRIxPTR - ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - 
osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, - "wrong", state); + WARNING("thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", osal_thread_self(), + __Wpedantic_format_voidptr(rthc), current_pid, "wrong", state); } } @@ -558,8 +500,7 @@ __cold void rthc_dtor(const uint32_t current_pid) { abstime.tv_sec += 600; #endif - for (unsigned left; - (left = atomic_load32(&rthc_pending, mo_AcquireRelease)) > 0;) { + for (unsigned left; (left = atomic_load32(&rthc_pending, mo_AcquireRelease)) > 0;) { NOTICE("tls-cleanup: pid %d, pending %u, wait for...", current_pid, left); const int rc = pthread_cond_timedwait(&rthc_cond, &rthc_mutex, &abstime); if (rc && rc != EINTR) @@ -581,9 +522,8 @@ __cold void rthc_dtor(const uint32_t current_pid) { for (reader_slot_t *reader = begin; reader < end; ++reader) { TRACE("== [%zi] = key %" PRIuPTR ", %p ... %p, rthc %p (%+i), " "rthc-pid %i, current-pid %i", - i, (uintptr_t)env->me_txkey, __Wpedantic_format_voidptr(begin), - __Wpedantic_format_voidptr(end), __Wpedantic_format_voidptr(reader), - (int)(reader - begin), reader->pid.weak, current_pid); + i, (uintptr_t)env->me_txkey, __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end), + __Wpedantic_format_voidptr(reader), (int)(reader - begin), reader->pid.weak, current_pid); if (atomic_load32(&reader->pid, mo_Relaxed) == current_pid) { (void)atomic_cas32(&reader->pid, current_pid, 0); TRACE("== cleanup %p", __Wpedantic_format_voidptr(reader)); diff --git a/src/tools/chk.c b/src/tools/chk.c index 22e38460..a22fd57b 100644 --- a/src/tools/chk.c +++ b/src/tools/chk.c @@ -30,8 +30,7 @@ static BOOL WINAPI ConsoleBreakHandlerRoutine(DWORD dwCtrlType) { static uint64_t GetMilliseconds(void) { LARGE_INTEGER Counter, Frequency; - return (QueryPerformanceFrequency(&Frequency) && - QueryPerformanceCounter(&Counter)) + return (QueryPerformanceFrequency(&Frequency) && QueryPerformanceCounter(&Counter)) ? 
Counter.QuadPart * 1000ul / Frequency.QuadPart : 0; } @@ -93,9 +92,8 @@ static void lf_flush(void) { } static bool silently(enum MDBX_chk_severity severity) { - int cutoff = - chk.scope ? chk.scope->verbosity >> MDBX_chk_severity_prio_shift - : verbose + (MDBX_chk_result >> MDBX_chk_severity_prio_shift); + int cutoff = chk.scope ? chk.scope->verbosity >> MDBX_chk_severity_prio_shift + : verbose + (MDBX_chk_result >> MDBX_chk_severity_prio_shift); int prio = (severity >> MDBX_chk_severity_prio_shift); if (chk.scope && chk.scope->stage == MDBX_chk_tables && verbose < 2) prio += 1; @@ -125,11 +123,9 @@ static FILE *prefix(enum MDBX_chk_severity severity) { " ////// " // F +2 }; - const bool nl = - line_struct.scope_depth != chk.scope_nesting || - (line_struct.severity != severity && - (line_struct.severity != MDBX_chk_processing || - severity < MDBX_chk_result || severity > MDBX_chk_resolution)); + const bool nl = line_struct.scope_depth != chk.scope_nesting || + (line_struct.severity != severity && (line_struct.severity != MDBX_chk_processing || + severity < MDBX_chk_result || severity > MDBX_chk_resolution)); if (nl) lf(); if (severity < MDBX_chk_warning) @@ -157,8 +153,7 @@ static void suffix(size_t cookie, const char *str) { } } -static size_t MDBX_PRINTF_ARGS(2, 3) - print(enum MDBX_chk_severity severity, const char *msg, ...) { +static size_t MDBX_PRINTF_ARGS(2, 3) print(enum MDBX_chk_severity severity, const char *msg, ...) { FILE *out = prefix(severity); if (out) { va_list args; @@ -171,8 +166,7 @@ static size_t MDBX_PRINTF_ARGS(2, 3) return 0; } -static FILE *MDBX_PRINTF_ARGS(2, 3) - print_ln(enum MDBX_chk_severity severity, const char *msg, ...) { +static FILE *MDBX_PRINTF_ARGS(2, 3) print_ln(enum MDBX_chk_severity severity, const char *msg, ...) 
{ FILE *out = prefix(severity); if (out) { va_list args; @@ -185,15 +179,12 @@ static FILE *MDBX_PRINTF_ARGS(2, 3) return out; } -static void logger(MDBX_log_level_t level, const char *function, int line, - const char *fmt, va_list args) { +static void logger(MDBX_log_level_t level, const char *function, int line, const char *fmt, va_list args) { if (level <= MDBX_LOG_ERROR) mdbx_env_chk_encount_problem(&chk); - const unsigned kind = (level > MDBX_LOG_NOTICE) - ? level - MDBX_LOG_NOTICE + - (MDBX_chk_extra & MDBX_chk_severity_kind_mask) - : level; + const unsigned kind = + (level > MDBX_LOG_NOTICE) ? level - MDBX_LOG_NOTICE + (MDBX_chk_extra & MDBX_chk_severity_kind_mask) : level; const unsigned prio = kind << MDBX_chk_severity_prio_shift; enum MDBX_chk_severity severity = prio + kind; FILE *out = prefix(severity); @@ -204,8 +195,8 @@ static void logger(MDBX_log_level_t level, const char *function, int line, if (have_lf) for (size_t i = 0; i < line_struct.scope_depth; ++i) fputs(" ", out); - fprintf(out, have_lf ? " %s(), %u" : " (%s:%u)", - function + (strncmp(function, "mdbx_", 5) ? 0 : 5), line); + fprintf(out, have_lf ? " %s(), %u" : " (%s:%u)", function + (strncmp(function, "mdbx_", 5) ? 
0 : 5), + line); lf(); } else if (have_lf) { line_struct.empty = true; @@ -249,8 +240,8 @@ static bool check_break(MDBX_chk_context_t *ctx) { return true; } -static int scope_push(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *scope, - MDBX_chk_scope_t *inner, const char *fmt, va_list args) { +static int scope_push(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *scope, MDBX_chk_scope_t *inner, const char *fmt, + va_list args) { (void)scope; if (fmt && *fmt) { FILE *out = prefix(MDBX_chk_processing); @@ -264,22 +255,19 @@ static int scope_push(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *scope, return MDBX_SUCCESS; } -static void scope_pop(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *scope, - MDBX_chk_scope_t *inner) { +static void scope_pop(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *scope, MDBX_chk_scope_t *inner) { (void)ctx; (void)scope; suffix(inner->usr_o.number, inner->subtotal_issues ? "error(s)" : "done"); flush(); } -static MDBX_chk_user_table_cookie_t *table_filter(MDBX_chk_context_t *ctx, - const MDBX_val *name, +static MDBX_chk_user_table_cookie_t *table_filter(MDBX_chk_context_t *ctx, const MDBX_val *name, MDBX_db_flags_t flags) { (void)ctx; (void)flags; return (!only_table.iov_base || - (only_table.iov_len == name->iov_len && - memcmp(only_table.iov_base, name->iov_base, name->iov_len) == 0)) + (only_table.iov_len == name->iov_len && memcmp(only_table.iov_base, name->iov_base, name->iov_len) == 0)) ? (void *)(intptr_t)-1 : nullptr; } @@ -293,8 +281,7 @@ static int stage_begin(MDBX_chk_context_t *ctx, enum MDBX_chk_stage stage) { } static int conclude(MDBX_chk_context_t *ctx); -static int stage_end(MDBX_chk_context_t *ctx, enum MDBX_chk_stage stage, - int err) { +static int stage_end(MDBX_chk_context_t *ctx, enum MDBX_chk_stage stage, int err) { if (stage == MDBX_chk_conclude && !err) err = conclude(ctx); suffix(anchor_lineno, err ? 
"error(s)" : "done"); @@ -303,14 +290,12 @@ static int stage_end(MDBX_chk_context_t *ctx, enum MDBX_chk_stage stage, return err; } -static MDBX_chk_line_t *print_begin(MDBX_chk_context_t *ctx, - enum MDBX_chk_severity severity) { +static MDBX_chk_line_t *print_begin(MDBX_chk_context_t *ctx, enum MDBX_chk_severity severity) { (void)ctx; if (silently(severity)) return nullptr; if (line_struct.ctx) { - if (line_struct.severity == MDBX_chk_processing && - severity >= MDBX_chk_result && severity <= MDBX_chk_resolution && + if (line_struct.severity == MDBX_chk_processing && severity >= MDBX_chk_result && severity <= MDBX_chk_resolution && line_output) fputc(' ', line_output); else @@ -356,39 +341,36 @@ static const MDBX_chk_callbacks_t cb = {.check_break = check_break, .print_format = print_format}; static void usage(char *prog) { - fprintf( - stderr, - "usage: %s " - "[-V] [-v] [-q] [-c] [-0|1|2] [-w] [-d] [-i] [-s table] [-u|U] dbpath\n" - " -V\t\tprint version and exit\n" - " -v\t\tmore verbose, could be repeated upto 9 times for extra details\n" - " -q\t\tbe quiet\n" - " -c\t\tforce cooperative mode (don't try exclusive)\n" - " -w\t\twrite-mode checking\n" - " -d\t\tdisable page-by-page traversal of B-tree\n" - " -i\t\tignore wrong order errors (for custom comparators case)\n" - " -s table\tprocess a specific subdatabase only\n" - " -u\t\twarmup database before checking\n" - " -U\t\twarmup and try lock database pages in memory before checking\n" - " -0|1|2\tforce using specific meta-page 0, or 2 for checking\n" - " -t\t\tturn to a specified meta-page on successful check\n" - " -T\t\tturn to a specified meta-page EVEN ON UNSUCCESSFUL CHECK!\n", - prog); + fprintf(stderr, + "usage: %s " + "[-V] [-v] [-q] [-c] [-0|1|2] [-w] [-d] [-i] [-s table] [-u|U] dbpath\n" + " -V\t\tprint version and exit\n" + " -v\t\tmore verbose, could be repeated upto 9 times for extra details\n" + " -q\t\tbe quiet\n" + " -c\t\tforce cooperative mode (don't try exclusive)\n" + " -w\t\twrite-mode 
checking\n" + " -d\t\tdisable page-by-page traversal of B-tree\n" + " -i\t\tignore wrong order errors (for custom comparators case)\n" + " -s table\tprocess a specific subdatabase only\n" + " -u\t\twarmup database before checking\n" + " -U\t\twarmup and try lock database pages in memory before checking\n" + " -0|1|2\tforce using specific meta-page 0, or 2 for checking\n" + " -t\t\tturn to a specified meta-page on successful check\n" + " -T\t\tturn to a specified meta-page EVEN ON UNSUCCESSFUL CHECK!\n", + prog); exit(EXIT_INTERRUPTED); } static int conclude(MDBX_chk_context_t *ctx) { int err = MDBX_SUCCESS; if (ctx->result.total_problems == 1 && ctx->result.problems_meta == 1 && - (chk_flags & - (MDBX_CHK_SKIP_BTREE_TRAVERSAL | MDBX_CHK_SKIP_KV_TRAVERSAL)) == 0 && - (env_flags & MDBX_RDONLY) == 0 && !only_table.iov_base && - stuck_meta < 0 && ctx->result.steady_txnid < ctx->result.recent_txnid) { - const size_t step_lineno = - print(MDBX_chk_resolution, - "Perform sync-to-disk for make steady checkpoint" - " at txn-id #%" PRIi64 "...", - ctx->result.recent_txnid); + (chk_flags & (MDBX_CHK_SKIP_BTREE_TRAVERSAL | MDBX_CHK_SKIP_KV_TRAVERSAL)) == 0 && + (env_flags & MDBX_RDONLY) == 0 && !only_table.iov_base && stuck_meta < 0 && + ctx->result.steady_txnid < ctx->result.recent_txnid) { + const size_t step_lineno = print(MDBX_chk_resolution, + "Perform sync-to-disk for make steady checkpoint" + " at txn-id #%" PRIi64 "...", + ctx->result.recent_txnid); flush(); err = error_fn("walk_pages", mdbx_env_sync_ex(ctx->env, true, false)); if (err == MDBX_SUCCESS) { @@ -398,19 +380,13 @@ static int conclude(MDBX_chk_context_t *ctx) { } } - if (turn_meta && stuck_meta >= 0 && - (chk_flags & - (MDBX_CHK_SKIP_BTREE_TRAVERSAL | MDBX_CHK_SKIP_KV_TRAVERSAL)) == 0 && - !only_table.iov_base && - (env_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == MDBX_EXCLUSIVE) { - const bool successful_check = - (err | ctx->result.total_problems | ctx->result.problems_meta) == 0; + if (turn_meta && 
stuck_meta >= 0 && (chk_flags & (MDBX_CHK_SKIP_BTREE_TRAVERSAL | MDBX_CHK_SKIP_KV_TRAVERSAL)) == 0 && + !only_table.iov_base && (env_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == MDBX_EXCLUSIVE) { + const bool successful_check = (err | ctx->result.total_problems | ctx->result.problems_meta) == 0; if (successful_check || force_turn_meta) { - const size_t step_lineno = print( - MDBX_chk_resolution, - "Performing turn to the specified meta-page (%d) due to %s!", - stuck_meta, - successful_check ? "successful check" : "the -T option was given"); + const size_t step_lineno = + print(MDBX_chk_resolution, "Performing turn to the specified meta-page (%d) due to %s!", stuck_meta, + successful_check ? "successful check" : "the -T option was given"); flush(); err = mdbx_env_turn_for_recovery(ctx->env, stuck_meta); if (err != MDBX_SUCCESS) @@ -475,12 +451,9 @@ int main(int argc, char *argv[]) { " - build: %s for %s by %s\n" " - flags: %s\n" " - options: %s\n", - mdbx_version.major, mdbx_version.minor, mdbx_version.patch, - mdbx_version.tweak, mdbx_version.git.describe, - mdbx_version.git.datetime, mdbx_version.git.commit, - mdbx_version.git.tree, mdbx_sourcery_anchor, mdbx_build.datetime, - mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, - mdbx_build.options); + mdbx_version.major, mdbx_version.minor, mdbx_version.patch, mdbx_version.tweak, mdbx_version.git.describe, + mdbx_version.git.datetime, mdbx_version.git.commit, mdbx_version.git.tree, mdbx_sourcery_anchor, + mdbx_build.datetime, mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, mdbx_build.options); return EXIT_SUCCESS; case 'v': if (verbose >= 9 && 0) @@ -546,8 +519,7 @@ int main(int argc, char *argv[]) { break; case 'U': warmup = true; - warmup_flags = - MDBX_warmup_force | MDBX_warmup_touchlimit | MDBX_warmup_lock; + warmup_flags = MDBX_warmup_force | MDBX_warmup_touchlimit | MDBX_warmup_lock; break; default: usage(prog); @@ -566,21 +538,17 @@ int main(int argc, char *argv[]) { } if (turn_meta) { if 
(stuck_meta < 0) { - error_fmt( - "meta-page must be specified (by -0, -1 or -2 options) to turn to " - "it."); + error_fmt("meta-page must be specified (by -0, -1 or -2 options) to turn to " + "it."); rc = EXIT_INTERRUPTED; } if (env_flags & MDBX_RDONLY) { - error_fmt( - "write-mode must be enabled to turn to the specified meta-page."); + error_fmt("write-mode must be enabled to turn to the specified meta-page."); rc = EXIT_INTERRUPTED; } - if (only_table.iov_base || (chk_flags & (MDBX_CHK_SKIP_BTREE_TRAVERSAL | - MDBX_CHK_SKIP_KV_TRAVERSAL))) { - error_fmt( - "whole database checking with b-tree traversal are required to turn " - "to the specified meta-page."); + if (only_table.iov_base || (chk_flags & (MDBX_CHK_SKIP_BTREE_TRAVERSAL | MDBX_CHK_SKIP_KV_TRAVERSAL))) { + error_fmt("whole database checking with b-tree traversal are required to turn " + "to the specified meta-page."); rc = EXIT_INTERRUPTED; } } @@ -604,20 +572,15 @@ int main(int argc, char *argv[]) { print(MDBX_chk_result, "mdbx_chk %s (%s, T-%s)\nRunning for %s in 'read-%s' mode with " "verbosity level %u (%s)...", - mdbx_version.git.describe, mdbx_version.git.datetime, - mdbx_version.git.tree, envname, + mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, envname, (env_flags & MDBX_RDONLY) ? "only" : "write", verbose, (verbose > 8) - ? (MDBX_DEBUG ? "extra details for debugging" - : "same as 8 for non-debug builds with MDBX_DEBUG=0") + ? (MDBX_DEBUG ? "extra details for debugging" : "same as 8 for non-debug builds with MDBX_DEBUG=0") : "of 0..9"); lf_flush(); - mdbx_setup_debug((verbose + MDBX_LOG_WARN < MDBX_LOG_TRACE) - ? (MDBX_log_level_t)(verbose + MDBX_LOG_WARN) - : MDBX_LOG_TRACE, - MDBX_DBG_DUMP | MDBX_DBG_ASSERT | MDBX_DBG_AUDIT | - MDBX_DBG_LEGACY_OVERLAP | MDBX_DBG_DONT_UPGRADE, - logger); + mdbx_setup_debug( + (verbose + MDBX_LOG_WARN < MDBX_LOG_TRACE) ? 
(MDBX_log_level_t)(verbose + MDBX_LOG_WARN) : MDBX_LOG_TRACE, + MDBX_DBG_DUMP | MDBX_DBG_ASSERT | MDBX_DBG_AUDIT | MDBX_DBG_LEGACY_OVERLAP | MDBX_DBG_DONT_UPGRADE, logger); rc = mdbx_env_create(&env); if (rc) { @@ -632,18 +595,16 @@ int main(int argc, char *argv[]) { } if (stuck_meta >= 0) { - rc = mdbx_env_open_for_recovery(env, envname, stuck_meta, - (env_flags & MDBX_RDONLY) ? false : true); + rc = mdbx_env_open_for_recovery(env, envname, stuck_meta, (env_flags & MDBX_RDONLY) ? false : true); } else { rc = mdbx_env_open(env, envname, env_flags, 0); - if ((env_flags & MDBX_EXCLUSIVE) && - (rc == MDBX_BUSY || + if ((env_flags & MDBX_EXCLUSIVE) && (rc == MDBX_BUSY || #if defined(_WIN32) || defined(_WIN64) - rc == ERROR_LOCK_VIOLATION || rc == ERROR_SHARING_VIOLATION + rc == ERROR_LOCK_VIOLATION || rc == ERROR_SHARING_VIOLATION #else - rc == EBUSY || rc == EAGAIN + rc == EBUSY || rc == EAGAIN #endif - )) { + )) { env_flags &= ~MDBX_EXCLUSIVE; rc = mdbx_env_open(env, envname, env_flags | MDBX_ACCEDE, 0); } @@ -652,13 +613,10 @@ int main(int argc, char *argv[]) { if (rc) { error_fn("mdbx_env_open", rc); if (rc == MDBX_WANNA_RECOVERY && (env_flags & MDBX_RDONLY)) - print_ln(MDBX_chk_result, - "Please run %s in the read-write mode (with '-w' option).", - prog); + print_ln(MDBX_chk_result, "Please run %s in the read-write mode (with '-w' option).", prog); goto bailout; } - print_ln(MDBX_chk_verbose, "%s mode", - (env_flags & MDBX_EXCLUSIVE) ? "monopolistic" : "cooperative"); + print_ln(MDBX_chk_verbose, "%s mode", (env_flags & MDBX_EXCLUSIVE) ? "monopolistic" : "cooperative"); if (warmup) { anchor_lineno = print(MDBX_chk_verbose, "warming up..."); @@ -671,9 +629,7 @@ int main(int argc, char *argv[]) { suffix(anchor_lineno, rc ? 
"timeout" : "done"); } - rc = mdbx_env_chk(env, &cb, &chk, chk_flags, - MDBX_chk_result + (verbose << MDBX_chk_severity_prio_shift), - 0); + rc = mdbx_env_chk(env, &cb, &chk, chk_flags, MDBX_chk_result + (verbose << MDBX_chk_severity_prio_shift), 0); if (rc) { if (chk.result.total_problems == 0) error_fn("mdbx_env_chk", rc); @@ -683,8 +639,7 @@ int main(int argc, char *argv[]) { bailout: if (env) { - const bool dont_sync = rc != 0 || chk.result.total_problems || - (chk_flags & MDBX_CHK_READWRITE) == 0; + const bool dont_sync = rc != 0 || chk.result.total_problems || (chk_flags & MDBX_CHK_READWRITE) == 0; mdbx_env_close_ex(env, dont_sync); } flush(); @@ -702,21 +657,17 @@ bailout: error_fn("clock_gettime", errno); return EXIT_FAILURE_SYS; } - elapsed = timestamp_finish.tv_sec - timestamp_start.tv_sec + - (timestamp_finish.tv_nsec - timestamp_start.tv_nsec) * 1e-9; + elapsed = + timestamp_finish.tv_sec - timestamp_start.tv_sec + (timestamp_finish.tv_nsec - timestamp_start.tv_nsec) * 1e-9; #endif /* !WINDOWS */ if (chk.result.total_problems) { - print_ln(MDBX_chk_result, - "Total %" PRIuSIZE " error%s detected, elapsed %.3f seconds.", - chk.result.total_problems, + print_ln(MDBX_chk_result, "Total %" PRIuSIZE " error%s detected, elapsed %.3f seconds.", chk.result.total_problems, (chk.result.total_problems > 1) ? 
"s are" : " is", elapsed); - if (chk.result.problems_meta || chk.result.problems_kv || - chk.result.problems_gc) + if (chk.result.problems_meta || chk.result.problems_kv || chk.result.problems_gc) return EXIT_FAILURE_CHECK_MAJOR; return EXIT_FAILURE_CHECK_MINOR; } - print_ln(MDBX_chk_result, "No error is detected, elapsed %.3f seconds.", - elapsed); + print_ln(MDBX_chk_result, "No error is detected, elapsed %.3f seconds.", elapsed); return EXIT_SUCCESS; } diff --git a/src/tools/copy.c b/src/tools/copy.c index 4441a982..122bea72 100644 --- a/src/tools/copy.c +++ b/src/tools/copy.c @@ -37,20 +37,19 @@ static void signal_handler(int sig) { #endif /* !WINDOWS */ static void usage(const char *prog) { - fprintf( - stderr, - "usage: %s [-V] [-q] [-c] [-d] [-p] [-u|U] src_path [dest_path]\n" - " -V\t\tprint version and exit\n" - " -q\t\tbe quiet\n" - " -c\t\tenable compactification (skip unused pages)\n" - " -d\t\tenforce copy to be a dynamic size DB\n" - " -p\t\tusing transaction parking/ousting during copying MVCC-snapshot\n" - " \t\tto avoid stopping recycling and overflowing the DB\n" - " -u\t\twarmup database before copying\n" - " -U\t\twarmup and try lock database pages in memory before copying\n" - " src_path\tsource database\n" - " dest_path\tdestination (stdout if not specified)\n", - prog); + fprintf(stderr, + "usage: %s [-V] [-q] [-c] [-d] [-p] [-u|U] src_path [dest_path]\n" + " -V\t\tprint version and exit\n" + " -q\t\tbe quiet\n" + " -c\t\tenable compactification (skip unused pages)\n" + " -d\t\tenforce copy to be a dynamic size DB\n" + " -p\t\tusing transaction parking/ousting during copying MVCC-snapshot\n" + " \t\tto avoid stopping recycling and overflowing the DB\n" + " -u\t\twarmup database before copying\n" + " -U\t\twarmup and try lock database pages in memory before copying\n" + " src_path\tsource database\n" + " dest_path\tdestination (stdout if not specified)\n", + prog); exit(EXIT_FAILURE); } @@ -79,10 +78,8 @@ int main(int argc, char *argv[]) { 
warmup = true; else if (argv[1][1] == 'U' && argv[1][2] == '\0') { warmup = true; - warmup_flags = - MDBX_warmup_force | MDBX_warmup_touchlimit | MDBX_warmup_lock; - } else if ((argv[1][1] == 'h' && argv[1][2] == '\0') || - strcmp(argv[1], "--help") == 0) + warmup_flags = MDBX_warmup_force | MDBX_warmup_touchlimit | MDBX_warmup_lock; + } else if ((argv[1][1] == 'h' && argv[1][2] == '\0') || strcmp(argv[1], "--help") == 0) usage(progname); else if (argv[1][1] == 'V' && argv[1][2] == '\0') { printf("mdbx_copy version %d.%d.%d.%d\n" @@ -91,12 +88,9 @@ int main(int argc, char *argv[]) { " - build: %s for %s by %s\n" " - flags: %s\n" " - options: %s\n", - mdbx_version.major, mdbx_version.minor, mdbx_version.patch, - mdbx_version.tweak, mdbx_version.git.describe, - mdbx_version.git.datetime, mdbx_version.git.commit, - mdbx_version.git.tree, mdbx_sourcery_anchor, mdbx_build.datetime, - mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, - mdbx_build.options); + mdbx_version.major, mdbx_version.minor, mdbx_version.patch, mdbx_version.tweak, mdbx_version.git.describe, + mdbx_version.git.datetime, mdbx_version.git.commit, mdbx_version.git.tree, mdbx_sourcery_anchor, + mdbx_build.datetime, mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, mdbx_build.options); return EXIT_SUCCESS; } else argc = 0; @@ -119,10 +113,9 @@ int main(int argc, char *argv[]) { #endif /* !WINDOWS */ if (!quiet) { - fprintf((argc == 2) ? stderr : stdout, - "mdbx_copy %s (%s, T-%s)\nRunning for copy %s to %s...\n", - mdbx_version.git.describe, mdbx_version.git.datetime, - mdbx_version.git.tree, argv[1], (argc == 2) ? "stdout" : argv[2]); + fprintf((argc == 2) ? stderr : stdout, "mdbx_copy %s (%s, T-%s)\nRunning for copy %s to %s...\n", + mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, argv[1], + (argc == 2) ? 
"stdout" : argv[2]); fflush(nullptr); } @@ -150,8 +143,7 @@ int main(int argc, char *argv[]) { rc = mdbx_env_copy(env, argv[2], cpflags); } if (rc) - fprintf(stderr, "%s: %s failed, error %d (%s)\n", progname, act, rc, - mdbx_strerror(rc)); + fprintf(stderr, "%s: %s failed, error %d (%s)\n", progname, act, rc, mdbx_strerror(rc)); mdbx_env_close(env); return rc ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/src/tools/drop.c b/src/tools/drop.c index 8de80cd9..9f5cfbfd 100644 --- a/src/tools/drop.c +++ b/src/tools/drop.c @@ -54,8 +54,7 @@ static void usage(void) { static void error(const char *func, int rc) { if (!quiet) - fprintf(stderr, "%s: %s() error %d %s\n", prog, func, rc, - mdbx_strerror(rc)); + fprintf(stderr, "%s: %s() error %d %s\n", prog, func, rc, mdbx_strerror(rc)); } int main(int argc, char *argv[]) { @@ -86,12 +85,9 @@ int main(int argc, char *argv[]) { " - build: %s for %s by %s\n" " - flags: %s\n" " - options: %s\n", - mdbx_version.major, mdbx_version.minor, mdbx_version.patch, - mdbx_version.tweak, mdbx_version.git.describe, - mdbx_version.git.datetime, mdbx_version.git.commit, - mdbx_version.git.tree, mdbx_sourcery_anchor, mdbx_build.datetime, - mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, - mdbx_build.options); + mdbx_version.major, mdbx_version.minor, mdbx_version.patch, mdbx_version.tweak, mdbx_version.git.describe, + mdbx_version.git.datetime, mdbx_version.git.commit, mdbx_version.git.tree, mdbx_sourcery_anchor, + mdbx_build.datetime, mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, mdbx_build.options); return EXIT_SUCCESS; case 'q': quiet = true; @@ -127,8 +123,7 @@ int main(int argc, char *argv[]) { envname = argv[optind]; if (!quiet) { - printf("mdbx_drop %s (%s, T-%s)\nRunning for %s/%s...\n", - mdbx_version.git.describe, mdbx_version.git.datetime, + printf("mdbx_drop %s (%s, T-%s)\nRunning for %s/%s...\n", mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, envname, subname ? 
subname : "@MAIN"); fflush(nullptr); } diff --git a/src/tools/dump.c b/src/tools/dump.c index de93422f..f7c1a49d 100644 --- a/src/tools/dump.c +++ b/src/tools/dump.c @@ -95,8 +95,7 @@ bool quiet = false, rescue = false; const char *prog; static void error(const char *func, int rc) { if (!quiet) - fprintf(stderr, "%s: %s() error %d %s\n", prog, func, rc, - mdbx_strerror(rc)); + fprintf(stderr, "%s: %s() error %d %s\n", prog, func, rc, mdbx_strerror(rc)); } /* Dump in BDB-compatible format */ @@ -126,10 +125,8 @@ static int dump_tbl(MDBX_txn *txn, MDBX_dbi dbi, char *name) { if (mode & GLOBAL) { mode -= GLOBAL; if (info.mi_geo.upper != info.mi_geo.lower) - printf("geometry=l%" PRIu64 ",c%" PRIu64 ",u%" PRIu64 ",s%" PRIu64 - ",g%" PRIu64 "\n", - info.mi_geo.lower, info.mi_geo.current, info.mi_geo.upper, - info.mi_geo.shrink, info.mi_geo.grow); + printf("geometry=l%" PRIu64 ",c%" PRIu64 ",u%" PRIu64 ",s%" PRIu64 ",g%" PRIu64 "\n", info.mi_geo.lower, + info.mi_geo.current, info.mi_geo.upper, info.mi_geo.shrink, info.mi_geo.grow); printf("mapsize=%" PRIu64 "\n", info.mi_geo.upper); printf("maxreaders=%u\n", info.mi_maxreaders); @@ -140,8 +137,7 @@ static int dump_tbl(MDBX_txn *txn, MDBX_dbi dbi, char *name) { return rc; } if (canary.v) - printf("canary=v%" PRIu64 ",x%" PRIu64 ",y%" PRIu64 ",z%" PRIu64 "\n", - canary.v, canary.x, canary.y, canary.z); + printf("canary=v%" PRIu64 ",x%" PRIu64 ",y%" PRIu64 ",z%" PRIu64 "\n", canary.v, canary.x, canary.y, canary.z); } printf("format=%s\n", mode & PRINT ? "print" : "bytevalue"); if (name) @@ -153,10 +149,7 @@ static int dump_tbl(MDBX_txn *txn, MDBX_dbi dbi, char *name) { else if (!name) printf("txnid=%" PRIaTXN "\n", mdbx_txn_id(txn)); */ - printf("duplicates=%d\n", (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | - MDBX_INTEGERDUP | MDBX_REVERSEDUP)) - ? 1 - : 0); + printf("duplicates=%d\n", (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP)) ? 
1 : 0); for (int i = 0; dbflags[i].bit; i++) if (flags & dbflags[i].bit) printf("%s=1\n", dbflags[i].name); @@ -187,8 +180,7 @@ static int dump_tbl(MDBX_txn *txn, MDBX_dbi dbi, char *name) { } } - while ((rc = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT)) == - MDBX_SUCCESS) { + while ((rc = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT)) == MDBX_SUCCESS) { if (user_break) { rc = MDBX_EINTR; break; @@ -212,31 +204,27 @@ static int dump_tbl(MDBX_txn *txn, MDBX_dbi dbi, char *name) { } static void usage(void) { - fprintf( - stderr, - "usage: %s " - "[-V] [-q] [-f file] [-l] [-p] [-r] [-a|-s table] [-u|U] " - "dbpath\n" - " -V\t\tprint version and exit\n" - " -q\t\tbe quiet\n" - " -f\t\twrite to file instead of stdout\n" - " -l\t\tlist tables and exit\n" - " -p\t\tuse printable characters\n" - " -r\t\trescue mode (ignore errors to dump corrupted DB)\n" - " -a\t\tdump main DB and all tables\n" - " -s name\tdump only the specified named table\n" - " -u\t\twarmup database before dumping\n" - " -U\t\twarmup and try lock database pages in memory before dumping\n" - " \t\tby default dump only the main DB\n", - prog); + fprintf(stderr, + "usage: %s " + "[-V] [-q] [-f file] [-l] [-p] [-r] [-a|-s table] [-u|U] " + "dbpath\n" + " -V\t\tprint version and exit\n" + " -q\t\tbe quiet\n" + " -f\t\twrite to file instead of stdout\n" + " -l\t\tlist tables and exit\n" + " -p\t\tuse printable characters\n" + " -r\t\trescue mode (ignore errors to dump corrupted DB)\n" + " -a\t\tdump main DB and all tables\n" + " -s name\tdump only the specified named table\n" + " -u\t\twarmup database before dumping\n" + " -U\t\twarmup and try lock database pages in memory before dumping\n" + " \t\tby default dump only the main DB\n", + prog); exit(EXIT_FAILURE); } static int equal_or_greater(const MDBX_val *a, const MDBX_val *b) { - return (a->iov_len == b->iov_len && - memcmp(a->iov_base, b->iov_base, a->iov_len) == 0) - ? 
0 - : 1; + return (a->iov_len == b->iov_len && memcmp(a->iov_base, b->iov_base, a->iov_len) == 0) ? 0 : 1; } int main(int argc, char *argv[]) { @@ -274,12 +262,9 @@ int main(int argc, char *argv[]) { " - build: %s for %s by %s\n" " - flags: %s\n" " - options: %s\n", - mdbx_version.major, mdbx_version.minor, mdbx_version.patch, - mdbx_version.tweak, mdbx_version.git.describe, - mdbx_version.git.datetime, mdbx_version.git.commit, - mdbx_version.git.tree, mdbx_sourcery_anchor, mdbx_build.datetime, - mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, - mdbx_build.options); + mdbx_version.major, mdbx_version.minor, mdbx_version.patch, mdbx_version.tweak, mdbx_version.git.describe, + mdbx_version.git.datetime, mdbx_version.git.commit, mdbx_version.git.tree, mdbx_sourcery_anchor, + mdbx_build.datetime, mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, mdbx_build.options); return EXIT_SUCCESS; case 'l': list = true; @@ -292,8 +277,7 @@ int main(int argc, char *argv[]) { break; case 'f': if (freopen(optarg, "w", stdout) == nullptr) { - fprintf(stderr, "%s: %s: reopen: %s\n", prog, optarg, - mdbx_strerror(errno)); + fprintf(stderr, "%s: %s: reopen: %s\n", prog, optarg, mdbx_strerror(errno)); exit(EXIT_FAILURE); } break; @@ -318,8 +302,7 @@ int main(int argc, char *argv[]) { break; case 'U': warmup = true; - warmup_flags = - MDBX_warmup_force | MDBX_warmup_touchlimit | MDBX_warmup_lock; + warmup_flags = MDBX_warmup_force | MDBX_warmup_touchlimit | MDBX_warmup_lock; break; default: usage(); @@ -344,9 +327,8 @@ int main(int argc, char *argv[]) { envname = argv[optind]; if (!quiet) { - fprintf(stderr, "mdbx_dump %s (%s, T-%s)\nRunning for %s...\n", - mdbx_version.git.describe, mdbx_version.git.datetime, - mdbx_version.git.tree, envname); + fprintf(stderr, "mdbx_dump %s (%s, T-%s)\nRunning for %s...\n", mdbx_version.git.describe, + mdbx_version.git.datetime, mdbx_version.git.tree, envname); fflush(nullptr); } @@ -364,11 +346,8 @@ int main(int argc, char *argv[]) { 
} } - err = mdbx_env_open( - env, envname, - envflags | (rescue ? MDBX_RDONLY | MDBX_EXCLUSIVE | MDBX_VALIDATION - : MDBX_RDONLY), - 0); + err = mdbx_env_open(env, envname, envflags | (rescue ? MDBX_RDONLY | MDBX_EXCLUSIVE | MDBX_VALIDATION : MDBX_RDONLY), + 0); if (unlikely(err != MDBX_SUCCESS)) { error("mdbx_env_open", err); goto env_close; @@ -414,8 +393,7 @@ int main(int argc, char *argv[]) { bool have_raw = false; int count = 0; MDBX_val key; - while (MDBX_SUCCESS == - (err = mdbx_cursor_get(cursor, &key, nullptr, MDBX_NEXT_NODUP))) { + while (MDBX_SUCCESS == (err = mdbx_cursor_get(cursor, &key, nullptr, MDBX_NEXT_NODUP))) { if (user_break) { err = MDBX_EINTR; break; @@ -434,8 +412,7 @@ int main(int argc, char *argv[]) { subname[key.iov_len] = '\0'; MDBX_dbi sub_dbi; - err = mdbx_dbi_open_ex(txn, subname, MDBX_DB_ACCEDE, &sub_dbi, - rescue ? equal_or_greater : nullptr, + err = mdbx_dbi_open_ex(txn, subname, MDBX_DB_ACCEDE, &sub_dbi, rescue ? equal_or_greater : nullptr, rescue ? equal_or_greater : nullptr); if (unlikely(err != MDBX_SUCCESS)) { if (err == MDBX_INCOMPATIBLE) { @@ -455,8 +432,7 @@ int main(int argc, char *argv[]) { if (!rescue) break; if (!quiet) - fprintf(stderr, "%s: %s: ignore %s for `%s` and continue\n", prog, - envname, mdbx_strerror(err), subname); + fprintf(stderr, "%s: %s: ignore %s for `%s` and continue\n", prog, envname, mdbx_strerror(err), subname); /* Here is a hack for rescue mode, don't do that: * - we should restart transaction in case error due * database corruption; @@ -491,8 +467,7 @@ int main(int argc, char *argv[]) { err = dump_tbl(txn, MAIN_DBI, nullptr); else if (!count) { if (!quiet) - fprintf(stderr, "%s: %s does not contain multiple databases\n", prog, - envname); + fprintf(stderr, "%s: %s does not contain multiple databases\n", prog, envname); err = MDBX_NOTFOUND; } } else { diff --git a/src/tools/load.c b/src/tools/load.c index 9182926f..efdd50c9 100644 --- a/src/tools/load.c +++ b/src/tools/load.c @@ -44,11 +44,10 @@ 
static size_t lineno; static void error(const char *func, int rc) { if (!quiet) { if (lineno) - fprintf(stderr, "%s: at input line %" PRIiSIZE ": %s() error %d, %s\n", - prog, lineno, func, rc, mdbx_strerror(rc)); - else - fprintf(stderr, "%s: %s() error %d %s\n", prog, func, rc, + fprintf(stderr, "%s: at input line %" PRIiSIZE ": %s() error %d, %s\n", prog, lineno, func, rc, mdbx_strerror(rc)); + else + fprintf(stderr, "%s: %s() error %d %s\n", prog, func, rc, mdbx_strerror(rc)); } } @@ -60,9 +59,7 @@ static char *valstr(char *line, const char *item) { if (line[len] > ' ') return nullptr; if (!quiet) - fprintf(stderr, - "%s: line %" PRIiSIZE ": unexpected line format for '%s'\n", prog, - lineno, item); + fprintf(stderr, "%s: line %" PRIiSIZE ": unexpected line format for '%s'\n", prog, lineno, item); exit(EXIT_FAILURE); } char *ptr = strchr(line, '\n'); @@ -80,9 +77,7 @@ static bool valnum(char *line, const char *item, uint64_t *value) { *value = strtoull(str, &end, 0); if (end && *end) { if (!quiet) - fprintf(stderr, - "%s: line %" PRIiSIZE ": unexpected number format for '%s'\n", - prog, lineno, item); + fprintf(stderr, "%s: line %" PRIiSIZE ": unexpected number format for '%s'\n", prog, lineno, item); exit(EXIT_FAILURE); } return true; @@ -95,8 +90,7 @@ static bool valbool(char *line, const char *item, bool *value) { if (u64 > 1) { if (!quiet) - fprintf(stderr, "%s: line %" PRIiSIZE ": unexpected value for '%s'\n", - prog, lineno, item); + fprintf(stderr, "%s: line %" PRIiSIZE ": unexpected value for '%s'\n", prog, lineno, item); exit(EXIT_FAILURE); } *value = u64 != 0; @@ -129,11 +123,10 @@ typedef struct flagbit { #define S(s) STRLENOF(s), s -flagbit dbflags[] = { - {MDBX_REVERSEKEY, S("reversekey")}, {MDBX_DUPSORT, S("duplicates")}, - {MDBX_DUPSORT, S("dupsort")}, {MDBX_INTEGERKEY, S("integerkey")}, - {MDBX_DUPFIXED, S("dupfix")}, {MDBX_INTEGERDUP, S("integerdup")}, - {MDBX_REVERSEDUP, S("reversedup")}, {0, 0, nullptr}}; +flagbit dbflags[] = 
{{MDBX_REVERSEKEY, S("reversekey")}, {MDBX_DUPSORT, S("duplicates")}, + {MDBX_DUPSORT, S("dupsort")}, {MDBX_INTEGERKEY, S("integerkey")}, + {MDBX_DUPFIXED, S("dupfix")}, {MDBX_INTEGERDUP, S("integerdup")}, + {MDBX_REVERSEDUP, S("reversedup")}, {0, 0, nullptr}}; static int readhdr(void) { /* reset parameters */ @@ -158,10 +151,8 @@ static int readhdr(void) { if (valnum(dbuf.iov_base, "VERSION", &u64)) { if (u64 != 3) { if (!quiet) - fprintf(stderr, - "%s: line %" PRIiSIZE ": unsupported value %" PRIu64 - " for %s\n", - prog, lineno, u64, "VERSION"); + fprintf(stderr, "%s: line %" PRIiSIZE ": unsupported value %" PRIu64 " for %s\n", prog, lineno, u64, + "VERSION"); exit(EXIT_FAILURE); } continue; @@ -170,16 +161,12 @@ static int readhdr(void) { if (valnum(dbuf.iov_base, "db_pagesize", &u64)) { if (!(mode & GLOBAL) && envinfo.mi_dxb_pagesize != u64) { if (!quiet) - fprintf(stderr, - "%s: line %" PRIiSIZE ": ignore value %" PRIu64 - " for '%s' in non-global context\n", - prog, lineno, u64, "db_pagesize"); + fprintf(stderr, "%s: line %" PRIiSIZE ": ignore value %" PRIu64 " for '%s' in non-global context\n", prog, + lineno, u64, "db_pagesize"); } else if (u64 < MDBX_MIN_PAGESIZE || u64 > MDBX_MAX_PAGESIZE) { if (!quiet) - fprintf(stderr, - "%s: line %" PRIiSIZE ": ignore unsupported value %" PRIu64 - " for %s\n", - prog, lineno, u64, "db_pagesize"); + fprintf(stderr, "%s: line %" PRIiSIZE ": ignore unsupported value %" PRIu64 " for %s\n", prog, lineno, u64, + "db_pagesize"); } else envinfo.mi_dxb_pagesize = (uint32_t)u64; continue; @@ -196,9 +183,7 @@ static int readhdr(void) { continue; } if (!quiet) - fprintf(stderr, - "%s: line %" PRIiSIZE ": unsupported value '%s' for %s\n", prog, - lineno, str, "format"); + fprintf(stderr, "%s: line %" PRIiSIZE ": unsupported value '%s' for %s\n", prog, lineno, str, "format"); exit(EXIT_FAILURE); } @@ -220,9 +205,7 @@ static int readhdr(void) { if (str) { if (strcmp(str, "btree") != 0) { if (!quiet) - fprintf(stderr, - "%s: line %" 
PRIiSIZE ": unsupported value '%s' for %s\n", - prog, lineno, str, "type"); + fprintf(stderr, "%s: line %" PRIiSIZE ": unsupported value '%s' for %s\n", prog, lineno, str, "type"); free(subname); exit(EXIT_FAILURE); } @@ -232,10 +215,8 @@ static int readhdr(void) { if (valnum(dbuf.iov_base, "mapaddr", &u64)) { if (u64) { if (!quiet) - fprintf(stderr, - "%s: line %" PRIiSIZE ": ignore unsupported value 0x%" PRIx64 - " for %s\n", - prog, lineno, u64, "mapaddr"); + fprintf(stderr, "%s: line %" PRIiSIZE ": ignore unsupported value 0x%" PRIx64 " for %s\n", prog, lineno, u64, + "mapaddr"); } continue; } @@ -243,16 +224,12 @@ static int readhdr(void) { if (valnum(dbuf.iov_base, "mapsize", &u64)) { if (!(mode & GLOBAL)) { if (!quiet) - fprintf(stderr, - "%s: line %" PRIiSIZE ": ignore value %" PRIu64 - " for '%s' in non-global context\n", - prog, lineno, u64, "mapsize"); + fprintf(stderr, "%s: line %" PRIiSIZE ": ignore value %" PRIu64 " for '%s' in non-global context\n", prog, + lineno, u64, "mapsize"); } else if (u64 < MIN_MAPSIZE || u64 > MAX_MAPSIZE64) { if (!quiet) - fprintf(stderr, - "%s: line %" PRIiSIZE ": ignore unsupported value 0x%" PRIx64 - " for %s\n", - prog, lineno, u64, "mapsize"); + fprintf(stderr, "%s: line %" PRIiSIZE ": ignore unsupported value 0x%" PRIx64 " for %s\n", prog, lineno, u64, + "mapsize"); } else envinfo.mi_mapsize = (size_t)u64; continue; @@ -261,16 +238,12 @@ static int readhdr(void) { if (valnum(dbuf.iov_base, "maxreaders", &u64)) { if (!(mode & GLOBAL)) { if (!quiet) - fprintf(stderr, - "%s: line %" PRIiSIZE ": ignore value %" PRIu64 - " for '%s' in non-global context\n", - prog, lineno, u64, "maxreaders"); + fprintf(stderr, "%s: line %" PRIiSIZE ": ignore value %" PRIu64 " for '%s' in non-global context\n", prog, + lineno, u64, "maxreaders"); } else if (u64 < 1 || u64 > MDBX_READERS_LIMIT) { if (!quiet) - fprintf(stderr, - "%s: line %" PRIiSIZE ": ignore unsupported value 0x%" PRIx64 - " for %s\n", - prog, lineno, u64, "maxreaders"); + 
fprintf(stderr, "%s: line %" PRIiSIZE ": ignore unsupported value 0x%" PRIx64 " for %s\n", prog, lineno, u64, + "maxreaders"); } else envinfo.mi_maxreaders = (int)u64; continue; @@ -279,10 +252,8 @@ static int readhdr(void) { if (valnum(dbuf.iov_base, "txnid", &u64)) { if (u64 < MIN_TXNID || u64 > MAX_TXNID) { if (!quiet) - fprintf(stderr, - "%s: line %" PRIiSIZE ": ignore unsupported value 0x%" PRIx64 - " for %s\n", - prog, lineno, u64, "txnid"); + fprintf(stderr, "%s: line %" PRIiSIZE ": ignore unsupported value 0x%" PRIx64 " for %s\n", prog, lineno, u64, + "txnid"); } else txnid = u64; continue; @@ -301,16 +272,11 @@ static int readhdr(void) { "%s: line %" PRIiSIZE ": ignore values %s" " for '%s' in non-global context\n", prog, lineno, str, "geometry"); - } else if (sscanf(str, - "l%" PRIu64 ",c%" PRIu64 ",u%" PRIu64 ",s%" PRIu64 - ",g%" PRIu64, - &envinfo.mi_geo.lower, &envinfo.mi_geo.current, - &envinfo.mi_geo.upper, &envinfo.mi_geo.shrink, + } else if (sscanf(str, "l%" PRIu64 ",c%" PRIu64 ",u%" PRIu64 ",s%" PRIu64 ",g%" PRIu64, &envinfo.mi_geo.lower, + &envinfo.mi_geo.current, &envinfo.mi_geo.upper, &envinfo.mi_geo.shrink, &envinfo.mi_geo.grow) != 5) { if (!quiet) - fprintf(stderr, - "%s: line %" PRIiSIZE ": unexpected line format for '%s'\n", - prog, lineno, "geometry"); + fprintf(stderr, "%s: line %" PRIiSIZE ": unexpected line format for '%s'\n", prog, lineno, "geometry"); exit(EXIT_FAILURE); } continue; @@ -324,12 +290,10 @@ static int readhdr(void) { "%s: line %" PRIiSIZE ": ignore values %s" " for '%s' in non-global context\n", prog, lineno, str, "canary"); - } else if (sscanf(str, "v%" PRIu64 ",x%" PRIu64 ",y%" PRIu64 ",z%" PRIu64, - &canary.v, &canary.x, &canary.y, &canary.z) != 4) { + } else if (sscanf(str, "v%" PRIu64 ",x%" PRIu64 ",y%" PRIu64 ",z%" PRIu64, &canary.v, &canary.x, &canary.y, + &canary.z) != 4) { if (!quiet) - fprintf(stderr, - "%s: line %" PRIiSIZE ": unexpected line format for '%s'\n", - prog, lineno, "canary"); + fprintf(stderr, 
"%s: line %" PRIiSIZE ": unexpected line format for '%s'\n", prog, lineno, "canary"); exit(EXIT_FAILURE); } continue; @@ -353,9 +317,8 @@ static int readhdr(void) { } if (!quiet) - fprintf(stderr, - "%s: line %" PRIiSIZE ": unrecognized keyword ignored: %s\n", - prog, lineno, (char *)dbuf.iov_base); + fprintf(stderr, "%s: line %" PRIiSIZE ": unrecognized keyword ignored: %s\n", prog, lineno, + (char *)dbuf.iov_base); next:; } return EOF; @@ -363,8 +326,7 @@ static int readhdr(void) { static int badend(void) { if (!quiet) - fprintf(stderr, "%s: line %" PRIiSIZE ": unexpected end of input\n", prog, - lineno); + fprintf(stderr, "%s: line %" PRIiSIZE ": unexpected end of input\n", prog, lineno); return errno ? errno : MDBX_ENODATA; } @@ -416,9 +378,7 @@ __hot static int readline(MDBX_val *out, MDBX_val *buf) { buf->iov_base = osal_realloc(buf->iov_base, buf->iov_len * 2); if (!buf->iov_base) { if (!quiet) - fprintf(stderr, - "%s: line %" PRIiSIZE ": out of memory, line too long\n", prog, - lineno); + fprintf(stderr, "%s: line %" PRIiSIZE ": out of memory, line too long\n", prog, lineno); return MDBX_ENOMEM; } c1 = buf->iov_base; @@ -490,10 +450,7 @@ static void usage(void) { } static int equal_or_greater(const MDBX_val *a, const MDBX_val *b) { - return (a->iov_len == b->iov_len && - memcmp(a->iov_base, b->iov_base, a->iov_len) == 0) - ? 0 - : 1; + return (a->iov_len == b->iov_len && memcmp(a->iov_base, b->iov_base, a->iov_len) == 0) ? 
0 : 1; } int main(int argc, char *argv[]) { @@ -530,12 +487,9 @@ int main(int argc, char *argv[]) { " - build: %s for %s by %s\n" " - flags: %s\n" " - options: %s\n", - mdbx_version.major, mdbx_version.minor, mdbx_version.patch, - mdbx_version.tweak, mdbx_version.git.describe, - mdbx_version.git.datetime, mdbx_version.git.commit, - mdbx_version.git.tree, mdbx_sourcery_anchor, mdbx_build.datetime, - mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, - mdbx_build.options); + mdbx_version.major, mdbx_version.minor, mdbx_version.patch, mdbx_version.tweak, mdbx_version.git.describe, + mdbx_version.git.datetime, mdbx_version.git.commit, mdbx_version.git.tree, mdbx_sourcery_anchor, + mdbx_build.datetime, mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, mdbx_build.options); return EXIT_SUCCESS; case 'a': putflags |= MDBX_APPEND; @@ -543,8 +497,7 @@ int main(int argc, char *argv[]) { case 'f': if (freopen(optarg, "r", stdin) == nullptr) { if (!quiet) - fprintf(stderr, "%s: %s: open: %s\n", prog, optarg, - mdbx_strerror(errno)); + fprintf(stderr, "%s: %s: open: %s\n", prog, optarg, mdbx_strerror(errno)); exit(EXIT_FAILURE); } break; @@ -592,8 +545,7 @@ int main(int argc, char *argv[]) { envname = argv[optind]; if (!quiet) - printf("mdbx_load %s (%s, T-%s)\nRunning for %s...\n", - mdbx_version.git.describe, mdbx_version.git.datetime, + printf("mdbx_load %s (%s, T-%s)\nRunning for %s...\n", mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, envname); fflush(nullptr); @@ -638,25 +590,22 @@ int main(int argc, char *argv[]) { if (envinfo.mi_geo.current | envinfo.mi_mapsize) { if (envinfo.mi_geo.current) { - err = mdbx_env_set_geometry( - env, (intptr_t)envinfo.mi_geo.lower, (intptr_t)envinfo.mi_geo.current, - (intptr_t)envinfo.mi_geo.upper, (intptr_t)envinfo.mi_geo.shrink, - (intptr_t)envinfo.mi_geo.grow, - envinfo.mi_dxb_pagesize ? 
(intptr_t)envinfo.mi_dxb_pagesize : -1); + err = mdbx_env_set_geometry(env, (intptr_t)envinfo.mi_geo.lower, (intptr_t)envinfo.mi_geo.current, + (intptr_t)envinfo.mi_geo.upper, (intptr_t)envinfo.mi_geo.shrink, + (intptr_t)envinfo.mi_geo.grow, + envinfo.mi_dxb_pagesize ? (intptr_t)envinfo.mi_dxb_pagesize : -1); } else { if (envinfo.mi_mapsize > MAX_MAPSIZE) { if (!quiet) - fprintf( - stderr, - "Database size is too large for current system (mapsize=%" PRIu64 - " is great than system-limit %zu)\n", - envinfo.mi_mapsize, (size_t)MAX_MAPSIZE); + fprintf(stderr, + "Database size is too large for current system (mapsize=%" PRIu64 + " is great than system-limit %zu)\n", + envinfo.mi_mapsize, (size_t)MAX_MAPSIZE); goto bailout; } - err = mdbx_env_set_geometry( - env, (intptr_t)envinfo.mi_mapsize, (intptr_t)envinfo.mi_mapsize, - (intptr_t)envinfo.mi_mapsize, 0, 0, - envinfo.mi_dxb_pagesize ? (intptr_t)envinfo.mi_dxb_pagesize : -1); + err = mdbx_env_set_geometry(env, (intptr_t)envinfo.mi_mapsize, (intptr_t)envinfo.mi_mapsize, + (intptr_t)envinfo.mi_mapsize, 0, 0, + envinfo.mi_dxb_pagesize ? (intptr_t)envinfo.mi_dxb_pagesize : -1); } if (unlikely(err != MDBX_SUCCESS)) { error("mdbx_env_set_geometry", err); @@ -673,8 +622,7 @@ int main(int argc, char *argv[]) { kbuf.iov_len = mdbx_env_get_maxvalsize_ex(env, 0) + (size_t)1; if (kbuf.iov_len >= INTPTR_MAX / 2) { if (!quiet) - fprintf(stderr, "mdbx_env_get_maxkeysize() failed, returns %zu\n", - kbuf.iov_len); + fprintf(stderr, "mdbx_env_get_maxkeysize() failed, returns %zu\n", kbuf.iov_len); goto bailout; } @@ -709,10 +657,9 @@ int main(int argc, char *argv[]) { } const char *const dbi_name = subname ? subname : "@MAIN"; - err = - mdbx_dbi_open_ex(txn, subname, dbi_flags | MDBX_CREATE, &dbi, - (putflags & MDBX_APPEND) ? equal_or_greater : nullptr, - (putflags & MDBX_APPEND) ? equal_or_greater : nullptr); + err = mdbx_dbi_open_ex(txn, subname, dbi_flags | MDBX_CREATE, &dbi, + (putflags & MDBX_APPEND) ? 
equal_or_greater : nullptr, + (putflags & MDBX_APPEND) ? equal_or_greater : nullptr); if (unlikely(err != MDBX_SUCCESS)) { error("mdbx_dbi_open_ex", err); goto bailout; @@ -726,9 +673,7 @@ int main(int argc, char *argv[]) { } if (present_sequence > sequence) { if (!quiet) - fprintf(stderr, - "present sequence for '%s' value (%" PRIu64 - ") is greater than loaded (%" PRIu64 ")\n", + fprintf(stderr, "present sequence for '%s' value (%" PRIu64 ") is greater than loaded (%" PRIu64 ")\n", dbi_name, present_sequence, sequence); err = MDBX_RESULT_TRUE; goto bailout; @@ -750,8 +695,7 @@ int main(int argc, char *argv[]) { } if (putflags & MDBX_APPEND) - putflags = (dbi_flags & MDBX_DUPSORT) ? putflags | MDBX_APPENDDUP - : putflags & ~MDBX_APPENDDUP; + putflags = (dbi_flags & MDBX_DUPSORT) ? putflags | MDBX_APPENDDUP : putflags & ~MDBX_APPENDDUP; err = mdbx_cursor_open(txn, dbi, &mc); if (unlikely(err != MDBX_SUCCESS)) { @@ -770,8 +714,7 @@ int main(int argc, char *argv[]) { err = readline(&data, &dbuf); if (err) { if (!quiet) - fprintf(stderr, "%s: line %" PRIiSIZE ": failed to read key value\n", - prog, lineno); + fprintf(stderr, "%s: line %" PRIiSIZE ": failed to read key value\n", prog, lineno); goto bailout; } @@ -780,8 +723,7 @@ int main(int argc, char *argv[]) { continue; if (err == MDBX_BAD_VALSIZE && rescue) { if (!quiet) - fprintf(stderr, "%s: skip line %" PRIiSIZE ": due %s\n", prog, lineno, - mdbx_strerror(err)); + fprintf(stderr, "%s: skip line %" PRIiSIZE ": due %s\n", prog, lineno, mdbx_strerror(err)); continue; } if (unlikely(err != MDBX_SUCCESS)) { diff --git a/src/tools/stat.c b/src/tools/stat.c index f8808caa..57c99b45 100644 --- a/src/tools/stat.c +++ b/src/tools/stat.c @@ -61,27 +61,24 @@ static void usage(const char *prog) { exit(EXIT_FAILURE); } -static int reader_list_func(void *ctx, int num, int slot, mdbx_pid_t pid, - mdbx_tid_t thread, uint64_t txnid, uint64_t lag, - size_t bytes_used, size_t bytes_retained) { +static int reader_list_func(void 
*ctx, int num, int slot, mdbx_pid_t pid, mdbx_tid_t thread, uint64_t txnid, + uint64_t lag, size_t bytes_used, size_t bytes_retained) { (void)ctx; if (num == 1) printf("Reader Table\n" " #\tslot\t%6s %*s %20s %10s %13s %13s\n", - "pid", (int)sizeof(size_t) * 2, "thread", "txnid", "lag", "used", - "retained"); + "pid", (int)sizeof(size_t) * 2, "thread", "txnid", "lag", "used", "retained"); if (thread < (mdbx_tid_t)((intptr_t)MDBX_TID_TXN_OUSTED)) - printf(" %3d)\t[%d]\t%6" PRIdSIZE " %*" PRIxPTR, num, slot, (size_t)pid, - (int)sizeof(size_t) * 2, (uintptr_t)thread); + printf(" %3d)\t[%d]\t%6" PRIdSIZE " %*" PRIxPTR, num, slot, (size_t)pid, (int)sizeof(size_t) * 2, + (uintptr_t)thread); else printf(" %3d)\t[%d]\t%6" PRIdSIZE " %sed", num, slot, (size_t)pid, - (thread == (mdbx_tid_t)((uintptr_t)MDBX_TID_TXN_PARKED)) ? "park" - : "oust"); + (thread == (mdbx_tid_t)((uintptr_t)MDBX_TID_TXN_PARKED)) ? "park" : "oust"); if (txnid) - printf(" %20" PRIu64 " %10" PRIu64 " %12.1fM %12.1fM\n", txnid, lag, - bytes_used / 1048576.0, bytes_retained / 1048576.0); + printf(" %20" PRIu64 " %10" PRIu64 " %12.1fM %12.1fM\n", txnid, lag, bytes_used / 1048576.0, + bytes_retained / 1048576.0); else printf(" %20s %10s %13s %13s\n", "-", "0", "0", "0"); @@ -92,8 +89,7 @@ const char *prog; bool quiet = false; static void error(const char *func, int rc) { if (!quiet) - fprintf(stderr, "%s: %s() error %d %s\n", prog, func, rc, - mdbx_strerror(rc)); + fprintf(stderr, "%s: %s() error %d %s\n", prog, func, rc, mdbx_strerror(rc)); } int main(int argc, char *argv[]) { @@ -129,12 +125,9 @@ int main(int argc, char *argv[]) { " - build: %s for %s by %s\n" " - flags: %s\n" " - options: %s\n", - mdbx_version.major, mdbx_version.minor, mdbx_version.patch, - mdbx_version.tweak, mdbx_version.git.describe, - mdbx_version.git.datetime, mdbx_version.git.commit, - mdbx_version.git.tree, mdbx_sourcery_anchor, mdbx_build.datetime, - mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, - mdbx_build.options); 
+ mdbx_version.major, mdbx_version.minor, mdbx_version.patch, mdbx_version.tweak, mdbx_version.git.describe, + mdbx_version.git.datetime, mdbx_version.git.commit, mdbx_version.git.tree, mdbx_sourcery_anchor, + mdbx_build.datetime, mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, mdbx_build.options); return EXIT_SUCCESS; case 'q': quiet = true; @@ -187,8 +180,7 @@ int main(int argc, char *argv[]) { envname = argv[optind]; envname = argv[optind]; if (!quiet) { - printf("mdbx_stat %s (%s, T-%s)\nRunning for %s...\n", - mdbx_version.git.describe, mdbx_version.git.datetime, + printf("mdbx_stat %s (%s, T-%s)\nRunning for %s...\n", mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, envname); fflush(nullptr); } @@ -232,39 +224,27 @@ int main(int argc, char *argv[]) { if (pgop) { printf("Page Operations (for current session):\n"); - printf(" New: %8" PRIu64 "\t// quantity of a new pages added\n", - mei.mi_pgop_stat.newly); - printf(" CoW: %8" PRIu64 - "\t// quantity of pages copied for altering\n", - mei.mi_pgop_stat.cow); + printf(" New: %8" PRIu64 "\t// quantity of a new pages added\n", mei.mi_pgop_stat.newly); + printf(" CoW: %8" PRIu64 "\t// quantity of pages copied for altering\n", mei.mi_pgop_stat.cow); printf(" Clone: %8" PRIu64 "\t// quantity of parent's dirty pages " "clones for nested transactions\n", mei.mi_pgop_stat.clone); - printf(" Split: %8" PRIu64 - "\t// page splits during insertions or updates\n", - mei.mi_pgop_stat.split); - printf(" Merge: %8" PRIu64 - "\t// page merges during deletions or updates\n", - mei.mi_pgop_stat.merge); + printf(" Split: %8" PRIu64 "\t// page splits during insertions or updates\n", mei.mi_pgop_stat.split); + printf(" Merge: %8" PRIu64 "\t// page merges during deletions or updates\n", mei.mi_pgop_stat.merge); printf(" Spill: %8" PRIu64 "\t// quantity of spilled/ousted `dirty` " "pages during large transactions\n", mei.mi_pgop_stat.spill); printf(" Unspill: %8" PRIu64 "\t// quantity of 
unspilled/redone `dirty` " "pages during large transactions\n", mei.mi_pgop_stat.unspill); - printf(" WOP: %8" PRIu64 - "\t// number of explicit write operations (not a pages) to a disk\n", + printf(" WOP: %8" PRIu64 "\t// number of explicit write operations (not a pages) to a disk\n", mei.mi_pgop_stat.wops); - printf(" PreFault: %8" PRIu64 - "\t// number of prefault write operations (not a pages)\n", + printf(" PreFault: %8" PRIu64 "\t// number of prefault write operations (not a pages)\n", mei.mi_pgop_stat.prefault); - printf(" mInCore: %8" PRIu64 "\t// number of mincore() calls\n", - mei.mi_pgop_stat.mincore); - printf(" mSync: %8" PRIu64 - "\t// number of explicit msync-to-disk operations (not a pages)\n", + printf(" mInCore: %8" PRIu64 "\t// number of mincore() calls\n", mei.mi_pgop_stat.mincore); + printf(" mSync: %8" PRIu64 "\t// number of explicit msync-to-disk operations (not a pages)\n", mei.mi_pgop_stat.msync); - printf(" fSync: %8" PRIu64 - "\t// number of explicit fsync-to-disk operations (not a pages)\n", + printf(" fSync: %8" PRIu64 "\t// number of explicit fsync-to-disk operations (not a pages)\n", mei.mi_pgop_stat.fsync); } @@ -272,18 +252,15 @@ int main(int argc, char *argv[]) { printf("Environment Info\n"); printf(" Pagesize: %u\n", mei.mi_dxb_pagesize); if (mei.mi_geo.lower != mei.mi_geo.upper) { - printf(" Dynamic datafile: %" PRIu64 "..%" PRIu64 " bytes (+%" PRIu64 - "/-%" PRIu64 "), %" PRIu64 "..%" PRIu64 " pages (+%" PRIu64 - "/-%" PRIu64 ")\n", - mei.mi_geo.lower, mei.mi_geo.upper, mei.mi_geo.grow, - mei.mi_geo.shrink, mei.mi_geo.lower / mei.mi_dxb_pagesize, - mei.mi_geo.upper / mei.mi_dxb_pagesize, - mei.mi_geo.grow / mei.mi_dxb_pagesize, - mei.mi_geo.shrink / mei.mi_dxb_pagesize); - printf(" Current mapsize: %" PRIu64 " bytes, %" PRIu64 " pages \n", - mei.mi_mapsize, mei.mi_mapsize / mei.mi_dxb_pagesize); - printf(" Current datafile: %" PRIu64 " bytes, %" PRIu64 " pages\n", - mei.mi_geo.current, mei.mi_geo.current / mei.mi_dxb_pagesize); 
+ printf(" Dynamic datafile: %" PRIu64 "..%" PRIu64 " bytes (+%" PRIu64 "/-%" PRIu64 "), %" PRIu64 "..%" PRIu64 + " pages (+%" PRIu64 "/-%" PRIu64 ")\n", + mei.mi_geo.lower, mei.mi_geo.upper, mei.mi_geo.grow, mei.mi_geo.shrink, + mei.mi_geo.lower / mei.mi_dxb_pagesize, mei.mi_geo.upper / mei.mi_dxb_pagesize, + mei.mi_geo.grow / mei.mi_dxb_pagesize, mei.mi_geo.shrink / mei.mi_dxb_pagesize); + printf(" Current mapsize: %" PRIu64 " bytes, %" PRIu64 " pages \n", mei.mi_mapsize, + mei.mi_mapsize / mei.mi_dxb_pagesize); + printf(" Current datafile: %" PRIu64 " bytes, %" PRIu64 " pages\n", mei.mi_geo.current, + mei.mi_geo.current / mei.mi_dxb_pagesize); #if defined(_WIN32) || defined(_WIN64) if (mei.mi_geo.shrink && mei.mi_geo.current != mei.mi_geo.upper) printf(" WARNING: Due Windows system limitations a " @@ -293,12 +270,11 @@ int main(int argc, char *argv[]) { "until it will be closed or reopened in read-write mode.\n"); #endif } else { - printf(" Fixed datafile: %" PRIu64 " bytes, %" PRIu64 " pages\n", - mei.mi_geo.current, mei.mi_geo.current / mei.mi_dxb_pagesize); + printf(" Fixed datafile: %" PRIu64 " bytes, %" PRIu64 " pages\n", mei.mi_geo.current, + mei.mi_geo.current / mei.mi_dxb_pagesize); } printf(" Last transaction ID: %" PRIu64 "\n", mei.mi_recent_txnid); - printf(" Latter reader transaction ID: %" PRIu64 " (%" PRIi64 ")\n", - mei.mi_latter_reader_txnid, + printf(" Latter reader transaction ID: %" PRIu64 " (%" PRIi64 ")\n", mei.mi_latter_reader_txnid, mei.mi_latter_reader_txnid - mei.mi_recent_txnid); printf(" Max readers: %u\n", mei.mi_maxreaders); printf(" Number of reader slots uses: %u\n", mei.mi_numreaders); @@ -352,8 +328,7 @@ int main(int argc, char *argv[]) { pgno_t pages = 0, *iptr; pgno_t reclaimable = 0; MDBX_val key, data; - while (MDBX_SUCCESS == - (rc = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT))) { + while (MDBX_SUCCESS == (rc = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT))) { if (user_break) { rc = MDBX_EINTR; break; @@ -367,29 
+342,23 @@ int main(int argc, char *argv[]) { if (freinfo > 1) { char *bad = ""; - pgno_t prev = - MDBX_PNL_ASCENDING ? NUM_METAS - 1 : (pgno_t)mei.mi_last_pgno + 1; + pgno_t prev = MDBX_PNL_ASCENDING ? NUM_METAS - 1 : (pgno_t)mei.mi_last_pgno + 1; pgno_t span = 1; for (unsigned i = 0; i < number; ++i) { pgno_t pg = iptr[i]; if (MDBX_PNL_DISORDERED(prev, pg)) bad = " [bad sequence]"; prev = pg; - while (i + span < number && - iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pg, span) - : pgno_sub(pg, span))) + while (i + span < number && iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pg, span) : pgno_sub(pg, span))) ++span; } - printf(" Transaction %" PRIaTXN ", %" PRIaPGNO - " pages, maxspan %" PRIaPGNO "%s\n", - *(txnid_t *)key.iov_base, number, span, bad); + printf(" Transaction %" PRIaTXN ", %" PRIaPGNO " pages, maxspan %" PRIaPGNO "%s\n", *(txnid_t *)key.iov_base, + number, span, bad); if (freinfo > 2) { for (unsigned i = 0; i < number; i += span) { const pgno_t pg = iptr[i]; for (span = 1; - i + span < number && - iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pg, span) - : pgno_sub(pg, span)); + i + span < number && iptr[i + span] == (MDBX_PNL_ASCENDING ? 
pgno_add(pg, span) : pgno_sub(pg, span)); ++span) ; if (span > 1) @@ -443,8 +412,7 @@ int main(int argc, char *argv[]) { value = reclaimable; printf(" Reclaimable: %" PRIu64 " %.1f%%\n", value, value / percent); - value = mei.mi_mapsize / mei.mi_dxb_pagesize - (mei.mi_last_pgno + 1) + - reclaimable; + value = mei.mi_mapsize / mei.mi_dxb_pagesize - (mei.mi_last_pgno + 1) + reclaimable; printf(" Available: %" PRIu64 " %.1f%%\n", value, value / percent); } else printf(" GC: %" PRIaPGNO " pages\n", pages); @@ -474,8 +442,7 @@ int main(int argc, char *argv[]) { } MDBX_val key; - while (MDBX_SUCCESS == - (rc = mdbx_cursor_get(cursor, &key, nullptr, MDBX_NEXT_NODUP))) { + while (MDBX_SUCCESS == (rc = mdbx_cursor_get(cursor, &key, nullptr, MDBX_NEXT_NODUP))) { MDBX_dbi xdbi; if (memchr(key.iov_base, '\0', key.iov_len)) continue; diff --git a/src/tools/wingetopt.c b/src/tools/wingetopt.c index 4f27d648..96210cc6 100644 --- a/src/tools/wingetopt.c +++ b/src/tools/wingetopt.c @@ -11,12 +11,12 @@ #ifdef _MSC_VER #pragma warning(push, 1) -#pragma warning(disable : 4548) /* expression before comma has no effect; \ +#pragma warning(disable : 4548) /* expression before comma has no effect; \ expected expression with side - effect */ -#pragma warning(disable : 4530) /* C++ exception handler used, but unwind \ +#pragma warning(disable : 4530) /* C++ exception handler used, but unwind \ * semantics are not enabled. Specify /EHsc */ -#pragma warning(disable : 4577) /* 'noexcept' used with no exception handling \ - * mode specified; termination on exception is \ +#pragma warning(disable : 4577) /* 'noexcept' used with no exception handling \ + * mode specified; termination on exception is \ * not guaranteed. 
Specify /EHsc */ #if !defined(_CRT_SECURE_NO_WARNINGS) #define _CRT_SECURE_NO_WARNINGS @@ -70,8 +70,7 @@ int getopt(int argc, char *const argv[], const char *opts) { if (argv[optind][sp + 1] != '\0') optarg = &argv[optind++][sp + 1]; else if (++optind >= argc) { - fprintf(stderr, "%s: %s -- %c\n", argv[0], "option requires an argument", - c); + fprintf(stderr, "%s: %s -- %c\n", argv[0], "option requires an argument", c); sp = 1; return '?'; } else diff --git a/src/tree.c b/src/tree.c index 0f430749..67b69ddc 100644 --- a/src/tree.c +++ b/src/tree.c @@ -5,8 +5,7 @@ #include "internals.h" -static MDBX_cursor *cursor_clone(const MDBX_cursor *csrc, - cursor_couple_t *couple) { +static MDBX_cursor *cursor_clone(const MDBX_cursor *csrc, cursor_couple_t *couple) { cASSERT(csrc, csrc->txn->txnid >= csrc->txn->env->lck->cached_oldest.weak); couple->outer.next = nullptr; couple->outer.backup = nullptr; @@ -40,13 +39,10 @@ static MDBX_cursor *cursor_clone(const MDBX_cursor *csrc, void recalculate_merge_thresholds(MDBX_env *env) { const size_t bytes = page_space(env); - env->merge_threshold = - (uint16_t)(bytes - - (bytes * env->options.merge_threshold_16dot16_percent >> 16)); + env->merge_threshold = (uint16_t)(bytes - (bytes * env->options.merge_threshold_16dot16_percent >> 16)); env->merge_threshold_gc = - (uint16_t)(bytes - ((env->options.merge_threshold_16dot16_percent > 19005) - ? bytes / 3 /* 33 % */ - : bytes / 4 /* 25 % */)); + (uint16_t)(bytes - ((env->options.merge_threshold_16dot16_percent > 19005) ? 
bytes / 3 /* 33 % */ + : bytes / 4 /* 25 % */)); } int tree_drop(MDBX_cursor *mc, const bool may_have_tables) { @@ -60,9 +56,8 @@ int tree_drop(MDBX_cursor *mc, const bool may_have_tables) { if (!(may_have_tables | mc->tree->large_pages)) cursor_pop(mc); - rc = pnl_need(&txn->tw.retired_pages, (size_t)mc->tree->branch_pages + - (size_t)mc->tree->leaf_pages + - (size_t)mc->tree->large_pages); + rc = pnl_need(&txn->tw.retired_pages, + (size_t)mc->tree->branch_pages + (size_t)mc->tree->leaf_pages + (size_t)mc->tree->large_pages); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; @@ -100,9 +95,7 @@ int tree_drop(MDBX_cursor *mc, const bool may_have_tables) { cASSERT(mc, mc->top + 1 < mc->tree->height); mc->checking |= z_retiring; const unsigned pagetype = (is_frozen(txn, mp) ? P_FROZEN : 0) + - ((mc->top + 2 == mc->tree->height) - ? (mc->checking & (P_LEAF | P_DUPFIX)) - : P_BRANCH); + ((mc->top + 2 == mc->tree->height) ? (mc->checking & (P_LEAF | P_DUPFIX)) : P_BRANCH); for (size_t i = 0; i < nkeys; i++) { node_t *node = page_node(mp, i); tASSERT(txn, (node_flags(node) & (N_BIG | N_TREE | N_DUP)) == 0); @@ -153,8 +146,7 @@ static int node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, bool fromleft) { cASSERT(csrc, csrc->top == cdst->top); if (unlikely(page_type(psrc) != page_type(pdst))) { bailout: - ERROR("Wrong or mismatch pages's types (src %d, dst %d) to move node", - page_type(psrc), page_type(pdst)); + ERROR("Wrong or mismatch pages's types (src %d, dst %d) to move node", page_type(psrc), page_type(pdst)); csrc->txn->flags |= MDBX_TXN_ERROR; return MDBX_PROBLEM; } @@ -225,8 +217,7 @@ static int node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, bool fromleft) { mn->top = top; mn->ki[mn->top] = 0; - const intptr_t delta = EVEN_CEIL(key.iov_len) - - EVEN_CEIL(node_ks(page_node(mn->pg[mn->top], 0))); + const intptr_t delta = EVEN_CEIL(key.iov_len) - EVEN_CEIL(node_ks(page_node(mn->pg[mn->top], 0))); const intptr_t needed = branch_size(cdst->txn->env, &key4move) + delta; 
const intptr_t have = page_room(pdst); if (unlikely(needed > have)) @@ -255,10 +246,8 @@ static int node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, bool fromleft) { pdst = cdst->pg[cdst->top]; } - DEBUG("moving %s-node %u [%s] on page %" PRIaPGNO - " to node %u on page %" PRIaPGNO, - "branch", csrc->ki[csrc->top], DKEY_DEBUG(&key4move), psrc->pgno, - cdst->ki[cdst->top], pdst->pgno); + DEBUG("moving %s-node %u [%s] on page %" PRIaPGNO " to node %u on page %" PRIaPGNO, "branch", csrc->ki[csrc->top], + DKEY_DEBUG(&key4move), psrc->pgno, cdst->ki[cdst->top], pdst->pgno); /* Add the node to the destination page. */ rc = node_add_branch(cdst, cdst->ki[cdst->top], &key4move, srcpg); } break; @@ -275,13 +264,10 @@ static int node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, bool fromleft) { data.iov_base = node_data(srcnode); key4move.iov_len = node_ks(srcnode); key4move.iov_base = node_key(srcnode); - DEBUG("moving %s-node %u [%s] on page %" PRIaPGNO - " to node %u on page %" PRIaPGNO, - "leaf", csrc->ki[csrc->top], DKEY_DEBUG(&key4move), psrc->pgno, - cdst->ki[cdst->top], pdst->pgno); + DEBUG("moving %s-node %u [%s] on page %" PRIaPGNO " to node %u on page %" PRIaPGNO, "leaf", csrc->ki[csrc->top], + DKEY_DEBUG(&key4move), psrc->pgno, cdst->ki[cdst->top], pdst->pgno); /* Add the node to the destination page. 
*/ - rc = node_add_leaf(cdst, cdst->ki[cdst->top], &key4move, &data, - node_flags(srcnode)); + rc = node_add_leaf(cdst, cdst->ki[cdst->top], &key4move, &data, node_flags(srcnode)); } break; case P_LEAF | P_DUPFIX: { @@ -290,12 +276,9 @@ static int node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, bool fromleft) { return rc; psrc = csrc->pg[csrc->top]; pdst = cdst->pg[cdst->top]; - key4move = - page_dupfix_key(psrc, csrc->ki[csrc->top], csrc->tree->dupfix_size); - DEBUG("moving %s-node %u [%s] on page %" PRIaPGNO - " to node %u on page %" PRIaPGNO, - "leaf2", csrc->ki[csrc->top], DKEY_DEBUG(&key4move), psrc->pgno, - cdst->ki[cdst->top], pdst->pgno); + key4move = page_dupfix_key(psrc, csrc->ki[csrc->top], csrc->tree->dupfix_size); + DEBUG("moving %s-node %u [%s] on page %" PRIaPGNO " to node %u on page %" PRIaPGNO, "leaf2", csrc->ki[csrc->top], + DKEY_DEBUG(&key4move), psrc->pgno, cdst->ki[cdst->top], pdst->pgno); /* Add the node to the destination page. */ rc = node_add_dupfix(cdst, cdst->ki[cdst->top], &key4move); } break; @@ -329,13 +312,11 @@ static int node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, bool fromleft) { if (!is_related(csrc, m3)) continue; - if (m3 != cdst && m3->pg[csrc->top] == pdst && - m3->ki[csrc->top] >= cdst->ki[csrc->top]) { + if (m3 != cdst && m3->pg[csrc->top] == pdst && m3->ki[csrc->top] >= cdst->ki[csrc->top]) { m3->ki[csrc->top] += 1; } - if (/* m3 != csrc && */ m3->pg[csrc->top] == psrc && - m3->ki[csrc->top] == csrc->ki[csrc->top]) { + if (/* m3 != csrc && */ m3->pg[csrc->top] == psrc && m3->ki[csrc->top] == csrc->ki[csrc->top]) { m3->pg[csrc->top] = pdst; m3->ki[csrc->top] = cdst->ki[cdst->top]; cASSERT(csrc, csrc->top > 0); @@ -387,8 +368,7 @@ static int node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, bool fromleft) { key.iov_len = node_ks(srcnode); key.iov_base = node_key(srcnode); } - DEBUG("update separator for source page %" PRIaPGNO " to [%s]", - psrc->pgno, DKEY_DEBUG(&key)); + DEBUG("update separator for source page %" PRIaPGNO 
" to [%s]", psrc->pgno, DKEY_DEBUG(&key)); cursor_couple_t couple; MDBX_cursor *const mn = cursor_clone(csrc, &couple); @@ -423,8 +403,7 @@ static int node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, bool fromleft) { key.iov_len = node_ks(srcnode); key.iov_base = node_key(srcnode); } - DEBUG("update separator for destination page %" PRIaPGNO " to [%s]", - pdst->pgno, DKEY_DEBUG(&key)); + DEBUG("update separator for destination page %" PRIaPGNO " to [%s]", pdst->pgno, DKEY_DEBUG(&key)); cursor_couple_t couple; MDBX_cursor *const mn = cursor_clone(cdst, &couple); cASSERT(cdst, mn->top > 0); @@ -465,12 +444,10 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { cASSERT(csrc, csrc->clc == cdst->clc && csrc->tree == cdst->tree); cASSERT(csrc, csrc->top > 0); /* can't merge root page */ cASSERT(cdst, cdst->top > 0); - cASSERT(cdst, cdst->top + 1 < cdst->tree->height || - is_leaf(cdst->pg[cdst->tree->height - 1])); - cASSERT(csrc, csrc->top + 1 < csrc->tree->height || - is_leaf(csrc->pg[csrc->tree->height - 1])); - cASSERT(cdst, csrc->txn->env->options.prefer_waf_insteadof_balance || - page_room(pdst) >= page_used(cdst->txn->env, psrc)); + cASSERT(cdst, cdst->top + 1 < cdst->tree->height || is_leaf(cdst->pg[cdst->tree->height - 1])); + cASSERT(csrc, csrc->top + 1 < csrc->tree->height || is_leaf(csrc->pg[csrc->tree->height - 1])); + cASSERT(cdst, + csrc->txn->env->options.prefer_waf_insteadof_balance || page_room(pdst) >= page_used(cdst->txn->env, psrc)); const int pagetype = page_type(psrc); /* Move all nodes from src to dst */ @@ -560,10 +537,8 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { } pdst = cdst->pg[cdst->top]; - DEBUG("dst page %" PRIaPGNO " now has %zu keys (%u.%u%% filled)", - pdst->pgno, page_numkeys(pdst), - page_fill_percentum_x10(cdst->txn->env, pdst) / 10, - page_fill_percentum_x10(cdst->txn->env, pdst) % 10); + DEBUG("dst page %" PRIaPGNO " now has %zu keys (%u.%u%% filled)", pdst->pgno, page_numkeys(pdst), + 
page_fill_percentum_x10(cdst->txn->env, pdst) / 10, page_fill_percentum_x10(cdst->txn->env, pdst) % 10); cASSERT(csrc, psrc == csrc->pg[csrc->top]); cASSERT(cdst, pdst == cdst->pg[cdst->top]); @@ -598,11 +573,8 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { m3->pg[csrc->top] = pdst; m3->ki[csrc->top] += (indx_t)dst_nkeys; m3->ki[csrc->top - 1] = cdst->ki[csrc->top - 1]; - } else if (m3->pg[csrc->top - 1] == csrc->pg[csrc->top - 1] && - m3->ki[csrc->top - 1] > csrc->ki[csrc->top - 1]) { - cASSERT(m3, m3->ki[csrc->top - 1] > 0 && - m3->ki[csrc->top - 1] <= - page_numkeys(m3->pg[csrc->top - 1])); + } else if (m3->pg[csrc->top - 1] == csrc->pg[csrc->top - 1] && m3->ki[csrc->top - 1] > csrc->ki[csrc->top - 1]) { + cASSERT(m3, m3->ki[csrc->top - 1] > 0 && m3->ki[csrc->top - 1] <= page_numkeys(m3->pg[csrc->top - 1])); m3->ki[csrc->top - 1] -= 1; } @@ -641,8 +613,7 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { if (is_leaf(cdst->pg[cdst->top])) { /* LY: don't touch cursor if top-page is a LEAF */ - cASSERT(cdst, is_leaf(cdst->pg[cdst->top]) || - page_type(cdst->pg[cdst->top]) == pagetype); + cASSERT(cdst, is_leaf(cdst->pg[cdst->top]) || page_type(cdst->pg[cdst->top]) == pagetype); return MDBX_SUCCESS; } @@ -656,8 +627,7 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { if (top_page == cdst->pg[cdst->top]) { /* LY: don't touch cursor if prev top-page already on the top */ cASSERT(cdst, cdst->ki[cdst->top] == top_indx); - cASSERT(cdst, is_leaf(cdst->pg[cdst->top]) || - page_type(cdst->pg[cdst->top]) == pagetype); + cASSERT(cdst, is_leaf(cdst->pg[cdst->top]) || page_type(cdst->pg[cdst->top]) == pagetype); return MDBX_SUCCESS; } @@ -671,18 +641,15 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { cASSERT(cdst, cdst->ki[new_top] == top_indx); /* LY: restore cursor stack */ cdst->top = (int8_t)new_top; - cASSERT(cdst, cdst->top + 1 < cdst->tree->height || - is_leaf(cdst->pg[cdst->tree->height - 1])); - 
cASSERT(cdst, is_leaf(cdst->pg[cdst->top]) || - page_type(cdst->pg[cdst->top]) == pagetype); + cASSERT(cdst, cdst->top + 1 < cdst->tree->height || is_leaf(cdst->pg[cdst->tree->height - 1])); + cASSERT(cdst, is_leaf(cdst->pg[cdst->top]) || page_type(cdst->pg[cdst->top]) == pagetype); return MDBX_SUCCESS; } page_t *const stub_page = (page_t *)(~(uintptr_t)top_page); const indx_t stub_indx = top_indx; - if (save_height > cdst->tree->height && - ((cdst->pg[save_top] == top_page && cdst->ki[save_top] == top_indx) || - (cdst->pg[save_top] == stub_page && cdst->ki[save_top] == stub_indx))) { + if (save_height > cdst->tree->height && ((cdst->pg[save_top] == top_page && cdst->ki[save_top] == top_indx) || + (cdst->pg[save_top] == stub_page && cdst->ki[save_top] == stub_indx))) { /* LY: restore cursor stack */ cdst->pg[new_top] = top_page; cdst->ki[new_top] = top_indx; @@ -691,10 +658,8 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { cdst->ki[new_top + 1] = INT16_MAX; #endif cdst->top = (int8_t)new_top; - cASSERT(cdst, cdst->top + 1 < cdst->tree->height || - is_leaf(cdst->pg[cdst->tree->height - 1])); - cASSERT(cdst, is_leaf(cdst->pg[cdst->top]) || - page_type(cdst->pg[cdst->top]) == pagetype); + cASSERT(cdst, cdst->top + 1 < cdst->tree->height || is_leaf(cdst->pg[cdst->tree->height - 1])); + cASSERT(cdst, is_leaf(cdst->pg[cdst->top]) || page_type(cdst->pg[cdst->top]) == pagetype); return MDBX_SUCCESS; } @@ -707,8 +672,7 @@ bailout: int tree_rebalance(MDBX_cursor *mc) { cASSERT(mc, cursor_is_tracked(mc)); cASSERT(mc, mc->top >= 0); - cASSERT(mc, mc->top + 1 < mc->tree->height || - is_leaf(mc->pg[mc->tree->height - 1])); + cASSERT(mc, mc->top + 1 < mc->tree->height || is_leaf(mc->pg[mc->tree->height - 1])); const page_t *const tp = mc->pg[mc->top]; const uint8_t pagetype = page_type(tp); @@ -716,29 +680,22 @@ int tree_rebalance(MDBX_cursor *mc) { const size_t minkeys = (pagetype & P_BRANCH) + (size_t)1; /* Pages emptier than this are candidates for merging. 
*/ - size_t room_threshold = likely(mc->tree != &mc->txn->dbs[FREE_DBI]) - ? mc->txn->env->merge_threshold - : mc->txn->env->merge_threshold_gc; + size_t room_threshold = + likely(mc->tree != &mc->txn->dbs[FREE_DBI]) ? mc->txn->env->merge_threshold : mc->txn->env->merge_threshold_gc; const size_t numkeys = page_numkeys(tp); const size_t room = page_room(tp); - DEBUG("rebalancing %s page %" PRIaPGNO - " (has %zu keys, fill %u.%u%%, used %zu, room %zu bytes)", - is_leaf(tp) ? "leaf" : "branch", tp->pgno, numkeys, - page_fill_percentum_x10(mc->txn->env, tp) / 10, - page_fill_percentum_x10(mc->txn->env, tp) % 10, - page_used(mc->txn->env, tp), room); + DEBUG("rebalancing %s page %" PRIaPGNO " (has %zu keys, fill %u.%u%%, used %zu, room %zu bytes)", + is_leaf(tp) ? "leaf" : "branch", tp->pgno, numkeys, page_fill_percentum_x10(mc->txn->env, tp) / 10, + page_fill_percentum_x10(mc->txn->env, tp) % 10, page_used(mc->txn->env, tp), room); cASSERT(mc, is_modifable(mc->txn, tp)); if (unlikely(numkeys < minkeys)) { - DEBUG("page %" PRIaPGNO " must be merged due keys < %zu threshold", - tp->pgno, minkeys); + DEBUG("page %" PRIaPGNO " must be merged due keys < %zu threshold", tp->pgno, minkeys); } else if (unlikely(room > room_threshold)) { - DEBUG("page %" PRIaPGNO " should be merged due room %zu > %zu threshold", - tp->pgno, room, room_threshold); + DEBUG("page %" PRIaPGNO " should be merged due room %zu > %zu threshold", tp->pgno, room, room_threshold); } else { - DEBUG("no need to rebalance page %" PRIaPGNO ", room %zu < %zu threshold", - tp->pgno, room, room_threshold); + DEBUG("no need to rebalance page %" PRIaPGNO ", room %zu < %zu threshold", tp->pgno, room, room_threshold); cASSERT(mc, mc->tree->items > 0); return MDBX_SUCCESS; } @@ -752,11 +709,9 @@ int tree_rebalance(MDBX_cursor *mc) { DEBUG("%s", "tree is completely empty"); cASSERT(mc, is_leaf(mp)); cASSERT(mc, (*cursor_dbi_state(mc) & DBI_DIRTY) != 0); - cASSERT(mc, mc->tree->branch_pages == 0 && 
mc->tree->large_pages == 0 && - mc->tree->leaf_pages == 1); + cASSERT(mc, mc->tree->branch_pages == 0 && mc->tree->large_pages == 0 && mc->tree->leaf_pages == 1); /* Adjust cursors pointing to mp */ - for (MDBX_cursor *m2 = mc->txn->cursors[cursor_dbi(mc)]; m2; - m2 = m2->next) { + for (MDBX_cursor *m2 = mc->txn->cursors[cursor_dbi(mc)]; m2; m2 = m2->next) { MDBX_cursor *m3 = (mc->flags & z_inner) ? &m2->subcur->cursor : m2; if (!is_poor(m3) && m3->pg[0] == mp) { be_poor(m3); @@ -790,8 +745,7 @@ int tree_rebalance(MDBX_cursor *mc) { } /* Adjust other cursors pointing to mp */ - for (MDBX_cursor *m2 = mc->txn->cursors[cursor_dbi(mc)]; m2; - m2 = m2->next) { + for (MDBX_cursor *m2 = mc->txn->cursors[cursor_dbi(mc)]; m2; m2 = m2->next) { MDBX_cursor *m3 = (mc->flags & z_inner) ? &m2->subcur->cursor : m2; if (is_related(mc, m3) && m3->pg[0] == mp) { for (intptr_t i = 0; i < mc->tree->height; i++) { @@ -801,14 +755,11 @@ int tree_rebalance(MDBX_cursor *mc) { m3->top -= 1; } } - cASSERT(mc, is_leaf(mc->pg[mc->top]) || - page_type(mc->pg[mc->top]) == pagetype); - cASSERT(mc, mc->top + 1 < mc->tree->height || - is_leaf(mc->pg[mc->tree->height - 1])); + cASSERT(mc, is_leaf(mc->pg[mc->top]) || page_type(mc->pg[mc->top]) == pagetype); + cASSERT(mc, mc->top + 1 < mc->tree->height || is_leaf(mc->pg[mc->tree->height - 1])); return page_retire(mc, mp); } - DEBUG("root page %" PRIaPGNO " doesn't need rebalancing (flags 0x%x)", - mp->pgno, mp->flags); + DEBUG("root page %" PRIaPGNO " doesn't need rebalancing (flags 0x%x)", mp->pgno, mp->flags); return MDBX_SUCCESS; } @@ -829,17 +780,14 @@ int tree_rebalance(MDBX_cursor *mc) { page_t *left = nullptr, *right = nullptr; if (mn->ki[pre_top] > 0) { - rc = - page_get(mn, node_pgno(page_node(mn->pg[pre_top], mn->ki[pre_top] - 1)), - &left, mc->pg[mc->top]->txnid); + rc = page_get(mn, node_pgno(page_node(mn->pg[pre_top], mn->ki[pre_top] - 1)), &left, mc->pg[mc->top]->txnid); if (unlikely(rc != MDBX_SUCCESS)) return rc; cASSERT(mc, 
page_type(left) == page_type(mc->pg[mc->top])); } if (mn->ki[pre_top] + (size_t)1 < page_numkeys(mn->pg[pre_top])) { - rc = page_get( - mn, node_pgno(page_node(mn->pg[pre_top], mn->ki[pre_top] + (size_t)1)), - &right, mc->pg[mc->top]->txnid); + rc = page_get(mn, node_pgno(page_node(mn->pg[pre_top], mn->ki[pre_top] + (size_t)1)), &right, + mc->pg[mc->top]->txnid); if (unlikely(rc != MDBX_SUCCESS)) return rc; cASSERT(mc, page_type(right) == page_type(mc->pg[mc->top])); @@ -857,8 +805,7 @@ int tree_rebalance(MDBX_cursor *mc) { bool involve = !(left && right); retry: cASSERT(mc, mc->top > 0); - if (left_room > room_threshold && left_room >= right_room && - (is_modifable(mc->txn, left) || involve)) { + if (left_room > room_threshold && left_room >= right_room && (is_modifable(mc->txn, left) || involve)) { /* try merge with left */ cASSERT(mc, left_nkeys >= minkeys); mn->pg[mn->top] = left; @@ -878,8 +825,7 @@ retry: return rc; } } - if (right_room > room_threshold && - (is_modifable(mc->txn, right) || involve)) { + if (right_room > room_threshold && (is_modifable(mc->txn, right) || involve)) { /* try merge with right */ cASSERT(mc, right_nkeys >= minkeys); mn->pg[mn->top] = right; @@ -897,8 +843,7 @@ retry: } } - if (left_nkeys > minkeys && - (right_nkeys <= left_nkeys || right_room >= left_room) && + if (left_nkeys > minkeys && (right_nkeys <= left_nkeys || right_room >= left_room) && (is_modifable(mc->txn, left) || involve)) { /* try move from left */ mn->pg[mn->top] = left; @@ -939,16 +884,13 @@ retry: return MDBX_SUCCESS; } - if (mc->txn->env->options.prefer_waf_insteadof_balance && - likely(room_threshold > 0)) { + if (mc->txn->env->options.prefer_waf_insteadof_balance && likely(room_threshold > 0)) { room_threshold = 0; goto retry; } if (likely(!involve) && - (likely(mc->tree != &mc->txn->dbs[FREE_DBI]) || mc->txn->tw.loose_pages || - MDBX_PNL_GETSIZE(mc->txn->tw.relist) || - (mc->flags & z_gcu_preparation) || (mc->txn->flags & txn_gc_drained) || - 
room_threshold)) { + (likely(mc->tree != &mc->txn->dbs[FREE_DBI]) || mc->txn->tw.loose_pages || MDBX_PNL_GETSIZE(mc->txn->tw.relist) || + (mc->flags & z_gcu_preparation) || (mc->txn->flags & txn_gc_drained) || room_threshold)) { involve = true; goto retry; } @@ -957,17 +899,14 @@ retry: goto retry; } - ERROR("Unable to merge/rebalance %s page %" PRIaPGNO - " (has %zu keys, fill %u.%u%%, used %zu, room %zu bytes)", - is_leaf(tp) ? "leaf" : "branch", tp->pgno, numkeys, - page_fill_percentum_x10(mc->txn->env, tp) / 10, - page_fill_percentum_x10(mc->txn->env, tp) % 10, - page_used(mc->txn->env, tp), room); + ERROR("Unable to merge/rebalance %s page %" PRIaPGNO " (has %zu keys, fill %u.%u%%, used %zu, room %zu bytes)", + is_leaf(tp) ? "leaf" : "branch", tp->pgno, numkeys, page_fill_percentum_x10(mc->txn->env, tp) / 10, + page_fill_percentum_x10(mc->txn->env, tp) % 10, page_used(mc->txn->env, tp), room); return MDBX_PROBLEM; } -int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, - MDBX_val *const newdata, pgno_t newpgno, const unsigned naf) { +int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, MDBX_val *const newdata, pgno_t newpgno, + const unsigned naf) { unsigned flags; int rc = MDBX_SUCCESS, foliage = 0; MDBX_env *const env = mc->txn->env; @@ -988,11 +927,8 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, STATIC_ASSERT(P_BRANCH == 1); const size_t minkeys = (mp->flags & P_BRANCH) + (size_t)1; - DEBUG(">> splitting %s-page %" PRIaPGNO - " and adding %zu+%zu [%s] at %i, nkeys %zi", - is_leaf(mp) ? "leaf" : "branch", mp->pgno, newkey->iov_len, - newdata ? newdata->iov_len : 0, DKEY_DEBUG(newkey), mc->ki[mc->top], - nkeys); + DEBUG(">> splitting %s-page %" PRIaPGNO " and adding %zu+%zu [%s] at %i, nkeys %zi", is_leaf(mp) ? "leaf" : "branch", + mp->pgno, newkey->iov_len, newdata ? newdata->iov_len : 0, DKEY_DEBUG(newkey), mc->ki[mc->top], nkeys); cASSERT(mc, nkeys + 1 >= minkeys * 2); /* Create a new sibling page. 
*/ @@ -1057,10 +993,8 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, mn->ki[mn->top] = 0; mn->ki[prev_top] = mc->ki[prev_top] + 1; - size_t split_indx = - (newindx < nkeys) - ? /* split at the middle */ (nkeys + 1) >> 1 - : /* split at the end (i.e. like append-mode ) */ nkeys - minkeys + 1; + size_t split_indx = (newindx < nkeys) ? /* split at the middle */ (nkeys + 1) >> 1 + : /* split at the end (i.e. like append-mode ) */ nkeys - minkeys + 1; eASSERT(env, split_indx >= minkeys && split_indx <= nkeys - minkeys + 1); cASSERT(mc, !is_branch(mp) || newindx > 0); @@ -1094,11 +1028,9 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, if (foliage) { TRACE("pure-left: foliage %u, top %i, ptop %zu, split_indx %zi, " "minkeys %zi, sepkey %s, parent-room %zu, need4split %zu", - foliage, mc->top, prev_top, split_indx, minkeys, - DKEY_DEBUG(&sepkey), page_room(mc->pg[prev_top]), + foliage, mc->top, prev_top, split_indx, minkeys, DKEY_DEBUG(&sepkey), page_room(mc->pg[prev_top]), branch_size(env, &sepkey)); - TRACE("pure-left: newkey %s, newdata %s, newindx %zu", - DKEY_DEBUG(newkey), DVAL_DEBUG(newdata), newindx); + TRACE("pure-left: newkey %s, newdata %s, newindx %zu", DKEY_DEBUG(newkey), DVAL_DEBUG(newdata), newindx); } } } @@ -1112,8 +1044,7 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, sepkey = *newkey; } else if (unlikely(pure_left)) { /* newindx == split_indx == 0 */ - TRACE("pure-left: no-split, but add new pure page at the %s", - "left/before"); + TRACE("pure-left: no-split, but add new pure page at the %s", "left/before"); cASSERT(mc, newindx == 0 && split_indx == 0 && minkeys == 1); TRACE("pure-left: old-first-key is %s", DKEY_DEBUG(&sepkey)); } else { @@ -1139,8 +1070,7 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, void *const ins = page_dupfix_ptr(mp, mc->ki[mc->top], ksize); memcpy(sister->entries, split, rsize); sepkey.iov_base = sister->entries; - memmove(ptr_disp(ins, ksize), ins, - 
(split_indx - mc->ki[mc->top]) * ksize); + memmove(ptr_disp(ins, ksize), ins, (split_indx - mc->ki[mc->top]) * ksize); memcpy(ins, newkey->iov_base, ksize); cASSERT(mc, UINT16_MAX - mp->lower >= (int)sizeof(indx_t)); mp->lower += sizeof(indx_t); @@ -1151,16 +1081,14 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, memcpy(sister->entries, split, distance * ksize); void *const ins = page_dupfix_ptr(sister, distance, ksize); memcpy(ins, newkey->iov_base, ksize); - memcpy(ptr_disp(ins, ksize), ptr_disp(split, distance * ksize), - rsize - distance * ksize); + memcpy(ptr_disp(ins, ksize), ptr_disp(split, distance * ksize), rsize - distance * ksize); cASSERT(mc, UINT16_MAX - sister->lower >= (int)sizeof(indx_t)); sister->lower += sizeof(indx_t); cASSERT(mc, sister->upper >= ksize - sizeof(indx_t)); sister->upper -= (indx_t)(ksize - sizeof(indx_t)); cASSERT(mc, distance <= (int)UINT16_MAX); mc->ki[mc->top] = (indx_t)distance; - cASSERT(mc, - (((ksize & page_numkeys(sister)) ^ sister->upper) & 1) == 0); + cASSERT(mc, (((ksize & page_numkeys(sister)) ^ sister->upper) & 1) == 0); } if (AUDIT_ENABLED()) { @@ -1180,8 +1108,7 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, } const size_t max_space = page_space(env); - const size_t new_size = is_leaf(mp) ? leaf_size(env, newkey, newdata) - : branch_size(env, newkey); + const size_t new_size = is_leaf(mp) ? leaf_size(env, newkey, newdata) : branch_size(env, newkey); /* prepare to insert */ size_t i = 0; @@ -1218,8 +1145,7 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, split_indx += mp->flags & P_BRANCH; } eASSERT(env, split_indx >= minkeys && split_indx <= nkeys + 1 - minkeys); - const size_t dim_nodes = - (newindx >= split_indx) ? split_indx : nkeys - split_indx; + const size_t dim_nodes = (newindx >= split_indx) ? 
split_indx : nkeys - split_indx; const size_t dim_used = (sizeof(indx_t) + NODESIZE + 1) * dim_nodes; if (new_size >= dim_used) { /* Search for best acceptable split point */ @@ -1239,15 +1165,13 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, node_t *node = ptr_disp(mp, tmp_ki_copy->entries[i] + PAGEHDRSZ); size = NODESIZE + node_ks(node) + sizeof(indx_t); if (is_leaf(mp)) - size += - (node_flags(node) & N_BIG) ? sizeof(pgno_t) : node_ds(node); + size += (node_flags(node) & N_BIG) ? sizeof(pgno_t) : node_ds(node); size = EVEN_CEIL(size); } before += size; after -= size; - TRACE("step %zu, size %zu, before %zu, after %zu, max %zu", i, size, - before, after, max_space); + TRACE("step %zu, size %zu, before %zu, after %zu, max %zu", i, size, before, after, max_space); if (before <= max_space && after <= max_space) { const size_t split = i + (dir > 0); @@ -1271,8 +1195,7 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, sepkey = *newkey; if (split_indx != newindx) { - node_t *node = - ptr_disp(mp, tmp_ki_copy->entries[split_indx] + PAGEHDRSZ); + node_t *node = ptr_disp(mp, tmp_ki_copy->entries[split_indx] + PAGEHDRSZ); sepkey.iov_len = node_ks(node); sepkey.iov_base = node_key(node); } @@ -1308,8 +1231,7 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, /* Right page might now have changed parent. * Check if left page also changed parent. */ - if (mn->pg[prev_top] != mc->pg[prev_top] && - mc->ki[prev_top] >= page_numkeys(mc->pg[prev_top])) { + if (mn->pg[prev_top] != mc->pg[prev_top] && mc->ki[prev_top] >= page_numkeys(mc->pg[prev_top])) { for (intptr_t i = 0; i < prev_top; i++) { mc->pg[i] = mn->pg[i]; mc->ki[i] = mn->ki[i]; @@ -1334,14 +1256,11 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, page_t *ptop_page = mc->pg[prev_top]; TRACE("pure-left: adding to parent page %u node[%u] left-leaf page #%u key " "%s", - ptop_page->pgno, mc->ki[prev_top], sister->pgno, - DKEY(mc->ki[prev_top] ? 
newkey : nullptr)); + ptop_page->pgno, mc->ki[prev_top], sister->pgno, DKEY(mc->ki[prev_top] ? newkey : nullptr)); assert(mc->top == prev_top + 1); mc->top = (uint8_t)prev_top; - rc = node_add_branch(mc, mc->ki[prev_top], - mc->ki[prev_top] ? newkey : nullptr, sister->pgno); - cASSERT(mc, mp == mc->pg[prev_top + 1] && newindx == mc->ki[prev_top + 1] && - prev_top == mc->top); + rc = node_add_branch(mc, mc->ki[prev_top], mc->ki[prev_top] ? newkey : nullptr, sister->pgno); + cASSERT(mc, mp == mc->pg[prev_top + 1] && newindx == mc->ki[prev_top + 1] && prev_top == mc->top); if (likely(rc == MDBX_SUCCESS) && mc->ki[prev_top] == 0) { node_t *node = page_node(mc->pg[prev_top], 1); @@ -1351,12 +1270,10 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, mc->ki[prev_top] = 1; rc = tree_propagate_key(mc, &sepkey); cASSERT(mc, mc->top == prev_top && mc->ki[prev_top] == 1); - cASSERT(mc, - mp == mc->pg[prev_top + 1] && newindx == mc->ki[prev_top + 1]); + cASSERT(mc, mp == mc->pg[prev_top + 1] && newindx == mc->ki[prev_top + 1]); mc->ki[prev_top] = 0; } else { - TRACE("pure-left: no-need-update prev-first key on parent %s", - DKEY(&sepkey)); + TRACE("pure-left: no-need-update prev-first key on parent %s", DKEY(&sepkey)); } mc->top++; @@ -1367,8 +1284,7 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, cASSERT(mc, node_pgno(node) == mp->pgno && mc->pg[prev_top] == ptop_page); } else { mn->top -= 1; - TRACE("add-to-parent the right-entry[%u] for new sibling-page", - mn->ki[prev_top]); + TRACE("add-to-parent the right-entry[%u] for new sibling-page", mn->ki[prev_top]); rc = node_add_branch(mn, mn->ki[prev_top], &sepkey, sister->pgno); mn->top += 1; if (unlikely(rc != MDBX_SUCCESS)) @@ -1403,8 +1319,8 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, sepkey = get_key(page_node(mc->pg[mc->top - i], mc->ki[mc->top - i])); if (mc->clc->k.cmp(newkey, &sepkey) < 0) { mc->top -= (int8_t)i; - DEBUG("pure-left: update new-first on parent [%i] page 
%u key %s", - mc->ki[mc->top], mc->pg[mc->top]->pgno, DKEY(newkey)); + DEBUG("pure-left: update new-first on parent [%i] page %u key %s", mc->ki[mc->top], mc->pg[mc->top]->pgno, + DKEY(newkey)); rc = tree_propagate_key(mc, newkey); mc->top += (int8_t)i; if (unlikely(rc != MDBX_SUCCESS)) @@ -1474,16 +1390,14 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, } } while (ii != split_indx); - TRACE("ii %zu, nkeys %zu, n %zu, pgno #%u", ii, nkeys, n, - mc->pg[mc->top]->pgno); + TRACE("ii %zu, nkeys %zu, n %zu, pgno #%u", ii, nkeys, n, mc->pg[mc->top]->pgno); nkeys = page_numkeys(tmp_ki_copy); for (size_t i = 0; i < nkeys; i++) mp->entries[i] = tmp_ki_copy->entries[i]; mp->lower = tmp_ki_copy->lower; mp->upper = tmp_ki_copy->upper; - memcpy(page_node(mp, nkeys - 1), page_node(tmp_ki_copy, nkeys - 1), - env->ps - tmp_ki_copy->upper - PAGEHDRSZ); + memcpy(page_node(mp, nkeys - 1), page_node(tmp_ki_copy, nkeys - 1), env->ps - tmp_ki_copy->upper - PAGEHDRSZ); /* reset back to original page */ if (newindx < split_indx) { @@ -1492,8 +1406,7 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, mc->pg[mc->top] = sister; mc->ki[prev_top]++; /* Make sure ki is still valid. */ - if (mn->pg[prev_top] != mc->pg[prev_top] && - mc->ki[prev_top] >= page_numkeys(mc->pg[prev_top])) { + if (mn->pg[prev_top] != mc->pg[prev_top] && mc->ki[prev_top] >= page_numkeys(mc->pg[prev_top])) { for (intptr_t i = 0; i <= prev_top; i++) { mc->pg[i] = mn->pg[i]; mc->ki[i] = mn->ki[i]; @@ -1504,8 +1417,7 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, mc->pg[mc->top] = sister; mc->ki[prev_top]++; /* Make sure ki is still valid. 
*/ - if (mn->pg[prev_top] != mc->pg[prev_top] && - mc->ki[prev_top] >= page_numkeys(mc->pg[prev_top])) { + if (mn->pg[prev_top] != mc->pg[prev_top] && mc->ki[prev_top] >= page_numkeys(mc->pg[prev_top])) { for (intptr_t i = 0; i <= prev_top; i++) { mc->pg[i] = mn->pg[i]; mc->ki[i] = mn->ki[i]; @@ -1545,16 +1457,14 @@ int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, m3->pg[i] = mn->pg[i]; } } - } else if (!did_split_parent && m3->top >= prev_top && - m3->pg[prev_top] == mc->pg[prev_top] && + } else if (!did_split_parent && m3->top >= prev_top && m3->pg[prev_top] == mc->pg[prev_top] && m3->ki[prev_top] >= mc->ki[prev_top]) { m3->ki[prev_top]++; /* also for the `pure-left` case */ } if (inner_pointed(m3) && is_leaf(mp)) cursor_inner_refresh(m3, m3->pg[mc->top], m3->ki[mc->top]); } - TRACE("mp #%u left: %zd, sister #%u left: %zd", mp->pgno, page_room(mp), - sister->pgno, page_room(sister)); + TRACE("mp #%u left: %zd, sister #%u left: %zd", mp->pgno, page_room(mp), sister->pgno, page_room(sister)); done: if (tmp_ki_copy) @@ -1596,8 +1506,8 @@ int tree_propagate_key(MDBX_cursor *mc, const MDBX_val *key) { MDBX_val k2; k2.iov_base = node_key(node); k2.iov_len = node_ks(node); - DEBUG("update key %zi (offset %zu) [%s] to [%s] on page %" PRIaPGNO, indx, - ptr, DVAL_DEBUG(&k2), DKEY_DEBUG(key), mp->pgno); + DEBUG("update key %zi (offset %zu) [%s] to [%s] on page %" PRIaPGNO, indx, ptr, DVAL_DEBUG(&k2), DKEY_DEBUG(key), + mp->pgno); #endif /* MDBX_DEBUG */ /* Sizes must be 2-byte aligned. 
*/ diff --git a/src/txl.c b/src/txl.c index aca3758d..024b099f 100644 --- a/src/txl.c +++ b/src/txl.c @@ -6,8 +6,7 @@ static inline size_t txl_size2bytes(const size_t size) { assert(size > 0 && size <= txl_max * 2); size_t bytes = - ceil_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(txnid_t) * (size + 2), - txl_granulate * sizeof(txnid_t)) - + ceil_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(txnid_t) * (size + 2), txl_granulate * sizeof(txnid_t)) - MDBX_ASSUME_MALLOC_OVERHEAD; return bytes; } @@ -38,11 +37,9 @@ MDBX_INTERNAL void txl_free(txl_t txl) { osal_free(txl - 1); } -MDBX_INTERNAL int txl_reserve(txl_t __restrict *__restrict ptxl, - const size_t wanna) { +MDBX_INTERNAL int txl_reserve(txl_t __restrict *__restrict ptxl, const size_t wanna) { const size_t allocated = (size_t)MDBX_PNL_ALLOCLEN(*ptxl); - assert(MDBX_PNL_GETSIZE(*ptxl) <= txl_max && - MDBX_PNL_ALLOCLEN(*ptxl) >= MDBX_PNL_GETSIZE(*ptxl)); + assert(MDBX_PNL_GETSIZE(*ptxl) <= txl_max && MDBX_PNL_ALLOCLEN(*ptxl) >= MDBX_PNL_GETSIZE(*ptxl)); if (likely(allocated >= wanna)) return MDBX_SUCCESS; @@ -51,9 +48,7 @@ MDBX_INTERNAL int txl_reserve(txl_t __restrict *__restrict ptxl, return MDBX_TXN_FULL; } - const size_t size = (wanna + wanna - allocated < txl_max) - ? wanna + wanna - allocated - : txl_max; + const size_t size = (wanna + wanna - allocated < txl_max) ? 
wanna + wanna - allocated : txl_max; size_t bytes = txl_size2bytes(size); txl_t txl = osal_realloc(*ptxl - 1, bytes); if (likely(txl)) { @@ -68,14 +63,11 @@ MDBX_INTERNAL int txl_reserve(txl_t __restrict *__restrict ptxl, return MDBX_ENOMEM; } -static __always_inline int __must_check_result -txl_need(txl_t __restrict *__restrict ptxl, size_t num) { - assert(MDBX_PNL_GETSIZE(*ptxl) <= txl_max && - MDBX_PNL_ALLOCLEN(*ptxl) >= MDBX_PNL_GETSIZE(*ptxl)); +static __always_inline int __must_check_result txl_need(txl_t __restrict *__restrict ptxl, size_t num) { + assert(MDBX_PNL_GETSIZE(*ptxl) <= txl_max && MDBX_PNL_ALLOCLEN(*ptxl) >= MDBX_PNL_GETSIZE(*ptxl)); assert(num <= PAGELIST_LIMIT); const size_t wanna = (size_t)MDBX_PNL_GETSIZE(*ptxl) + num; - return likely(MDBX_PNL_ALLOCLEN(*ptxl) >= wanna) ? MDBX_SUCCESS - : txl_reserve(ptxl, wanna); + return likely(MDBX_PNL_ALLOCLEN(*ptxl) >= wanna) ? MDBX_SUCCESS : txl_reserve(ptxl, wanna); } static __always_inline void txl_xappend(txl_t __restrict txl, txnid_t id) { @@ -86,12 +78,9 @@ static __always_inline void txl_xappend(txl_t __restrict txl, txnid_t id) { #define TXNID_SORT_CMP(first, last) ((first) > (last)) SORT_IMPL(txnid_sort, false, txnid_t, TXNID_SORT_CMP) -MDBX_INTERNAL void txl_sort(txl_t txl) { - txnid_sort(MDBX_PNL_BEGIN(txl), MDBX_PNL_END(txl)); -} +MDBX_INTERNAL void txl_sort(txl_t txl) { txnid_sort(MDBX_PNL_BEGIN(txl), MDBX_PNL_END(txl)); } -MDBX_INTERNAL int __must_check_result txl_append(txl_t __restrict *ptxl, - txnid_t id) { +MDBX_INTERNAL int __must_check_result txl_append(txl_t __restrict *ptxl, txnid_t id) { if (unlikely(MDBX_PNL_GETSIZE(*ptxl) == MDBX_PNL_ALLOCLEN(*ptxl))) { int rc = txl_need(ptxl, txl_granulate); if (unlikely(rc != MDBX_SUCCESS)) diff --git a/src/txl.h b/src/txl.h index a17fbee6..e80db522 100644 --- a/src/txl.h +++ b/src/txl.h @@ -11,8 +11,7 @@ typedef const txnid_t *const_txl_t; enum txl_rules { txl_granulate = 32, - txl_initial = - txl_granulate - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / 
sizeof(txnid_t), + txl_initial = txl_granulate - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t), txl_max = (1u << 26) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t) }; @@ -20,7 +19,6 @@ MDBX_INTERNAL txl_t txl_alloc(void); MDBX_INTERNAL void txl_free(txl_t txl); -MDBX_INTERNAL int __must_check_result txl_append(txl_t __restrict *ptxl, - txnid_t id); +MDBX_INTERNAL int __must_check_result txl_append(txl_t __restrict *ptxl, txnid_t id); MDBX_INTERNAL void txl_sort(txl_t txl); diff --git a/src/txn.c b/src/txn.c index f6ac98d5..166e7682 100644 --- a/src/txn.c +++ b/src/txn.c @@ -4,8 +4,7 @@ #include "internals.h" __hot txnid_t txn_snapshot_oldest(const MDBX_txn *const txn) { - return mvcc_shapshot_oldest( - txn->env, txn->tw.troika.txnid[txn->tw.troika.prefer_steady]); + return mvcc_shapshot_oldest(txn->env, txn->tw.troika.txnid[txn->tw.troika.prefer_steady]); } static void done_cursors(MDBX_txn *txn, const bool merge) { @@ -59,16 +58,14 @@ int txn_write(MDBX_txn *txn, iov_ctx_t *ctx) { dl->sorted = dpl_setlen(dl, w); txn->tw.dirtyroom += r - 1 - w; tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == - (txn->parent ? txn->parent->tw.dirtyroom - : txn->env->options.dp_limit)); + (txn->parent ? 
txn->parent->tw.dirtyroom : txn->env->options.dp_limit)); tASSERT(txn, txn->tw.dirtylist->length == txn->tw.loose_count); tASSERT(txn, txn->tw.dirtylist->pages_including_loose == txn->tw.loose_count); return rc; } /* Merge child txn into parent */ -static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, - const size_t parent_retired_len) { +static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t parent_retired_len) { tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0); dpl_t *const src = dpl_sort(txn); @@ -84,8 +81,7 @@ static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, parent->tw.dirtyroom += dst->sorted - n; dst->sorted = dpl_setlen(dst, n); tASSERT(parent, parent->tw.dirtyroom + parent->tw.dirtylist->length == - (parent->parent ? parent->parent->tw.dirtyroom - : parent->env->options.dp_limit)); + (parent->parent ? parent->parent->tw.dirtyroom : parent->env->options.dp_limit)); } /* Remove reclaimed pages from parent's dirty list */ @@ -94,8 +90,7 @@ static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, /* Move retired pages from parent's dirty & spilled list to reclaimed */ size_t r, w, d, s, l; - for (r = w = parent_retired_len; - ++r <= MDBX_PNL_GETSIZE(parent->tw.retired_pages);) { + for (r = w = parent_retired_len; ++r <= MDBX_PNL_GETSIZE(parent->tw.retired_pages);) { const pgno_t pgno = parent->tw.retired_pages[r]; const size_t di = dpl_exist(parent, pgno); const size_t si = !di ? 
spill_search(parent, pgno) : 0; @@ -103,8 +98,7 @@ static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const char *kind; if (di) { page_t *dp = dst->items[di].ptr; - tASSERT(parent, (dp->flags & ~(P_LEAF | P_DUPFIX | P_BRANCH | P_LARGE | - P_SPILLED)) == 0); + tASSERT(parent, (dp->flags & ~(P_LEAF | P_DUPFIX | P_BRANCH | P_LARGE | P_SPILLED)) == 0); npages = dpl_npages(dst, di); page_wash(parent, di, dp, npages); kind = "dirty"; @@ -128,8 +122,7 @@ static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, break; } #else - while (w > parent_retired_len && - parent->tw.retired_pages[w - 1] == pgno + l) { + while (w > parent_retired_len && parent->tw.retired_pages[w - 1] == pgno + l) { --w; if (++l == npages) break; @@ -145,22 +138,19 @@ static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, continue; } - DEBUG("reclaim retired parent's %u -> %zu %s page %" PRIaPGNO, npages, l, - kind, pgno); + DEBUG("reclaim retired parent's %u -> %zu %s page %" PRIaPGNO, npages, l, kind, pgno); int err = pnl_insert_span(&parent->tw.relist, pgno, l); ENSURE(txn->env, err == MDBX_SUCCESS); } MDBX_PNL_SETSIZE(parent->tw.retired_pages, w); /* Filter-out parent spill list */ - if (parent->tw.spilled.list && - MDBX_PNL_GETSIZE(parent->tw.spilled.list) > 0) { + if (parent->tw.spilled.list && MDBX_PNL_GETSIZE(parent->tw.spilled.list) > 0) { const pnl_t sl = spill_purge(parent); size_t len = MDBX_PNL_GETSIZE(sl); if (len) { /* Remove refunded pages from parent's spill list */ - if (MDBX_ENABLE_REFUND && - MDBX_PNL_MOST(sl) >= (parent->geo.first_unallocated << 1)) { + if (MDBX_ENABLE_REFUND && MDBX_PNL_MOST(sl) >= (parent->geo.first_unallocated << 1)) { #if MDBX_PNL_ASCENDING size_t i = MDBX_PNL_GETSIZE(sl); assert(MDBX_PNL_MOST(sl) == MDBX_PNL_LAST(sl)); @@ -182,8 +172,7 @@ static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, memmove(sl + 1, sl + 1 + i, len * sizeof(sl[0])); #endif } - tASSERT(txn, pnl_check_allocated(sl, 
(size_t)parent->geo.first_unallocated - << 1)); + tASSERT(txn, pnl_check_allocated(sl, (size_t)parent->geo.first_unallocated << 1)); /* Remove reclaimed pages from parent's spill list */ s = MDBX_PNL_GETSIZE(sl), r = MDBX_PNL_GETSIZE(reclaimed_list); @@ -200,8 +189,7 @@ static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, s -= !cmp; r -= cmp; } else { - DEBUG("remove reclaimed parent's spilled page %" PRIaPGNO, - reclaimed_pgno); + DEBUG("remove reclaimed parent's spilled page %" PRIaPGNO, reclaimed_pgno); spill_remove(parent, s, 1); --s; --r; @@ -231,8 +219,7 @@ static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, continue; } - DEBUG("remove dirtied parent's spilled %u page %" PRIaPGNO, npages, - dirty_pgno_form); + DEBUG("remove dirtied parent's spilled %u page %" PRIaPGNO, npages, dirty_pgno_form); spill_remove(parent, s, 1); s += step; } @@ -244,27 +231,22 @@ static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, /* Remove anything in our spill list from parent's dirty list */ if (txn->tw.spilled.list) { - tASSERT(txn, - pnl_check_allocated(txn->tw.spilled.list, - (size_t)parent->geo.first_unallocated << 1)); + tASSERT(txn, pnl_check_allocated(txn->tw.spilled.list, (size_t)parent->geo.first_unallocated << 1)); dpl_sift(parent, txn->tw.spilled.list, true); tASSERT(parent, parent->tw.dirtyroom + parent->tw.dirtylist->length == - (parent->parent ? parent->parent->tw.dirtyroom - : parent->env->options.dp_limit)); + (parent->parent ? 
parent->parent->tw.dirtyroom : parent->env->options.dp_limit)); } /* Find length of merging our dirty list with parent's and release * filter-out pages */ for (l = 0, d = dst->length, s = src->length; d > 0 && s > 0;) { page_t *sp = src->items[s].ptr; - tASSERT(parent, (sp->flags & ~(P_LEAF | P_DUPFIX | P_BRANCH | P_LARGE | - P_LOOSE | P_SPILLED)) == 0); + tASSERT(parent, (sp->flags & ~(P_LEAF | P_DUPFIX | P_BRANCH | P_LARGE | P_LOOSE | P_SPILLED)) == 0); const unsigned s_npages = dpl_npages(src, s); const pgno_t s_pgno = src->items[s].pgno; page_t *dp = dst->items[d].ptr; - tASSERT(parent, (dp->flags & ~(P_LEAF | P_DUPFIX | P_BRANCH | P_LARGE | - P_SPILLED)) == 0); + tASSERT(parent, (dp->flags & ~(P_LEAF | P_DUPFIX | P_BRANCH | P_LARGE | P_SPILLED)) == 0); const unsigned d_npages = dpl_npages(dst, d); const pgno_t d_pgno = dst->items[d].pgno; @@ -289,8 +271,7 @@ static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, while (s > 0) { page_t *sp = src->items[s].ptr; - tASSERT(parent, (sp->flags & ~(P_LEAF | P_DUPFIX | P_BRANCH | P_LARGE | - P_LOOSE | P_SPILLED)) == 0); + tASSERT(parent, (sp->flags & ~(P_LEAF | P_DUPFIX | P_BRANCH | P_LARGE | P_LOOSE | P_SPILLED)) == 0); if (sp->flags != P_LOOSE) { sp->txnid = parent->front_txnid; sp->flags &= ~P_SPILLED; @@ -318,9 +299,7 @@ static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, } assert(l > d); if (dst->items[d].ptr) { - dst->items[l--] = (dst->items[d].pgno > src->items[s].pgno) - ? dst->items[d--] - : src->items[s--]; + dst->items[l--] = (dst->items[d].pgno > src->items[s].pgno) ? dst->items[d--] : src->items[s--]; } else --d; } @@ -360,9 +339,7 @@ static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, } assert(l < d); if (dst->items[d].ptr) { - dst->items[l++] = (dst->items[d].pgno < src->items[s].pgno) - ? dst->items[d++] - : src->items[s++]; + dst->items[l++] = (dst->items[d].pgno < src->items[s].pgno) ? 
dst->items[d++] : src->items[s++]; } else ++d; } @@ -412,8 +389,7 @@ static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, parent->flags &= ~MDBX_TXN_HAS_CHILD; if (parent->tw.spilled.list) { - assert(pnl_check_allocated(parent->tw.spilled.list, - (size_t)parent->geo.first_unallocated << 1)); + assert(pnl_check_allocated(parent->tw.spilled.list, (size_t)parent->geo.first_unallocated << 1)); if (MDBX_PNL_GETSIZE(parent->tw.spilled.list)) parent->flags |= MDBX_TXN_SPILLS; } @@ -424,19 +400,15 @@ static void take_gcprof(MDBX_txn *txn, MDBX_commit_latency *latency) { if (MDBX_ENABLE_PROFGC) { pgop_stat_t *const ptr = &env->lck->pgops; latency->gc_prof.work_counter = ptr->gc_prof.work.spe_counter; - latency->gc_prof.work_rtime_monotonic = - osal_monotime_to_16dot16(ptr->gc_prof.work.rtime_monotonic); - latency->gc_prof.work_xtime_cpu = - osal_monotime_to_16dot16(ptr->gc_prof.work.xtime_cpu); + latency->gc_prof.work_rtime_monotonic = osal_monotime_to_16dot16(ptr->gc_prof.work.rtime_monotonic); + latency->gc_prof.work_xtime_cpu = osal_monotime_to_16dot16(ptr->gc_prof.work.xtime_cpu); latency->gc_prof.work_rsteps = ptr->gc_prof.work.rsteps; latency->gc_prof.work_xpages = ptr->gc_prof.work.xpages; latency->gc_prof.work_majflt = ptr->gc_prof.work.majflt; latency->gc_prof.self_counter = ptr->gc_prof.self.spe_counter; - latency->gc_prof.self_rtime_monotonic = - osal_monotime_to_16dot16(ptr->gc_prof.self.rtime_monotonic); - latency->gc_prof.self_xtime_cpu = - osal_monotime_to_16dot16(ptr->gc_prof.self.xtime_cpu); + latency->gc_prof.self_rtime_monotonic = osal_monotime_to_16dot16(ptr->gc_prof.self.rtime_monotonic); + latency->gc_prof.self_xtime_cpu = osal_monotime_to_16dot16(ptr->gc_prof.self.xtime_cpu); latency->gc_prof.self_rsteps = ptr->gc_prof.self.rsteps; latency->gc_prof.self_xpages = ptr->gc_prof.self.xpages; latency->gc_prof.self_majflt = ptr->gc_prof.self.majflt; @@ -453,8 +425,7 @@ static void take_gcprof(MDBX_txn *txn, MDBX_commit_latency *latency) { } 
int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { - STATIC_ASSERT(MDBX_TXN_FINISHED == MDBX_TXN_BLOCKED - MDBX_TXN_HAS_CHILD - - MDBX_TXN_ERROR - MDBX_TXN_PARKED); + STATIC_ASSERT(MDBX_TXN_FINISHED == MDBX_TXN_BLOCKED - MDBX_TXN_HAS_CHILD - MDBX_TXN_ERROR - MDBX_TXN_PARKED); const uint64_t ts_0 = latency ? osal_monotime() : 0; uint64_t ts_1 = 0, ts_2 = 0, ts_3 = 0, ts_4 = 0, ts_5 = 0, gc_cputime = 0; @@ -483,13 +454,11 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { } /* txn_end() mode for a commit which writes nothing */ - unsigned end_mode = - TXN_END_PURE_COMMIT | TXN_END_UPDATE | TXN_END_SLOT | TXN_END_FREE; + unsigned end_mode = TXN_END_PURE_COMMIT | TXN_END_UPDATE | TXN_END_SLOT | TXN_END_FREE; if (unlikely(txn->flags & MDBX_TXN_RDONLY)) goto done; - if ((txn->flags & MDBX_NOSTICKYTHREADS) && - unlikely(txn->owner != osal_thread_self())) { + if ((txn->flags & MDBX_NOSTICKYTHREADS) && unlikely(txn->owner != osal_thread_self())) { rc = MDBX_THREAD_MISMATCH; goto fail; } @@ -512,25 +481,19 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { eASSERT(env, txn != env->basal_txn); MDBX_txn *const parent = txn->parent; eASSERT(env, parent->signature == txn_signature); - eASSERT(env, - parent->nested == txn && (parent->flags & MDBX_TXN_HAS_CHILD) != 0); + eASSERT(env, parent->nested == txn && (parent->flags & MDBX_TXN_HAS_CHILD) != 0); eASSERT(env, dpl_check(txn)); - if (txn->tw.dirtylist->length == 0 && !(txn->flags & MDBX_TXN_DIRTY) && - parent->n_dbi == txn->n_dbi) { + if (txn->tw.dirtylist->length == 0 && !(txn->flags & MDBX_TXN_DIRTY) && parent->n_dbi == txn->n_dbi) { TXN_FOREACH_DBI_ALL(txn, i) { tASSERT(txn, (txn->dbi_state[i] & DBI_DIRTY) == 0); - if ((txn->dbi_state[i] & DBI_STALE) && - !(parent->dbi_state[i] & DBI_STALE)) - tASSERT(txn, - memcmp(&parent->dbs[i], &txn->dbs[i], sizeof(tree_t)) == 0); + if ((txn->dbi_state[i] & DBI_STALE) && !(parent->dbi_state[i] & DBI_STALE)) + tASSERT(txn, 
memcmp(&parent->dbs[i], &txn->dbs[i], sizeof(tree_t)) == 0); } tASSERT(txn, memcmp(&parent->geo, &txn->geo, sizeof(parent->geo)) == 0); - tASSERT(txn, memcmp(&parent->canary, &txn->canary, - sizeof(parent->canary)) == 0); - tASSERT(txn, !txn->tw.spilled.list || - MDBX_PNL_GETSIZE(txn->tw.spilled.list) == 0); + tASSERT(txn, memcmp(&parent->canary, &txn->canary, sizeof(parent->canary)) == 0); + tASSERT(txn, !txn->tw.spilled.list || MDBX_PNL_GETSIZE(txn->tw.spilled.list) == 0); tASSERT(txn, txn->tw.loose_count == 0); /* fast completion of pure nested transaction */ @@ -543,8 +506,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { * if allocation fails. */ const size_t parent_retired_len = (uintptr_t)parent->tw.retired_pages; tASSERT(txn, parent_retired_len <= MDBX_PNL_GETSIZE(txn->tw.retired_pages)); - const size_t retired_delta = - MDBX_PNL_GETSIZE(txn->tw.retired_pages) - parent_retired_len; + const size_t retired_delta = MDBX_PNL_GETSIZE(txn->tw.retired_pages) - parent_retired_len; if (retired_delta) { rc = pnl_need(&txn->tw.relist, retired_delta); if (unlikely(rc != MDBX_SUCCESS)) @@ -553,18 +515,15 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { if (txn->tw.spilled.list) { if (parent->tw.spilled.list) { - rc = pnl_need(&parent->tw.spilled.list, - MDBX_PNL_GETSIZE(txn->tw.spilled.list)); + rc = pnl_need(&parent->tw.spilled.list, MDBX_PNL_GETSIZE(txn->tw.spilled.list)); if (unlikely(rc != MDBX_SUCCESS)) goto fail; } spill_purge(txn); } - if (unlikely(txn->tw.dirtylist->length + parent->tw.dirtylist->length > - parent->tw.dirtylist->detent && - !dpl_reserve(parent, txn->tw.dirtylist->length + - parent->tw.dirtylist->length))) { + if (unlikely(txn->tw.dirtylist->length + parent->tw.dirtylist->length > parent->tw.dirtylist->detent && + !dpl_reserve(parent, txn->tw.dirtylist->length + parent->tw.dirtylist->length))) { rc = MDBX_ENOMEM; goto fail; } @@ -604,17 +563,12 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, 
MDBX_commit_latency *latency) { if (txn->dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)) { parent->dbs[dbi] = txn->dbs[dbi]; /* preserve parent's status */ - const uint8_t state = - txn->dbi_state[dbi] | - (parent->dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)); - DEBUG("dbi %zu dbi-state %s 0x%02x -> 0x%02x", dbi, - (parent->dbi_state[dbi] != state) ? "update" : "still", + const uint8_t state = txn->dbi_state[dbi] | (parent->dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)); + DEBUG("dbi %zu dbi-state %s 0x%02x -> 0x%02x", dbi, (parent->dbi_state[dbi] != state) ? "update" : "still", parent->dbi_state[dbi], state); parent->dbi_state[dbi] = state; } else { - eASSERT(env, - txn->dbi_state[dbi] == (parent->dbi_state[dbi] & - ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY))); + eASSERT(env, txn->dbi_state[dbi] == (parent->dbi_state[dbi] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY))); } } @@ -635,15 +589,13 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { if (ASSERT_ENABLED()) { /* Check parent's loose pages not suitable for refund */ for (page_t *lp = parent->tw.loose_pages; lp; lp = page_next(lp)) { - tASSERT(parent, lp->pgno < parent->tw.loose_refund_wl && - lp->pgno + 1 < parent->geo.first_unallocated); + tASSERT(parent, lp->pgno < parent->tw.loose_refund_wl && lp->pgno + 1 < parent->geo.first_unallocated); MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *)); VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *)); } /* Check parent's reclaimed pages not suitable for refund */ if (MDBX_PNL_GETSIZE(parent->tw.relist)) - tASSERT(parent, MDBX_PNL_MOST(parent->tw.relist) + 1 < - parent->geo.first_unallocated); + tASSERT(parent, MDBX_PNL_MOST(parent->tw.relist) + 1 < parent->geo.first_unallocated); } #endif /* MDBX_ENABLE_REFUND */ @@ -659,17 +611,14 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { } else { tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); tASSERT(txn, txn->tw.dirtyroom 
+ txn->tw.dirtylist->length == - (txn->parent ? txn->parent->tw.dirtyroom - : env->options.dp_limit)); + (txn->parent ? txn->parent->tw.dirtyroom : env->options.dp_limit)); } done_cursors(txn, false); end_mode |= TXN_END_EOTDONE; if ((!txn->tw.dirtylist || txn->tw.dirtylist->length == 0) && (txn->flags & (MDBX_TXN_DIRTY | MDBX_TXN_SPILLS)) == 0) { - TXN_FOREACH_DBI_ALL(txn, i) { - tASSERT(txn, !(txn->dbi_state[i] & DBI_DIRTY)); - } + TXN_FOREACH_DBI_ALL(txn, i) { tASSERT(txn, !(txn->dbi_state[i] & DBI_DIRTY)); } #if defined(MDBX_NOSUCCESS_EMPTY_COMMIT) && MDBX_NOSUCCESS_EMPTY_COMMIT rc = txn_end(txn, end_mode); if (unlikely(rc != MDBX_SUCCESS)) @@ -681,10 +630,8 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { #endif /* MDBX_NOSUCCESS_EMPTY_COMMIT */ } - DEBUG("committing txn %" PRIaTXN " %p on env %p, root page %" PRIaPGNO - "/%" PRIaPGNO, - txn->txnid, (void *)txn, (void *)env, txn->dbs[MAIN_DBI].root, - txn->dbs[FREE_DBI].root); + DEBUG("committing txn %" PRIaTXN " %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->txnid, (void *)txn, + (void *)env, txn->dbs[MAIN_DBI].root, txn->dbs[FREE_DBI].root); if (txn->n_dbi > CORE_DBS) { /* Update table root pointers */ @@ -698,9 +645,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { if ((txn->dbi_state[i] & DBI_DIRTY) == 0) continue; tree_t *const db = &txn->dbs[i]; - DEBUG("update main's entry for sub-db %zu, mod_txnid %" PRIaTXN - " -> %" PRIaTXN, - i, db->mod_txnid, txn->txnid); + DEBUG("update main's entry for sub-db %zu, mod_txnid %" PRIaTXN " -> %" PRIaTXN, i, db->mod_txnid, txn->txnid); /* Может быть mod_txnid > front после коммита вложенных тразакций */ db->mod_txnid = txn->txnid; MDBX_val data = {db, sizeof(tree_t)}; @@ -726,13 +671,9 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { goto fail; tASSERT(txn, txn->tw.loose_count == 0); - txn->dbs[FREE_DBI].mod_txnid = (txn->dbi_state[FREE_DBI] & DBI_DIRTY) - ? 
txn->txnid - : txn->dbs[FREE_DBI].mod_txnid; + txn->dbs[FREE_DBI].mod_txnid = (txn->dbi_state[FREE_DBI] & DBI_DIRTY) ? txn->txnid : txn->dbs[FREE_DBI].mod_txnid; - txn->dbs[MAIN_DBI].mod_txnid = (txn->dbi_state[MAIN_DBI] & DBI_DIRTY) - ? txn->txnid - : txn->dbs[MAIN_DBI].mod_txnid; + txn->dbs[MAIN_DBI].mod_txnid = (txn->dbi_state[MAIN_DBI] & DBI_DIRTY) ? txn->txnid : txn->dbs[MAIN_DBI].mod_txnid; ts_2 = latency ? osal_monotime() : 0; ts_3 = ts_2; @@ -745,8 +686,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { bool need_flush_for_nometasync = false; const meta_ptr_t head = meta_recent(env, &txn->tw.troika); - const uint32_t meta_sync_txnid = - atomic_load32(&env->lck->meta_sync_txnid, mo_Relaxed); + const uint32_t meta_sync_txnid = atomic_load32(&env->lck->meta_sync_txnid, mo_Relaxed); /* sync prev meta */ if (head.is_steady && meta_sync_txnid != (uint32_t)head.txnid) { /* Исправление унаследованного от LMDB недочета: @@ -767,10 +707,8 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { * sync-операцией выполняемой после записи данных текущей транзакции. * Соответственно, требуется явно обновлять мета-страницу, что полностью * уничтожает выгоду от NOMETASYNC. */ - const uint32_t txnid_dist = - ((txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) - ? MDBX_NOMETASYNC_LAZY_FD - : MDBX_NOMETASYNC_LAZY_WRITEMAP; + const uint32_t txnid_dist = ((txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) ? 
MDBX_NOMETASYNC_LAZY_FD + : MDBX_NOMETASYNC_LAZY_WRITEMAP; /* Смысл "магии" в том, чтобы избежать отдельного вызова fdatasync() * или msync() для гарантированной фиксации на диске мета-страницы, * которая была "лениво" отправлена на запись в предыдущей транзакции, @@ -807,8 +745,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { #endif /* Windows */ iov_ctx_t write_ctx; - rc = iov_init(txn, &write_ctx, txn->tw.dirtylist->length, - txn->tw.dirtylist->pages_including_loose, fd, false); + rc = iov_init(txn, &write_ctx, txn->tw.dirtylist->length, txn->tw.dirtylist->pages_including_loose, fd, false); if (unlikely(rc != MDBX_SUCCESS)) { ERROR("txn-%s: error %d", "iov-init", rc); goto fail; @@ -835,8 +772,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { meta.validator_id = head.ptr_c->validator_id; meta.extra_pagehdr = head.ptr_c->extra_pagehdr; unaligned_poke_u64(4, meta.pages_retired, - unaligned_peek_u64(4, head.ptr_c->pages_retired) + - MDBX_PNL_GETSIZE(txn->tw.retired_pages)); + unaligned_peek_u64(4, head.ptr_c->pages_retired) + MDBX_PNL_GETSIZE(txn->tw.retired_pages)); meta.geometry = txn->geo; meta.trees.gc = txn->dbs[FREE_DBI]; meta.trees.main = txn->dbs[MAIN_DBI]; @@ -847,15 +783,13 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { #if MDBX_ENABLE_BIGFOOT if (gcu_ctx.bigfoot > txn->txnid) { commit_txnid = gcu_ctx.bigfoot; - TRACE("use @%" PRIaTXN " (+%zu) for commit bigfoot-txn", commit_txnid, - (size_t)(commit_txnid - txn->txnid)); + TRACE("use @%" PRIaTXN " (+%zu) for commit bigfoot-txn", commit_txnid, (size_t)(commit_txnid - txn->txnid)); } #endif meta.unsafe_sign = DATASIGN_NONE; meta_set_txnid(env, &meta, commit_txnid); - rc = dxb_sync_locked(env, env->flags | txn->flags | txn_shrink_allowed, &meta, - &txn->tw.troika); + rc = dxb_sync_locked(env, env->flags | txn->flags | txn_shrink_allowed, &meta, &txn->tw.troika); ts_5 = latency ? 
osal_monotime() : 0; if (unlikely(rc != MDBX_SUCCESS)) { @@ -874,8 +808,7 @@ done: provide_latency: if (latency) { latency->preparation = ts_1 ? osal_monotime_to_16dot16(ts_1 - ts_0) : 0; - latency->gc_wallclock = - (ts_2 > ts_1) ? osal_monotime_to_16dot16(ts_2 - ts_1) : 0; + latency->gc_wallclock = (ts_2 > ts_1) ? osal_monotime_to_16dot16(ts_2 - ts_1) : 0; latency->gc_cputime = gc_cputime ? osal_monotime_to_16dot16(gc_cputime) : 0; latency->audit = (ts_3 > ts_2) ? osal_monotime_to_16dot16(ts_3 - ts_2) : 0; latency->write = (ts_4 > ts_3) ? osal_monotime_to_16dot16(ts_4 - ts_3) : 0; @@ -897,8 +830,7 @@ fail: int txn_abort(MDBX_txn *txn) { if (txn->flags & MDBX_TXN_RDONLY) /* LY: don't close DBI-handles */ - return txn_end(txn, TXN_END_ABORT | TXN_END_UPDATE | TXN_END_SLOT | - TXN_END_FREE); + return txn_end(txn, TXN_END_ABORT | TXN_END_UPDATE | TXN_END_SLOT | TXN_END_FREE); if (unlikely(txn->flags & MDBX_TXN_FINISHED)) return MDBX_BAD_TXN; @@ -923,8 +855,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { flags |= env->flags & (MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP); if (flags & MDBX_TXN_RDONLY) { - eASSERT(env, (flags & ~(txn_ro_begin_flags | MDBX_WRITEMAP | - MDBX_NOSTICKYTHREADS)) == 0); + eASSERT(env, (flags & ~(txn_ro_begin_flags | MDBX_WRITEMAP | MDBX_NOSTICKYTHREADS)) == 0); txn->flags = flags; reader_slot_t *r = txn->to.reader; STATIC_ASSERT(sizeof(uintptr_t) <= sizeof(r->tid)); @@ -932,8 +863,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { eASSERT(env, !(env->flags & MDBX_NOSTICKYTHREADS)); r = thread_rthc_get(env->me_txkey); if (likely(r)) { - if (unlikely(!r->pid.weak) && - (globals.runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN)) { + if (unlikely(!r->pid.weak) && (globals.runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN)) { thread_rthc_set(env->me_txkey, nullptr); r = nullptr; } else { @@ -946,8 +876,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { } if (likely(r)) { - if (unlikely(r->pid.weak != env->pid || - r->txnid.weak < SAFE64_INVALID_THRESHOLD)) + if 
(unlikely(r->pid.weak != env->pid || r->txnid.weak < SAFE64_INVALID_THRESHOLD)) return MDBX_BAD_RSLOT; } else if (env->lck_mmap.lck) { bsr_t brs = mvcc_bind_slot(env); @@ -970,8 +899,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { return MDBX_SUCCESS; } txn->owner = (uintptr_t)r->tid.weak; - if ((env->flags & MDBX_NOSTICKYTHREADS) == 0 && env->txn && - unlikely(env->basal_txn->owner == txn->owner) && + if ((env->flags & MDBX_NOSTICKYTHREADS) == 0 && env->txn && unlikely(env->basal_txn->owner == txn->owner) && (globals.runtime_flags & MDBX_DBG_LEGACY_OVERLAP) == 0) return MDBX_TXN_OVERLAPPING; @@ -980,26 +908,18 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { size_t loop = 0; troika_t troika = meta_tap(env); while (1) { - const meta_ptr_t head = - likely(env->stuck_meta < 0) - ? /* regular */ meta_recent(env, &troika) - : /* recovery mode */ meta_ptr(env, env->stuck_meta); + const meta_ptr_t head = likely(env->stuck_meta < 0) ? /* regular */ meta_recent(env, &troika) + : /* recovery mode */ meta_ptr(env, env->stuck_meta); if (likely(r != nullptr)) { safe64_reset(&r->txnid, true); - atomic_store32(&r->snapshot_pages_used, - head.ptr_v->geometry.first_unallocated, mo_Relaxed); - atomic_store64( - &r->snapshot_pages_retired, - unaligned_peek_u64_volatile(4, head.ptr_v->pages_retired), - mo_Relaxed); + atomic_store32(&r->snapshot_pages_used, head.ptr_v->geometry.first_unallocated, mo_Relaxed); + atomic_store64(&r->snapshot_pages_retired, unaligned_peek_u64_volatile(4, head.ptr_v->pages_retired), + mo_Relaxed); safe64_write(&r->txnid, head.txnid); eASSERT(env, r->pid.weak == osal_getpid()); - eASSERT(env, r->tid.weak == ((env->flags & MDBX_NOSTICKYTHREADS) - ? 0 - : osal_thread_self())); + eASSERT(env, r->tid.weak == ((env->flags & MDBX_NOSTICKYTHREADS) ? 
0 : osal_thread_self())); eASSERT(env, r->txnid.weak == head.txnid || - (r->txnid.weak >= SAFE64_INVALID_THRESHOLD && - head.txnid < env->lck->cached_oldest.weak)); + (r->txnid.weak >= SAFE64_INVALID_THRESHOLD && head.txnid < env->lck->cached_oldest.weak)); atomic_store32(&env->lck->rdt_refresh_flag, true, mo_AcquireRelease); } else { /* exclusive mode without lck */ @@ -1013,8 +933,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { timestamp = 0; continue; } - ERROR("bailout waiting for valid snapshot (%s)", - "meta-pages are too volatile"); + ERROR("bailout waiting for valid snapshot (%s)", "meta-pages are too volatile"); rc = MDBX_PROBLEM; goto read_failed; } @@ -1029,8 +948,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { goto read_failed; } - const uint64_t snap_oldest = - atomic_load64(&env->lck->cached_oldest, mo_AcquireRelease); + const uint64_t snap_oldest = atomic_load64(&env->lck->cached_oldest, mo_AcquireRelease); if (unlikely(txn->txnid < snap_oldest)) { if (env->stuck_meta < 0) goto retry; @@ -1041,8 +959,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { goto read_failed; } - if (likely(r != nullptr) && - unlikely(txn->txnid != atomic_load64(&r->txnid, mo_Relaxed))) + if (likely(r != nullptr) && unlikely(txn->txnid != atomic_load64(&r->txnid, mo_Relaxed))) goto retry; break; } @@ -1058,23 +975,19 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { } tASSERT(txn, rc == MDBX_SUCCESS); - ENSURE(env, - txn->txnid >= - /* paranoia is appropriate here */ env->lck->cached_oldest.weak); + ENSURE(env, txn->txnid >= + /* paranoia is appropriate here */ env->lck->cached_oldest.weak); tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY); tASSERT(txn, check_table_flags(txn->dbs[MAIN_DBI].flags)); } else { - eASSERT(env, (flags & ~(txn_rw_begin_flags | MDBX_TXN_SPILLS | - MDBX_WRITEMAP | MDBX_NOSTICKYTHREADS)) == 0); + eASSERT(env, (flags & ~(txn_rw_begin_flags | MDBX_TXN_SPILLS | MDBX_WRITEMAP | MDBX_NOSTICKYTHREADS)) == 0); const uintptr_t tid = 
osal_thread_self(); if (unlikely(txn->owner == tid || /* not recovery mode */ env->stuck_meta >= 0)) return MDBX_BUSY; lck_t *const lck = env->lck_mmap.lck; - if (lck && (env->flags & MDBX_NOSTICKYTHREADS) == 0 && - (globals.runtime_flags & MDBX_DBG_LEGACY_OVERLAP) == 0) { - const size_t snap_nreaders = - atomic_load32(&lck->rdt_length, mo_AcquireRelease); + if (lck && (env->flags & MDBX_NOSTICKYTHREADS) == 0 && (globals.runtime_flags & MDBX_DBG_LEGACY_OVERLAP) == 0) { + const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease); for (size_t i = 0; i < snap_nreaders; ++i) { if (atomic_load32(&lck->rdt[i].pid, mo_Relaxed) == env->pid && unlikely(atomic_load64(&lck->rdt[i].tid, mo_Relaxed) == tid)) { @@ -1153,8 +1066,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { eASSERT(env, txn->tw.writemap_spilled_npages == 0); } - txn->front_txnid = - txn->txnid + ((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == 0); + txn->front_txnid = txn->txnid + ((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == 0); /* Setup db info */ tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY); @@ -1162,10 +1074,8 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { VALGRIND_MAKE_MEM_UNDEFINED(txn->dbi_state, env->max_dbi); #if MDBX_ENABLE_DBI_SPARSE txn->n_dbi = CORE_DBS; - VALGRIND_MAKE_MEM_UNDEFINED( - txn->dbi_sparse, - ceil_powerof2(env->max_dbi, CHAR_BIT * sizeof(txn->dbi_sparse[0])) / - CHAR_BIT); + VALGRIND_MAKE_MEM_UNDEFINED(txn->dbi_sparse, + ceil_powerof2(env->max_dbi, CHAR_BIT * sizeof(txn->dbi_sparse[0])) / CHAR_BIT); txn->dbi_sparse[0] = (1 << CORE_DBS) - 1; #else txn->n_dbi = (env->n_dbi < 8) ? 
env->n_dbi : 8; @@ -1177,13 +1087,10 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { txn->cursors[FREE_DBI] = nullptr; txn->cursors[MAIN_DBI] = nullptr; txn->dbi_seqs[FREE_DBI] = 0; - txn->dbi_seqs[MAIN_DBI] = - atomic_load32(&env->dbi_seqs[MAIN_DBI], mo_AcquireRelease); + txn->dbi_seqs[MAIN_DBI] = atomic_load32(&env->dbi_seqs[MAIN_DBI], mo_AcquireRelease); - if (unlikely(env->dbs_flags[MAIN_DBI] != - (DB_VALID | txn->dbs[MAIN_DBI].flags))) { - const bool need_txn_lock = - env->basal_txn && env->basal_txn->owner != osal_thread_self(); + if (unlikely(env->dbs_flags[MAIN_DBI] != (DB_VALID | txn->dbs[MAIN_DBI].flags))) { + const bool need_txn_lock = env->basal_txn && env->basal_txn->owner != osal_thread_self(); bool should_unlock = false; if (need_txn_lock) { rc = lck_txn_lock(env, true); @@ -1202,10 +1109,8 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { * то следующая будет ждать на dbi_lock */ !env->txn) { if (env->dbs_flags[MAIN_DBI] != 0 || MDBX_DEBUG) - NOTICE("renew MainDB for %s-txn %" PRIaTXN - " since db-flags changes 0x%x -> 0x%x", - (txn->flags & MDBX_TXN_RDONLY) ? "ro" : "rw", txn->txnid, - env->dbs_flags[MAIN_DBI] & ~DB_VALID, + NOTICE("renew MainDB for %s-txn %" PRIaTXN " since db-flags changes 0x%x -> 0x%x", + (txn->flags & MDBX_TXN_RDONLY) ? 
"ro" : "rw", txn->txnid, env->dbs_flags[MAIN_DBI] & ~DB_VALID, txn->dbs[MAIN_DBI].flags); env->dbs_flags[MAIN_DBI] = DB_POISON; atomic_store32(&env->dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease); @@ -1213,14 +1118,12 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { if (likely(rc == MDBX_SUCCESS)) { seq = dbi_seq_next(env, MAIN_DBI); env->dbs_flags[MAIN_DBI] = DB_VALID | txn->dbs[MAIN_DBI].flags; - txn->dbi_seqs[MAIN_DBI] = atomic_store32(&env->dbi_seqs[MAIN_DBI], - seq, mo_AcquireRelease); + txn->dbi_seqs[MAIN_DBI] = atomic_store32(&env->dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease); } } else { ERROR("MainDB db-flags changes 0x%x -> 0x%x ahead of read-txn " "%" PRIaTXN, - txn->dbs[MAIN_DBI].flags, env->dbs_flags[MAIN_DBI] & ~DB_VALID, - txn->txnid); + txn->dbs[MAIN_DBI].flags, env->dbs_flags[MAIN_DBI] & ~DB_VALID, txn->txnid); rc = MDBX_INCOMPATIBLE; } } @@ -1235,8 +1138,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { } if (unlikely(txn->dbs[FREE_DBI].flags != MDBX_INTEGERKEY)) { - ERROR("unexpected/invalid db-flags 0x%x for %s", txn->dbs[FREE_DBI].flags, - "GC/FreeDB"); + ERROR("unexpected/invalid db-flags 0x%x for %s", txn->dbs[FREE_DBI].flags, "GC/FreeDB"); rc = MDBX_INCOMPATIBLE; goto bailout; } @@ -1249,8 +1151,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { } else { const size_t size_bytes = pgno2bytes(env, txn->geo.end_pgno); const size_t used_bytes = pgno2bytes(env, txn->geo.first_unallocated); - const size_t required_bytes = - (txn->flags & MDBX_TXN_RDONLY) ? used_bytes : size_bytes; + const size_t required_bytes = (txn->flags & MDBX_TXN_RDONLY) ? used_bytes : size_bytes; eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current); if (unlikely(required_bytes > env->dxb_mmap.current)) { /* Размер БД (для пишущих транзакций) или используемых данных (для @@ -1259,13 +1160,11 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { * границы размера БД и отображения. 
В читающих транзакциях нельзя * изменять размер файла, который может быть больше необходимого этой * транзакции. */ - if (txn->geo.upper > MAX_PAGENO + 1 || - bytes2pgno(env, pgno2bytes(env, txn->geo.upper)) != txn->geo.upper) { + if (txn->geo.upper > MAX_PAGENO + 1 || bytes2pgno(env, pgno2bytes(env, txn->geo.upper)) != txn->geo.upper) { rc = MDBX_UNABLE_EXTEND_MAPSIZE; goto bailout; } - rc = dxb_resize(env, txn->geo.first_unallocated, txn->geo.end_pgno, - txn->geo.upper, implicit_grow); + rc = dxb_resize(env, txn->geo.first_unallocated, txn->geo.end_pgno, txn->geo.upper, implicit_grow); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current); @@ -1300,9 +1199,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { eASSERT(env, env->dxb_mmap.filesize >= required_bytes); if (env->dxb_mmap.current > env->dxb_mmap.filesize) env->dxb_mmap.current = - (env->dxb_mmap.limit < env->dxb_mmap.filesize) - ? env->dxb_mmap.limit - : (size_t)env->dxb_mmap.filesize; + (env->dxb_mmap.limit < env->dxb_mmap.filesize) ? env->dxb_mmap.limit : (size_t)env->dxb_mmap.filesize; } #if defined(_WIN32) || defined(_WIN64) imports.srwl_ReleaseShared(&env->remap_guard); @@ -1315,8 +1212,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } - eASSERT(env, pgno2bytes(env, txn->geo.first_unallocated) <= - env->dxb_mmap.current); + eASSERT(env, pgno2bytes(env, txn->geo.first_unallocated) <= env->dxb_mmap.current); eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current); if (txn->flags & MDBX_TXN_RDONLY) { #if defined(_WIN32) || defined(_WIN64) @@ -1326,8 +1222,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { * since Wine don't support section extending, * i.e. in both cases unmap+map are required. 
*/ used_bytes < env->geo_in_bytes.upper && env->geo_in_bytes.grow)) && - /* avoid recursive use SRW */ (txn->flags & MDBX_NOSTICKYTHREADS) == - 0) { + /* avoid recursive use SRW */ (txn->flags & MDBX_NOSTICKYTHREADS) == 0) { txn->flags |= txn_shrink_allowed; imports.srwl_AcquireShared(&env->remap_guard); } @@ -1352,11 +1247,9 @@ int txn_end(MDBX_txn *txn, unsigned mode) { MDBX_env *env = txn->env; static const char *const names[] = TXN_END_NAMES; - DEBUG("%s txn %" PRIaTXN "%c-0x%X %p on env %p, root page %" PRIaPGNO - "/%" PRIaPGNO, - names[mode & TXN_END_OPMASK], txn->txnid, - (txn->flags & MDBX_TXN_RDONLY) ? 'r' : 'w', txn->flags, (void *)txn, - (void *)env, txn->dbs[MAIN_DBI].root, txn->dbs[FREE_DBI].root); + DEBUG("%s txn %" PRIaTXN "%c-0x%X %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, names[mode & TXN_END_OPMASK], + txn->txnid, (txn->flags & MDBX_TXN_RDONLY) ? 'r' : 'w', txn->flags, (void *)txn, (void *)env, + txn->dbs[MAIN_DBI].root, txn->dbs[FREE_DBI].root); if (!(mode & TXN_END_EOTDONE)) /* !(already closed cursors) */ done_cursors(txn, false); @@ -1369,21 +1262,17 @@ int txn_end(MDBX_txn *txn, unsigned mode) { if (likely(!(txn->flags & MDBX_TXN_FINISHED))) { if (likely((txn->flags & MDBX_TXN_PARKED) == 0)) { ENSURE(env, txn->txnid >= - /* paranoia is appropriate here */ env->lck - ->cached_oldest.weak); - eASSERT(env, txn->txnid == slot->txnid.weak && - slot->txnid.weak >= env->lck->cached_oldest.weak); + /* paranoia is appropriate here */ env->lck->cached_oldest.weak); + eASSERT(env, txn->txnid == slot->txnid.weak && slot->txnid.weak >= env->lck->cached_oldest.weak); } else { - if ((mode & TXN_END_OPMASK) != TXN_END_OUSTED && - safe64_read(&slot->tid) == MDBX_TID_TXN_OUSTED) + if ((mode & TXN_END_OPMASK) != TXN_END_OUSTED && safe64_read(&slot->tid) == MDBX_TID_TXN_OUSTED) mode = (mode & ~TXN_END_OPMASK) | TXN_END_OUSTED; do { safe64_reset(&slot->txnid, false); atomic_store64(&slot->tid, txn->owner, mo_AcquireRelease); atomic_yield(); } while ( - 
unlikely(safe64_read(&slot->txnid) < SAFE64_INVALID_THRESHOLD || - safe64_read(&slot->tid) != txn->owner)); + unlikely(safe64_read(&slot->txnid) < SAFE64_INVALID_THRESHOLD || safe64_read(&slot->tid) != txn->owner)); } dxb_sanitize_tail(env, nullptr); atomic_store32(&slot->snapshot_pages_used, 0, mo_Relaxed); @@ -1404,14 +1293,12 @@ int txn_end(MDBX_txn *txn, unsigned mode) { imports.srwl_ReleaseShared(&env->remap_guard); #endif txn->n_dbi = 0; /* prevent further DBI activity */ - txn->flags = ((mode & TXN_END_OPMASK) != TXN_END_OUSTED) - ? MDBX_TXN_RDONLY | MDBX_TXN_FINISHED - : MDBX_TXN_RDONLY | MDBX_TXN_FINISHED | MDBX_TXN_OUSTED; + txn->flags = ((mode & TXN_END_OPMASK) != TXN_END_OUSTED) ? MDBX_TXN_RDONLY | MDBX_TXN_FINISHED + : MDBX_TXN_RDONLY | MDBX_TXN_FINISHED | MDBX_TXN_OUSTED; txn->owner = 0; } else if (!(txn->flags & MDBX_TXN_FINISHED)) { - ENSURE(env, - txn->txnid >= - /* paranoia is appropriate here */ env->lck->cached_oldest.weak); + ENSURE(env, txn->txnid >= + /* paranoia is appropriate here */ env->lck->cached_oldest.weak); if (txn == env->basal_txn) dxb_sanitize_tail(env, nullptr); @@ -1433,28 +1320,20 @@ int txn_end(MDBX_txn *txn, unsigned mode) { eASSERT(env, txn->parent != nullptr); MDBX_txn *const parent = txn->parent; eASSERT(env, parent->signature == txn_signature); - eASSERT(env, parent->nested == txn && - (parent->flags & MDBX_TXN_HAS_CHILD) != 0); - eASSERT(env, - pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - - MDBX_ENABLE_REFUND)); - eASSERT(env, memcmp(&txn->tw.troika, &parent->tw.troika, - sizeof(troika_t)) == 0); + eASSERT(env, parent->nested == txn && (parent->flags & MDBX_TXN_HAS_CHILD) != 0); + eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); + eASSERT(env, memcmp(&txn->tw.troika, &parent->tw.troika, sizeof(troika_t)) == 0); txn->owner = 0; if (txn->tw.gc.reclaimed) { - eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) >= - (uintptr_t)parent->tw.gc.reclaimed); - 
MDBX_PNL_SETSIZE(txn->tw.gc.reclaimed, - (uintptr_t)parent->tw.gc.reclaimed); + eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) >= (uintptr_t)parent->tw.gc.reclaimed); + MDBX_PNL_SETSIZE(txn->tw.gc.reclaimed, (uintptr_t)parent->tw.gc.reclaimed); parent->tw.gc.reclaimed = txn->tw.gc.reclaimed; } if (txn->tw.retired_pages) { - eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.retired_pages) >= - (uintptr_t)parent->tw.retired_pages); - MDBX_PNL_SETSIZE(txn->tw.retired_pages, - (uintptr_t)parent->tw.retired_pages); + eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.retired_pages) >= (uintptr_t)parent->tw.retired_pages); + MDBX_PNL_SETSIZE(txn->tw.retired_pages, (uintptr_t)parent->tw.retired_pages); parent->tw.retired_pages = txn->tw.retired_pages; } @@ -1467,18 +1346,15 @@ int txn_end(MDBX_txn *txn, unsigned mode) { dpl_free(txn); pnl_free(txn->tw.relist); - if (parent->geo.upper != txn->geo.upper || - parent->geo.now != txn->geo.now) { + if (parent->geo.upper != txn->geo.upper || parent->geo.now != txn->geo.now) { /* undo resize performed by child txn */ - rc = dxb_resize(env, parent->geo.first_unallocated, parent->geo.now, - parent->geo.upper, impilict_shrink); + rc = dxb_resize(env, parent->geo.first_unallocated, parent->geo.now, parent->geo.upper, impilict_shrink); if (rc == MDBX_EPERM) { /* unable undo resize (it is regular for Windows), * therefore promote size changes from child to the parent txn */ WARNING("unable undo resize performed by child txn, promote to " "the parent (%u->%u, %u->%u)", - txn->geo.now, parent->geo.now, txn->geo.upper, - parent->geo.upper); + txn->geo.now, parent->geo.now, txn->geo.upper, parent->geo.upper); parent->geo.now = txn->geo.now; parent->geo.upper = txn->geo.upper; parent->flags |= MDBX_TXN_DIRTY; @@ -1524,13 +1400,10 @@ int mdbx_txn_renew(MDBX_txn *txn) { int rc = txn_renew(txn, MDBX_TXN_RDONLY); if (rc == MDBX_SUCCESS) { - tASSERT(txn, txn->owner == (txn->flags & MDBX_NOSTICKYTHREADS) - ? 
0 - : osal_thread_self()); - DEBUG("renew txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO - "/%" PRIaPGNO, - txn->txnid, (txn->flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, - (void *)txn->env, txn->dbs[MAIN_DBI].root, txn->dbs[FREE_DBI].root); + tASSERT(txn, txn->owner == (txn->flags & MDBX_NOSTICKYTHREADS) ? 0 : osal_thread_self()); + DEBUG("renew txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->txnid, + (txn->flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, (void *)txn->env, txn->dbs[MAIN_DBI].root, + txn->dbs[FREE_DBI].root); } return rc; } @@ -1544,12 +1417,9 @@ int mdbx_txn_set_userctx(MDBX_txn *txn, void *ctx) { return MDBX_SUCCESS; } -void *mdbx_txn_get_userctx(const MDBX_txn *txn) { - return check_txn(txn, MDBX_TXN_FINISHED) ? nullptr : txn->userctx; -} +void *mdbx_txn_get_userctx(const MDBX_txn *txn) { return check_txn(txn, MDBX_TXN_FINISHED) ? nullptr : txn->userctx; } -int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, - MDBX_txn **ret, void *context) { +int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, MDBX_txn **ret, void *context) { if (unlikely(!ret)) return MDBX_EINVAL; *ret = nullptr; @@ -1567,23 +1437,19 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, MDBX_txn *txn = nullptr; if (parent) { /* Nested transactions: Max 1 child, write txns only, no writemap */ - rc = check_txn_rw(parent, - MDBX_TXN_RDONLY | MDBX_WRITEMAP | MDBX_TXN_BLOCKED); + rc = check_txn_rw(parent, MDBX_TXN_RDONLY | MDBX_WRITEMAP | MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) return rc; if (env->options.spill_parent4child_denominator) { /* Spill dirty-pages of parent to provide dirtyroom for child txn */ - rc = txn_spill(parent, nullptr, - parent->tw.dirtylist->length / - env->options.spill_parent4child_denominator); + rc = txn_spill(parent, nullptr, parent->tw.dirtylist->length / env->options.spill_parent4child_denominator); if (unlikely(rc 
!= MDBX_SUCCESS)) return rc; } tASSERT(parent, audit_ex(parent, 0, false) == 0); - flags |= parent->flags & (txn_rw_begin_flags | MDBX_TXN_SPILLS | - MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP); + flags |= parent->flags & (txn_rw_begin_flags | MDBX_TXN_SPILLS | MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP); } else if ((flags & MDBX_TXN_RDONLY) == 0) { /* Reuse preallocated write txn. However, do not touch it until * txn_renew() succeeds, since it currently may be active. */ @@ -1593,22 +1459,16 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, const intptr_t bitmap_bytes = #if MDBX_ENABLE_DBI_SPARSE - ceil_powerof2(env->max_dbi, CHAR_BIT * sizeof(txn->dbi_sparse[0])) / - CHAR_BIT; + ceil_powerof2(env->max_dbi, CHAR_BIT * sizeof(txn->dbi_sparse[0])) / CHAR_BIT; #else 0; #endif /* MDBX_ENABLE_DBI_SPARSE */ STATIC_ASSERT(sizeof(txn->tw) > sizeof(txn->to)); - const size_t base = (flags & MDBX_TXN_RDONLY) - ? sizeof(MDBX_txn) - sizeof(txn->tw) + sizeof(txn->to) - : sizeof(MDBX_txn); - const size_t size = - base + - ((flags & MDBX_TXN_RDONLY) - ? (size_t)bitmap_bytes + env->max_dbi * sizeof(txn->dbi_seqs[0]) - : 0) + - env->max_dbi * (sizeof(txn->dbs[0]) + sizeof(txn->cursors[0]) + - sizeof(txn->dbi_state[0])); + const size_t base = + (flags & MDBX_TXN_RDONLY) ? sizeof(MDBX_txn) - sizeof(txn->tw) + sizeof(txn->to) : sizeof(MDBX_txn); + const size_t size = base + + ((flags & MDBX_TXN_RDONLY) ? (size_t)bitmap_bytes + env->max_dbi * sizeof(txn->dbi_seqs[0]) : 0) + + env->max_dbi * (sizeof(txn->dbs[0]) + sizeof(txn->cursors[0]) + sizeof(txn->dbi_state[0])); txn = osal_malloc(size); if (unlikely(txn == nullptr)) { DEBUG("calloc: %s", "failed"); @@ -1619,15 +1479,13 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, VALGRIND_MAKE_MEM_UNDEFINED(txn, size); #endif /* MDBX_DEBUG */ MDBX_ANALYSIS_ASSUME(size > base); - memset(txn, 0, - (MDBX_GOOFY_MSVC_STATIC_ANALYZER && base > size) ? 
size : base); + memset(txn, 0, (MDBX_GOOFY_MSVC_STATIC_ANALYZER && base > size) ? size : base); txn->dbs = ptr_disp(txn, base); txn->cursors = ptr_disp(txn->dbs, env->max_dbi * sizeof(txn->dbs[0])); #if MDBX_DEBUG txn->cursors[FREE_DBI] = nullptr; /* avoid SIGSEGV in an assertion later */ #endif - txn->dbi_state = - ptr_disp(txn, size - env->max_dbi * sizeof(txn->dbi_state[0])); + txn->dbi_state = ptr_disp(txn, size - env->max_dbi * sizeof(txn->dbi_state[0])); txn->flags = flags; txn->env = env; @@ -1640,10 +1498,8 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, txn->geo = parent->geo; rc = dpl_alloc(txn); if (likely(rc == MDBX_SUCCESS)) { - const size_t len = - MDBX_PNL_GETSIZE(parent->tw.relist) + parent->tw.loose_count; - txn->tw.relist = - pnl_alloc((len > MDBX_PNL_INITIAL) ? len : MDBX_PNL_INITIAL); + const size_t len = MDBX_PNL_GETSIZE(parent->tw.relist) + parent->tw.loose_count; + txn->tw.relist = pnl_alloc((len > MDBX_PNL_INITIAL) ? len : MDBX_PNL_INITIAL); if (unlikely(!txn->tw.relist)) rc = MDBX_ENOMEM; } @@ -1682,28 +1538,22 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, if (parent->tw.spilled.list) spill_purge(parent); - tASSERT(txn, MDBX_PNL_ALLOCLEN(txn->tw.relist) >= - MDBX_PNL_GETSIZE(parent->tw.relist)); - memcpy(txn->tw.relist, parent->tw.relist, - MDBX_PNL_SIZEOF(parent->tw.relist)); - eASSERT(env, pnl_check_allocated( - txn->tw.relist, - (txn->geo.first_unallocated /* LY: intentional assignment - here, only for assertion */ - = parent->geo.first_unallocated) - - MDBX_ENABLE_REFUND)); + tASSERT(txn, MDBX_PNL_ALLOCLEN(txn->tw.relist) >= MDBX_PNL_GETSIZE(parent->tw.relist)); + memcpy(txn->tw.relist, parent->tw.relist, MDBX_PNL_SIZEOF(parent->tw.relist)); + eASSERT(env, pnl_check_allocated(txn->tw.relist, (txn->geo.first_unallocated /* LY: intentional assignment + here, only for assertion */ + = parent->geo.first_unallocated) - + MDBX_ENABLE_REFUND)); txn->tw.gc.time_acc = 
parent->tw.gc.time_acc; txn->tw.gc.last_reclaimed = parent->tw.gc.last_reclaimed; if (parent->tw.gc.reclaimed) { txn->tw.gc.reclaimed = parent->tw.gc.reclaimed; - parent->tw.gc.reclaimed = - (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->tw.gc.reclaimed); + parent->tw.gc.reclaimed = (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->tw.gc.reclaimed); } txn->tw.retired_pages = parent->tw.retired_pages; - parent->tw.retired_pages = - (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->tw.retired_pages); + parent->tw.retired_pages = (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->tw.retired_pages); txn->txnid = parent->txnid; txn->front_txnid = parent->front_txnid + 1; @@ -1719,25 +1569,18 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, txn->cursors[FREE_DBI] = nullptr; txn->cursors[MAIN_DBI] = nullptr; - txn->dbi_state[FREE_DBI] = - parent->dbi_state[FREE_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); - txn->dbi_state[MAIN_DBI] = - parent->dbi_state[MAIN_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); - memset(txn->dbi_state + CORE_DBS, 0, - (txn->n_dbi = parent->n_dbi) - CORE_DBS); + txn->dbi_state[FREE_DBI] = parent->dbi_state[FREE_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); + txn->dbi_state[MAIN_DBI] = parent->dbi_state[MAIN_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); + memset(txn->dbi_state + CORE_DBS, 0, (txn->n_dbi = parent->n_dbi) - CORE_DBS); memcpy(txn->dbs, parent->dbs, sizeof(txn->dbs[0]) * CORE_DBS); tASSERT(parent, parent->tw.dirtyroom + parent->tw.dirtylist->length == - (parent->parent ? parent->parent->tw.dirtyroom - : parent->env->options.dp_limit)); + (parent->parent ? parent->parent->tw.dirtyroom : parent->env->options.dp_limit)); tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == - (txn->parent ? txn->parent->tw.dirtyroom - : txn->env->options.dp_limit)); + (txn->parent ? 
txn->parent->tw.dirtyroom : txn->env->options.dp_limit)); env->txn = txn; tASSERT(parent, parent->cursors[FREE_DBI] == nullptr); - rc = parent->cursors[MAIN_DBI] - ? cursor_shadow(parent->cursors[MAIN_DBI], txn, MAIN_DBI) - : MDBX_SUCCESS; + rc = parent->cursors[MAIN_DBI] ? cursor_shadow(parent->cursors[MAIN_DBI], txn, MAIN_DBI) : MDBX_SUCCESS; if (AUDIT_ENABLED() && ASSERT_ENABLED()) { txn->signature = txn_signature; tASSERT(txn, audit_ex(txn, 0, false) == 0); @@ -1745,8 +1588,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, if (unlikely(rc != MDBX_SUCCESS)) txn_end(txn, TXN_END_FAIL_BEGINCHILD); } else { /* MDBX_TXN_RDONLY */ - txn->dbi_seqs = - ptr_disp(txn->cursors, env->max_dbi * sizeof(txn->cursors[0])); + txn->dbi_seqs = ptr_disp(txn->cursors, env->max_dbi * sizeof(txn->cursors[0])); #if MDBX_ENABLE_DBI_SPARSE txn->dbi_sparse = ptr_disp(txn->dbi_state, -bitmap_bytes); #endif /* MDBX_ENABLE_DBI_SPARSE */ @@ -1761,22 +1603,19 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, if (flags & (MDBX_TXN_RDONLY_PREPARE - MDBX_TXN_RDONLY)) eASSERT(env, txn->flags == (MDBX_TXN_RDONLY | MDBX_TXN_FINISHED)); else if (flags & MDBX_TXN_RDONLY) - eASSERT(env, (txn->flags & - ~(MDBX_NOSTICKYTHREADS | MDBX_TXN_RDONLY | MDBX_WRITEMAP | - /* Win32: SRWL flag */ txn_shrink_allowed)) == 0); + eASSERT(env, (txn->flags & ~(MDBX_NOSTICKYTHREADS | MDBX_TXN_RDONLY | MDBX_WRITEMAP | + /* Win32: SRWL flag */ txn_shrink_allowed)) == 0); else { - eASSERT(env, (txn->flags & ~(MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP | - txn_shrink_allowed | MDBX_NOMETASYNC | + eASSERT(env, (txn->flags & ~(MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP | txn_shrink_allowed | MDBX_NOMETASYNC | MDBX_SAFE_NOSYNC | MDBX_TXN_SPILLS)) == 0); assert(!txn->tw.spilled.list && !txn->tw.spilled.least_removed); } txn->signature = txn_signature; txn->userctx = context; *ret = txn; - DEBUG("begin txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO - "/%" PRIaPGNO, - 
txn->txnid, (flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, - (void *)env, txn->dbs[MAIN_DBI].root, txn->dbs[FREE_DBI].root); + DEBUG("begin txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->txnid, + (flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, (void *)env, txn->dbs[MAIN_DBI].root, + txn->dbs[FREE_DBI].root); } return rc; @@ -1811,43 +1650,34 @@ int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) { head_retired = unaligned_peek_u64_volatile(4, head.ptr_v->pages_retired); info->txn_space_limit_soft = pgno2bytes(env, head.ptr_v->geometry.now); info->txn_space_limit_hard = pgno2bytes(env, head.ptr_v->geometry.upper); - info->txn_space_leftover = - pgno2bytes(env, head.ptr_v->geometry.now - - head.ptr_v->geometry.first_unallocated); + info->txn_space_leftover = pgno2bytes(env, head.ptr_v->geometry.now - head.ptr_v->geometry.first_unallocated); } while (unlikely(meta_should_retry(env, &troika))); info->txn_reader_lag = head.txnid - info->txn_id; info->txn_space_dirty = info->txn_space_retired = 0; uint64_t reader_snapshot_pages_retired = 0; if (txn->to.reader && - ((txn->flags & MDBX_TXN_PARKED) == 0 || - safe64_read(&txn->to.reader->tid) != MDBX_TID_TXN_OUSTED) && + ((txn->flags & MDBX_TXN_PARKED) == 0 || safe64_read(&txn->to.reader->tid) != MDBX_TID_TXN_OUSTED) && head_retired > - (reader_snapshot_pages_retired = atomic_load64( - &txn->to.reader->snapshot_pages_retired, mo_Relaxed))) { - info->txn_space_dirty = info->txn_space_retired = pgno2bytes( - env, (pgno_t)(head_retired - reader_snapshot_pages_retired)); + (reader_snapshot_pages_retired = atomic_load64(&txn->to.reader->snapshot_pages_retired, mo_Relaxed))) { + info->txn_space_dirty = info->txn_space_retired = + pgno2bytes(env, (pgno_t)(head_retired - reader_snapshot_pages_retired)); size_t retired_next_reader = 0; lck_t *const lck = env->lck_mmap.lck; if (scan_rlt && info->txn_reader_lag > 1 && lck) { /* find next more recent reader */ txnid_t 
next_reader = head.txnid; - const size_t snap_nreaders = - atomic_load32(&lck->rdt_length, mo_AcquireRelease); + const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease); for (size_t i = 0; i < snap_nreaders; ++i) { retry: if (atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease)) { jitter4testing(true); const uint64_t snap_tid = safe64_read(&lck->rdt[i].tid); const txnid_t snap_txnid = safe64_read(&lck->rdt[i].txnid); - const uint64_t snap_retired = atomic_load64( - &lck->rdt[i].snapshot_pages_retired, mo_AcquireRelease); - if (unlikely(snap_retired != - atomic_load64(&lck->rdt[i].snapshot_pages_retired, - mo_Relaxed)) || - snap_txnid != safe64_read(&lck->rdt[i].txnid) || - snap_tid != safe64_read(&lck->rdt[i].tid)) + const uint64_t snap_retired = atomic_load64(&lck->rdt[i].snapshot_pages_retired, mo_AcquireRelease); + if (unlikely(snap_retired != atomic_load64(&lck->rdt[i].snapshot_pages_retired, mo_Relaxed)) || + snap_txnid != safe64_read(&lck->rdt[i].txnid) || snap_tid != safe64_read(&lck->rdt[i].tid)) goto retry; if (snap_txnid <= txn->txnid) { retired_next_reader = 0; @@ -1856,10 +1686,7 @@ int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) { if (snap_txnid < next_reader && snap_tid >= MDBX_TID_TXN_OUSTED) { next_reader = snap_txnid; retired_next_reader = pgno2bytes( - env, (pgno_t)(snap_retired - - atomic_load64( - &txn->to.reader->snapshot_pages_retired, - mo_Relaxed))); + env, (pgno_t)(snap_retired - atomic_load64(&txn->to.reader->snapshot_pages_retired, mo_Relaxed))); } } } @@ -1870,27 +1697,23 @@ int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) { info->txn_space_limit_soft = pgno2bytes(env, txn->geo.now); info->txn_space_limit_hard = pgno2bytes(env, txn->geo.upper); info->txn_space_retired = - pgno2bytes(env, txn->nested ? (size_t)txn->tw.retired_pages - : MDBX_PNL_GETSIZE(txn->tw.retired_pages)); + pgno2bytes(env, txn->nested ? 
(size_t)txn->tw.retired_pages : MDBX_PNL_GETSIZE(txn->tw.retired_pages)); info->txn_space_leftover = pgno2bytes(env, txn->tw.dirtyroom); - info->txn_space_dirty = pgno2bytes( - env, txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose - : (txn->tw.writemap_dirty_npages + - txn->tw.writemap_spilled_npages)); + info->txn_space_dirty = + pgno2bytes(env, txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose + : (txn->tw.writemap_dirty_npages + txn->tw.writemap_spilled_npages)); info->txn_reader_lag = INT64_MAX; lck_t *const lck = env->lck_mmap.lck; if (scan_rlt && lck) { txnid_t oldest_snapshot = txn->txnid; - const size_t snap_nreaders = - atomic_load32(&lck->rdt_length, mo_AcquireRelease); + const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease); if (snap_nreaders) { oldest_snapshot = txn_snapshot_oldest(txn); if (oldest_snapshot == txn->txnid - 1) { /* check if there is at least one reader */ bool exists = false; for (size_t i = 0; i < snap_nreaders; ++i) { - if (atomic_load32(&lck->rdt[i].pid, mo_Relaxed) && - txn->txnid > safe64_read(&lck->rdt[i].txnid)) { + if (atomic_load32(&lck->rdt[i].pid, mo_Relaxed) && txn->txnid > safe64_read(&lck->rdt[i].txnid)) { exists = true; break; } @@ -1906,8 +1729,7 @@ int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) { } MDBX_env *mdbx_txn_env(const MDBX_txn *txn) { - if (unlikely(!txn || txn->signature != txn_signature || - txn->env->signature.weak != env_signature)) + if (unlikely(!txn || txn->signature != txn_signature || txn->env->signature.weak != env_signature)) return nullptr; return txn->env; } @@ -1920,10 +1742,8 @@ uint64_t mdbx_txn_id(const MDBX_txn *txn) { MDBX_txn_flags_t mdbx_txn_flags(const MDBX_txn *txn) { STATIC_ASSERT( - (MDBX_TXN_INVALID & - (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | - MDBX_TXN_HAS_CHILD | txn_gc_drained | txn_shrink_allowed | - txn_rw_begin_flags | txn_ro_begin_flags)) == 0); + (MDBX_TXN_INVALID & 
(MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | MDBX_TXN_HAS_CHILD | + txn_gc_drained | txn_shrink_allowed | txn_rw_begin_flags | txn_ro_begin_flags)) == 0); if (unlikely(!txn || txn->signature != txn_signature)) return MDBX_TXN_INVALID; assert(0 == (int)(txn->flags & MDBX_TXN_INVALID)); @@ -1975,8 +1795,7 @@ int mdbx_txn_abort(MDBX_txn *txn) { if (unlikely(rc != MDBX_SUCCESS)) return rc; - if ((txn->flags & (MDBX_TXN_RDONLY | MDBX_NOSTICKYTHREADS)) == - MDBX_NOSTICKYTHREADS && + if ((txn->flags & (MDBX_TXN_RDONLY | MDBX_NOSTICKYTHREADS)) == MDBX_NOSTICKYTHREADS && unlikely(txn->owner != osal_thread_self())) { mdbx_txn_break(txn); return MDBX_THREAD_MISMATCH; @@ -2028,11 +1847,9 @@ int txn_check_badbits_parked(const MDBX_txn *txn, int bad_bits) { * - получается что транзакцию можно припарковать, потом поломать вызвав * mdbx_txn_break(), но далее любое её использование приведет к завершению * при распарковке. */ - if ((txn->flags & (bad_bits | MDBX_TXN_AUTOUNPARK)) != - (MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK)) + if ((txn->flags & (bad_bits | MDBX_TXN_AUTOUNPARK)) != (MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK)) return MDBX_BAD_TXN; - tASSERT(txn, bad_bits == MDBX_TXN_BLOCKED || - bad_bits == MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); + tASSERT(txn, bad_bits == MDBX_TXN_BLOCKED || bad_bits == MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); return mdbx_txn_unpark((MDBX_txn *)txn, false); } diff --git a/src/unaligned.h b/src/unaligned.h index 722e084a..4085b51d 100644 --- a/src/unaligned.h +++ b/src/unaligned.h @@ -6,22 +6,17 @@ /*------------------------------------------------------------------------------ * Unaligned access */ -MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline size_t -field_alignment(size_t alignment_baseline, size_t field_offset) { +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline size_t field_alignment(size_t alignment_baseline, + size_t field_offset) { size_t merge = alignment_baseline | (size_t)field_offset; return merge & 
-(int)merge; } /* read-thunk for UB-sanitizer */ -MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t -peek_u8(const uint8_t *__restrict ptr) { - return *ptr; -} +MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t peek_u8(const uint8_t *__restrict ptr) { return *ptr; } /* write-thunk for UB-sanitizer */ -static inline void poke_u8(uint8_t *__restrict ptr, const uint8_t v) { - *ptr = v; -} +static inline void poke_u8(uint8_t *__restrict ptr, const uint8_t v) { *ptr = v; } static inline void *bcopy_2(void *__restrict dst, const void *__restrict src) { uint8_t *__restrict d = (uint8_t *)dst; @@ -31,8 +26,7 @@ static inline void *bcopy_2(void *__restrict dst, const void *__restrict src) { return d; } -static inline void *bcopy_4(void *const __restrict dst, - const void *const __restrict src) { +static inline void *bcopy_4(void *const __restrict dst, const void *const __restrict src) { uint8_t *__restrict d = (uint8_t *)dst; const uint8_t *__restrict s = (uint8_t *)src; d[0] = s[0]; @@ -42,8 +36,7 @@ static inline void *bcopy_4(void *const __restrict dst, return d; } -static inline void *bcopy_8(void *const __restrict dst, - const void *const __restrict src) { +static inline void *bcopy_8(void *const __restrict dst, const void *const __restrict src) { uint8_t *__restrict d = (uint8_t *)dst; const uint8_t *__restrict s = (uint8_t *)src; d[0] = s[0]; @@ -57,14 +50,13 @@ static inline void *bcopy_8(void *const __restrict dst, return d; } -MDBX_NOTHROW_PURE_FUNCTION static inline uint16_t -unaligned_peek_u16(const size_t expected_alignment, const void *const ptr) { +MDBX_NOTHROW_PURE_FUNCTION static inline uint16_t unaligned_peek_u16(const size_t expected_alignment, + const void *const ptr) { assert((uintptr_t)ptr % expected_alignment == 0); if (MDBX_UNALIGNED_OK >= 2 || (expected_alignment % sizeof(uint16_t)) == 0) return *(const uint16_t *)ptr; else { -#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ - defined(_M_X64) || defined(_M_IA64) +#if 
defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64) return *(const __unaligned uint16_t *)ptr; #else uint16_t v; @@ -74,15 +66,12 @@ unaligned_peek_u16(const size_t expected_alignment, const void *const ptr) { } } -static inline void unaligned_poke_u16(const size_t expected_alignment, - void *const __restrict ptr, - const uint16_t v) { +static inline void unaligned_poke_u16(const size_t expected_alignment, void *const __restrict ptr, const uint16_t v) { assert((uintptr_t)ptr % expected_alignment == 0); if (MDBX_UNALIGNED_OK >= 2 || (expected_alignment % sizeof(v)) == 0) *(uint16_t *)ptr = v; else { -#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ - defined(_M_X64) || defined(_M_IA64) +#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64) *((uint16_t __unaligned *)ptr) = v; #else bcopy_2((uint8_t *)ptr, (const uint8_t *)&v); @@ -90,21 +79,17 @@ static inline void unaligned_poke_u16(const size_t expected_alignment, } } -MDBX_NOTHROW_PURE_FUNCTION static inline uint32_t -unaligned_peek_u32(const size_t expected_alignment, - const void *const __restrict ptr) { +MDBX_NOTHROW_PURE_FUNCTION static inline uint32_t unaligned_peek_u32(const size_t expected_alignment, + const void *const __restrict ptr) { assert((uintptr_t)ptr % expected_alignment == 0); if (MDBX_UNALIGNED_OK >= 4 || (expected_alignment % sizeof(uint32_t)) == 0) return *(const uint32_t *)ptr; else if ((expected_alignment % sizeof(uint16_t)) == 0) { - const uint16_t lo = - ((const uint16_t *)ptr)[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__]; - const uint16_t hi = - ((const uint16_t *)ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__]; + const uint16_t lo = ((const uint16_t *)ptr)[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__]; + const uint16_t hi = ((const uint16_t *)ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__]; return lo | (uint32_t)hi << 16; } else { -#if defined(__unaligned) || defined(_M_ARM) 
|| defined(_M_ARM64) || \ - defined(_M_X64) || defined(_M_IA64) +#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64) return *(const __unaligned uint32_t *)ptr; #else uint32_t v; @@ -114,19 +99,15 @@ unaligned_peek_u32(const size_t expected_alignment, } } -static inline void unaligned_poke_u32(const size_t expected_alignment, - void *const __restrict ptr, - const uint32_t v) { +static inline void unaligned_poke_u32(const size_t expected_alignment, void *const __restrict ptr, const uint32_t v) { assert((uintptr_t)ptr % expected_alignment == 0); if (MDBX_UNALIGNED_OK >= 4 || (expected_alignment % sizeof(v)) == 0) *(uint32_t *)ptr = v; else if ((expected_alignment % sizeof(uint16_t)) == 0) { ((uint16_t *)ptr)[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__] = (uint16_t)v; - ((uint16_t *)ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__] = - (uint16_t)(v >> 16); + ((uint16_t *)ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__] = (uint16_t)(v >> 16); } else { -#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ - defined(_M_X64) || defined(_M_IA64) +#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64) *((uint32_t __unaligned *)ptr) = v; #else bcopy_4((uint8_t *)ptr, (const uint8_t *)&v); @@ -134,21 +115,17 @@ static inline void unaligned_poke_u32(const size_t expected_alignment, } } -MDBX_NOTHROW_PURE_FUNCTION static inline uint64_t -unaligned_peek_u64(const size_t expected_alignment, - const void *const __restrict ptr) { +MDBX_NOTHROW_PURE_FUNCTION static inline uint64_t unaligned_peek_u64(const size_t expected_alignment, + const void *const __restrict ptr) { assert((uintptr_t)ptr % expected_alignment == 0); if (MDBX_UNALIGNED_OK >= 8 || (expected_alignment % sizeof(uint64_t)) == 0) return *(const uint64_t *)ptr; else if ((expected_alignment % sizeof(uint32_t)) == 0) { - const uint32_t lo = - ((const uint32_t *)ptr)[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__]; - 
const uint32_t hi = - ((const uint32_t *)ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__]; + const uint32_t lo = ((const uint32_t *)ptr)[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__]; + const uint32_t hi = ((const uint32_t *)ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__]; return lo | (uint64_t)hi << 32; } else { -#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ - defined(_M_X64) || defined(_M_IA64) +#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64) return *(const __unaligned uint64_t *)ptr; #else uint64_t v; @@ -158,40 +135,32 @@ unaligned_peek_u64(const size_t expected_alignment, } } -static inline uint64_t -unaligned_peek_u64_volatile(const size_t expected_alignment, - const volatile void *const __restrict ptr) { +static inline uint64_t unaligned_peek_u64_volatile(const size_t expected_alignment, + const volatile void *const __restrict ptr) { assert((uintptr_t)ptr % expected_alignment == 0); assert(expected_alignment % sizeof(uint32_t) == 0); if (MDBX_UNALIGNED_OK >= 8 || (expected_alignment % sizeof(uint64_t)) == 0) return *(const volatile uint64_t *)ptr; else { -#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ - defined(_M_X64) || defined(_M_IA64) +#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64) return *(const volatile __unaligned uint64_t *)ptr; #else - const uint32_t lo = ((const volatile uint32_t *) - ptr)[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__]; - const uint32_t hi = ((const volatile uint32_t *) - ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__]; + const uint32_t lo = ((const volatile uint32_t *)ptr)[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__]; + const uint32_t hi = ((const volatile uint32_t *)ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__]; return lo | (uint64_t)hi << 32; #endif /* _MSC_VER || __unaligned */ } } -static inline void unaligned_poke_u64(const size_t expected_alignment, - void *const __restrict 
ptr, - const uint64_t v) { +static inline void unaligned_poke_u64(const size_t expected_alignment, void *const __restrict ptr, const uint64_t v) { assert((uintptr_t)ptr % expected_alignment == 0); if (MDBX_UNALIGNED_OK >= 8 || (expected_alignment % sizeof(v)) == 0) *(uint64_t *)ptr = v; else if ((expected_alignment % sizeof(uint32_t)) == 0) { ((uint32_t *)ptr)[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__] = (uint32_t)v; - ((uint32_t *)ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__] = - (uint32_t)(v >> 32); + ((uint32_t *)ptr)[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__] = (uint32_t)(v >> 32); } else { -#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ - defined(_M_X64) || defined(_M_IA64) +#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64) *((uint64_t __unaligned *)ptr) = v; #else bcopy_8((uint8_t *)ptr, (const uint8_t *)&v); @@ -199,28 +168,22 @@ static inline void unaligned_poke_u64(const size_t expected_alignment, } } -#define UNALIGNED_PEEK_8(ptr, struct, field) \ - peek_u8(ptr_disp(ptr, offsetof(struct, field))) -#define UNALIGNED_POKE_8(ptr, struct, field, value) \ - poke_u8(ptr_disp(ptr, offsetof(struct, field)), value) +#define UNALIGNED_PEEK_8(ptr, struct, field) peek_u8(ptr_disp(ptr, offsetof(struct, field))) +#define UNALIGNED_POKE_8(ptr, struct, field, value) poke_u8(ptr_disp(ptr, offsetof(struct, field)), value) -#define UNALIGNED_PEEK_16(ptr, struct, field) \ - unaligned_peek_u16(1, ptr_disp(ptr, offsetof(struct, field))) -#define UNALIGNED_POKE_16(ptr, struct, field, value) \ +#define UNALIGNED_PEEK_16(ptr, struct, field) unaligned_peek_u16(1, ptr_disp(ptr, offsetof(struct, field))) +#define UNALIGNED_POKE_16(ptr, struct, field, value) \ unaligned_poke_u16(1, ptr_disp(ptr, offsetof(struct, field)), value) -#define UNALIGNED_PEEK_32(ptr, struct, field) \ - unaligned_peek_u32(1, ptr_disp(ptr, offsetof(struct, field))) -#define UNALIGNED_POKE_32(ptr, struct, field, value) \ +#define 
UNALIGNED_PEEK_32(ptr, struct, field) unaligned_peek_u32(1, ptr_disp(ptr, offsetof(struct, field))) +#define UNALIGNED_POKE_32(ptr, struct, field, value) \ unaligned_poke_u32(1, ptr_disp(ptr, offsetof(struct, field)), value) -#define UNALIGNED_PEEK_64(ptr, struct, field) \ - unaligned_peek_u64(1, ptr_disp(ptr, offsetof(struct, field))) -#define UNALIGNED_POKE_64(ptr, struct, field, value) \ +#define UNALIGNED_PEEK_64(ptr, struct, field) unaligned_peek_u64(1, ptr_disp(ptr, offsetof(struct, field))) +#define UNALIGNED_POKE_64(ptr, struct, field, value) \ unaligned_poke_u64(1, ptr_disp(ptr, offsetof(struct, field)), value) -MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t -peek_pgno(const void *const __restrict ptr) { +MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t peek_pgno(const void *const __restrict ptr) { if (sizeof(pgno_t) == sizeof(uint32_t)) return (pgno_t)unaligned_peek_u32(1, ptr); else if (sizeof(pgno_t) == sizeof(uint64_t)) diff --git a/src/utils.c b/src/utils.c index 317b3dd7..05b7a21e 100644 --- a/src/utils.c +++ b/src/utils.c @@ -3,10 +3,8 @@ #include "internals.h" -MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL unsigned -log2n_powerof2(size_t value_uintptr) { - assert(value_uintptr > 0 && value_uintptr < INT32_MAX && - is_powerof2(value_uintptr)); +MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL unsigned log2n_powerof2(size_t value_uintptr) { + assert(value_uintptr > 0 && value_uintptr < INT32_MAX && is_powerof2(value_uintptr)); assert((value_uintptr & -(intptr_t)value_uintptr) == value_uintptr); const uint32_t value_uint32 = (uint32_t)value_uintptr; #if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctz) @@ -18,9 +16,8 @@ log2n_powerof2(size_t value_uintptr) { _BitScanForward(&index, value_uint32); return index; #else - static const uint8_t debruijn_ctz32[32] = { - 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, - 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; + static const uint8_t debruijn_ctz32[32] = 
{0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; return debruijn_ctz32[(uint32_t)(value_uint32 * 0x077CB531ul) >> 27]; #endif } diff --git a/src/utils.h b/src/utils.h index 9f51099a..85563b09 100644 --- a/src/utils.h +++ b/src/utils.h @@ -27,46 +27,36 @@ /* Pointer distance as signed number of bytes */ #define ptr_dist(more, less) (((intptr_t)(more)) - ((intptr_t)(less))) -#define MDBX_ASAN_POISON_MEMORY_REGION(addr, size) \ - do { \ - TRACE("POISON_MEMORY_REGION(%p, %zu) at %u", (void *)(addr), \ - (size_t)(size), __LINE__); \ - ASAN_POISON_MEMORY_REGION(addr, size); \ +#define MDBX_ASAN_POISON_MEMORY_REGION(addr, size) \ + do { \ + TRACE("POISON_MEMORY_REGION(%p, %zu) at %u", (void *)(addr), (size_t)(size), __LINE__); \ + ASAN_POISON_MEMORY_REGION(addr, size); \ } while (0) -#define MDBX_ASAN_UNPOISON_MEMORY_REGION(addr, size) \ - do { \ - TRACE("UNPOISON_MEMORY_REGION(%p, %zu) at %u", (void *)(addr), \ - (size_t)(size), __LINE__); \ - ASAN_UNPOISON_MEMORY_REGION(addr, size); \ +#define MDBX_ASAN_UNPOISON_MEMORY_REGION(addr, size) \ + do { \ + TRACE("UNPOISON_MEMORY_REGION(%p, %zu) at %u", (void *)(addr), (size_t)(size), __LINE__); \ + ASAN_UNPOISON_MEMORY_REGION(addr, size); \ } while (0) -MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline size_t -branchless_abs(intptr_t value) { +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline size_t branchless_abs(intptr_t value) { assert(value > INT_MIN); - const size_t expanded_sign = - (size_t)(value >> (sizeof(value) * CHAR_BIT - 1)); + const size_t expanded_sign = (size_t)(value >> (sizeof(value) * CHAR_BIT - 1)); return ((size_t)value + expanded_sign) ^ expanded_sign; } -MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline bool -is_powerof2(size_t x) { - return (x & (x - 1)) == 0; -} +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline bool is_powerof2(size_t x) { return (x & (x - 1)) == 0; } 
-MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline size_t -floor_powerof2(size_t value, size_t granularity) { +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline size_t floor_powerof2(size_t value, size_t granularity) { assert(is_powerof2(granularity)); return value & ~(granularity - 1); } -MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline size_t -ceil_powerof2(size_t value, size_t granularity) { +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline size_t ceil_powerof2(size_t value, size_t granularity) { return floor_powerof2(value + granularity - 1, granularity); } -MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED MDBX_INTERNAL unsigned -log2n_powerof2(size_t value_uintptr); +MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED MDBX_INTERNAL unsigned log2n_powerof2(size_t value_uintptr); MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL uint64_t rrxmrrxmsx_0(uint64_t v); @@ -75,8 +65,7 @@ struct monotime_cache { int expire_countdown; }; -MDBX_MAYBE_UNUSED static inline uint64_t -monotime_since_cached(uint64_t begin_timestamp, struct monotime_cache *cache) { +MDBX_MAYBE_UNUSED static inline uint64_t monotime_since_cached(uint64_t begin_timestamp, struct monotime_cache *cache) { if (cache->expire_countdown) cache->expire_countdown -= 1; else { diff --git a/src/version.c.in b/src/version.c.in index 19513c68..5c585eae 100644 --- a/src/version.c.in +++ b/src/version.c.in @@ -3,8 +3,7 @@ #include "internals.h" -#if MDBX_VERSION_MAJOR != ${MDBX_VERSION_MAJOR} || \ - MDBX_VERSION_MINOR != ${MDBX_VERSION_MINOR} +#if MDBX_VERSION_MAJOR != ${MDBX_VERSION_MAJOR} || MDBX_VERSION_MINOR != ${MDBX_VERSION_MINOR} #error "API version mismatch! Had `git fetch --tags` done?" 
#endif @@ -18,8 +17,7 @@ __dll_export #endif #ifdef __attribute_externally_visible__ __attribute_externally_visible__ -#elif (defined(__GNUC__) && !defined(__clang__)) || \ - __has_attribute(__externally_visible__) +#elif (defined(__GNUC__) && !defined(__clang__)) || __has_attribute(__externally_visible__) __attribute__((__externally_visible__)) #endif const struct MDBX_version_info mdbx_version = { @@ -29,8 +27,7 @@ __dll_export ${MDBX_VERSION_TWEAK}, "@MDBX_VERSION_PRERELEASE@", /* pre-release suffix of SemVer @MDBX_VERSION_PURE@ */ - {"@MDBX_GIT_TIMESTAMP@", "@MDBX_GIT_TREE@", "@MDBX_GIT_COMMIT@", - "@MDBX_GIT_DESCRIBE@"}, + {"@MDBX_GIT_TIMESTAMP@", "@MDBX_GIT_TREE@", "@MDBX_GIT_COMMIT@", "@MDBX_GIT_DESCRIBE@"}, sourcery}; __dll_export @@ -41,8 +38,7 @@ __dll_export #endif #ifdef __attribute_externally_visible__ __attribute_externally_visible__ -#elif (defined(__GNUC__) && !defined(__clang__)) || \ - __has_attribute(__externally_visible__) +#elif (defined(__GNUC__) && !defined(__clang__)) || __has_attribute(__externally_visible__) __attribute__((__externally_visible__)) #endif const char *const mdbx_sourcery_anchor = sourcery; diff --git a/src/walk.c b/src/walk.c index 265e7da8..585c350d 100644 --- a/src/walk.c +++ b/src/walk.c @@ -41,19 +41,16 @@ static page_type_t walk_subpage_type(const page_t *sp) { } /* Depth-first tree traversal. */ -__cold static int walk_pgno(walk_ctx_t *ctx, walk_tbl_t *tbl, const pgno_t pgno, - txnid_t parent_txnid) { +__cold static int walk_pgno(walk_ctx_t *ctx, walk_tbl_t *tbl, const pgno_t pgno, txnid_t parent_txnid) { assert(pgno != P_INVALID); page_t *mp = nullptr; int err = page_get(ctx->cursor, pgno, &mp, parent_txnid); const page_type_t type = walk_page_type(mp); const size_t nentries = mp ? page_numkeys(mp) : 0; - size_t header_size = - (mp && !is_dupfix_leaf(mp)) ? PAGEHDRSZ + mp->lower : PAGEHDRSZ; + size_t header_size = (mp && !is_dupfix_leaf(mp)) ? 
PAGEHDRSZ + mp->lower : PAGEHDRSZ; size_t payload_size = 0; - size_t unused_size = - (mp ? page_room(mp) : ctx->txn->env->ps - header_size) - payload_size; + size_t unused_size = (mp ? page_room(mp) : ctx->txn->env->ps - header_size) - payload_size; size_t align_bytes = 0; for (size_t i = 0; err == MDBX_SUCCESS && i < nentries; ++i) { @@ -89,12 +86,10 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_tbl_t *tbl, const pgno_t pgno, assert(err == MDBX_SUCCESS); pgr_t lp = page_get_large(ctx->cursor, large_pgno, mp->txnid); - const size_t npages = - ((err = lp.err) == MDBX_SUCCESS) ? lp.page->pages : 1; + const size_t npages = ((err = lp.err) == MDBX_SUCCESS) ? lp.page->pages : 1; const size_t pagesize = pgno2bytes(ctx->txn->env, npages); const size_t over_unused = pagesize - over_payload - over_header; - const int rc = ctx->visitor(large_pgno, npages, ctx->userctx, ctx->deep, - tbl, pagesize, page_large, err, 1, + const int rc = ctx->visitor(large_pgno, npages, ctx->userctx, ctx->deep, tbl, pagesize, page_large, err, 1, over_payload, over_header, over_unused); if (unlikely(rc != MDBX_SUCCESS)) return (rc == MDBX_RESULT_TRUE) ? 
MDBX_SUCCESS : rc; @@ -104,8 +99,7 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_tbl_t *tbl, const pgno_t pgno, case N_TREE /* sub-db */: { if (unlikely(node_data_size != sizeof(tree_t))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid table node size", (unsigned)node_data_size); + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid table node size", (unsigned)node_data_size); assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } @@ -115,8 +109,7 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_tbl_t *tbl, const pgno_t pgno, case N_TREE | N_DUP /* dupsorted sub-tree */: if (unlikely(node_data_size != sizeof(tree_t))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid sub-tree node size", (unsigned)node_data_size); + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid sub-tree node size", (unsigned)node_data_size); assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } @@ -126,8 +119,7 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_tbl_t *tbl, const pgno_t pgno, case N_DUP /* short sub-page */: { if (unlikely(node_data_size <= PAGEHDRSZ || (node_data_size & 1))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid sub-page node size", (unsigned)node_data_size); + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid sub-page node size", (unsigned)node_data_size); assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; break; @@ -137,14 +129,12 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_tbl_t *tbl, const pgno_t pgno, const page_type_t subtype = walk_subpage_type(sp); const size_t nsubkeys = page_numkeys(sp); if (unlikely(subtype == page_sub_broken)) { - ERROR("%s/%d: %s 0x%x", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid sub-page flags", sp->flags); + ERROR("%s/%d: %s 0x%x", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid sub-page flags", sp->flags); assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } - size_t subheader_size = - is_dupfix_leaf(sp) 
? PAGEHDRSZ : PAGEHDRSZ + sp->lower; + size_t subheader_size = is_dupfix_leaf(sp) ? PAGEHDRSZ : PAGEHDRSZ + sp->lower; size_t subunused_size = page_room(sp); size_t subpayload_size = 0; size_t subalign_bytes = 0; @@ -161,18 +151,15 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_tbl_t *tbl, const pgno_t pgno, subpayload_size += subnode_size; subalign_bytes += subnode_size & 1; if (unlikely(node_flags(subnode) != 0)) { - ERROR("%s/%d: %s 0x%x", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "unexpected sub-node flags", node_flags(subnode)); + ERROR("%s/%d: %s 0x%x", "MDBX_CORRUPTED", MDBX_CORRUPTED, "unexpected sub-node flags", node_flags(subnode)); assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } } } - const int rc = - ctx->visitor(pgno, 0, ctx->userctx, ctx->deep + 1, tbl, - node_data_size, subtype, err, nsubkeys, subpayload_size, - subheader_size, subunused_size + subalign_bytes); + const int rc = ctx->visitor(pgno, 0, ctx->userctx, ctx->deep + 1, tbl, node_data_size, subtype, err, nsubkeys, + subpayload_size, subheader_size, subunused_size + subalign_bytes); if (unlikely(rc != MDBX_SUCCESS)) return (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc; header_size += subheader_size; @@ -182,16 +169,14 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_tbl_t *tbl, const pgno_t pgno, } break; default: - ERROR("%s/%d: %s 0x%x", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid node flags", node_flags(node)); + ERROR("%s/%d: %s 0x%x", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid node flags", node_flags(node)); assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } } - const int rc = ctx->visitor( - pgno, 1, ctx->userctx, ctx->deep, tbl, ctx->txn->env->ps, type, err, - nentries, payload_size, header_size, unused_size + align_bytes); + const int rc = ctx->visitor(pgno, 1, ctx->userctx, ctx->deep, tbl, ctx->txn->env->ps, type, err, nentries, + payload_size, header_size, unused_size + align_bytes); if (unlikely(rc != MDBX_SUCCESS)) return (rc == MDBX_RESULT_TRUE) ? 
MDBX_SUCCESS : rc; @@ -220,8 +205,7 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_tbl_t *tbl, const pgno_t pgno, case N_TREE /* sub-db */: if (unlikely(node_ds(node) != sizeof(tree_t))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid sub-tree node size", (unsigned)node_ds(node)); + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid sub-tree node size", (unsigned)node_ds(node)); assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } else { @@ -238,8 +222,8 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_tbl_t *tbl, const pgno_t pgno, case N_TREE | N_DUP /* dupsorted sub-tree */: if (unlikely(node_ds(node) != sizeof(tree_t))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, - "invalid dupsort sub-tree node size", (unsigned)node_ds(node)); + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid dupsort sub-tree node size", + (unsigned)node_ds(node)); assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } else { @@ -248,8 +232,7 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_tbl_t *tbl, const pgno_t pgno, assert(err == MDBX_SUCCESS); err = cursor_dupsort_setup(ctx->cursor, node, mp); if (likely(err == MDBX_SUCCESS)) { - assert(ctx->cursor->subcur == - &container_of(ctx->cursor, cursor_couple_t, outer)->inner); + assert(ctx->cursor->subcur == &container_of(ctx->cursor, cursor_couple_t, outer)->inner); ctx->cursor = &ctx->cursor->subcur->cursor; ctx->deep += 1; tbl->nested = &aligned_db; @@ -257,8 +240,7 @@ __cold static int walk_pgno(walk_ctx_t *ctx, walk_tbl_t *tbl, const pgno_t pgno, tbl->nested = nullptr; ctx->deep -= 1; subcur_t *inner_xcursor = container_of(ctx->cursor, subcur_t, cursor); - cursor_couple_t *couple = - container_of(inner_xcursor, cursor_couple_t, inner); + cursor_couple_t *couple = container_of(inner_xcursor, cursor_couple_t, inner); ctx->cursor = &couple->outer; } } @@ -280,30 +262,24 @@ __cold static int walk_tbl(walk_ctx_t *ctx, walk_tbl_t *tbl) { if (unlikely(rc != 
MDBX_SUCCESS)) return rc; - const uint8_t cursor_checking = (ctx->options & dont_check_keys_ordering) - ? z_pagecheck | z_ignord - : z_pagecheck; + const uint8_t cursor_checking = (ctx->options & dont_check_keys_ordering) ? z_pagecheck | z_ignord : z_pagecheck; couple.outer.checking |= cursor_checking; couple.inner.cursor.checking |= cursor_checking; couple.outer.next = ctx->cursor; couple.outer.top_and_flags = z_disable_tree_search_fastpath; ctx->cursor = &couple.outer; - rc = walk_pgno(ctx, tbl, db->root, - db->mod_txnid ? db->mod_txnid : ctx->txn->txnid); + rc = walk_pgno(ctx, tbl, db->root, db->mod_txnid ? db->mod_txnid : ctx->txn->txnid); ctx->cursor = couple.outer.next; return rc; } -__cold int walk_pages(MDBX_txn *txn, walk_func *visitor, void *user, - walk_options_t options) { +__cold int walk_pages(MDBX_txn *txn, walk_func *visitor, void *user, walk_options_t options) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) return rc; - walk_ctx_t ctx = { - .txn = txn, .userctx = user, .visitor = visitor, .options = options}; - walk_tbl_t tbl = {.name = {.iov_base = MDBX_CHK_GC}, - .internal = &txn->dbs[FREE_DBI]}; + walk_ctx_t ctx = {.txn = txn, .userctx = user, .visitor = visitor, .options = options}; + walk_tbl_t tbl = {.name = {.iov_base = MDBX_CHK_GC}, .internal = &txn->dbs[FREE_DBI]}; rc = walk_tbl(&ctx, &tbl); if (!MDBX_IS_ERROR(rc)) { tbl.name.iov_base = MDBX_CHK_MAIN; diff --git a/src/walk.h b/src/walk.h index ef79c70b..be6dc507 100644 --- a/src/walk.h +++ b/src/walk.h @@ -10,14 +10,11 @@ typedef struct walk_tbl { tree_t *internal, *nested; } walk_tbl_t; -typedef int walk_func(const size_t pgno, const unsigned number, void *const ctx, - const int deep, const walk_tbl_t *table, - const size_t page_size, const page_type_t page_type, - const MDBX_error_t err, const size_t nentries, - const size_t payload_bytes, const size_t header_bytes, - const size_t unused_bytes); +typedef int walk_func(const size_t pgno, const unsigned 
number, void *const ctx, const int deep, + const walk_tbl_t *table, const size_t page_size, const page_type_t page_type, + const MDBX_error_t err, const size_t nentries, const size_t payload_bytes, + const size_t header_bytes, const size_t unused_bytes); typedef enum walk_options { dont_check_keys_ordering = 1 } walk_options_t; -MDBX_INTERNAL int walk_pages(MDBX_txn *txn, walk_func *visitor, void *user, - walk_options_t options); +MDBX_INTERNAL int walk_pages(MDBX_txn *txn, walk_func *visitor, void *user, walk_options_t options); diff --git a/src/windows-import.c b/src/windows-import.c index a401014c..ee0ea9c8 100644 --- a/src/windows-import.c +++ b/src/windows-import.c @@ -9,9 +9,7 @@ // Stub for slim read-write lock // Portion Copyright (C) 1995-2002 Brad Wilson -static void WINAPI stub_srwlock_Init(osal_srwlock_t *srwl) { - srwl->readerCount = srwl->writerCount = 0; -} +static void WINAPI stub_srwlock_Init(osal_srwlock_t *srwl) { srwl->readerCount = srwl->writerCount = 0; } static void WINAPI stub_srwlock_AcquireShared(osal_srwlock_t *srwl) { while (true) { @@ -76,8 +74,7 @@ static void WINAPI stub_srwlock_ReleaseExclusive(osal_srwlock_t *srwl) { static uint64_t WINAPI stub_GetTickCount64(void) { LARGE_INTEGER Counter, Frequency; - return (QueryPerformanceFrequency(&Frequency) && - QueryPerformanceCounter(&Counter)) + return (QueryPerformanceFrequency(&Frequency) && QueryPerformanceCounter(&Counter)) ? 
Counter.QuadPart * 1000ul / Frequency.QuadPart : 0; } @@ -91,8 +88,7 @@ struct libmdbx_imports imports; #pragma GCC diagnostic ignored "-Wcast-function-type" #endif /* GCC/MINGW */ -#define MDBX_IMPORT(HANDLE, ENTRY) \ - imports.ENTRY = (MDBX_##ENTRY)GetProcAddress(HANDLE, #ENTRY) +#define MDBX_IMPORT(HANDLE, ENTRY) imports.ENTRY = (MDBX_##ENTRY)GetProcAddress(HANDLE, #ENTRY) void windows_import(void) { const HINSTANCE hNtdll = GetModuleHandleA("ntdll.dll"); @@ -121,20 +117,13 @@ void windows_import(void) { } const osal_srwlock_t_function srwlock_init = - (osal_srwlock_t_function)(hKernel32dll - ? GetProcAddress(hKernel32dll, - "InitializeSRWLock") - : nullptr); + (osal_srwlock_t_function)(hKernel32dll ? GetProcAddress(hKernel32dll, "InitializeSRWLock") : nullptr); if (srwlock_init) { imports.srwl_Init = srwlock_init; - imports.srwl_AcquireShared = (osal_srwlock_t_function)GetProcAddress( - hKernel32dll, "AcquireSRWLockShared"); - imports.srwl_ReleaseShared = (osal_srwlock_t_function)GetProcAddress( - hKernel32dll, "ReleaseSRWLockShared"); - imports.srwl_AcquireExclusive = (osal_srwlock_t_function)GetProcAddress( - hKernel32dll, "AcquireSRWLockExclusive"); - imports.srwl_ReleaseExclusive = (osal_srwlock_t_function)GetProcAddress( - hKernel32dll, "ReleaseSRWLockExclusive"); + imports.srwl_AcquireShared = (osal_srwlock_t_function)GetProcAddress(hKernel32dll, "AcquireSRWLockShared"); + imports.srwl_ReleaseShared = (osal_srwlock_t_function)GetProcAddress(hKernel32dll, "ReleaseSRWLockShared"); + imports.srwl_AcquireExclusive = (osal_srwlock_t_function)GetProcAddress(hKernel32dll, "AcquireSRWLockExclusive"); + imports.srwl_ReleaseExclusive = (osal_srwlock_t_function)GetProcAddress(hKernel32dll, "ReleaseSRWLockExclusive"); } else { imports.srwl_Init = stub_srwlock_Init; imports.srwl_AcquireShared = stub_srwlock_AcquireShared; diff --git a/src/windows-import.h b/src/windows-import.h index b7c461f9..0362e9b6 100644 --- a/src/windows-import.h +++ b/src/windows-import.h @@ 
-59,32 +59,27 @@ typedef struct _FILE_REMOTE_PROTOCOL_INFO { #endif /* _WIN32_WINNT < 0x0600 (prior to Windows Vista) */ -typedef BOOL(WINAPI *MDBX_GetFileInformationByHandleEx)( - _In_ HANDLE hFile, _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass, - _Out_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize); +typedef BOOL(WINAPI *MDBX_GetFileInformationByHandleEx)(_In_ HANDLE hFile, + _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass, + _Out_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize); typedef BOOL(WINAPI *MDBX_GetVolumeInformationByHandleW)( - _In_ HANDLE hFile, _Out_opt_ LPWSTR lpVolumeNameBuffer, - _In_ DWORD nVolumeNameSize, _Out_opt_ LPDWORD lpVolumeSerialNumber, - _Out_opt_ LPDWORD lpMaximumComponentLength, - _Out_opt_ LPDWORD lpFileSystemFlags, - _Out_opt_ LPWSTR lpFileSystemNameBuffer, _In_ DWORD nFileSystemNameSize); + _In_ HANDLE hFile, _Out_opt_ LPWSTR lpVolumeNameBuffer, _In_ DWORD nVolumeNameSize, + _Out_opt_ LPDWORD lpVolumeSerialNumber, _Out_opt_ LPDWORD lpMaximumComponentLength, + _Out_opt_ LPDWORD lpFileSystemFlags, _Out_opt_ LPWSTR lpFileSystemNameBuffer, _In_ DWORD nFileSystemNameSize); -typedef DWORD(WINAPI *MDBX_GetFinalPathNameByHandleW)(_In_ HANDLE hFile, - _Out_ LPWSTR lpszFilePath, - _In_ DWORD cchFilePath, - _In_ DWORD dwFlags); +typedef DWORD(WINAPI *MDBX_GetFinalPathNameByHandleW)(_In_ HANDLE hFile, _Out_ LPWSTR lpszFilePath, + _In_ DWORD cchFilePath, _In_ DWORD dwFlags); -typedef BOOL(WINAPI *MDBX_SetFileInformationByHandle)( - _In_ HANDLE hFile, _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass, - _Out_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize); +typedef BOOL(WINAPI *MDBX_SetFileInformationByHandle)(_In_ HANDLE hFile, + _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass, + _Out_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize); -typedef NTSTATUS(NTAPI *MDBX_NtFsControlFile)( - IN HANDLE FileHandle, IN OUT HANDLE Event, - IN OUT PVOID /* PIO_APC_ROUTINE */ ApcRoutine, IN OUT PVOID ApcContext, - OUT 
PIO_STATUS_BLOCK IoStatusBlock, IN ULONG FsControlCode, - IN OUT PVOID InputBuffer, IN ULONG InputBufferLength, - OUT OPTIONAL PVOID OutputBuffer, IN ULONG OutputBufferLength); +typedef NTSTATUS(NTAPI *MDBX_NtFsControlFile)(IN HANDLE FileHandle, IN OUT HANDLE Event, + IN OUT PVOID /* PIO_APC_ROUTINE */ ApcRoutine, IN OUT PVOID ApcContext, + OUT PIO_STATUS_BLOCK IoStatusBlock, IN ULONG FsControlCode, + IN OUT PVOID InputBuffer, IN ULONG InputBufferLength, + OUT OPTIONAL PVOID OutputBuffer, IN ULONG OutputBufferLength); typedef uint64_t(WINAPI *MDBX_GetTickCount64)(void); @@ -95,27 +90,21 @@ typedef struct _WIN32_MEMORY_RANGE_ENTRY { } WIN32_MEMORY_RANGE_ENTRY, *PWIN32_MEMORY_RANGE_ENTRY; #endif /* Windows 8.x */ -typedef BOOL(WINAPI *MDBX_PrefetchVirtualMemory)( - HANDLE hProcess, ULONG_PTR NumberOfEntries, - PWIN32_MEMORY_RANGE_ENTRY VirtualAddresses, ULONG Flags); +typedef BOOL(WINAPI *MDBX_PrefetchVirtualMemory)(HANDLE hProcess, ULONG_PTR NumberOfEntries, + PWIN32_MEMORY_RANGE_ENTRY VirtualAddresses, ULONG Flags); typedef enum _SECTION_INHERIT { ViewShare = 1, ViewUnmap = 2 } SECTION_INHERIT; -typedef NTSTATUS(NTAPI *MDBX_NtExtendSection)(IN HANDLE SectionHandle, - IN PLARGE_INTEGER NewSectionSize); +typedef NTSTATUS(NTAPI *MDBX_NtExtendSection)(IN HANDLE SectionHandle, IN PLARGE_INTEGER NewSectionSize); -typedef LSTATUS(WINAPI *MDBX_RegGetValueA)(HKEY hkey, LPCSTR lpSubKey, - LPCSTR lpValue, DWORD dwFlags, - LPDWORD pdwType, PVOID pvData, - LPDWORD pcbData); +typedef LSTATUS(WINAPI *MDBX_RegGetValueA)(HKEY hkey, LPCSTR lpSubKey, LPCSTR lpValue, DWORD dwFlags, LPDWORD pdwType, + PVOID pvData, LPDWORD pcbData); typedef long(WINAPI *MDBX_CoCreateGuid)(bin128_t *guid); NTSYSAPI ULONG RtlRandomEx(PULONG Seed); -typedef BOOL(WINAPI *MDBX_SetFileIoOverlappedRange)(HANDLE FileHandle, - PUCHAR OverlappedRangeStart, - ULONG Length); +typedef BOOL(WINAPI *MDBX_SetFileIoOverlappedRange)(HANDLE FileHandle, PUCHAR OverlappedRangeStart, ULONG Length); struct libmdbx_imports { 
osal_srwlock_t_function srwl_Init; diff --git a/test/append.c++ b/test/append.c++ index 064dbf3d..59f6d1ca 100644 --- a/test/append.c++ +++ b/test/append.c++ @@ -5,16 +5,14 @@ class testcase_append : public testcase { public: - testcase_append(const actor_config &config, const mdbx_pid_t pid) - : testcase(config, pid) {} + testcase_append(const actor_config &config, const mdbx_pid_t pid) : testcase(config, pid) {} bool run() override; static bool review_params(actor_params ¶ms, unsigned space_id) { if (!testcase::review_params(params, space_id)) return false; const bool ordered = !flipcoin_x3(); - log_notice("the '%s' key-generation mode is selected", - ordered ? "ordered/linear" : "unordered/non-linear"); + log_notice("the '%s' key-generation mode is selected", ordered ? "ordered/linear" : "unordered/non-linear"); if (ordered && !params.make_keygen_linear()) return false; return true; @@ -37,13 +35,10 @@ bool testcase_append::run() { keyvalue_maker.setup(config.params, 0 /* thread_number */); /* LY: тест наполнения таблиц в append-режиме, * при котором записи добавляются строго в конец (в порядке сортировки) */ - const MDBX_put_flags_t flags = - reverse - ? ((config.params.table_flags & MDBX_DUPSORT) ? MDBX_UPSERT - : MDBX_NOOVERWRITE) - : ((config.params.table_flags & MDBX_DUPSORT) - ? (flipcoin() ? MDBX_APPEND | MDBX_APPENDDUP : MDBX_APPENDDUP) - : MDBX_APPEND); + const MDBX_put_flags_t flags = reverse ? ((config.params.table_flags & MDBX_DUPSORT) ? MDBX_UPSERT : MDBX_NOOVERWRITE) + : ((config.params.table_flags & MDBX_DUPSORT) + ? (flipcoin() ? 
MDBX_APPEND | MDBX_APPENDDUP : MDBX_APPENDDUP) + : MDBX_APPEND); key = keygen::alloc(config.params.keylen_max); data = keygen::alloc(config.params.datalen_max); @@ -59,11 +54,9 @@ bool testcase_append::run() { simple_checksum committed_inserted_checksum = inserted_checksum; while (should_continue()) { const keygen::serial_t serial = serial_count; - const bool turn_key = (config.params.table_flags & MDBX_DUPSORT) == 0 || - flipcoin_n(config.params.keygen.split); - if (turn_key - ? !keyvalue_maker.increment_key_part(serial_count, reverse ? -1 : 1) - : !keyvalue_maker.increment(serial_count, reverse ? -1 : 1)) { + const bool turn_key = (config.params.table_flags & MDBX_DUPSORT) == 0 || flipcoin_n(config.params.keygen.split); + if (turn_key ? !keyvalue_maker.increment_key_part(serial_count, reverse ? -1 : 1) + : !keyvalue_maker.increment(serial_count, reverse ? -1 : 1)) { // дошли до границы пространства ключей break; } @@ -106,8 +99,7 @@ bool testcase_append::run() { break; case MDBX_APPENDDUP: assert((config.params.table_flags & MDBX_DUPSORT) != 0); - expect_key_mismatch = - mdbx_cmp(txn_guard.get(), dbi, &key->value, &ge_key) == 0; + expect_key_mismatch = mdbx_cmp(txn_guard.get(), dbi, &key->value, &ge_key) == 0; break; } } else if (err == MDBX_NOTFOUND /* all pair are less than */) { @@ -152,10 +144,9 @@ bool testcase_append::run() { const auto insertion_result = speculum.insert(item); if (!insertion_result.second) { char dump_key[32], dump_value[32]; - log_error( - "speculum.append: unexpected %s {%s, %s}", "MDBX_SUCCESS", - mdbx_dump_val(&key->value, dump_key, sizeof(dump_key)), - mdbx_dump_val(&data->value, dump_value, sizeof(dump_value))); + log_error("speculum.append: unexpected %s {%s, %s}", "MDBX_SUCCESS", + mdbx_dump_val(&key->value, dump_key, sizeof(dump_key)), + mdbx_dump_val(&data->value, dump_value, sizeof(dump_value))); return false; } } @@ -199,8 +190,7 @@ bool testcase_append::run() { cursor_renew(); MDBX_val check_key, check_data; - err = 
mdbx_cursor_get(cursor_guard.get(), &check_key, &check_data, - reverse ? MDBX_LAST : MDBX_FIRST); + err = mdbx_cursor_get(cursor_guard.get(), &check_key, &check_data, reverse ? MDBX_LAST : MDBX_FIRST); if (likely(inserted_number)) { if (unlikely(err != MDBX_SUCCESS)) failure_perror("mdbx_cursor_get(MDBX_FIRST)", err); @@ -213,19 +203,16 @@ bool testcase_append::run() { read_checksum.push((uint32_t)read_count, check_key); read_checksum.push(10639, check_data); - err = mdbx_cursor_get(cursor_guard.get(), &check_key, &check_data, - reverse ? MDBX_PREV : MDBX_NEXT); + err = mdbx_cursor_get(cursor_guard.get(), &check_key, &check_data, reverse ? MDBX_PREV : MDBX_NEXT); } if (unlikely(err != MDBX_NOTFOUND)) failure_perror("mdbx_cursor_get(MDBX_NEXT) != EOF", err); if (unlikely(read_count != inserted_number)) - failure("read_count(%" PRIu64 ") != inserted_number(%" PRIu64 ")", - read_count, inserted_number); + failure("read_count(%" PRIu64 ") != inserted_number(%" PRIu64 ")", read_count, inserted_number); - if (unlikely(read_checksum.value != inserted_checksum.value) && - !keyvalue_maker.is_unordered()) + if (unlikely(read_checksum.value != inserted_checksum.value) && !keyvalue_maker.is_unordered()) failure("read_checksum(0x%016" PRIu64 ") " "!= inserted_checksum(0x%016" PRIu64 ")", read_checksum.value, inserted_checksum.value); diff --git a/test/base.h++ b/test/base.h++ index a2d2cfbd..21af13a2 100644 --- a/test/base.h++ +++ b/test/base.h++ @@ -7,12 +7,12 @@ #ifdef _MSC_VER #pragma warning(push, 1) -#pragma warning(disable : 4548) /* expression before comma has no effect; \ +#pragma warning(disable : 4548) /* expression before comma has no effect; \ expected expression with side - effect */ -#pragma warning(disable : 4530) /* C++ exception handler used, but unwind \ +#pragma warning(disable : 4530) /* C++ exception handler used, but unwind \ semantics are not enabled. 
Specify /EHsc */ -#pragma warning(disable : 4577) /* 'noexcept' used with no exception handling \ - mode specified; termination on exception \ +#pragma warning(disable : 4577) /* 'noexcept' used with no exception handling \ + mode specified; termination on exception \ is not guaranteed. Specify /EHsc */ #endif /* _MSC_VER (warnings) */ @@ -71,24 +71,22 @@ #ifdef _MSC_VER #pragma warning(pop) -#pragma warning(disable : 4201) /* nonstandard extension used: nameless \ +#pragma warning(disable : 4201) /* nonstandard extension used: nameless \ struct/union */ #pragma warning(disable : 4127) /* conditional expression is constant */ #if _MSC_VER < 1900 -#pragma warning(disable : 4510) /* default constructor could \ +#pragma warning(disable : 4510) /* default constructor could \ not be generated */ -#pragma warning(disable : 4512) /* assignment operator could \ +#pragma warning(disable : 4512) /* assignment operator could \ not be generated */ #pragma warning(disable : 4610) /* user-defined constructor required */ #ifndef snprintf -#define snprintf(buffer, buffer_size, format, ...) \ - _snprintf_s(buffer, buffer_size, _TRUNCATE, format, __VA_ARGS__) +#define snprintf(buffer, buffer_size, format, ...) 
_snprintf_s(buffer, buffer_size, _TRUNCATE, format, __VA_ARGS__) #endif #ifndef vsnprintf -#define vsnprintf(buffer, buffer_size, format, args) \ - _vsnprintf_s(buffer, buffer_size, _TRUNCATE, format, args) +#define vsnprintf(buffer, buffer_size, format, args) _vsnprintf_s(buffer, buffer_size, _TRUNCATE, format, args) #endif -#pragma warning(disable : 4996) /* 'vsnprintf': This function or variable \ +#pragma warning(disable : 4996) /* 'vsnprintf': This function or variable \ may be unsafe */ #endif #endif /* _MSC_VER */ diff --git a/test/cases.c++ b/test/cases.c++ index 3d380f61..fdb99485 100644 --- a/test/cases.c++ +++ b/test/cases.c++ @@ -14,9 +14,7 @@ bool registry::add(const record *item) { auto const singleton = instance(); assert(singleton->name2id.count(std::string(item->name)) == 0); assert(singleton->id2record.count(item->id) == 0); - if (singleton->name2id.count(std::string(item->name)) + - singleton->id2record.count(item->id) == - 0) { + if (singleton->name2id.count(std::string(item->name)) + singleton->id2record.count(item->id) == 0) { singleton->name2id[std::string(item->name)] = item; singleton->id2record[item->id] = item; return true; @@ -24,28 +22,24 @@ bool registry::add(const record *item) { return false; } -testcase *registry::create_actor(const actor_config &config, - const mdbx_pid_t pid) { +testcase *registry::create_actor(const actor_config &config, const mdbx_pid_t pid) { return instance()->id2record.at(config.testcase)->constructor(config, pid); } -bool registry::review_actor_params(const actor_testcase id, - actor_params ¶ms, - const unsigned space_id) { +bool registry::review_actor_params(const actor_testcase id, actor_params ¶ms, const unsigned space_id) { return instance()->id2record.at(id)->review_params(params, space_id); } //----------------------------------------------------------------------------- -void configure_actor(unsigned &last_space_id, const actor_testcase testcase, - const char *space_id_cstr, actor_params params) { 
+void configure_actor(unsigned &last_space_id, const actor_testcase testcase, const char *space_id_cstr, + actor_params params) { unsigned wait4id = 0; if (params.waitfor_nops) { for (auto i = global::actors.rbegin(); i != global::actors.rend(); ++i) { if (i->is_waitable(params.waitfor_nops)) { if (i->signal_nops && i->signal_nops != params.waitfor_nops) - failure("Previous waitable actor (id=%u) already linked on %u-ops\n", - i->actor_id, i->signal_nops); + failure("Previous waitable actor (id=%u) already linked on %u-ops\n", i->actor_id, i->signal_nops); wait4id = i->actor_id; i->signal_nops = params.waitfor_nops; break; @@ -75,15 +69,12 @@ void configure_actor(unsigned &last_space_id, const actor_testcase testcase, failure("Actor config-review failed for space-id %lu\n", space_id); last_space_id = unsigned(space_id); - log_trace("configure_actor: space %lu for %s", space_id, - testcase2str(testcase)); - global::actors.emplace_back( - actor_config(testcase, params, unsigned(space_id), wait4id)); + log_trace("configure_actor: space %lu for %s", space_id, testcase2str(testcase)); + global::actors.emplace_back(actor_config(testcase, params, unsigned(space_id), wait4id)); global::databases.insert(params.pathname_db); } -void testcase_setup(const char *casename, const actor_params ¶ms, - unsigned &last_space_id) { +void testcase_setup(const char *casename, const actor_params ¶ms, unsigned &last_space_id) { if (strcmp(casename, "basic") == 0) { log_notice(">>> testcase_setup(%s)", casename); configure_actor(last_space_id, ac_nested, nullptr, params); @@ -111,8 +102,7 @@ void keycase_setup(const char *casename, actor_params ¶ms) { params.keygen.keycase = kc_random; // TODO log_notice("<<< keycase_setup(%s): done", casename); - } else if (strcmp(casename, "dashes") == 0 || - strcmp(casename, "aside") == 0) { + } else if (strcmp(casename, "dashes") == 0 || strcmp(casename, "aside") == 0) { log_notice(">>> keycase_setup(%s)", casename); params.keygen.keycase = kc_dashes; 
// TODO diff --git a/test/chrono.c++ b/test/chrono.c++ index 093d938c..daf285b1 100644 --- a/test/chrono.c++ +++ b/test/chrono.c++ @@ -18,9 +18,7 @@ uint32_t ns2fractional(uint32_t ns) { return uint32_t((uint64_t(ns) << 32) / NSEC_PER_SEC); } -uint32_t fractional2ns(uint32_t fractional) { - return uint32_t((fractional * uint64_t(NSEC_PER_SEC)) >> 32); -} +uint32_t fractional2ns(uint32_t fractional) { return uint32_t((fractional * uint64_t(NSEC_PER_SEC)) >> 32); } #ifndef USEC_PER_SEC #define USEC_PER_SEC 1000000u @@ -51,33 +49,27 @@ uint32_t ms2fractional(uint32_t ms) { return uint32_t((uint64_t(ms) << 32) / MSEC_PER_SEC); } -uint32_t fractional2ms(uint32_t fractional) { - return uint32_t((fractional * uint64_t(MSEC_PER_SEC)) >> 32); -} +uint32_t fractional2ms(uint32_t fractional) { return uint32_t((fractional * uint64_t(MSEC_PER_SEC)) >> 32); } time from_ns(uint64_t ns) { time result; - result.fixedpoint = - ((ns / NSEC_PER_SEC) << 32) | ns2fractional(uint32_t(ns % NSEC_PER_SEC)); + result.fixedpoint = ((ns / NSEC_PER_SEC) << 32) | ns2fractional(uint32_t(ns % NSEC_PER_SEC)); return result; } time from_us(uint64_t us) { time result; - result.fixedpoint = - ((us / USEC_PER_SEC) << 32) | us2fractional(uint32_t(us % USEC_PER_SEC)); + result.fixedpoint = ((us / USEC_PER_SEC) << 32) | us2fractional(uint32_t(us % USEC_PER_SEC)); return result; } time from_ms(uint64_t ms) { time result; - result.fixedpoint = - ((ms / MSEC_PER_SEC) << 32) | ms2fractional(uint32_t(ms % MSEC_PER_SEC)); + result.fixedpoint = ((ms / MSEC_PER_SEC) << 32) | ms2fractional(uint32_t(ms % MSEC_PER_SEC)); return result; } -#if __GNUC_PREREQ(8, 0) && \ - (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) +#if __GNUC_PREREQ(8, 0) && (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wcast-function-type" #endif /* GCC/MINGW */ @@ -88,16 +80,14 @@ time now_realtime() { if (unlikely(!query_time)) { HMODULE 
hModule = GetModuleHandle(TEXT("kernel32.dll")); if (hModule) - query_time = (void(WINAPI *)(LPFILETIME))GetProcAddress( - hModule, "GetSystemTimePreciseAsFileTime"); + query_time = (void(WINAPI *)(LPFILETIME))GetProcAddress(hModule, "GetSystemTimePreciseAsFileTime"); if (!query_time) query_time = GetSystemTimeAsFileTime; } FILETIME filetime; query_time(&filetime); - uint64_t ns100 = - (uint64_t)filetime.dwHighDateTime << 32 | filetime.dwLowDateTime; + uint64_t ns100 = (uint64_t)filetime.dwHighDateTime << 32 | filetime.dwLowDateTime; return from_ns((ns100 - UINT64_C(116444736000000000)) * 100u); #else struct timespec ts; @@ -115,8 +105,7 @@ time now_monotonic() { if (reciprocal == 0) { if (!QueryPerformanceFrequency(&Frequency)) failure_perror("QueryPerformanceFrequency()", GetLastError()); - reciprocal = (((UINT64_C(1) << 48) + Frequency.QuadPart / 2 + 1) / - Frequency.QuadPart); + reciprocal = (((UINT64_C(1) << 48) + Frequency.QuadPart / 2 + 1) / Frequency.QuadPart); assert(reciprocal); } @@ -138,8 +127,7 @@ time now_monotonic() { #endif } -#if __GNUC_PREREQ(8, 0) && \ - (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) +#if __GNUC_PREREQ(8, 0) && (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) #pragma GCC diagnostic pop #endif /* GCC/MINGW */ diff --git a/test/chrono.h++ b/test/chrono.h++ index 5908509f..0749db8a 100644 --- a/test/chrono.h++ +++ b/test/chrono.h++ @@ -63,12 +63,10 @@ inline time infinite() { return result; } -#if defined(HAVE_TIMESPEC_TV_NSEC) || defined(__timespec_defined) || \ - defined(CLOCK_REALTIME) +#if defined(HAVE_TIMESPEC_TV_NSEC) || defined(__timespec_defined) || defined(CLOCK_REALTIME) inline time from_timespec(const struct timespec &ts) { time result; - result.fixedpoint = - ((uint64_t)ts.tv_sec << 32) | ns2fractional((uint32_t)ts.tv_nsec); + result.fixedpoint = ((uint64_t)ts.tv_sec << 32) | ns2fractional((uint32_t)ts.tv_nsec); return result; } #endif /* HAVE_TIMESPEC_TV_NSEC */ @@ -76,8 +74,7 
@@ inline time from_timespec(const struct timespec &ts) { #if defined(HAVE_TIMEVAL_TV_USEC) || defined(_STRUCT_TIMEVAL) inline time from_timeval(const struct timeval &tv) { time result; - result.fixedpoint = - ((uint64_t)tv.tv_sec << 32) | us2fractional((uint32_t)tv.tv_usec); + result.fixedpoint = ((uint64_t)tv.tv_sec << 32) | us2fractional((uint32_t)tv.tv_usec); return result; } #endif /* HAVE_TIMEVAL_TV_USEC */ diff --git a/test/config.c++ b/test/config.c++ index 4732d95b..e35e74f2 100644 --- a/test/config.c++ +++ b/test/config.c++ @@ -9,8 +9,8 @@ namespace config { -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - const char **value, const char *default_value) { +bool parse_option(int argc, char *const argv[], int &narg, const char *option, const char **value, + const char *default_value) { assert(narg < argc); const char *current = argv[narg]; const size_t optlen = strlen(option); @@ -49,14 +49,11 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, failure("No value given for '--%s' option\n", option); } -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - std::string &value, bool allow_empty) { - return parse_option(argc, argv, narg, option, value, allow_empty, - allow_empty ? "" : nullptr); +bool parse_option(int argc, char *const argv[], int &narg, const char *option, std::string &value, bool allow_empty) { + return parse_option(argc, argv, narg, option, value, allow_empty, allow_empty ? 
"" : nullptr); } -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - std::string &value, bool allow_empty, +bool parse_option(int argc, char *const argv[], int &narg, const char *option, std::string &value, bool allow_empty, const char *default_value) { const char *value_cstr; if (!parse_option(argc, argv, narg, option, &value_cstr, default_value)) @@ -70,8 +67,7 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, } template <> -bool parse_option(int argc, char *const argv[], int &narg, - const char *option, unsigned &mask, +bool parse_option(int argc, char *const argv[], int &narg, const char *option, unsigned &mask, const option_verb *verbs) { const char *list; if (!parse_option(argc, argv, narg, option, &list)) @@ -95,8 +91,7 @@ bool parse_option(int argc, char *const argv[], int &narg, while (true) { if (!scan->verb) - failure("Unknown verb '%.*s', for option '--%s'\n", (int)len, list, - option); + failure("Unknown verb '%.*s', for option '--%s'\n", (int)len, list, option); if (strlen(scan->verb) == len && strncmp(list, scan->verb, len) == 0) { mask = strikethrough ? mask & ~scan->mask : mask | scan->mask; clear = strikethrough ? 
clear & ~scan->mask : clear | scan->mask; @@ -110,10 +105,8 @@ bool parse_option(int argc, char *const argv[], int &narg, return true; } -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - uint64_t &value, const scale_mode scale, - const uint64_t minval, const uint64_t maxval, - const uint64_t default_value) { +bool parse_option(int argc, char *const argv[], int &narg, const char *option, uint64_t &value, const scale_mode scale, + const uint64_t minval, const uint64_t maxval, const uint64_t default_value) { const char *value_cstr; if (!parse_option(argc, argv, narg, option, &value_cstr)) @@ -134,17 +127,13 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, return true; } - if (strcmp(value_cstr, "rnd") == 0 || strcmp(value_cstr, "rand") == 0 || - strcmp(value_cstr, "random") == 0) { + if (strcmp(value_cstr, "rnd") == 0 || strcmp(value_cstr, "rand") == 0 || strcmp(value_cstr, "random") == 0) { value = minval; if (maxval > minval) { - uint64_t salt = (scale != entropy) - ? prng64() ^ UINT64_C(44263400549519813) - : (chrono::now_monotonic().fixedpoint ^ - UINT64_C(0xD85794512ED321FD)) * - UINT64_C(0x9120038359EAF3) ^ - chrono::now_realtime().fixedpoint * - UINT64_C(0x2FE5232BDC8E5F); + uint64_t salt = (scale != entropy) ? 
prng64() ^ UINT64_C(44263400549519813) + : (chrono::now_monotonic().fixedpoint ^ UINT64_C(0xD85794512ED321FD)) * + UINT64_C(0x9120038359EAF3) ^ + chrono::now_realtime().fixedpoint * UINT64_C(0x2FE5232BDC8E5F); value += salt % (maxval - minval); } if (scale == intkey) @@ -161,43 +150,32 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, raw = strtoull(value_cstr, &suffix, 10); } if (errno) - failure("Option '--%s' expects a numeric value (%s)\n", option, - test_strerror(errno)); + failure("Option '--%s' expects a numeric value (%s)\n", option, test_strerror(errno)); uint64_t multiplier = 1; if (suffix && *suffix) { if (scale == no_scale || scale == intkey) - failure("Option '--%s' doesn't accepts suffixes, so '%s' is unexpected\n", - option, suffix); + failure("Option '--%s' doesn't accepts suffixes, so '%s' is unexpected\n", option, suffix); if (strcmp(suffix, "K") == 0 || strcasecmp(suffix, "Kilo") == 0) multiplier = (scale == decimal) ? UINT64_C(1000) : UINT64_C(1024); else if (strcmp(suffix, "M") == 0 || strcasecmp(suffix, "Mega") == 0) - multiplier = - (scale == decimal) ? UINT64_C(1000) * 1000 : UINT64_C(1024) * 1024; + multiplier = (scale == decimal) ? UINT64_C(1000) * 1000 : UINT64_C(1024) * 1024; else if (strcmp(suffix, "G") == 0 || strcasecmp(suffix, "Giga") == 0) - multiplier = (scale == decimal) ? UINT64_C(1000) * 1000 * 1000 - : UINT64_C(1024) * 1024 * 1024; + multiplier = (scale == decimal) ? UINT64_C(1000) * 1000 * 1000 : UINT64_C(1024) * 1024 * 1024; else if (strcmp(suffix, "T") == 0 || strcasecmp(suffix, "Tera") == 0) - multiplier = (scale == decimal) ? UINT64_C(1000) * 1000 * 1000 * 1000 - : UINT64_C(1024) * 1024 * 1024 * 1024; - else if (scale == duration && - (strcmp(suffix, "s") == 0 || strcasecmp(suffix, "Seconds") == 0)) + multiplier = (scale == decimal) ? 
UINT64_C(1000) * 1000 * 1000 * 1000 : UINT64_C(1024) * 1024 * 1024 * 1024; + else if (scale == duration && (strcmp(suffix, "s") == 0 || strcasecmp(suffix, "Seconds") == 0)) multiplier = 1; - else if (scale == duration && - (strcmp(suffix, "m") == 0 || strcasecmp(suffix, "Minutes") == 0)) + else if (scale == duration && (strcmp(suffix, "m") == 0 || strcasecmp(suffix, "Minutes") == 0)) multiplier = 60; - else if (scale == duration && - (strcmp(suffix, "h") == 0 || strcasecmp(suffix, "Hours") == 0)) + else if (scale == duration && (strcmp(suffix, "h") == 0 || strcasecmp(suffix, "Hours") == 0)) multiplier = 3600; - else if (scale == duration && - (strcmp(suffix, "d") == 0 || strcasecmp(suffix, "Days") == 0)) + else if (scale == duration && (strcmp(suffix, "d") == 0 || strcasecmp(suffix, "Days") == 0)) multiplier = 3600 * 24; else - failure( - "Option '--%s' expects a numeric value with Kilo/Mega/Giga/Tera %s" - "suffixes, but '%s' is unexpected\n", - option, (scale == duration) ? "or Seconds/Minutes/Hours/Days " : "", - suffix); + failure("Option '--%s' expects a numeric value with Kilo/Mega/Giga/Tera %s" + "suffixes, but '%s' is unexpected\n", + option, (scale == duration) ? 
"or Seconds/Minutes/Hours/Days " : "", suffix); } if (raw >= UINT64_MAX / multiplier) @@ -205,47 +183,38 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, value = raw * multiplier; if (maxval && value > maxval) - failure("The maximal value for option '--%s' is %" PRIu64 "\n", option, - maxval); + failure("The maximal value for option '--%s' is %" PRIu64 "\n", option, maxval); if (value < minval) - failure("The minimal value for option '--%s' is %" PRIu64 "\n", option, - minval); + failure("The minimal value for option '--%s' is %" PRIu64 "\n", option, minval); if (scale == intkey) value &= ~3u; return true; } -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - unsigned &value, const scale_mode scale, - const unsigned minval, const unsigned maxval, - const unsigned default_value) { +bool parse_option(int argc, char *const argv[], int &narg, const char *option, unsigned &value, const scale_mode scale, + const unsigned minval, const unsigned maxval, const unsigned default_value) { uint64_t huge; - if (!parse_option(argc, argv, narg, option, huge, scale, minval, maxval, - default_value)) + if (!parse_option(argc, argv, narg, option, huge, scale, minval, maxval, default_value)) return false; value = unsigned(huge); return true; } -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - uint8_t &value, const uint8_t minval, const uint8_t maxval, - const uint8_t default_value) { +bool parse_option(int argc, char *const argv[], int &narg, const char *option, uint8_t &value, const uint8_t minval, + const uint8_t maxval, const uint8_t default_value) { uint64_t huge; - if (!parse_option(argc, argv, narg, option, huge, no_scale, minval, maxval, - default_value)) + if (!parse_option(argc, argv, narg, option, huge, no_scale, minval, maxval, default_value)) return false; value = uint8_t(huge); return true; } -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - 
int64_t &value, const int64_t minval, const int64_t maxval, - const int64_t default_value) { +bool parse_option(int argc, char *const argv[], int &narg, const char *option, int64_t &value, const int64_t minval, + const int64_t maxval, const int64_t default_value) { uint64_t proxy = uint64_t(value); - if (parse_option(argc, argv, narg, option, proxy, config::binary, - uint64_t(minval), uint64_t(maxval), + if (parse_option(argc, argv, narg, option, proxy, config::binary, uint64_t(minval), uint64_t(maxval), uint64_t(default_value))) { value = int64_t(proxy); return true; @@ -253,12 +222,10 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, return false; } -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - int32_t &value, const int32_t minval, const int32_t maxval, - const int32_t default_value) { +bool parse_option(int argc, char *const argv[], int &narg, const char *option, int32_t &value, const int32_t minval, + const int32_t maxval, const int32_t default_value) { uint64_t proxy = uint64_t(value); - if (parse_option(argc, argv, narg, option, proxy, config::binary, - uint64_t(minval), uint64_t(maxval), + if (parse_option(argc, argv, narg, option, proxy, config::binary, uint64_t(minval), uint64_t(maxval), uint64_t(default_value))) { value = int32_t(proxy); return true; @@ -266,14 +233,12 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, return false; } -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - logging::loglevel &loglevel) { +bool parse_option(int argc, char *const argv[], int &narg, const char *option, logging::loglevel &loglevel) { const char *value_cstr; if (!parse_option(argc, argv, narg, option, &value_cstr)) return false; - if (strcmp(value_cstr, "min") == 0 || strcmp(value_cstr, "minimal") == 0 || - strcmp(value_cstr, "fatal") == 0) { + if (strcmp(value_cstr, "min") == 0 || strcmp(value_cstr, "minimal") == 0 || strcmp(value_cstr, 
"fatal") == 0) { loglevel = logging::failure; return true; } @@ -308,8 +273,7 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, return true; } - if (strcmp(value_cstr, "max") == 0 || strcmp(value_cstr, "maximal") == 0 || - strcmp(value_cstr, "extra") == 0) { + if (strcmp(value_cstr, "max") == 0 || strcmp(value_cstr, "maximal") == 0 || strcmp(value_cstr, "extra") == 0) { loglevel = logging::extra; return true; } @@ -329,8 +293,7 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, failure("Unknown log-level '%s', for option '--%s'\n", value_cstr, option); } -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - bool &value) { +bool parse_option(int argc, char *const argv[], int &narg, const char *option, bool &value) { const char *value_cstr = nullptr; if (!parse_option(argc, argv, narg, option, &value_cstr, "yes")) { const char *current = argv[narg]; @@ -338,8 +301,7 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, value = false; return true; } - if (strncmp(current, "--dont-", 7) == 0 && - strcmp(current + 7, option) == 0) { + if (strncmp(current, "--dont-", 7) == 0 && strcmp(current + 7, option) == 0) { value = false; return true; } @@ -361,41 +323,36 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, return true; } - failure( - "Option '--%s' expects a 'boolean' value Yes/No, so '%s' is unexpected\n", - option, value_cstr); + failure("Option '--%s' expects a 'boolean' value Yes/No, so '%s' is unexpected\n", option, value_cstr); } //----------------------------------------------------------------------------- -const struct option_verb mode_bits[] = { - {"rdonly", unsigned(MDBX_RDONLY)}, - {"nosync-utterly", unsigned(MDBX_UTTERLY_NOSYNC)}, - {"nosubdir", unsigned(MDBX_NOSUBDIR)}, - {"nosync-safe", unsigned(MDBX_SAFE_NOSYNC)}, - {"nometasync", unsigned(MDBX_NOMETASYNC)}, - {"writemap", unsigned(MDBX_WRITEMAP)}, - 
{"nostickythreads", unsigned(MDBX_NOSTICKYTHREADS)}, - {"no-sticky-threads", unsigned(MDBX_NOSTICKYTHREADS)}, - {"nordahead", unsigned(MDBX_NORDAHEAD)}, - {"nomeminit", unsigned(MDBX_NOMEMINIT)}, - {"lifo", unsigned(MDBX_LIFORECLAIM)}, - {"perturb", unsigned(MDBX_PAGEPERTURB)}, - {"accede", unsigned(MDBX_ACCEDE)}, - {"exclusive", unsigned(MDBX_EXCLUSIVE)}, - {nullptr, 0}}; +const struct option_verb mode_bits[] = {{"rdonly", unsigned(MDBX_RDONLY)}, + {"nosync-utterly", unsigned(MDBX_UTTERLY_NOSYNC)}, + {"nosubdir", unsigned(MDBX_NOSUBDIR)}, + {"nosync-safe", unsigned(MDBX_SAFE_NOSYNC)}, + {"nometasync", unsigned(MDBX_NOMETASYNC)}, + {"writemap", unsigned(MDBX_WRITEMAP)}, + {"nostickythreads", unsigned(MDBX_NOSTICKYTHREADS)}, + {"no-sticky-threads", unsigned(MDBX_NOSTICKYTHREADS)}, + {"nordahead", unsigned(MDBX_NORDAHEAD)}, + {"nomeminit", unsigned(MDBX_NOMEMINIT)}, + {"lifo", unsigned(MDBX_LIFORECLAIM)}, + {"perturb", unsigned(MDBX_PAGEPERTURB)}, + {"accede", unsigned(MDBX_ACCEDE)}, + {"exclusive", unsigned(MDBX_EXCLUSIVE)}, + {nullptr, 0}}; -const struct option_verb table_bits[] = { - {"key.reverse", unsigned(MDBX_REVERSEKEY)}, - {"key.integer", unsigned(MDBX_INTEGERKEY)}, - {"data.integer", unsigned(MDBX_INTEGERDUP | MDBX_DUPFIXED | MDBX_DUPSORT)}, - {"data.fixed", unsigned(MDBX_DUPFIXED | MDBX_DUPSORT)}, - {"data.reverse", unsigned(MDBX_REVERSEDUP | MDBX_DUPSORT)}, - {"data.dups", unsigned(MDBX_DUPSORT)}, - {nullptr, 0}}; +const struct option_verb table_bits[] = {{"key.reverse", unsigned(MDBX_REVERSEKEY)}, + {"key.integer", unsigned(MDBX_INTEGERKEY)}, + {"data.integer", unsigned(MDBX_INTEGERDUP | MDBX_DUPFIXED | MDBX_DUPSORT)}, + {"data.fixed", unsigned(MDBX_DUPFIXED | MDBX_DUPSORT)}, + {"data.reverse", unsigned(MDBX_REVERSEDUP | MDBX_DUPSORT)}, + {"data.dups", unsigned(MDBX_DUPSORT)}, + {nullptr, 0}}; -static void dump_verbs(const char *caption, size_t bits, - const struct option_verb *verbs) { +static void dump_verbs(const char *caption, size_t bits, const 
struct option_verb *verbs) { log_verbose("%s: 0x%" PRIx64 " = ", caption, (uint64_t)bits); const char *comma = ""; @@ -429,28 +386,21 @@ void dump(const char *title) { logging::local_suffix indent(title); for (auto i = global::actors.begin(); i != global::actors.end(); ++i) { - log_verbose("#%u, testcase %s, space_id/table %u\n", i->actor_id, - testcase2str(i->testcase), i->space_id); + log_verbose("#%u, testcase %s, space_id/table %u\n", i->actor_id, testcase2str(i->testcase), i->space_id); indent.push(); log_verbose("prng-seed: %u\n", i->params.prng_seed); if (i->params.loglevel) { log_verbose("log: level %u, %s\n", i->params.loglevel, - i->params.pathname_log.empty() - ? "console" - : i->params.pathname_log.c_str()); + i->params.pathname_log.empty() ? "console" : i->params.pathname_log.c_str()); } - log_verbose("database: %s, size %" PRIuPTR "[%" PRIiPTR "..%" PRIiPTR - ", %i %i, %i]\n", - i->params.pathname_db.c_str(), i->params.size_now, - i->params.size_lower, i->params.size_upper, - i->params.shrink_threshold, i->params.growth_step, - i->params.pagesize); + log_verbose("database: %s, size %" PRIuPTR "[%" PRIiPTR "..%" PRIiPTR ", %i %i, %i]\n", + i->params.pathname_db.c_str(), i->params.size_now, i->params.size_lower, i->params.size_upper, + i->params.shrink_threshold, i->params.growth_step, i->params.pagesize); dump_verbs("mode", i->params.mode_flags, mode_bits); - log_verbose("random-writemap: %s\n", - i->params.random_writemap ? "Yes" : "No"); + log_verbose("random-writemap: %s\n", i->params.random_writemap ? 
"Yes" : "No"); dump_verbs("table", i->params.table_flags, table_bits); if (i->params.test_nops) @@ -465,62 +415,46 @@ void dump(const char *title) { log_verbose("threads %u\n", i->params.nthreads); - log_verbose( - "keygen.params: case %s, width %u, mesh %u, rotate %u, offset %" PRIu64 - ", split %u/%u\n", - keygencase2str(i->params.keygen.keycase), i->params.keygen.width, - i->params.keygen.mesh, i->params.keygen.rotate, i->params.keygen.offset, - i->params.keygen.split, - i->params.keygen.width - i->params.keygen.split); - log_verbose("keygen.zerofill: %s\n", - i->params.keygen.zero_fill ? "Yes" : "No"); - log_verbose("key: minlen %u, maxlen %u\n", i->params.keylen_min, - i->params.keylen_max); - log_verbose("data: minlen %u, maxlen %u\n", i->params.datalen_min, - i->params.datalen_max); + log_verbose("keygen.params: case %s, width %u, mesh %u, rotate %u, offset %" PRIu64 ", split %u/%u\n", + keygencase2str(i->params.keygen.keycase), i->params.keygen.width, i->params.keygen.mesh, + i->params.keygen.rotate, i->params.keygen.offset, i->params.keygen.split, + i->params.keygen.width - i->params.keygen.split); + log_verbose("keygen.zerofill: %s\n", i->params.keygen.zero_fill ? 
"Yes" : "No"); + log_verbose("key: minlen %u, maxlen %u\n", i->params.keylen_min, i->params.keylen_max); + log_verbose("data: minlen %u, maxlen %u\n", i->params.datalen_min, i->params.datalen_max); - log_verbose("batch: read %u, write %u\n", i->params.batch_read, - i->params.batch_write); + log_verbose("batch: read %u, write %u\n", i->params.batch_read, i->params.batch_write); if (i->params.waitfor_nops) - log_verbose("wait: actor %u for %u ops\n", i->wait4id, - i->params.waitfor_nops); + log_verbose("wait: actor %u for %u ops\n", i->wait4id, i->params.waitfor_nops); else if (i->params.delaystart) dump_duration("delay", i->params.delaystart); else log_verbose("no-delay\n"); if (i->params.inject_writefaultn) - log_verbose("inject-writefault on %u ops\n", - i->params.inject_writefaultn); + log_verbose("inject-writefault on %u ops\n", i->params.inject_writefaultn); else log_verbose("no-inject-writefault\n"); - log_verbose("limits: readers %u, tables %u, txn-bytes %zu\n", - i->params.max_readers, i->params.max_tables, + log_verbose("limits: readers %u, tables %u, txn-bytes %zu\n", i->params.max_readers, i->params.max_tables, mdbx_limits_txnsize_max(i->params.pagesize)); log_verbose("drop table: %s\n", i->params.drop_table ? "Yes" : "No"); - log_verbose("ignore MDBX_MAP_FULL error: %s\n", - i->params.ignore_dbfull ? "Yes" : "No"); - log_verbose("verifying by speculum: %s\n", - i->params.speculum ? "Yes" : "No"); + log_verbose("ignore MDBX_MAP_FULL error: %s\n", i->params.ignore_dbfull ? "Yes" : "No"); + log_verbose("verifying by speculum: %s\n", i->params.speculum ? "Yes" : "No"); indent.pop(); } dump_duration("timeout", global::config::timeout_duration_seconds); - log_verbose("cleanup: before %s, after %s\n", - global::config::cleanup_before ? "Yes" : "No", + log_verbose("cleanup: before %s, after %s\n", global::config::cleanup_before ? "Yes" : "No", global::config::cleanup_after ? "Yes" : "No"); log_verbose("failfast: %s\n", global::config::failfast ? 
"Yes" : "No"); - log_verbose("progress indicator: %s\n", - global::config::progress_indicator ? "Yes" : "No"); - log_verbose("console mode: %s\n", - global::config::console_mode ? "Yes" : "No"); - log_verbose("geometry jitter: %s\n", - global::config::geometry_jitter ? "Yes" : "No"); + log_verbose("progress indicator: %s\n", global::config::progress_indicator ? "Yes" : "No"); + log_verbose("console mode: %s\n", global::config::console_mode ? "Yes" : "No"); + log_verbose("geometry jitter: %s\n", global::config::geometry_jitter ? "Yes" : "No"); } } /* namespace config */ @@ -529,11 +463,8 @@ void dump(const char *title) { using namespace config; -actor_config::actor_config(actor_testcase testcase, const actor_params ¶ms, - unsigned space_id, unsigned wait4id) - : actor_config_pod(1 + unsigned(global::actors.size()), testcase, space_id, - wait4id), - params(params) {} +actor_config::actor_config(actor_testcase testcase, const actor_params ¶ms, unsigned space_id, unsigned wait4id) + : actor_config_pod(1 + unsigned(global::actors.size()), testcase, space_id, wait4id), params(params) {} const std::string actor_config::serialize(const char *prefix) const { simple_checksum checksum; @@ -551,25 +482,19 @@ const std::string actor_config::serialize(const char *prefix) const { result.push_back('|'); #if __cplusplus > 201400 - static_assert(std::is_trivially_copyable::value, - "actor_params_pod should by POD"); + static_assert(std::is_trivially_copyable::value, "actor_params_pod should by POD"); #else - static_assert(std::is_standard_layout::value, - "actor_params_pod should by POD"); + static_assert(std::is_standard_layout::value, "actor_params_pod should by POD"); #endif - result.append(data2hex(static_cast(¶ms), - sizeof(actor_params_pod), checksum)); + result.append(data2hex(static_cast(¶ms), sizeof(actor_params_pod), checksum)); result.push_back('|'); #if __cplusplus > 201400 - static_assert(std::is_trivially_copyable::value, - "actor_config_pod should by POD"); + 
static_assert(std::is_trivially_copyable::value, "actor_config_pod should by POD"); #else - static_assert(std::is_standard_layout::value, - "actor_config_pod should by POD"); + static_assert(std::is_standard_layout::value, "actor_config_pod should by POD"); #endif - result.append(data2hex(static_cast(this), - sizeof(actor_config_pod), checksum)); + result.append(data2hex(static_cast(this), sizeof(actor_config_pod), checksum)); result.push_back('|'); result.push_back(global::config::progress_indicator ? 'Y' : 'N'); checksum.push(global::config::progress_indicator); @@ -615,16 +540,12 @@ bool actor_config::deserialize(const char *str, actor_config &config) { return false; } #if __cplusplus > 201400 - static_assert(std::is_trivially_copyable::value, - "actor_params_pod should by POD"); + static_assert(std::is_trivially_copyable::value, "actor_params_pod should by POD"); #else - static_assert(std::is_standard_layout::value, - "actor_params_pod should by POD"); + static_assert(std::is_standard_layout::value, "actor_params_pod should by POD"); #endif - if (!hex2data(str, slash, static_cast(&config.params), - sizeof(actor_params_pod), checksum)) { - TRACE("<< actor_config::deserialize: actor_params_pod(%.*s)\n", - (int)(slash - str), str); + if (!hex2data(str, slash, static_cast(&config.params), sizeof(actor_params_pod), checksum)) { + TRACE("<< actor_config::deserialize: actor_params_pod(%.*s)\n", (int)(slash - str), str); return false; } str = slash + 1; @@ -635,16 +556,12 @@ bool actor_config::deserialize(const char *str, actor_config &config) { return false; } #if __cplusplus > 201400 - static_assert(std::is_trivially_copyable::value, - "actor_config_pod should by POD"); + static_assert(std::is_trivially_copyable::value, "actor_config_pod should by POD"); #else - static_assert(std::is_standard_layout::value, - "actor_config_pod should by POD"); + static_assert(std::is_standard_layout::value, "actor_config_pod should by POD"); #endif - if (!hex2data(str, slash, 
static_cast(&config), - sizeof(actor_config_pod), checksum)) { - TRACE("<< actor_config::deserialize: actor_config_pod(%.*s)\n", - (int)(slash - str), str); + if (!hex2data(str, slash, static_cast(&config), sizeof(actor_config_pod), checksum)) { + TRACE("<< actor_config::deserialize: actor_config_pod(%.*s)\n", (int)(slash - str), str); return false; } str = slash + 1; @@ -654,8 +571,7 @@ bool actor_config::deserialize(const char *str, actor_config &config) { TRACE("<< actor_config::deserialize: slash-5\n"); return false; } - if ((str[0] == 'Y' || str[0] == 'N') && (str[1] == 'Y' || str[1] == 'N') && - (str[2] == 'Y' || str[2] == 'N')) { + if ((str[0] == 'Y' || str[0] == 'N') && (str[1] == 'Y' || str[1] == 'N') && (str[2] == 'Y' || str[2] == 'N')) { global::config::progress_indicator = str[0] == 'Y'; checksum.push(global::config::progress_indicator); global::config::console_mode = str[1] == 'Y'; @@ -690,21 +606,14 @@ bool actor_config::deserialize(const char *str, actor_config &config) { return true; } -unsigned actor_params::mdbx_keylen_min() const { - return unsigned(mdbx_limits_keysize_min(table_flags)); -} +unsigned actor_params::mdbx_keylen_min() const { return unsigned(mdbx_limits_keysize_min(table_flags)); } -unsigned actor_params::mdbx_keylen_max() const { - return unsigned(mdbx_limits_keysize_max(pagesize, table_flags)); -} +unsigned actor_params::mdbx_keylen_max() const { return unsigned(mdbx_limits_keysize_max(pagesize, table_flags)); } -unsigned actor_params::mdbx_datalen_min() const { - return unsigned(mdbx_limits_valsize_min(table_flags)); -} +unsigned actor_params::mdbx_datalen_min() const { return unsigned(mdbx_limits_valsize_min(table_flags)); } unsigned actor_params::mdbx_datalen_max() const { - return std::min(unsigned(UINT16_MAX), - unsigned(mdbx_limits_valsize_max(pagesize, table_flags))); + return std::min(unsigned(UINT16_MAX), unsigned(mdbx_limits_valsize_max(pagesize, table_flags))); } bool actor_params::make_keygen_linear() { @@ -713,26 
+622,18 @@ bool actor_params::make_keygen_linear() { keygen.rotate = 0; keygen.offset = 0; const auto max_serial = serial_mask(keygen.width) + base; - const auto max_key_serial = (keygen.split && (table_flags & MDBX_DUPSORT)) - ? max_serial >> keygen.split - : max_serial; - const auto max_value_serial = (keygen.split && (table_flags & MDBX_DUPSORT)) - ? serial_mask(keygen.split) - : 0; + const auto max_key_serial = (keygen.split && (table_flags & MDBX_DUPSORT)) ? max_serial >> keygen.split : max_serial; + const auto max_value_serial = (keygen.split && (table_flags & MDBX_DUPSORT)) ? serial_mask(keygen.split) : 0; - while (keylen_min < 8 && - (keylen_min == 0 || serial_mask(keylen_min * 8) < max_key_serial)) { + while (keylen_min < 8 && (keylen_min == 0 || serial_mask(keylen_min * 8) < max_key_serial)) { keylen_min += (table_flags & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) ? 4 : 1; if (keylen_max < keylen_min) keylen_max = keylen_min; } if (table_flags & MDBX_DUPSORT) - while ( - datalen_min < 8 && - (datalen_min == 0 || serial_mask(datalen_min * 8) < max_value_serial)) { - datalen_min += - (table_flags & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) ? 4 : 1; + while (datalen_min < 8 && (datalen_min == 0 || serial_mask(datalen_min * 8) < max_value_serial)) { + datalen_min += (table_flags & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) ? 
4 : 1; if (datalen_max < datalen_min) datalen_max = datalen_min; } diff --git a/test/config.h++ b/test/config.h++ index 45ab7cf4..71ff5c97 100644 --- a/test/config.h++ +++ b/test/config.h++ @@ -54,18 +54,16 @@ namespace config { enum scale_mode { no_scale, decimal, binary, duration, intkey, entropy }; -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - const char **value, const char *default_value = nullptr); +bool parse_option(int argc, char *const argv[], int &narg, const char *option, const char **value, + const char *default_value = nullptr); -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - std::string &value, bool allow_empty = false); +bool parse_option(int argc, char *const argv[], int &narg, const char *option, std::string &value, + bool allow_empty = false); -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - std::string &value, bool allow_empty, +bool parse_option(int argc, char *const argv[], int &narg, const char *option, std::string &value, bool allow_empty, const char *default_value); -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - bool &value); +bool parse_option(int argc, char *const argv[], int &narg, const char *option, bool &value); struct option_verb { const char *const verb; @@ -73,8 +71,7 @@ struct option_verb { }; template -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - MASK &mask, const option_verb *verbs) { +bool parse_option(int argc, char *const argv[], int &narg, const char *option, MASK &mask, const option_verb *verbs) { static_assert(sizeof(MASK) <= sizeof(unsigned), "WTF?"); unsigned u = unsigned(mask); if (parse_option(argc, argv, narg, option, u, verbs)) { @@ -85,49 +82,36 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, } template <> -bool parse_option(int argc, char *const argv[], int &narg, - const char *option, unsigned &mask, +bool 
parse_option(int argc, char *const argv[], int &narg, const char *option, unsigned &mask, const option_verb *verbs); -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - uint64_t &value, const scale_mode scale, - const uint64_t minval = 0, const uint64_t maxval = INT64_MAX, - const uint64_t default_value = 0); +bool parse_option(int argc, char *const argv[], int &narg, const char *option, uint64_t &value, const scale_mode scale, + const uint64_t minval = 0, const uint64_t maxval = INT64_MAX, const uint64_t default_value = 0); -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - unsigned &value, const scale_mode scale, - const unsigned minval = 0, const unsigned maxval = INT32_MAX, - const unsigned default_value = 0); +bool parse_option(int argc, char *const argv[], int &narg, const char *option, unsigned &value, const scale_mode scale, + const unsigned minval = 0, const unsigned maxval = INT32_MAX, const unsigned default_value = 0); -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - uint8_t &value, const uint8_t minval = 0, +bool parse_option(int argc, char *const argv[], int &narg, const char *option, uint8_t &value, const uint8_t minval = 0, const uint8_t maxval = 255, const uint8_t default_value = 0); -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - int64_t &value, const int64_t minval, const int64_t maxval, - const int64_t default_value = -1); +bool parse_option(int argc, char *const argv[], int &narg, const char *option, int64_t &value, const int64_t minval, + const int64_t maxval, const int64_t default_value = -1); -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - int32_t &value, const int32_t minval, const int32_t maxval, - const int32_t default_value = -1); +bool parse_option(int argc, char *const argv[], int &narg, const char *option, int32_t &value, const int32_t minval, + const int32_t maxval, const 
int32_t default_value = -1); -inline bool parse_option_intptr(int argc, char *const argv[], int &narg, - const char *option, intptr_t &value, - const intptr_t minval, const intptr_t maxval, - const intptr_t default_value = -1) { +inline bool parse_option_intptr(int argc, char *const argv[], int &narg, const char *option, intptr_t &value, + const intptr_t minval, const intptr_t maxval, const intptr_t default_value = -1) { static_assert(sizeof(intptr_t) == 4 || sizeof(intptr_t) == 8, "WTF?"); if (sizeof(intptr_t) == 8) - return parse_option(argc, argv, narg, option, - *reinterpret_cast(&value), int64_t(minval), + return parse_option(argc, argv, narg, option, *reinterpret_cast(&value), int64_t(minval), int64_t(maxval), int64_t(default_value)); else - return parse_option(argc, argv, narg, option, - *reinterpret_cast(&value), int32_t(minval), + return parse_option(argc, argv, narg, option, *reinterpret_cast(&value), int32_t(minval), int32_t(maxval), int32_t(default_value)); } -bool parse_option(int argc, char *const argv[], int &narg, const char *option, - logging::loglevel &); +bool parse_option(int argc, char *const argv[], int &narg, const char *option, logging::loglevel &); //----------------------------------------------------------------------------- struct keygen_params_pod { @@ -295,10 +279,8 @@ struct actor_config_pod { unsigned signal_nops{0}; actor_config_pod() = default; - actor_config_pod(unsigned actor_id, actor_testcase testcase, - unsigned space_id, unsigned wait4id) - : actor_id(actor_id), space_id(space_id), testcase(testcase), - wait4id(wait4id) {} + actor_config_pod(unsigned actor_id, actor_testcase testcase, unsigned space_id, unsigned wait4id) + : actor_id(actor_id), space_id(space_id), testcase(testcase), wait4id(wait4id) {} }; extern const struct option_verb mode_bits[]; @@ -326,8 +308,7 @@ struct actor_config : public config::actor_config_pod { bool wanna_event4signalling() const { return true /* TODO ? 
*/; } actor_config() = default; - actor_config(actor_testcase testcase, const actor_params ¶ms, - unsigned space_id, unsigned wait4id); + actor_config(actor_testcase testcase, const actor_params ¶ms, unsigned space_id, unsigned wait4id); actor_config(const char *str) : actor_config() { if (!deserialize(str, *this)) diff --git a/test/copy.c++ b/test/copy.c++ index e21a1318..b5d068d8 100644 --- a/test/copy.c++ +++ b/test/copy.c++ @@ -9,8 +9,7 @@ class testcase_copy : public testcase { public: testcase_copy(const actor_config &config, const mdbx_pid_t pid) - : testcase(config, pid), - copy_pathname(config.params.pathname_db + "-copy") {} + : testcase(config, pid), copy_pathname(config.params.pathname_db + "-copy") {} bool run() override; }; REGISTER_TESTCASE(copy); @@ -21,14 +20,10 @@ void testcase_copy::copy_db(const bool with_compaction) { failure_perror("osal_removefile()", err); if (flipcoin()) { - err = mdbx_env_copy(db_guard.get(), copy_pathname.c_str(), - with_compaction ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS); - log_verbose("mdbx_env_copy(%s), err %d", with_compaction ? "true" : "false", - err); + err = mdbx_env_copy(db_guard.get(), copy_pathname.c_str(), with_compaction ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS); + log_verbose("mdbx_env_copy(%s), err %d", with_compaction ? "true" : "false", err); if (unlikely(err != MDBX_SUCCESS)) - failure_perror(with_compaction ? "mdbx_env_copy(MDBX_CP_COMPACT)" - : "mdbx_env_copy(MDBX_CP_ASIS)", - err); + failure_perror(with_compaction ? "mdbx_env_copy(MDBX_CP_COMPACT)" : "mdbx_env_copy(MDBX_CP_ASIS)", err); } else { do { const bool ro = mode_readonly() || flipcoin(); @@ -36,26 +31,20 @@ void testcase_copy::copy_db(const bool with_compaction) { const bool dynsize = flipcoin(); const bool flush = flipcoin(); const bool enable_renew = flipcoin(); - const MDBX_copy_flags_t flags = - (with_compaction ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS) | - (dynsize ? MDBX_CP_FORCE_DYNAMIC_SIZE : MDBX_CP_DEFAULTS) | - (throttle ? 
MDBX_CP_THROTTLE_MVCC : MDBX_CP_DEFAULTS) | - (flush ? MDBX_CP_DEFAULTS : MDBX_CP_DONT_FLUSH) | - (enable_renew ? MDBX_CP_RENEW_TXN : MDBX_CP_DEFAULTS); + const MDBX_copy_flags_t flags = (with_compaction ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS) | + (dynsize ? MDBX_CP_FORCE_DYNAMIC_SIZE : MDBX_CP_DEFAULTS) | + (throttle ? MDBX_CP_THROTTLE_MVCC : MDBX_CP_DEFAULTS) | + (flush ? MDBX_CP_DEFAULTS : MDBX_CP_DONT_FLUSH) | + (enable_renew ? MDBX_CP_RENEW_TXN : MDBX_CP_DEFAULTS); txn_begin(ro); - err = - mdbx_txn_copy2pathname(txn_guard.get(), copy_pathname.c_str(), flags); + err = mdbx_txn_copy2pathname(txn_guard.get(), copy_pathname.c_str(), flags); log_verbose("mdbx_txn_copy2pathname(flags=0x%X), err %d", flags, err); txn_end(err != MDBX_SUCCESS || flipcoin()); - if (unlikely( - err != MDBX_SUCCESS && !(throttle && err == MDBX_OUSTED) && - !(!enable_renew && err == MDBX_MVCC_RETARDED) && - !(err == MDBX_EINVAL && !ro && - (flags & (MDBX_CP_THROTTLE_MVCC | MDBX_CP_RENEW_TXN)) != 0))) - failure_perror(with_compaction - ? "mdbx_txn_copy2pathname(MDBX_CP_COMPACT)" - : "mdbx_txn_copy2pathname(MDBX_CP_ASIS)", - err); + if (unlikely(err != MDBX_SUCCESS && !(throttle && err == MDBX_OUSTED) && + !(!enable_renew && err == MDBX_MVCC_RETARDED) && + !(err == MDBX_EINVAL && !ro && (flags & (MDBX_CP_THROTTLE_MVCC | MDBX_CP_RENEW_TXN)) != 0))) + failure_perror( + with_compaction ? 
"mdbx_txn_copy2pathname(MDBX_CP_COMPACT)" : "mdbx_txn_copy2pathname(MDBX_CP_ASIS)", err); } while (err != MDBX_SUCCESS); } } diff --git a/test/dead.c++ b/test/dead.c++ index d4bbbc19..c34443eb 100644 --- a/test/dead.c++ +++ b/test/dead.c++ @@ -5,8 +5,7 @@ class testcase_deadread : public testcase { public: - testcase_deadread(const actor_config &config, const mdbx_pid_t pid) - : testcase(config, pid) {} + testcase_deadread(const actor_config &config, const mdbx_pid_t pid) : testcase(config, pid) {} bool run() override; }; REGISTER_TESTCASE(deadread); @@ -24,8 +23,7 @@ bool testcase_deadread::run() { class testcase_deadwrite : public testcase { public: - testcase_deadwrite(const actor_config &config, const mdbx_pid_t pid) - : testcase(config, pid) {} + testcase_deadwrite(const actor_config &config, const mdbx_pid_t pid) : testcase(config, pid) {} bool run() override; }; diff --git a/test/extra/crunched_delete.c++ b/test/extra/crunched_delete.c++ index d11b5528..fde48950 100644 --- a/test/extra/crunched_delete.c++ +++ b/test/extra/crunched_delete.c++ @@ -26,8 +26,7 @@ std::string format_va(const char *fmt, va_list ap) { result.reserve(size_t(needed + 1)); result.resize(size_t(needed), '\0'); assert(int(result.capacity()) > needed); - int actual = vsnprintf(const_cast(result.data()), result.capacity(), - fmt, ones); + int actual = vsnprintf(const_cast(result.data()), result.capacity(), fmt, ones); assert(actual == needed); (void)actual; va_end(ones); @@ -47,10 +46,8 @@ struct acase { unsigned vlen_min, vlen_max; unsigned dupmax_log2; - acase(unsigned klen_min, unsigned klen_max, unsigned vlen_min, - unsigned vlen_max, unsigned dupmax_log2) - : klen_min(klen_min), klen_max(klen_max), vlen_min(vlen_min), - vlen_max(vlen_max), dupmax_log2(dupmax_log2) {} + acase(unsigned klen_min, unsigned klen_max, unsigned vlen_min, unsigned vlen_max, unsigned dupmax_log2) + : klen_min(klen_min), klen_max(klen_max), vlen_min(vlen_min), vlen_max(vlen_max), dupmax_log2(dupmax_log2) {} }; 
// std::random_device rd; @@ -80,33 +77,26 @@ static mdbx::slice mk_val(mdbx::default_buffer &buf, const acase &thecase) { static std::string name(unsigned n) { return format("Commitment_%05u", n); } -static mdbx::map_handle create_and_fill(mdbx::txn txn, const acase &thecase, - const unsigned n) { +static mdbx::map_handle create_and_fill(mdbx::txn txn, const acase &thecase, const unsigned n) { auto map = txn.create_map(name(n), - (thecase.klen_min == thecase.klen_max && - (thecase.klen_min == 4 || thecase.klen_max == 8)) + (thecase.klen_min == thecase.klen_max && (thecase.klen_min == 4 || thecase.klen_max == 8)) ? mdbx::key_mode::ordinal : mdbx::key_mode::usual, - (thecase.vlen_min == thecase.vlen_max) - ? mdbx::value_mode::multi_samelength - : mdbx::value_mode::multi); + (thecase.vlen_min == thecase.vlen_max) ? mdbx::value_mode::multi_samelength + : mdbx::value_mode::multi); if (txn.get_map_stat(map).ms_entries < NN) { mdbx::default_buffer k, v; for (auto i = 0u; i < NN; i++) { mk_key(k, thecase); - for (auto ii = thecase.dupmax_log2 - ? 1u + (rnd() & ((2u << thecase.dupmax_log2) - 1u)) - : 1u; - ii > 0; --ii) + for (auto ii = thecase.dupmax_log2 ? 
1u + (rnd() & ((2u << thecase.dupmax_log2) - 1u)) : 1u; ii > 0; --ii) txn.upsert(map, k, mk_val(v, thecase)); } } return map; } -static void chunched_delete(mdbx::txn txn, const acase &thecase, - const unsigned n) { +static void chunched_delete(mdbx::txn txn, const acase &thecase, const unsigned n) { // printf(">> %s, case #%i\n", __FUNCTION__, n); mdbx::default_buffer k, v; auto map = txn.open_map_accede(name(n)); @@ -121,27 +111,20 @@ static void chunched_delete(mdbx::txn txn, const acase &thecase, bool last_r; if (true == ((last_op = "MDBX_GET_BOTH"), - (last_r = cursor.find_multivalue( - mk_key(k, thecase), mk_val(v, thecase), false))) || + (last_r = cursor.find_multivalue(mk_key(k, thecase), mk_val(v, thecase), false))) || rnd() % 3 == 0 || - true == ((last_op = "MDBX_SET_RANGE"), - (last_r = cursor.lower_bound(mk_key(k, thecase), false)))) { + true == ((last_op = "MDBX_SET_RANGE"), (last_r = cursor.lower_bound(mk_key(k, thecase), false)))) { int i = int(rnd() % 7) - 3; // if (i) // printf(" %s -> %s\n", last_op, last_r ? "true" : "false"); // printf("== shift multi %i\n", i); try { - while (i < 0 && - true == ((last_op = "MDBX_PREV_DUP"), - (last_r = cursor.to_current_prev_multi(false)))) + while (i < 0 && true == ((last_op = "MDBX_PREV_DUP"), (last_r = cursor.to_current_prev_multi(false)))) ++i; - while (i > 0 && - true == ((last_op = "MDBX_NEXT_DUP"), - (last_r = cursor.to_current_next_multi(false)))) + while (i > 0 && true == ((last_op = "MDBX_NEXT_DUP"), (last_r = cursor.to_current_next_multi(false)))) --i; } catch (const mdbx::no_data &) { - printf("cursor_del() -> exception, last %s %s\n", last_op, - last_r ? "true" : "false"); + printf("cursor_del() -> exception, last %s %s\n", last_op, last_r ? "true" : "false"); continue; } } @@ -159,8 +142,7 @@ static void chunched_delete(mdbx::txn txn, const acase &thecase, // printf(" cursor_del() -> %s\n", last_r ? 
"true" : "false"); } while (cursor.to_next(false) && --i > 0); } catch (const mdbx::no_data &) { - printf("cursor_del() -> exception, last %s %s\n", last_op, - last_r ? "true" : "false"); + printf("cursor_del() -> exception, last %s %s\n", last_op, last_r ? "true" : "false"); } // (void) last_op; @@ -178,8 +160,8 @@ static void chunched_delete(mdbx::txn txn, const acase &thecase, static char log_buffer[1024]; -static void logger_nofmt(MDBX_log_level_t loglevel, const char *function, - int line, const char *msg, unsigned length) noexcept { +static void logger_nofmt(MDBX_log_level_t loglevel, const char *function, int line, const char *msg, + unsigned length) noexcept { (void)length; (void)loglevel; fprintf(stdout, "%s:%u %s", function, line, msg); @@ -187,12 +169,10 @@ static void logger_nofmt(MDBX_log_level_t loglevel, const char *function, bool outofrange_prev(mdbx::env env) { mdbx::cursor_managed cursor; - const std::array items = { - {{"k1", "v1"}, {"k1", "v2"}, {"k2", "v1"}, {"k2", "v2"}}}; + const std::array items = {{{"k1", "v1"}, {"k1", "v2"}, {"k2", "v1"}, {"k2", "v2"}}}; auto txn = env.start_write(); - auto multi = - txn.create_map("multi", mdbx::key_mode::usual, mdbx::value_mode::multi); + auto multi = txn.create_map("multi", mdbx::key_mode::usual, mdbx::value_mode::multi); auto simple = txn.create_map("simple"); txn.clear_map(multi); txn.clear_map(simple); @@ -242,12 +222,10 @@ bool outofrange_prev(mdbx::env env) { } bool next_prev_current(mdbx::env env) { - const std::array items = { - {{"k1", "v1"}, {"k1", "v2"}, {"k2", "v1"}, {"k2", "v2"}}}; + const std::array items = {{{"k1", "v1"}, {"k1", "v2"}, {"k2", "v1"}, {"k2", "v2"}}}; auto txn = env.start_write(); - auto map = - txn.create_map("multi", mdbx::key_mode::usual, mdbx::value_mode::multi); + auto map = txn.create_map("multi", mdbx::key_mode::usual, mdbx::value_mode::multi); txn.clear_map(map); for (const auto &i : items) txn.upsert(map, i); @@ -309,8 +287,7 @@ bool next_prev_current(mdbx::env env) 
{ } bool simple(mdbx::env env) { - const std::array items = { - {{"k0", "v0"}, {"k1", "v1"}, {"k2", "v2"}}}; + const std::array items = {{{"k0", "v0"}, {"k1", "v1"}, {"k2", "v2"}}}; auto txn = env.start_write(); auto map = txn.create_map("simple"); @@ -376,14 +353,12 @@ int main(int argc, const char *argv[]) { (void)argc; (void)argv; - mdbx_setup_debug_nofmt(MDBX_LOG_NOTICE, MDBX_DBG_ASSERT, logger_nofmt, - log_buffer, sizeof(log_buffer)); + mdbx_setup_debug_nofmt(MDBX_LOG_NOTICE, MDBX_DBG_ASSERT, logger_nofmt, log_buffer, sizeof(log_buffer)); mdbx::path db_filename = "test-crunched-del"; mdbx::env::remove(db_filename); - mdbx::env_managed env(db_filename, mdbx::env_managed::create_parameters(), - mdbx::env::operate_parameters(42)); + mdbx::env_managed env(db_filename, mdbx::env_managed::create_parameters(), mdbx::env::operate_parameters(42)); if (!simple(env) || !next_prev_current(env) || !outofrange_prev(env)) return EXIT_FAILURE; @@ -392,8 +367,7 @@ int main(int argc, const char *argv[]) { // Значения разной длины от 100 до 1000 байт. testset.emplace_back(/* keylen_min */ 1, /* keylen_max */ 64, /* datalen_min */ 100, /* datalen_max */ - mdbx_env_get_valsize4page_max( - env, MDBX_db_flags_t(mdbx::value_mode::multi)), + mdbx_env_get_valsize4page_max(env, MDBX_db_flags_t(mdbx::value_mode::multi)), /* dups_log2 */ 6); // В одной таблице DupSort: path -> version_u64+data // path - это префикс в дереве. Самые частые длины: 1-5 байт и 32-36 байт. 
diff --git a/test/extra/cursor_closing.c++ b/test/extra/cursor_closing.c++ index 7b6967ef..045b7677 100644 --- a/test/extra/cursor_closing.c++ +++ b/test/extra/cursor_closing.c++ @@ -2,8 +2,8 @@ #include -static void logger_nofmt(MDBX_log_level_t loglevel, const char *function, - int line, const char *msg, unsigned length) noexcept { +static void logger_nofmt(MDBX_log_level_t loglevel, const char *function, int line, const char *msg, + unsigned length) noexcept { (void)length; (void)loglevel; std::cout << function << ":" << line << " " << msg; @@ -15,20 +15,17 @@ int main(int argc, const char *argv[]) { (void)argc; (void)argv; - mdbx_setup_debug_nofmt(MDBX_LOG_NOTICE, MDBX_DBG_ASSERT, logger_nofmt, - log_buffer, sizeof(log_buffer)); + mdbx_setup_debug_nofmt(MDBX_LOG_NOTICE, MDBX_DBG_ASSERT, logger_nofmt, log_buffer, sizeof(log_buffer)); mdbx::path db_filename = "test-cursor-closing"; mdbx::env::remove(db_filename); - mdbx::env_managed env( - db_filename, mdbx::env_managed::create_parameters(), - mdbx::env::operate_parameters(42, 0, mdbx::env::nested_transactions)); + mdbx::env_managed env(db_filename, mdbx::env_managed::create_parameters(), + mdbx::env::operate_parameters(42, 0, mdbx::env::nested_transactions)); { auto txn = env.start_write(); - auto table = txn.create_map("dummy", mdbx::key_mode::usual, - mdbx::value_mode::single); + auto table = txn.create_map("dummy", mdbx::key_mode::usual, mdbx::value_mode::single); auto cursor_1 = txn.open_cursor(table); auto cursor_2 = cursor_1.clone(); diff --git a/test/extra/dbi.c++ b/test/extra/dbi.c++ index 2a12be6a..9ed37c45 100644 --- a/test/extra/dbi.c++ +++ b/test/extra/dbi.c++ @@ -4,8 +4,8 @@ static char log_buffer[1024]; -static void logger_nofmt(MDBX_log_level_t loglevel, const char *function, - int line, const char *msg, unsigned length) noexcept { +static void logger_nofmt(MDBX_log_level_t loglevel, const char *function, int line, const char *msg, + unsigned length) noexcept { (void)length; (void)loglevel; 
fprintf(stdout, "%s:%u %s", function, line, msg); @@ -15,8 +15,7 @@ int main(int argc, const char *argv[]) { (void)argc; (void)argv; - mdbx_setup_debug_nofmt(MDBX_LOG_NOTICE, MDBX_DBG_ASSERT, logger_nofmt, - log_buffer, sizeof(log_buffer)); + mdbx_setup_debug_nofmt(MDBX_LOG_NOTICE, MDBX_DBG_ASSERT, logger_nofmt, log_buffer, sizeof(log_buffer)); mdbx::path db_filename = "test-dbi"; mdbx::env::remove(db_filename); @@ -26,14 +25,12 @@ int main(int argc, const char *argv[]) { { mdbx::env_managed env2(db_filename, createParameters, operateParameters); mdbx::txn_managed txn2 = env2.start_write(false); - /* mdbx::map_handle testHandle2 = */ txn2.create_map( - "fap1", mdbx::key_mode::reverse, mdbx::value_mode::single); + /* mdbx::map_handle testHandle2 = */ txn2.create_map("fap1", mdbx::key_mode::reverse, mdbx::value_mode::single); txn2.commit(); } mdbx::env_managed env(db_filename, createParameters, operateParameters); mdbx::txn_managed txn = env.start_write(false); - /* mdbx::map_handle testHandle = */ txn.create_map( - "fap1", mdbx::key_mode::usual, mdbx::value_mode::single); + /* mdbx::map_handle testHandle = */ txn.create_map("fap1", mdbx::key_mode::usual, mdbx::value_mode::single); txn.commit(); std::cout << "OK\n"; diff --git a/test/extra/doubtless_positioning.c++ b/test/extra/doubtless_positioning.c++ index 4cf710cd..28c37492 100644 --- a/test/extra/doubtless_positioning.c++ +++ b/test/extra/doubtless_positioning.c++ @@ -7,8 +7,7 @@ #include #include -static ::std::ostream &operator<<(::std::ostream &out, - const mdbx::cursor::move_operation op) { +static ::std::ostream &operator<<(::std::ostream &out, const mdbx::cursor::move_operation op) { static const char *const str[] = {"FIRST", "FIRST_DUP", "GET_BOTH", @@ -70,54 +69,45 @@ static buffer random_value() { return random(prng() % 47); } using predicate = std::function; -static bool probe(mdbx::txn txn, mdbx::map_handle dbi, - mdbx::cursor::move_operation op, predicate cmp, +static bool probe(mdbx::txn txn, 
mdbx::map_handle dbi, mdbx::cursor::move_operation op, predicate cmp, const buffer_pair &pair) { auto seeker = txn.open_cursor(dbi); auto scanner = seeker.clone(); - const bool scan_backward = - op == mdbx::cursor::key_lesser_than || - op == mdbx::cursor::key_lesser_or_equal || - op == mdbx::cursor::multi_exactkey_value_lesser_than || - op == mdbx::cursor::multi_exactkey_value_lesser_or_equal || - op == mdbx::cursor::pair_lesser_than || - op == mdbx::cursor::pair_lesser_or_equal; + const bool scan_backward = op == mdbx::cursor::key_lesser_than || op == mdbx::cursor::key_lesser_or_equal || + op == mdbx::cursor::multi_exactkey_value_lesser_than || + op == mdbx::cursor::multi_exactkey_value_lesser_or_equal || + op == mdbx::cursor::pair_lesser_than || op == mdbx::cursor::pair_lesser_or_equal; const bool is_multi = mdbx::is_multi(txn.get_handle_info(dbi).value_mode()); auto seek_result = seeker.move(op, pair.key, pair.value, false); - auto scan_result = scanner.fullscan( - [cmp, &pair](const mdbx::pair &scan) -> bool { return cmp(scan, pair); }, - scan_backward); + auto scan_result = + scanner.fullscan([cmp, &pair](const mdbx::pair &scan) -> bool { return cmp(scan, pair); }, scan_backward); if (seek_result.done == scan_result && (!scan_result || - seeker.is_same_position( - scanner, - op < mdbx::cursor::multi_exactkey_value_lesser_than && is_multi))) + seeker.is_same_position(scanner, op < mdbx::cursor::multi_exactkey_value_lesser_than && is_multi))) return true; std::cerr << std::endl; std::cerr << "bug:"; std::cerr << std::endl; - std::cerr << std::string(is_multi ? "multi" : "single") << "-map, op " << op - << ", key " << pair.key << ", value " << pair.value; + std::cerr << std::string(is_multi ? 
"multi" : "single") << "-map, op " << op << ", key " << pair.key << ", value " + << pair.value; std::cerr << std::endl; std::cerr << "\tscanner: "; if (scan_result) - std::cerr << " done, key " << scanner.current(false).key << ", value " - << scanner.current(false).value; + std::cerr << " done, key " << scanner.current(false).key << ", value " << scanner.current(false).value; else std::cerr << "not-found"; std::cerr << std::endl; - std::cerr << "\t seeker: " << (seek_result.done ? " done" : "not-found") - << ", key " << seek_result.key << ", value " << seek_result.value; + std::cerr << "\t seeker: " << (seek_result.done ? " done" : "not-found") << ", key " << seek_result.key + << ", value " << seek_result.value; std::cerr << std::endl; return false; } -static bool probe(mdbx::txn txn, mdbx::map_handle dbi, - mdbx::cursor::move_operation op, predicate cmp) { +static bool probe(mdbx::txn txn, mdbx::map_handle dbi, mdbx::cursor::move_operation op, predicate cmp) { const auto pair = buffer_pair(random_key(), random_value()); const bool ok = probe(txn, dbi, op, cmp, pair); #if MDBX_DEBUG @@ -159,32 +149,27 @@ static bool test(mdbx::txn txn, mdbx::map_handle dbi) { ok = probe(txn, dbi, mdbx::cursor::multi_exactkey_value_lesser_than, [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { - return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && - mdbx_dcmp(txn, dbi, l.value, r.value) < 0; + return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && mdbx_dcmp(txn, dbi, l.value, r.value) < 0; }) && ok; ok = probe(txn, dbi, mdbx::cursor::multi_exactkey_value_lesser_or_equal, [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { - return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && - mdbx_dcmp(txn, dbi, l.value, r.value) <= 0; + return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && mdbx_dcmp(txn, dbi, l.value, r.value) <= 0; }) && ok; ok = probe(txn, dbi, mdbx::cursor::multi_exactkey_value_equal, [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { - return mdbx_cmp(txn, dbi, 
l.key, r.key) == 0 && - mdbx_dcmp(txn, dbi, l.value, r.value) == 0; + return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && mdbx_dcmp(txn, dbi, l.value, r.value) == 0; }) && ok; ok = probe(txn, dbi, mdbx::cursor::multi_exactkey_value_greater_or_equal, [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { - return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && - mdbx_dcmp(txn, dbi, l.value, r.value) >= 0; + return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && mdbx_dcmp(txn, dbi, l.value, r.value) >= 0; }) && ok; ok = probe(txn, dbi, mdbx::cursor::multi_exactkey_value_greater, [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { - return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && - mdbx_dcmp(txn, dbi, l.value, r.value) > 0; + return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && mdbx_dcmp(txn, dbi, l.value, r.value) > 0; }) && ok; @@ -205,9 +190,7 @@ static bool test(mdbx::txn txn, mdbx::map_handle dbi) { }) && ok; ok = probe(txn, dbi, mdbx::cursor::pair_equal, - [](const mdbx::pair &l, const mdbx::pair &r) -> bool { - return l == r; - }) && + [](const mdbx::pair &l, const mdbx::pair &r) -> bool { return l == r; }) && ok; ok = probe(txn, dbi, mdbx::cursor::pair_greater_or_equal, [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { @@ -234,14 +217,11 @@ int main(int argc, const char *argv[]) { mdbx::path db_filename = "test-posi"; mdbx::env_managed::remove(db_filename); - mdbx::env_managed env(db_filename, mdbx::env_managed::create_parameters(), - mdbx::env::operate_parameters(3)); + mdbx::env_managed env(db_filename, mdbx::env_managed::create_parameters(), mdbx::env::operate_parameters(3)); auto txn = env.start_write(); - auto single = - txn.create_map("single", mdbx::key_mode::usual, mdbx::value_mode::single); - auto multi = - txn.create_map("multi", mdbx::key_mode::usual, mdbx::value_mode::multi); + auto single = txn.create_map("single", mdbx::key_mode::usual, mdbx::value_mode::single); + auto multi = txn.create_map("multi", mdbx::key_mode::usual, 
mdbx::value_mode::multi); for (size_t i = 0; i < 1000; ++i) { auto key = random_key(); txn.upsert(single, key, random_value()); diff --git a/test/extra/dupfix_addodd.c b/test/extra/dupfix_addodd.c index 5b666af9..b2e6aeea 100644 --- a/test/extra/dupfix_addodd.c +++ b/test/extra/dupfix_addodd.c @@ -37,8 +37,7 @@ int main() { exit(EXIT_FAILURE); } - rc = mdbx_env_open(env, "./example-db", MDBX_NOSUBDIR | MDBX_LIFORECLAIM, - 0664); + rc = mdbx_env_open(env, "./example-db", MDBX_NOSUBDIR | MDBX_LIFORECLAIM, 0664); if (rc != MDBX_SUCCESS) { fprintf(stderr, "mdbx_env_open: (%d) %s\n", rc, mdbx_strerror(rc)); exit(EXIT_FAILURE); @@ -50,8 +49,7 @@ int main() { exit(EXIT_FAILURE); } - rc = mdbx_dbi_open(txn, "test", MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_CREATE, - &dbi); + rc = mdbx_dbi_open(txn, "test", MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_CREATE, &dbi); if (rc != MDBX_SUCCESS) { fprintf(stderr, "mdbx_dbi_open: (%d) %s\n", rc, mdbx_strerror(rc)); exit(EXIT_FAILURE); diff --git a/test/extra/dupfix_multiple.c++ b/test/extra/dupfix_multiple.c++ index 1b0ffc45..a8897314 100644 --- a/test/extra/dupfix_multiple.c++ +++ b/test/extra/dupfix_multiple.c++ @@ -8,14 +8,11 @@ int doit() { mdbx::path db_filename = "test-dupfix-multiple"; mdbx::env_managed::remove(db_filename); - mdbx::env_managed env(db_filename, mdbx::env_managed::create_parameters(), - mdbx::env::operate_parameters()); + mdbx::env_managed env(db_filename, mdbx::env_managed::create_parameters(), mdbx::env::operate_parameters()); - using buffer = - mdbx::buffer; + using buffer = mdbx::buffer; auto txn = env.start_write(); - auto map = txn.create_map(nullptr, mdbx::key_mode::ordinal, - mdbx::value_mode::multi_ordinal); + auto map = txn.create_map(nullptr, mdbx::key_mode::ordinal, mdbx::value_mode::multi_ordinal); txn.insert(map, buffer::key_from_u64(21), buffer::key_from_u64(18)); txn.insert(map, buffer::key_from_u64(7), buffer::key_from_u64(19)); @@ -29,15 +26,11 @@ int doit() { txn = env.start_read(); auto cursor = 
txn.open_cursor(map); - if (cursor.to_first().value.as_uint64() != 19 || - cursor.to_next().value.as_uint64() != 18 || - cursor.to_next().value.as_uint64() != 17 || - cursor.to_next().value.as_uint64() != 16 || - cursor.to_next().value.as_uint64() != 15 || - cursor.to_next().value.as_uint64() != 14 || - cursor.to_next().value.as_uint64() != 13 || - cursor.to_next().value.as_uint64() != 12 || cursor.to_next(false).done || - !cursor.eof()) { + if (cursor.to_first().value.as_uint64() != 19 || cursor.to_next().value.as_uint64() != 18 || + cursor.to_next().value.as_uint64() != 17 || cursor.to_next().value.as_uint64() != 16 || + cursor.to_next().value.as_uint64() != 15 || cursor.to_next().value.as_uint64() != 14 || + cursor.to_next().value.as_uint64() != 13 || cursor.to_next().value.as_uint64() != 12 || + cursor.to_next(false).done || !cursor.eof()) { std::cerr << "Fail\n"; return EXIT_FAILURE; } @@ -45,20 +38,13 @@ int doit() { const uint64_t array[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 42, 17, 99, 0, 33, 333}; txn = env.start_write(); - txn.put_multiple_samelength(map, buffer::key_from_u64(13), array + 3, 4, - mdbx::upsert); - txn.put_multiple_samelength(map, buffer::key_from_u64(10), array + 0, 1, - mdbx::upsert); - txn.put_multiple_samelength(map, buffer::key_from_u64(12), array + 2, 3, - mdbx::upsert); - txn.put_multiple_samelength(map, buffer::key_from_u64(15), array + 5, 6, - mdbx::upsert); - txn.put_multiple_samelength(map, buffer::key_from_u64(14), array + 4, 5, - mdbx::upsert); - txn.put_multiple_samelength(map, buffer::key_from_u64(11), array + 1, 2, - mdbx::upsert); - txn.put_multiple_samelength(map, buffer::key_from_u64(16), array + 6, 7, - mdbx::upsert); + txn.put_multiple_samelength(map, buffer::key_from_u64(13), array + 3, 4, mdbx::upsert); + txn.put_multiple_samelength(map, buffer::key_from_u64(10), array + 0, 1, mdbx::upsert); + txn.put_multiple_samelength(map, buffer::key_from_u64(12), array + 2, 3, mdbx::upsert); + txn.put_multiple_samelength(map, 
buffer::key_from_u64(15), array + 5, 6, mdbx::upsert); + txn.put_multiple_samelength(map, buffer::key_from_u64(14), array + 4, 5, mdbx::upsert); + txn.put_multiple_samelength(map, buffer::key_from_u64(11), array + 1, 2, mdbx::upsert); + txn.put_multiple_samelength(map, buffer::key_from_u64(16), array + 6, 7, mdbx::upsert); txn.commit(); txn = env.start_read(); @@ -69,42 +55,30 @@ int doit() { cursor.to_next().value.as_uint64() != 1 || /* key = 11: 2 элемента, пропуск 1 */ - cursor.to_next().value.as_uint64() != 2 || - cursor.to_next().value.as_uint64() != 3 || + cursor.to_next().value.as_uint64() != 2 || cursor.to_next().value.as_uint64() != 3 || /* key = 12: 3 элемента, пропуск 2 */ - cursor.to_next().value.as_uint64() != 3 || - cursor.to_next().value.as_uint64() != 4 || + cursor.to_next().value.as_uint64() != 3 || cursor.to_next().value.as_uint64() != 4 || cursor.to_next().value.as_uint64() != 5 || /* key = 13: 4 элемента, пропуск 3 */ - cursor.to_next().value.as_uint64() != 4 || - cursor.to_next().value.as_uint64() != 5 || - cursor.to_next().value.as_uint64() != 6 || - cursor.to_next().value.as_uint64() != 7 || + cursor.to_next().value.as_uint64() != 4 || cursor.to_next().value.as_uint64() != 5 || + cursor.to_next().value.as_uint64() != 6 || cursor.to_next().value.as_uint64() != 7 || /* key = 14: 5 элементов, пропуск 4 */ - cursor.to_next().value.as_uint64() != 5 || - cursor.to_next().value.as_uint64() != 6 || - cursor.to_next().value.as_uint64() != 7 || - cursor.to_next().value.as_uint64() != 8 || + cursor.to_next().value.as_uint64() != 5 || cursor.to_next().value.as_uint64() != 6 || + cursor.to_next().value.as_uint64() != 7 || cursor.to_next().value.as_uint64() != 8 || cursor.to_next().value.as_uint64() != 9 || /* key = 15: 6 элементов, пропуск 5 */ - cursor.to_next().value.as_uint64() != 6 || - cursor.to_next().value.as_uint64() != 7 || - cursor.to_next().value.as_uint64() != 8 || - cursor.to_next().value.as_uint64() != 9 || - 
cursor.to_next().value.as_uint64() != 17 || - cursor.to_next().value.as_uint64() != 42 || + cursor.to_next().value.as_uint64() != 6 || cursor.to_next().value.as_uint64() != 7 || + cursor.to_next().value.as_uint64() != 8 || cursor.to_next().value.as_uint64() != 9 || + cursor.to_next().value.as_uint64() != 17 || cursor.to_next().value.as_uint64() != 42 || /* key = 16: 7 элементов, пропуск 6 */ - cursor.to_next().value.as_uint64() != 0 || - cursor.to_next().value.as_uint64() != 7 || - cursor.to_next().value.as_uint64() != 8 || - cursor.to_next().value.as_uint64() != 9 || - cursor.to_next().value.as_uint64() != 17 || - cursor.to_next().value.as_uint64() != 42 || + cursor.to_next().value.as_uint64() != 0 || cursor.to_next().value.as_uint64() != 7 || + cursor.to_next().value.as_uint64() != 8 || cursor.to_next().value.as_uint64() != 9 || + cursor.to_next().value.as_uint64() != 17 || cursor.to_next().value.as_uint64() != 42 || cursor.to_next().value.as_uint64() != 99 || /* key = 21 */ cursor.to_next().value.as_uint64() != 18 || @@ -113,46 +87,35 @@ int doit() { /* key = 24 */ cursor.to_next().value.as_uint64() != 15 || /* key = 25 */ cursor.to_next().value.as_uint64() != 14 || /* key = 26 */ cursor.to_next().value.as_uint64() != 13 || - /* key = 27 */ cursor.to_next().value.as_uint64() != 12 || - cursor.to_next(false).done || !cursor.eof()) { + /* key = 27 */ cursor.to_next().value.as_uint64() != 12 || cursor.to_next(false).done || !cursor.eof()) { std::cerr << "Fail\n"; return EXIT_FAILURE; } txn.abort(); txn = env.start_write(); - txn.put_multiple_samelength(map, buffer::key_from_u64(7), array + 3, 4, - mdbx::update); + txn.put_multiple_samelength(map, buffer::key_from_u64(7), array + 3, 4, mdbx::update); txn.upsert(map, buffer::key_from_u64(10), buffer::key_from_u64(14)); - txn.put_multiple_samelength(map, buffer::key_from_u64(11), array + 4, 5, - mdbx::upsert); - txn.put_multiple_samelength(map, buffer::key_from_u64(12), array + 0, 1, - mdbx::update); + 
txn.put_multiple_samelength(map, buffer::key_from_u64(11), array + 4, 5, mdbx::upsert); + txn.put_multiple_samelength(map, buffer::key_from_u64(12), array + 0, 1, mdbx::update); txn.update(map, buffer::key_from_u64(13), buffer::key_from_u64(18)); - txn.put_multiple_samelength(map, buffer::key_from_u64(14), array + 2, 3, - mdbx::update); + txn.put_multiple_samelength(map, buffer::key_from_u64(14), array + 2, 3, mdbx::update); txn.update(map, buffer::key_from_u64(15), buffer::key_from_u64(13)); - txn.put_multiple_samelength(map, buffer::key_from_u64(16), array + 6, 9, - mdbx::update); + txn.put_multiple_samelength(map, buffer::key_from_u64(16), array + 6, 9, mdbx::update); txn.update(map, buffer::key_from_u64(21), buffer::key_from_u64(17)); txn.update(map, buffer::key_from_u64(22), buffer::key_from_u64(15)); - txn.put_multiple_samelength(map, buffer::key_from_u64(23), array + 1, 2, - mdbx::update); + txn.put_multiple_samelength(map, buffer::key_from_u64(23), array + 1, 2, mdbx::update); txn.update(map, buffer::key_from_u64(24), buffer::key_from_u64(16)); - txn.put_multiple_samelength(map, buffer::key_from_u64(25), array + 5, 6, - mdbx::update); + txn.put_multiple_samelength(map, buffer::key_from_u64(25), array + 5, 6, mdbx::update); txn.upsert(map, buffer::key_from_u64(26), buffer::key_from_u64(12)); - txn.put_multiple_samelength(map, buffer::key_from_u64(27), array + 12, 3, - mdbx::update); + txn.put_multiple_samelength(map, buffer::key_from_u64(27), array + 12, 3, mdbx::update); txn.commit(); txn = env.start_read(); cursor = txn.open_cursor(map); if (/* key = 7 */ - cursor.to_first().value.as_uint64() != 4 || - cursor.to_next().value.as_uint64() != 5 || - cursor.to_next().value.as_uint64() != 6 || - cursor.to_next().value.as_uint64() != 7 || + cursor.to_first().value.as_uint64() != 4 || cursor.to_next().value.as_uint64() != 5 || + cursor.to_next().value.as_uint64() != 6 || cursor.to_next().value.as_uint64() != 7 || /* key = 10: 1 элемент */ 
cursor.to_next().value.as_uint64() != 1 || @@ -160,13 +123,10 @@ int doit() { cursor.to_next().value.as_uint64() != 14 || /* key = 11: 2 элемента, пропуск 1 */ - cursor.to_next().value.as_uint64() != 2 || - cursor.to_next().value.as_uint64() != 3 || + cursor.to_next().value.as_uint64() != 2 || cursor.to_next().value.as_uint64() != 3 || /* +5 элементов, пропуск 4 */ - cursor.to_next().value.as_uint64() != 5 || - cursor.to_next().value.as_uint64() != 6 || - cursor.to_next().value.as_uint64() != 7 || - cursor.to_next().value.as_uint64() != 8 || + cursor.to_next().value.as_uint64() != 5 || cursor.to_next().value.as_uint64() != 6 || + cursor.to_next().value.as_uint64() != 7 || cursor.to_next().value.as_uint64() != 8 || cursor.to_next().value.as_uint64() != 9 || /* key = 12: 1 элемент */ @@ -174,44 +134,33 @@ int doit() { /* key = 13 */ cursor.to_next().value.as_uint64() != 18 || /* key = 14: 3 элемента, пропуск 2 */ - cursor.to_next().value.as_uint64() != 3 || - cursor.to_next().value.as_uint64() != 4 || + cursor.to_next().value.as_uint64() != 3 || cursor.to_next().value.as_uint64() != 4 || cursor.to_next().value.as_uint64() != 5 || /* key = 15 */ cursor.to_next().value.as_uint64() != 13 || /* key = 16: 9 элементов, пропуск 6 */ - cursor.to_next().value.as_uint64() != 0 || - cursor.to_next().value.as_uint64() != 7 || - cursor.to_next().value.as_uint64() != 8 || - cursor.to_next().value.as_uint64() != 9 || - cursor.to_next().value.as_uint64() != 17 || - cursor.to_next().value.as_uint64() != 33 || - cursor.to_next().value.as_uint64() != 42 || - cursor.to_next().value.as_uint64() != 99 || + cursor.to_next().value.as_uint64() != 0 || cursor.to_next().value.as_uint64() != 7 || + cursor.to_next().value.as_uint64() != 8 || cursor.to_next().value.as_uint64() != 9 || + cursor.to_next().value.as_uint64() != 17 || cursor.to_next().value.as_uint64() != 33 || + cursor.to_next().value.as_uint64() != 42 || cursor.to_next().value.as_uint64() != 99 || cursor.to_next().value.as_uint64() 
!= 333 || /* key = 21 */ cursor.to_next().value.as_uint64() != 17 || /* key = 22 */ cursor.to_next().value.as_uint64() != 15 || /* key = 23: 2 элемента, пропуск 1 */ - cursor.to_next().value.as_uint64() != 2 || - cursor.to_next().value.as_uint64() != 3 || + cursor.to_next().value.as_uint64() != 2 || cursor.to_next().value.as_uint64() != 3 || /* key = 24 */ cursor.to_next().value.as_uint64() != 16 || /* key = 25: 6 элемента, пропуск 5 */ - cursor.to_next().value.as_uint64() != 6 || - cursor.to_next().value.as_uint64() != 7 || - cursor.to_next().value.as_uint64() != 8 || - cursor.to_next().value.as_uint64() != 9 || - cursor.to_next().value.as_uint64() != 17 || - cursor.to_next().value.as_uint64() != 42 || + cursor.to_next().value.as_uint64() != 6 || cursor.to_next().value.as_uint64() != 7 || + cursor.to_next().value.as_uint64() != 8 || cursor.to_next().value.as_uint64() != 9 || + cursor.to_next().value.as_uint64() != 17 || cursor.to_next().value.as_uint64() != 42 || /* key = 26, 1+1 upsert */ - cursor.to_next().value.as_uint64() != 12 || - cursor.to_next().value.as_uint64() != 13 || + cursor.to_next().value.as_uint64() != 12 || cursor.to_next().value.as_uint64() != 13 || /* key = 27: 3 элемента, пропуск 12 */ - cursor.to_next().value.as_uint64() != 0 || - cursor.to_next().value.as_uint64() != 33 || + cursor.to_next().value.as_uint64() != 0 || cursor.to_next().value.as_uint64() != 33 || cursor.to_next().value.as_uint64() != 333 || cursor.to_next(false).done || !cursor.eof()) { @@ -247,8 +196,7 @@ int doit() { txn = env.start_write(); txn.clear_map(map); - map = txn.create_map(nullptr, mdbx::key_mode::usual, - mdbx::value_mode::multi_samelength); + map = txn.create_map(nullptr, mdbx::key_mode::usual, mdbx::value_mode::multi_samelength); txn.upsert(map, mdbx::slice("key1"), mdbx::slice("val1")); txn.upsert(map, mdbx::pair("key1", "val2")); txn.upsert(map, mdbx::pair("key1", "val3")); diff --git a/test/extra/early_close_dbi.c++ b/test/extra/early_close_dbi.c++ index 
d9723107..42bb7adc 100644 --- a/test/extra/early_close_dbi.c++ +++ b/test/extra/early_close_dbi.c++ @@ -23,14 +23,11 @@ int main(int argc, char *argv[]) { // 1); assert(err == MDBX_SUCCESS); intptr_t lowerbound(0), size(0), upperbound(mdbx::env::geometry::GiB / 2); - intptr_t step(128 * mdbx::env::geometry::MiB), - shrink(256 * mdbx::env::geometry::MiB), pagesize(-1); - err = mdbx_env_set_geometry(environment, lowerbound, size, upperbound, step, - shrink, pagesize); + intptr_t step(128 * mdbx::env::geometry::MiB), shrink(256 * mdbx::env::geometry::MiB), pagesize(-1); + err = mdbx_env_set_geometry(environment, lowerbound, size, upperbound, step, shrink, pagesize); assert(err == MDBX_SUCCESS); - MDBX_env_flags_t flags(MDBX_NOSUBDIR | MDBX_WRITEMAP | MDBX_LIFORECLAIM | - MDBX_NORDAHEAD); + MDBX_env_flags_t flags(MDBX_NOSUBDIR | MDBX_WRITEMAP | MDBX_LIFORECLAIM | MDBX_NORDAHEAD); err = mdbx_env_openT(environment, db_filename.c_str(), flags, 0644); assert(err == MDBX_SUCCESS); diff --git a/test/extra/hex_base64_base58.c++ b/test/extra/hex_base64_base58.c++ index f2419ebd..7ae1fac4 100644 --- a/test/extra/hex_base64_base58.c++ +++ b/test/extra/hex_base64_base58.c++ @@ -27,20 +27,16 @@ static buffer random(size_t length) { static bool basic() { bool ok = true; const char *const hex_dump = "1D58fa\n2e46E3\nBd9c7A\nC0bF"; - const uint8_t native[] = {0x1D, 0x58, 0xfa, 0x2e, 0x46, 0xE3, - 0xBd, 0x9c, 0x7A, 0xC0, 0xbF}; + const uint8_t native[] = {0x1D, 0x58, 0xfa, 0x2e, 0x46, 0xE3, 0xBd, 0x9c, 0x7A, 0xC0, 0xbF}; if (mdbx::slice(hex_dump).hex_decode(true) != mdbx::slice::wrap(native)) std::cerr << "hex_decode() failed\n"; - else if (mdbx::slice::wrap(native).encode_hex(true, 4).hex_decode(true) != - mdbx::slice::wrap(native)) + else if (mdbx::slice::wrap(native).encode_hex(true, 4).hex_decode(true) != mdbx::slice::wrap(native)) std::cerr << "hex_encode(UPPERCASE) failed\n"; - else if (mdbx::slice::wrap(native).encode_hex(false).hex_decode(true) != - mdbx::slice::wrap(native)) 
+ else if (mdbx::slice::wrap(native).encode_hex(false).hex_decode(true) != mdbx::slice::wrap(native)) std::cerr << "hex_encode(lowercase) failed\n"; - if (mdbx::slice("").as_base64_string() != "" || - mdbx::slice(" ").encode_base64().as_string() != "IA==" || + if (mdbx::slice("").as_base64_string() != "" || mdbx::slice(" ").encode_base64().as_string() != "IA==" || mdbx::slice("~0").encode_base64().as_string() != "fjA=" || mdbx::slice("A_z").encode_base64().as_string() != "QV96" || mdbx::slice("Ka9q").encode_base64().as_string() != "S2E5cQ==" || @@ -50,12 +46,9 @@ static bool basic() { } const uint8_t base58_rfc[] = {0x00, 0x00, 0x28, 0x7f, 0xb4, 0xcd}; - if (mdbx::slice("").as_base58_string() != "" || - mdbx::slice(" ").encode_base58().as_string() != "Z" || + if (mdbx::slice("").as_base58_string() != "" || mdbx::slice(" ").encode_base58().as_string() != "Z" || mdbx::slice("Hello World!").as_base58_string() != "2NEpo7TZRRrLZSi2U" || - mdbx::slice("The quick brown fox jumps over the lazy dog.") - .encode_base58() - .as_string() != + mdbx::slice("The quick brown fox jumps over the lazy dog.").encode_base58().as_string() != "USm3fpXnKG5EUBx2ndxBDMPVciP5hGey2Jh4NDv6gmeo1LkMeiKrLJUUBk6Z" || mdbx::slice::wrap(base58_rfc).as_base58_string() != "11233QC4" || mdbx::slice("~0").encode_base58().as_string() != "Aby" || @@ -66,15 +59,11 @@ static bool basic() { ok = false; } - if (mdbx::slice("").base58_decode() != mdbx::slice() || - mdbx::slice("Z").base58_decode() != mdbx::slice(" ") || + if (mdbx::slice("").base58_decode() != mdbx::slice() || mdbx::slice("Z").base58_decode() != mdbx::slice(" ") || mdbx::slice("2NEpo7TZRRrLZSi2U").base58_decode() != "Hello World!" 
|| - mdbx::slice( - "USm3fpXnKG5EUBx2ndxBDMPVciP5hGey2Jh4NDv6gmeo1LkMeiKrLJUUBk6Z") - .base58_decode() != + mdbx::slice("USm3fpXnKG5EUBx2ndxBDMPVciP5hGey2Jh4NDv6gmeo1LkMeiKrLJUUBk6Z").base58_decode() != mdbx::slice("The quick brown fox jumps over the lazy dog.") || - mdbx::slice("11233QC4").base58_decode() != - mdbx::slice::wrap(base58_rfc) || + mdbx::slice("11233QC4").base58_decode() != mdbx::slice::wrap(base58_rfc) || mdbx::slice("Aby").base58_decode() != mdbx::slice("~0") || mdbx::slice("NxZw").base58_decode() != mdbx::slice("A_z") || mdbx::slice("2vkjDi").base58_decode() != mdbx::slice("Ka9q") || @@ -94,28 +83,19 @@ int main(int argc, const char *argv[]) { for (size_t n = 0; n < 1000; ++n) { for (size_t length = 0; ok && length < 111; ++length) { const auto pattern = random(length); - if (pattern != pattern.encode_hex(bool(prng() & 1), prng() % 111) - .hex_decode(true) - .encode_hex() - .hex_decode(false)) { - std::cerr << "hex encode/decode failed: n " << n << ", length " - << length << std::endl; + if (pattern != + pattern.encode_hex(bool(prng() & 1), prng() % 111).hex_decode(true).encode_hex().hex_decode(false)) { + std::cerr << "hex encode/decode failed: n " << n << ", length " << length << std::endl; ok = false; } - if (pattern != pattern.encode_base64(unsigned(prng() % 111)) - .base64_decode(true) - .encode_base64() - .base64_decode(false)) { - std::cerr << "base64 encode/decode failed: n " << n << ", length " - << length << std::endl; + if (pattern != + pattern.encode_base64(unsigned(prng() % 111)).base64_decode(true).encode_base64().base64_decode(false)) { + std::cerr << "base64 encode/decode failed: n " << n << ", length " << length << std::endl; ok = false; } - if (pattern != pattern.encode_base58(unsigned(prng() % 111)) - .base58_decode(true) - .encode_base58() - .base58_decode(false)) { - std::cerr << "base58 encode/decode failed: n " << n << ", length " - << length << std::endl; + if (pattern != + pattern.encode_base58(unsigned(prng() % 
111)).base58_decode(true).encode_base58().base58_decode(false)) { + std::cerr << "base58 encode/decode failed: n " << n << ", length " << length << std::endl; ok = false; } } diff --git a/test/extra/maindb_ordinal.c++ b/test/extra/maindb_ordinal.c++ index dc3fd597..c766049f 100644 --- a/test/extra/maindb_ordinal.c++ +++ b/test/extra/maindb_ordinal.c++ @@ -10,14 +10,11 @@ int main(int argc, const char *argv[]) { mdbx::path db_filename = "test-dupfix-multiple"; mdbx::env_managed::remove(db_filename); - mdbx::env_managed env(db_filename, mdbx::env_managed::create_parameters(), - mdbx::env::operate_parameters()); + mdbx::env_managed env(db_filename, mdbx::env_managed::create_parameters(), mdbx::env::operate_parameters()); - using buffer = - mdbx::buffer; + using buffer = mdbx::buffer; auto txn = env.start_write(); - auto map = txn.create_map(nullptr, mdbx::key_mode::ordinal, - mdbx::value_mode::single); + auto map = txn.create_map(nullptr, mdbx::key_mode::ordinal, mdbx::value_mode::single); #if 0 /* workaround */ txn.commit(); env.close(); @@ -39,14 +36,10 @@ int main(int argc, const char *argv[]) { txn = env.start_read(); auto cursor = txn.open_cursor(map); #if defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L - if (cursor.to_first().value.string_view() == "a" && - cursor.to_next().value.string_view() == "b" && - cursor.to_next().value.string_view() == "c" && - cursor.to_next().value.string_view() == "d" && - cursor.to_next().value.string_view() == "e" && - cursor.to_next().value.string_view() == "f" && - cursor.to_next().value.string_view() == "g" && - cursor.to_next().value.string_view() == "h" && + if (cursor.to_first().value.string_view() == "a" && cursor.to_next().value.string_view() == "b" && + cursor.to_next().value.string_view() == "c" && cursor.to_next().value.string_view() == "d" && + cursor.to_next().value.string_view() == "e" && cursor.to_next().value.string_view() == "f" && + cursor.to_next().value.string_view() == "g" && 
cursor.to_next().value.string_view() == "h" && !cursor.to_next(false).done && cursor.eof()) { std::cout << "OK\n"; return EXIT_SUCCESS; diff --git a/test/extra/open.c++ b/test/extra/open.c++ index bc955c83..55b58c8c 100644 --- a/test/extra/open.c++ +++ b/test/extra/open.c++ @@ -18,8 +18,8 @@ int main(int argc, const char *argv[]) { static char log_buffer[1024]; -static void logger_nofmt(MDBX_log_level_t loglevel, const char *function, - int line, const char *msg, unsigned length) noexcept { +static void logger_nofmt(MDBX_log_level_t loglevel, const char *function, int line, const char *msg, + unsigned length) noexcept { (void)length; (void)loglevel; fprintf(stdout, "%s:%u %s", function, line, msg); @@ -29,8 +29,7 @@ int main(int argc, const char *argv[]) { (void)argc; (void)argv; - mdbx_setup_debug_nofmt(MDBX_LOG_VERBOSE, MDBX_DBG_ASSERT, logger_nofmt, - log_buffer, sizeof(log_buffer)); + mdbx_setup_debug_nofmt(MDBX_LOG_VERBOSE, MDBX_DBG_ASSERT, logger_nofmt, log_buffer, sizeof(log_buffer)); mdbx::path path = "test-open"; mdbx::env::remove(path); @@ -41,19 +40,16 @@ int main(int argc, const char *argv[]) { createParameters2.geometry.make_fixed(42 * mdbx::env::geometry::MiB); mdbx::env_managed env2(path, createParameters2, operateParameters2); mdbx::txn_managed txn2 = env2.start_write(false); - /* mdbx::map_handle testHandle2 = */ txn2.create_map( - "fap1", mdbx::key_mode::reverse, mdbx::value_mode::single); + /* mdbx::map_handle testHandle2 = */ txn2.create_map("fap1", mdbx::key_mode::reverse, mdbx::value_mode::single); txn2.commit(); } mdbx::env::operate_parameters operateParameters(100, 10); mdbx::env_managed::create_parameters createParameters; - createParameters.geometry.make_dynamic(21 * mdbx::env::geometry::MiB, - 84 * mdbx::env::geometry::MiB); + createParameters.geometry.make_dynamic(21 * mdbx::env::geometry::MiB, 84 * mdbx::env::geometry::MiB); mdbx::env_managed env(path, createParameters, operateParameters); mdbx::txn_managed txn = env.start_write(false); 
- /* mdbx::map_handle testHandle = */ txn.create_map( - "fap1", mdbx::key_mode::usual, mdbx::value_mode::single); + /* mdbx::map_handle testHandle = */ txn.create_map("fap1", mdbx::key_mode::usual, mdbx::value_mode::single); txn.commit(); std::latch starter(1); @@ -62,8 +58,7 @@ int main(int argc, const char *argv[]) { starter.wait(); // mdbx::env_managed env(path, createParameters, operateParameters); mdbx::txn_managed txn = env.start_write(false); - /* mdbx::map_handle testHandle = */ txn.create_map( - "fap1", mdbx::key_mode::usual, mdbx::value_mode::single); + /* mdbx::map_handle testHandle = */ txn.create_map("fap1", mdbx::key_mode::usual, mdbx::value_mode::single); txn.commit(); }); @@ -71,8 +66,7 @@ int main(int argc, const char *argv[]) { starter.wait(); // mdbx::env_managed env(path, createParameters, operateParameters); mdbx::txn_managed txn = env.start_write(false); - /* mdbx::map_handle testHandle = */ txn.create_map( - "fap1", mdbx::key_mode::usual, mdbx::value_mode::single); + /* mdbx::map_handle testHandle = */ txn.create_map("fap1", mdbx::key_mode::usual, mdbx::value_mode::single); txn.commit(); }); diff --git a/test/extra/pcrf/pcrf_test.c b/test/extra/pcrf/pcrf_test.c index 876bdf74..1d023fcf 100644 --- a/test/extra/pcrf/pcrf_test.c +++ b/test/extra/pcrf/pcrf_test.c @@ -32,9 +32,8 @@ #include #include -#define IP_PRINTF_ARG_HOST(addr) \ - (int)((addr) >> 24), (int)((addr) >> 16 & 0xff), (int)((addr) >> 8 & 0xff), \ - (int)((addr) & 0xff) +#define IP_PRINTF_ARG_HOST(addr) \ + (int)((addr) >> 24), (int)((addr) >> 16 & 0xff), (int)((addr) >> 8 & 0xff), (int)((addr) & 0xff) char opt_db_path[PATH_MAX] = "./mdbx_bench2"; static MDBX_env *env; @@ -88,14 +87,13 @@ static int64_t get_id_from_pool() { return id; } -#define MDBX_CHECK(x) \ - do { \ - const int rc = (x); \ - if (rc != MDBX_SUCCESS) { \ - printf("Error [%d] %s in %s at %s:%d\n", rc, mdbx_strerror(rc), #x, \ - __FILE__, __LINE__); \ - exit(EXIT_FAILURE); \ - } \ +#define MDBX_CHECK(x) \ + do { \ 
+ const int rc = (x); \ + if (rc != MDBX_SUCCESS) { \ + printf("Error [%d] %s in %s at %s:%d\n", rc, mdbx_strerror(rc), #x, __FILE__, __LINE__); \ + exit(EXIT_FAILURE); \ + } \ } while (0) static void db_connect() { @@ -105,13 +103,10 @@ static void db_connect() { MDBX_dbi dbi_ip; MDBX_CHECK(mdbx_env_create(&env)); - MDBX_CHECK(mdbx_env_set_geometry( - env, 0, 0, REC_COUNT * sizeof(session_data_t) * 10, -1, -1, -1)); + MDBX_CHECK(mdbx_env_set_geometry(env, 0, 0, REC_COUNT * sizeof(session_data_t) * 10, -1, -1, -1)); MDBX_CHECK(mdbx_env_set_maxdbs(env, 30)); - MDBX_CHECK(mdbx_env_open(env, opt_db_path, - MDBX_CREATE | MDBX_WRITEMAP | MDBX_UTTERLY_NOSYNC | - MDBX_LIFORECLAIM, - 0664)); + MDBX_CHECK( + mdbx_env_open(env, opt_db_path, MDBX_CREATE | MDBX_WRITEMAP | MDBX_UTTERLY_NOSYNC | MDBX_LIFORECLAIM, 0664)); MDBX_txn *txn; // transaction init @@ -135,16 +130,11 @@ static void create_record(uint64_t record_id) { MDBX_txn *txn; session_data_t data; // transaction init - snprintf(data.session_id1, sizeof(data.session_id1), - "prefix%02u_%02u.fill.fill.fill.fill.fill.fill;%" PRIu64, - (unsigned)(record_id % 3) + 1, (unsigned)(record_id % 9) + 1, - record_id); - snprintf(data.session_id2, sizeof(data.session_id2), - "dprefix%" PRIu64 ";%" PRIu64 ".fill.fill.;suffix", record_id, - (record_id + UINT64_C(1442695040888963407)) % - UINT64_C(6364136223846793005)); - snprintf(data.ip, sizeof(data.ip), "%d.%d.%d.%d", - IP_PRINTF_ARG_HOST(record_id & 0xFFFFFFFF)); + snprintf(data.session_id1, sizeof(data.session_id1), "prefix%02u_%02u.fill.fill.fill.fill.fill.fill;%" PRIu64, + (unsigned)(record_id % 3) + 1, (unsigned)(record_id % 9) + 1, record_id); + snprintf(data.session_id2, sizeof(data.session_id2), "dprefix%" PRIu64 ";%" PRIu64 ".fill.fill.;suffix", record_id, + (record_id + UINT64_C(1442695040888963407)) % UINT64_C(6364136223846793005)); + snprintf(data.ip, sizeof(data.ip), "%d.%d.%d.%d", IP_PRINTF_ARG_HOST(record_id & 0xFFFFFFFF)); event.obj_id = record_id; 
event.event_type = 1; @@ -152,8 +142,7 @@ static void create_record(uint64_t record_id) { MDBX_val _session_id2_rec = {data.session_id2, strlen(data.session_id2)}; MDBX_val _ip_rec = {data.ip, strlen(data.ip)}; MDBX_val _obj_id_rec = {&record_id, sizeof(record_id)}; - MDBX_val _data_rec = {&data, offsetof(session_data_t, fill) + - (rand() % sizeof(data.fill))}; + MDBX_val _data_rec = {&data, offsetof(session_data_t, fill) + (rand() % sizeof(data.fill))}; MDBX_val _event_rec = {&event, sizeof(event)}; uint64_t start = getClockUs(); @@ -162,20 +151,16 @@ static void create_record(uint64_t record_id) { MDBX_CHECK(mdbx_dbi_open(txn, "session_id", MDBX_CREATE, &dbi_session_id)); MDBX_CHECK(mdbx_dbi_open(txn, "event", MDBX_CREATE, &dbi_event)); MDBX_CHECK(mdbx_dbi_open(txn, "ip", MDBX_CREATE, &dbi_ip)); - MDBX_CHECK(mdbx_put(txn, dbi_session, &_obj_id_rec, &_data_rec, - MDBX_NOOVERWRITE | MDBX_NODUPDATA)); - MDBX_CHECK(mdbx_put(txn, dbi_session_id, &_session_id1_rec, &_obj_id_rec, - MDBX_NOOVERWRITE | MDBX_NODUPDATA)); - MDBX_CHECK(mdbx_put(txn, dbi_session_id, &_session_id2_rec, &_obj_id_rec, - MDBX_NOOVERWRITE | MDBX_NODUPDATA)); + MDBX_CHECK(mdbx_put(txn, dbi_session, &_obj_id_rec, &_data_rec, MDBX_NOOVERWRITE | MDBX_NODUPDATA)); + MDBX_CHECK(mdbx_put(txn, dbi_session_id, &_session_id1_rec, &_obj_id_rec, MDBX_NOOVERWRITE | MDBX_NODUPDATA)); + MDBX_CHECK(mdbx_put(txn, dbi_session_id, &_session_id2_rec, &_obj_id_rec, MDBX_NOOVERWRITE | MDBX_NODUPDATA)); MDBX_CHECK(mdbx_put(txn, dbi_ip, &_ip_rec, &_obj_id_rec, 0)); MDBX_CHECK(mdbx_put(txn, dbi_event, &_event_rec, &_obj_id_rec, 0)); MDBX_CHECK(mdbx_txn_commit(txn)); mdbx_data_size += (_data_rec.iov_len + _obj_id_rec.iov_len * 4); - mdbx_key_size += - (_obj_id_rec.iov_len + _session_id1_rec.iov_len + - _session_id2_rec.iov_len + _ip_rec.iov_len + _event_rec.iov_len); + mdbx_key_size += (_obj_id_rec.iov_len + _session_id1_rec.iov_len + _session_id2_rec.iov_len + _ip_rec.iov_len + + _event_rec.iov_len); // transaction commit 
mdbx_add_count++; @@ -218,9 +203,8 @@ static void delete_record(int64_t record_id) { MDBX_CHECK(mdbx_del(txn, dbi_session, &_obj_id_rec, NULL)); mdbx_data_size -= (_data_rec.iov_len + _obj_id_rec.iov_len * 4); - mdbx_key_size -= - (_obj_id_rec.iov_len + _session_id1_rec.iov_len + - _session_id2_rec.iov_len + _ip_rec.iov_len + _event_rec.iov_len); + mdbx_key_size -= (_obj_id_rec.iov_len + _session_id1_rec.iov_len + _session_id2_rec.iov_len + _ip_rec.iov_len + + _event_rec.iov_len); // transaction commit MDBX_CHECK(mdbx_txn_commit(txn)); @@ -233,8 +217,7 @@ static void db_disconnect() { printf("Connection closed\n"); } -static void get_db_stat(const char *db, int64_t *ms_branch_pages, - int64_t *ms_leaf_pages) { +static void get_db_stat(const char *db, int64_t *ms_branch_pages, int64_t *ms_leaf_pages) { MDBX_txn *txn; MDBX_stat stat; MDBX_dbi dbi; @@ -243,10 +226,8 @@ static void get_db_stat(const char *db, int64_t *ms_branch_pages, MDBX_CHECK(mdbx_dbi_open(txn, db, MDBX_CREATE, &dbi)); MDBX_CHECK(mdbx_dbi_stat(txn, dbi, &stat, sizeof(stat))); mdbx_txn_abort(txn); - printf("%15s | %15" PRIu64 " | %5u | %10" PRIu64 " | %10" PRIu64 - " | %11" PRIu64 " |\n", - db, stat.ms_branch_pages, stat.ms_depth, stat.ms_entries, - stat.ms_leaf_pages, stat.ms_overflow_pages); + printf("%15s | %15" PRIu64 " | %5u | %10" PRIu64 " | %10" PRIu64 " | %11" PRIu64 " |\n", db, stat.ms_branch_pages, + stat.ms_depth, stat.ms_entries, stat.ms_leaf_pages, stat.ms_overflow_pages); (*ms_branch_pages) += stat.ms_branch_pages; (*ms_leaf_pages) += stat.ms_leaf_pages; } @@ -261,25 +242,20 @@ static void periodic_stat(void) { printf("Environment Info\n"); printf(" Pagesize: %u\n", mst.ms_psize); if (mei.mi_geo.lower != mei.mi_geo.upper) { - printf(" Dynamic datafile: %" PRIu64 "..%" PRIu64 " bytes (+%" PRIu64 - "/-%" PRIu64 "), %" PRIu64 "..%" PRIu64 " pages (+%" PRIu64 - "/-%" PRIu64 ")\n", - mei.mi_geo.lower, mei.mi_geo.upper, mei.mi_geo.grow, - mei.mi_geo.shrink, mei.mi_geo.lower / mst.ms_psize, - 
mei.mi_geo.upper / mst.ms_psize, mei.mi_geo.grow / mst.ms_psize, - mei.mi_geo.shrink / mst.ms_psize); - printf(" Current datafile: %" PRIu64 " bytes, %" PRIu64 " pages\n", - mei.mi_geo.current, mei.mi_geo.current / mst.ms_psize); + printf(" Dynamic datafile: %" PRIu64 "..%" PRIu64 " bytes (+%" PRIu64 "/-%" PRIu64 "), %" PRIu64 "..%" PRIu64 + " pages (+%" PRIu64 "/-%" PRIu64 ")\n", + mei.mi_geo.lower, mei.mi_geo.upper, mei.mi_geo.grow, mei.mi_geo.shrink, mei.mi_geo.lower / mst.ms_psize, + mei.mi_geo.upper / mst.ms_psize, mei.mi_geo.grow / mst.ms_psize, mei.mi_geo.shrink / mst.ms_psize); + printf(" Current datafile: %" PRIu64 " bytes, %" PRIu64 " pages\n", mei.mi_geo.current, + mei.mi_geo.current / mst.ms_psize); } else { - printf(" Fixed datafile: %" PRIu64 " bytes, %" PRIu64 " pages\n", - mei.mi_geo.current, mei.mi_geo.current / mst.ms_psize); + printf(" Fixed datafile: %" PRIu64 " bytes, %" PRIu64 " pages\n", mei.mi_geo.current, + mei.mi_geo.current / mst.ms_psize); } - printf(" Current mapsize: %" PRIu64 " bytes, %" PRIu64 " pages \n", - mei.mi_mapsize, mei.mi_mapsize / mst.ms_psize); + printf(" Current mapsize: %" PRIu64 " bytes, %" PRIu64 " pages \n", mei.mi_mapsize, mei.mi_mapsize / mst.ms_psize); printf(" Number of pages used: %" PRIu64 "\n", mei.mi_last_pgno + 1); printf(" Last transaction ID: %" PRIu64 "\n", mei.mi_recent_txnid); - printf(" Tail transaction ID: %" PRIu64 " (%" PRIi64 ")\n", - mei.mi_latter_reader_txnid, + printf(" Tail transaction ID: %" PRIu64 " (%" PRIi64 ")\n", mei.mi_latter_reader_txnid, mei.mi_latter_reader_txnid - mei.mi_recent_txnid); printf(" Max readers: %u\n", mei.mi_maxreaders); printf(" Number of readers used: %u\n", mei.mi_numreaders); @@ -290,8 +266,8 @@ static void periodic_stat(void) { get_db_stat("session_id", &ms_branch_pages, &ms_leaf_pages); get_db_stat("event", &ms_branch_pages, &ms_leaf_pages); get_db_stat("ip", &ms_branch_pages, &ms_leaf_pages); - printf("%15s | %15" PRIu64 " | %5s | %10s | %10" PRIu64 " | %11s |\n", 
"", - ms_branch_pages, "", "", ms_leaf_pages, ""); + printf("%15s | %15" PRIu64 " | %5s | %10s | %10" PRIu64 " | %11s |\n", "", ms_branch_pages, "", "", ms_leaf_pages, + ""); static int64_t prev_add_count; static int64_t prev_del_count; @@ -300,32 +276,23 @@ static void periodic_stat(void) { static int64_t t = -1; if (t > 0) { int64_t delta = (getClockUs() - t); - printf("CPS: add %" PRIu64 ", delete %" PRIu64 - ", items processed - %" PRIu64 "K data=%" PRIu64 "K key=%" PRIu64 + printf("CPS: add %" PRIu64 ", delete %" PRIu64 ", items processed - %" PRIu64 "K data=%" PRIu64 "K key=%" PRIu64 "K\n", - (mdbx_add_count - prev_add_count) * 1000000 / delta, - (mdbx_del_count - prev_del_count) * 1000000 / delta, obj_id / 1024, - mdbx_data_size / 1024, mdbx_key_size / 1024); + (mdbx_add_count - prev_add_count) * 1000000 / delta, (mdbx_del_count - prev_del_count) * 1000000 / delta, + obj_id / 1024, mdbx_data_size / 1024, mdbx_key_size / 1024); printf("usage data=%" PRIu64 "%%", - ((mdbx_data_size + mdbx_key_size) * 100) / - ((ms_leaf_pages + ms_branch_pages) * 4096)); + ((mdbx_data_size + mdbx_key_size) * 100) / ((ms_leaf_pages + ms_branch_pages) * 4096)); if (prev_add_time != mdbx_add_time) { - printf(" Add : %" PRIu64 " c/s", (mdbx_add_count - prev_add_count) * - 1000000 / - (mdbx_add_time - prev_add_time)); + printf(" Add : %" PRIu64 " c/s", (mdbx_add_count - prev_add_count) * 1000000 / (mdbx_add_time - prev_add_time)); } if (prev_del_time != mdbx_del_time) { - printf(" Del : %" PRIu64 " c/s", (mdbx_del_count - prev_del_count) * - 1000000 / - (mdbx_del_time - prev_del_time)); + printf(" Del : %" PRIu64 " c/s", (mdbx_del_count - prev_del_count) * 1000000 / (mdbx_del_time - prev_del_time)); } if (mdbx_add_time) { - printf(" tAdd : %" PRIu64 " c/s", - mdbx_add_count * 1000000 / mdbx_add_time); + printf(" tAdd : %" PRIu64 " c/s", mdbx_add_count * 1000000 / mdbx_add_time); } if (mdbx_del_time) { - printf(" tDel : %" PRIu64 " c/s", - mdbx_del_count * 1000000 / mdbx_del_time); + 
printf(" tDel : %" PRIu64 " c/s", mdbx_del_count * 1000000 / mdbx_del_time); } puts(""); } diff --git a/test/extra/probe.c++ b/test/extra/probe.c++ index c80f5f87..9dca781c 100644 --- a/test/extra/probe.c++ +++ b/test/extra/probe.c++ @@ -5,7 +5,6 @@ int main(int argc, const char *argv[]) { (void)argc; (void)argv; - std::cout - << "OK (but this is do-nothing test just for a check for compilation)\n"; + std::cout << "OK (but this is do-nothing test just for a check for compilation)\n"; return EXIT_SUCCESS; } diff --git a/test/extra/upsert_alldups.c b/test/extra/upsert_alldups.c index 7999f081..d6e69a93 100644 --- a/test/extra/upsert_alldups.c +++ b/test/extra/upsert_alldups.c @@ -17,8 +17,8 @@ static int dump(MDBX_cursor *cur) { int rc = mdbx_cursor_get(cur, &key, &data, MDBX_FIRST); while (rc == 0) { - printf("(%.*s) = (%.*s)\n", (int)key.iov_len, (const char *)key.iov_base, - (int)data.iov_len, (const char *)data.iov_base); + printf("(%.*s) = (%.*s)\n", (int)key.iov_len, (const char *)key.iov_base, (int)data.iov_len, + (const char *)data.iov_base); rc = mdbx_cursor_get(cur, &key, &data, MDBX_NEXT); } return rc; @@ -38,8 +38,7 @@ static int clear(MDBX_cursor *cur) { return (rc == MDBX_NOTFOUND) ? 
0 : rc; } -static int put(MDBX_txn *txn, MDBX_dbi dbi, const char *k, const char *v, - MDBX_put_flags_t flags) { +static int put(MDBX_txn *txn, MDBX_dbi dbi, const char *k, const char *v, MDBX_put_flags_t flags) { MDBX_val key = {.iov_base = (void *)k, .iov_len = strlen(k)}; MDBX_val data = {.iov_base = (void *)v, .iov_len = strlen(v)}; return mdbx_put(txn, dbi, &key, &data, flags); @@ -79,21 +78,21 @@ int main(int argc, const char *argv[]) { goto Fail; } -#define DUMP() \ - do { \ - if ((rc = dump(cur)) && rc != MDBX_NOTFOUND) { \ - errmsg = "failed to mdbx_cursor_get(FIRST): %s\n"; \ - goto Fail; \ - } \ - puts(""); \ +#define DUMP() \ + do { \ + if ((rc = dump(cur)) && rc != MDBX_NOTFOUND) { \ + errmsg = "failed to mdbx_cursor_get(FIRST): %s\n"; \ + goto Fail; \ + } \ + puts(""); \ } while (0) -#define PUTVAL(k, v, flags) \ - do { \ - if ((rc = put(txn, dbi, k, v, flags))) { \ - errmsg = "failed to mdbx_put: %s\n"; \ - goto Fail; \ - } \ +#define PUTVAL(k, v, flags) \ + do { \ + if ((rc = put(txn, dbi, k, v, flags))) { \ + errmsg = "failed to mdbx_put: %s\n"; \ + goto Fail; \ + } \ } while (0) puts("TEST WITH MULTIPLE KEYS ===================="); diff --git a/test/fork.c++ b/test/fork.c++ index 05fb250a..d546cf89 100644 --- a/test/fork.c++ +++ b/test/fork.c++ @@ -17,8 +17,7 @@ protected: unsigned dbi_state{0}; public: - testcase_smoke4fork(const actor_config &config, const mdbx_pid_t pid) - : testcase(config, pid) {} + testcase_smoke4fork(const actor_config &config, const mdbx_pid_t pid) : testcase(config, pid) {} virtual void txn_end(bool abort) override; bool run() override; virtual bool smoke() = 0; @@ -27,8 +26,7 @@ public: bool testcase_smoke4fork::open_dbi() { if (!dbi || dbi_invalid) { - if (dbi_stable || - (mdbx_txn_flags(txn_guard.get()) & MDBX_TXN_RDONLY) == 0) { + if (dbi_stable || (mdbx_txn_flags(txn_guard.get()) & MDBX_TXN_RDONLY) == 0) { dbi = db_table_open(!dbi_stable); dbi_invalid = false; } @@ -37,8 +35,7 @@ bool testcase_smoke4fork::open_dbi() 
{ dbi_state = 0; if (dbi && !dbi_invalid) { unsigned unused_dbi_flags; - int err = - mdbx_dbi_flags_ex(txn_guard.get(), dbi, &unused_dbi_flags, &dbi_state); + int err = mdbx_dbi_flags_ex(txn_guard.get(), dbi, &unused_dbi_flags, &dbi_state); if (unlikely(err != MDBX_SUCCESS)) failure_perror("mdbx_dbi_flags_ex()", err); if ((dbi_state & (MDBX_DBI_CREAT | MDBX_DBI_FRESH)) == 0) @@ -69,8 +66,7 @@ bool testcase_smoke4fork::run() { if (history.empty() || current_pid != history.front()) { history.push_back(current_pid); if (history.size() > /* TODO: add test option */ 2) { - log_notice("force exit to avoid fork-bomb: deep %zu, pid stack", - history.size()); + log_notice("force exit to avoid fork-bomb: deep %zu, pid stack", history.size()); for (const auto pid : history) logging::feed(" %d", pid); logging::ln(); @@ -82,23 +78,19 @@ bool testcase_smoke4fork::run() { int err = db_open__begin__table_create_open_clean(dbi); if (unlikely(err != MDBX_SUCCESS)) { - log_notice("fork[deep %d, pid %d]: bailout-prepare due '%s'", deep, - current_pid, mdbx_strerror(err)); + log_notice("fork[deep %d, pid %d]: bailout-prepare due '%s'", deep, current_pid, mdbx_strerror(err)); return false; } open_dbi(); if (flipcoin()) { if (!smoke()) { - log_notice("%s[deep %d, pid %d] probe %s", "pre-fork", deep, current_pid, - "failed"); + log_notice("%s[deep %d, pid %d] probe %s", "pre-fork", deep, current_pid, "failed"); return false; } - log_verbose("%s[deep %d, pid %d] probe %s", "pre-fork", deep, current_pid, - "done"); + log_verbose("%s[deep %d, pid %d] probe %s", "pre-fork", deep, current_pid, "done"); } else { - log_verbose("%s[deep %d, pid %d] probe %s", "pre-fork", deep, current_pid, - "skipped"); + log_verbose("%s[deep %d, pid %d] probe %s", "pre-fork", deep, current_pid, "skipped"); #ifdef __SANITIZE_ADDRESS__ const bool commit_txn_to_avoid_memleak = true; #else @@ -115,13 +107,12 @@ bool testcase_smoke4fork::run() { if (child == 0) { const pid_t new_pid = getpid(); - log_verbose(">>> %s, 
deep %d, parent-pid %d, child-pid %d", - "mdbx_env_resurrect_after_fork()", deep, current_pid, new_pid); + log_verbose(">>> %s, deep %d, parent-pid %d, child-pid %d", "mdbx_env_resurrect_after_fork()", deep, current_pid, + new_pid); log_flush(); int err = mdbx_env_resurrect_after_fork(db_guard.get()); - log_verbose("<<< %s, deep %d, parent-pid %d, child-pid %d, err %d", - "mdbx_env_resurrect_after_fork()", deep, current_pid, new_pid, - err); + log_verbose("<<< %s, deep %d, parent-pid %d, child-pid %d, err %d", "mdbx_env_resurrect_after_fork()", deep, + current_pid, new_pid, err); log_flush(); if (err != MDBX_SUCCESS) failure_perror("mdbx_env_resurrect_after_fork()", err); @@ -134,12 +125,10 @@ bool testcase_smoke4fork::run() { mdbx_txn_abort(txn_guard.release()); } if (!smoke()) { - log_notice("%s[deep %d, pid %d] probe %s", "fork-child", deep, new_pid, - "failed"); + log_notice("%s[deep %d, pid %d] probe %s", "fork-child", deep, new_pid, "failed"); return false; } - log_verbose("%s[deep %d, pid %d] probe %s", "fork-child", deep, new_pid, - "done"); + log_verbose("%s[deep %d, pid %d] probe %s", "fork-child", deep, new_pid, "done"); log_flush(); return true; } @@ -154,12 +143,10 @@ bool testcase_smoke4fork::run() { if (WIFEXITED(status)) { const int code = WEXITSTATUS(status); if (code != EXIT_SUCCESS) { - log_notice("%s[deep %d, pid %d] child-pid %d failed, err %d", - "fork-child", deep, current_pid, child, code); + log_notice("%s[deep %d, pid %d] child-pid %d failed, err %d", "fork-child", deep, current_pid, child, code); return false; } - log_notice("%s[deep %d, pid %d] child-pid %d done", "fork-child", deep, - current_pid, child); + log_notice("%s[deep %d, pid %d] child-pid %d done", "fork-child", deep, current_pid, child); } else if (WIFSIGNALED(status)) { const int sig = WTERMSIG(status); switch (sig) { @@ -168,12 +155,12 @@ bool testcase_smoke4fork::run() { case SIGFPE: case SIGILL: case SIGSEGV: - log_notice("%s[deep %d, pid %d] child-pid %d %s by SIG%s", 
"fork-child", - deep, current_pid, child, "terminated", signal_name(sig)); + log_notice("%s[deep %d, pid %d] child-pid %d %s by SIG%s", "fork-child", deep, current_pid, child, "terminated", + signal_name(sig)); break; default: - log_notice("%s[deep %d, pid %d] child-id %d %s by SIG%s", "fork-child", - deep, current_pid, child, "killed", signal_name(sig)); + log_notice("%s[deep %d, pid %d] child-id %d %s by SIG%s", "fork-child", deep, current_pid, child, "killed", + signal_name(sig)); } return false; } else { @@ -181,12 +168,10 @@ bool testcase_smoke4fork::run() { } if (!smoke()) { - log_notice("%s[deep %d, pid %d] probe %s", "post-fork", deep, current_pid, - "failed"); + log_notice("%s[deep %d, pid %d] probe %s", "post-fork", deep, current_pid, "failed"); return false; } - log_verbose("%s[deep %d, pid %d] probe %s", "post-fork", deep, current_pid, - "done"); + log_verbose("%s[deep %d, pid %d] probe %s", "post-fork", deep, current_pid, "done"); return true; } @@ -196,16 +181,14 @@ class testcase_forkread : public testcase_smoke4fork { using inherited = testcase_smoke4fork; public: - testcase_forkread(const actor_config &config, const mdbx_pid_t pid) - : testcase_smoke4fork(config, pid) {} + testcase_forkread(const actor_config &config, const mdbx_pid_t pid) : testcase_smoke4fork(config, pid) {} bool smoke() override; }; REGISTER_TESTCASE(forkread); bool testcase_forkread::smoke() { MDBX_envinfo env_info; - int err = mdbx_env_info_ex(db_guard.get(), txn_guard.get(), &env_info, - sizeof(env_info)); + int err = mdbx_env_info_ex(db_guard.get(), txn_guard.get(), &env_info, sizeof(env_info)); if (err) failure_perror("mdbx_env_info_ex()", err); @@ -217,8 +200,7 @@ bool testcase_forkread::smoke() { if (err) failure_perror("mdbx_txn_info()", err); fetch_canary(); - err = mdbx_env_info_ex(db_guard.get(), txn_guard.get(), &env_info, - sizeof(env_info)); + err = mdbx_env_info_ex(db_guard.get(), txn_guard.get(), &env_info, sizeof(env_info)); if (err) 
failure_perror("mdbx_env_info_ex()", err); @@ -226,15 +208,13 @@ bool testcase_forkread::smoke() { if (dbi_invalid) { err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 0); if (unlikely(err != (dbi ? MDBX_BAD_DBI : MDBX_SUCCESS))) - failure("unexpected '%s' from mdbx_dbi_sequence(get, bad_dbi %d)", - mdbx_strerror(err), dbi); + failure("unexpected '%s' from mdbx_dbi_sequence(get, bad_dbi %d)", mdbx_strerror(err), dbi); open_dbi(); } if (!dbi_invalid) { err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 0); if (unlikely(err != MDBX_SUCCESS)) - failure("unexpected '%s' from mdbx_dbi_sequence(get, dbi %d)", - mdbx_strerror(err), dbi); + failure("unexpected '%s' from mdbx_dbi_sequence(get, dbi %d)", mdbx_strerror(err), dbi); } txn_end(false); return true; @@ -246,8 +226,7 @@ class testcase_forkwrite : public testcase_forkread { using inherited = testcase_forkread; public: - testcase_forkwrite(const actor_config &config, const mdbx_pid_t pid) - : testcase_forkread(config, pid) {} + testcase_forkwrite(const actor_config &config, const mdbx_pid_t pid) : testcase_forkread(config, pid) {} bool smoke() override; }; REGISTER_TESTCASE(forkwrite); @@ -266,15 +245,13 @@ bool testcase_forkwrite::smoke() { if (dbi_invalid) { int err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 0); if (unlikely(err != (dbi ? 
MDBX_BAD_DBI : MDBX_EACCESS))) - failure("unexpected '%s' from mdbx_dbi_sequence(get, bad_dbi %d)", - mdbx_strerror(err), dbi); + failure("unexpected '%s' from mdbx_dbi_sequence(get, bad_dbi %d)", mdbx_strerror(err), dbi); open_dbi(); } if (!dbi_invalid) { int err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 1); if (unlikely(err != MDBX_SUCCESS)) - failure("unexpected '%s' from mdbx_dbi_sequence(inc, dbi %d)", - mdbx_strerror(err), dbi); + failure("unexpected '%s' from mdbx_dbi_sequence(inc, dbi %d)", mdbx_strerror(err), dbi); } txn_end(false); diff --git a/test/hill.c++ b/test/hill.c++ index 6689a0db..ac6254e9 100644 --- a/test/hill.c++ +++ b/test/hill.c++ @@ -25,8 +25,7 @@ class testcase_hill : public testcase { public: - testcase_hill(const actor_config &config, const mdbx_pid_t pid) - : testcase(config, pid) {} + testcase_hill(const actor_config &config, const mdbx_pid_t pid) : testcase(config, pid) {} bool run() override; }; REGISTER_TESTCASE(hill); @@ -50,13 +49,9 @@ bool testcase_hill::run() { keygen::buffer b_data = keygen::alloc(config.params.datalen_max); const MDBX_put_flags_t insert_flags = - (config.params.table_flags & MDBX_DUPSORT) - ? MDBX_NODUPDATA - : MDBX_NODUPDATA | MDBX_NOOVERWRITE; + (config.params.table_flags & MDBX_DUPSORT) ? MDBX_NODUPDATA : MDBX_NODUPDATA | MDBX_NOOVERWRITE; const MDBX_put_flags_t update_flags = - (config.params.table_flags & MDBX_DUPSORT) - ? MDBX_CURRENT | MDBX_NODUPDATA | MDBX_NOOVERWRITE - : MDBX_NODUPDATA; + (config.params.table_flags & MDBX_DUPSORT) ? 
MDBX_CURRENT | MDBX_NODUPDATA | MDBX_NOOVERWRITE : MDBX_NODUPDATA; uint64_t serial_count = 0; uint64_t committed_serial = serial_count; @@ -80,8 +75,7 @@ bool testcase_hill::run() { // создаем первую запись из пары const keygen::serial_t age_shift = keyvalue_maker.remix_age(a_serial); - log_trace("uphill: insert-a (age %" PRIu64 ") %" PRIu64, age_shift, - a_serial); + log_trace("uphill: insert-a (age %" PRIu64 ") %" PRIu64, age_shift, a_serial); generate_pair(a_serial, a_key, a_data_1, age_shift); err = insert(a_key, a_data_1, insert_flags); @@ -154,8 +148,7 @@ bool testcase_hill::run() { } // обновляем данные в первой записи - log_trace("uphill: update-a (age %" PRIu64 "->0) %" PRIu64, age_shift, - a_serial); + log_trace("uphill: update-a (age %" PRIu64 "->0) %" PRIu64, age_shift, a_serial); generate_pair(a_serial, a_key, a_data_0, 0); checkdata("uphill: update-a", dbi, a_key->value, a_data_1->value); err = replace(a_key, a_data_0, a_data_1, update_flags); @@ -271,8 +264,7 @@ bool testcase_hill::run() { if (str.back() == '-') str.append(std::to_string(prev)); - log_notice("hill: reached %d tree depth & %s sub-tree depth(s)", - stat.ms_depth, str.c_str()); + log_notice("hill: reached %d tree depth & %s sub-tree depth(s)", stat.ms_depth, str.c_str()); } if ((config.params.table_flags & MDBX_DUPSORT) == 0) { @@ -292,16 +284,14 @@ bool testcase_hill::run() { // обновляем первую запись из пары const keygen::serial_t age_shift = keyvalue_maker.remix_age(a_serial); - log_trace("downhill: update-a (age 0->%" PRIu64 ") %" PRIu64, age_shift, - a_serial); + log_trace("downhill: update-a (age 0->%" PRIu64 ") %" PRIu64, age_shift, a_serial); generate_pair(a_serial, a_key, a_data_0, 0); generate_pair(a_serial, a_key, a_data_1, age_shift); checkdata("downhill: update-a", dbi, a_key->value, a_data_0->value); err = replace(a_key, a_data_1, a_data_0, update_flags); if (unlikely(err != MDBX_SUCCESS)) { if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) { - 
log_notice("downhill: bailout at update-a due '%s'", - mdbx_strerror(err)); + log_notice("downhill: bailout at update-a due '%s'", mdbx_strerror(err)); txn_end(true); speculum = speculum_committed; break; @@ -334,8 +324,7 @@ bool testcase_hill::run() { err = insert(b_key, b_data, insert_flags); if (unlikely(err != MDBX_SUCCESS)) { if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) { - log_notice("downhill: bailout at insert-a due '%s'", - mdbx_strerror(err)); + log_notice("downhill: bailout at insert-a due '%s'", mdbx_strerror(err)); txn_end(true); speculum = speculum_committed; break; @@ -363,14 +352,12 @@ bool testcase_hill::run() { } // удаляем первую запись - log_trace("downhill: delete-a (age %" PRIu64 ") %" PRIu64, age_shift, - a_serial); + log_trace("downhill: delete-a (age %" PRIu64 ") %" PRIu64, age_shift, a_serial); checkdata("downhill: delete-a", dbi, a_key->value, a_data_1->value); err = remove(a_key, a_data_1); if (unlikely(err != MDBX_SUCCESS)) { if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) { - log_notice("downhill: bailout at delete-a due '%s'", - mdbx_strerror(err)); + log_notice("downhill: bailout at delete-a due '%s'", mdbx_strerror(err)); txn_end(true); speculum = speculum_committed; break; @@ -403,8 +390,7 @@ bool testcase_hill::run() { err = remove(b_key, b_data); if (unlikely(err != MDBX_SUCCESS)) { if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) { - log_notice("downhill: bailout at delete-b due '%s'", - mdbx_strerror(err)); + log_notice("downhill: bailout at delete-b due '%s'", mdbx_strerror(err)); txn_end(true); speculum = speculum_committed; break; diff --git a/test/jitter.c++ b/test/jitter.c++ index 1f56978c..3bf75100 100644 --- a/test/jitter.c++ +++ b/test/jitter.c++ @@ -8,8 +8,7 @@ protected: void check_dbi_error(int expect, const char *stage); public: - testcase_jitter(const actor_config &config, const mdbx_pid_t pid) - : testcase(config, pid) {} + testcase_jitter(const actor_config &config, const mdbx_pid_t 
pid) : testcase(config, pid) {} bool run() override; }; REGISTER_TESTCASE(jitter); @@ -18,8 +17,7 @@ void testcase_jitter::check_dbi_error(int expect, const char *stage) { MDBX_stat stat; int err = mdbx_dbi_stat(txn_guard.get(), dbi, &stat, sizeof(stat)); if (err != expect) - failure("unexpected result for %s dbi-handle: expect %d, got %d", stage, - expect, err); + failure("unexpected result for %s dbi-handle: expect %d, got %d", stage, expect, err); } bool testcase_jitter::run() { @@ -31,8 +29,7 @@ bool testcase_jitter::run() { tablename_buf buffer; const char *const tablename = db_tablename(buffer); tablename_buf buffer_renamed; - const char *const tablename_renamed = - db_tablename(buffer_renamed, ".renamed"); + const char *const tablename_renamed = db_tablename(buffer_renamed, ".renamed"); while (should_continue()) { jitter_delay(); @@ -81,9 +78,8 @@ bool testcase_jitter::run() { // restore DBI dbi = db_table_open(false, renamed); if (renamed) { - err = mdbx_dbi_open( - txn_guard.get(), tablename_renamed, - flipcoin() ? MDBX_DB_ACCEDE : config.params.table_flags, &dbi); + err = mdbx_dbi_open(txn_guard.get(), tablename_renamed, flipcoin() ? MDBX_DB_ACCEDE : config.params.table_flags, + &dbi); if (unlikely(err != MDBX_SUCCESS)) failure_perror("open-renamed", err); err = mdbx_dbi_rename(txn_guard.get(), dbi, tablename); @@ -100,13 +96,10 @@ bool testcase_jitter::run() { if (upper_limit < 1) { MDBX_envinfo info; - err = mdbx_env_info_ex(db_guard.get(), txn_guard.get(), &info, - sizeof(info)); + err = mdbx_env_info_ex(db_guard.get(), txn_guard.get(), &info, sizeof(info)); if (err) failure_perror("mdbx_env_info_ex()", err); - upper_limit = (info.mi_geo.upper < INTPTR_MAX) - ? (intptr_t)info.mi_geo.upper - : INTPTR_MAX; + upper_limit = (info.mi_geo.upper < INTPTR_MAX) ? 
(intptr_t)info.mi_geo.upper : INTPTR_MAX; } if (flipcoin()) { @@ -156,29 +149,26 @@ bool testcase_jitter::run() { fetch_canary(); update_canary(1); if (global::config::geometry_jitter) { - err = mdbx_env_set_geometry( - db_guard.get(), -1, -1, - coin4size ? upper_limit * 2 / 3 : upper_limit * 3 / 2, -1, -1, -1); - if (err != MDBX_SUCCESS && err != MDBX_UNABLE_EXTEND_MAPSIZE && - err != MDBX_MAP_FULL && err != MDBX_TOO_LARGE && err != MDBX_EPERM) + err = mdbx_env_set_geometry(db_guard.get(), -1, -1, coin4size ? upper_limit * 2 / 3 : upper_limit * 3 / 2, -1, + -1, -1); + if (err != MDBX_SUCCESS && err != MDBX_UNABLE_EXTEND_MAPSIZE && err != MDBX_MAP_FULL && err != MDBX_TOO_LARGE && + err != MDBX_EPERM) failure_perror("mdbx_env_set_geometry-1", err); } } if (flipcoin()) { uint64_t unused; - err = mdbx_dbi_sequence(txn_guard.get(), MAIN_DBI, &unused, - mode_readonly() ? 0 : 1); + err = mdbx_dbi_sequence(txn_guard.get(), MAIN_DBI, &unused, mode_readonly() ? 0 : 1); if (err) failure_perror("mdbx_dbi_sequence()", err); } txn_end(flipcoin()); if (global::config::geometry_jitter) { - err = mdbx_env_set_geometry( - db_guard.get(), -1, -1, - !coin4size ? upper_limit * 2 / 3 : upper_limit * 3 / 2, -1, -1, -1); - if (err != MDBX_SUCCESS && err != MDBX_UNABLE_EXTEND_MAPSIZE && - err != MDBX_MAP_FULL && err != MDBX_TOO_LARGE && err != MDBX_EPERM) + err = mdbx_env_set_geometry(db_guard.get(), -1, -1, !coin4size ? 
upper_limit * 2 / 3 : upper_limit * 3 / 2, -1, + -1, -1); + if (err != MDBX_SUCCESS && err != MDBX_UNABLE_EXTEND_MAPSIZE && err != MDBX_MAP_FULL && err != MDBX_TOO_LARGE && + err != MDBX_EPERM) failure_perror("mdbx_env_set_geometry-2", err); } @@ -191,18 +181,16 @@ bool testcase_jitter::run() { if (global::config::geometry_jitter) { jitter_delay(); - err = mdbx_env_set_geometry(db_guard.get(), -1, -1, upper_limit, -1, -1, - -1); - if (err != MDBX_SUCCESS && err != MDBX_UNABLE_EXTEND_MAPSIZE && - err != MDBX_MAP_FULL && err != MDBX_TOO_LARGE && err != MDBX_EPERM) + err = mdbx_env_set_geometry(db_guard.get(), -1, -1, upper_limit, -1, -1, -1); + if (err != MDBX_SUCCESS && err != MDBX_UNABLE_EXTEND_MAPSIZE && err != MDBX_MAP_FULL && err != MDBX_TOO_LARGE && + err != MDBX_EPERM) failure_perror("mdbx_env_set_geometry-3", err); } db_close(); /* just 'align' nops with other tests with batching */ - const auto batching = - std::max(config.params.batch_read, config.params.batch_write); + const auto batching = std::max(config.params.batch_read, config.params.batch_write); report(std::max(1u, batching / 2)); } return true; diff --git a/test/keygen.c++ b/test/keygen.c++ index a3879770..8ac4987d 100644 --- a/test/keygen.c++ +++ b/test/keygen.c++ @@ -7,25 +7,18 @@ static const uint64_t primes[64] = { /* */ 0, 1, 3, 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, /* */ - UINT64_C(32749), UINT64_C(65521), UINT64_C(131071), UINT64_C(262139), - UINT64_C(524287), UINT64_C(1048573), UINT64_C(2097143), UINT64_C(4194301), - UINT64_C(8388593), UINT64_C(16777213), UINT64_C(33554393), - UINT64_C(67108859), UINT64_C(134217689), UINT64_C(268435399), - UINT64_C(536870909), UINT64_C(1073741789), UINT64_C(2147483647), - UINT64_C(4294967291), UINT64_C(8589934583), UINT64_C(17179869143), - UINT64_C(34359738337), UINT64_C(68719476731), UINT64_C(137438953447), - UINT64_C(274877906899), UINT64_C(549755813881), UINT64_C(1099511627689), - UINT64_C(2199023255531), UINT64_C(4398046511093), 
UINT64_C(8796093022151), - UINT64_C(17592186044399), UINT64_C(35184372088777), - UINT64_C(70368744177643), UINT64_C(140737488355213), - UINT64_C(281474976710597), UINT64_C(562949953421231), - UINT64_C(1125899906842597), UINT64_C(2251799813685119), - UINT64_C(4503599627370449), UINT64_C(9007199254740881), - UINT64_C(18014398509481951), UINT64_C(36028797018963913), - UINT64_C(72057594037927931), UINT64_C(144115188075855859), - UINT64_C(288230376151711717), UINT64_C(576460752303423433), - UINT64_C(1152921504606846883), UINT64_C(2305843009213693951), - UINT64_C(4611686018427387847), UINT64_C(9223372036854775783)}; + UINT64_C(32749), UINT64_C(65521), UINT64_C(131071), UINT64_C(262139), UINT64_C(524287), UINT64_C(1048573), + UINT64_C(2097143), UINT64_C(4194301), UINT64_C(8388593), UINT64_C(16777213), UINT64_C(33554393), UINT64_C(67108859), + UINT64_C(134217689), UINT64_C(268435399), UINT64_C(536870909), UINT64_C(1073741789), UINT64_C(2147483647), + UINT64_C(4294967291), UINT64_C(8589934583), UINT64_C(17179869143), UINT64_C(34359738337), UINT64_C(68719476731), + UINT64_C(137438953447), UINT64_C(274877906899), UINT64_C(549755813881), UINT64_C(1099511627689), + UINT64_C(2199023255531), UINT64_C(4398046511093), UINT64_C(8796093022151), UINT64_C(17592186044399), + UINT64_C(35184372088777), UINT64_C(70368744177643), UINT64_C(140737488355213), UINT64_C(281474976710597), + UINT64_C(562949953421231), UINT64_C(1125899906842597), UINT64_C(2251799813685119), UINT64_C(4503599627370449), + UINT64_C(9007199254740881), UINT64_C(18014398509481951), UINT64_C(36028797018963913), UINT64_C(72057594037927931), + UINT64_C(144115188075855859), UINT64_C(288230376151711717), UINT64_C(576460752303423433), + UINT64_C(1152921504606846883), UINT64_C(2305843009213693951), UINT64_C(4611686018427387847), + UINT64_C(9223372036854775783)}; /* static unsigned supid_log2(uint64_t v) { unsigned r = 0; @@ -39,9 +32,7 @@ static const uint64_t primes[64] = { namespace keygen { /* LY: 
https://en.wikipedia.org/wiki/Injective_function */ -serial_t injective(const serial_t serial, - const unsigned bits /* at least serial_minwith (8) */, - const serial_t salt) { +serial_t injective(const serial_t serial, const unsigned bits /* at least serial_minwith (8) */, const serial_t salt) { assert(bits >= serial_minwith && bits <= serial_maxwith); /* LY: All these "magic" prime numbers were found @@ -49,26 +40,21 @@ serial_t injective(const serial_t serial, static const uint64_t m[64 - serial_minwith + 1] = { /* 8 - 24 */ - 113, 157, 397, 653, 1753, 5641, 9697, 23873, 25693, 80833, 105953, 316937, - 309277, 834497, 1499933, 4373441, 10184137, + 113, 157, 397, 653, 1753, 5641, 9697, 23873, 25693, 80833, 105953, 316937, 309277, 834497, 1499933, 4373441, + 10184137, /* 25 - 64 */ - 10184137, 17279209, 33990377, 67295161, 284404553, 1075238767, 6346721573, - 6924051577, 19204053433, 45840188887, 53625693977, 73447827913, - 141638870249, 745683604649, 1283334050489, 1100828289853, 2201656586197, - 5871903036137, 11238507001417, 45264020802263, 105008404482889, - 81921776907059, 199987980256399, 307207457507641, 946769023178273, - 2420886491930041, 3601632139991929, 11984491914483833, 21805846439714153, - 23171543400565993, 53353226456762893, 155627817337932409, - 227827205384840249, 816509268558278821, 576933057762605689, - 2623957345935638441, 5048241705479929949, 4634245581946485653, - 4613509448041658233, 4952535426879925961}; - static const uint8_t s[64 - serial_minwith + 1] = { - /* 8 - 24 */ - 2, 3, 4, 4, 2, 4, 3, 3, 7, 3, 3, 4, 8, 3, 10, 3, 11, - /* 25 - 64 */ - 11, 9, 9, 9, 11, 10, 5, 14, 11, 16, 14, 12, 13, 16, 19, 10, 10, 21, 7, 20, - 10, 14, 22, 19, 3, 21, 18, 19, 26, 24, 2, 21, 25, 29, 24, 10, 11, 14, 20, - 19}; + 10184137, 17279209, 33990377, 67295161, 284404553, 1075238767, 6346721573, 6924051577, 19204053433, 45840188887, + 53625693977, 73447827913, 141638870249, 745683604649, 1283334050489, 1100828289853, 2201656586197, 5871903036137, + 
11238507001417, 45264020802263, 105008404482889, 81921776907059, 199987980256399, 307207457507641, + 946769023178273, 2420886491930041, 3601632139991929, 11984491914483833, 21805846439714153, 23171543400565993, + 53353226456762893, 155627817337932409, 227827205384840249, 816509268558278821, 576933057762605689, + 2623957345935638441, 5048241705479929949, 4634245581946485653, 4613509448041658233, 4952535426879925961}; + static const uint8_t s[64 - serial_minwith + 1] = {/* 8 - 24 */ + 2, 3, 4, 4, 2, 4, 3, 3, 7, 3, 3, 4, 8, 3, 10, 3, 11, + /* 25 - 64 */ + 11, 9, 9, 9, 11, 10, 5, 14, 11, 16, 14, 12, 13, 16, 19, 10, 10, 21, + 7, 20, 10, 14, 22, 19, 3, 21, 18, 19, 26, 24, 2, 21, 25, 29, 24, + 10, 11, 14, 20, 19}; const auto mask = actor_params::serial_mask(bits); const auto mult = m[bits - 8]; @@ -83,49 +69,40 @@ serial_t injective(const serial_t serial, result ^= (result & mask) >> shift; result &= mask; - log_trace("keygen-injective: serial %" PRIu64 "/%u @%" PRIx64 ",%u,%" PRIu64 - " => %" PRIu64 "/%u", - serial, bits, mult, shift, salt, result, bits); + log_trace("keygen-injective: serial %" PRIu64 "/%u @%" PRIx64 ",%u,%" PRIu64 " => %" PRIu64 "/%u", serial, bits, mult, + shift, salt, result, bits); return result; } -void __hot maker::pair(serial_t serial, const buffer &key, buffer &value, - serial_t value_age, const bool keylen_changeable) { +void __hot maker::pair(serial_t serial, const buffer &key, buffer &value, serial_t value_age, + const bool keylen_changeable) { assert(mapping.width >= serial_minwith && mapping.width <= serial_maxwith); assert(mapping.split <= mapping.width); assert(mapping.mesh <= mapping.width); assert(mapping.rotate <= mapping.width); assert(mapping.offset <= actor_params::serial_mask(mapping.width)); assert(!(key_essentials.flags & - ~(essentials::prng_fill_flag | - unsigned(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT)))); + ~(essentials::prng_fill_flag | unsigned(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT)))); 
assert(!(value_essentials.flags & - ~(essentials::prng_fill_flag | - unsigned(MDBX_INTEGERDUP | MDBX_REVERSEDUP | MDBX_DUPFIXED)))); + ~(essentials::prng_fill_flag | unsigned(MDBX_INTEGERDUP | MDBX_REVERSEDUP | MDBX_DUPFIXED)))); - log_trace("keygen-pair: serial %" PRIu64 ", data-age %" PRIu64, serial, - value_age); + log_trace("keygen-pair: serial %" PRIu64 ", data-age %" PRIu64, serial, value_age); if (mapping.mesh >= serial_minwith) { - serial = (serial & ~actor_params::serial_mask(mapping.mesh)) | - injective(serial, mapping.mesh, salt); + serial = (serial & ~actor_params::serial_mask(mapping.mesh)) | injective(serial, mapping.mesh, salt); log_trace("keygen-pair: mesh@%u => %" PRIu64, mapping.mesh, serial); } if (mapping.rotate) { const unsigned right = mapping.rotate; const unsigned left = mapping.width - right; - serial = (serial << left) | - ((serial & actor_params::serial_mask(mapping.width)) >> right); - log_trace("keygen-pair: rotate@%u => %" PRIu64 ", 0x%" PRIx64, - mapping.rotate, serial, serial); + serial = (serial << left) | ((serial & actor_params::serial_mask(mapping.width)) >> right); + log_trace("keygen-pair: rotate@%u => %" PRIu64 ", 0x%" PRIx64, mapping.rotate, serial, serial); } if (mapping.offset) { - serial = - (serial + mapping.offset) & actor_params::serial_mask(mapping.width); - log_trace("keygen-pair: offset@%" PRIu64 " => %" PRIu64, mapping.offset, - serial); + serial = (serial + mapping.offset) & actor_params::serial_mask(mapping.width); + log_trace("keygen-pair: offset@%" PRIu64 " => %" PRIu64, mapping.offset, serial); } if (base) { serial += base; @@ -144,16 +121,13 @@ void __hot maker::pair(serial_t serial, const buffer &key, buffer &value, * Поэтому key_serial не трогаем, а в value_serial нелинейно вмешиваем * запрошенное количество бит из serial */ value_serial += - (serial ^ (serial >> mapping.split) * UINT64_C(57035339200100753)) & - actor_params::serial_mask(mapping.split); + (serial ^ (serial >> mapping.split) * 
UINT64_C(57035339200100753)) & actor_params::serial_mask(mapping.split); } - log_trace("keygen-pair: split@%u => k%" PRIu64 ", v%" PRIu64, mapping.split, - key_serial, value_serial); + log_trace("keygen-pair: split@%u => k%" PRIu64 ", v%" PRIu64, mapping.split, key_serial, value_serial); } - log_trace("keygen-pair: key %" PRIu64 ", value %" PRIu64, key_serial, - value_serial); + log_trace("keygen-pair: key %" PRIu64 ", value %" PRIu64, key_serial, value_serial); key_serial = mk_begin(key_serial, key_essentials, *key); value_serial = mk_begin(value_serial, value_essentials, *value); @@ -210,50 +184,39 @@ void __hot maker::pair(serial_t serial, const buffer &key, buffer &value, log_pair(logging::trace, "kv", key, value); } -void maker::setup(const config::actor_params_pod &actor, - unsigned thread_number) { +void maker::setup(const config::actor_params_pod &actor, unsigned thread_number) { #if CONSTEXPR_ENUM_FLAGS_OPERATIONS - static_assert(unsigned(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT | - MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP) < - UINT16_MAX, + static_assert(unsigned(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | + MDBX_REVERSEDUP) < UINT16_MAX, "WTF?"); #else - assert(unsigned(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT | - MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP) < - UINT16_MAX); + assert(unsigned(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | + MDBX_REVERSEDUP) < UINT16_MAX); #endif - key_essentials.flags = uint16_t( - actor.table_flags & - MDBX_db_flags_t(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT)); + key_essentials.flags = + uint16_t(actor.table_flags & MDBX_db_flags_t(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT)); assert(actor.keylen_min <= UINT16_MAX); key_essentials.minlen = uint16_t(actor.keylen_min); assert(actor.keylen_max <= UINT32_MAX); - key_essentials.maxlen = std::min( - uint32_t(actor.keylen_max), - 
uint32_t(mdbx_limits_keysize_max(actor.pagesize, actor.table_flags))); - key_essentials.bits = (key_essentials.maxlen < sizeof(serial_t)) - ? key_essentials.maxlen * CHAR_BIT - : sizeof(serial_t) * CHAR_BIT; + key_essentials.maxlen = + std::min(uint32_t(actor.keylen_max), uint32_t(mdbx_limits_keysize_max(actor.pagesize, actor.table_flags))); + key_essentials.bits = + (key_essentials.maxlen < sizeof(serial_t)) ? key_essentials.maxlen * CHAR_BIT : sizeof(serial_t) * CHAR_BIT; key_essentials.mask = actor_params::serial_mask(key_essentials.bits); - assert(key_essentials.bits > 63 || - key_essentials.mask > primes[key_essentials.bits]); + assert(key_essentials.bits > 63 || key_essentials.mask > primes[key_essentials.bits]); - value_essentials.flags = uint16_t( - actor.table_flags & - MDBX_db_flags_t(MDBX_INTEGERDUP | MDBX_REVERSEDUP | MDBX_DUPFIXED)); + value_essentials.flags = + uint16_t(actor.table_flags & MDBX_db_flags_t(MDBX_INTEGERDUP | MDBX_REVERSEDUP | MDBX_DUPFIXED)); assert(actor.datalen_min <= UINT16_MAX); value_essentials.minlen = uint16_t(actor.datalen_min); assert(actor.datalen_max <= UINT32_MAX); - value_essentials.maxlen = std::min( - uint32_t(actor.datalen_max), - uint32_t(mdbx_limits_valsize_max(actor.pagesize, actor.table_flags))); - value_essentials.bits = (value_essentials.maxlen < sizeof(serial_t)) - ? value_essentials.maxlen * CHAR_BIT - : sizeof(serial_t) * CHAR_BIT; + value_essentials.maxlen = + std::min(uint32_t(actor.datalen_max), uint32_t(mdbx_limits_valsize_max(actor.pagesize, actor.table_flags))); + value_essentials.bits = + (value_essentials.maxlen < sizeof(serial_t)) ? 
value_essentials.maxlen * CHAR_BIT : sizeof(serial_t) * CHAR_BIT; value_essentials.mask = actor_params::serial_mask(value_essentials.bits); - assert(value_essentials.bits > 63 || - value_essentials.mask > primes[value_essentials.bits]); + assert(value_essentials.bits > 63 || value_essentials.mask > primes[value_essentials.bits]); if (!actor.keygen.zero_fill) { key_essentials.flags |= essentials::prng_fill_flag; @@ -262,43 +225,31 @@ void maker::setup(const config::actor_params_pod &actor, mapping = actor.keygen; const auto split = mapping.split; - while (mapping.split > - value_essentials.bits - essentials::value_age_minwidth || - mapping.split >= mapping.width) + while (mapping.split > value_essentials.bits - essentials::value_age_minwidth || mapping.split >= mapping.width) mapping.split -= 1; if (split != mapping.split) - log_notice("keygen: reduce mapping-split from %u to %u", split, - mapping.split); + log_notice("keygen: reduce mapping-split from %u to %u", split, mapping.split); const auto width = mapping.width; - while (unsigned((actor.table_flags & MDBX_DUPSORT) - ? mapping.width - mapping.split - : mapping.width) > key_essentials.bits) + while (unsigned((actor.table_flags & MDBX_DUPSORT) ? 
mapping.width - mapping.split : mapping.width) > + key_essentials.bits) mapping.width -= 1; if (width != mapping.width) - log_notice("keygen: reduce mapping-width from %u to %u", width, - mapping.width); + log_notice("keygen: reduce mapping-width from %u to %u", width, mapping.width); value_age_bits = value_essentials.bits - mapping.split; value_age_mask = actor_params::serial_mask(value_age_bits); assert(value_age_bits >= essentials::value_age_minwidth); - salt = (prng_state ^ - (thread_number * 1575554837) * UINT64_C(59386707711075671)) * - UINT64_C(14653293970879851569); + salt = (prng_state ^ (thread_number * 1575554837) * UINT64_C(59386707711075671)) * UINT64_C(14653293970879851569); base = actor.serial_base(); } bool maker::is_unordered() const { - return mapping.rotate || - mapping.mesh > ((MDBX_db_flags_t(key_essentials.flags) & MDBX_DUPSORT) - ? 0 - : mapping.split); + return mapping.rotate || mapping.mesh > ((MDBX_db_flags_t(key_essentials.flags) & MDBX_DUPSORT) ? 0 : mapping.split); } -void maker::seek2end(serial_t &serial) const { - serial = actor_params::serial_mask(mapping.width) - 1; -} +void maker::seek2end(serial_t &serial) const { serial = actor_params::serial_mask(mapping.width) - 1; } bool maker::increment(serial_t &serial, int64_t delta) const { if (serial > actor_params::serial_mask(mapping.width)) { @@ -308,17 +259,12 @@ bool maker::increment(serial_t &serial, int64_t delta) const { } serial_t target = serial + delta; - if (target > actor_params::serial_mask(mapping.width) || - ((delta > 0) ? target < serial : target > serial)) { - log_extra("keygen-increment: %" PRIu64 "%-" PRId64 " => %" PRIu64 - ", overflow", - serial, delta, target); + if (target > actor_params::serial_mask(mapping.width) || ((delta > 0) ? 
target < serial : target > serial)) { + log_extra("keygen-increment: %" PRIu64 "%-" PRId64 " => %" PRIu64 ", overflow", serial, delta, target); return false; } - log_extra("keygen-increment: %" PRIu64 "%-" PRId64 " => %" PRIu64 - ", continue", - serial, delta, target); + log_extra("keygen-increment: %" PRIu64 "%-" PRId64 " => %" PRIu64 ", continue", serial, delta, target); serial = target; return true; } @@ -363,8 +309,7 @@ buffer alloc(size_t limit) { return buffer(ptr); } -serial_t __hot maker::mk_begin(serial_t serial, const essentials ¶ms, - result &out) { +serial_t __hot maker::mk_begin(serial_t serial, const essentials ¶ms, result &out) { assert(out.limit >= params.maxlen); assert(params.maxlen >= params.minlen); assert(serial <= params.mask); @@ -374,9 +319,8 @@ serial_t __hot maker::mk_begin(serial_t serial, const essentials ¶ms, assert(params.mask > primes[params.bits]); #else const serial_t maxbits = params.maxlen * CHAR_BIT; - serial ^= (serial >> maxbits / 2) * - serial_t((sizeof(serial_t) > 4) ? UINT64_C(40719303417517073) - : UINT32_C(3708688457)); + serial ^= + (serial >> maxbits / 2) * serial_t((sizeof(serial_t) > 4) ? 
UINT64_C(40719303417517073) : UINT32_C(3708688457)); serial &= params.mask; #endif assert(params.maxlen >= length(serial)); @@ -388,9 +332,7 @@ serial_t __hot maker::mk_begin(serial_t serial, const essentials ¶ms, if (serial % (variation + serial_t(1))) { auto refix = serial * UINT64_C(48835288005252737); refix ^= refix >> 32; - out.value.iov_len = - std::max(out.value.iov_len, - params.minlen + size_t(1) + size_t(refix) % variation); + out.value.iov_len = std::max(out.value.iov_len, params.minlen + size_t(1) + size_t(refix) % variation); } } @@ -400,18 +342,14 @@ serial_t __hot maker::mk_begin(serial_t serial, const essentials ¶ms, return serial; } -void __hot maker::mk_continue(const serial_t serial, const essentials ¶ms, - result &out) { +void __hot maker::mk_continue(const serial_t serial, const essentials ¶ms, result &out) { #if CONSTEXPR_ENUM_FLAGS_OPERATIONS - static_assert( - (essentials::prng_fill_flag & - unsigned(MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERKEY | - MDBX_INTEGERDUP | MDBX_REVERSEKEY | MDBX_REVERSEDUP)) == 0, - "WTF?"); + static_assert((essentials::prng_fill_flag & unsigned(MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERKEY | + MDBX_INTEGERDUP | MDBX_REVERSEKEY | MDBX_REVERSEDUP)) == 0, + "WTF?"); #else - assert((essentials::prng_fill_flag & - unsigned(MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERKEY | - MDBX_INTEGERDUP | MDBX_REVERSEKEY | MDBX_REVERSEDUP)) == 0); + assert((essentials::prng_fill_flag & unsigned(MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERKEY | MDBX_INTEGERDUP | + MDBX_REVERSEKEY | MDBX_REVERSEDUP)) == 0); #endif assert(length(serial) <= out.value.iov_len); out.value.iov_base = out.bytes; @@ -423,8 +361,7 @@ void __hot maker::mk_continue(const serial_t serial, const essentials ¶ms, if (!is_byteorder_le() && out.value.iov_len != 8) out.u32 = uint32_t(serial); } else { - const auto prefix = - std::max(std::min(unsigned(params.minlen), 8u), length(serial)); + const auto prefix = std::max(std::min(unsigned(params.minlen), 8u), 
length(serial)); out.u64 = htobe64(serial); out.value.iov_base = out.bytes + 8 - prefix; if (out.value.iov_len > prefix) { @@ -434,28 +371,22 @@ void __hot maker::mk_continue(const serial_t serial, const essentials ¶ms, } else memset(out.bytes + 8, '\0', out.value.iov_len - prefix); } - if (unlikely(MDBX_db_flags_t(params.flags) & - (MDBX_REVERSEKEY | MDBX_REVERSEDUP))) - std::reverse((char *)out.value.iov_base, - (char *)out.value.iov_base + out.value.iov_len); + if (unlikely(MDBX_db_flags_t(params.flags) & (MDBX_REVERSEKEY | MDBX_REVERSEDUP))) + std::reverse((char *)out.value.iov_base, (char *)out.value.iov_base + out.value.iov_len); } assert(out.value.iov_len >= params.minlen); assert(out.value.iov_len <= params.maxlen); assert(out.value.iov_len >= length(serial)); assert(out.value.iov_base >= out.bytes); - assert((char *)out.value.iov_base + out.value.iov_len <= - (char *)&out.bytes + out.limit); + assert((char *)out.value.iov_base + out.value.iov_len <= (char *)&out.bytes + out.limit); } -void log_pair(logging::loglevel level, const char *prefix, const buffer &key, - buffer &value) { +void log_pair(logging::loglevel level, const char *prefix, const buffer &key, buffer &value) { if (log_enabled(level)) { char dump_key[4096], dump_value[4096]; - logging::output( - level, "%s-pair: key %s, value %s", prefix, - mdbx_dump_val(&key->value, dump_key, sizeof(dump_key)), - mdbx_dump_val(&value->value, dump_value, sizeof(dump_value))); + logging::output(level, "%s-pair: key %s, value %s", prefix, mdbx_dump_val(&key->value, dump_key, sizeof(dump_key)), + mdbx_dump_val(&value->value, dump_value, sizeof(dump_value))); } } diff --git a/test/keygen.h++ b/test/keygen.h++ index a9d69dfe..389cab4d 100644 --- a/test/keygen.h++ +++ b/test/keygen.h++ @@ -60,11 +60,7 @@ namespace keygen { typedef uint64_t serial_t; -enum : serial_t { - serial_minwith = 8, - serial_maxwith = sizeof(serial_t) * 8, - serial_allones = ~(serial_t)0u -}; +enum : serial_t { serial_minwith = 8, 
serial_maxwith = sizeof(serial_t) * 8, serial_allones = ~(serial_t)0u }; struct result { MDBX_val value; @@ -75,9 +71,7 @@ struct result { uint64_t u64; }; - std::string as_string() const { - return std::string((const char *)value.iov_base, value.iov_len); - } + std::string as_string() const { return std::string((const char *)value.iov_base, value.iov_len); } }; //----------------------------------------------------------------------------- @@ -106,21 +100,17 @@ class maker { unsigned value_age_bits{0}; serial_t value_age_mask{0}; - static serial_t mk_begin(serial_t serial, const essentials ¶ms, - result &out); - static void mk_continue(const serial_t serial, const essentials ¶ms, - result &out); + static serial_t mk_begin(serial_t serial, const essentials ¶ms, result &out); + static void mk_continue(const serial_t serial, const essentials ¶ms, result &out); public: - void pair(serial_t serial, const buffer &key, buffer &value, - serial_t value_age, const bool keylen_changeable); + void pair(serial_t serial, const buffer &key, buffer &value, serial_t value_age, const bool keylen_changeable); void setup(const config::actor_params_pod &actor, unsigned thread_number); bool is_unordered() const; void seek2end(serial_t &serial) const; bool increment(serial_t &serial, int64_t delta) const; - bool increment_key_part(serial_t &serial, int64_t delta, - bool reset_value_part = true) const { + bool increment_key_part(serial_t &serial, int64_t delta, bool reset_value_part = true) const { if (reset_value_part) { serial_t value_part_bits = ((serial_t(1) << mapping.split) - 1); serial |= value_part_bits; @@ -131,12 +121,10 @@ public: } serial_t remix_age(serial_t serial) const { - return (UINT64_C(768097847591) * (serial ^ UINT64_C(768097847591))) & - value_age_mask; + return (UINT64_C(768097847591) * (serial ^ UINT64_C(768097847591))) & value_age_mask; } }; -void log_pair(logging::loglevel level, const char *prefix, const buffer &key, - buffer &value); +void 
log_pair(logging::loglevel level, const char *prefix, const buffer &key, buffer &value); } /* namespace keygen */ diff --git a/test/log.c++ b/test/log.c++ index 0a3d4355..9b952785 100644 --- a/test/log.c++ +++ b/test/log.c++ @@ -26,15 +26,13 @@ MDBX_NORETURN void failure_perror(const char *what, int errnum) { //----------------------------------------------------------------------------- -static void mdbx_logger(MDBX_log_level_t priority, const char *function, - int line, const char *fmt, +static void mdbx_logger(MDBX_log_level_t priority, const char *function, int line, const char *fmt, va_list args) MDBX_CXX17_NOEXCEPT { if (function) { if (priority == MDBX_LOG_FATAL) log_error("mdbx: fatal failure: %s, %d", function, line); - logging::output_nocheckloglevel( - logging::loglevel(priority), - strncmp(function, "mdbx_", 5) == 0 ? "%s: " : "mdbx %s: ", function); + logging::output_nocheckloglevel(logging::loglevel(priority), + strncmp(function, "mdbx_", 5) == 0 ? "%s: " : "mdbx %s: ", function); logging::feed_ap(fmt, args); } else logging::feed_ap(fmt, args); @@ -58,9 +56,7 @@ static FILE *flow; void setlevel(loglevel priority) { level = priority; int rc = mdbx_setup_debug(MDBX_log_level_t(priority), - MDBX_DBG_ASSERT | MDBX_DBG_AUDIT | MDBX_DBG_JITTER | - MDBX_DBG_DUMP, - mdbx_logger); + MDBX_DBG_ASSERT | MDBX_DBG_AUDIT | MDBX_DBG_JITTER | MDBX_DBG_DUMP, mdbx_logger); log_trace("set mdbx debug-opts: 0x%02x", rc); } @@ -117,8 +113,7 @@ void ln() { } } -void output_nocheckloglevel_ap(const logging::loglevel priority, - const char *format, va_list ap) { +void output_nocheckloglevel_ap(const logging::loglevel priority, const char *format, va_list ap) { ln(); chrono::time now = chrono::now_realtime(); struct tm tm; @@ -134,11 +129,9 @@ void output_nocheckloglevel_ap(const logging::loglevel priority, if (rc != MDBX_SUCCESS) failure_perror("localtime_r()", rc); - fprintf(stdout, - "[ %02d%02d%02d-%02d:%02d:%02d.%06d_%05lu %-10s %.4s ] %s" /* TODO */, - tm.tm_year - 100, 
tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, - tm.tm_sec, chrono::fractional2us(now.fractional), (long)osal_getpid(), - prefix_buf, level2str(priority), suffix_ptr); + fprintf(stdout, "[ %02d%02d%02d-%02d:%02d:%02d.%06d_%05lu %-10s %.4s ] %s" /* TODO */, tm.tm_year - 100, + tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, chrono::fractional2us(now.fractional), + (long)osal_getpid(), prefix_buf, level2str(priority), suffix_ptr); va_list ones; memset(&ones, 0, sizeof(ones)) /* zap MSVC and other goofy compilers */; @@ -171,8 +164,7 @@ void output_nocheckloglevel_ap(const logging::loglevel priority, if (same_or_higher(priority, error)) { if (flow) flow = stderr; - fprintf(stderr, "[ %05lu %-10s %.4s ] %s", (long)osal_getpid(), prefix_buf, - level2str(priority), suffix_ptr); + fprintf(stderr, "[ %05lu %-10s %.4s ] %s", (long)osal_getpid(), prefix_buf, level2str(priority), suffix_ptr); vfprintf(stderr, format, ones); va_end(ones); } @@ -206,14 +198,12 @@ bool feed(const char *format, ...) 
{ return true; } -local_suffix::local_suffix(const char *c_str) - : trim_pos(suffix_buf.size()), indent(0) { +local_suffix::local_suffix(const char *c_str) : trim_pos(suffix_buf.size()), indent(0) { suffix_buf.append(c_str); suffix_ptr = suffix_buf.c_str(); } -local_suffix::local_suffix(const std::string &str) - : trim_pos(suffix_buf.size()), indent(0) { +local_suffix::local_suffix(const std::string &str) : trim_pos(suffix_buf.size()), indent(0) { suffix_buf.append(str); suffix_ptr = suffix_buf.c_str(); } @@ -242,8 +232,7 @@ void progress_canary(bool active) { static chrono::time progress_timestamp; chrono::time now = chrono::now_monotonic(); - if (now.fixedpoint - progress_timestamp.fixedpoint < - chrono::from_ms(42).fixedpoint) + if (now.fixedpoint - progress_timestamp.fixedpoint < chrono::from_ms(42).fixedpoint) return; if (osal_progress_push(active)) { @@ -262,20 +251,17 @@ void progress_canary(bool active) { progress_timestamp = now; fprintf(stderr, "%c\b", "-\\|/"[last_point = point]); } - } else if (now.fixedpoint - progress_timestamp.fixedpoint > - chrono::from_seconds(2).fixedpoint) { + } else if (now.fixedpoint - progress_timestamp.fixedpoint > chrono::from_seconds(2).fixedpoint) { progress_timestamp = now; fprintf(stderr, "%c\b", "@*"[now.utc & 1]); } } else { static int count; - if (active && now.fixedpoint - progress_timestamp.fixedpoint > - chrono::from_seconds(1).fixedpoint) { + if (active && now.fixedpoint - progress_timestamp.fixedpoint > chrono::from_seconds(1).fixedpoint) { putc('.', stderr); progress_timestamp = now; ++count; - } else if (now.fixedpoint - progress_timestamp.fixedpoint > - chrono::from_seconds(5).fixedpoint) { + } else if (now.fixedpoint - progress_timestamp.fixedpoint > chrono::from_seconds(5).fixedpoint) { putc("@*"[now.utc & 1], stderr); progress_timestamp = now; ++count; @@ -364,9 +350,7 @@ void log_trouble(const char *where, const char *what, int errnum) { log_error("%s: %s %s", where, what, test_strerror(errnum)); } -bool 
log_enabled(const logging::loglevel priority) { - return logging::same_or_higher(priority, logging::level); -} +bool log_enabled(const logging::loglevel priority) { return logging::same_or_higher(priority, logging::level); } void log_flush(void) { logging::ln(); diff --git a/test/log.h++ b/test/log.h++ index 838e8de1..bb92783e 100644 --- a/test/log.h++ +++ b/test/log.h++ @@ -29,25 +29,20 @@ inline bool lower(loglevel left, loglevel right) { return left > right; } -inline bool same_or_higher(loglevel left, loglevel right) { - return left <= right; -} +inline bool same_or_higher(loglevel left, loglevel right) { return left <= right; } const char *level2str(const loglevel level); void setup(loglevel priority, const std::string &prefix); void setup(const std::string &prefix); void setlevel(loglevel priority); -void output_nocheckloglevel_ap(const loglevel priority, const char *format, - va_list ap); -bool MDBX_PRINTF_ARGS(2, 3) - output(const loglevel priority, const char *format, ...); +void output_nocheckloglevel_ap(const loglevel priority, const char *format, va_list ap); +bool MDBX_PRINTF_ARGS(2, 3) output(const loglevel priority, const char *format, ...); bool feed_ap(const char *format, va_list ap); bool MDBX_PRINTF_ARGS(1, 2) feed(const char *format, ...); void ln(); -void inline MDBX_PRINTF_ARGS(2, 3) - output_nocheckloglevel(const loglevel priority, const char *format, ...) { +void inline MDBX_PRINTF_ARGS(2, 3) output_nocheckloglevel(const loglevel priority, const char *format, ...) { va_list ap; va_start(ap, format); output_nocheckloglevel_ap(priority, format, ap); @@ -75,9 +70,7 @@ public: } // namespace logging -void MDBX_PRINTF_ARGS(1, 2) static inline log_null(const char *msg, ...) { - return (void)msg; -} +void MDBX_PRINTF_ARGS(1, 2) static inline log_null(const char *msg, ...) 
{ return (void)msg; } void MDBX_PRINTF_ARGS(1, 2) log_extra(const char *msg, ...); void MDBX_PRINTF_ARGS(1, 2) log_trace(const char *msg, ...); void MDBX_PRINTF_ARGS(1, 2) log_debug(const char *msg, ...); diff --git a/test/main.c++ b/test/main.c++ index 9a7fb4df..90c3701e 100644 --- a/test/main.c++ +++ b/test/main.c++ @@ -9,104 +9,103 @@ #endif /* !Windows */ MDBX_NORETURN void usage(void) { - puts( - "usage:\n" - " --help or -h Show this text\n" - "Common parameters:\n" - " --loglevel=[0-7]|[fatal..extra]s" - " --pathname=... Path and/or name of database files\n" - " --repeat=N Set repeat counter\n" - " --threads=N Number of thread (unsupported for now)\n" - " --timeout=N[s|m|h|d] Set timeout in seconds/minutes/hours/days\n" - " --failfast[=YES/no] Lill all actors on first failure/error\n" - " --max-readers=N See mdbx_env_set_maxreaders() description\n" - " --max-tables=N Se mdbx_env_set_maxdbs() description\n" - " --dump-config[=YES/no] Dump entire test config before run\n" - " --progress[=YES/no] Enable/disable progress `canary`\n" - " --console[=yes/no] Enable/disable console-like output\n" - " --cleanup-before[=YES/no] Cleanup/remove and re-create database\n" - " --cleanup-after[=YES/no] Cleanup/remove database after completion\n" - " --prng-seed=N Seed PRNG\n" - "Database size control:\n" - " --pagesize=... Database page size: min, max, 256..65536\n" - " --size-lower=N[K|M|G|T] Lower-bound of size in Kb/Mb/Gb/Tb\n" - " --size-upper Upper-bound of size in Kb/Mb/Gb/Tb\n" - " --size Initial size in Kb/Mb/Gb/Tb\n" - " --shrink-threshold Shrink threshold in Kb/Mb/Gb/Tb\n" - " --growth-step Grow step in Kb/Mb/Gb/Tb\n" - "Predefined complex scenarios/cases:\n" - " --case=... 
Only `basic` scenario implemented for now\n" - " basic == Simultaneous multi-process execution\n" - " of test-actors: nested,hill,ttl,copy,append,jitter,try\n" - "Test actors:\n" - " --hill Fill-up and empty-down\n" - " by CRUD-operation quads\n" - " --ttl Stochastic time-to-live simulation\n" - " --nested Nested transactionы\n" - " with stochastic-size bellows\n" - " --jitter Jitter/delays simulation\n" - " --try Try write-transaction, no more\n" - " --copy Online copy/backup\n" - " --append Append-mode insertions\n" - " --dead.reader Dead-reader simulator\n" - " --dead.writer Dead-writer simulator\n" + puts("usage:\n" + " --help or -h Show this text\n" + "Common parameters:\n" + " --loglevel=[0-7]|[fatal..extra]s" + " --pathname=... Path and/or name of database files\n" + " --repeat=N Set repeat counter\n" + " --threads=N Number of thread (unsupported for now)\n" + " --timeout=N[s|m|h|d] Set timeout in seconds/minutes/hours/days\n" + " --failfast[=YES/no] Lill all actors on first failure/error\n" + " --max-readers=N See mdbx_env_set_maxreaders() description\n" + " --max-tables=N Se mdbx_env_set_maxdbs() description\n" + " --dump-config[=YES/no] Dump entire test config before run\n" + " --progress[=YES/no] Enable/disable progress `canary`\n" + " --console[=yes/no] Enable/disable console-like output\n" + " --cleanup-before[=YES/no] Cleanup/remove and re-create database\n" + " --cleanup-after[=YES/no] Cleanup/remove database after completion\n" + " --prng-seed=N Seed PRNG\n" + "Database size control:\n" + " --pagesize=... Database page size: min, max, 256..65536\n" + " --size-lower=N[K|M|G|T] Lower-bound of size in Kb/Mb/Gb/Tb\n" + " --size-upper Upper-bound of size in Kb/Mb/Gb/Tb\n" + " --size Initial size in Kb/Mb/Gb/Tb\n" + " --shrink-threshold Shrink threshold in Kb/Mb/Gb/Tb\n" + " --growth-step Grow step in Kb/Mb/Gb/Tb\n" + "Predefined complex scenarios/cases:\n" + " --case=... 
Only `basic` scenario implemented for now\n" + " basic == Simultaneous multi-process execution\n" + " of test-actors: nested,hill,ttl,copy,append,jitter,try\n" + "Test actors:\n" + " --hill Fill-up and empty-down\n" + " by CRUD-operation quads\n" + " --ttl Stochastic time-to-live simulation\n" + " --nested Nested transactionы\n" + " with stochastic-size bellows\n" + " --jitter Jitter/delays simulation\n" + " --try Try write-transaction, no more\n" + " --copy Online copy/backup\n" + " --append Append-mode insertions\n" + " --dead.reader Dead-reader simulator\n" + " --dead.writer Dead-writer simulator\n" #if !defined(_WIN32) && !defined(_WIN64) - " --fork.reader After-fork reader\n" - " --fork.writer After-fork writer\n" + " --fork.reader After-fork reader\n" + " --fork.writer After-fork writer\n" #endif /* Windows */ - "Actor options:\n" - " --batch.read=N Read-operations batch size\n" - " --batch.write=N Write-operations batch size\n" - " --delay=N | --no-delay (no)Delay test-actor before start\n" - " --wait4ops=N | --no-wait4ops (no)Wait for previous test-actor\n" - " completes # ops before start\n" - " --duration=N[s|m|h|d] Define running duration\n" - " --nops=N[K|M|G|T] Define number of operations/steps\n" - " --inject-writefault[=yes|NO] TBD (see the source code)\n" - " --drop[=yes|NO] Drop key-value space/table on " - "completion\n" - " --ignore-dbfull[=yes|NO] Ignore MDBX_MAP_FULL error\n" - " --speculum[=yes|NO] Use internal `speculum` to check " - "dataset\n" - " --geometry-jitter[=YES|no] Use jitter for geometry upper-limit\n" - "Keys and Value:\n" - " --keylen.min=N Minimal keys length\n" - " --keylen.max=N Miximal keys length\n" - " --keylen=N Set both min/max for keys length\n" - " --datalen.min=N Minimal data length\n" - " --datalen.max=N Miximal data length\n" - " --datalen=N Set both min/max for data length\n" - " --keygen.width=N TBD (see the source code)\n" - " --keygen.mesh=N TBD (see the source code)\n" - " --keygen.zerofill=yes|NO TBD (see the 
source code)\n" - " --keygen.split=N TBD (see the source code)\n" - " --keygen.rotate=N TBD (see the source code)\n" - " --keygen.offset=N TBD (see the source code)\n" - " --keygen.case=random Generator case (only `random` for now)\n" - "Database operation mode:\n" - " --mode={[+-]FLAG}[,[+-]FLAG]...\n" - " nosubdir == MDBX_NOSUBDIR\n" - " rdonly == MDBX_RDONLY\n" - " exclusive == MDBX_EXCLUSIVE\n" - " accede == MDBX_ACCEDE\n" - " nometasync == MDBX_NOMETASYNC\n" - " lifo == MDBX_LIFORECLAIM\n" - " nosync-safe == MDBX_SAFE_NOSYNC\n" - " writemap == MDBX_WRITEMAP\n" - " nosync-utterly == MDBX_UTTERLY_NOSYNC\n" - " perturb == MDBX_PAGEPERTURB\n" - " nostickythreads== MDBX_NOSTICKYTHREADS\n" - " nordahead == MDBX_NORDAHEAD\n" - " nomeminit == MDBX_NOMEMINIT\n" - " --random-writemap[=YES|no] Toggle MDBX_WRITEMAP randomly\n" - "Key-value space/table options:\n" - " --table={[+-]FLAG}[,[+-]FLAG]...\n" - " key.reverse == MDBX_REVERSEKEY\n" - " key.integer == MDBX_INTEGERKEY\n" - " data.dups == MDBX_DUPSORT\n" - " data.integer == MDBX_INTEGERDUP | MDBX_DUPFIXED | MDBX_DUPSORT\n" - " data.fixed == MDBX_DUPFIXED | MDBX_DUPSORT\n" - " data.reverse == MDBX_REVERSEDUP | MDBX_DUPSORT\n"); + "Actor options:\n" + " --batch.read=N Read-operations batch size\n" + " --batch.write=N Write-operations batch size\n" + " --delay=N | --no-delay (no)Delay test-actor before start\n" + " --wait4ops=N | --no-wait4ops (no)Wait for previous test-actor\n" + " completes # ops before start\n" + " --duration=N[s|m|h|d] Define running duration\n" + " --nops=N[K|M|G|T] Define number of operations/steps\n" + " --inject-writefault[=yes|NO] TBD (see the source code)\n" + " --drop[=yes|NO] Drop key-value space/table on " + "completion\n" + " --ignore-dbfull[=yes|NO] Ignore MDBX_MAP_FULL error\n" + " --speculum[=yes|NO] Use internal `speculum` to check " + "dataset\n" + " --geometry-jitter[=YES|no] Use jitter for geometry upper-limit\n" + "Keys and Value:\n" + " --keylen.min=N Minimal keys length\n" + " 
--keylen.max=N Miximal keys length\n" + " --keylen=N Set both min/max for keys length\n" + " --datalen.min=N Minimal data length\n" + " --datalen.max=N Miximal data length\n" + " --datalen=N Set both min/max for data length\n" + " --keygen.width=N TBD (see the source code)\n" + " --keygen.mesh=N TBD (see the source code)\n" + " --keygen.zerofill=yes|NO TBD (see the source code)\n" + " --keygen.split=N TBD (see the source code)\n" + " --keygen.rotate=N TBD (see the source code)\n" + " --keygen.offset=N TBD (see the source code)\n" + " --keygen.case=random Generator case (only `random` for now)\n" + "Database operation mode:\n" + " --mode={[+-]FLAG}[,[+-]FLAG]...\n" + " nosubdir == MDBX_NOSUBDIR\n" + " rdonly == MDBX_RDONLY\n" + " exclusive == MDBX_EXCLUSIVE\n" + " accede == MDBX_ACCEDE\n" + " nometasync == MDBX_NOMETASYNC\n" + " lifo == MDBX_LIFORECLAIM\n" + " nosync-safe == MDBX_SAFE_NOSYNC\n" + " writemap == MDBX_WRITEMAP\n" + " nosync-utterly == MDBX_UTTERLY_NOSYNC\n" + " perturb == MDBX_PAGEPERTURB\n" + " nostickythreads== MDBX_NOSTICKYTHREADS\n" + " nordahead == MDBX_NORDAHEAD\n" + " nomeminit == MDBX_NOMEMINIT\n" + " --random-writemap[=YES|no] Toggle MDBX_WRITEMAP randomly\n" + "Key-value space/table options:\n" + " --table={[+-]FLAG}[,[+-]FLAG]...\n" + " key.reverse == MDBX_REVERSEKEY\n" + " key.integer == MDBX_INTEGERKEY\n" + " data.dups == MDBX_DUPSORT\n" + " data.integer == MDBX_INTEGERDUP | MDBX_DUPFIXED | MDBX_DUPSORT\n" + " data.fixed == MDBX_DUPFIXED | MDBX_DUPSORT\n" + " data.reverse == MDBX_REVERSEDUP | MDBX_DUPSORT\n"); exit(EXIT_FAILURE); } @@ -209,9 +208,7 @@ bool geometry_jitter; const char global::thunk_param_prefix[] = "--execute="; -std::string thunk_param(const actor_config &config) { - return config.serialize(global::thunk_param_prefix); -} +std::string thunk_param(const actor_config &config) { return config.serialize(global::thunk_param_prefix); } void cleanup() { log_trace(">> cleanup"); @@ -234,9 +231,7 @@ static void 
fixup4qemu(actor_params ¶ms) { safe4qemu_limit >>= 1; #endif /* __SANITIZE_ADDRESS__ */ - if (params.size_lower > safe4qemu_limit || - params.size_now > safe4qemu_limit || - params.size_upper > safe4qemu_limit) { + if (params.size_lower > safe4qemu_limit || params.size_now > safe4qemu_limit || params.size_upper > safe4qemu_limit) { params.size_upper = std::min(params.size_upper, safe4qemu_limit); params.size_now = std::min(params.size_now, params.size_upper); params.size_lower = std::min(params.size_lower, params.size_now); @@ -246,13 +241,12 @@ static void fixup4qemu(actor_params ¶ms) { } #endif /* MDBX_WORDBITS == 32 */ -#if defined(__alpha__) || defined(__alpha) || defined(__sparc__) || \ - defined(__sparc) || defined(__sparc64__) || defined(__sparc64) +#if defined(__alpha__) || defined(__alpha) || defined(__sparc__) || defined(__sparc) || defined(__sparc64__) || \ + defined(__sparc64) if (params.size_lower != params.size_upper) { - log_notice( - "workaround: for conformance Alpha/Sparc build with QEMU/ASAN/Valgrind " - "enforce fixed database size %zu megabytes", - params.size_upper >> 20); + log_notice("workaround: for conformance Alpha/Sparc build with QEMU/ASAN/Valgrind " + "enforce fixed database size %zu megabytes", + params.size_upper >> 20); params.size_lower = params.size_now = params.size_upper; } #endif /* Alpha || Sparc */ @@ -282,15 +276,10 @@ int main(int argc, char *const argv[]) { if (argc < 2) failure("No parameters given. Try --help\n"); - if (argc == 2 && strncmp(argv[1], global::thunk_param_prefix, - strlen(global::thunk_param_prefix)) == 0) - return test_execute( - actor_config(argv[1] + strlen(global::thunk_param_prefix))) - ? EXIT_SUCCESS - : EXIT_FAILURE; + if (argc == 2 && strncmp(argv[1], global::thunk_param_prefix, strlen(global::thunk_param_prefix)) == 0) + return test_execute(actor_config(argv[1] + strlen(global::thunk_param_prefix))) ? 
EXIT_SUCCESS : EXIT_FAILURE; - if (argc == 2 && - (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0)) + if (argc == 2 && (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0)) usage(); actor_params params; @@ -300,30 +289,22 @@ int main(int argc, char *const argv[]) { unsigned last_space_id = 0; for (int narg = 1; narg < argc; ++narg) { - if (config::parse_option(argc, argv, narg, "dump-config", - global::config::dump_config)) + if (config::parse_option(argc, argv, narg, "dump-config", global::config::dump_config)) continue; - if (config::parse_option(argc, argv, narg, "cleanup-before", - global::config::cleanup_before)) + if (config::parse_option(argc, argv, narg, "cleanup-before", global::config::cleanup_before)) continue; - if (config::parse_option(argc, argv, narg, "cleanup-after", - global::config::cleanup_after)) + if (config::parse_option(argc, argv, narg, "cleanup-after", global::config::cleanup_after)) continue; - if (config::parse_option(argc, argv, narg, "failfast", - global::config::failfast)) + if (config::parse_option(argc, argv, narg, "failfast", global::config::failfast)) continue; - if (config::parse_option(argc, argv, narg, "progress", - global::config::progress_indicator)) + if (config::parse_option(argc, argv, narg, "progress", global::config::progress_indicator)) continue; - if (config::parse_option(argc, argv, narg, "console", - global::config::console_mode)) + if (config::parse_option(argc, argv, narg, "console", global::config::console_mode)) continue; - if (config::parse_option(argc, argv, narg, "geometry-jitter", - global::config::geometry_jitter)) + if (config::parse_option(argc, argv, narg, "geometry-jitter", global::config::geometry_jitter)) continue; - if (config::parse_option(argc, argv, narg, "timeout", - global::config::timeout_duration_seconds, - config::duration, 1)) + if (config::parse_option(argc, argv, narg, "timeout", global::config::timeout_duration_seconds, config::duration, + 1)) continue; 
logging::loglevel loglevel; @@ -341,19 +322,15 @@ int main(int argc, char *const argv[]) { } if (config::parse_option(argc, argv, narg, "pathname", params.pathname_db)) continue; - if (config::parse_option(argc, argv, narg, "mode", params.mode_flags, - config::mode_bits)) + if (config::parse_option(argc, argv, narg, "mode", params.mode_flags, config::mode_bits)) continue; - if (config::parse_option(argc, argv, narg, "random-writemap", - params.random_writemap)) + if (config::parse_option(argc, argv, narg, "random-writemap", params.random_writemap)) continue; - if (config::parse_option(argc, argv, narg, "table", params.table_flags, - config::table_bits)) { + if (config::parse_option(argc, argv, narg, "table", params.table_flags, config::table_bits)) { if ((params.table_flags & MDBX_DUPFIXED) == 0) params.table_flags &= ~MDBX_INTEGERDUP; if ((params.table_flags & MDBX_DUPSORT) == 0) - params.table_flags &= - ~(MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP); + params.table_flags &= ~(MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP); const unsigned keylen_max = params.mdbx_keylen_max(); if (params.keylen_min > keylen_max) params.keylen_min = keylen_max; @@ -378,8 +355,7 @@ int main(int argc, char *const argv[]) { continue; } - if (config::parse_option(argc, argv, narg, "pagesize", params.pagesize, - int(mdbx_limits_pgsize_min()), + if (config::parse_option(argc, argv, narg, "pagesize", params.pagesize, int(mdbx_limits_pgsize_min()), int(mdbx_limits_pgsize_max()))) { const unsigned keylen_max = params.mdbx_keylen_max(); if (params.keylen_min > keylen_max) @@ -393,159 +369,116 @@ int main(int argc, char *const argv[]) { params.datalen_max = datalen_max; continue; } - if (config::parse_option(argc, argv, narg, "repeat", params.nrepeat, - config::entropy)) + if (config::parse_option(argc, argv, narg, "repeat", params.nrepeat, config::entropy)) continue; - if (config::parse_option(argc, argv, narg, "threads", params.nthreads, - config::no_scale, 1, 64)) + if 
(config::parse_option(argc, argv, narg, "threads", params.nthreads, config::no_scale, 1, 64)) continue; - if (config::parse_option_intptr(argc, argv, narg, "size-lower", - params.size_lower, - mdbx_limits_dbsize_min(params.pagesize), - mdbx_limits_dbsize_max(params.pagesize))) + if (config::parse_option_intptr(argc, argv, narg, "size-lower", params.size_lower, + mdbx_limits_dbsize_min(params.pagesize), mdbx_limits_dbsize_max(params.pagesize))) continue; int64_t i64 = params.size_upper; - if (config::parse_option(argc, argv, narg, "size-upper-upto", i64, - int64_t(mdbx_limits_dbsize_min(params.pagesize)), + if (config::parse_option(argc, argv, narg, "size-upper-upto", i64, int64_t(mdbx_limits_dbsize_min(params.pagesize)), INT64_MAX, -1)) { if (i64 > mdbx_limits_dbsize_max(params.pagesize)) i64 = mdbx_limits_dbsize_max(params.pagesize); params.size_upper = intptr_t(i64); continue; } - if (config::parse_option_intptr(argc, argv, narg, "size-upper", - params.size_upper, - mdbx_limits_dbsize_min(params.pagesize), + if (config::parse_option_intptr(argc, argv, narg, "size-upper", params.size_upper, + mdbx_limits_dbsize_min(params.pagesize), mdbx_limits_dbsize_max(params.pagesize))) + continue; + if (config::parse_option_intptr(argc, argv, narg, "size", params.size_now, mdbx_limits_dbsize_min(params.pagesize), mdbx_limits_dbsize_max(params.pagesize))) continue; - if (config::parse_option_intptr(argc, argv, narg, "size", params.size_now, - mdbx_limits_dbsize_min(params.pagesize), - mdbx_limits_dbsize_max(params.pagesize))) + if (config::parse_option(argc, argv, narg, "shrink-threshold", params.shrink_threshold, 0, + (int)std::min((intptr_t)INT_MAX, mdbx_limits_dbsize_max(params.pagesize) - + mdbx_limits_dbsize_min(params.pagesize)))) continue; - if (config::parse_option( - argc, argv, narg, "shrink-threshold", params.shrink_threshold, 0, - (int)std::min((intptr_t)INT_MAX, - mdbx_limits_dbsize_max(params.pagesize) - - mdbx_limits_dbsize_min(params.pagesize)))) - continue; - 
if (config::parse_option( - argc, argv, narg, "growth-step", params.growth_step, 0, - (int)std::min((intptr_t)INT_MAX, - mdbx_limits_dbsize_max(params.pagesize) - - mdbx_limits_dbsize_min(params.pagesize)))) + if (config::parse_option(argc, argv, narg, "growth-step", params.growth_step, 0, + (int)std::min((intptr_t)INT_MAX, mdbx_limits_dbsize_max(params.pagesize) - + mdbx_limits_dbsize_min(params.pagesize)))) continue; - if (config::parse_option(argc, argv, narg, "keygen.width", - params.keygen.width, 8, 64)) + if (config::parse_option(argc, argv, narg, "keygen.width", params.keygen.width, 8, 64)) continue; - if (config::parse_option(argc, argv, narg, "keygen.mesh", - params.keygen.mesh, 0, 64)) + if (config::parse_option(argc, argv, narg, "keygen.mesh", params.keygen.mesh, 0, 64)) continue; - if (config::parse_option(argc, argv, narg, "prng-seed", params.prng_seed, - config::entropy)) { + if (config::parse_option(argc, argv, narg, "prng-seed", params.prng_seed, config::entropy)) { prng_seed(params.prng_seed); continue; } - if (config::parse_option(argc, argv, narg, "keygen.zerofill", - params.keygen.zero_fill)) + if (config::parse_option(argc, argv, narg, "keygen.zerofill", params.keygen.zero_fill)) continue; - if (config::parse_option(argc, argv, narg, "keygen.split", - params.keygen.split, 0, 63)) + if (config::parse_option(argc, argv, narg, "keygen.split", params.keygen.split, 0, 63)) continue; - if (config::parse_option(argc, argv, narg, "keygen.rotate", - params.keygen.rotate, 0, 63)) + if (config::parse_option(argc, argv, narg, "keygen.rotate", params.keygen.rotate, 0, 63)) continue; - if (config::parse_option(argc, argv, narg, "keygen.offset", - params.keygen.offset, config::binary)) + if (config::parse_option(argc, argv, narg, "keygen.offset", params.keygen.offset, config::binary)) continue; if (config::parse_option(argc, argv, narg, "keygen.case", &value)) { keycase_setup(value, params); continue; } - if (config::parse_option( - argc, argv, narg, 
"keylen.min", params.keylen_min, - (params.table_flags & MDBX_INTEGERKEY) ? config::intkey - : config::no_scale, - params.mdbx_keylen_min(), params.mdbx_keylen_max())) { - if ((params.table_flags & MDBX_INTEGERKEY) || - params.keylen_max < params.keylen_min) + if (config::parse_option(argc, argv, narg, "keylen.min", params.keylen_min, + (params.table_flags & MDBX_INTEGERKEY) ? config::intkey : config::no_scale, + params.mdbx_keylen_min(), params.mdbx_keylen_max())) { + if ((params.table_flags & MDBX_INTEGERKEY) || params.keylen_max < params.keylen_min) params.keylen_max = params.keylen_min; continue; } - if (config::parse_option( - argc, argv, narg, "keylen.max", params.keylen_max, - (params.table_flags & MDBX_INTEGERKEY) ? config::intkey - : config::no_scale, - params.mdbx_keylen_min(), params.mdbx_keylen_max())) { - if ((params.table_flags & MDBX_INTEGERKEY) || - params.keylen_min > params.keylen_max) + if (config::parse_option(argc, argv, narg, "keylen.max", params.keylen_max, + (params.table_flags & MDBX_INTEGERKEY) ? config::intkey : config::no_scale, + params.mdbx_keylen_min(), params.mdbx_keylen_max())) { + if ((params.table_flags & MDBX_INTEGERKEY) || params.keylen_min > params.keylen_max) params.keylen_min = params.keylen_max; continue; } - if (config::parse_option( - argc, argv, narg, "keylen", params.keylen_min, - (params.table_flags & MDBX_INTEGERKEY) ? config::intkey - : config::no_scale, - params.mdbx_keylen_min(), params.mdbx_keylen_max())) { + if (config::parse_option(argc, argv, narg, "keylen", params.keylen_min, + (params.table_flags & MDBX_INTEGERKEY) ? config::intkey : config::no_scale, + params.mdbx_keylen_min(), params.mdbx_keylen_max())) { params.keylen_max = params.keylen_min; continue; } - if (config::parse_option( - argc, argv, narg, "datalen.min", params.datalen_min, - (params.table_flags & MDBX_INTEGERDUP) ? 
config::intkey - : config::no_scale, - params.mdbx_datalen_min(), params.mdbx_datalen_max())) { - if ((params.table_flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) || - params.datalen_max < params.datalen_min) + if (config::parse_option(argc, argv, narg, "datalen.min", params.datalen_min, + (params.table_flags & MDBX_INTEGERDUP) ? config::intkey : config::no_scale, + params.mdbx_datalen_min(), params.mdbx_datalen_max())) { + if ((params.table_flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) || params.datalen_max < params.datalen_min) params.datalen_max = params.datalen_min; continue; } - if (config::parse_option( - argc, argv, narg, "datalen.max", params.datalen_max, - (params.table_flags & MDBX_INTEGERDUP) ? config::intkey - : config::no_scale, - params.mdbx_datalen_min(), params.mdbx_datalen_max())) { - if ((params.table_flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) || - params.datalen_min > params.datalen_max) + if (config::parse_option(argc, argv, narg, "datalen.max", params.datalen_max, + (params.table_flags & MDBX_INTEGERDUP) ? config::intkey : config::no_scale, + params.mdbx_datalen_min(), params.mdbx_datalen_max())) { + if ((params.table_flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) || params.datalen_min > params.datalen_max) params.datalen_min = params.datalen_max; continue; } - if (config::parse_option( - argc, argv, narg, "datalen", params.datalen_min, - (params.table_flags & MDBX_INTEGERDUP) ? config::intkey - : config::no_scale, - params.mdbx_datalen_min(), params.mdbx_datalen_max())) { + if (config::parse_option(argc, argv, narg, "datalen", params.datalen_min, + (params.table_flags & MDBX_INTEGERDUP) ? 
config::intkey : config::no_scale, + params.mdbx_datalen_min(), params.mdbx_datalen_max())) { params.datalen_max = params.datalen_min; continue; } - if (config::parse_option(argc, argv, narg, "batch.read", params.batch_read, - config::no_scale, 1)) + if (config::parse_option(argc, argv, narg, "batch.read", params.batch_read, config::no_scale, 1)) continue; - if (config::parse_option(argc, argv, narg, "batch.write", - params.batch_write, config::no_scale, 1)) + if (config::parse_option(argc, argv, narg, "batch.write", params.batch_write, config::no_scale, 1)) continue; - if (config::parse_option(argc, argv, narg, "delay", params.delaystart, - config::duration)) + if (config::parse_option(argc, argv, narg, "delay", params.delaystart, config::duration)) continue; - if (config::parse_option(argc, argv, narg, "wait4ops", params.waitfor_nops, - config::decimal)) + if (config::parse_option(argc, argv, narg, "wait4ops", params.waitfor_nops, config::decimal)) continue; - if (config::parse_option(argc, argv, narg, "inject-writefault", - params.inject_writefaultn, config::decimal)) + if (config::parse_option(argc, argv, narg, "inject-writefault", params.inject_writefaultn, config::decimal)) continue; if (config::parse_option(argc, argv, narg, "drop", params.drop_table)) continue; - if (config::parse_option(argc, argv, narg, "ignore-dbfull", - params.ignore_dbfull)) + if (config::parse_option(argc, argv, narg, "ignore-dbfull", params.ignore_dbfull)) continue; if (config::parse_option(argc, argv, narg, "speculum", params.speculum)) continue; - if (config::parse_option(argc, argv, narg, "max-readers", - params.max_readers, config::no_scale, 1, 255)) + if (config::parse_option(argc, argv, narg, "max-readers", params.max_readers, config::no_scale, 1, 255)) continue; - if (config::parse_option(argc, argv, narg, "max-tables", params.max_tables, - config::no_scale, 1, INT16_MAX)) + if (config::parse_option(argc, argv, narg, "max-tables", params.max_tables, config::no_scale, 1, 
INT16_MAX)) continue; if (config::parse_option(argc, argv, narg, "no-delay", nullptr)) { @@ -556,13 +489,11 @@ int main(int argc, char *const argv[]) { params.waitfor_nops = 0; continue; } - if (config::parse_option(argc, argv, narg, "duration", params.test_duration, - config::duration, 1)) { + if (config::parse_option(argc, argv, narg, "duration", params.test_duration, config::duration, 1)) { params.test_nops = 0; continue; } - if (config::parse_option(argc, argv, narg, "nops", params.test_nops, - config::decimal, 1)) { + if (config::parse_option(argc, argv, narg, "nops", params.test_nops, config::decimal, 1)) { params.test_duration = 0; continue; } @@ -647,8 +578,7 @@ int main(int argc, char *const argv[]) { (global::config::timeout_duration_seconds == 0) ? chrono::infinite().fixedpoint : global::start_monotonic.fixedpoint + - chrono::from_seconds(global::config::timeout_duration_seconds) - .fixedpoint; + chrono::from_seconds(global::config::timeout_duration_seconds).fixedpoint; if (global::config::cleanup_before) cleanup(); @@ -675,8 +605,7 @@ int main(int argc, char *const argv[]) { log_trace(">> killall_actors: (%s)", "start failed"); osal_killall_actors(); log_trace("<< killall_actors"); - failure("Failed to start actor #%u (%s)\n", a.actor_id, - test_strerror(rc)); + failure("Failed to start actor #%u (%s)\n", a.actor_id, test_strerror(rc)); } global::pid2actor[pid] = &a; } @@ -696,8 +625,7 @@ int main(int argc, char *const argv[]) { timeout_seconds_left = 0; else { chrono::time left_monotonic; - left_monotonic.fixedpoint = - global::deadline_monotonic.fixedpoint - now_monotonic.fixedpoint; + left_monotonic.fixedpoint = global::deadline_monotonic.fixedpoint - now_monotonic.fixedpoint; timeout_seconds_left = left_monotonic.seconds(); } @@ -713,8 +641,8 @@ int main(int argc, char *const argv[]) { continue; if (status > as_running) { - log_notice("actor #%u, id %d, pid %ld: %s\n", actor->actor_id, - actor->space_id, (long)pid, status2str(status)); + 
log_notice("actor #%u, id %d, pid %ld: %s\n", actor->actor_id, actor->space_id, (long)pid, + status2str(status)); left -= 1; if (status != as_successful) { if (global::config::failfast && !failed) { @@ -725,8 +653,8 @@ int main(int argc, char *const argv[]) { failed = true; } } else { - log_verbose("actor #%u, id %d, pid %ld: %s\n", actor->actor_id, - actor->space_id, (long)pid, status2str(status)); + log_verbose("actor #%u, id %d, pid %ld: %s\n", actor->actor_id, actor->space_id, (long)pid, + status2str(status)); } } else { if (timeout_seconds_left == 0) @@ -738,8 +666,7 @@ int main(int argc, char *const argv[]) { if (!failed) { MDBX_envinfo info; - int err = - mdbx_preopen_snapinfo(params.pathname_db.c_str(), &info, sizeof(info)); + int err = mdbx_preopen_snapinfo(params.pathname_db.c_str(), &info, sizeof(info)); if (err != MDBX_SUCCESS) failure_perror("mdbx_preopen_snapinfo()", err); } @@ -755,15 +682,12 @@ int main(int argc, char *const argv[]) { #if !(defined(_WIN32) || defined(_WIN64)) struct rusage spent; if (!getrusage(global::singlemode ? 
RUSAGE_SELF : RUSAGE_CHILDREN, &spent)) { - log_notice("%6s: user %f, system %f", "CPU", - spent.ru_utime.tv_sec + spent.ru_utime.tv_usec * 1e-6, + log_notice("%6s: user %f, system %f", "CPU", spent.ru_utime.tv_sec + spent.ru_utime.tv_usec * 1e-6, spent.ru_stime.tv_sec + spent.ru_stime.tv_usec * 1e-6); -#if defined(__linux__) || defined(__gnu_linux__) || defined(__FreeBSD__) || \ - defined(__NetBSD__) || defined(__OpenBSD__) || defined(__BSD__) || \ - defined(__bsdi__) || defined(__DragonFly__) || defined(__APPLE__) || \ +#if defined(__linux__) || defined(__gnu_linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || \ + defined(__OpenBSD__) || defined(__BSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(__APPLE__) || \ defined(__MACH__) || defined(__sun) - log_notice("%6s: read %ld, write %ld", "IOPs", spent.ru_inblock, - spent.ru_oublock); + log_notice("%6s: read %ld, write %ld", "IOPs", spent.ru_inblock, spent.ru_oublock); if (spent.ru_maxrss > 0) log_notice("%6s: %ld Kb", "RAM", spent.ru_maxrss @@ -773,8 +697,7 @@ int main(int argc, char *const argv[]) { / 1024u #endif ); - log_notice("%6s: reclaims %ld, faults %ld, swaps %ld", "Paging", - spent.ru_minflt, spent.ru_majflt, spent.ru_nswap); + log_notice("%6s: reclaims %ld, faults %ld, swaps %ld", "Paging", spent.ru_minflt, spent.ru_majflt, spent.ru_nswap); #endif /* Linux */ } #endif /* !Windows */ diff --git a/test/nested.c++ b/test/nested.c++ index a90c4d37..dda73f92 100644 --- a/test/nested.c++ +++ b/test/nested.c++ @@ -38,16 +38,12 @@ class testcase_nested : public testcase_ttl { bool trim_tail(unsigned window_width); bool grow_head(unsigned head_count); bool pop_txn(bool abort); - bool pop_txn() { - return pop_txn(inherited::is_nested_txn_available() ? flipcoin_x3() - : flipcoin_x2()); - } + bool pop_txn() { return pop_txn(inherited::is_nested_txn_available() ? 
flipcoin_x3() : flipcoin_x2()); } void push_txn(); bool stochastic_breakable_restart_with_nested(bool force_restart = false); public: - testcase_nested(const actor_config &config, const mdbx_pid_t pid) - : inherited(config, pid) {} + testcase_nested(const actor_config &config, const mdbx_pid_t pid) : inherited(config, pid) {} bool setup() override; bool run() override; bool teardown() override; @@ -84,8 +80,7 @@ bool testcase_nested::teardown() { txn_begin(false); db_table_drop(dbi); int err = breakable_commit(); - if (unlikely(err != MDBX_SUCCESS) && - (err != MDBX_MAP_FULL || !config.params.ignore_dbfull)) { + if (unlikely(err != MDBX_SUCCESS) && (err != MDBX_MAP_FULL || !config.params.ignore_dbfull)) { log_notice("nested: bailout-clean due '%s'", mdbx_strerror(err)); ok = false; } @@ -101,8 +96,7 @@ bool testcase_nested::teardown() { void testcase_nested::push_txn() { MDBX_txn *nested_txn; - MDBX_txn_flags_t flags = MDBX_txn_flags_t( - prng32() & uint32_t(MDBX_TXN_NOSYNC | MDBX_TXN_NOMETASYNC)); + MDBX_txn_flags_t flags = MDBX_txn_flags_t(prng32() & uint32_t(MDBX_TXN_NOSYNC | MDBX_TXN_NOMETASYNC)); int err = mdbx_txn_begin(db_guard.get(), txn_guard.get(), flags, &nested_txn); if (unlikely(err != MDBX_SUCCESS)) failure_perror("mdbx_txn_begin(nested)", err); @@ -114,10 +108,9 @@ void testcase_nested::push_txn() { scoped_txn_guard nested_txn_guard(nested_txn); txn_guard.swap(nested_txn_guard); SET speculum_snapshot(speculum); - stack.emplace(std::move(nested_txn_guard), serial, fifo, - std::move(speculum_snapshot)); - log_verbose("begin level#%zu txn #%" PRIu64 ", flags 0x%x, serial %" PRIu64, - stack.size(), mdbx_txn_id(nested_txn), flags, serial); + stack.emplace(std::move(nested_txn_guard), serial, fifo, std::move(speculum_snapshot)); + log_verbose("begin level#%zu txn #%" PRIu64 ", flags 0x%x, serial %" PRIu64, stack.size(), mdbx_txn_id(nested_txn), + flags, serial); if (!dbi && stack.size() == 1) dbi = db_table_open(true); } @@ -128,18 +121,16 @@ bool 
testcase_nested::pop_txn(bool abort) { MDBX_txn *txn = txn_guard.release(); bool committed = false; if (abort) { - log_verbose( - "abort level#%zu txn #%" PRIu64 ", undo serial %" PRIu64 " <- %" PRIu64, - stack.size(), mdbx_txn_id(txn), serial, std::get<1>(stack.top())); - if (dbi > 0 && stack.size() == 1 && - is_handle_created_in_current_txn(dbi, txn)) + log_verbose("abort level#%zu txn #%" PRIu64 ", undo serial %" PRIu64 " <- %" PRIu64, stack.size(), mdbx_txn_id(txn), + serial, std::get<1>(stack.top())); + if (dbi > 0 && stack.size() == 1 && is_handle_created_in_current_txn(dbi, txn)) dbi = 0; int err = mdbx_txn_abort(txn); if (unlikely(err != MDBX_SUCCESS)) failure_perror("mdbx_txn_abort()", err); } else { - log_verbose("commit level#%zu txn, nested serial %" PRIu64 " -> %" PRIu64, - stack.size(), serial, std::get<1>(stack.top())); + log_verbose("commit level#%zu txn, nested serial %" PRIu64 " -> %" PRIu64, stack.size(), serial, + std::get<1>(stack.top())); int err = mdbx_txn_commit(txn); if (likely(err == MDBX_SUCCESS)) committed = true; @@ -147,8 +138,7 @@ bool testcase_nested::pop_txn(bool abort) { should_continue = false; if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) { err = mdbx_txn_abort(txn); - if (unlikely(err != MDBX_SUCCESS && err != MDBX_THREAD_MISMATCH && - err != MDBX_BAD_TXN)) + if (unlikely(err != MDBX_SUCCESS && err != MDBX_THREAD_MISMATCH && err != MDBX_BAD_TXN)) failure_perror("mdbx_txn_abort()", err); } else failure_perror("mdbx_txn_commit()", err); @@ -165,18 +155,15 @@ bool testcase_nested::pop_txn(bool abort) { return should_continue; } -bool testcase_nested::stochastic_breakable_restart_with_nested( - bool force_restart) { - log_trace(">> stochastic_breakable_restart_with_nested%s", - force_restart ? ": force_restart" : ""); +bool testcase_nested::stochastic_breakable_restart_with_nested(bool force_restart) { + log_trace(">> stochastic_breakable_restart_with_nested%s", force_restart ? 
": force_restart" : ""); if (force_restart) while (txn_guard) pop_txn(true); bool should_continue = true; - while (!stack.empty() && - (flipcoin() || txn_underutilization_x256(txn_guard.get()) < 42)) + while (!stack.empty() && (flipcoin() || txn_underutilization_x256(txn_guard.get()) < 42)) should_continue &= pop_txn(); if (flipcoin_x3()) { @@ -200,12 +187,10 @@ bool testcase_nested::stochastic_breakable_restart_with_nested( } if (should_continue) - while (stack.empty() || - (is_nested_txn_available() && flipcoin() && stack.size() < 5)) + while (stack.empty() || (is_nested_txn_available() && flipcoin() && stack.size() < 5)) push_txn(); - log_trace("<< stochastic_breakable_restart_with_nested: should_continue=%s", - should_continue ? "yes" : "no"); + log_trace("<< stochastic_breakable_restart_with_nested: should_continue=%s", should_continue ? "yes" : "no"); return should_continue; } @@ -215,8 +200,7 @@ bool testcase_nested::trim_tail(unsigned window_width) { while (fifo.size() > window_width) { uint64_t tail_serial = fifo.back().first; const unsigned tail_count = fifo.back().second; - log_verbose("nested: trim-tail (serial %" PRIu64 ", count %u)", - tail_serial, tail_count); + log_verbose("nested: trim-tail (serial %" PRIu64 ", count %u)", tail_serial, tail_count); fifo.pop_back(); for (unsigned n = 0; n < tail_count; ++n) { log_trace("nested: remove-tail %" PRIu64, tail_serial); @@ -235,9 +219,8 @@ bool testcase_nested::trim_tail(unsigned window_width) { report(tail_count); } } else if (!fifo.empty()) { - log_verbose("nested: purge state %" PRIu64 " - %" PRIu64 ", fifo-items %zu", - fifo.front().first, fifo.back().first + fifo.back().second, - fifo.size()); + log_verbose("nested: purge state %" PRIu64 " - %" PRIu64 ", fifo-items %zu", fifo.front().first, + fifo.back().first + fifo.back().second, fifo.size()); db_table_clear(dbi, txn_guard.get()); fifo.clear(); clear_wholetable_passed += 1; @@ -248,9 +231,7 @@ bool testcase_nested::trim_tail(unsigned window_width) 
{ bool testcase_nested::grow_head(unsigned head_count) { const MDBX_put_flags_t insert_flags = - (config.params.table_flags & MDBX_DUPSORT) - ? MDBX_NODUPDATA - : MDBX_NODUPDATA | MDBX_NOOVERWRITE; + (config.params.table_flags & MDBX_DUPSORT) ? MDBX_NODUPDATA : MDBX_NODUPDATA | MDBX_NOOVERWRITE; retry: fifo.push_front(std::make_pair(serial, head_count)); for (unsigned n = 0; n < head_count; ++n) { @@ -289,12 +270,10 @@ bool testcase_nested::run() { unsigned loops = 0; while (true) { const uint64_t salt = prng64_white(seed) /* mdbx_txn_id(txn_guard.get()) */; - const unsigned window_width = - (!should_continue() || flipcoin_x4()) ? 0 : edge2window(salt); + const unsigned window_width = (!should_continue() || flipcoin_x4()) ? 0 : edge2window(salt); const unsigned head_count = edge2count(salt); - log_debug("nested: step #%" PRIu64 " (serial %" PRIu64 - ", window %u, count %u) salt %" PRIu64, - nops_completed, serial, window_width, head_count, salt); + log_debug("nested: step #%" PRIu64 " (serial %" PRIu64 ", window %u, count %u) salt %" PRIu64, nops_completed, + serial, window_width, head_count, salt); if (!trim_tail(window_width)) return false; @@ -307,10 +286,8 @@ bool testcase_nested::run() { return false; } - if (!keyspace_overflow && (should_continue() || !clear_wholetable_passed || - !clear_stepbystep_passed)) { - unsigned underutilization_x256 = - txn_underutilization_x256(txn_guard.get()); + if (!keyspace_overflow && (should_continue() || !clear_wholetable_passed || !clear_stepbystep_passed)) { + unsigned underutilization_x256 = txn_underutilization_x256(txn_guard.get()); if (dbfull_passed > underutilization_x256) { log_notice("nested: skip head-grow to avoid one more dbfull (was %u, " "underutilization %.2f%%)", @@ -327,9 +304,7 @@ bool testcase_nested::run() { } loops += 1; } else if (fifo.empty()) { - log_notice("nested: done %u whole loops, %" PRIu64 " ops, %" PRIu64 - " items", - loops, nops_completed, serial); + log_notice("nested: done %u whole loops, 
%" PRIu64 " ops, %" PRIu64 " items", loops, nops_completed, serial); break; } else { log_notice("nested: done, wait for empty, skip head-grow"); diff --git a/test/osal-unix.c++ b/test/osal-unix.c++ index 2c099d85..3a6bf215 100644 --- a/test/osal-unix.c++ +++ b/test/osal-unix.c++ @@ -19,14 +19,12 @@ #error "Oops, MDBX_LOCKING is undefined!" #endif -#if defined(__APPLE__) && (MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ - MDBX_LOCKING == MDBX_LOCKING_POSIX2008) +#if defined(__APPLE__) && (MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || MDBX_LOCKING == MDBX_LOCKING_POSIX2008) #include "stub/pthread_barrier.c" #endif /* __APPLE__ && MDBX_LOCKING >= MDBX_LOCKING_POSIX2001 */ -#if defined(__ANDROID_API__) && __ANDROID_API__ < 24 && \ - (MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ - MDBX_LOCKING == MDBX_LOCKING_POSIX2008) +#if defined(__ANDROID_API__) && __ANDROID_API__ < 24 && \ + (MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || MDBX_LOCKING == MDBX_LOCKING_POSIX2008) #include "stub/pthread_barrier.c" #endif /* __ANDROID_API__ < 24 && MDBX_LOCKING >= MDBX_LOCKING_POSIX2001 */ @@ -40,9 +38,7 @@ #if __cplusplus >= 201103L #include -MDBX_MAYBE_UNUSED static inline int atomic_decrement(std::atomic_int *p) { - return std::atomic_fetch_sub(p, 1) - 1; -} +MDBX_MAYBE_UNUSED static inline int atomic_decrement(std::atomic_int *p) { return std::atomic_fetch_sub(p, 1) - 1; } #else MDBX_MAYBE_UNUSED static inline int atomic_decrement(volatile int *p) { #if defined(__GNUC__) || defined(__clang__) @@ -69,8 +65,7 @@ static void ipc_remove(void) { #else struct shared_t { -#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ - MDBX_LOCKING == MDBX_LOCKING_POSIX2008 +#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || MDBX_LOCKING == MDBX_LOCKING_POSIX2008 pthread_barrier_t barrier; pthread_mutex_t mutex; size_t count; @@ -105,18 +100,14 @@ void osal_wait4barrier(void) { op.sem_flg = 0; if (semop(ipc, &op, 1)) failure_perror("semop(wait)", errno); -#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ - MDBX_LOCKING 
== MDBX_LOCKING_POSIX2008 +#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || MDBX_LOCKING == MDBX_LOCKING_POSIX2008 assert(shared != nullptr && shared != MAP_FAILED); int err = pthread_barrier_wait(&shared->barrier); if (err != 0 && err != PTHREAD_BARRIER_SERIAL_THREAD) failure_perror("pthread_barrier_wait(shared)", err); #elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988 assert(shared != nullptr && shared != MAP_FAILED); - int err = (atomic_decrement(&shared->barrier.countdown) > 0 && - sem_wait(&shared->barrier.sema)) - ? errno - : 0; + int err = (atomic_decrement(&shared->barrier.countdown) > 0 && sem_wait(&shared->barrier.sema)) ? errno : 0; if (err != 0) failure_perror("sem_wait(shared)", err); if (sem_post(&shared->barrier.sema)) @@ -149,22 +140,20 @@ void osal_setup(const std::vector &actors) { failure_perror("semctl(SETVAL.N, shared_sems)", errno); #else assert(shared == nullptr); - shared = (shared_t *)mmap( - nullptr, sizeof(shared_t) + actors.size() * sizeof(shared->events[0]), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS + shared = + (shared_t *)mmap(nullptr, sizeof(shared_t) + actors.size() * sizeof(shared->events[0]), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS #ifdef MAP_HASSEMAPHORE - | MAP_HASSEMAPHORE + | MAP_HASSEMAPHORE #endif - , - -1, 0); + , + -1, 0); if (MAP_FAILED == (void *)shared) failure_perror("mmap(shared)", errno); shared->count = actors.size() + 1; -#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ - MDBX_LOCKING == MDBX_LOCKING_POSIX2008 +#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || MDBX_LOCKING == MDBX_LOCKING_POSIX2008 pthread_barrierattr_t barrierattr; int err = pthread_barrierattr_init(&barrierattr); if (err) @@ -173,8 +162,7 @@ void osal_setup(const std::vector &actors) { if (err) failure_perror("pthread_barrierattr_setpshared()", err); - err = pthread_barrier_init(&shared->barrier, &barrierattr, - unsigned(shared->count)); + err = pthread_barrier_init(&shared->barrier, &barrierattr, unsigned(shared->count)); if (err) 
failure_perror("pthread_barrier_init(shared)", err); pthread_barrierattr_destroy(&barrierattr); @@ -204,8 +192,7 @@ void osal_setup(const std::vector &actors) { err = pthread_cond_init(event, &condattr); if (err) failure_perror("pthread_cond_init(shared)", err); - log_trace("osal_setup: event(shared pthread_cond) %" PRIuPTR " -> %p", i, - __Wpedantic_format_voidptr(event)); + log_trace("osal_setup: event(shared pthread_cond) %" PRIuPTR " -> %p", i, __Wpedantic_format_voidptr(event)); } pthread_condattr_destroy(&condattr); pthread_mutexattr_destroy(&mutexattr); @@ -217,8 +204,7 @@ void osal_setup(const std::vector &actors) { sem_t *event = &shared->events[i]; if (sem_init(event, true, 0)) failure_perror("sem_init(shared.event)", errno); - log_trace("osal_setup: event(shared sem_init) %" PRIuPTR " -> %p", i, - __Wpedantic_format_voidptr(event)); + log_trace("osal_setup: event(shared sem_init) %" PRIuPTR " -> %p", i, __Wpedantic_format_voidptr(event)); } #else #error "FIXME" @@ -235,8 +221,7 @@ void osal_broadcast(unsigned id) { assert(shared != nullptr && shared != MAP_FAILED); if (id >= shared->count) failure("osal_broadcast: id > limit"); -#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ - MDBX_LOCKING == MDBX_LOCKING_POSIX2008 +#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || MDBX_LOCKING == MDBX_LOCKING_POSIX2008 int err = pthread_cond_broadcast(shared->events + id); if (err) failure_perror("pthread_cond_broadcast(shared)", err); @@ -261,8 +246,7 @@ int osal_waitfor(unsigned id) { if (id >= shared->count) failure("osal_waitfor: id > limit"); -#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ - MDBX_LOCKING == MDBX_LOCKING_POSIX2008 +#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || MDBX_LOCKING == MDBX_LOCKING_POSIX2008 int rc = pthread_mutex_lock(&shared->mutex); if (rc != 0) failure_perror("pthread_mutex_lock(shared)", rc); @@ -288,15 +272,13 @@ int osal_waitfor(unsigned id) { //----------------------------------------------------------------------------- -const 
std::string -actor_config::osal_serialize(simple_checksum &checksum) const { +const std::string actor_config::osal_serialize(simple_checksum &checksum) const { (void)checksum; /* not used in workload, but just for testing */ return "unix.fork"; } -bool actor_config::osal_deserialize(const char *str, const char *end, - simple_checksum &checksum) { +bool actor_config::osal_deserialize(const char *str, const char *end, simple_checksum &checksum) { (void)checksum; /* not used in workload, but just for testing */ return strncmp(str, "unix.fork", 9) == 0 && str + 9 == end; @@ -392,8 +374,7 @@ int osal_actor_start(const actor_config &config, mdbx_pid_t &pid) { if (pid < 0) return errno; - log_trace("osal_actor_start: fork pid %ld for %u", (long)pid, - config.actor_id); + log_trace("osal_actor_start: fork pid %ld for %u", (long)pid, config.actor_id); children[pid] = as_running; return 0; } @@ -536,8 +517,7 @@ int osal_actor_poll(mdbx_pid_t &pid, unsigned timeout) { if (pid > 0) { if (WIFEXITED(status)) - children[pid] = - (WEXITSTATUS(status) == EXIT_SUCCESS) ? as_successful : as_failed; + children[pid] = (WEXITSTATUS(status) == EXIT_SUCCESS) ? 
as_successful : as_failed; else if (WIFSIGNALED(status)) { int sig = WTERMSIG(status); #ifdef WCOREDUMP @@ -551,13 +531,11 @@ int osal_actor_poll(mdbx_pid_t &pid, unsigned timeout) { case SIGFPE: case SIGILL: case SIGSEGV: - log_notice("child pid %lu %s by SIG%s", (long)pid, "terminated", - signal_name(sig)); + log_notice("child pid %lu %s by SIG%s", (long)pid, "terminated", signal_name(sig)); children[pid] = as_coredump; break; default: - log_notice("child pid %lu %s by SIG%s", (long)pid, "killed", - signal_name(sig)); + log_notice("child pid %lu %s by SIG%s", (long)pid, "killed", signal_name(sig)); children[pid] = as_killed; } } else if (WIFSTOPPED(status)) @@ -605,12 +583,10 @@ void osal_udelay(size_t us) { static size_t threshold_us; if (threshold_us == 0) { -#if defined(_POSIX_CPUTIME) && _POSIX_CPUTIME > -1 && \ - defined(CLOCK_PROCESS_CPUTIME_ID) +#if defined(_POSIX_CPUTIME) && _POSIX_CPUTIME > -1 && defined(CLOCK_PROCESS_CPUTIME_ID) if (clock_getres(CLOCK_PROCESS_CPUTIME_ID, &ts)) { int rc = errno; - log_warning("clock_getres(CLOCK_PROCESS_CPUTIME_ID), failed errno %d", - rc); + log_warning("clock_getres(CLOCK_PROCESS_CPUTIME_ID), failed errno %d", rc); } #endif /* CLOCK_PROCESS_CPUTIME_ID */ if (threshold_us == 0 && clock_getres(CLOCK_MONOTONIC, &ts)) { diff --git a/test/osal-windows.c++ b/test/osal-windows.c++ index 0ce04cc8..94f52b4a 100644 --- a/test/osal-windows.c++ +++ b/test/osal-windows.c++ @@ -30,13 +30,11 @@ void osal_wait4barrier(void) { DWORD rc = WaitForSingleObject(hBarrierSemaphore, 0); switch (rc) { default: - failure_perror("WaitForSingleObject(BarrierSemaphore)", - waitstatus2errcode(rc)); + failure_perror("WaitForSingleObject(BarrierSemaphore)", waitstatus2errcode(rc)); case WAIT_OBJECT_0: rc = WaitForSingleObject(hBarrierEvent, INFINITE); if (rc != WAIT_OBJECT_0) - failure_perror("WaitForSingleObject(BarrierEvent)", - waitstatus2errcode(rc)); + failure_perror("WaitForSingleObject(BarrierEvent)", waitstatus2errcode(rc)); break; case 
WAIT_TIMEOUT: if (!SetEvent(hBarrierEvent)) @@ -47,8 +45,7 @@ void osal_wait4barrier(void) { static HANDLE make_inheritable(HANDLE hHandle) { assert(hHandle != NULL && hHandle != INVALID_HANDLE_VALUE); - if (!DuplicateHandle(GetCurrentProcess(), hHandle, GetCurrentProcess(), - &hHandle, 0, TRUE, + if (!DuplicateHandle(GetCurrentProcess(), hHandle, GetCurrentProcess(), &hHandle, 0, TRUE, DUPLICATE_CLOSE_SOURCE | DUPLICATE_SAME_ACCESS)) failure_perror("DuplicateHandle()", GetLastError()); return hHandle; @@ -108,8 +105,7 @@ int osal_delay(unsigned seconds) { //----------------------------------------------------------------------------- -const std::string -actor_config::osal_serialize(simple_checksum &checksum) const { +const std::string actor_config::osal_serialize(simple_checksum &checksum) const { checksum.push(hBarrierSemaphore); checksum.push(hBarrierEvent); checksum.push(hProgressActiveEvent); @@ -127,12 +123,11 @@ actor_config::osal_serialize(simple_checksum &checksum) const { checksum.push(hSignal); } - return format("%p.%p.%p.%p.%p.%p", hBarrierSemaphore, hBarrierEvent, hWait, - hSignal, hProgressActiveEvent, hProgressPassiveEvent); + return format("%p.%p.%p.%p.%p.%p", hBarrierSemaphore, hBarrierEvent, hWait, hSignal, hProgressActiveEvent, + hProgressPassiveEvent); } -bool actor_config::osal_deserialize(const char *str, const char *end, - simple_checksum &checksum) { +bool actor_config::osal_deserialize(const char *str, const char *end, simple_checksum &checksum) { std::string copy(str, end - str); TRACE(">> osal_deserialize(%s)\n", copy.c_str()); @@ -144,9 +139,8 @@ bool actor_config::osal_deserialize(const char *str, const char *end, assert(events.empty()); HANDLE hWait, hSignal; - if (sscanf_s(copy.c_str(), "%p.%p.%p.%p.%p.%p", &hBarrierSemaphore, - &hBarrierEvent, &hWait, &hSignal, &hProgressActiveEvent, - &hProgressPassiveEvent) != 6) { + if (sscanf_s(copy.c_str(), "%p.%p.%p.%p.%p.%p", &hBarrierSemaphore, &hBarrierEvent, &hWait, &hSignal, + 
&hProgressActiveEvent, &hProgressPassiveEvent) != 6) { TRACE("<< osal_deserialize: failed\n"); return false; } @@ -175,23 +169,19 @@ bool actor_config::osal_deserialize(const char *str, const char *end, typedef std::pair child; static std::unordered_map children; -bool osal_multiactor_mode(void) { - return hProgressActiveEvent || hProgressPassiveEvent; -} +bool osal_multiactor_mode(void) { return hProgressActiveEvent || hProgressPassiveEvent; } bool osal_progress_push(bool active) { if (!children.empty()) { if (!SetEvent(active ? hProgressActiveEvent : hProgressPassiveEvent)) - failure_perror("osal_progress_push: SetEvent(overlord.progress)", - GetLastError()); + failure_perror("osal_progress_push: SetEvent(overlord.progress)", GetLastError()); return true; } return false; } -static void ArgvQuote(std::string &CommandLine, const std::string &Argument, - bool Force = false) +static void ArgvQuote(std::string &CommandLine, const std::string &Argument, bool Force = false) /*++ @@ -232,8 +222,7 @@ Environment: // parse quotes properly // - if (Force == false && Argument.empty() == false && - Argument.find_first_of(" \t\n\v\"") == Argument.npos) { + if (Force == false && Argument.empty() == false && Argument.find_first_of(" \t\n\v\"") == Argument.npos) { CommandLine.append(Argument); } else { CommandLine.push_back('"'); @@ -276,8 +265,7 @@ Environment: int osal_actor_start(const actor_config &config, mdbx_pid_t &pid) { if (children.size() == MAXIMUM_WAIT_OBJECTS) - failure("Couldn't manage more that %u actors on Windows\n", - MAXIMUM_WAIT_OBJECTS); + failure("Couldn't manage more that %u actors on Windows\n", MAXIMUM_WAIT_OBJECTS); _flushall(); @@ -286,8 +274,7 @@ int osal_actor_start(const actor_config &config, mdbx_pid_t &pid) { char exename[_MAX_PATH + 1]; DWORD exename_size = sizeof(exename); - if (!QueryFullProcessImageNameA(GetCurrentProcess(), 0, exename, - &exename_size)) + if (!QueryFullProcessImageNameA(GetCurrentProcess(), 0, exename, &exename_size)) 
failure_perror("QueryFullProcessImageName()", GetLastError()); if (exename[1] != ':') { @@ -383,8 +370,7 @@ int osal_actor_poll(mdbx_pid_t &pid, unsigned timeout) { while (true) { DWORD rc = - MsgWaitForMultipleObjectsEx((DWORD)handles.size(), &handles[0], - (timeout > 60) ? 60 * 1000 : timeout * 1000, + MsgWaitForMultipleObjectsEx((DWORD)handles.size(), &handles[0], (timeout > 60) ? 60 * 1000 : timeout * 1000, QS_ALLINPUT | QS_ALLPOSTMESSAGE, 0); if (rc == WAIT_OBJECT_0) { diff --git a/test/stub/pthread_barrier.c b/test/stub/pthread_barrier.c index 2e37900a..f8be732d 100644 --- a/test/stub/pthread_barrier.c +++ b/test/stub/pthread_barrier.c @@ -42,8 +42,7 @@ int pthread_barrierattr_destroy(pthread_barrierattr_t *attr) { return m ? m : c; } -int pthread_barrierattr_getpshared(const pthread_barrierattr_t *__restrict attr, - int *__restrict pshared) { +int pthread_barrierattr_getpshared(const pthread_barrierattr_t *__restrict attr, int *__restrict pshared) { return pthread_condattr_getpshared(&attr->cattr, pshared); } @@ -53,8 +52,7 @@ int pthread_barrierattr_setpshared(pthread_barrierattr_t *attr, int pshared) { return m ? 
m : c; } -int pthread_barrier_init(pthread_barrier_t *__restrict barrier, - const pthread_barrierattr_t *__restrict attr, +int pthread_barrier_init(pthread_barrier_t *__restrict barrier, const pthread_barrierattr_t *__restrict attr, unsigned count) { if (count == 0) return errno = EINVAL; diff --git a/test/stub/pthread_barrier.h b/test/stub/pthread_barrier.h index b9e0dd7c..5b8c6d86 100644 --- a/test/stub/pthread_barrier.h +++ b/test/stub/pthread_barrier.h @@ -61,12 +61,10 @@ typedef struct { int pthread_barrierattr_init(pthread_barrierattr_t *attr); int pthread_barrierattr_destroy(pthread_barrierattr_t *attr); -int pthread_barrierattr_getpshared(const pthread_barrierattr_t *__restrict attr, - int *__restrict pshared); +int pthread_barrierattr_getpshared(const pthread_barrierattr_t *__restrict attr, int *__restrict pshared); int pthread_barrierattr_setpshared(pthread_barrierattr_t *attr, int pshared); -int pthread_barrier_init(pthread_barrier_t *__restrict barrier, - const pthread_barrierattr_t *__restrict attr, +int pthread_barrier_init(pthread_barrier_t *__restrict barrier, const pthread_barrierattr_t *__restrict attr, unsigned int count); int pthread_barrier_destroy(pthread_barrier_t *barrier); diff --git a/test/test.c++ b/test/test.c++ index 9c827fc5..1d8db898 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -73,17 +73,14 @@ const char *keygencase2str(const keygen_case keycase) { //----------------------------------------------------------------------------- -int testcase::hsr_callback(const MDBX_env *env, const MDBX_txn *txn, - mdbx_pid_t pid, mdbx_tid_t tid, uint64_t laggard, - unsigned gap, size_t space, - int retry) MDBX_CXX17_NOEXCEPT { +int testcase::hsr_callback(const MDBX_env *env, const MDBX_txn *txn, mdbx_pid_t pid, mdbx_tid_t tid, uint64_t laggard, + unsigned gap, size_t space, int retry) MDBX_CXX17_NOEXCEPT { (void)txn; testcase *self = (testcase *)mdbx_env_get_userctx(env); if (retry == 0) - log_notice("hsr_callback: waitfor pid %lu, thread %" 
PRIuPTR - ", txn #%" PRIu64 ", gap %d, space %zu", - (long)pid, (size_t)tid, laggard, gap, space); + log_notice("hsr_callback: waitfor pid %lu, thread %" PRIuPTR ", txn #%" PRIu64 ", gap %d, space %zu", (long)pid, + (size_t)tid, laggard, gap, space); MDBX_envinfo info; int rc = mdbx_env_info_ex(env, txn, &info, sizeof(info)); @@ -91,8 +88,7 @@ int testcase::hsr_callback(const MDBX_env *env, const MDBX_txn *txn, return rc; if (self->should_continue(true) && - (space > size_t(info.mi_geo.grow) * 2 || - info.mi_geo.current >= info.mi_geo.upper)) { + (space > size_t(info.mi_geo.grow) * 2 || info.mi_geo.current >= info.mi_geo.upper)) { osal_yield(); if (retry > 0) osal_udelay(retry * size_t(100)); @@ -131,10 +127,8 @@ void testcase::db_prepare() { if (unlikely(rc != MDBX_SUCCESS)) failure_perror("mdbx_env_set_hsr()", rc); - rc = mdbx_env_set_geometry( - env, config.params.size_lower, config.params.size_now, - config.params.size_upper, config.params.growth_step, - config.params.shrink_threshold, config.params.pagesize); + rc = mdbx_env_set_geometry(env, config.params.size_lower, config.params.size_now, config.params.size_upper, + config.params.growth_step, config.params.shrink_threshold, config.params.pagesize); if (unlikely(rc != MDBX_SUCCESS)) failure_perror("mdbx_env_set_mapsize()", rc); @@ -153,8 +147,7 @@ void testcase::db_open() { if (config.params.random_writemap && flipcoin()) mode ^= MDBX_WRITEMAP; - int rc = mdbx_env_open(db_guard.get(), config.params.pathname_db.c_str(), - mode, 0640); + int rc = mdbx_env_open(db_guard.get(), config.params.pathname_db.c_str(), mode, 0640); if (unlikely(rc != MDBX_SUCCESS)) failure_perror("mdbx_env_open()", rc); @@ -185,32 +178,26 @@ void testcase::db_close() { void testcase::txn_begin(bool readonly, MDBX_txn_flags_t flags) { assert((flags & MDBX_TXN_RDONLY) == 0); - log_trace(">> txn_begin(%s, 0x%04X)", readonly ? "read-only" : "read-write", - flags); + log_trace(">> txn_begin(%s, 0x%04X)", readonly ? 
"read-only" : "read-write", flags); assert(!txn_guard); MDBX_txn *txn = nullptr; - int rc = mdbx_txn_begin(db_guard.get(), nullptr, - readonly ? flags | MDBX_TXN_RDONLY : flags, &txn); + int rc = mdbx_txn_begin(db_guard.get(), nullptr, readonly ? flags | MDBX_TXN_RDONLY : flags, &txn); if (unlikely(rc != MDBX_SUCCESS)) failure_perror("mdbx_txn_begin()", rc); txn_guard.reset(txn); need_speculum_assign = config.params.speculum && !readonly; - log_trace("<< txn_begin(%s, 0x%04X)", readonly ? "read-only" : "read-write", - flags); + log_trace("<< txn_begin(%s, 0x%04X)", readonly ? "read-only" : "read-write", flags); if (flipcoin_n(5)) { - const unsigned mask = - unsigned(MDBX_warmup_default | MDBX_warmup_force | MDBX_warmup_oomsafe | - MDBX_warmup_lock | MDBX_warmup_touchlimit); + const unsigned mask = unsigned(MDBX_warmup_default | MDBX_warmup_force | MDBX_warmup_oomsafe | MDBX_warmup_lock | + MDBX_warmup_touchlimit); static unsigned counter; - MDBX_warmup_flags_t warmup_flags = MDBX_warmup_flags_t( - (counter > MDBX_warmup_release) ? prng64() & mask : counter); + MDBX_warmup_flags_t warmup_flags = MDBX_warmup_flags_t((counter > MDBX_warmup_release) ? prng64() & mask : counter); counter += 1; int err = mdbx_env_warmup(db_guard.get(), txn, warmup_flags, 0); - log_trace("== counter %u, env_warmup(flags %u), rc %d", counter, - warmup_flags, err); + log_trace("== counter %u, env_warmup(flags %u), rc %d", counter, warmup_flags, err); } if (readonly && flipcoin()) @@ -226,8 +213,7 @@ int testcase::breakable_commit() { * during call mdbx_cmp() with zero txn. So it is the workaround for this: * - explicitly make copies of the `speculums`; * - explicitly move relevant copy after transaction commit. 
*/ - SET speculum_committed_copy(ItemCompare(this)), - speculum_copy(ItemCompare(this)); + SET speculum_committed_copy(ItemCompare(this)), speculum_copy(ItemCompare(this)); if (need_speculum_assign) { speculum_committed_copy = speculum_committed; speculum_copy = speculum; @@ -236,8 +222,7 @@ int testcase::breakable_commit() { MDBX_txn *txn = txn_guard.release(); txn_inject_writefault(txn); int rc = mdbx_txn_commit(txn); - if (unlikely(rc != MDBX_SUCCESS) && - (rc != MDBX_MAP_FULL || !config.params.ignore_dbfull)) + if (unlikely(rc != MDBX_SUCCESS) && (rc != MDBX_MAP_FULL || !config.params.ignore_dbfull)) failure_perror("mdbx_txn_commit()", rc); if (need_speculum_assign) { @@ -259,8 +244,7 @@ unsigned testcase::txn_underutilization_x256(MDBX_txn *txn) const { if (unlikely(err != MDBX_SUCCESS)) failure_perror("mdbx_txn_info()", err); const size_t left = size_t(info.txn_space_leftover); - const size_t total = - size_t(info.txn_space_leftover) + size_t(info.txn_space_dirty); + const size_t total = size_t(info.txn_space_leftover) + size_t(info.txn_space_dirty); return (unsigned)(left / (total >> 8)); } return 0; @@ -357,9 +341,7 @@ void testcase::txn_inject_writefault(MDBX_txn *txn) { if (config.params.inject_writefaultn && txn) { if (config.params.inject_writefaultn <= nops_completed && (MDBX_txn_flags_t(mdbx_txn_flags(txn)) & MDBX_TXN_RDONLY) == 0) { - log_verbose( - "== txn_inject_writefault(): got %u nops or more, inject FAULT", - config.params.inject_writefaultn); + log_verbose("== txn_inject_writefault(): got %u nops or more, inject FAULT", config.params.inject_writefaultn); log_flush(); #if defined(_WIN32) || defined(_WIN64) || defined(_WINDOWS) TerminateProcess(GetCurrentProcess(), 42); @@ -376,8 +358,7 @@ bool testcase::wait4start() { assert(!global::singlemode); int rc = osal_waitfor(config.wait4id); if (rc) { - log_trace("<< wait4start(%u), failed %s", config.wait4id, - test_strerror(rc)); + log_trace("<< wait4start(%u), failed %s", config.wait4id, 
test_strerror(rc)); return false; } } else { @@ -387,8 +368,7 @@ bool testcase::wait4start() { if (config.params.delaystart) { int rc = osal_delay(config.params.delaystart); if (rc) { - log_trace("<< delay(%u), failed %s", config.params.delaystart, - test_strerror(rc)); + log_trace("<< delay(%u), failed %s", config.params.delaystart, test_strerror(rc)); return false; } } else { @@ -410,13 +390,11 @@ void testcase::report(size_t nops_done) { return; nops_completed += nops_done; - log_debug("== complete +%" PRIuPTR " iteration, total %" PRIu64 " done", - nops_done, nops_completed); + log_debug("== complete +%" PRIuPTR " iteration, total %" PRIu64 " done", nops_done, nops_completed); kick_progress(true); - if (config.signal_nops && !signalled && - config.signal_nops <= nops_completed) { + if (config.signal_nops && !signalled && config.signal_nops <= nops_completed) { log_trace(">> signal(n-ops %" PRIu64 ")", nops_completed); if (!global::singlemode) osal_broadcast(config.actor_id); @@ -458,14 +436,12 @@ bool testcase::should_continue(bool check_timeout_only) const { if (config.params.test_duration) { chrono::time since; - since.fixedpoint = - chrono::now_monotonic().fixedpoint - start_timestamp.fixedpoint; + since.fixedpoint = chrono::now_monotonic().fixedpoint - start_timestamp.fixedpoint; if (since.seconds() >= config.params.test_duration) result = false; } - if (!check_timeout_only && config.params.test_nops && - nops_completed >= config.params.test_nops) + if (!check_timeout_only && config.params.test_nops && nops_completed >= config.params.test_nops) result = false; if (result) @@ -483,25 +459,18 @@ void testcase::fetch_canary() { failure_perror("mdbx_canary_get()", rc); if (canary_now.v < last.canary.v) - failure("fetch_canary: %" PRIu64 "(canary-now.v) < %" PRIu64 - "(canary-last.v)", - canary_now.v, last.canary.v); + failure("fetch_canary: %" PRIu64 "(canary-now.v) < %" PRIu64 "(canary-last.v)", canary_now.v, last.canary.v); if (canary_now.y < last.canary.y) - 
failure("fetch_canary: %" PRIu64 "(canary-now.y) < %" PRIu64 - "(canary-last.y)", - canary_now.y, last.canary.y); + failure("fetch_canary: %" PRIu64 "(canary-now.y) < %" PRIu64 "(canary-last.y)", canary_now.y, last.canary.y); last.canary = canary_now; - log_trace("<< fetch_canary: db-sequence %" PRIu64 - ", db-sequence.txnid %" PRIu64, - last.canary.y, last.canary.v); + log_trace("<< fetch_canary: db-sequence %" PRIu64 ", db-sequence.txnid %" PRIu64, last.canary.y, last.canary.v); } void testcase::update_canary(uint64_t increment) { MDBX_canary canary_now = last.canary; - log_trace(">> update_canary: sequence %" PRIu64 " += %" PRIu64, canary_now.y, - increment); + log_trace(">> update_canary: sequence %" PRIu64 " += %" PRIu64, canary_now.y, increment); canary_now.y += increment; int rc = mdbx_canary_put(txn_guard.get(), &canary_now); @@ -511,8 +480,7 @@ void testcase::update_canary(uint64_t increment) { log_trace("<< update_canary: sequence = %" PRIu64, canary_now.y); } -bool testcase::is_handle_created_in_current_txn(const MDBX_dbi handle, - MDBX_txn *txn) { +bool testcase::is_handle_created_in_current_txn(const MDBX_dbi handle, MDBX_txn *txn) { unsigned flags, state; int err = mdbx_dbi_flags_ex(txn, handle, &flags, &state); if (unlikely(err != MDBX_SUCCESS)) @@ -540,17 +508,14 @@ int testcase::db_open__begin__table_create_open_clean(MDBX_dbi &handle) { break; jitter_delay(true); } - log_notice("db_begin_table_create_open_clean: bailout due '%s'", - mdbx_strerror(err)); + log_notice("db_begin_table_create_open_clean: bailout due '%s'", mdbx_strerror(err)); return err; } -const char *testcase::db_tablename(tablename_buf &buffer, - const char *suffix) const { +const char *testcase::db_tablename(tablename_buf &buffer, const char *suffix) const { const char *tablename = nullptr; if (config.space_id) { - int rc = - snprintf(buffer, sizeof(buffer), "TBL%04u%s", config.space_id, suffix); + int rc = snprintf(buffer, sizeof(buffer), "TBL%04u%s", config.space_id, suffix); 
if (rc < 4 || rc >= (int)sizeof(tablename_buf) - 1) failure("snprintf(tablename): %d", rc); tablename = buffer; @@ -560,23 +525,20 @@ const char *testcase::db_tablename(tablename_buf &buffer, } MDBX_dbi testcase::db_table_open(bool create, bool expect_failure) { - log_trace(">> testcase::db_table_%s%s", create ? "create" : "open", - expect_failure ? "(expect_failure)" : ""); + log_trace(">> testcase::db_table_%s%s", create ? "create" : "open", expect_failure ? "(expect_failure)" : ""); tablename_buf buffer; const char *tablename = db_tablename(buffer); MDBX_dbi handle = 0; - int rc = mdbx_dbi_open( - txn_guard.get(), tablename, - create ? (MDBX_CREATE | config.params.table_flags) - : (flipcoin() ? MDBX_DB_ACCEDE - : MDBX_DB_DEFAULTS | config.params.table_flags), - &handle); + int rc = mdbx_dbi_open(txn_guard.get(), tablename, + create ? (MDBX_CREATE | config.params.table_flags) + : (flipcoin() ? MDBX_DB_ACCEDE : MDBX_DB_DEFAULTS | config.params.table_flags), + &handle); if (unlikely(expect_failure != (rc != MDBX_SUCCESS))) { char act[64]; - snprintf(act, sizeof(act), "mdbx_dbi_open(create=%s,expect_failure=%s)", - create ? "true" : "false", expect_failure ? "true" : "false"); + snprintf(act, sizeof(act), "mdbx_dbi_open(create=%s,expect_failure=%s)", create ? "true" : "false", + expect_failure ? 
"true" : "false"); failure_perror(act, rc); } @@ -617,11 +579,9 @@ void testcase::db_table_close(MDBX_dbi handle) { log_trace("<< testcase::db_table_close"); } -bool testcase::checkdata(const char *step, MDBX_dbi handle, MDBX_val key2check, - MDBX_val expected_valued) { +bool testcase::checkdata(const char *step, MDBX_dbi handle, MDBX_val key2check, MDBX_val expected_valued) { MDBX_val actual_value = expected_valued; - int err = mdbx_get_equal_or_great(txn_guard.get(), handle, &key2check, - &actual_value); + int err = mdbx_get_equal_or_great(txn_guard.get(), handle, &key2check, &actual_value); if (unlikely(err != MDBX_SUCCESS)) { if (!config.params.speculum || err != MDBX_RESULT_TRUE) failure_perror(step, (err == MDBX_RESULT_TRUE) ? MDBX_NOTFOUND : err); @@ -690,8 +650,7 @@ static void dump_stack(CONTEXT *ctx, FILE *out) { #error "FIXME" #endif , - process, thread, &stack, &ctxCopy, NULL, SymFunctionTableAccess64, - SymGetModuleBase64, NULL); + process, thread, &stack, &ctxCopy, NULL, SymFunctionTableAccess64, SymGetModuleBase64, NULL); if (!result) break; @@ -706,16 +665,14 @@ static void dump_stack(CONTEXT *ctx, FILE *out) { // try to get line if (SymGetLineFromAddr64(process, stack.AddrPC.Offset, &disp, &line)) { - fprintf(out, "\tat %s in %s: line: %lu: address: 0x%0" PRIx64 "\n", - pSymbol->Name, line.FileName, line.LineNumber, pSymbol->Address); + fprintf(out, "\tat %s in %s: line: %lu: address: 0x%0" PRIx64 "\n", pSymbol->Name, line.FileName, line.LineNumber, + pSymbol->Address); } else { // failed to get line - fprintf(out, "\tat %s, address 0x%0" PRIx64 ".\n", pSymbol->Name, - pSymbol->Address); + fprintf(out, "\tat %s, address 0x%0" PRIx64 ".\n", pSymbol->Name, pSymbol->Address); hModule = NULL; lstrcpyA(module, ""); - GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | - GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, 
(LPCTSTR)(stack.AddrPC.Offset), &hModule); // at least print module name @@ -787,19 +744,16 @@ static LONG seh_filter(struct _EXCEPTION_POINTERS *ExInfo, FILE *out) { PVOID CodeAdress = ExInfo->ExceptionRecord->ExceptionAddress; fprintf(out, "****************************************************\n"); fprintf(out, "*** A Program Fault occurred:\n"); - fprintf(out, "*** Error code %08X: %s\n", - ExInfo->ExceptionRecord->ExceptionCode, caption); + fprintf(out, "*** Error code %08X: %s\n", ExInfo->ExceptionRecord->ExceptionCode, caption); fprintf(out, "****************************************************\n"); fprintf(out, "*** Address: %08zX\n", (intptr_t)CodeAdress); - fprintf(out, "*** Flags: %08X\n", - ExInfo->ExceptionRecord->ExceptionFlags); + fprintf(out, "*** Flags: %08X\n", ExInfo->ExceptionRecord->ExceptionFlags); dump_stack(ExInfo->ContextRecord, out); return EXCEPTION_EXECUTE_HANDLER; } #endif /* _MSC_VER */ -static bool execute_thunk(const actor_config *const_config, - const mdbx_pid_t pid) { +static bool execute_thunk(const actor_config *const_config, const mdbx_pid_t pid) { actor_config config = *const_config; try { if (global::singlemode) { @@ -837,8 +791,7 @@ static bool execute_thunk(const actor_config *const_config, log_verbose("test successfully"); else { if (config.params.nrepeat) - log_verbose("test successfully (iteration %zi of %zi)", iter, - size_t(config.params.nrepeat)); + log_verbose("test successfully (iteration %zi of %zi)", iter, size_t(config.params.nrepeat)); else log_verbose("test successfully (iteration %zi)", iter); } @@ -866,61 +819,44 @@ bool test_execute(const actor_config &config) { //----------------------------------------------------------------------------- -enum speculum_cursors : int { - lowerbound = 0, - prev = 1, - prev_prev = 2, - next = 3, - next_next = 4, - seek_check = 5 -}; +enum speculum_cursors : int { lowerbound = 0, prev = 1, prev_prev = 2, next = 3, next_next = 4, seek_check = 5 }; bool testcase::is_same(const 
Item &a, const Item &b) const { if (!is_samedata(dataview2iov(a.first), dataview2iov(b.first))) return false; - if ((config.params.table_flags & MDBX_DUPSORT) && - !is_samedata(dataview2iov(a.second), dataview2iov(b.second))) + if ((config.params.table_flags & MDBX_DUPSORT) && !is_samedata(dataview2iov(a.second), dataview2iov(b.second))) return false; return true; } -bool testcase::is_same(const testcase::SET::const_iterator &it, - const MDBX_val &k, const MDBX_val &v) const { +bool testcase::is_same(const testcase::SET::const_iterator &it, const MDBX_val &k, const MDBX_val &v) const { - return is_samedata(dataview2iov(it->first), k) && - is_samedata(dataview2iov(it->second), v); + return is_samedata(dataview2iov(it->first), k) && is_samedata(dataview2iov(it->second), v); } -void testcase::verbose(const char *where, const char *stage, - const testcase::SET::const_iterator &it) const { +void testcase::verbose(const char *where, const char *stage, const testcase::SET::const_iterator &it) const { if (it == speculum.end()) log_verbose("speculum-%s: %s expect END", where, stage); else { char dump_key[32], dump_value[32]; MDBX_val it_key = dataview2iov(it->first); MDBX_val it_data = dataview2iov(it->second); - log_verbose("speculum-%s: %s expect {%s, %s}", where, stage, - mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), + log_verbose("speculum-%s: %s expect {%s, %s}", where, stage, mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), mdbx_dump_val(&it_data, dump_value, sizeof(dump_value))); } } -void testcase::verbose(const char *where, const char *stage, const MDBX_val &k, - const MDBX_val &v, int err) const { +void testcase::verbose(const char *where, const char *stage, const MDBX_val &k, const MDBX_val &v, int err) const { char dump_key[32], dump_value[32]; if (err != MDBX_SUCCESS && err != MDBX_RESULT_TRUE) - log_verbose("speculum-%s: %s cursor {%d, %s}", where, stage, err, - mdbx_strerror(err)); + log_verbose("speculum-%s: %s cursor {%d, %s}", where, stage, err, 
mdbx_strerror(err)); else - log_verbose("speculum-%s: %s cursor {%s, %s}", where, stage, - mdbx_dump_val(&k, dump_key, sizeof(dump_key)), + log_verbose("speculum-%s: %s cursor {%s, %s}", where, stage, mdbx_dump_val(&k, dump_key, sizeof(dump_key)), mdbx_dump_val(&v, dump_value, sizeof(dump_value))); } -bool testcase::speculum_check_iterator(const char *where, const char *stage, - const testcase::SET::const_iterator &it, - const MDBX_val &k, const MDBX_val &v, - MDBX_cursor *cursor) const { +bool testcase::speculum_check_iterator(const char *where, const char *stage, const testcase::SET::const_iterator &it, + const MDBX_val &k, const MDBX_val &v, MDBX_cursor *cursor) const { char dump_key[32], dump_value[32]; MDBX_val it_key = dataview2iov(it->first); MDBX_val it_data = dataview2iov(it->second); @@ -935,8 +871,8 @@ bool testcase::speculum_check_iterator(const char *where, const char *stage, } if (!is_samedata(it_data, v)) { speculum_render(it, cursor); - return failure("speculum-%s: %s data mismatch %s (must) != %s", where, - stage, mdbx_dump_val(&it_data, dump_key, sizeof(dump_key)), + return failure("speculum-%s: %s data mismatch %s (must) != %s", where, stage, + mdbx_dump_val(&it_data, dump_key, sizeof(dump_key)), mdbx_dump_val(&v, dump_value, sizeof(dump_value))); } return true; @@ -957,8 +893,7 @@ bool testcase::failure(const char *fmt, ...) 
const { #if SPECULUM_CURSORS -static void speculum_render_cursor(const MDBX_val &ikey, const MDBX_val &ival, - const MDBX_cursor *cursor, +static void speculum_render_cursor(const MDBX_val &ikey, const MDBX_val &ival, const MDBX_cursor *cursor, const MDBX_cursor *ref) { scoped_cursor_guard guard(mdbx_cursor_create(nullptr)); if (!guard) @@ -990,16 +925,14 @@ static void speculum_render_cursor(const MDBX_val &ikey, const MDBX_val &ival, if (mdbx_cursor_get(clone, &ckey, &cval, MDBX_GET_CURRENT) != MDBX_SUCCESS) *s++ = '!'; else { - const int kcmp = - mdbx_cmp(mdbx_cursor_txn(clone), mdbx_cursor_dbi(clone), &ikey, &ckey); + const int kcmp = mdbx_cmp(mdbx_cursor_txn(clone), mdbx_cursor_dbi(clone), &ikey, &ckey); if (kcmp < 0) *s++ = '<'; else if (kcmp > 0) *s++ = '>'; else { *s++ = '='; - const int vcmp = mdbx_dcmp(mdbx_cursor_txn(clone), mdbx_cursor_dbi(clone), - &ival, &cval); + const int vcmp = mdbx_dcmp(mdbx_cursor_txn(clone), mdbx_cursor_dbi(clone), &ival, &cval); if (vcmp < 0) *s++ = '<'; else if (vcmp > 0) @@ -1018,8 +951,7 @@ static void speculum_render_cursor(const MDBX_val &ikey, const MDBX_val &ival, printf(" | %-10.10s", status); } -void testcase::speculum_render(const testcase::SET::const_iterator &it, - const MDBX_cursor *ref) const { +void testcase::speculum_render(const testcase::SET::const_iterator &it, const MDBX_cursor *ref) const { char dump_key[32], dump_value[32]; auto top = it; @@ -1030,21 +962,18 @@ void testcase::speculum_render(const testcase::SET::const_iterator &it, } printf("## %-20.20s %-20.20s | %-10.10s | %-10.10s | %-10.10s | %-10.10s | " "%-10.10s | %-10.10s |\n", - "k0_1_2_3_4_5_6_7_8_9", "v0_1_2_3_4_5_6_7_8_9", "prev-prev", "prev", - "seek", "lowerbound", "next", "next-next"); + "k0_1_2_3_4_5_6_7_8_9", "v0_1_2_3_4_5_6_7_8_9", "prev-prev", "prev", "seek", "lowerbound", "next", + "next-next"); while (offset < 5 && top != speculum.end()) { const MDBX_val ikey = dataview2iov(top->first); const MDBX_val idata = dataview2iov(top->second); 
- printf("%+d) %20.20s %20.20s", offset, - mdbx_dump_val(&ikey, dump_key, sizeof(dump_key)), + printf("%+d) %20.20s %20.20s", offset, mdbx_dump_val(&ikey, dump_key, sizeof(dump_key)), mdbx_dump_val(&idata, dump_value, sizeof(dump_value))); speculum_render_cursor(ikey, idata, speculum_cursors[prev_prev].get(), ref); speculum_render_cursor(ikey, idata, speculum_cursors[prev].get(), ref); - speculum_render_cursor(ikey, idata, speculum_cursors[seek_check].get(), - ref); - speculum_render_cursor(ikey, idata, speculum_cursors[lowerbound].get(), - ref); + speculum_render_cursor(ikey, idata, speculum_cursors[seek_check].get(), ref); + speculum_render_cursor(ikey, idata, speculum_cursors[lowerbound].get(), ref); speculum_render_cursor(ikey, idata, speculum_cursors[next].get(), ref); speculum_render_cursor(ikey, idata, speculum_cursors[next_next].get(), ref); @@ -1054,29 +983,24 @@ void testcase::speculum_render(const testcase::SET::const_iterator &it, } } -bool testcase::speculum_check_cursor(const char *where, const char *stage, - const testcase::SET::const_iterator &it, - int cursor_err, const MDBX_val &cursor_key, - const MDBX_val &cursor_data, +bool testcase::speculum_check_cursor(const char *where, const char *stage, const testcase::SET::const_iterator &it, + int cursor_err, const MDBX_val &cursor_key, const MDBX_val &cursor_data, MDBX_cursor *cursor) const { // verbose(where, stage, cursor_key, cursor_data, cursor_err); // verbose(where, stage, it); - if (cursor_err != MDBX_SUCCESS && cursor_err != MDBX_NOTFOUND && - cursor_err != MDBX_RESULT_TRUE && cursor_err != MDBX_ENODATA) { + if (cursor_err != MDBX_SUCCESS && cursor_err != MDBX_NOTFOUND && cursor_err != MDBX_RESULT_TRUE && + cursor_err != MDBX_ENODATA) { speculum_render(it, cursor); - return failure("speculum-%s: %s %s %d %s", where, stage, "cursor-get", - cursor_err, mdbx_strerror(cursor_err)); + return failure("speculum-%s: %s %s %d %s", where, stage, "cursor-get", cursor_err, mdbx_strerror(cursor_err)); } 
char dump_key[32], dump_value[32]; - if (it == speculum.end() && cursor_err != MDBX_NOTFOUND && - cursor_err != MDBX_ENODATA) { + if (it == speculum.end() && cursor_err != MDBX_NOTFOUND && cursor_err != MDBX_ENODATA) { speculum_render(it, cursor); return failure("speculum-%s: %s extra pair {%s, %s}", where, stage, mdbx_dump_val(&cursor_key, dump_key, sizeof(dump_key)), mdbx_dump_val(&cursor_data, dump_value, sizeof(dump_value))); - } else if (it != speculum.end() && - (cursor_err == MDBX_NOTFOUND || cursor_err == MDBX_ENODATA)) { + } else if (it != speculum.end() && (cursor_err == MDBX_NOTFOUND || cursor_err == MDBX_ENODATA)) { speculum_render(it, cursor); MDBX_val it_key = dataview2iov(it->first); MDBX_val it_data = dataview2iov(it->second); @@ -1084,24 +1008,19 @@ bool testcase::speculum_check_cursor(const char *where, const char *stage, mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), mdbx_dump_val(&it_data, dump_value, sizeof(dump_value))); } else if (cursor_err == MDBX_SUCCESS || cursor_err == MDBX_RESULT_TRUE) - return speculum_check_iterator(where, stage, it, cursor_key, cursor_data, - cursor); + return speculum_check_iterator(where, stage, it, cursor_key, cursor_data, cursor); else { - assert(it == speculum.end() && - (cursor_err == MDBX_NOTFOUND || cursor_err == MDBX_ENODATA)); + assert(it == speculum.end() && (cursor_err == MDBX_NOTFOUND || cursor_err == MDBX_ENODATA)); return true; } } -bool testcase::speculum_check_cursor(const char *where, const char *stage, - const testcase::SET::const_iterator &it, - MDBX_cursor *cursor, - const MDBX_cursor_op op) const { +bool testcase::speculum_check_cursor(const char *where, const char *stage, const testcase::SET::const_iterator &it, + MDBX_cursor *cursor, const MDBX_cursor_op op) const { MDBX_val cursor_key = {0, 0}; MDBX_val cursor_data = {0, 0}; int err = mdbx_cursor_get(cursor, &cursor_key, &cursor_data, op); - return speculum_check_cursor(where, stage, it, err, cursor_key, cursor_data, - cursor); + return 
speculum_check_cursor(where, stage, it, err, cursor_key, cursor_data, cursor); } void testcase::speculum_prepare_cursors(const Item &item) { @@ -1109,8 +1028,7 @@ void testcase::speculum_prepare_cursors(const Item &item) { assert(config.params.speculum); if (speculum_cursors[lowerbound]) for (auto &guard : speculum_cursors) { - if (txn_guard.get() != mdbx_cursor_txn(guard.get()) || - dbi != mdbx_cursor_dbi(guard.get())) { + if (txn_guard.get() != mdbx_cursor_txn(guard.get()) || dbi != mdbx_cursor_dbi(guard.get())) { err = mdbx_cursor_bind(txn_guard.get(), guard.get(), dbi); if (unlikely(err != MDBX_SUCCESS)) failure_perror("mdbx_cursor_bind()", err); @@ -1127,81 +1045,64 @@ void testcase::speculum_prepare_cursors(const Item &item) { // mdbx_cursor_reset(speculum_cursors[seek_check].get()); const auto cursor_lowerbound = speculum_cursors[lowerbound].get(); - const MDBX_val item_key = dataview2iov(item.first), - item_data = dataview2iov(item.second); + const MDBX_val item_key = dataview2iov(item.first), item_data = dataview2iov(item.second); MDBX_val lowerbound_key = item_key; MDBX_val lowerbound_data = item_data; // verbose("prepare-cursors", "item", item_key, item_data); - err = mdbx_cursor_get(cursor_lowerbound, &lowerbound_key, &lowerbound_data, - MDBX_SET_LOWERBOUND); + err = mdbx_cursor_get(cursor_lowerbound, &lowerbound_key, &lowerbound_data, MDBX_SET_LOWERBOUND); // verbose("prepare-cursors", "lowerbound", lowerbound_key, lowerbound_data, // err); - if (unlikely(err != MDBX_SUCCESS && err != MDBX_RESULT_TRUE && - err != MDBX_NOTFOUND)) - failure("speculum-%s: %s %s %d %s", "prepare-cursors", "lowerbound", - "cursor-get", err, mdbx_strerror(err)); + if (unlikely(err != MDBX_SUCCESS && err != MDBX_RESULT_TRUE && err != MDBX_NOTFOUND)) + failure("speculum-%s: %s %s %d %s", "prepare-cursors", "lowerbound", "cursor-get", err, mdbx_strerror(err)); auto it_lowerbound = speculum.lower_bound(item); // verbose("prepare-cursors", "lowerbound", it_lowerbound); - 
speculum_check_cursor("prepare-cursors", "lowerbound", it_lowerbound, err, - lowerbound_key, lowerbound_data, cursor_lowerbound); + speculum_check_cursor("prepare-cursors", "lowerbound", it_lowerbound, err, lowerbound_key, lowerbound_data, + cursor_lowerbound); const auto cursor_prev = speculum_cursors[prev].get(); err = mdbx_cursor_copy(cursor_lowerbound, cursor_prev); if (unlikely(err != MDBX_SUCCESS)) - failure("speculum-%s: %s %s %d %s", "prepare-cursors", "prev", - "cursor-copy", err, mdbx_strerror(err)); + failure("speculum-%s: %s %s %d %s", "prepare-cursors", "prev", "cursor-copy", err, mdbx_strerror(err)); auto it_prev = it_lowerbound; if (it_prev != speculum.begin()) { - speculum_check_cursor("prepare-cursors", "prev", --it_prev, cursor_prev, - MDBX_PREV); + speculum_check_cursor("prepare-cursors", "prev", --it_prev, cursor_prev, MDBX_PREV); } else if ((err = mdbx_cursor_on_first(cursor_prev)) != MDBX_RESULT_TRUE) - failure("speculum-%s: %s on-first %d %s", "prepare-cursors", "prev", err, - mdbx_strerror(err)); + failure("speculum-%s: %s on-first %d %s", "prepare-cursors", "prev", err, mdbx_strerror(err)); const auto cursor_prev_prev = speculum_cursors[prev_prev].get(); err = mdbx_cursor_copy(cursor_prev, cursor_prev_prev); if (unlikely(err != MDBX_SUCCESS)) - failure("speculum-%s: %s %s %d %s", "prepare-cursors", "prev-prev", - "cursor-copy", err, mdbx_strerror(err)); + failure("speculum-%s: %s %s %d %s", "prepare-cursors", "prev-prev", "cursor-copy", err, mdbx_strerror(err)); auto it_prev_prev = it_prev; if (it_prev_prev != speculum.begin()) { - speculum_check_cursor("prepare-cursors", "prev-prev", --it_prev_prev, - cursor_prev_prev, MDBX_PREV); + speculum_check_cursor("prepare-cursors", "prev-prev", --it_prev_prev, cursor_prev_prev, MDBX_PREV); } else if ((err = mdbx_cursor_on_first(cursor_prev_prev)) != MDBX_RESULT_TRUE) - failure("speculum-%s: %s on-first %d %s", "prepare-cursors", "prev-prev", - err, mdbx_strerror(err)); + failure("speculum-%s: %s 
on-first %d %s", "prepare-cursors", "prev-prev", err, mdbx_strerror(err)); const auto cursor_next = speculum_cursors[next].get(); err = mdbx_cursor_copy(cursor_lowerbound, cursor_next); if (unlikely(err != MDBX_SUCCESS)) - failure("speculum-%s: %s %s %d %s", "prepare-cursors", "next", - "cursor-copy", err, mdbx_strerror(err)); + failure("speculum-%s: %s %s %d %s", "prepare-cursors", "next", "cursor-copy", err, mdbx_strerror(err)); auto it_next = it_lowerbound; if (it_next != speculum.end()) { - speculum_check_cursor("prepare-cursors", "next", ++it_next, cursor_next, - MDBX_NEXT); + speculum_check_cursor("prepare-cursors", "next", ++it_next, cursor_next, MDBX_NEXT); } else if ((err = mdbx_cursor_on_last(cursor_next)) != MDBX_RESULT_TRUE) - failure("speculum-%s: %s on-last %d %s", "prepare-cursors", "next", err, - mdbx_strerror(err)); + failure("speculum-%s: %s on-last %d %s", "prepare-cursors", "next", err, mdbx_strerror(err)); const auto cursor_next_next = speculum_cursors[next_next].get(); err = mdbx_cursor_copy(cursor_next, cursor_next_next); if (unlikely(err != MDBX_SUCCESS)) - failure("speculum-%s: %s %s %d %s", "prepare-cursors", "next-next", - "cursor-copy", err, mdbx_strerror(err)); + failure("speculum-%s: %s %s %d %s", "prepare-cursors", "next-next", "cursor-copy", err, mdbx_strerror(err)); auto it_next_next = it_next; if (it_next_next != speculum.end()) { - speculum_check_cursor("prepare-cursors", "next-next", ++it_next_next, - cursor_next_next, MDBX_NEXT); + speculum_check_cursor("prepare-cursors", "next-next", ++it_next_next, cursor_next_next, MDBX_NEXT); } else if ((err = mdbx_cursor_on_last(cursor_next_next)) != MDBX_RESULT_TRUE) - failure("speculum-%s: %s on-last %d %s", "prepare-cursors", "next-next", - err, mdbx_strerror(err)); + failure("speculum-%s: %s on-last %d %s", "prepare-cursors", "next-next", err, mdbx_strerror(err)); } #endif /* SPECULUM_CURSORS */ -int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, - 
MDBX_put_flags_t flags) { +int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, MDBX_put_flags_t flags) { int err; bool rc = true; Item item; @@ -1218,13 +1119,10 @@ int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, check_seek_cursor = speculum_cursors[seek_check].get(); seek_check_key = akey->value; seek_check_data = adata->value; - seek_check_err = mdbx_cursor_get(check_seek_cursor, &seek_check_key, - &seek_check_data, MDBX_SET_LOWERBOUND); + seek_check_err = mdbx_cursor_get(check_seek_cursor, &seek_check_key, &seek_check_data, MDBX_SET_LOWERBOUND); // speculum_render(speculum.find(item), check_seek_cursor); - if (seek_check_err != MDBX_SUCCESS && seek_check_err != MDBX_NOTFOUND && - seek_check_err != MDBX_RESULT_TRUE) - failure("speculum-%s: %s pre-insert %d %s", "insert", "seek", - seek_check_err, mdbx_strerror(seek_check_err)); + if (seek_check_err != MDBX_SUCCESS && seek_check_err != MDBX_NOTFOUND && seek_check_err != MDBX_RESULT_TRUE) + failure("speculum-%s: %s pre-insert %d %s", "insert", "seek", seek_check_err, mdbx_strerror(seek_check_err)); #endif /* SPECULUM_CURSORS */ } @@ -1251,20 +1149,17 @@ int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, #if SPECULUM_CURSORS if (insertion_result.second) { if (seek_check_err == MDBX_SUCCESS) { - log_error( - "speculum.pre-insert-seek: unexpected %d {%s, %s}", seek_check_err, - mdbx_dump_val(&seek_check_key, dump_key, sizeof(dump_key)), - mdbx_dump_val(&seek_check_data, dump_value, sizeof(dump_value))); + log_error("speculum.pre-insert-seek: unexpected %d {%s, %s}", seek_check_err, + mdbx_dump_val(&seek_check_key, dump_key, sizeof(dump_key)), + mdbx_dump_val(&seek_check_data, dump_value, sizeof(dump_value))); rc = false; } } else { if (seek_check_err != MDBX_SUCCESS) { - log_error( - "speculum.pre-insert-seek: unexpected %d {%s, %s}", seek_check_err, - mdbx_dump_val(&seek_check_key, dump_key, sizeof(dump_key)), - 
mdbx_dump_val(&seek_check_data, dump_value, sizeof(dump_value))); - speculum_check_iterator("insert", "pre-seek", insertion_result.first, - seek_check_key, seek_check_data, + log_error("speculum.pre-insert-seek: unexpected %d {%s, %s}", seek_check_err, + mdbx_dump_val(&seek_check_key, dump_key, sizeof(dump_key)), + mdbx_dump_val(&seek_check_data, dump_value, sizeof(dump_value))); + speculum_check_iterator("insert", "pre-seek", insertion_result.first, seek_check_key, seek_check_data, check_seek_cursor); rc = false; } @@ -1273,13 +1168,11 @@ int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, if (insertion_result.first != speculum.begin()) { const auto cursor_prev = speculum_cursors[prev].get(); auto it_prev = insertion_result.first; - speculum_check_cursor("after-insert", "prev", --it_prev, cursor_prev, - MDBX_GET_CURRENT); + speculum_check_cursor("after-insert", "prev", --it_prev, cursor_prev, MDBX_GET_CURRENT); if (it_prev != speculum.begin()) { const auto cursor_prev_prev = speculum_cursors[prev_prev].get(); auto it_prev_prev = it_prev; - speculum_check_cursor("after-insert", "prev-prev", --it_prev_prev, - cursor_prev_prev, MDBX_GET_CURRENT); + speculum_check_cursor("after-insert", "prev-prev", --it_prev_prev, cursor_prev_prev, MDBX_GET_CURRENT); } } @@ -1288,20 +1181,17 @@ int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, ++it_lowerbound; if (it_lowerbound != speculum.end()) { const auto cursor_lowerbound = speculum_cursors[lowerbound].get(); - speculum_check_cursor("after-insert", "lowerbound", it_lowerbound, - cursor_lowerbound, MDBX_GET_CURRENT); + speculum_check_cursor("after-insert", "lowerbound", it_lowerbound, cursor_lowerbound, MDBX_GET_CURRENT); auto it_next = it_lowerbound; if (++it_next != speculum.end()) { const auto cursor_next = speculum_cursors[next].get(); - speculum_check_cursor("after-insert", "next", it_next, cursor_next, - MDBX_GET_CURRENT); + speculum_check_cursor("after-insert", "next", 
it_next, cursor_next, MDBX_GET_CURRENT); auto it_next_next = it_next; if (++it_next_next != speculum.end()) { const auto cursor_next_next = speculum_cursors[next_next].get(); - speculum_check_cursor("after-insert", "next-next", it_next_next, - cursor_next_next, MDBX_GET_CURRENT); + speculum_check_cursor("after-insert", "next-next", it_next_next, cursor_next_next, MDBX_GET_CURRENT); } } } @@ -1313,10 +1203,8 @@ int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, return rc ? MDBX_SUCCESS : MDBX_RESULT_TRUE; } -int testcase::replace(const keygen::buffer &akey, - const keygen::buffer &new_data, - const keygen::buffer &old_data, MDBX_put_flags_t flags, - bool hush_keygen_mistakes) { +int testcase::replace(const keygen::buffer &akey, const keygen::buffer &new_data, const keygen::buffer &old_data, + MDBX_put_flags_t flags, bool hush_keygen_mistakes) { int expected_err = MDBX_SUCCESS; if (config.params.speculum) { const auto S_key = iov2dataview(akey); @@ -1325,22 +1213,19 @@ int testcase::replace(const keygen::buffer &akey, const auto removed = speculum.erase(SET::key_type(S_key, S_old)); if (unlikely(!removed)) { char dump_key[128], dump_value[128]; - log_error( - "speculum-%s: no old pair {%s, %s} (keygen mistake)", "replace", - mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), - mdbx_dump_val(&old_data->value, dump_value, sizeof(dump_value))); + log_error("speculum-%s: no old pair {%s, %s} (keygen mistake)", "replace", + mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), + mdbx_dump_val(&old_data->value, dump_value, sizeof(dump_value))); expected_err = MDBX_NOTFOUND; } else if (unlikely(!speculum.emplace(S_key, S_new).second)) { char dump_key[128], dump_value[128]; - log_error( - "speculum-%s: %s {%s, %s}", "replace", "new pair not inserted", - mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), - mdbx_dump_val(&new_data->value, dump_value, sizeof(dump_value))); + log_error("speculum-%s: %s {%s, %s}", "replace", "new pair not 
inserted", + mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), + mdbx_dump_val(&new_data->value, dump_value, sizeof(dump_value))); expected_err = MDBX_KEYEXIST; } } - int err = mdbx_replace(txn_guard.get(), dbi, &akey->value, &new_data->value, - &old_data->value, flags); + int err = mdbx_replace(txn_guard.get(), dbi, &akey->value, &new_data->value, &old_data->value, flags); if (err && err == expected_err && hush_keygen_mistakes) { log_notice("speculum-%s: %s %d", "replace", "hust keygen mistake", err); err = MDBX_SUCCESS; @@ -1393,13 +1278,11 @@ int testcase::remove(const keygen::buffer &akey, const keygen::buffer &adata) { if (it_found != speculum.begin()) { const auto cursor_prev = speculum_cursors[prev].get(); auto it_prev = it_found; - speculum_check_cursor("after-remove", "prev", --it_prev, cursor_prev, - MDBX_GET_CURRENT); + speculum_check_cursor("after-remove", "prev", --it_prev, cursor_prev, MDBX_GET_CURRENT); if (it_prev != speculum.begin()) { const auto cursor_prev_prev = speculum_cursors[prev_prev].get(); auto it_prev_prev = it_prev; - speculum_check_cursor("after-remove", "prev-prev", --it_prev_prev, - cursor_prev_prev, MDBX_GET_CURRENT); + speculum_check_cursor("after-remove", "prev-prev", --it_prev_prev, cursor_prev_prev, MDBX_GET_CURRENT); } } @@ -1407,27 +1290,20 @@ int testcase::remove(const keygen::buffer &akey, const keygen::buffer &adata) { const auto cursor_next = speculum_cursors[next].get(); const auto cursor_lowerbound = speculum_cursors[lowerbound].get(); if (++it_next != speculum.end()) { - speculum_check_cursor("after-remove", "next", it_next, cursor_next, - MDBX_GET_CURRENT); - speculum_check_cursor("after-remove", "lowerbound", it_next, - cursor_lowerbound, MDBX_NEXT); + speculum_check_cursor("after-remove", "next", it_next, cursor_next, MDBX_GET_CURRENT); + speculum_check_cursor("after-remove", "lowerbound", it_next, cursor_lowerbound, MDBX_NEXT); auto it_next_next = it_next; const auto cursor_next_next = 
speculum_cursors[next_next].get(); if (++it_next_next != speculum.end()) { - speculum_check_cursor("after-remove", "next-next", it_next_next, - cursor_next_next, MDBX_GET_CURRENT); - } else if ((err = mdbx_cursor_on_last(cursor_next_next)) != - MDBX_RESULT_TRUE) - failure("speculum-%s: %s on-last %d %s", "after-remove", "next-next", - err, mdbx_strerror(err)); + speculum_check_cursor("after-remove", "next-next", it_next_next, cursor_next_next, MDBX_GET_CURRENT); + } else if ((err = mdbx_cursor_on_last(cursor_next_next)) != MDBX_RESULT_TRUE) + failure("speculum-%s: %s on-last %d %s", "after-remove", "next-next", err, mdbx_strerror(err)); } else { if ((err = mdbx_cursor_on_last(cursor_next)) != MDBX_RESULT_TRUE) - failure("speculum-%s: %s on-last %d %s", "after-remove", "next", err, - mdbx_strerror(err)); + failure("speculum-%s: %s on-last %d %s", "after-remove", "next", err, mdbx_strerror(err)); if ((err = mdbx_cursor_on_last(cursor_lowerbound)) != MDBX_RESULT_TRUE) - failure("speculum-%s: %s on-last %d %s", "after-remove", "lowerbound", - err, mdbx_strerror(err)); + failure("speculum-%s: %s on-last %d %s", "after-remove", "lowerbound", err, mdbx_strerror(err)); } #endif /* SPECULUM_CURSORS */ @@ -1477,8 +1353,8 @@ bool testcase::speculum_verify() { } else { eof = mdbx_cursor_eof(cursor); if (eof != MDBX_RESULT_FALSE) { - log_error("false-positive cursor-eof %u/%u: db{%s, %s}, rc %i", n, - extra, mdbx_dump_val(&akey, dump_key, sizeof(dump_key)), + log_error("false-positive cursor-eof %u/%u: db{%s, %s}, rc %i", n, extra, + mdbx_dump_val(&akey, dump_key, sizeof(dump_key)), mdbx_dump_val(&avalue, dump_value, sizeof(dump_value)), eof); rc = false; } @@ -1489,38 +1365,31 @@ bool testcase::speculum_verify() { mkey = it->first; mvalue = it->second; } - if (err == MDBX_SUCCESS && it != speculum.cend() && S_key == it->first && - S_data == it->second) { + if (err == MDBX_SUCCESS && it != speculum.cend() && S_key == it->first && S_data == it->second) { ++it; err = 
mdbx_cursor_get(cursor, &akey, &avalue, MDBX_NEXT); } else if (err == MDBX_SUCCESS && - (it == speculum.cend() || S_key < it->first || - (S_key == it->first && S_data < it->second))) { + (it == speculum.cend() || S_key < it->first || (S_key == it->first && S_data < it->second))) { extra += 1; if (it != speculum.cend()) { - log_error("extra pair %u/%u: db{%s, %s} < mi{%s, %s}", n, extra, - mdbx_dump_val(&akey, dump_key, sizeof(dump_key)), - mdbx_dump_val(&avalue, dump_value, sizeof(dump_value)), - mdbx_dump_val(&mkey, dump_mkey, sizeof(dump_mkey)), - mdbx_dump_val(&mvalue, dump_mvalue, sizeof(dump_mvalue))); + log_error( + "extra pair %u/%u: db{%s, %s} < mi{%s, %s}", n, extra, mdbx_dump_val(&akey, dump_key, sizeof(dump_key)), + mdbx_dump_val(&avalue, dump_value, sizeof(dump_value)), mdbx_dump_val(&mkey, dump_mkey, sizeof(dump_mkey)), + mdbx_dump_val(&mvalue, dump_mvalue, sizeof(dump_mvalue))); } else { - log_error("extra pair %u/%u: db{%s, %s} < mi.END", n, extra, - mdbx_dump_val(&akey, dump_key, sizeof(dump_key)), + log_error("extra pair %u/%u: db{%s, %s} < mi.END", n, extra, mdbx_dump_val(&akey, dump_key, sizeof(dump_key)), mdbx_dump_val(&avalue, dump_value, sizeof(dump_value))); } err = mdbx_cursor_get(cursor, &akey, &avalue, MDBX_NEXT); rc = false; } else if (it != speculum.cend() && - (err == MDBX_NOTFOUND || S_key > it->first || - (S_key == it->first && S_data > it->second))) { + (err == MDBX_NOTFOUND || S_key > it->first || (S_key == it->first && S_data > it->second))) { lost += 1; if (err == MDBX_NOTFOUND) { - log_error("lost pair %u/%u: db.END > mi{%s, %s}", n, lost, - mdbx_dump_val(&mkey, dump_mkey, sizeof(dump_mkey)), + log_error("lost pair %u/%u: db.END > mi{%s, %s}", n, lost, mdbx_dump_val(&mkey, dump_mkey, sizeof(dump_mkey)), mdbx_dump_val(&mvalue, dump_mvalue, sizeof(dump_mvalue))); } else { - log_error("lost pair %u/%u: db{%s, %s} > mi{%s, %s}", n, lost, - mdbx_dump_val(&akey, dump_key, sizeof(dump_key)), + log_error("lost pair %u/%u: db{%s, %s} > 
mi{%s, %s}", n, lost, mdbx_dump_val(&akey, dump_key, sizeof(dump_key)), mdbx_dump_val(&avalue, dump_value, sizeof(dump_value)), mdbx_dump_val(&mkey, dump_mkey, sizeof(dump_mkey)), mdbx_dump_val(&mvalue, dump_mvalue, sizeof(dump_mvalue))); @@ -1566,31 +1435,26 @@ bool testcase::check_batch_get() { bool rc = true; MDBX_val pairs[42]; size_t count = 0xDeadBeef; - batch_err = mdbx_cursor_get_batch(batch_cursor, &count, pairs, - ARRAY_LENGTH(pairs), MDBX_FIRST); + batch_err = mdbx_cursor_get_batch(batch_cursor, &count, pairs, ARRAY_LENGTH(pairs), MDBX_FIRST); size_t i, n = 0; while (batch_err == MDBX_SUCCESS || batch_err == MDBX_RESULT_TRUE) { for (i = 0; i < count; i += 2) { mdbx::slice k, v; - check_err = - mdbx_cursor_get(check_cursor, &k, &v, n ? MDBX_NEXT : MDBX_FIRST); + check_err = mdbx_cursor_get(check_cursor, &k, &v, n ? MDBX_NEXT : MDBX_FIRST); if (check_err != MDBX_SUCCESS) failure_perror("batch-verify: mdbx_cursor_get(MDBX_NEXT)", check_err); if (k != pairs[i] || v != pairs[i + 1]) { - log_error( - "batch-get pair mismatch %zu/%zu: sequential{%s, %s} != " - "batch{%s, %s}", - n + i / 2, i, mdbx_dump_val(&k, dump_key, sizeof(dump_key)), - mdbx_dump_val(&v, dump_value, sizeof(dump_value)), - mdbx_dump_val(&pairs[i], dump_key_batch, sizeof(dump_key_batch)), - mdbx_dump_val(&pairs[i + 1], dump_value_batch, - sizeof(dump_value_batch))); + log_error("batch-get pair mismatch %zu/%zu: sequential{%s, %s} != " + "batch{%s, %s}", + n + i / 2, i, mdbx_dump_val(&k, dump_key, sizeof(dump_key)), + mdbx_dump_val(&v, dump_value, sizeof(dump_value)), + mdbx_dump_val(&pairs[i], dump_key_batch, sizeof(dump_key_batch)), + mdbx_dump_val(&pairs[i + 1], dump_value_batch, sizeof(dump_value_batch))); rc = false; } ++n; } - batch_err = mdbx_cursor_get_batch(batch_cursor, &count, pairs, - ARRAY_LENGTH(pairs), MDBX_NEXT); + batch_err = mdbx_cursor_get_batch(batch_cursor, &count, pairs, ARRAY_LENGTH(pairs), MDBX_NEXT); } if (batch_err != MDBX_NOTFOUND) { 
log_error("mdbx_cursor_get_batch(), err %d", batch_err); @@ -1619,10 +1483,8 @@ bool testcase::check_batch_get() { } bool testcase::txn_probe_parking() { - MDBX_txn_flags_t state = - mdbx_txn_flags(txn_guard.get()) & - (MDBX_TXN_RDONLY | MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK | - MDBX_TXN_OUSTED | MDBX_TXN_BLOCKED); + MDBX_txn_flags_t state = mdbx_txn_flags(txn_guard.get()) & (MDBX_TXN_RDONLY | MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK | + MDBX_TXN_OUSTED | MDBX_TXN_BLOCKED); if (state != MDBX_TXN_RDONLY) return true; @@ -1635,42 +1497,35 @@ bool testcase::txn_probe_parking() { if (flipcoin()) { err = mdbx_txn_info(txn_guard.get(), &txn_info, flipcoin()); if (err != MDBX_SUCCESS) - failure("mdbx_txn_info(1), state 0x%x, err %d", - state = mdbx_txn_flags(txn_guard.get()), err); + failure("mdbx_txn_info(1), state 0x%x, err %d", state = mdbx_txn_flags(txn_guard.get()), err); } if (osal_multiactor_mode() && !mode_readonly()) { - while (flipcoin() && - ((state = mdbx_txn_flags(txn_guard.get())) & MDBX_TXN_OUSTED) == 0) + while (flipcoin() && ((state = mdbx_txn_flags(txn_guard.get())) & MDBX_TXN_OUSTED) == 0) osal_udelay(4242); } if (flipcoin()) { err = mdbx_txn_info(txn_guard.get(), &txn_info, flipcoin()); if (err != MDBX_SUCCESS) - failure("mdbx_txn_info(2), state 0x%x, err %d", - state = mdbx_txn_flags(txn_guard.get()), err); + failure("mdbx_txn_info(2), state 0x%x, err %d", state = mdbx_txn_flags(txn_guard.get()), err); } if (flipcoin()) { MDBX_envinfo env_info; - err = mdbx_env_info_ex(db_guard.get(), txn_guard.get(), &env_info, - sizeof(env_info)); + err = mdbx_env_info_ex(db_guard.get(), txn_guard.get(), &env_info, sizeof(env_info)); if (!autounpark) { if (err != MDBX_BAD_TXN) failure("mdbx_env_info_ex(autounpark=%s), flags 0x%x, unexpected err " "%d, must %d", autounpark ? 
"true" : "false", state, err, MDBX_BAD_TXN); } else if (err != MDBX_SUCCESS) { - if (err != MDBX_OUSTED || - ((state = mdbx_txn_flags(txn_guard.get())) & MDBX_TXN_OUSTED) == 0) - failure("mdbx_env_info_ex(autounpark=%s), flags 0x%x, err %d", - autounpark ? "true" : "false", state, err); + if (err != MDBX_OUSTED || ((state = mdbx_txn_flags(txn_guard.get())) & MDBX_TXN_OUSTED) == 0) + failure("mdbx_env_info_ex(autounpark=%s), flags 0x%x, err %d", autounpark ? "true" : "false", state, err); else { err = mdbx_txn_renew(txn_guard.get()); if (err != MDBX_SUCCESS) - failure("mdbx_txn_renew(), state 0x%x, err %d", - state = mdbx_txn_flags(txn_guard.get()), err); + failure("mdbx_txn_renew(), state 0x%x, err %d", state = mdbx_txn_flags(txn_guard.get()), err); } } } @@ -1679,20 +1534,17 @@ bool testcase::txn_probe_parking() { err = mdbx_txn_unpark(txn_guard.get(), autorestart); if (MDBX_IS_ERROR(err)) { if (err != MDBX_OUSTED || autorestart) - failure("mdbx_txn_unpark(autounpark=%s, autorestart=%s), err %d", - autounpark ? "true" : "false", autorestart ? "true" : "false", - err); + failure("mdbx_txn_unpark(autounpark=%s, autorestart=%s), err %d", autounpark ? "true" : "false", + autorestart ? 
"true" : "false", err); else { err = mdbx_txn_renew(txn_guard.get()); if (err != MDBX_SUCCESS) - failure("mdbx_txn_renew(), state 0x%x, err %d", - state = mdbx_txn_flags(txn_guard.get()), err); + failure("mdbx_txn_renew(), state 0x%x, err %d", state = mdbx_txn_flags(txn_guard.get()), err); } } state = mdbx_txn_flags(txn_guard.get()) & - (MDBX_TXN_RDONLY | MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK | - MDBX_TXN_OUSTED | MDBX_TXN_BLOCKED); + (MDBX_TXN_RDONLY | MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK | MDBX_TXN_OUSTED | MDBX_TXN_BLOCKED); if (state != MDBX_TXN_RDONLY) failure("unexpected txn-state 0x%x", state); return state == MDBX_TXN_RDONLY; diff --git a/test/test.h++ b/test/test.h++ index 39ce1118..861429ff 100644 --- a/test/test.h++ +++ b/test/test.h++ @@ -31,10 +31,9 @@ bool test_execute(const actor_config &config); std::string thunk_param(const actor_config &config); -void testcase_setup(const char *casename, const actor_params ¶ms, - unsigned &last_space_id); -void configure_actor(unsigned &last_space_id, const actor_testcase testcase, - const char *space_id_cstr, actor_params params); +void testcase_setup(const char *casename, const actor_params ¶ms, unsigned &last_space_id); +void configure_actor(unsigned &last_space_id, const actor_testcase testcase, const char *space_id_cstr, + actor_params params); void keycase_setup(const char *casename, actor_params ¶ms); namespace global { @@ -106,34 +105,26 @@ public: this->id = id; this->name = name; review_params = TESTCASE::review_params; - constructor = [](const actor_config &config, - const mdbx_pid_t pid) -> testcase * { + constructor = [](const actor_config &config, const mdbx_pid_t pid) -> testcase * { return new TESTCASE(config, pid); }; add(this); } }; - static bool review_actor_params(const actor_testcase id, actor_params ¶ms, - const unsigned space_id); - static testcase *create_actor(const actor_config &config, - const mdbx_pid_t pid); + static bool review_actor_params(const actor_testcase id, actor_params ¶ms, 
const unsigned space_id); + static testcase *create_actor(const actor_config &config, const mdbx_pid_t pid); }; -#define REGISTER_TESTCASE(NAME) \ - static registry::factory gRegister_##NAME( \ - ac_##NAME, MDBX_STRINGIFY(NAME)) +#define REGISTER_TESTCASE(NAME) \ + static registry::factory gRegister_##NAME(ac_##NAME, MDBX_STRINGIFY(NAME)) class testcase { protected: using data_view = mdbx::slice; static inline data_view iov2dataview(const MDBX_val &v) { - return (v.iov_base && v.iov_len) - ? data_view(static_cast(v.iov_base), v.iov_len) - : data_view(); - } - static inline data_view iov2dataview(const keygen::buffer &b) { - return iov2dataview(b->value); + return (v.iov_base && v.iov_len) ? data_view(static_cast(v.iov_base), v.iov_len) : data_view(); } + static inline data_view iov2dataview(const keygen::buffer &b) { return iov2dataview(b->value); } using Item = std::pair<::mdbx::buffer<>, ::mdbx::buffer<>>; @@ -145,16 +136,13 @@ protected: } struct ItemCompare { const testcase *context; - ItemCompare(const testcase *owner) : context(owner) { - /* The context->txn_guard may be empty/null here */ - } + ItemCompare(const testcase *owner) : context(owner) { /* The context->txn_guard may be empty/null here */ } bool operator()(const Item &a, const Item &b) const { MDBX_val va = dataview2iov(a.first), vb = dataview2iov(b.first); assert(context->txn_guard.get() != nullptr); int cmp = mdbx_cmp(context->txn_guard.get(), context->dbi, &va, &vb); - if (cmp == 0 && - (context->config.params.table_flags & MDBX_DUPSORT) != 0) { + if (cmp == 0 && (context->config.params.table_flags & MDBX_DUPSORT) != 0) { va = dataview2iov(a.second); vb = dataview2iov(b.second); cmp = mdbx_dcmp(context->txn_guard.get(), context->dbi, &va, &vb); @@ -194,61 +182,44 @@ protected: #if SPECULUM_CURSORS scoped_cursor_guard speculum_cursors[5 + 1]; void speculum_prepare_cursors(const Item &item); - bool speculum_check_cursor(const char *where, const char *stage, - const testcase::SET::const_iterator 
&it, - int cursor_err, const MDBX_val &cursor_key, - const MDBX_val &cursor_data, + bool speculum_check_cursor(const char *where, const char *stage, const testcase::SET::const_iterator &it, + int cursor_err, const MDBX_val &cursor_key, const MDBX_val &cursor_data, MDBX_cursor *cursor) const; - bool speculum_check_cursor(const char *where, const char *stage, - const testcase::SET::const_iterator &it, - MDBX_cursor *cursor, - const MDBX_cursor_op op) const; - void speculum_render(const testcase::SET::const_iterator &it, - const MDBX_cursor *ref) const; + bool speculum_check_cursor(const char *where, const char *stage, const testcase::SET::const_iterator &it, + MDBX_cursor *cursor, const MDBX_cursor_op op) const; + void speculum_render(const testcase::SET::const_iterator &it, const MDBX_cursor *ref) const; #endif /* SPECULUM_CURSORS */ - bool speculum_check_iterator(const char *where, const char *stage, - const testcase::SET::const_iterator &it, - const MDBX_val &k, const MDBX_val &v, - MDBX_cursor *cursor) const; + bool speculum_check_iterator(const char *where, const char *stage, const testcase::SET::const_iterator &it, + const MDBX_val &k, const MDBX_val &v, MDBX_cursor *cursor) const; - void verbose(const char *where, const char *stage, - const testcase::SET::const_iterator &it) const; - void verbose(const char *where, const char *stage, const MDBX_val &k, - const MDBX_val &v, int err = MDBX_SUCCESS) const; + void verbose(const char *where, const char *stage, const testcase::SET::const_iterator &it) const; + void verbose(const char *where, const char *stage, const MDBX_val &k, const MDBX_val &v, + int err = MDBX_SUCCESS) const; bool is_same(const Item &a, const Item &b) const; - bool is_same(const SET::const_iterator &it, const MDBX_val &k, - const MDBX_val &v) const; + bool is_same(const SET::const_iterator &it, const MDBX_val &k, const MDBX_val &v) const; bool speculum_verify(); bool check_batch_get(); - int insert(const keygen::buffer &akey, const 
keygen::buffer &adata, - MDBX_put_flags_t flags); - int replace(const keygen::buffer &akey, const keygen::buffer &new_value, - const keygen::buffer &old_value, MDBX_put_flags_t flags, - bool hush_keygen_mistakes = true); + int insert(const keygen::buffer &akey, const keygen::buffer &adata, MDBX_put_flags_t flags); + int replace(const keygen::buffer &akey, const keygen::buffer &new_value, const keygen::buffer &old_value, + MDBX_put_flags_t flags, bool hush_keygen_mistakes = true); int remove(const keygen::buffer &akey, const keygen::buffer &adata); - static int hsr_callback(const MDBX_env *env, const MDBX_txn *txn, - mdbx_pid_t pid, mdbx_tid_t tid, uint64_t laggard, - unsigned gap, size_t space, - int retry) MDBX_CXX17_NOEXCEPT; + static int hsr_callback(const MDBX_env *env, const MDBX_txn *txn, mdbx_pid_t pid, mdbx_tid_t tid, uint64_t laggard, + unsigned gap, size_t space, int retry) MDBX_CXX17_NOEXCEPT; MDBX_env_flags_t actual_env_mode{MDBX_ENV_DEFAULTS}; - bool is_nested_txn_available() const { - return (actual_env_mode & MDBX_WRITEMAP) == 0; - } + bool is_nested_txn_available() const { return (actual_env_mode & MDBX_WRITEMAP) == 0; } void kick_progress(bool active) const; void db_prepare(); void db_open(); void db_close(); - virtual void txn_begin(bool readonly, - MDBX_txn_flags_t flags = MDBX_TXN_READWRITE); + virtual void txn_begin(bool readonly, MDBX_txn_flags_t flags = MDBX_TXN_READWRITE); int breakable_commit(); virtual void txn_end(bool abort); int breakable_restart(); - void txn_restart(bool abort, bool readonly, - MDBX_txn_flags_t flags = MDBX_TXN_READWRITE); + void txn_restart(bool abort, bool readonly, MDBX_txn_flags_t flags = MDBX_TXN_READWRITE); void cursor_open(MDBX_dbi handle); void cursor_close(); void cursor_renew(); @@ -258,13 +229,11 @@ protected: void fetch_canary(); void update_canary(uint64_t increment); - bool checkdata(const char *step, MDBX_dbi handle, MDBX_val key2check, - MDBX_val expected_valued); + bool checkdata(const char *step, 
MDBX_dbi handle, MDBX_val key2check, MDBX_val expected_valued); unsigned txn_underutilization_x256(MDBX_txn *txn) const; using tablename_buf = char[32]; - const char *db_tablename(tablename_buf &buffer, - const char *suffix = "") const; + const char *db_tablename(tablename_buf &buffer, const char *suffix = "") const; MDBX_dbi db_table_open(bool create, bool expect_failure = false); void db_table_drop(MDBX_dbi handle); void db_table_clear(MDBX_dbi handle, MDBX_txn *txn = nullptr); @@ -278,22 +247,17 @@ protected: bool should_continue(bool check_timeout_only = false) const; bool MDBX_PRINTF_ARGS(2, 3) failure(const char *fmt, ...) const; - void generate_pair(const keygen::serial_t serial, keygen::buffer &out_key, - keygen::buffer &out_value, keygen::serial_t data_age) { + void generate_pair(const keygen::serial_t serial, keygen::buffer &out_key, keygen::buffer &out_value, + keygen::serial_t data_age) { keyvalue_maker.pair(serial, out_key, out_value, data_age, false); } - void generate_pair(const keygen::serial_t serial) { - keyvalue_maker.pair(serial, key, data, 0, true); - } + void generate_pair(const keygen::serial_t serial) { keyvalue_maker.pair(serial, key, data, 0, true); } - bool mode_readonly() const { - return (config.params.mode_flags & MDBX_RDONLY) ? true : false; - } + bool mode_readonly() const { return (config.params.mode_flags & MDBX_RDONLY) ? 
true : false; } public: - testcase(const actor_config &config, const mdbx_pid_t pid) - : config(config), pid(pid) { + testcase(const actor_config &config, const mdbx_pid_t pid) : config(config), pid(pid) { start_timestamp.reset(); memset(&last, 0, sizeof(last)); } @@ -301,11 +265,9 @@ public: static bool review_params(actor_params ¶ms, unsigned space_id) { // silently fix key/data length for fixed-length modes params.prng_seed += bleach32(space_id); - if ((params.table_flags & MDBX_INTEGERKEY) && - params.keylen_min != params.keylen_max) + if ((params.table_flags & MDBX_INTEGERKEY) && params.keylen_min != params.keylen_max) params.keylen_min = params.keylen_max; - if ((params.table_flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) && - params.datalen_min != params.datalen_max) + if ((params.table_flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) && params.datalen_min != params.datalen_max) params.datalen_min = params.datalen_max; return true; } @@ -330,8 +292,7 @@ protected: unsigned edge2count(uint64_t edge); public: - testcase_ttl(const actor_config &config, const mdbx_pid_t pid) - : inherited(config, pid) {} + testcase_ttl(const actor_config &config, const mdbx_pid_t pid) : inherited(config, pid) {} bool setup() override; bool run() override; }; diff --git a/test/try.c++ b/test/try.c++ index 708122ac..4333a996 100644 --- a/test/try.c++ +++ b/test/try.c++ @@ -5,8 +5,7 @@ class testcase_try : public testcase { public: - testcase_try(const actor_config &config, const mdbx_pid_t pid) - : testcase(config, pid) {} + testcase_try(const actor_config &config, const mdbx_pid_t pid) : testcase(config, pid) {} bool run() override; }; REGISTER_TESTCASE(try); diff --git a/test/ttl.c++ b/test/ttl.c++ index d22be384..a4482e1f 100644 --- a/test/ttl.c++ +++ b/test/ttl.c++ @@ -24,17 +24,14 @@ REGISTER_TESTCASE(ttl); unsigned testcase_ttl::edge2count(uint64_t edge) { const double rnd = u64_to_double1(prng64_map1_white(edge)); - const unsigned count = - 
unsigned(std::lrint(std::pow(sliding.max_step_size, rnd))); + const unsigned count = unsigned(std::lrint(std::pow(sliding.max_step_size, rnd))); // average value: (X - 1) / ln(X), where X = sliding.max_step_size return count; } unsigned testcase_ttl::edge2window(uint64_t edge) { const double rnd = u64_to_double1(bleach64(edge)); - const unsigned size = - sliding.max_window_size - - unsigned(std::lrint(std::pow(sliding.max_window_size, rnd))); + const unsigned size = sliding.max_window_size - unsigned(std::lrint(std::pow(sliding.max_window_size, rnd))); // average value: Y - (Y - 1) / ln(Y), where Y = sliding.max_window_size return size; } @@ -48,20 +45,17 @@ static inline double estimate(const double x, const double y) { } bool testcase_ttl::setup() { - const unsigned window_top_lower = - 7 /* нижний предел для верхней границы диапазона, в котором будет - стохастически колебаться размер окна */ + const unsigned window_top_lower = 7 /* нижний предел для верхней границы диапазона, в котором будет + стохастически колебаться размер окна */ ; - const unsigned count_top_lower = - 7 /* нижний предел для верхней границы диапазона, в котором будет - стохастически колебаться кол-во записей добавляемых на одном шаге */ + const unsigned count_top_lower = 7 /* нижний предел для верхней границы диапазона, в котором будет + стохастически колебаться кол-во записей добавляемых на одном шаге */ ; /* для параметризации используем подходящие параметры, * которые не имеют здесь смысла в первоначальном значении. */ - const double ratio = - double(config.params.batch_read ? config.params.batch_read : 1) / - double(config.params.batch_write ? config.params.batch_write : 1); + const double ratio = double(config.params.batch_read ? config.params.batch_read : 1) / + double(config.params.batch_write ? 
config.params.batch_write : 1); /* проще найти двоичным поиском (вариация метода Ньютона) */ double hi = config.params.test_nops, lo = 1; @@ -82,8 +76,7 @@ bool testcase_ttl::setup() { if (sliding.max_window_size < window_top_lower) sliding.max_window_size = window_top_lower; - while (estimate(sliding.max_step_size, sliding.max_window_size) > - config.params.test_nops * 2.0) { + while (estimate(sliding.max_step_size, sliding.max_window_size) > config.params.test_nops * 2.0) { if (ratio * sliding.max_step_size > sliding.max_window_size) { if (sliding.max_step_size < count_top_lower) break; @@ -95,8 +88,7 @@ bool testcase_ttl::setup() { } } - log_verbose("come up window_max %u from `batch_read`", - sliding.max_window_size); + log_verbose("come up window_max %u from `batch_read`", sliding.max_window_size); log_verbose("come up step_max %u from `batch_write`", sliding.max_step_size); return inherited::setup(); } @@ -113,9 +105,7 @@ bool testcase_ttl::run() { key = keygen::alloc(config.params.keylen_max); data = keygen::alloc(config.params.datalen_max); const MDBX_put_flags_t insert_flags = - (config.params.table_flags & MDBX_DUPSORT) - ? MDBX_NODUPDATA - : MDBX_NODUPDATA | MDBX_NOOVERWRITE; + (config.params.table_flags & MDBX_DUPSORT) ? MDBX_NODUPDATA : MDBX_NODUPDATA | MDBX_NOOVERWRITE; std::deque> fifo; uint64_t serial = 0; @@ -128,20 +118,17 @@ bool testcase_ttl::run() { while (true) { const uint64_t salt = prng64_white(seed) /* mdbx_txn_id(txn_guard.get()) */; - const unsigned window_width = - (!should_continue() || flipcoin_x4()) ? 0 : edge2window(salt); + const unsigned window_width = (!should_continue() || flipcoin_x4()) ? 
0 : edge2window(salt); unsigned head_count = edge2count(salt); - log_debug("ttl: step #%" PRIu64 " (serial %" PRIu64 - ", window %u, count %u) salt %" PRIu64, - nops_completed, serial, window_width, head_count, salt); + log_debug("ttl: step #%" PRIu64 " (serial %" PRIu64 ", window %u, count %u) salt %" PRIu64, nops_completed, serial, + window_width, head_count, salt); if (window_width || flipcoin()) { clear_stepbystep_passed += window_width == 0; while (fifo.size() > window_width) { uint64_t tail_serial = fifo.back().first; const unsigned tail_count = fifo.back().second; - log_trace("ttl: pop-tail (serial %" PRIu64 ", count %u)", tail_serial, - tail_count); + log_trace("ttl: pop-tail (serial %" PRIu64 ", count %u)", tail_serial, tail_count); fifo.pop_back(); for (unsigned n = 0; n < tail_count; ++n) { log_trace("ttl: remove-tail %" PRIu64, tail_serial); @@ -177,10 +164,8 @@ bool testcase_ttl::run() { return false; } - if (!keyspace_overflow && (should_continue() || !clear_wholetable_passed || - !clear_stepbystep_passed)) { - unsigned underutilization_x256 = - txn_underutilization_x256(txn_guard.get()); + if (!keyspace_overflow && (should_continue() || !clear_wholetable_passed || !clear_stepbystep_passed)) { + unsigned underutilization_x256 = txn_underutilization_x256(txn_guard.get()); if (dbfull_passed > underutilization_x256) { log_notice("ttl: skip head-grow to avoid one more dbfull (was %u, " "underutilization %.2f%%)", @@ -194,8 +179,7 @@ bool testcase_ttl::run() { generate_pair(serial); err = insert(key, data, insert_flags); if (unlikely(err != MDBX_SUCCESS)) { - if ((err == MDBX_TXN_FULL || err == MDBX_MAP_FULL) && - config.params.ignore_dbfull) { + if ((err == MDBX_TXN_FULL || err == MDBX_MAP_FULL) && config.params.ignore_dbfull) { log_notice("ttl: head-insert skip due '%s'", mdbx_strerror(err)); txn_restart(true, false); serial = fifo.front().first; @@ -227,8 +211,7 @@ bool testcase_ttl::run() { } loops += 1; } else if (fifo.empty()) { - log_notice("ttl: 
done %u whole loops, %" PRIu64 " ops, %" PRIu64 " items", - loops, nops_completed, serial); + log_notice("ttl: done %u whole loops, %" PRIu64 " ops, %" PRIu64 " items", loops, nops_completed, serial); rc = true; break; } else { diff --git a/test/utils.c++ b/test/utils.c++ index f96f4ffd..442e4a21 100644 --- a/test/utils.c++ +++ b/test/utils.c++ @@ -24,8 +24,7 @@ std::string format(const char *fmt, ...) { std::string result; result.reserve((size_t)needed + 1); result.resize((size_t)needed, '\0'); - MDBX_MAYBE_UNUSED int actual = - vsnprintf((char *)result.data(), result.capacity(), fmt, ones); + MDBX_MAYBE_UNUSED int actual = vsnprintf((char *)result.data(), result.capacity(), fmt, ones); assert(actual == needed); (void)actual; va_end(ones); @@ -50,8 +49,7 @@ std::string data2hex(const void *ptr, size_t bytes, simple_checksum &checksum) { return result; } -bool hex2data(const char *hex_begin, const char *hex_end, void *ptr, - size_t bytes, simple_checksum &checksum) { +bool hex2data(const char *hex_begin, const char *hex_end, void *ptr, size_t bytes, simple_checksum &checksum) { if (bytes * 2 != (size_t)(hex_end - hex_begin)) return false; @@ -85,8 +83,7 @@ bool hex2data(const char *hex_begin, const char *hex_end, void *ptr, } bool is_samedata(const MDBX_val *a, const MDBX_val *b) { - return a->iov_len == b->iov_len && - memcmp(a->iov_base, b->iov_base, a->iov_len) == 0; + return a->iov_len == b->iov_len && memcmp(a->iov_base, b->iov_base, a->iov_len) == 0; } //----------------------------------------------------------------------------- @@ -96,13 +93,9 @@ uint64_t prng64_white(uint64_t &state) { return bleach64(state); } -uint32_t prng32_fast(uint64_t &state) { - return uint32_t(prng64_careless(state) >> 32); -} +uint32_t prng32_fast(uint64_t &state) { return uint32_t(prng64_careless(state) >> 32); } -uint32_t prng32_white(uint64_t &state) { - return bleach32(uint32_t(prng64_careless(state) >> 32)); -} +uint32_t prng32_white(uint64_t &state) { return 
bleach32(uint32_t(prng64_careless(state) >> 32)); } void prng_fill(uint64_t &state, void *ptr, size_t bytes) { uint32_t u32 = prng32_fast(state); @@ -174,9 +167,7 @@ bool flipcoin() { return prng32() & 1; } bool flipcoin_x2() { return (prng32() & 3) == 0; } bool flipcoin_x3() { return (prng32() & 7) == 0; } bool flipcoin_x4() { return (prng32() & 15) == 0; } -bool flipcoin_n(unsigned n) { - return (prng64() & ((UINT64_C(1) << n) - 1)) == 0; -} +bool flipcoin_n(unsigned n) { return (prng64() & ((UINT64_C(1) << n) - 1)) == 0; } bool jitter(unsigned probability_percent) { const uint32_t top = UINT32_MAX - UINT32_MAX % 100; @@ -201,8 +192,7 @@ void jitter_delay(bool extra) { osal_yield(); cpu_relax(); if (dice > 2) { - size_t us = - prng32() & (extra ? 0xffff /* 656 ms */ : 0x3ff /* 1 ms */); + size_t us = prng32() & (extra ? 0xffff /* 656 ms */ : 0x3ff /* 1 ms */); log_trace("== jitter.delay: %0.6f", us / 1000000.0); osal_udelay(us); } diff --git a/test/utils.h++ b/test/utils.h++ index 0dd7c4d8..caa34049 100644 --- a/test/utils.h++ +++ b/test/utils.h++ @@ -4,13 +4,11 @@ #pragma once #include "base.h++" -#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) || \ - !defined(__ORDER_BIG_ENDIAN__) +#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) || !defined(__ORDER_BIG_ENDIAN__) #error __BYTE_ORDER__ should be defined. #endif -#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ && \ - __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ && __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ #error Unsupported byte order. 
#endif @@ -21,16 +19,14 @@ #ifndef bswap32 #define bswap32(v) __builtin_bswap32(v) #endif -#if (__GNUC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16)) && \ - !defined(bswap16) +#if (__GNUC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16)) && !defined(bswap16) #define bswap16(v) __builtin_bswap16(v) #endif #elif defined(_MSC_VER) #if _MSC_FULL_VER < 190024215 -#pragma message( \ - "It is recommended to use Visual Studio 2015 (MSC 19.0) or newer.") +#pragma message("It is recommended to use Visual Studio 2015 (MSC 19.0) or newer.") #endif #define bswap64(v) _byteswap_uint64(v) @@ -60,12 +56,9 @@ #define bswap64(v) __bswap_64(v) #else static inline uint64_t bswap64(uint64_t v) { - return v << 56 | v >> 56 | ((v << 40) & UINT64_C(0x00ff000000000000)) | - ((v << 24) & UINT64_C(0x0000ff0000000000)) | - ((v << 8) & UINT64_C(0x000000ff00000000)) | - ((v >> 8) & UINT64_C(0x00000000ff0000000)) | - ((v >> 24) & UINT64_C(0x0000000000ff0000)) | - ((v >> 40) & UINT64_C(0x000000000000ff00)); + return v << 56 | v >> 56 | ((v << 40) & UINT64_C(0x00ff000000000000)) | ((v << 24) & UINT64_C(0x0000ff0000000000)) | + ((v << 8) & UINT64_C(0x000000ff00000000)) | ((v >> 8) & UINT64_C(0x00000000ff0000000)) | + ((v >> 24) & UINT64_C(0x0000000000ff0000)) | ((v >> 40) & UINT64_C(0x000000000000ff00)); } #endif #endif /* bswap64 */ @@ -75,8 +68,7 @@ static inline uint64_t bswap64(uint64_t v) { #define bswap32(v) __bswap_32(v) #else static inline uint32_t bswap32(uint32_t v) { - return v << 24 | v >> 24 | ((v << 8) & UINT32_C(0x00ff0000)) | - ((v >> 8) & UINT32_C(0x0000ff00)); + return v << 24 | v >> 24 | ((v << 8) & UINT32_C(0x00ff0000)) | ((v >> 8) & UINT32_C(0x0000ff00)); } #endif #endif /* bswap32 */ @@ -140,8 +132,7 @@ template static inline T load(const void *ptr) { if (MDBX_UNALIGNED_OK >= sizeof(T)) return *(const T *)ptr; else { -#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ - defined(_M_X64) || defined(_M_IA64) +#if defined(__unaligned) || defined(_M_ARM) || 
defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64) return *(const T __unaligned *)ptr; #else T local; @@ -155,8 +146,7 @@ template static inline void store(void *ptr, const T &value) { if (MDBX_UNALIGNED_OK >= sizeof(T)) *(T *)ptr = value; else { -#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || \ - defined(_M_X64) || defined(_M_IA64) +#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64) *((T __unaligned *)ptr) = value; #else memcpy(ptr, &value, sizeof(T)); @@ -169,9 +159,7 @@ template static inline void store(void *ptr, const T &value) { //----------------------------------------------------------------------------- #ifndef rot64 -static inline uint64_t rot64(uint64_t v, unsigned s) { - return (v >> s) | (v << (64 - s)); -} +static inline uint64_t rot64(uint64_t v, unsigned s) { return (v >> s) | (v << (64 - s)); } #endif /* rot64 */ static inline bool is_power2(size_t x) { return (x & (x - 1)) == 0; } @@ -203,11 +191,9 @@ static inline void memory_barrier(void) { #endif #elif defined(__SUNPRO_C) || defined(__sun) || defined(sun) __machine_rw_barrier(); -#elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) && \ - (defined(HP_IA64) || defined(__ia64)) +#elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) && (defined(HP_IA64) || defined(__ia64)) _Asm_mf(); -#elif defined(_AIX) || defined(__ppc__) || defined(__powerpc__) || \ - defined(__ppc64__) || defined(__powerpc64__) +#elif defined(_AIX) || defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__) __lwsync(); #else #error "Could not guess the kind of compiler, please report to us." 
@@ -217,8 +203,7 @@ static inline void memory_barrier(void) { static inline void cpu_relax() { #if defined(__ia32__) _mm_pause(); -#elif defined(_WIN32) || defined(_WIN64) || defined(_WINDOWS) || \ - defined(YieldProcessor) +#elif defined(_WIN32) || defined(_WIN64) || defined(_WINDOWS) || defined(YieldProcessor) YieldProcessor(); #else /* nope */ @@ -243,9 +228,7 @@ struct simple_checksum { push((uint32_t)(data >> 32)); } - void push(const bool data) { - push(data ? UINT32_C(0x780E) : UINT32_C(0xFA18E)); - } + void push(const bool data) { push(data ? UINT32_C(0x780E) : UINT32_C(0xFA18E)); } void push(const void *ptr, size_t bytes) { const uint8_t *data = (const uint8_t *)ptr; @@ -269,12 +252,9 @@ struct simple_checksum { }; std::string data2hex(const void *ptr, size_t bytes, simple_checksum &checksum); -bool hex2data(const char *hex_begin, const char *hex_end, void *ptr, - size_t bytes, simple_checksum &checksum); +bool hex2data(const char *hex_begin, const char *hex_end, void *ptr, size_t bytes, simple_checksum &checksum); bool is_samedata(const MDBX_val *a, const MDBX_val *b); -inline bool is_samedata(const MDBX_val &a, const MDBX_val &b) { - return is_samedata(&a, &b); -} +inline bool is_samedata(const MDBX_val &a, const MDBX_val &b) { return is_samedata(&a, &b); } std::string format(const char *fmt, ...); static inline uint64_t bleach64(uint64_t x) { @@ -300,22 +280,15 @@ static inline uint32_t bleach32(uint32_t x) { return x; } -static inline uint64_t prng64_map1_careless(uint64_t state) { - return state * UINT64_C(6364136223846793005) + 1; -} +static inline uint64_t prng64_map1_careless(uint64_t state) { return state * UINT64_C(6364136223846793005) + 1; } static inline uint64_t prng64_map2_careless(uint64_t state) { - return (state + UINT64_C(1442695040888963407)) * - UINT64_C(6364136223846793005); + return (state + UINT64_C(1442695040888963407)) * UINT64_C(6364136223846793005); } -static inline uint64_t prng64_map1_white(uint64_t state) { - return 
bleach64(prng64_map1_careless(state)); -} +static inline uint64_t prng64_map1_white(uint64_t state) { return bleach64(prng64_map1_careless(state)); } -static inline uint64_t prng64_map2_white(uint64_t state) { - return bleach64(prng64_map2_careless(state)); -} +static inline uint64_t prng64_map2_white(uint64_t state) { return bleach64(prng64_map2_careless(state)); } static inline uint64_t prng64_careless(uint64_t &state) { state = prng64_map1_careless(state); From b687e835e90ae73c29453987c2edcb7d234b2ca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 11 Dec 2024 21:51:56 +0300 Subject: [PATCH 382/443] =?UTF-8?q?mdbx:=20=D0=B2=D1=8B=D0=BF=D1=83=D1=81?= =?UTF-8?q?=D0=BA=200.13.2=20"=D0=9F=D1=80=D0=BE=D1=88=D0=BB=D0=BE=D0=B3?= =?UTF-8?q?=D0=BE=D0=B4=D0=BD=D0=B8=D0=B9=20=D0=A1=D0=BD=D0=B5=D0=B3"=20(L?= =?UTF-8?q?ast=20Year's=20Snow).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Поддерживающий выпуск с исправлением обнаруженных ошибок и устранением недочетов в день рождения и в память об [Алекса́ндре Миха́йловиче Тата́рском](https://ru.wikipedia.org/wiki/Татарский,_Александр_Михайлович), российском режиссёре-мультипликаторе, создавшем такие знаменитые мультфильмы как "Падал прошлогодний снег", "Пластилиновая ворона", заставку "Спокойной ночи, малыши!" и многие другие шедевры нашего детства. За перечнем доработок и изменений обращайтесь к [ChangeLog](https://libmdbx.dqdkfa.ru/md__change_log.html). 
git diff' stat: 151 files changed, 10647 insertions(+), 14952 deletions(-) Signed-off-by: Леонид Юрьев (Leonid Yuriev) --- ChangeLog.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index cff93e63..3750b7d3 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -5,9 +5,14 @@ English version [by liar Google](https://gitflic-ru.translate.goog/project/erthi and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md). -## v0.13.2 в процессе +## v0.13.2 "Прошлогодний Снег" (Last Year's Snow) от 2024-12-11 -Поддерживающий выпуск с исправлением обнаруженных ошибок и устранением недочетов. +Поддерживающий выпуск с исправлением обнаруженных ошибок и устранением недочетов +в день рождения и в память об [Алекса́ндре Миха́йловиче Тата́рском](https://ru.wikipedia.org/wiki/Татарский,_Александр_Михайлович), +российском режиссёре-мультипликаторе, создавшем такие знаменитые +мультфильмы как "Падал прошлогодний снег", "Пластилиновая ворона", +заставку "Спокойной ночи, малыши!" и многие другие шедевры нашего +детства. 
Новое: From f2dc60aa53ad6650da4521c2a0d9e77806f8a595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 12 Dec 2024 11:20:34 +0300 Subject: [PATCH 383/443] =?UTF-8?q?mdbx-cmake:=20=D0=BD=D0=BE=D0=B2=D1=8B?= =?UTF-8?q?=D0=B5=20=D0=BD=D0=B0=D1=81=D1=82=D1=80=D0=BE=D0=B9=D0=BA=D0=B8?= =?UTF-8?q?=20cmake-format=20(=D0=BA=D0=BE=D1=81=D0=BC=D0=B5=D1=82=D0=B8?= =?UTF-8?q?=D0=BA=D0=B0).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .cmake-format.yaml | 3 + CMakeLists.txt | 371 ++++++++++++++----------------------------- cmake/compiler.cmake | 329 +++++++++++++------------------------- cmake/profile.cmake | 24 +-- cmake/utils.cmake | 112 ++++--------- test/CMakeLists.txt | 121 +++++--------- 6 files changed, 308 insertions(+), 652 deletions(-) create mode 100644 .cmake-format.yaml diff --git a/.cmake-format.yaml b/.cmake-format.yaml new file mode 100644 index 00000000..674d01cb --- /dev/null +++ b/.cmake-format.yaml @@ -0,0 +1,3 @@ +format: + line_width: 120 + tab_size: 2 diff --git a/CMakeLists.txt b/CMakeLists.txt index d3fb4704..6719ed24 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,25 +1,20 @@ -# Copyright (c) 2020-2024 Леонид Юрьев aka Leonid Yuriev +# Copyright (c) 2020-2024 Леонид Юрьев aka Leonid Yuriev ############################################### # SPDX-License-Identifier: Apache-2.0 # -# Donations are welcome to ETH `0xD104d8f8B2dC312aaD74899F83EBf3EEBDC1EA3A`. -# Всё будет хорошо! +# Donations are welcome to ETH `0xD104d8f8B2dC312aaD74899F83EBf3EEBDC1EA3A`. Всё будет хорошо! -# libmdbx = { Revised and extended descendant of Symas LMDB. } Please see -# README.md at https://gitflic.ru/project/erthink/libmdbx +# libmdbx = { Revised and extended descendant of Symas LMDB. 
} Please see README.md at +# https://gitflic.ru/project/erthink/libmdbx # -# Libmdbx is superior to LMDB in terms of features and reliability, not inferior -# in performance. libmdbx works on Linux, FreeBSD, MacOS X and other systems -# compliant with POSIX.1-2008, but also support Windows as a complementary -# platform. +# Libmdbx is superior to LMDB in terms of features and reliability, not inferior in performance. libmdbx works on Linux, +# FreeBSD, MacOS X and other systems compliant with POSIX.1-2008, but also support Windows as a complementary platform. # -# The next version is under active non-public development and will be released -# as MithrilDB and libmithrildb for libraries & packages. Admittedly mythical -# Mithril is resembling silver but being stronger and lighter than steel. +# The next version is under active non-public development and will be released as MithrilDB and libmithrildb for +# libraries & packages. Admittedly mythical Mithril is resembling silver but being stronger and lighter than steel. # Therefore MithrilDB is rightly relevant name. # -# MithrilDB will be radically different from libmdbx by the new database format -# and API based on C++17, as well as the Apache 2.0 License. The goal of this -# revolution is to provide a clearer and robust API, add more features and new +# MithrilDB will be radically different from libmdbx by the new database format and API based on C++17, as well as the +# Apache 2.0 License. The goal of this revolution is to provide a clearer and robust API, add more features and new # valuable properties of database. if(CMAKE_VERSION VERSION_LESS 3.8.2) @@ -190,8 +185,7 @@ else() message( FATAL_ERROR "\nThe set of libmdbx source code files is incomplete! 
" - "Instead just follow the https://libmdbx.dqdkfa.ru/usage.html " - "PLEASE, AVOID USING ANY OTHER TECHNIQUES.") + "Instead just follow the https://libmdbx.dqdkfa.ru/usage.html " "PLEASE, AVOID USING ANY OTHER TECHNIQUES.") endif() # Provide version @@ -199,15 +193,12 @@ include(cmake/utils.cmake) set(MDBX_BUILD_METADATA "${MDBX_BUILD_METADATA}" CACHE STRING "An extra/custom information provided during libmdbx build") -semver_provide(MDBX "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}" - "${MDBX_BUILD_METADATA}" FALSE) +semver_provide(MDBX "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}" "${MDBX_BUILD_METADATA}" FALSE) message(STATUS "libmdbx version is ${MDBX_VERSION}") if(DEFINED PROJECT_NAME) - option( - MDBX_FORCE_BUILD_AS_MAIN_PROJECT - "Force libmdbx to full control build options even it added as a subdirectory to your project." - OFF) + option(MDBX_FORCE_BUILD_AS_MAIN_PROJECT + "Force libmdbx to full control build options even it added as a subdirectory to your project." OFF) endif() if(DEFINED PROJECT_NAME AND NOT MDBX_FORCE_BUILD_AS_MAIN_PROJECT) @@ -230,10 +221,7 @@ if(NOT MDBX_AMALGAMATED_SOURCE) include(CTest) option(MDBX_ENABLE_TESTS "Build libmdbx tests." ${BUILD_TESTING}) elseif(DEFINED MDBX_ENABLE_TESTS AND MDBX_ENABLE_TESTS) - message( - WARNING - "MDBX_ENABLE_TESTS=${MDBX_ENABLE_TESTS}: But amalgamated source code don't includes tests." 
- ) + message(WARNING "MDBX_ENABLE_TESTS=${MDBX_ENABLE_TESTS}: But amalgamated source code don't includes tests.") set(MDBX_ENABLE_TESTS OFF) endif() @@ -242,8 +230,7 @@ if(NOT CMAKE_CXX_COMPILER_LOADED) include(CheckLanguage) if(NOT DEFINED MDBX_BUILD_CXX OR MDBX_BUILD_CXX - OR (NOT MDBX_AMALGAMATED_SOURCE AND (NOT DEFINED MDBX_ENABLE_TESTS - OR MDBX_ENABLE_TESTS))) + OR (NOT MDBX_AMALGAMATED_SOURCE AND (NOT DEFINED MDBX_ENABLE_TESTS OR MDBX_ENABLE_TESTS))) check_language(CXX) if(CMAKE_CXX_COMPILER) enable_language(CXX) @@ -257,10 +244,7 @@ endif() if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release - CACHE - STRING - "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." - FORCE) + CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." FORCE) endif() string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPERCASE) @@ -303,10 +287,8 @@ include(GNUInstallDirs) if(CMAKE_C_COMPILER_ID STREQUAL "MSVC" AND MSVC_VERSION LESS 1900) message( - SEND_ERROR - "MSVC compiler ${MSVC_VERSION} is too old for building MDBX." - " At least \"Microsoft C/C++ Compiler\" version 19.0.24234.1 (Visual Studio 2015 Update 3) is required." - ) + SEND_ERROR "MSVC compiler ${MSVC_VERSION} is too old for building MDBX." 
+ " At least \"Microsoft C/C++ Compiler\" version 19.0.24234.1 (Visual Studio 2015 Update 3) is required.") endif() if(NOT DEFINED THREADS_PREFER_PTHREAD_FLAG) @@ -350,38 +332,29 @@ if(SUBPROJECT) option(BUILD_SHARED_LIBS "Build shared libraries (DLLs)" OFF) endif() if(NOT DEFINED CMAKE_POSITION_INDEPENDENT_CODE) - option(CMAKE_POSITION_INDEPENDENT_CODE - "Generate position independent (PIC)" ON) + option(CMAKE_POSITION_INDEPENDENT_CODE "Generate position independent (PIC)" ON) endif() set(MDBX_MANAGE_BUILD_FLAGS_DEFAULT OFF) else() option(BUILD_SHARED_LIBS "Build shared libraries (DLLs)" ON) - option(CMAKE_POSITION_INDEPENDENT_CODE "Generate position independent (PIC)" - ON) + option(CMAKE_POSITION_INDEPENDENT_CODE "Generate position independent (PIC)" ON) if(CC_HAS_ARCH_NATIVE) - option(BUILD_FOR_NATIVE_CPU "Generate code for the compiling machine CPU" - OFF) + option(BUILD_FOR_NATIVE_CPU "Generate code for the compiling machine CPU" OFF) endif() if(CMAKE_INTERPROCEDURAL_OPTIMIZATION_AVAILABLE OR GCC_LTO_AVAILABLE OR MSVC_LTO_AVAILABLE OR CLANG_LTO_AVAILABLE) - if((CMAKE_CONFIGURATION_TYPES OR NOT CMAKE_BUILD_TYPE_UPPERCASE STREQUAL - "DEBUG") - AND ((MSVC_LTO_AVAILABLE AND NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 19 - ) - OR (GCC_LTO_AVAILABLE AND NOT CMAKE_C_COMPILER_VERSION VERSION_LESS - 7) - OR (CLANG_LTO_AVAILABLE AND NOT CMAKE_C_COMPILER_VERSION - VERSION_LESS 5) - )) + if((CMAKE_CONFIGURATION_TYPES OR NOT CMAKE_BUILD_TYPE_UPPERCASE STREQUAL "DEBUG") + AND ((MSVC_LTO_AVAILABLE AND NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 19) + OR (GCC_LTO_AVAILABLE AND NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 7) + OR (CLANG_LTO_AVAILABLE AND NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 5))) set(INTERPROCEDURAL_OPTIMIZATION_DEFAULT ON) else() set(INTERPROCEDURAL_OPTIMIZATION_DEFAULT OFF) endif() - option(INTERPROCEDURAL_OPTIMIZATION - "Enable interprocedural/LTO optimization." + option(INTERPROCEDURAL_OPTIMIZATION "Enable interprocedural/LTO optimization." 
${INTERPROCEDURAL_OPTIMIZATION_DEFAULT}) endif() @@ -457,8 +430,8 @@ else() if(NOT MDBX_AMALGAMATED_SOURCE) find_program(VALGRIND valgrind) if(VALGRIND) - # (LY) cmake is ugly and nasty. Therefore memcheck-options should be - # defined before including ctest. Otherwise ctest may ignore it. + # (LY) cmake is ugly and nasty. Therefore memcheck-options should be defined before including ctest. Otherwise + # ctest may ignore it. set(MEMORYCHECK_SUPPRESSIONS_FILE "${CMAKE_CURRENT_SOURCE_DIR}/test/valgrind_suppress.txt" CACHE FILEPATH "Suppressions file for Valgrind" FORCE) @@ -483,19 +456,15 @@ else() if(UNIX) find_program(CLANG_FORMAT NAMES clang-format-13 clang-format) if(CLANG_FORMAT) - execute_process(COMMAND ${CLANG_FORMAT} "--version" - OUTPUT_VARIABLE clang_format_version_info) - string(REGEX MATCH "version ([0-9]+)\\.([0-9]+)\\.([0-9]+)(.*)?" - clang_format_version_info CLANG_FORMAT_VERSION) - if(clang_format_version_info AND NOT CLANG_FORMAT_VERSION VERSION_LESS - 13.0) + execute_process(COMMAND ${CLANG_FORMAT} "--version" OUTPUT_VARIABLE clang_format_version_info) + string(REGEX MATCH "version ([0-9]+)\\.([0-9]+)\\.([0-9]+)(.*)?" clang_format_version_info CLANG_FORMAT_VERSION) + if(clang_format_version_info AND NOT CLANG_FORMAT_VERSION VERSION_LESS 13.0) # Enable 'make reformat' target. 
add_custom_target( reformat VERBATIM - COMMAND - git ls-files | grep -E \\.\(c|cxx|cc|cpp|h|hxx|hpp\)\(\\.in\)?\$ | - xargs ${CLANG_FORMAT} -i --style=file + COMMAND git ls-files | grep -E \\.\(c|cxx|cc|cpp|h|hxx|hpp\)\(\\.in\)?\$ | xargs ${CLANG_FORMAT} -i + --style=file WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}) endif() endif() @@ -523,8 +492,7 @@ else() set(MDBX_MANAGE_BUILD_FLAGS_DEFAULT ON) endif(SUBPROJECT) -option(MDBX_MANAGE_BUILD_FLAGS - "Allow libmdbx to configure/manage/override its own build flags" +option(MDBX_MANAGE_BUILD_FLAGS "Allow libmdbx to configure/manage/override its own build flags" ${MDBX_MANAGE_BUILD_FLAGS_DEFAULT}) if(MDBX_MANAGE_BUILD_FLAGS) setup_compile_flags() @@ -541,17 +509,11 @@ if(NOT DEFINED MDBX_CXX_STANDARD) endif() if(DEFINED CMAKE_CXX_STANDARD) set(MDBX_CXX_STANDARD ${CMAKE_CXX_STANDARD}) - elseif(NOT HAS_CXX23 LESS 0 - AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION - VERSION_LESS 12)) + elseif(NOT HAS_CXX23 LESS 0 AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12)) set(MDBX_CXX_STANDARD 23) - elseif(NOT HAS_CXX20 LESS 0 - AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION - VERSION_LESS 10)) + elseif(NOT HAS_CXX20 LESS 0 AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10)) set(MDBX_CXX_STANDARD 20) - elseif(NOT HAS_CXX17 LESS 0 - AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION - VERSION_LESS 5)) + elseif(NOT HAS_CXX17 LESS 0 AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5)) set(MDBX_CXX_STANDARD 17) elseif(NOT HAS_CXX14 LESS 0) set(MDBX_CXX_STANDARD 14) @@ -574,8 +536,7 @@ if(NOT DEFINED MDBX_C_STANDARD) set(MDBX_C_STANDARD ${CMAKE_C_STANDARD}) elseif( MSVC - # MSVC >= 19.28 (Microsoft Visual Studio 16.8) is mad! It unable process - # Windows SDK headers in the C11 mode! + # MSVC >= 19.28 (Microsoft Visual Studio 16.8) is mad! It unable process Windows SDK headers in the C11 mode! 
AND MSVC_VERSION GREATER 1927 AND NOT MSVC_VERSION GREATER 1929) set(MDBX_C_STANDARD 99) @@ -603,16 +564,13 @@ if(WIN32 AND EXISTS "${MDBX_SOURCE_DIR}/ntdll.def") endif() if(MSVC_LIB_EXE) message(STATUS "Found MSVC's lib tool: ${MSVC_LIB_EXE}") - set(MDBX_NTDLL_EXTRA_IMPLIB - "${CMAKE_CURRENT_BINARY_DIR}/mdbx_ntdll_extra.lib") + set(MDBX_NTDLL_EXTRA_IMPLIB "${CMAKE_CURRENT_BINARY_DIR}/mdbx_ntdll_extra.lib") add_custom_command( OUTPUT "${MDBX_NTDLL_EXTRA_IMPLIB}" COMMENT "Create extra-import-library for ntdll.dll" MAIN_DEPENDENCY "${MDBX_SOURCE_DIR}/ntdll.def" - COMMAND - ${MSVC_LIB_EXE} /def:"${MDBX_SOURCE_DIR}/ntdll.def" - /out:"${MDBX_NTDLL_EXTRA_IMPLIB}" - ${INITIAL_CMAKE_STATIC_LINKER_FLAGS}) + COMMAND ${MSVC_LIB_EXE} /def:"${MDBX_SOURCE_DIR}/ntdll.def" /out:"${MDBX_NTDLL_EXTRA_IMPLIB}" + ${INITIAL_CMAKE_STATIC_LINKER_FLAGS}) else() message(WARNING "MSVC's lib tool not found") endif() @@ -625,14 +583,12 @@ if(WIN32 AND EXISTS "${MDBX_SOURCE_DIR}/ntdll.def") endif() if(DLLTOOL) message(STATUS "Found dlltool: ${DLLTOOL}") - set(MDBX_NTDLL_EXTRA_IMPLIB - "${CMAKE_CURRENT_BINARY_DIR}/mdbx_ntdll_extra.a") + set(MDBX_NTDLL_EXTRA_IMPLIB "${CMAKE_CURRENT_BINARY_DIR}/mdbx_ntdll_extra.a") add_custom_command( OUTPUT "${MDBX_NTDLL_EXTRA_IMPLIB}" COMMENT "Create extra-import-library for ntdll.dll" MAIN_DEPENDENCY "${MDBX_SOURCE_DIR}/ntdll.def" - COMMAND ${DLLTOOL} -d "${MDBX_SOURCE_DIR}/ntdll.def" -l - "${MDBX_NTDLL_EXTRA_IMPLIB}") + COMMAND ${DLLTOOL} -d "${MDBX_SOURCE_DIR}/ntdll.def" -l "${MDBX_NTDLL_EXTRA_IMPLIB}") else() message(WARNING "dlltool not found") endif() @@ -647,12 +603,11 @@ if(WIN32 AND EXISTS "${MDBX_SOURCE_DIR}/ntdll.def") add_library(ntdll_extra STATIC IMPORTED GLOBAL) add_dependencies(ntdll_extra ntdll_extra_target) # (3) specify where the library is (and where to find the headers) - set_target_properties(ntdll_extra PROPERTIES IMPORTED_LOCATION - "${MDBX_NTDLL_EXTRA_IMPLIB}") + set_target_properties(ntdll_extra PROPERTIES IMPORTED_LOCATION 
"${MDBX_NTDLL_EXTRA_IMPLIB}") endif() endif() -# ############################################################################## +# ###################################################################################################################### # ~~~ # # #### ##### ##### # #### # # #### @@ -663,10 +618,9 @@ endif() # #### # # # #### # # #### # # ~~~ -# ############################################################################## +# ###################################################################################################################### -set(MDBX_BUILD_OPTIONS ENABLE_UBSAN ENABLE_ASAN ENABLE_MEMCHECK ENABLE_GPROF - ENABLE_GCOV) +set(MDBX_BUILD_OPTIONS ENABLE_UBSAN ENABLE_ASAN ENABLE_MEMCHECK ENABLE_GPROF ENABLE_GCOV) macro(add_mdbx_option NAME DESCRIPTION DEFAULT) list(APPEND MDBX_BUILD_OPTIONS ${NAME}) if(NOT ${DEFAULT} STREQUAL "AUTO") @@ -686,51 +640,32 @@ else() set(MDBX_BUILD_TOOLS_DEFAULT ON) endif() -add_mdbx_option(MDBX_INSTALL_STATIC - "Build and install libmdbx for static linking" OFF) -add_mdbx_option(MDBX_BUILD_SHARED_LIBRARY - "Build libmdbx as shared library (DLL)" ${BUILD_SHARED_LIBS}) -add_mdbx_option( - MDBX_BUILD_TOOLS "Build MDBX tools (mdbx_chk/stat/dump/load/copy/drop)" - ${MDBX_BUILD_TOOLS_DEFAULT}) -cmake_dependent_option( - MDBX_INSTALL_MANPAGES - "Install man-pages for MDBX tools (mdbx_chk/stat/dump/load/copy)" ON - MDBX_BUILD_TOOLS OFF) -add_mdbx_option( - MDBX_TXN_CHECKOWNER - "Checking transaction matches the calling thread inside libmdbx's API" ON) -add_mdbx_option( - MDBX_ENV_CHECKPID - "Checking PID inside libmdbx's API against reuse DB environment after the fork()" - AUTO) +add_mdbx_option(MDBX_INSTALL_STATIC "Build and install libmdbx for static linking" OFF) +add_mdbx_option(MDBX_BUILD_SHARED_LIBRARY "Build libmdbx as shared library (DLL)" ${BUILD_SHARED_LIBS}) +add_mdbx_option(MDBX_BUILD_TOOLS "Build MDBX tools (mdbx_chk/stat/dump/load/copy/drop)" ${MDBX_BUILD_TOOLS_DEFAULT}) 
+cmake_dependent_option(MDBX_INSTALL_MANPAGES "Install man-pages for MDBX tools (mdbx_chk/stat/dump/load/copy)" ON + MDBX_BUILD_TOOLS OFF) +add_mdbx_option(MDBX_TXN_CHECKOWNER "Checking transaction matches the calling thread inside libmdbx's API" ON) +add_mdbx_option(MDBX_ENV_CHECKPID "Checking PID inside libmdbx's API against reuse DB environment after the fork()" + AUTO) mark_as_advanced(MDBX_ENV_CHECKPID) if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - add_mdbx_option(MDBX_DISABLE_GNU_SOURCE "Don't use GNU/Linux libc extensions" - OFF) + add_mdbx_option(MDBX_DISABLE_GNU_SOURCE "Don't use GNU/Linux libc extensions" OFF) mark_as_advanced(MDBX_DISABLE_GNU_SOURCE) endif() if(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin" OR IOS) - add_mdbx_option(MDBX_APPLE_SPEED_INSTEADOF_DURABILITY - "Disable use fcntl(F_FULLFSYNC) in favor of speed" OFF) + add_mdbx_option(MDBX_APPLE_SPEED_INSTEADOF_DURABILITY "Disable use fcntl(F_FULLFSYNC) in favor of speed" OFF) mark_as_advanced(MDBX_APPLE_SPEED_INSTEADOF_DURABILITY) endif() if(WIN32) if(MDBX_NTDLL_EXTRA_IMPLIB) - add_mdbx_option( - MDBX_WITHOUT_MSVC_CRT - "Avoid dependence from MSVC CRT and use ntdll.dll instead" OFF) + add_mdbx_option(MDBX_WITHOUT_MSVC_CRT "Avoid dependence from MSVC CRT and use ntdll.dll instead" OFF) endif() set(MDBX_AVOID_MSYNC_DEFAULT ON) else() - add_mdbx_option( - MDBX_USE_OFDLOCKS - "Use Open file description locks (aka OFD locks, non-POSIX)" AUTO) + add_mdbx_option(MDBX_USE_OFDLOCKS "Use Open file description locks (aka OFD locks, non-POSIX)" AUTO) mark_as_advanced(MDBX_USE_OFDLOCKS) - add_mdbx_option( - MDBX_USE_MINCORE - "Use Unix' mincore() to determine whether DB-pages are resident in memory" - ON) + add_mdbx_option(MDBX_USE_MINCORE "Use Unix' mincore() to determine whether DB-pages are resident in memory" ON) mark_as_advanced(MDBX_USE_MINCORE) set(MDBX_AVOID_MSYNC_DEFAULT OFF) endif() @@ -738,19 +673,12 @@ add_mdbx_option( MDBX_AVOID_MSYNC "Disable in-memory database updating with consequent 
flush-to-disk/msync syscall in `MDBX_WRITEMAP` mode" ${MDBX_AVOID_MSYNC_DEFAULT}) -add_mdbx_option( - MDBX_MMAP_NEEDS_JOLT - "Assume system needs explicit syscall to sync/flush/write modified mapped memory" - AUTO) +add_mdbx_option(MDBX_MMAP_NEEDS_JOLT "Assume system needs explicit syscall to sync/flush/write modified mapped memory" + AUTO) mark_as_advanced(MDBX_MMAP_NEEDS_JOLT) -add_mdbx_option( - MDBX_LOCKING - "Locking method (Windows=-1, SystemV=5, POSIX=1988, POSIX=2001, POSIX=2008)" - AUTO) +add_mdbx_option(MDBX_LOCKING "Locking method (Windows=-1, SystemV=5, POSIX=1988, POSIX=2001, POSIX=2008)" AUTO) mark_as_advanced(MDBX_LOCKING) -add_mdbx_option( - MDBX_TRUST_RTC - "Does a system have battery-backed Real-Time Clock or just a fake" AUTO) +add_mdbx_option(MDBX_TRUST_RTC "Does a system have battery-backed Real-Time Clock or just a fake" AUTO) mark_as_advanced(MDBX_TRUST_RTC) add_mdbx_option(MDBX_FORCE_ASSERTIONS "Force enable assertion checking" OFF) add_mdbx_option( @@ -758,24 +686,18 @@ add_mdbx_option( "Disable some checks to reduce an overhead and detection probability of database corruption to a values closer to the LMDB" OFF) mark_as_advanced(MDBX_DISABLE_VALIDATION) -add_mdbx_option(MDBX_ENABLE_REFUND - "Zerocost auto-compactification during write-transactions" ON) +add_mdbx_option(MDBX_ENABLE_REFUND "Zerocost auto-compactification during write-transactions" ON) add_mdbx_option( MDBX_ENABLE_BIGFOOT - "Chunking long list of retired pages during huge transactions commit to avoid use sequences of pages" - ON) -add_mdbx_option(MDBX_ENABLE_PGOP_STAT - "Gathering statistics for page operations" ON) + "Chunking long list of retired pages during huge transactions commit to avoid use sequences of pages" ON) +add_mdbx_option(MDBX_ENABLE_PGOP_STAT "Gathering statistics for page operations" ON) add_mdbx_option(MDBX_ENABLE_PROFGC "Profiling of GC search and updates" OFF) mark_as_advanced(MDBX_ENABLE_PROFGC) add_mdbx_option( MDBX_ENABLE_DBI_SPARSE - "Support for 
sparse sets of DBI handles to reduce overhead when starting and processing transactions" - ON) -add_mdbx_option( - MDBX_ENABLE_DBI_LOCKFREE - "Support for deferred releasing and a lockfree path to quickly open DBI handles" - ON) + "Support for sparse sets of DBI handles to reduce overhead when starting and processing transactions" ON) +add_mdbx_option(MDBX_ENABLE_DBI_LOCKFREE + "Support for deferred releasing and a lockfree path to quickly open DBI handles" ON) if(NOT MDBX_AMALGAMATED_SOURCE) if(CMAKE_CONFIGURATION_TYPES OR CMAKE_BUILD_TYPE_UPPERCASE STREQUAL "DEBUG") @@ -783,14 +705,11 @@ if(NOT MDBX_AMALGAMATED_SOURCE) else() set(MDBX_ALLOY_BUILD_DEFAULT ON) endif() - add_mdbx_option( - MDBX_ALLOY_BUILD "Build MDBX library through single/alloyed object file" - ${MDBX_ALLOY_BUILD_DEFAULT}) + add_mdbx_option(MDBX_ALLOY_BUILD "Build MDBX library through single/alloyed object file" ${MDBX_ALLOY_BUILD_DEFAULT}) endif() if((MDBX_BUILD_TOOLS OR MDBX_ENABLE_TESTS) AND MDBX_BUILD_SHARED_LIBRARY) - add_mdbx_option(MDBX_LINK_TOOLS_NONSTATIC - "Link MDBX tools with non-static libmdbx" OFF) + add_mdbx_option(MDBX_LINK_TOOLS_NONSTATIC "Link MDBX tools with non-static libmdbx" OFF) else() unset(MDBX_LINK_TOOLS_NONSTATIC CACHE) endif() @@ -802,10 +721,8 @@ if(CMAKE_CXX_COMPILER_LOADED option(MDBX_ENABLE_TESTS "Build MDBX tests" ${BUILD_TESTING}) endif() if(NOT MDBX_WITHOUT_MSVC_CRT - AND NOT (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION - VERSION_LESS 4.8) - AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION - VERSION_LESS 3.9) + AND NOT (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8) + AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.9) AND NOT (MSVC AND MSVC_VERSION LESS 1900)) option(MDBX_BUILD_CXX "Build C++ portion" ON) else() @@ -820,13 +737,10 @@ if(CI) add_definitions(-DMDBX_CI="${CI}") endif() -# ############################################################################## +# 
###################################################################################################################### if(MDBX_BUILD_CXX AND NOT CMAKE_CXX_COMPILER_LOADED) - message( - FATAL_ERROR - "MDBX_BUILD_CXX=${MDBX_BUILD_CXX}: The C++ compiler is required to build the C++API." - ) + message(FATAL_ERROR "MDBX_BUILD_CXX=${MDBX_BUILD_CXX}: The C++ compiler is required to build the C++API.") endif() if(MDBX_BUILD_CXX) @@ -841,8 +755,7 @@ if(MDBX_AMALGAMATED_SOURCE) list(APPEND LIBMDBX_SOURCES mdbx.c) else() # generate version file - configure_file("${MDBX_SOURCE_DIR}/version.c.in" - "${CMAKE_CURRENT_BINARY_DIR}/version.c" ESCAPE_QUOTES) + configure_file("${MDBX_SOURCE_DIR}/version.c.in" "${CMAKE_CURRENT_BINARY_DIR}/version.c" ESCAPE_QUOTES) file(SHA256 "${CMAKE_CURRENT_BINARY_DIR}/version.c" MDBX_SOURCERY_DIGEST) string(MAKE_C_IDENTIFIER "${MDBX_GIT_DESCRIBE}" MDBX_SOURCERY_SUFFIX) set(MDBX_BUILD_SOURCERY "${MDBX_SOURCERY_DIGEST}_${MDBX_SOURCERY_SUFFIX}") @@ -930,21 +843,18 @@ else() list(APPEND LIBMDBX_SOURCES "${MDBX_SOURCE_DIR}/lck-posix.c") endif() if(NOT APPLE) - list(APPEND LIBMDBX_SOURCES "${MDBX_SOURCE_DIR}/windows-import.h" - "${MDBX_SOURCE_DIR}/windows-import.c" + list(APPEND LIBMDBX_SOURCES "${MDBX_SOURCE_DIR}/windows-import.h" "${MDBX_SOURCE_DIR}/windows-import.c" "${MDBX_SOURCE_DIR}/lck-windows.c") endif() include_directories("${MDBX_SOURCE_DIR}") endif() endif(MDBX_AMALGAMATED_SOURCE) if(MDBX_BUILD_CXX) - message( - STATUS "Use C${MDBX_C_STANDARD} and C++${MDBX_CXX_STANDARD} for libmdbx") + message(STATUS "Use C${MDBX_C_STANDARD} and C++${MDBX_CXX_STANDARD} for libmdbx") list(APPEND LIBMDBX_PUBLIC_HEADERS mdbx.h++) list(APPEND LIBMDBX_SOURCES "${MDBX_SOURCE_DIR}/mdbx.c++" mdbx.h++) else() - message( - STATUS "Use C${MDBX_C_STANDARD} for libmdbx but C++ portion is disabled") + message(STATUS "Use C${MDBX_C_STANDARD} for libmdbx but C++ portion is disabled") endif() if(MSVC) @@ -953,26 +863,20 @@ endif() macro(target_setup_options TARGET) if(DEFINED 
INTERPROCEDURAL_OPTIMIZATION) - set_target_properties( - ${TARGET} PROPERTIES INTERPROCEDURAL_OPTIMIZATION - $) + set_target_properties(${TARGET} PROPERTIES INTERPROCEDURAL_OPTIMIZATION $) endif() if(NOT C_FALLBACK_GNU11 AND NOT C_FALLBACK_11) - set_target_properties(${TARGET} PROPERTIES C_STANDARD ${MDBX_C_STANDARD} - C_STANDARD_REQUIRED ON) + set_target_properties(${TARGET} PROPERTIES C_STANDARD ${MDBX_C_STANDARD} C_STANDARD_REQUIRED ON) endif() if(MDBX_BUILD_CXX) if(NOT CXX_FALLBACK_GNU11 AND NOT CXX_FALLBACK_11) - set_target_properties( - ${TARGET} PROPERTIES CXX_STANDARD ${MDBX_CXX_STANDARD} - CXX_STANDARD_REQUIRED ON) + set_target_properties(${TARGET} PROPERTIES CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) endif() if(MSVC AND NOT MSVC_VERSION LESS 1910) target_compile_options(${TARGET} INTERFACE "/Zc:__cplusplus") endif() endif() - if(CC_HAS_FASTMATH AND NOT (CMAKE_COMPILER_IS_CLANG - AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10)) + if(CC_HAS_FASTMATH AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10)) target_compile_options(${TARGET} PRIVATE "-ffast-math") endif() if(CC_HAS_VISIBILITY) @@ -1001,8 +905,7 @@ macro(libmdbx_setup_libs TARGET MODE) if(MDBX_NTDLL_EXTRA_IMPLIB AND MDBX_WITHOUT_MSVC_CRT) target_link_libraries(${TARGET} ${MODE} ntdll_extra) endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "SunOS" OR ${CMAKE_SYSTEM_NAME} STREQUAL - "Solaris") + elseif(${CMAKE_SYSTEM_NAME} STREQUAL "SunOS" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Solaris") target_link_libraries(${TARGET} ${MODE} kstat) elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Android") target_link_libraries(${TARGET} ${MODE} log) @@ -1023,8 +926,7 @@ if(MDBX_INSTALL_STATIC) else() add_library(mdbx-static STATIC EXCLUDE_FROM_ALL ${LIBMDBX_SOURCES}) endif() -set_target_properties(mdbx-static PROPERTIES PUBLIC_HEADER - "${LIBMDBX_PUBLIC_HEADERS}") +set_target_properties(mdbx-static PROPERTIES PUBLIC_HEADER "${LIBMDBX_PUBLIC_HEADERS}") 
target_compile_definitions(mdbx-static PRIVATE MDBX_BUILD_SHARED_LIBRARY=0) target_setup_options(mdbx-static) libmdbx_setup_libs(mdbx-static INTERFACE) @@ -1035,13 +937,12 @@ else() endif() target_include_directories(mdbx-static INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}") -# ############################################################################## +# ###################################################################################################################### # build shared library if(MDBX_BUILD_SHARED_LIBRARY) add_library(mdbx SHARED ${LIBMDBX_SOURCES}) - set_target_properties(mdbx PROPERTIES PUBLIC_HEADER - "${LIBMDBX_PUBLIC_HEADERS}") + set_target_properties(mdbx PROPERTIES PUBLIC_HEADER "${LIBMDBX_PUBLIC_HEADERS}") target_compile_definitions( mdbx PRIVATE LIBMDBX_EXPORTS MDBX_BUILD_SHARED_LIBRARY=1 @@ -1052,9 +953,7 @@ if(MDBX_BUILD_SHARED_LIBRARY) if(MDBX_NTDLL_EXTRA_IMPLIB AND MDBX_WITHOUT_MSVC_CRT) set_property(TARGET mdbx PROPERTY LINKER_FLAGS "/NODEFAULTLIB") else() - set_property( - TARGET mdbx PROPERTY MSVC_RUNTIME_LIBRARY - "MultiThreaded$<$:Debug>DLL") + set_property(TARGET mdbx PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>DLL") endif() endif() if(CC_HAS_VISIBILITY AND (LTO_ENABLED OR INTERPROCEDURAL_OPTIMIZATION)) @@ -1070,18 +969,15 @@ if(MDBX_BUILD_SHARED_LIBRARY AND MDBX_LINK_TOOLS_NONSTATIC) # use, i.e. 
don't skip the full RPATH for the build tree set(CMAKE_SKIP_BUILD_RPATH FALSE) - # when building, don't use the install RPATH already (but later on when - # installing) + # when building, don't use the install RPATH already (but later on when installing) set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) - # add the automatically determined parts of the RPATH which point to - # directories outside the build tree to the install RPATH + # add the automatically determined parts of the RPATH which point to directories outside the build tree to the install + # RPATH set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) - # the RPATH to be used when installing, but only if it's not a system - # directory - list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES - "${CMAKE_INSTALL_PREFIX}/lib" isSystemDir) + # the RPATH to be used when installing, but only if it's not a system directory + list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${CMAKE_INSTALL_PREFIX}/lib" isSystemDir) if(isSystemDir EQUAL -1) if(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") set(CMAKE_INSTALL_RPATH "@executable_path/../lib") @@ -1091,8 +987,7 @@ if(MDBX_BUILD_SHARED_LIBRARY AND MDBX_LINK_TOOLS_NONSTATIC) endif() if(WIN32) - # Windows don't have RPATH feature, therefore we should prepare PATH or copy - # DLL(s) + # Windows don't have RPATH feature, therefore we should prepare PATH or copy DLL(s) set(TOOL_MDBX_DLLCRUTCH "Crutch for ${CMAKE_SYSTEM_NAME}") if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_VERSION VERSION_LESS 3.0) # will use LOCATION property to compose DLLPATH @@ -1110,21 +1005,17 @@ endif() if(MDBX_BUILD_TOOLS) set(WINGETOPT_SRC "") if(WIN32) - set(WINGETOPT_SRC ${MDBX_SOURCE_DIR}/tools/wingetopt.c - ${MDBX_SOURCE_DIR}/tools/wingetopt.h) + set(WINGETOPT_SRC ${MDBX_SOURCE_DIR}/tools/wingetopt.c ${MDBX_SOURCE_DIR}/tools/wingetopt.h) endif() foreach(TOOL chk copy stat dump load drop) if(MDBX_AMALGAMATED_SOURCE) add_executable(mdbx_${TOOL} mdbx.h ${MDBX_SOURCE_DIR}/mdbx_${TOOL}.c) else() - 
add_executable(mdbx_${TOOL} mdbx.h ${MDBX_SOURCE_DIR}/tools/${TOOL}.c - ${WINGETOPT_SRC}) + add_executable(mdbx_${TOOL} mdbx.h ${MDBX_SOURCE_DIR}/tools/${TOOL}.c ${WINGETOPT_SRC}) endif() if(NOT C_FALLBACK_GNU11 AND NOT C_FALLBACK_11) - set_target_properties( - mdbx_${TOOL} PROPERTIES C_STANDARD ${MDBX_C_STANDARD} - C_STANDARD_REQUIRED ON) + set_target_properties(mdbx_${TOOL} PROPERTIES C_STANDARD ${MDBX_C_STANDARD} C_STANDARD_REQUIRED ON) endif() target_setup_options(mdbx_${TOOL}) target_link_libraries(mdbx_${TOOL} ${TOOL_MDBX_LIB}) @@ -1135,7 +1026,7 @@ if(MDBX_BUILD_TOOLS) endif() endif() -# ############################################################################## +# ###################################################################################################################### # mdbx-shared-lib installation if(NOT DEFINED MDBX_DLL_INSTALL_DESTINATION) @@ -1178,19 +1069,15 @@ if(MDBX_BUILD_TOOLS) set(MDBX_TOOLS_INSTALL_DESTINATION ${CMAKE_INSTALL_BINDIR}) endif() install(TARGETS mdbx_chk mdbx_stat mdbx_copy mdbx_dump mdbx_load mdbx_drop - RUNTIME DESTINATION ${MDBX_TOOLS_INSTALL_DESTINATION} - COMPONENT runtime) + RUNTIME DESTINATION ${MDBX_TOOLS_INSTALL_DESTINATION} COMPONENT runtime) if(MDBX_INSTALL_MANPAGES) if(NOT DEFINED MDBX_MAN_INSTALL_DESTINATION) set(MDBX_MAN_INSTALL_DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) endif() install( - FILES "${MDBX_SOURCE_DIR}/man1/mdbx_chk.1" - "${MDBX_SOURCE_DIR}/man1/mdbx_stat.1" - "${MDBX_SOURCE_DIR}/man1/mdbx_copy.1" - "${MDBX_SOURCE_DIR}/man1/mdbx_dump.1" - "${MDBX_SOURCE_DIR}/man1/mdbx_load.1" - "${MDBX_SOURCE_DIR}/man1/mdbx_drop.1" + FILES "${MDBX_SOURCE_DIR}/man1/mdbx_chk.1" "${MDBX_SOURCE_DIR}/man1/mdbx_stat.1" + "${MDBX_SOURCE_DIR}/man1/mdbx_copy.1" "${MDBX_SOURCE_DIR}/man1/mdbx_dump.1" + "${MDBX_SOURCE_DIR}/man1/mdbx_load.1" "${MDBX_SOURCE_DIR}/man1/mdbx_drop.1" DESTINATION ${MDBX_MAN_INSTALL_DESTINATION} COMPONENT doc) endif() @@ -1225,7 +1112,7 @@ if(MDBX_INSTALL_STATIC) endif() 
endif(MDBX_INSTALL_STATIC) -# ############################################################################## +# ###################################################################################################################### # collect options & build info if(NOT DEFINED MDBX_BUILD_TIMESTAMP) @@ -1244,8 +1131,7 @@ endif() if(NOT CMAKE_CONFIGURATION_TYPES) list(APPEND MDBX_BUILD_FLAGS ${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPERCASE}}) if(MDBX_BUILD_CXX) - list(APPEND MDBX_BUILD_FLAGS - ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPERCASE}}) + list(APPEND MDBX_BUILD_FLAGS ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPERCASE}}) endif() endif() @@ -1272,9 +1158,7 @@ list(REMOVE_DUPLICATES MDBX_BUILD_FLAGS) string(REPLACE ";" " " MDBX_BUILD_FLAGS "${MDBX_BUILD_FLAGS}") if(CMAKE_CONFIGURATION_TYPES) # add dynamic part via per-configuration define - message( - STATUS - "MDBX Compile Flags: ${MDBX_BUILD_FLAGS} ") + message(STATUS "MDBX Compile Flags: ${MDBX_BUILD_FLAGS} ") add_definitions( -DMDBX_BUILD_FLAGS_CONFIG="$<$:${CMAKE_C_FLAGS_DEBUG} ${CMAKE_C_DEFINES_DEBUG}>$<$:${CMAKE_C_FLAGS_RELEASE} ${CMAKE_C_DEFINES_RELEASE}>$<$:${CMAKE_C_FLAGS_RELWITHDEBINFO} ${CMAKE_C_DEFINES_RELWITHDEBINFO}>$<$:${CMAKE_C_FLAGS_MINSIZEREL} ${CMAKE_C_DEFINES_MINSIZEREL}>" ) @@ -1289,8 +1173,7 @@ execute_process( OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET RESULT_VARIABLE rc) if(rc OR NOT MDBX_BUILD_COMPILER) - string(STRIP "${CMAKE_C_COMPILER_ID}-${CMAKE_C_COMPILER_VERSION}" - MDBX_BUILD_COMPILER) + string(STRIP "${CMAKE_C_COMPILER_ID}-${CMAKE_C_COMPILER_VERSION}" MDBX_BUILD_COMPILER) endif() # make a build-target triplet @@ -1299,8 +1182,7 @@ if(CMAKE_C_COMPILER_TARGET) else() if(CMAKE_C_COMPILER_ARCHITECTURE_ID) string(STRIP "${CMAKE_C_COMPILER_ARCHITECTURE_ID}" MDBX_BUILD_TARGET) - elseif(CMAKE_GENERATOR_PLATFORM AND NOT CMAKE_GENERATOR_PLATFORM STREQUAL - CMAKE_SYSTEM_NAME) + elseif(CMAKE_GENERATOR_PLATFORM AND NOT CMAKE_GENERATOR_PLATFORM STREQUAL CMAKE_SYSTEM_NAME) string(STRIP 
"${CMAKE_GENERATOR_PLATFORM}" MDBX_BUILD_TARGET) elseif(CMAKE_SYSTEM_ARCH) string(STRIP "${CMAKE_SYSTEM_ARCH}" MDBX_BUILD_TARGET) @@ -1311,20 +1193,16 @@ else() else() set(MDBX_BUILD_TARGET "unknown") endif() - if(CMAKE_C_COMPILER_ABI - AND NOT (CMAKE_C_COMPILER_ABI MATCHES ".*${MDBX_BUILD_TARGET}.*" - OR MDBX_BUILD_TARGET MATCHES ".*${CMAKE_C_COMPILER_ABI}.*")) - string(CONCAT MDBX_BUILD_TARGET - "${MDBX_BUILD_TARGET}-${CMAKE_C_COMPILER_ABI}") + if(CMAKE_C_COMPILER_ABI AND NOT (CMAKE_C_COMPILER_ABI MATCHES ".*${MDBX_BUILD_TARGET}.*" + OR MDBX_BUILD_TARGET MATCHES ".*${CMAKE_C_COMPILER_ABI}.*")) + string(CONCAT MDBX_BUILD_TARGET "${MDBX_BUILD_TARGET}-${CMAKE_C_COMPILER_ABI}") endif() if(CMAKE_C_PLATFORM_ID - AND NOT (CMAKE_SYSTEM_NAME - AND (CMAKE_C_PLATFORM_ID MATCHES ".*${CMAKE_SYSTEM_NAME}.*" - OR CMAKE_SYSTEM_NAME MATCHES ".*${CMAKE_C_PLATFORM_ID}.*")) - AND NOT (CMAKE_C_PLATFORM_ID MATCHES ".*${CMAKE_C_PLATFORM_ID}.*" - OR MDBX_BUILD_TARGET MATCHES ".*${CMAKE_C_PLATFORM_ID}.*")) - string(CONCAT MDBX_BUILD_TARGET - "${MDBX_BUILD_TARGET}-${CMAKE_C_COMPILER_ABI}") + AND NOT (CMAKE_SYSTEM_NAME AND (CMAKE_C_PLATFORM_ID MATCHES ".*${CMAKE_SYSTEM_NAME}.*" + OR CMAKE_SYSTEM_NAME MATCHES ".*${CMAKE_C_PLATFORM_ID}.*")) + AND NOT (CMAKE_C_PLATFORM_ID MATCHES ".*${CMAKE_C_PLATFORM_ID}.*" OR MDBX_BUILD_TARGET MATCHES + ".*${CMAKE_C_PLATFORM_ID}.*")) + string(CONCAT MDBX_BUILD_TARGET "${MDBX_BUILD_TARGET}-${CMAKE_C_COMPILER_ABI}") endif() if(CMAKE_SYSTEM_NAME) string(CONCAT MDBX_BUILD_TARGET "${MDBX_BUILD_TARGET}-${CMAKE_SYSTEM_NAME}") @@ -1341,8 +1219,7 @@ else() endif() # options -set(options VERSION C_COMPILER CXX_COMPILER MDBX_BUILD_TARGET MDBX_BUILD_TYPE - ${MDBX_BUILD_OPTIONS}) +set(options VERSION C_COMPILER CXX_COMPILER MDBX_BUILD_TARGET MDBX_BUILD_TYPE ${MDBX_BUILD_OPTIONS}) foreach(item IN LISTS options) if(DEFINED ${item}) set(value "${${item}}") @@ -1359,23 +1236,19 @@ foreach(item IN LISTS options) endforeach(item) # provide config.h for library build info 
-configure_file("${MDBX_SOURCE_DIR}/config.h.in" - "${CMAKE_CURRENT_BINARY_DIR}/config.h" ESCAPE_QUOTES) +configure_file("${MDBX_SOURCE_DIR}/config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/config.h" ESCAPE_QUOTES) add_definitions(-DMDBX_CONFIG_H="${CMAKE_CURRENT_BINARY_DIR}/config.h") -# ############################################################################## +# ###################################################################################################################### if(NOT MDBX_AMALGAMATED_SOURCE AND MDBX_ENABLE_TESTS) if(NOT CMAKE_CXX_COMPILER_LOADED) - message( - FATAL_ERROR - "MDBX_ENABLE_TESTS=${MDBX_ENABLE_TESTS}: The C++ compiler is required to build the tests." - ) + message(FATAL_ERROR "MDBX_ENABLE_TESTS=${MDBX_ENABLE_TESTS}: The C++ compiler is required to build the tests.") endif() add_subdirectory(test) endif() -# ############################################################################## +# ###################################################################################################################### if(NOT SUBPROJECT) set(PACKAGE "libmdbx") diff --git a/cmake/compiler.cmake b/cmake/compiler.cmake index 0ee15952..f875af16 100644 --- a/cmake/compiler.cmake +++ b/cmake/compiler.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2010-2024 Леонид Юрьев aka Leonid Yuriev +# Copyright (c) 2010-2024 Леонид Юрьев aka Leonid Yuriev ############################################### # SPDX-License-Identifier: Apache-2.0 if(CMAKE_VERSION VERSION_LESS 3.8.2) @@ -32,11 +32,8 @@ if(NOT CMAKE_VERSION VERSION_LESS 3.9) endif() if(CMAKE_VERSION MATCHES ".*MSVC.*" AND CMAKE_VERSION VERSION_LESS 3.16) - message( - FATAL_ERROR - "CMake from MSVC kit is unfit! " - "Please use MSVC2019 with modern CMake the original CMake from https://cmake.org/download/" - ) + message(FATAL_ERROR "CMake from MSVC kit is unfit! 
" + "Please use MSVC2019 with modern CMake the original CMake from https://cmake.org/download/") endif() if(NOT (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED)) @@ -62,8 +59,7 @@ if(CMAKE_C_COMPILER_LOADED AND CMAKE_CXX_COMPILER_LOADED AND NOT (CMAKE_C_COMPILER_ID STREQUAL CMAKE_CXX_COMPILER_ID)) message(WARNING "CMAKE_C_COMPILER_ID (${CMAKE_C_COMPILER_ID}) is different " - "from CMAKE_CXX_COMPILER_ID (${CMAKE_CXX_COMPILER_ID}). " - "The final binary may be unusable.") + "from CMAKE_CXX_COMPILER_ID (${CMAKE_CXX_COMPILER_ID}). " "The final binary may be unusable.") endif() if(CMAKE_CXX_COMPILER_LOADED) @@ -80,15 +76,12 @@ macro(check_compiler_flag flag variable) endif() endmacro(check_compiler_flag) -# We support building with Clang and gcc. First check what we're using for -# build. -if(CMAKE_C_COMPILER_LOADED AND CMAKE_C_COMPILER_ID MATCHES - ".*[Cc][Ll][Aa][Nn][Gg].*") +# We support building with Clang and gcc. First check what we're using for build. +if(CMAKE_C_COMPILER_LOADED AND CMAKE_C_COMPILER_ID MATCHES ".*[Cc][Ll][Aa][Nn][Gg].*") set(CMAKE_COMPILER_IS_CLANG ON) set(CMAKE_COMPILER_IS_GNUCC OFF) endif() -if(CMAKE_CXX_COMPILER_LOADED AND CMAKE_CXX_COMPILER_ID MATCHES - ".*[Cc][Ll][Aa][Nn][Gg].*") +if(CMAKE_CXX_COMPILER_LOADED AND CMAKE_CXX_COMPILER_ID MATCHES ".*[Cc][Ll][Aa][Nn][Gg].*") set(CMAKE_COMPILER_IS_CLANG ON) set(CMAKE_COMPILER_IS_GNUCXX OFF) endif() @@ -140,8 +133,7 @@ if(CMAKE_CXX_COMPILER_LOADED) OUTPUT_STRIP_TRAILING_WHITESPACE) set(CMAKE_COMPILER_IS_ELBRUSCXX ON) set(CMAKE_CXX_COMPILER_ID "Elbrus") - message( - STATUS "Detected Elbrus C++ compiler ${CMAKE_CXX_COMPILER_VERSION}") + message(STATUS "Detected Elbrus C++ compiler ${CMAKE_CXX_COMPILER_VERSION}") else() set(CMAKE_COMPILER_IS_ELBRUSCXX OFF) endif() @@ -152,20 +144,17 @@ if(CMAKE_CXX_COMPILER_LOADED) unset(tmp_lxx_probe_result) endif() -# Hard coding the compiler version is ugly from cmake POV, but at least gives -# user a friendly error message. 
The most critical demand for C++ compiler is -# support of C++11 lambdas, added only in version 4.5 +# Hard coding the compiler version is ugly from cmake POV, but at least gives user a friendly error message. The most +# critical demand for C++ compiler is support of C++11 lambdas, added only in version 4.5 # https://gcc.gnu.org/projects/cxx0x.html if(CMAKE_COMPILER_IS_GNUCC) - if(CMAKE_C_COMPILER_VERSION VERSION_LESS 4.5 AND NOT - CMAKE_COMPILER_IS_ELBRUSC) + if(CMAKE_C_COMPILER_VERSION VERSION_LESS 4.5 AND NOT CMAKE_COMPILER_IS_ELBRUSC) message(FATAL_ERROR " Your GCC version is ${CMAKE_C_COMPILER_VERSION}, please update") endif() endif() if(CMAKE_COMPILER_IS_GNUCXX) - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.5 - AND NOT CMAKE_COMPILER_IS_ELBRUSCXX) + if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.5 AND NOT CMAKE_COMPILER_IS_ELBRUSCXX) message(FATAL_ERROR " Your G++ version is ${CMAKE_CXX_COMPILER_VERSION}, please update") endif() @@ -210,12 +199,10 @@ endif() if(NOT CMAKE_SYSTEM_ARCH) if(CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_ARCHITECTURE_ID) - string(TOLOWER "${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_ARCHITECTURE_ID}" - CMAKE_SYSTEM_ARCH) + string(TOLOWER "${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_ARCHITECTURE_ID}" CMAKE_SYSTEM_ARCH) if(CMAKE_SYSTEM_ARCH STREQUAL "x86") set(X86_32 TRUE) - elseif(CMAKE_SYSTEM_ARCH STREQUAL "x86_64" OR CMAKE_SYSTEM_ARCH STREQUAL - "x64") + elseif(CMAKE_SYSTEM_ARCH STREQUAL "x86_64" OR CMAKE_SYSTEM_ARCH STREQUAL "x64") set(X86_64 TRUE) set(CMAKE_SYSTEM_ARCH "x86_64") elseif(CMAKE_SYSTEM_ARCH MATCHES "^(aarch.*|arm.*)") @@ -263,8 +250,7 @@ if(NOT CMAKE_SYSTEM_ARCH) set(X86_32 TRUE) set(CMAKE_SYSTEM_ARCH "x86") endif() - elseif(CMAKE_SYSTEM_PROCESSOR MATCHES - "amd64.*|[xXiI]86_64.*|AMD64.*|[iI][3-6]86.*|[xXiI]86.*") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|[xXiI]86_64.*|AMD64.*|[iI][3-6]86.*|[xXiI]86.*") if(CMAKE_TARGET_BITNESS EQUAL 64) set(X86_64 TRUE) set(CMAKE_SYSTEM_ARCH "x86_64") @@ -347,18 +333,15 @@ if(NOT DEFINED 
CMAKE_HOST_CAN_RUN_EXECUTABLES_BUILT_FOR_TARGET) ) )) set(CMAKE_HOST_CAN_RUN_EXECUTABLES_BUILT_FOR_TARGET TRUE) - message(STATUS "Assume СAN RUN A BUILT EXECUTABLES," - " since host (${CMAKE_HOST_SYSTEM_NAME}-${CMAKE_HOST_ARCH})" + message(STATUS "Assume СAN RUN A BUILT EXECUTABLES," " since host (${CMAKE_HOST_SYSTEM_NAME}-${CMAKE_HOST_ARCH})" " match target (${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_ARCH})") else() if(CMAKE_C_COMPILER_LOADED) include(CheckCSourceRuns) - check_c_source_runs("int main(void) { return 0; }" - CMAKE_HOST_CAN_RUN_EXECUTABLES_BUILT_FOR_TARGET) + check_c_source_runs("int main(void) { return 0; }" CMAKE_HOST_CAN_RUN_EXECUTABLES_BUILT_FOR_TARGET) elseif(CMAKE_CXX_COMPILER_LOADED) include(CheckCXXSourceRuns) - check_cxx_source_runs("int main(void) { return 0; }" - CMAKE_HOST_CAN_RUN_EXECUTABLES_BUILT_FOR_TARGET) + check_cxx_source_runs("int main(void) { return 0; }" CMAKE_HOST_CAN_RUN_EXECUTABLES_BUILT_FOR_TARGET) endif() if(NOT CMAKE_HOST_CAN_RUN_EXECUTABLES_BUILT_FOR_TARGET) message(STATUS "Force CMAKE_CROSSCOMPILING to TRUE") @@ -373,15 +356,13 @@ if(MSVC) check_compiler_flag("/fsanitize=undefined" CC_HAS_UBSAN) else() # - # GCC started to warn for unused result starting from 4.2, and this is when it - # introduced -Wno-unused-result GCC can also be built on top of llvm runtime - # (on mac). + # GCC started to warn for unused result starting from 4.2, and this is when it introduced -Wno-unused-result GCC can + # also be built on top of llvm runtime (on mac). 
check_compiler_flag("-Wno-unknown-pragmas" CC_HAS_WNO_UNKNOWN_PRAGMAS) check_compiler_flag("-Wextra" CC_HAS_WEXTRA) check_compiler_flag("-Werror" CC_HAS_WERROR) check_compiler_flag("-fexceptions" CC_HAS_FEXCEPTIONS) - check_compiler_flag("-fno-semantic-interposition" - CC_HAS_FNO_SEMANTIC_INTERPOSITION) + check_compiler_flag("-fno-semantic-interposition" CC_HAS_FNO_SEMANTIC_INTERPOSITION) if(CMAKE_CXX_COMPILER_LOADED) check_cxx_compiler_flag("-fcxx-exceptions" CC_HAS_FCXX_EXCEPTIONS) endif() @@ -463,34 +444,26 @@ if(CMAKE_COMPILER_IS_GNU${CMAKE_PRIMARY_LANG} OUTPUT_VARIABLE gcc_info_v ERROR_VARIABLE gcc_info_v) - string(REGEX MATCH "^(.+\nCOLLECT_GCC=)([^ \n]+)(\n.+)$" gcc_collect_valid - ${gcc_info_v}) + string(REGEX MATCH "^(.+\nCOLLECT_GCC=)([^ \n]+)(\n.+)$" gcc_collect_valid ${gcc_info_v}) if(gcc_collect_valid) - string(REGEX REPLACE "^(.+\nCOLLECT_GCC=)([^ \n]+)(\n.+)$" "\\2" - gcc_collect ${gcc_info_v}) + string(REGEX REPLACE "^(.+\nCOLLECT_GCC=)([^ \n]+)(\n.+)$" "\\2" gcc_collect ${gcc_info_v}) endif() - string(REGEX MATCH - "^(.+\nCOLLECT_LTO_WRAPPER=)([^ \n]+/lto-wrapper)(\n.+)$" - gcc_lto_wrapper_valid ${gcc_info_v}) + string(REGEX MATCH "^(.+\nCOLLECT_LTO_WRAPPER=)([^ \n]+/lto-wrapper)(\n.+)$" gcc_lto_wrapper_valid ${gcc_info_v}) if(gcc_lto_wrapper_valid) - string(REGEX - REPLACE "^(.+\nCOLLECT_LTO_WRAPPER=)([^ \n]+/lto-wrapper)(\n.+)$" - "\\2" gcc_lto_wrapper ${gcc_info_v}) + string(REGEX REPLACE "^(.+\nCOLLECT_LTO_WRAPPER=)([^ \n]+/lto-wrapper)(\n.+)$" "\\2" gcc_lto_wrapper + ${gcc_info_v}) endif() set(gcc_suffix "") if(gcc_collect_valid AND gcc_collect) - string(REGEX MATCH "^(.*(cc|\\+\\+))(-.+)$" gcc_suffix_valid - ${gcc_collect}) + string(REGEX MATCH "^(.*(cc|\\+\\+))(-.+)$" gcc_suffix_valid ${gcc_collect}) if(gcc_suffix_valid) - string(REGEX REPLACE "^(.*(cc|\\+\\+))(-.+)$" "\\3" gcc_suffix - ${gcc_collect}) + string(REGEX REPLACE "^(.*(cc|\\+\\+))(-.+)$" "\\3" gcc_suffix ${gcc_collect}) endif() endif() - get_filename_component(gcc_dir 
${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER} - DIRECTORY) + get_filename_component(gcc_dir ${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER} DIRECTORY) if(NOT CMAKE_GCC_AR) find_program( CMAKE_GCC_AR @@ -526,10 +499,7 @@ if(CMAKE_COMPILER_IS_GNU${CMAKE_PRIMARY_LANG} AND CMAKE_GCC_NM AND CMAKE_GCC_RANLIB AND gcc_lto_wrapper) - message( - STATUS - "Found GCC's LTO toolset: ${gcc_lto_wrapper}, ${CMAKE_GCC_AR}, ${CMAKE_GCC_RANLIB}" - ) + message(STATUS "Found GCC's LTO toolset: ${gcc_lto_wrapper}, ${CMAKE_GCC_AR}, ${CMAKE_GCC_RANLIB}") set(GCC_LTO_CFLAGS "-flto -fno-fat-lto-objects -fuse-linker-plugin") set(GCC_LTO_AVAILABLE TRUE) message(STATUS "Link-Time Optimization by GCC is available") @@ -565,11 +535,9 @@ if(CMAKE_COMPILER_IS_CLANG) unset(clang_libdirs) unset(clang_libdirs_x) if(clang_probe_result EQUAL 0) - string(REGEX MATCH "(^|\n.*)(.*programs: =)([^\n]+)((\n.*)|$)" - regexp_valid ${clang_search_dirs}) + string(REGEX MATCH "(^|\n.*)(.*programs: =)([^\n]+)((\n.*)|$)" regexp_valid ${clang_search_dirs}) if(regexp_valid) - string(REGEX REPLACE "(^|\n.*)(.*programs: =)([^\n]+)((\n.*)|$)" "\\3" - list ${clang_search_dirs}) + string(REGEX REPLACE "(^|\n.*)(.*programs: =)([^\n]+)((\n.*)|$)" "\\3" list ${clang_search_dirs}) string(REPLACE ":" ";" list "${list}") foreach(dir IN LISTS list) get_filename_component(dir "${dir}" REALPATH) @@ -582,11 +550,9 @@ if(CMAKE_COMPILER_IS_CLANG) list(APPEND clang_bindirs "${clang_bindirs_x}") list(REMOVE_DUPLICATES clang_bindirs) endif() - string(REGEX MATCH "(^|\n.*)(.*libraries: =)([^\n]+)((\n.*)|$)" - regexp_valid ${clang_search_dirs}) + string(REGEX MATCH "(^|\n.*)(.*libraries: =)([^\n]+)((\n.*)|$)" regexp_valid ${clang_search_dirs}) if(regexp_valid) - string(REGEX REPLACE "(^|\n.*)(.*libraries: =)([^\n]+)((\n.*)|$)" "\\3" - list ${clang_search_dirs}) + string(REGEX REPLACE "(^|\n.*)(.*libraries: =)([^\n]+)((\n.*)|$)" "\\3" list ${clang_search_dirs}) string(REPLACE ":" ";" list "${list}") foreach(dir IN LISTS list) get_filename_component(dir 
"${dir}" REALPATH) @@ -600,21 +566,16 @@ if(CMAKE_COMPILER_IS_CLANG) list(REMOVE_DUPLICATES clang_libdirs) endif() else() - get_filename_component(clang_bindirs - ${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER} DIRECTORY) + get_filename_component(clang_bindirs ${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER} DIRECTORY) if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") set(clang_libdirs ${clang_bindirs}) else() - get_filename_component( - clang_libdirs "${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER}/../lib" - REALPATH) + get_filename_component(clang_libdirs "${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER}/../lib" REALPATH) endif() endif() if(clang_bindirs AND clang_libdirs) - message( - STATUS - "Found CLANG/LLVM directories: ${clang_bindirs}, ${clang_libdirs}") + message(STATUS "Found CLANG/LLVM directories: ${clang_bindirs}, ${clang_libdirs}") else() message(STATUS "Could NOT find CLANG/LLVM directories (bin and/or lib).") endif() @@ -622,8 +583,7 @@ if(CMAKE_COMPILER_IS_CLANG) if(NOT CMAKE_CLANG_LD AND clang_bindirs) find_program( CMAKE_CLANG_LD - NAMES lld-link ld.lld "ld${CMAKE_TARGET_BITNESS}.lld" lld llvm-link - llvm-ld + NAMES lld-link ld.lld "ld${CMAKE_TARGET_BITNESS}.lld" lld llvm-link llvm-ld PATHS ${clang_bindirs} NO_DEFAULT_PATH) endif() @@ -666,31 +626,21 @@ if(CMAKE_COMPILER_IS_CLANG) if(CLANG_LTO_PLUGIN) message(STATUS "Found CLANG/LLVM's plugin for LTO: ${CLANG_LTO_PLUGIN}") else() - message( - STATUS - "Could NOT find CLANG/LLVM's plugin (${clang_lto_plugin_name}) for LTO." - ) + message(STATUS "Could NOT find CLANG/LLVM's plugin (${clang_lto_plugin_name}) for LTO.") endif() if(CMAKE_CLANG_LD) message(STATUS "Found CLANG/LLVM's linker for LTO: ${CMAKE_CLANG_LD}") else() - message( - STATUS - "Could NOT find CLANG/LLVM's linker (lld, llvm-ld, llvm-link) for LTO." 
- ) + message(STATUS "Could NOT find CLANG/LLVM's linker (lld, llvm-ld, llvm-link) for LTO.") endif() if(CMAKE_CLANG_AR AND CMAKE_CLANG_RANLIB AND CMAKE_CLANG_NM) - message( - STATUS - "Found CLANG/LLVM's binutils for LTO: ${CMAKE_CLANG_AR}, ${CMAKE_CLANG_RANLIB}, ${CMAKE_CLANG_NM}" - ) + message(STATUS "Found CLANG/LLVM's binutils for LTO: ${CMAKE_CLANG_AR}, ${CMAKE_CLANG_RANLIB}, ${CMAKE_CLANG_NM}") else() - message( - STATUS "Could NOT find CLANG/LLVM's binutils (ar, ranlib, nm) for LTO.") + message(STATUS "Could NOT find CLANG/LLVM's binutils (ar, ranlib, nm) for LTO.") endif() unset(clang_lto_plugin_name) @@ -704,22 +654,17 @@ if(CMAKE_COMPILER_IS_CLANG) AND CMAKE_CLANG_NM AND CMAKE_CLANG_RANLIB AND ((CLANG_LTO_PLUGIN AND CMAKE_LD_GOLD) - OR (CMAKE_CLANG_LD AND NOT (CMAKE_HOST_SYSTEM_NAME STREQUAL "Linux" - AND CMAKE_SYSTEM_NAME STREQUAL "Linux")) - OR APPLE - )) + OR (CMAKE_CLANG_LD AND NOT (CMAKE_HOST_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_NAME STREQUAL "Linux")) + OR APPLE)) if(ANDROID AND CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_VERSION VERSION_LESS 12) set(CLANG_LTO_AVAILABLE FALSE) message( - STATUS - "Link-Time Optimization by CLANG/LLVM is available but unusable due https://reviews.llvm.org/D79919" - ) + STATUS "Link-Time Optimization by CLANG/LLVM is available but unusable due https://reviews.llvm.org/D79919") else() set(CLANG_LTO_AVAILABLE TRUE) message(STATUS "Link-Time Optimization by CLANG/LLVM is available") endif() - elseif(CMAKE_TOOLCHAIN_FILE - AND NOT CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_VERSION VERSION_LESS 7.0) + elseif(CMAKE_TOOLCHAIN_FILE AND NOT CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_VERSION VERSION_LESS 7.0) set(CLANG_LTO_AVAILABLE TRUE) if(NOT CMAKE_CLANG_LD) set(CMAKE_CLANG_LD ${CMAKE_LINKER}) @@ -733,10 +678,7 @@ if(CMAKE_COMPILER_IS_CLANG) if(NOT CMAKE_CLANG_RANLIB) set(CMAKE_CLANG_RANLIB ${CMAKE_RANLIB}) endif() - message( - STATUS - "Assume Link-Time Optimization by CLANG/LLVM is available via ${CMAKE_TOOLCHAIN_FILE}" - ) + 
message(STATUS "Assume Link-Time Optimization by CLANG/LLVM is available via ${CMAKE_TOOLCHAIN_FILE}") else() set(CLANG_LTO_AVAILABLE FALSE) message(STATUS "Link-Time Optimization by CLANG/LLVM is NOT available") @@ -772,8 +714,7 @@ if(ENABLE_BACKTRACE) if(IBERTY_LIBRARY) check_library_exists(${IBERTY_LIBRARY} cplus_demangle "" HAVE_IBERTY_LIB) endif() - set(CMAKE_REQUIRED_DEFINITIONS -DPACKAGE=${PACKAGE} - -DPACKAGE_VERSION=${PACKAGE_VERSION}) + set(CMAKE_REQUIRED_DEFINITIONS -DPACKAGE=${PACKAGE} -DPACKAGE_VERSION=${PACKAGE_VERSION}) check_include_files(bfd.h HAVE_BFD_H) set(CMAKE_REQUIRED_DEFINITIONS) find_package(ZLIB) @@ -783,8 +724,7 @@ if(ENABLE_BACKTRACE) AND ZLIB_FOUND) set(HAVE_BFD ON) set(BFD_LIBRARIES ${BFD_LIBRARY} ${IBERTY_LIBRARY} ${ZLIB_LIBRARIES}) - find_package_message(BFD_LIBRARIES "Found libbfd and dependencies" - ${BFD_LIBRARIES}) + find_package_message(BFD_LIBRARIES "Found libbfd and dependencies" ${BFD_LIBRARIES}) if(TARGET_OS_FREEBSD AND NOT TARGET_OS_DEBIAN_FREEBSD) set(BFD_LIBRARIES ${BFD_LIBRARIES} iconv) endif() @@ -859,12 +799,9 @@ macro(setup_compile_flags) add_compile_flags("C;CXX" "-fno-semantic-interposition") endif() if(MSVC) - # checks for /EHa or /clr options exists, i.e. is enabled structured async - # WinNT exceptions - string(REGEX MATCH "^(.* )*[-/]EHc*a( .*)*$" msvc_async_eh_enabled - "${CXX_FLAGS}" "${C_FLAGS}") - string(REGEX MATCH "^(.* )*[-/]clr( .*)*$" msvc_clr_enabled "${CXX_FLAGS}" - "${C_FLAGS}") + # checks for /EHa or /clr options exists, i.e. is enabled structured async WinNT exceptions + string(REGEX MATCH "^(.* )*[-/]EHc*a( .*)*$" msvc_async_eh_enabled "${CXX_FLAGS}" "${C_FLAGS}") + string(REGEX MATCH "^(.* )*[-/]clr( .*)*$" msvc_clr_enabled "${CXX_FLAGS}" "${C_FLAGS}") # remote any /EH? 
options string(REGEX REPLACE "( *[-/]-*EH[csa]+ *)+" "" CXX_FLAGS "${CXX_FLAGS}") string(REGEX REPLACE "( *[-/]-*EH[csa]+ *)+" "" C_FLAGS "${C_FLAGS}") @@ -887,21 +824,17 @@ macro(setup_compile_flags) endif() endif() - # In C a global variable without a storage specifier (static/extern) and - # without an initialiser is called a ’tentative definition’. The language - # permits multiple tentative definitions in the single translation unit; i.e. - # int foo; int foo; is perfectly ok. GNU toolchain goes even further, allowing - # multiple tentative definitions in *different* translation units. Internally, - # variables introduced via tentative definitions are implemented as ‘common’ - # symbols. Linker permits multiple definitions if they are common symbols, and - # it picks one arbitrarily for inclusion in the binary being linked. + # In C a global variable without a storage specifier (static/extern) and without an initialiser is called a ’tentative + # definition’. The language permits multiple tentative definitions in the single translation unit; i.e. int foo; int + # foo; is perfectly ok. GNU toolchain goes even further, allowing multiple tentative definitions in *different* + # translation units. Internally, variables introduced via tentative definitions are implemented as ‘common’ symbols. + # Linker permits multiple definitions if they are common symbols, and it picks one arbitrarily for inclusion in the + # binary being linked. # - # -fno-common forces GNU toolchain to behave in a more standard-conformant way - # in respect to tentative definitions and it prevents common symbols - # generation. Since we are a cross-platform project it really makes sense. - # There are toolchains that don’t implement GNU style handling of the - # tentative definitions and there are platforms lacking proper support for - # common symbols (osx). 
+ # -fno-common forces GNU toolchain to behave in a more standard-conformant way in respect to tentative definitions and + # it prevents common symbols generation. Since we are a cross-platform project it really makes sense. There are + # toolchains that don’t implement GNU style handling of the tentative definitions and there are platforms lacking + # proper support for common symbols (osx). if(CC_HAS_FNO_COMMON) add_compile_flags("C;CXX" "-fno-common") endif() @@ -920,9 +853,8 @@ macro(setup_compile_flags) add_compile_flags("C;CXX" "/Gy") endif() - # We must set -fno-omit-frame-pointer here, since we rely on frame pointer - # when getting a backtrace, and it must be used consistently across all object - # files. The same reasoning applies to -fno-stack-protector switch. + # We must set -fno-omit-frame-pointer here, since we rely on frame pointer when getting a backtrace, and it must be + # used consistently across all object files. The same reasoning applies to -fno-stack-protector switch. if(ENABLE_BACKTRACE) if(CC_HAS_FNO_OMIT_FRAME_POINTER) add_compile_flags("C;CXX" "-fno-omit-frame-pointer") @@ -933,8 +865,7 @@ macro(setup_compile_flags) if(MSVC_VERSION LESS 1900) message( FATAL_ERROR - "At least \"Microsoft C/C++ Compiler\" version 19.0.24234.1 (Visual Studio 2015 Update 3) is required." - ) + "At least \"Microsoft C/C++ Compiler\" version 19.0.24234.1 (Visual Studio 2015 Update 3) is required.") endif() if(NOT MSVC_VERSION LESS 1910) add_compile_flags("CXX" "/Zc:__cplusplus") @@ -955,8 +886,7 @@ macro(setup_compile_flags) add_definitions("-D__STDC_CONSTANT_MACROS=1") add_definitions("-D_HAS_EXCEPTIONS=1") - # Only add -Werror if it's a debug build, done by developers. Release builds - # should not cause extra trouble. + # Only add -Werror if it's a debug build, done by developers. Release builds should not cause extra trouble. 
if(CC_HAS_WERROR AND (CI OR CMAKE_CONFIGURATION_TYPES @@ -976,17 +906,15 @@ macro(setup_compile_flags) endif() endif() - if(CMAKE_COMPILER_IS_GNU${CMAKE_PRIMARY_LANG} - AND CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_VERSION VERSION_LESS 5) + if(CMAKE_COMPILER_IS_GNU${CMAKE_PRIMARY_LANG} AND CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_VERSION VERSION_LESS 5) # G++ bug. http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31488 add_compile_flags("CXX" "-Wno-invalid-offsetof") endif() if(MINGW) - # Disable junk MINGW's warnings that issued due to incompatibilities and - # shortcomings of MINGW, since the code is checked by builds with GCC, CLANG - # and MSVC. - add_compile_flags("C;CXX" "-Wno-format-extra-args" "-Wno-format" - "-Wno-cast-function-type" "-Wno-implicit-fallthrough") + # Disable junk MINGW's warnings that issued due to incompatibilities and shortcomings of MINGW, since the code is + # checked by builds with GCC, CLANG and MSVC. + add_compile_flags("C;CXX" "-Wno-format-extra-args" "-Wno-format" "-Wno-cast-function-type" + "-Wno-implicit-fallthrough") endif() if(ENABLE_ASAN) @@ -1000,8 +928,7 @@ macro(setup_compile_flags) if(ENABLE_UBSAN) if(NOT MSVC) - add_compile_flags("C;CXX" "-fsanitize=undefined" - "-fsanitize-undefined-trap-on-error") + add_compile_flags("C;CXX" "-fsanitize=undefined" "-fsanitize-undefined-trap-on-error") else() add_compile_flags("C;CXX" "/fsanitize=undefined") endif() @@ -1010,17 +937,13 @@ macro(setup_compile_flags) if(ENABLE_GCOV) if(NOT HAVE_GCOV) - message( - FATAL_ERROR "ENABLE_GCOV option requested but gcov library is not found" - ) + message(FATAL_ERROR "ENABLE_GCOV option requested but gcov library is not found") endif() add_compile_flags("C;CXX" "-fprofile-arcs" "-ftest-coverage") set(EXE_LINKER_FLAGS "${EXE_LINKER_FLAGS} -fprofile-arcs -ftest-coverage") - set(SHARED_LINKER_FLAGS - "${SHARED_LINKER_FLAGS} -fprofile-arcs -ftest-coverage") - set(MODULE_LINKER_FLAGS - "${MODULE_LINKER_FLAGS} -fprofile-arcs -ftest-coverage") + 
set(SHARED_LINKER_FLAGS "${SHARED_LINKER_FLAGS} -fprofile-arcs -ftest-coverage") + set(MODULE_LINKER_FLAGS "${MODULE_LINKER_FLAGS} -fprofile-arcs -ftest-coverage") # add_library(gcov SHARED IMPORTED) endif() @@ -1030,16 +953,12 @@ macro(setup_compile_flags) if(CMAKE_COMPILER_IS_GNUCC AND LTO_ENABLED) add_compile_flags("C;CXX" ${GCC_LTO_CFLAGS}) - set(EXE_LINKER_FLAGS - "${EXE_LINKER_FLAGS} ${GCC_LTO_CFLAGS} -fverbose-asm -fwhole-program") - set(SHARED_LINKER_FLAGS - "${SHARED_LINKER_FLAGS} ${GCC_LTO_CFLAGS} -fverbose-asm") - set(MODULE_LINKER_FLAGS - "${MODULE_LINKER_FLAGS} ${GCC_LTO_CFLAGS} -fverbose-asm") + set(EXE_LINKER_FLAGS "${EXE_LINKER_FLAGS} ${GCC_LTO_CFLAGS} -fverbose-asm -fwhole-program") + set(SHARED_LINKER_FLAGS "${SHARED_LINKER_FLAGS} ${GCC_LTO_CFLAGS} -fverbose-asm") + set(MODULE_LINKER_FLAGS "${MODULE_LINKER_FLAGS} ${GCC_LTO_CFLAGS} -fverbose-asm") if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5) # Pass the same optimization flags to the linker - set(compile_flags - "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPERCASE}}") + set(compile_flags "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPERCASE}}") set(EXE_LINKER_FLAGS "${EXE_LINKER_FLAGS} ${compile_flags}") set(SHARED_LINKER_FLAGS "${SHARED_LINKER_FLAGS} ${compile_flags}") set(MODULE_LINKER_FLAGS "${MODULE_LINKER_FLAGS} ${compile_flags}") @@ -1055,9 +974,8 @@ macro(setup_compile_flags) add_compile_flags("C;CXX" "/GL") foreach(linkmode IN ITEMS EXE SHARED STATIC MODULE) set(${linkmode}_LINKER_FLAGS "${${linkmode}_LINKER_FLAGS} /LTCG") - string(REGEX - REPLACE "^(.*)(/INCREMENTAL)(:YES)?(:NO)?( ?.*)$" "\\1\\2:NO\\5" - ${linkmode}_LINKER_FLAGS "${${linkmode}_LINKER_FLAGS}") + string(REGEX REPLACE "^(.*)(/INCREMENTAL)(:YES)?(:NO)?( ?.*)$" "\\1\\2:NO\\5" ${linkmode}_LINKER_FLAGS + "${${linkmode}_LINKER_FLAGS}") string(STRIP "${${linkmode}_LINKER_FLAGS}" ${linkmode}_LINKER_FLAGS) foreach( config IN @@ -1065,14 +983,10 @@ macro(setup_compile_flags) ITEMS Release MinSizeRel RelWithDebInfo 
Debug) string(TOUPPER "${config}" config_uppercase) if(DEFINED "CMAKE_${linkmode}_LINKER_FLAGS_${config_uppercase}") - string( - REGEX - REPLACE "^(.*)(/INCREMENTAL)(:YES)?(:NO)?( ?.*)$" "\\1\\2:NO\\5" - altered_flags - "${CMAKE_${linkmode}_LINKER_FLAGS_${config_uppercase}}") + string(REGEX REPLACE "^(.*)(/INCREMENTAL)(:YES)?(:NO)?( ?.*)$" "\\1\\2:NO\\5" altered_flags + "${CMAKE_${linkmode}_LINKER_FLAGS_${config_uppercase}}") string(STRIP "${altered_flags}" altered_flags) - if(NOT "${altered_flags}" STREQUAL - "${CMAKE_${linkmode}_LINKER_FLAGS_${config_uppercase}}") + if(NOT "${altered_flags}" STREQUAL "${CMAKE_${linkmode}_LINKER_FLAGS_${config_uppercase}}") set(CMAKE_${linkmode}_LINKER_FLAGS_${config_uppercase} "${altered_flags}" CACHE STRING "Altered: '/INCREMENTAL' removed for LTO" FORCE) @@ -1089,10 +1003,8 @@ macro(setup_compile_flags) foreach(lang IN ITEMS C CXX) string(TOUPPER "${config}" config_uppercase) if(DEFINED "CMAKE_${lang}_FLAGS_${config_uppercase}") - string(REPLACE "/O2" "/Ox" altered_flags - "${CMAKE_${lang}_FLAGS_${config_uppercase}}") - if(NOT "${altered_flags}" STREQUAL - "${CMAKE_${lang}_FLAGS_${config_uppercase}}") + string(REPLACE "/O2" "/Ox" altered_flags "${CMAKE_${lang}_FLAGS_${config_uppercase}}") + if(NOT "${altered_flags}" STREQUAL "${CMAKE_${lang}_FLAGS_${config_uppercase}}") set(CMAKE_${lang}_FLAGS_${config_uppercase} "${altered_flags}" CACHE STRING "Altered: '/O2' replaced by '/Ox' for LTO" FORCE) @@ -1120,12 +1032,9 @@ macro(setup_compile_flags) endif() add_compile_flags("C;CXX" ${CLANG_LTO_FLAG}) if(NOT MSVC) - set(EXE_LINKER_FLAGS - "${EXE_LINKER_FLAGS} ${CLANG_LTO_FLAG} -fverbose-asm -fwhole-program") - set(SHARED_LINKER_FLAGS - "${SHARED_LINKER_FLAGS} ${CLANG_LTO_FLAG} -fverbose-asm") - set(MODULE_LINKER_FLAGS - "${MODULE_LINKER_FLAGS} ${CLANG_LTO_FLAG} -fverbose-asm") + set(EXE_LINKER_FLAGS "${EXE_LINKER_FLAGS} ${CLANG_LTO_FLAG} -fverbose-asm -fwhole-program") + set(SHARED_LINKER_FLAGS "${SHARED_LINKER_FLAGS} 
${CLANG_LTO_FLAG} -fverbose-asm") + set(MODULE_LINKER_FLAGS "${MODULE_LINKER_FLAGS} ${CLANG_LTO_FLAG} -fverbose-asm") endif() endif() @@ -1133,15 +1042,13 @@ macro(setup_compile_flags) if(CMAKE_CXX_COMPILER_LOADED) set(CMAKE_CXX_FLAGS ${CXX_FLAGS} - CACHE STRING "Flags used by the C++ compiler during all build types" - FORCE) + CACHE STRING "Flags used by the C++ compiler during all build types" FORCE) unset(CXX_FLAGS) endif() if(CMAKE_C_COMPILER_LOADED) set(CMAKE_C_FLAGS ${C_FLAGS} - CACHE STRING "Flags used by the C compiler during all build types" - FORCE) + CACHE STRING "Flags used by the C compiler during all build types" FORCE) unset(C_FLAGS) endif() set(CMAKE_EXE_LINKER_FLAGS @@ -1149,17 +1056,13 @@ macro(setup_compile_flags) CACHE STRING "Flags used by the linker" FORCE) set(CMAKE_SHARED_LINKER_FLAGS ${SHARED_LINKER_FLAGS} - CACHE STRING "Flags used by the linker during the creation of dll's" - FORCE) + CACHE STRING "Flags used by the linker during the creation of dll's" FORCE) set(CMAKE_STATIC_LINKER_FLAGS ${STATIC_LINKER_FLAGS} - CACHE STRING - "Flags used by the linker during the creation of static libraries" - FORCE) + CACHE STRING "Flags used by the linker during the creation of static libraries" FORCE) set(CMAKE_MODULE_LINKER_FLAGS ${MODULE_LINKER_FLAGS} - CACHE STRING "Flags used by the linker during the creation of modules" - FORCE) + CACHE STRING "Flags used by the linker during the creation of modules" FORCE) unset(EXE_LINKER_FLAGS) unset(SHARED_LINKER_FLAGS) unset(STATIC_LINKER_FLAGS) @@ -1183,9 +1086,7 @@ macro(probe_libcxx_filesystem) if(NOT DEFINED CMAKE_CXX_STANDARD) list(FIND CMAKE_CXX_COMPILE_FEATURES cxx_std_14 HAS_CXX14) list(FIND CMAKE_CXX_COMPILE_FEATURES cxx_std_17 HAS_CXX17) - if(NOT HAS_CXX17 LESS 0 - AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION - VERSION_LESS 5)) + if(NOT HAS_CXX17 LESS 0 AND NOT (CMAKE_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5)) set(CMAKE_CXX_STANDARD 17) elseif(NOT 
HAS_CXX14 LESS 0) set(CMAKE_CXX_STANDARD 14) @@ -1198,14 +1099,11 @@ macro(probe_libcxx_filesystem) endif() set(stdfs_probe_clear_cxx_standard ON) endif() - if(CMAKE_COMPILER_IS_ELBRUSCXX AND CMAKE_CXX_COMPILER_VERSION - VERSION_LESS 1.25.23) + if(CMAKE_COMPILER_IS_ELBRUSCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 1.25.23) if(CMAKE_VERSION VERSION_LESS 3.14) - set(stdfs_probe_flags ${stdfs_probe_flags} - "-Wl,--allow-multiple-definition") + set(stdfs_probe_flags ${stdfs_probe_flags} "-Wl,--allow-multiple-definition") else() - set(CMAKE_REQUIRED_LINK_OPTIONS ${stdfs_probe_save_link_options} - "-Wl,--allow-multiple-definition") + set(CMAKE_REQUIRED_LINK_OPTIONS ${stdfs_probe_save_link_options} "-Wl,--allow-multiple-definition") endif() endif() set(CMAKE_REQUIRED_FLAGS ${stdfs_probe_flags}) @@ -1261,40 +1159,25 @@ macro(probe_libcxx_filesystem) check_cxx_source_compiles("${stdfs_probe_code}" LIBCXX_FILESYSTEM_none) if(LIBCXX_FILESYSTEM_none) - message( - STATUS "No linking with additional library needed for std::filesystem" - ) + message(STATUS "No linking with additional library needed for std::filesystem") else() set(CMAKE_REQUIRED_LIBRARIES ${stdfs_probe_save_libraries} "stdc++fs") - check_cxx_source_compiles("${stdfs_probe_code}" - LIBCXX_FILESYSTEM_stdcxxfs) + check_cxx_source_compiles("${stdfs_probe_code}" LIBCXX_FILESYSTEM_stdcxxfs) if(LIBCXX_FILESYSTEM_stdcxxfs) set(LIBCXX_FILESYSTEM "stdc++fs") - message( - STATUS - "Linking with ${LIBCXX_FILESYSTEM} is required for std::filesystem" - ) + message(STATUS "Linking with ${LIBCXX_FILESYSTEM} is required for std::filesystem") else() set(CMAKE_REQUIRED_LIBRARIES ${stdfs_probe_save_libraries} "c++fs") - check_cxx_source_compiles("${stdfs_probe_code}" - LIBCXX_FILESYSTEM_cxxfs) + check_cxx_source_compiles("${stdfs_probe_code}" LIBCXX_FILESYSTEM_cxxfs) if(LIBCXX_FILESYSTEM_cxxfs) set(LIBCXX_FILESYSTEM "c++fs") - message( - STATUS - "Linking with ${LIBCXX_FILESYSTEM} is required for std::filesystem" - ) + 
message(STATUS "Linking with ${LIBCXX_FILESYSTEM} is required for std::filesystem") else() - set(CMAKE_REQUIRED_LIBRARIES ${stdfs_probe_save_libraries} - "c++experimental") - check_cxx_source_compiles("${stdfs_probe_code}" - LIBCXX_FILESYSTEM_cxxexperimental) + set(CMAKE_REQUIRED_LIBRARIES ${stdfs_probe_save_libraries} "c++experimental") + check_cxx_source_compiles("${stdfs_probe_code}" LIBCXX_FILESYSTEM_cxxexperimental) if(LIBCXX_FILESYSTEM_cxxexperimental) set(LIBCXX_FILESYSTEM "c++experimental") - message( - STATUS - "Linking with ${LIBCXX_FILESYSTEM} is required for std::filesystem" - ) + message(STATUS "Linking with ${LIBCXX_FILESYSTEM} is required for std::filesystem") else() message(STATUS "No support for std::filesystem") endif() diff --git a/cmake/profile.cmake b/cmake/profile.cmake index d325724e..dc930f3a 100644 --- a/cmake/profile.cmake +++ b/cmake/profile.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2012-2024 Леонид Юрьев aka Leonid Yuriev +# Copyright (c) 2012-2024 Леонид Юрьев aka Leonid Yuriev ############################################### # SPDX-License-Identifier: Apache-2.0 if(CMAKE_VERSION VERSION_LESS 3.8.2) @@ -22,8 +22,7 @@ if(NOT DEFINED ENABLE_MEMCHECK) set(MEMCHECK_OPTION_NAME "ENABLE_MEMCHECK") endif() if(MEMCHECK_OPTION_NAME STREQUAL "ENABLE_MEMCHECK") - option(ENABLE_MEMCHECK - "Enable integration with valgrind, a memory analyzing tool" OFF) + option(ENABLE_MEMCHECK "Enable integration with valgrind, a memory analyzing tool" OFF) elseif(${MEMCHECK_OPTION_NAME}) set(ENABLE_MEMCHECK ON) else() @@ -36,18 +35,12 @@ check_library_exists(gcov __gcov_flush "" HAVE_GCOV) option(ENABLE_GCOV "Enable integration with gcov, a code coverage program" OFF) -option(ENABLE_GPROF - "Enable integration with gprof, a performance analyzing tool" OFF) +option(ENABLE_GPROF "Enable integration with gprof, a performance analyzing tool" OFF) -option( - ENABLE_ASAN - "Enable AddressSanitizer, a fast memory error detector based on compiler instrumentation" - OFF) 
+option(ENABLE_ASAN "Enable AddressSanitizer, a fast memory error detector based on compiler instrumentation" OFF) -option( - ENABLE_UBSAN - "Enable UndefinedBehaviorSanitizer, a fast undefined behavior detector based on compiler instrumentation" - OFF) +option(ENABLE_UBSAN + "Enable UndefinedBehaviorSanitizer, a fast undefined behavior detector based on compiler instrumentation" OFF) if(ENABLE_MEMCHECK) if(CMAKE_CXX_COMPILER_LOADED) @@ -58,10 +51,7 @@ if(ENABLE_MEMCHECK) check_include_file(valgrind/memcheck.h HAVE_VALGRIND_MEMCHECK_H) endif() if(NOT HAVE_VALGRIND_MEMCHECK_H) - message( - FATAL_ERROR - "${MEMCHECK_OPTION_NAME} option is set but valgrind/memcheck.h is not found" - ) + message(FATAL_ERROR "${MEMCHECK_OPTION_NAME} option is set but valgrind/memcheck.h is not found") endif() endif() diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 8e5ba6de..547040ae 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2012-2024 Леонид Юрьев aka Leonid Yuriev +# Copyright (c) 2012-2024 Леонид Юрьев aka Leonid Yuriev ############################################### # SPDX-License-Identifier: Apache-2.0 if(CMAKE_VERSION VERSION_LESS 3.8.2) @@ -27,10 +27,8 @@ macro(add_compile_flags languages) endmacro(add_compile_flags) macro(remove_flag varname flag) - string(REGEX REPLACE "^(.*)( ${flag} )(.*)$" "\\1 \\3" ${varname} - ${${varname}}) - string(REGEX REPLACE "^((.+ )*)(${flag})(( .+)*)$" "\\1\\4" ${varname} - ${${varname}}) + string(REGEX REPLACE "^(.*)( ${flag} )(.*)$" "\\1 \\3" ${varname} ${${varname}}) + string(REGEX REPLACE "^((.+ )*)(${flag})(( .+)*)$" "\\1\\4" ${varname} ${${varname}}) endmacro(remove_flag) macro(remove_compile_flag languages flag) @@ -51,9 +49,8 @@ macro(set_source_files_compile_flags) set(_lang "") if("${_file_ext}" STREQUAL ".m") set(_lang OBJC) - # CMake believes that Objective C is a flavor of C++, not C, and uses g++ - # compiler for .m files. 
LANGUAGE property forces CMake to use CC for - # ${file} + # CMake believes that Objective C is a flavor of C++, not C, and uses g++ compiler for .m files. LANGUAGE property + # forces CMake to use CC for ${file} set_source_files_properties(${file} PROPERTIES LANGUAGE C) elseif("${_file_ext}" STREQUAL ".mm") set(_lang OBJCXX) @@ -87,10 +84,8 @@ macro(semver_parse str) set(_semver_prerelease "") set(_semver_buildmetadata_withplus "") set(_semver_buildmetadata "") - if("${str}" - MATCHES - "^v?(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)(\\.(0|[1-9][0-9]*))?([-+]-*[0-9a-zA-Z]+.*)?$" - ) + if("${str}" MATCHES + "^v?(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)(\\.(0|[1-9][0-9]*))?([-+]-*[0-9a-zA-Z]+.*)?$") set(_semver_major ${CMAKE_MATCH_1}) set(_semver_minor ${CMAKE_MATCH_2}) set(_semver_patch ${CMAKE_MATCH_3}) @@ -102,13 +97,11 @@ macro(semver_parse str) elseif("${_semver_extra}" MATCHES "^([.-][a-zA-Z0-9-]+)*(\\+[^+]+)?$") set(_semver_prerelease_withdash "${CMAKE_MATCH_1}") if(NOT "${_semver_prerelease_withdash}" STREQUAL "") - string(SUBSTRING "${_semver_prerelease_withdash}" 1 -1 - _semver_prerelease) + string(SUBSTRING "${_semver_prerelease_withdash}" 1 -1 _semver_prerelease) endif() set(_semver_buildmetadata_withplus "${CMAKE_MATCH_2}") if(NOT "${_semver_buildmetadata_withplus}" STREQUAL "") - string(SUBSTRING "${_semver_buildmetadata_withplus}" 1 -1 - _semver_buildmetadata) + string(SUBSTRING "${_semver_buildmetadata_withplus}" 1 -1 _semver_buildmetadata) endif() set(_semver_ok TRUE) else() @@ -117,19 +110,14 @@ macro(semver_parse str) ) endif() else() - set(_semver_err - "Версионная отметка в целом не соответствует шаблону `0.0.0[.0][-foo][+bar]` SemVer-спецификации" - ) + set(_semver_err "Версионная отметка в целом не соответствует шаблону `0.0.0[.0][-foo][+bar]` SemVer-спецификации") endif() endmacro(semver_parse) function(_semver_parse_probe str expect) semver_parse(${str}) if(expect AND NOT _semver_ok) - message( - FATAL_ERROR - 
"semver_parse(${str}) expect SUCCESS, got ${_semver_ok}: ${_semver_err}" - ) + message(FATAL_ERROR "semver_parse(${str}) expect SUCCESS, got ${_semver_ok}: ${_semver_err}") elseif(NOT expect AND _semver_ok) message(FATAL_ERROR "semver_parse(${str}) expect FAIL, got ${_semver_ok}") endif() @@ -150,8 +138,7 @@ function(semver_parse_selfcheck) _semver_parse_probe("1.0.0-alpha.1" TRUE) _semver_parse_probe("1.0.0-alpha0.valid" TRUE) _semver_parse_probe("1.0.0-alpha.0valid" TRUE) - _semver_parse_probe("1.0.0-alpha-a.b-c-somethinglong+build.1-aef.1-its-okay" - TRUE) + _semver_parse_probe("1.0.0-alpha-a.b-c-somethinglong+build.1-aef.1-its-okay" TRUE) _semver_parse_probe("1.0.0-rc.1+build.1" TRUE) _semver_parse_probe("2.0.0-rc.1+build.123" TRUE) _semver_parse_probe("1.2.3-beta" TRUE) @@ -167,14 +154,12 @@ function(semver_parse_selfcheck) _semver_parse_probe("1.2.3----R-S.12.9.1--.12+meta" TRUE) _semver_parse_probe("1.2.3----RC-SNAPSHOT.12.9.1--.12" TRUE) _semver_parse_probe("1.0.0+0.build.1-rc.10000aaa-kk-0.1" TRUE) - _semver_parse_probe( - "99999999999999999999999.999999999999999999.99999999999999999" TRUE) + _semver_parse_probe("99999999999999999999999.999999999999999999.99999999999999999" TRUE) _semver_parse_probe("v1.0.0-0A.is.legal" TRUE) _semver_parse_probe("1" FALSE) _semver_parse_probe("1.2" FALSE) - # _semver_parse_probe("1.2.3-0123" FALSE) - # _semver_parse_probe("1.2.3-0123.0123" FALSE) + # _semver_parse_probe("1.2.3-0123" FALSE) _semver_parse_probe("1.2.3-0123.0123" FALSE) _semver_parse_probe("1.1.2+.123" FALSE) _semver_parse_probe("+invalid" FALSE) _semver_parse_probe("-invalid" FALSE) @@ -238,10 +223,7 @@ macro(git_get_versioninfo source_root_directory) WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc OR "${_git_timestamp}" STREQUAL "%ci") - message( - FATAL_ERROR - "Please install latest version of git (`show --no-patch --format=%cI HEAD` failed)" - ) + message(FATAL_ERROR "Please install latest version of git (`show --no-patch 
--format=%cI HEAD` failed)") endif() endif() @@ -252,10 +234,7 @@ macro(git_get_versioninfo source_root_directory) WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc OR "${_git_tree}" STREQUAL "") - message( - FATAL_ERROR - "Please install latest version of git (`show --no-patch --format=%T HEAD` failed)" - ) + message(FATAL_ERROR "Please install latest version of git (`show --no-patch --format=%T HEAD` failed)") endif() execute_process( @@ -265,10 +244,7 @@ macro(git_get_versioninfo source_root_directory) WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc OR "${_git_commit}" STREQUAL "") - message( - FATAL_ERROR - "Please install latest version of git (`show --no-patch --format=%H HEAD` failed)" - ) + message(FATAL_ERROR "Please install latest version of git (`show --no-patch --format=%H HEAD` failed)") endif() execute_process( @@ -278,10 +254,7 @@ macro(git_get_versioninfo source_root_directory) WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc) - message( - FATAL_ERROR - "Please install latest version of git (`status --untracked-files=no --porcelain` failed)" - ) + message(FATAL_ERROR "Please install latest version of git (`status --untracked-files=no --porcelain` failed)") endif() if(NOT "${_git_status}" STREQUAL "") set(_git_commit "DIRTY-${_git_commit}") @@ -311,18 +284,12 @@ macro(git_get_versioninfo source_root_directory) if(_rc) message( FATAL_ERROR - "Please install latest version of git (`git rev-list --count --no-merges --remove-empty HEAD` failed)" - ) + "Please install latest version of git (`git rev-list --count --no-merges --remove-empty HEAD` failed)") endif() if(_git_whole_count GREATER 42 AND "${_git_tags_dump}" STREQUAL "") - message( - FATAL_ERROR - "Please fetch tags (`describe --tags --abbrev=0 --match=v[0-9]*` failed)" - ) + message(FATAL_ERROR "Please fetch tags (`describe --tags --abbrev=0 --match=v[0-9]*` failed)") else() - message( - NOTICE - "Falling back to version `0.0.0` 
(have you made an initial release?") + message(NOTICE "Falling back to version `0.0.0` (have you made an initial release?") endif() set(_git_last_vtag "0.0.0") set(_git_trailing_commits ${_git_whole_count}) @@ -340,10 +307,7 @@ macro(git_get_versioninfo source_root_directory) WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc OR "${_git_describe}" STREQUAL "") - message( - FATAL_ERROR - "Please install latest version of git (`describe --tags --all --long` failed)" - ) + message(FATAL_ERROR "Please install latest version of git (`describe --tags --all --long` failed)") endif() endif() else() @@ -354,10 +318,7 @@ macro(git_get_versioninfo source_root_directory) WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc OR "${_git_describe}" STREQUAL "") - message( - FATAL_ERROR - "Please install latest version of git (`describe --tags --long --match=v[0-9]*`)" - ) + message(FATAL_ERROR "Please install latest version of git (`describe --tags --long --match=v[0-9]*`)") endif() execute_process( COMMAND ${GIT} rev-list --count "${_git_last_vtag}..HEAD" @@ -366,16 +327,12 @@ macro(git_get_versioninfo source_root_directory) WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE _rc) if(_rc OR "${_git_trailing_commits}" STREQUAL "") - message( - FATAL_ERROR - "Please install latest version of git (`rev-list --count ${_git_last_vtag}..HEAD` failed)" - ) + message(FATAL_ERROR "Please install latest version of git (`rev-list --count ${_git_last_vtag}..HEAD` failed)") endif() endif() endmacro(git_get_versioninfo) -macro(semver_provide name source_root_directory build_directory_for_json_output - build_metadata parent_scope) +macro(semver_provide name source_root_directory build_directory_for_json_output build_metadata parent_scope) set(_semver "") set(_git_describe "") set(_git_timestamp "") @@ -395,11 +352,9 @@ macro(semver_provide name source_root_directory build_directory_for_json_output RESULT_VARIABLE _rc) if(_rc OR "${_git_root}" 
STREQUAL "") if(EXISTS "${source_root_directory}/.git") - message(ERROR - "`git rev-parse --show-toplevel` failed '${_git_root_error}'") + message(ERROR "`git rev-parse --show-toplevel` failed '${_git_root_error}'") else() - message(VERBOSE - "`git rev-parse --show-toplevel` failed '${_git_root_error}'") + message(VERBOSE "`git rev-parse --show-toplevel` failed '${_git_root_error}'") endif() else() set(_source_root "${source_root_directory}") @@ -420,8 +375,7 @@ macro(semver_provide name source_root_directory build_directory_for_json_output set(_version_from "${source_root_directory}/VERSION.json") if(CMAKE_VERSION VERSION_LESS 3.19) - message( - FATAL_ERROR "Требуется CMake версии >= 3.19 для чтения VERSION.json") + message(FATAL_ERROR "Требуется CMake версии >= 3.19 для чтения VERSION.json") endif() file( STRINGS "${_version_from}" _versioninfo_json NEWLINE_CONSUME @@ -435,14 +389,11 @@ macro(semver_provide name source_root_directory build_directory_for_json_output string(JSON _semver GET "${_versioninfo_json}" "semver") unset(_json_object) if(NOT _semver) - message( - FATAL_ERROR - "Unable to retrieve ${name} version from \"${_version_from}\" file.") + message(FATAL_ERROR "Unable to retrieve ${name} version from \"${_version_from}\" file.") endif() semver_parse("${_semver}") if(NOT _semver_ok) - message( - FATAL_ERROR "SemVer `${_semver}` from ${_version_from}: ${_semver_err}") + message(FATAL_ERROR "SemVer `${_semver}` from ${_version_from}: ${_semver_err}") endif() elseif(_git_root AND _source_root STREQUAL _git_root) set(_version_from git) @@ -551,8 +502,7 @@ macro(semver_provide name source_root_directory build_directory_for_json_output \"semver\" : \"@_semver@\"\n}" _versioninfo_json @ONLY ESCAPE_QUOTES) - file(WRITE "${build_directory_for_json_output}/VERSION.json" - "${_versioninfo_json}") + file(WRITE "${build_directory_for_json_output}/VERSION.json" "${_versioninfo_json}") endif() endmacro(semver_provide) diff --git a/test/CMakeLists.txt 
b/test/CMakeLists.txt index 7e884c55..5a162685 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024 Леонид Юрьев aka Leonid Yuriev +# Copyright (c) 2020-2024 Леонид Юрьев aka Leonid Yuriev ############################################### # SPDX-License-Identifier: Apache-2.0 enable_language(CXX) @@ -8,14 +8,10 @@ function(add_extra_test name) set(options DISABLED) set(oneValueArgs TIMEOUT) set(multiValueArgs SOURCE LIBRARY DEPEND DLLPATH) - cmake_parse_arguments(params "${options}" "${oneValueArgs}" - "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(params "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) if(params_UNPARSED_ARGUMENTS) - message( - FATAL_ERROR - "Unknown keywords given to add_extra_test(): \"${params_UNPARSED_ARGUMENTS}\"." - ) + message(FATAL_ERROR "Unknown keywords given to add_extra_test(): \"${params_UNPARSED_ARGUMENTS}\".") endif() macro(oops) @@ -30,12 +26,10 @@ function(add_extra_test name) add_executable(${target} ${params_SOURCE}) target_include_directories(${target} PRIVATE "${PROJECT_SOURCE_DIR}") target_link_libraries(${target} ${TOOL_MDBX_LIB}) - set_target_properties(${target} PROPERTIES SKIP_BUILD_RPATH FALSE - BUILD_WITH_INSTALL_RPATH FALSE) + set_target_properties(${target} PROPERTIES SKIP_BUILD_RPATH FALSE BUILD_WITH_INSTALL_RPATH FALSE) if(MDBX_BUILD_CXX AND MDBX_CXX_STANDARD) - set_target_properties(${target} PROPERTIES CXX_STANDARD ${MDBX_CXX_STANDARD} - CXX_STANDARD_REQUIRED ON) + set_target_properties(${target} PROPERTIES CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) endif() if(params_DEPEND) @@ -47,32 +41,25 @@ function(add_extra_test name) foreach(dep IN LISTS params_LIBRARY) get_target_property(type ${dep} TYPE) if(type STREQUAL SHARED_LIBRARY) - # Windows don't have RPATH feature, therefore we should prepare PATH or - # copy DLL(s)... + # Windows don't have RPATH feature, therefore we should prepare PATH or copy DLL(s)... 
if(CMAKE_CONFIGURATION_TYPES) - # Could not provide static ENVIRONMENT property with - # configuration-depended path + # Could not provide static ENVIRONMENT property with configuration-depended path set(dir FALSE) else(CMAKE_CONFIGURATION_TYPES) - get_target_property(filename ${dep} - IMPORTED_LOCATION_${CMAKE_BUILD_TYPE_UPPERCASE}) + get_target_property(filename ${dep} IMPORTED_LOCATION_${CMAKE_BUILD_TYPE_UPPERCASE}) if(NOT filename) get_target_property(filename ${dep} IMPORTED_LOCATION) endif() - get_target_property(filename ${dep} - LOCATION_${CMAKE_BUILD_TYPE_UPPERCASE}) + get_target_property(filename ${dep} LOCATION_${CMAKE_BUILD_TYPE_UPPERCASE}) if(NOT filename) get_target_property(filename ${dep} LOCATION) endif() if(filename) get_filename_component(dir ${filename} DIRECTORY) else(filename) - get_target_property( - dir ${dep} LIBRARY_OUTPUT_DIRECTORY_${CMAKE_BUILD_TYPE_UPPERCASE}) + get_target_property(dir ${dep} LIBRARY_OUTPUT_DIRECTORY_${CMAKE_BUILD_TYPE_UPPERCASE}) if(NOT dir) - get_target_property( - dir ${dep} - RUNTIME_OUTPUT_DIRECTORY_${CMAKE_BUILD_TYPE_UPPERCASE}) + get_target_property(dir ${dep} RUNTIME_OUTPUT_DIRECTORY_${CMAKE_BUILD_TYPE_UPPERCASE}) endif() if(NOT dir) get_target_property(dir ${dep} LIBRARY_OUTPUT_DIRECTORY) @@ -89,32 +76,25 @@ function(add_extra_test name) add_custom_command( TARGET ${target} POST_BUILD - COMMAND - if exist "$" ${CMAKE_COMMAND} -E - copy_if_different "$" - "$") + COMMAND if exist "$" ${CMAKE_COMMAND} -E copy_if_different + "$" "$") add_custom_command( TARGET ${target} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different - "$" "$" - COMMENT - "${TOOL_MDBX_DLLCRUTCH}: Copy shared library ${dep} for test ${target}" - ) + COMMAND ${CMAKE_COMMAND} -E copy_if_different "$" "$" + COMMENT "${TOOL_MDBX_DLLCRUTCH}: Copy shared library ${dep} for test ${target}") endif(dir) endif() endforeach(dep) endif(TOOL_MDBX_DLLCRUTCH) - if(NOT params_DISABLED AND NOT (CMAKE_CROSSCOMPILING - AND NOT 
CMAKE_CROSSCOMPILING_EMULATOR)) + if(NOT params_DISABLED AND NOT (CMAKE_CROSSCOMPILING AND NOT CMAKE_CROSSCOMPILING_EMULATOR)) add_test(extra_${name} ${MDBX_OUTPUT_DIR}/${target}) if(params_TIMEOUT) if(MEMORYCHECK_COMMAND OR CMAKE_MEMORYCHECK_COMMAND OR ENABLE_MEMCHECK) - # FIXME: unless there are any other ideas how to fix the timeouts - # problem when testing under Valgrind. + # FIXME: unless there are any other ideas how to fix the timeouts problem when testing under Valgrind. math(EXPR params_TIMEOUT "${params_TIMEOUT} * 42") endif() set_tests_properties(extra_${name} PROPERTIES TIMEOUT ${params_TIMEOUT}) @@ -133,8 +113,7 @@ function(add_extra_test name) else() string(REPLACE ";" ":" params_DLLPATH_ENV "${params_DLLPATH_ENV}") endif() - set_tests_properties(extra_${name} - PROPERTIES ENVIRONMENT "PATH=${params_DLLPATH_ENV}") + set_tests_properties(extra_${name} PROPERTIES ENVIRONMENT "PATH=${params_DLLPATH_ENV}") endif() endif() endfunction(add_extra_test) @@ -179,13 +158,10 @@ add_executable(mdbx_test ${LIBMDBX_TEST_SOURCES}) target_compile_definitions(mdbx_test PRIVATE MDBX_BUILD_TEST=1 MDBX_BUILD_CXX=1) if(MDBX_CXX_STANDARD) - set_target_properties(mdbx_test PROPERTIES CXX_STANDARD ${MDBX_CXX_STANDARD} - CXX_STANDARD_REQUIRED ON) + set_target_properties(mdbx_test PROPERTIES CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) endif() -set_target_properties( - mdbx_test PROPERTIES INTERPROCEDURAL_OPTIMIZATION - $) +set_target_properties(mdbx_test PROPERTIES INTERPROCEDURAL_OPTIMIZATION $) target_setup_options(mdbx_test) if(NOT MDBX_BUILD_CXX) @@ -205,8 +181,7 @@ if(NOT MDBX_BUILD_CXX AND LIBCXX_FILESYSTEM) endif() if(CMAKE_VERSION VERSION_LESS 3.1) - target_link_libraries(mdbx_test ${TOOL_MDBX_LIB} ${LIB_MATH} - ${CMAKE_THREAD_LIBS_INIT}) + target_link_libraries(mdbx_test ${TOOL_MDBX_LIB} ${LIB_MATH} ${CMAKE_THREAD_LIBS_INIT}) else() target_link_libraries(mdbx_test ${TOOL_MDBX_LIB} ${LIB_MATH} Threads::Threads) endif() @@ -222,29 +197,22 @@ if(NOT 
SUBPROJECT) endif() endif() -# ############################################################################## +# ###################################################################################################################### if(CMAKE_CROSSCOMPILING AND NOT CMAKE_CROSSCOMPILING_EMULATOR) message(WARNING "No emulator to run cross-compiled tests") - add_test(NAME fake_since_no_crosscompiling_emulator - COMMAND ${CMAKE_COMMAND} -E echo - "No emulator to run cross-compiled tests") + add_test(NAME fake_since_no_crosscompiling_emulator COMMAND ${CMAKE_COMMAND} -E echo + "No emulator to run cross-compiled tests") else() string( RANDOM LENGTH 9 ALPHABET "1234567890" test_seed) - message( - STATUS - "The ${test_seed} will be used for seeding tests. Re-run cmake to re-seed it." - ) + message(STATUS "The ${test_seed} will be used for seeding tests. Re-run cmake to re-seed it.") - add_test( - NAME smoke - COMMAND - ${MDBX_OUTPUT_DIR}/mdbx_test --loglevel=verbose --prng-seed=${test_seed} - --progress --console=no --pathname=smoke.db --dont-cleanup-after basic) + add_test(NAME smoke COMMAND ${MDBX_OUTPUT_DIR}/mdbx_test --loglevel=verbose --prng-seed=${test_seed} --progress + --console=no --pathname=smoke.db --dont-cleanup-after basic) set_tests_properties(smoke PROPERTIES TIMEOUT 600 RUN_SERIAL OFF) if(MDBX_BUILD_TOOLS) add_test(NAME smoke_chk COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvv smoke.db) @@ -258,8 +226,7 @@ else() "cooperative mode" REQUIRED_FILES smoke.db) - add_test(NAME smoke_chk_copy COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvv - smoke.db-copy) + add_test(NAME smoke_chk_copy COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvv smoke.db-copy) set_tests_properties( smoke_chk_copy PROPERTIES DEPENDS @@ -275,19 +242,14 @@ else() add_test( NAME dupsort_writemap COMMAND - ${MDBX_OUTPUT_DIR}/mdbx_test --loglevel=notice --prng-seed=${test_seed} - --table=+data.fixed --keygen.split=29 --datalen=rnd --progress - --console=no --repeat=2 --pathname=dupsort_writemap.db - --dont-cleanup-after 
basic) + ${MDBX_OUTPUT_DIR}/mdbx_test --loglevel=notice --prng-seed=${test_seed} --table=+data.fixed --keygen.split=29 + --datalen=rnd --progress --console=no --repeat=2 --pathname=dupsort_writemap.db --dont-cleanup-after basic) set_tests_properties(dupsort_writemap PROPERTIES TIMEOUT 3600 RUN_SERIAL OFF) if(MDBX_BUILD_TOOLS) - add_test(NAME dupsort_writemap_chk COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk - -nvvwc dupsort_writemap.db) - set_tests_properties( - dupsort_writemap_chk PROPERTIES DEPENDS dupsort_writemap TIMEOUT 60 - REQUIRED_FILES dupsort_writemap.db) - add_test(NAME dupsort_writemap_chk_copy - COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvvc dupsort_writemap.db-copy) + add_test(NAME dupsort_writemap_chk COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvvwc dupsort_writemap.db) + set_tests_properties(dupsort_writemap_chk PROPERTIES DEPENDS dupsort_writemap TIMEOUT 60 REQUIRED_FILES + dupsort_writemap.db) + add_test(NAME dupsort_writemap_chk_copy COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvvc dupsort_writemap.db-copy) set_tests_properties( dupsort_writemap_chk_copy PROPERTIES DEPENDS @@ -300,16 +262,12 @@ else() dupsort_writemap.db-copy) endif() - add_test( - NAME uniq_nested - COMMAND - ${MDBX_OUTPUT_DIR}/mdbx_test --loglevel=notice - --mode=-writemap,-nosync-safe,-lifo --progress --console=no --repeat=2 - --pathname=uniq_nested.db --dont-cleanup-after basic) + add_test(NAME uniq_nested + COMMAND ${MDBX_OUTPUT_DIR}/mdbx_test --loglevel=notice --mode=-writemap,-nosync-safe,-lifo --progress + --console=no --repeat=2 --pathname=uniq_nested.db --dont-cleanup-after basic) set_tests_properties(uniq_nested PROPERTIES TIMEOUT 1800 RUN_SERIAL OFF) if(MDBX_BUILD_TOOLS) - add_test(NAME uniq_nested_chk COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvvw - uniq_nested.db) + add_test(NAME uniq_nested_chk COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvvw uniq_nested.db) set_tests_properties( uniq_nested_chk PROPERTIES DEPENDS @@ -320,8 +278,7 @@ else() "cooperative mode" REQUIRED_FILES uniq_nested.db) - 
add_test(NAME uniq_nested_chk_copy COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvv - uniq_nested.db-copy) + add_test(NAME uniq_nested_chk_copy COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvv uniq_nested.db-copy) set_tests_properties( uniq_nested_chk_copy PROPERTIES DEPENDS From 513518ca5e36781d80eaaef1504ae9978e9c3862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 12 Dec 2024 13:06:44 +0300 Subject: [PATCH 384/443] =?UTF-8?q?mdbx-cmake:=20=D1=81=D0=B8=D0=BD=D1=85?= =?UTF-8?q?=D1=80=D0=BE=D0=BD=D0=B8=D0=B7=D0=B0=D1=86=D0=B8=D1=8F=20=D1=83?= =?UTF-8?q?=D1=82=D0=B8=D0=BB=D0=B8=D1=82=20=D0=BC=D0=B5=D0=B6=D0=B4=D1=83?= =?UTF-8?q?=20=D0=BF=D1=80=D0=BE=D0=B5=D0=BA=D1=82=D0=B0=D0=BC=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cmake/compiler.cmake | 4 +++- cmake/utils.cmake | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/cmake/compiler.cmake b/cmake/compiler.cmake index f875af16..05c067f9 100644 --- a/cmake/compiler.cmake +++ b/cmake/compiler.cmake @@ -31,9 +31,11 @@ if(NOT CMAKE_VERSION VERSION_LESS 3.9) cmake_policy(SET CMP0069 NEW) endif() +cmake_policy(SET CMP0054 NEW) + if(CMAKE_VERSION MATCHES ".*MSVC.*" AND CMAKE_VERSION VERSION_LESS 3.16) message(FATAL_ERROR "CMake from MSVC kit is unfit! 
" - "Please use MSVC2019 with modern CMake the original CMake from https://cmake.org/download/") + "Please use MSVC-2019 with modern CMake the original CMake from https://cmake.org/download/") endif() if(NOT (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED)) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 547040ae..adb8004d 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -12,6 +12,21 @@ endif() cmake_policy(PUSH) cmake_policy(VERSION ${CMAKE_MINIMUM_REQUIRED_VERSION}) +macro(add_option HIVE NAME DESCRIPTION DEFAULT) + list(APPEND ${HIVE}_BUILD_OPTIONS ${HIVE}_${NAME}) + if(NOT ${DEFAULT} STREQUAL "AUTO") + option(${HIVE}_${NAME} "${DESCRIPTION}" ${DEFAULT}) + elseif(NOT DEFINED ${HIVE}_${NAME}) + set(${HIVE}_${NAME}_AUTO ON) + endif() +endmacro() + +macro(set_if_undefined VARNAME) + if(NOT DEFINED "${VARNAME}") + set("${VARNAME}" ${ARGN}) + endif() +endmacro() + macro(add_compile_flags languages) foreach(_lang ${languages}) string(REPLACE ";" " " _flags "${ARGN}") From ea3f99f58ff94e69704982d174e05d3a68e3f8e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 12 Dec 2024 13:07:49 +0300 Subject: [PATCH 385/443] =?UTF-8?q?mdbx-cmake:=20=D1=83=D0=B4=D0=B0=D0=BB?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20`add=5Fmdbx=5Foption()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 70 +++++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 41 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6719ed24..dc22f2c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -621,14 +621,6 @@ endif() # ###################################################################################################################### set(MDBX_BUILD_OPTIONS ENABLE_UBSAN ENABLE_ASAN ENABLE_MEMCHECK ENABLE_GPROF ENABLE_GCOV) -macro(add_mdbx_option NAME DESCRIPTION DEFAULT) 
- list(APPEND MDBX_BUILD_OPTIONS ${NAME}) - if(NOT ${DEFAULT} STREQUAL "AUTO") - option(${NAME} "${DESCRIPTION}" ${DEFAULT}) - elseif(NOT DEFINED ${NAME}) - set(${NAME}_AUTO ON) - endif() -endmacro() if(IOS) set(MDBX_BUILD_TOOLS_DEFAULT OFF) @@ -640,64 +632,60 @@ else() set(MDBX_BUILD_TOOLS_DEFAULT ON) endif() -add_mdbx_option(MDBX_INSTALL_STATIC "Build and install libmdbx for static linking" OFF) -add_mdbx_option(MDBX_BUILD_SHARED_LIBRARY "Build libmdbx as shared library (DLL)" ${BUILD_SHARED_LIBS}) -add_mdbx_option(MDBX_BUILD_TOOLS "Build MDBX tools (mdbx_chk/stat/dump/load/copy/drop)" ${MDBX_BUILD_TOOLS_DEFAULT}) +add_option(MDBX INSTALL_STATIC "Build and install libmdbx for static linking" OFF) +add_option(MDBX BUILD_SHARED_LIBRARY "Build libmdbx as shared library (DLL)" ${BUILD_SHARED_LIBS}) +add_option(MDBX BUILD_TOOLS "Build MDBX tools (mdbx_chk/stat/dump/load/copy/drop)" ${MDBX_BUILD_TOOLS_DEFAULT}) cmake_dependent_option(MDBX_INSTALL_MANPAGES "Install man-pages for MDBX tools (mdbx_chk/stat/dump/load/copy)" ON MDBX_BUILD_TOOLS OFF) -add_mdbx_option(MDBX_TXN_CHECKOWNER "Checking transaction matches the calling thread inside libmdbx's API" ON) -add_mdbx_option(MDBX_ENV_CHECKPID "Checking PID inside libmdbx's API against reuse DB environment after the fork()" - AUTO) +add_option(MDBX TXN_CHECKOWNER "Checking transaction matches the calling thread inside libmdbx's API" ON) +add_option(MDBX ENV_CHECKPID "Checking PID inside libmdbx's API against reuse DB environment after the fork()" AUTO) mark_as_advanced(MDBX_ENV_CHECKPID) if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - add_mdbx_option(MDBX_DISABLE_GNU_SOURCE "Don't use GNU/Linux libc extensions" OFF) + add_option(MDBX DISABLE_GNU_SOURCE "Don't use GNU/Linux libc extensions" OFF) mark_as_advanced(MDBX_DISABLE_GNU_SOURCE) endif() if(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin" OR IOS) - add_mdbx_option(MDBX_APPLE_SPEED_INSTEADOF_DURABILITY "Disable use fcntl(F_FULLFSYNC) in favor of speed" OFF) + add_option(MDBX 
APPLE_SPEED_INSTEADOF_DURABILITY "Disable use fcntl(F_FULLFSYNC) in favor of speed" OFF) mark_as_advanced(MDBX_APPLE_SPEED_INSTEADOF_DURABILITY) endif() if(WIN32) if(MDBX_NTDLL_EXTRA_IMPLIB) - add_mdbx_option(MDBX_WITHOUT_MSVC_CRT "Avoid dependence from MSVC CRT and use ntdll.dll instead" OFF) + add_option(MDBX WITHOUT_MSVC_CRT "Avoid dependence from MSVC CRT and use ntdll.dll instead" OFF) endif() set(MDBX_AVOID_MSYNC_DEFAULT ON) else() - add_mdbx_option(MDBX_USE_OFDLOCKS "Use Open file description locks (aka OFD locks, non-POSIX)" AUTO) + add_option(MDBX USE_OFDLOCKS "Use Open file description locks (aka OFD locks, non-POSIX)" AUTO) mark_as_advanced(MDBX_USE_OFDLOCKS) - add_mdbx_option(MDBX_USE_MINCORE "Use Unix' mincore() to determine whether DB-pages are resident in memory" ON) + add_option(MDBX USE_MINCORE "Use Unix' mincore() to determine whether DB-pages are resident in memory" ON) mark_as_advanced(MDBX_USE_MINCORE) set(MDBX_AVOID_MSYNC_DEFAULT OFF) endif() -add_mdbx_option( - MDBX_AVOID_MSYNC +add_option( + MDBX AVOID_MSYNC "Disable in-memory database updating with consequent flush-to-disk/msync syscall in `MDBX_WRITEMAP` mode" ${MDBX_AVOID_MSYNC_DEFAULT}) -add_mdbx_option(MDBX_MMAP_NEEDS_JOLT "Assume system needs explicit syscall to sync/flush/write modified mapped memory" - AUTO) +add_option(MDBX MMAP_NEEDS_JOLT "Assume system needs explicit syscall to sync/flush/write modified mapped memory" AUTO) mark_as_advanced(MDBX_MMAP_NEEDS_JOLT) -add_mdbx_option(MDBX_LOCKING "Locking method (Windows=-1, SystemV=5, POSIX=1988, POSIX=2001, POSIX=2008)" AUTO) +add_option(MDBX LOCKING "Locking method (Windows=-1, SystemV=5, POSIX=1988, POSIX=2001, POSIX=2008)" AUTO) mark_as_advanced(MDBX_LOCKING) -add_mdbx_option(MDBX_TRUST_RTC "Does a system have battery-backed Real-Time Clock or just a fake" AUTO) +add_option(MDBX TRUST_RTC "Does a system have battery-backed Real-Time Clock or just a fake" AUTO) mark_as_advanced(MDBX_TRUST_RTC) -add_mdbx_option(MDBX_FORCE_ASSERTIONS 
"Force enable assertion checking" OFF) -add_mdbx_option( - MDBX_DISABLE_VALIDATION +add_option(MDBX FORCE_ASSERTIONS "Force enable assertion checking" OFF) +add_option( + MDBX + DISABLE_VALIDATION "Disable some checks to reduce an overhead and detection probability of database corruption to a values closer to the LMDB" OFF) mark_as_advanced(MDBX_DISABLE_VALIDATION) -add_mdbx_option(MDBX_ENABLE_REFUND "Zerocost auto-compactification during write-transactions" ON) -add_mdbx_option( - MDBX_ENABLE_BIGFOOT - "Chunking long list of retired pages during huge transactions commit to avoid use sequences of pages" ON) -add_mdbx_option(MDBX_ENABLE_PGOP_STAT "Gathering statistics for page operations" ON) -add_mdbx_option(MDBX_ENABLE_PROFGC "Profiling of GC search and updates" OFF) +add_option(MDBX ENABLE_REFUND "Zerocost auto-compactification during write-transactions" ON) +add_option(MDBX ENABLE_BIGFOOT + "Chunking long list of retired pages during huge transactions commit to avoid use sequences of pages" ON) +add_option(MDBX ENABLE_PGOP_STAT "Gathering statistics for page operations" ON) +add_option(MDBX ENABLE_PROFGC "Profiling of GC search and updates" OFF) mark_as_advanced(MDBX_ENABLE_PROFGC) -add_mdbx_option( - MDBX_ENABLE_DBI_SPARSE - "Support for sparse sets of DBI handles to reduce overhead when starting and processing transactions" ON) -add_mdbx_option(MDBX_ENABLE_DBI_LOCKFREE - "Support for deferred releasing and a lockfree path to quickly open DBI handles" ON) +add_option(MDBX ENABLE_DBI_SPARSE + "Support for sparse sets of DBI handles to reduce overhead when starting and processing transactions" ON) +add_option(MDBX ENABLE_DBI_LOCKFREE "Support for deferred releasing and a lockfree path to quickly open DBI handles" ON) if(NOT MDBX_AMALGAMATED_SOURCE) if(CMAKE_CONFIGURATION_TYPES OR CMAKE_BUILD_TYPE_UPPERCASE STREQUAL "DEBUG") @@ -705,11 +693,11 @@ if(NOT MDBX_AMALGAMATED_SOURCE) else() set(MDBX_ALLOY_BUILD_DEFAULT ON) endif() - add_mdbx_option(MDBX_ALLOY_BUILD 
"Build MDBX library through single/alloyed object file" ${MDBX_ALLOY_BUILD_DEFAULT}) + add_option(MDBX ALLOY_BUILD "Build MDBX library through single/alloyed object file" ${MDBX_ALLOY_BUILD_DEFAULT}) endif() if((MDBX_BUILD_TOOLS OR MDBX_ENABLE_TESTS) AND MDBX_BUILD_SHARED_LIBRARY) - add_mdbx_option(MDBX_LINK_TOOLS_NONSTATIC "Link MDBX tools with non-static libmdbx" OFF) + add_option(MDBX LINK_TOOLS_NONSTATIC "Link MDBX tools with non-static libmdbx" OFF) else() unset(MDBX_LINK_TOOLS_NONSTATIC CACHE) endif() From ccdb6255e97285eae0d684c91391088e3283cdb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 13 Dec 2024 07:57:57 +0300 Subject: [PATCH 386/443] =?UTF-8?q?mdbx:=20=D0=B2=D0=BE=D0=B7=D0=B2=D1=80?= =?UTF-8?q?=D0=B0=D1=82=20`MDBX=5FEINVAL`=20=D0=BF=D1=80=D0=B8=20=D0=BF?= =?UTF-8?q?=D0=BE=D0=BF=D1=8B=D1=82=D0=BA=D0=B5=20=D0=B7=D0=B0=D0=BF=D1=83?= =?UTF-8?q?=D1=81=D1=82=D0=B8=D1=82=D1=8C=20=D0=B2=D0=BB=D0=BE=D0=B6=D0=B5?= =?UTF-8?q?=D0=BD=D0=BD=D1=83=D1=8E=20=D1=87=D0=B8=D1=82=D0=B0=D1=8E=D1=89?= =?UTF-8?q?=D1=83=D1=8E=20=D1=82=D1=80=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86?= =?UTF-8?q?=D0=B8=D1=8E.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/txn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/txn.c b/src/txn.c index 166e7682..0f6cdb53 100644 --- a/src/txn.c +++ b/src/txn.c @@ -1424,7 +1424,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, M return MDBX_EINVAL; *ret = nullptr; - if (unlikely((flags & ~txn_rw_begin_flags) && (flags & ~txn_ro_begin_flags))) + if (unlikely((flags & ~txn_rw_begin_flags) && (parent || (flags & ~txn_ro_begin_flags)))) return MDBX_EINVAL; int rc = check_env(env, true); From c66dac50c32f79f21612264dfb02a85d20394d5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= 
=?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 13 Dec 2024 22:30:40 +0300 Subject: [PATCH 387/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20`osal=5Fbootid()`=20=D0=B4=D0=BB?= =?UTF-8?q?=D1=8F=20LXC.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Из LXC-контейнера недоступен файл хостовой системы "/proc/sys/kernel/random/boot_id". Вместо него, при каждом старте контейнера, создается и заполняется случайными данными собственный boot_id, смонтированный через bind из tmpfs. https://github.com/lxc/lxc/issues/3027 Поэтому полноценный контроль по boot_id невозможен, так как при рестарте LXC-контейнера (но не хоста) boot_id будет меняться, хотя данные в unified page cache сохраняются. Таким образом, при рестарте LXC-контейнера libmdbx будет производить откат БД до крайней точки устойчивой фиксации, что может приводить к утрате данных пользователя в случаях, когда они могли быть сохранены. Однако улучшить ситуацию пока не представляется возможным, как минимум до доступности boot_id хостовой системы изнутри LXC-контейнера. Этот коммит частично улучшает ситуацию тем, что позволяет использовать фейковый/замещенный boot_id, размещенный в файловой системе с типом tmpfs, при работе внутри LXC-контейнера. 
--- src/osal.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/src/osal.c b/src/osal.c index cdef5712..01f0b11d 100644 --- a/src/osal.c +++ b/src/osal.c @@ -2877,12 +2877,32 @@ __cold MDBX_MAYBE_UNUSED static bool bootid_parse_uuid(bin128_t *s, const void * } #if defined(__linux__) || defined(__gnu_linux__) + +__cold static bool is_inside_lxc(void) { + bool inside_lxc = false; + FILE *mounted = setmntent("/proc/mounts", "r"); + if (mounted) { + const struct mntent *ent; + while (nullptr != (ent = getmntent(mounted))) { + if (strcmp(ent->mnt_fsname, "lxcfs") == 0 && strncmp(ent->mnt_dir, "/proc/", 6) == 0) { + inside_lxc = true; + break; + } + } + endmntent(mounted); + } + return inside_lxc; +} + __cold static bool proc_read_uuid(const char *path, bin128_t *target) { const int fd = open(path, O_RDONLY | O_NOFOLLOW); if (fd != -1) { struct statfs fs; char buf[42]; - const ssize_t len = (fstatfs(fd, &fs) == 0 && fs.f_type == /* procfs */ 0x9FA0) ? read(fd, buf, sizeof(buf)) : -1; + const ssize_t len = (fstatfs(fd, &fs) == 0 && + (fs.f_type == /* procfs */ 0x9FA0 || (fs.f_type == /* tmpfs */ 0x1021994 && is_inside_lxc()))) + ? 
read(fd, buf, sizeof(buf)) + : -1; const int err = close(fd); assert(err == 0); (void)err; @@ -3061,10 +3081,27 @@ __cold static bin128_t osal_bootid(void) { } #endif /* __NetBSD__ */ +#if !(defined(_WIN32) || defined(_WIN64)) + if (!got_machineid) { + int fd = open("/etc/machine-id", O_RDONLY); + if (fd == -1) + fd = open("/var/lib/dbus/machine-id", O_RDONLY); + if (fd != -1) { + char buf[42]; + const ssize_t len = read(fd, buf, sizeof(buf)); + const int err = close(fd); + assert(err == 0); + (void)err; + if (len > 0) + got_machineid = bootid_parse_uuid(&uuid, buf, len); + } + } +#endif /* !Windows */ + #if _XOPEN_SOURCE_EXTENDED if (!got_machineid) { - const int hostid = gethostid(); - if (hostid > 0) { + const long hostid = gethostid(); + if (hostid != 0 && hostid != -1) { bootid_collect(&uuid, &hostid, sizeof(hostid)); got_machineid = true; } From a845522db76f73364ea8d7a7217c68e6c40aa5ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 15 Dec 2024 22:17:12 +0300 Subject: [PATCH 388/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=80=D0=B5=D0=B3=D1=80?= =?UTF-8?q?=D0=B5=D1=81=D1=81=D0=B0=20=D1=81=D0=BE=D1=81=D1=82=D0=BE=D1=8F?= =?UTF-8?q?=D0=BD=D0=B8=D1=8F=20dupsort-=D0=BA=D1=83=D1=80=D1=81=D0=BE?= =?UTF-8?q?=D1=80=D0=B0=20=D0=BF=D0=BE=D1=81=D0=BB=D0=B5=20`cursor=5Fput(A?= =?UTF-8?q?PPEND)`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit При добавлении нового ключа в append-режиме, в случае когда в текущей (последней) позиции с ключом связаны несколько значений и (соответственно) вложенный dupsort-курсор инициализирован, вставка происходила без сброса вложенного курсора. В результате вложенный курсор логически оставался стоять на multivalue-данных связанных с предыдущей позицией основного курсора, т.е. переходил в неконсистентное состояние. 
Ошибка проявлялась возвратом неверных значений из mdbx_cursor_count() или срабатыванием assert-проверки в отладочных сборках. --- src/cursor.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/cursor.c b/src/cursor.c index a4bffd76..5ec88a77 100644 --- a/src/cursor.c +++ b/src/cursor.c @@ -898,13 +898,16 @@ __hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, unsig if (insert_key) { /* The key does not exist */ DEBUG("inserting key at index %i", mc->ki[mc->top]); - if ((mc->tree->flags & MDBX_DUPSORT) && node_size(key, data) > env->leaf_nodemax) { - /* Too big for a node, insert in sub-DB. Set up an empty - * "old sub-page" for convert_to_subtree to expand to a full page. */ - fp->dupfix_ksize = (mc->tree->flags & MDBX_DUPFIXED) ? (uint16_t)data->iov_len : 0; - fp->lower = fp->upper = 0; - old_data.iov_len = PAGEHDRSZ; - goto convert_to_subtree; + if (mc->tree->flags & MDBX_DUPSORT) { + inner_gone(mc); + if (node_size(key, data) > env->leaf_nodemax) { + /* Too big for a node, insert in sub-DB. Set up an empty + * "old sub-page" for convert_to_subtree to expand to a full page. */ + fp->dupfix_ksize = (mc->tree->flags & MDBX_DUPFIXED) ? 
(uint16_t)data->iov_len : 0; + fp->lower = fp->upper = 0; + old_data.iov_len = PAGEHDRSZ; + goto convert_to_subtree; + } } } else { /* there's only a key anyway, so this is a no-op */ From 90b187c3bad7e4a8b1399afcfb774881f0e73cf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 16 Dec 2024 11:30:10 +0300 Subject: [PATCH 389/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D1=80=D0=BE=D0=B2=D0=B5?= =?UTF-8?q?=D1=80=D0=BE=D0=BA=20=D0=B2=20`inner=5Fhollow()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/cursor.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/cursor.h b/src/cursor.h index 4ae45222..0ea13903 100644 --- a/src/cursor.h +++ b/src/cursor.h @@ -174,7 +174,16 @@ MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool inner_pointed(co } MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool inner_hollow(const MDBX_cursor *mc) { - return !mc->subcur || is_hollow(&mc->subcur->cursor); + const bool r = !mc->subcur || is_hollow(&mc->subcur->cursor); +#if MDBX_DEBUG || MDBX_FORCE_ASSERTIONS + if (!r) { + cASSERT(mc, is_filled(mc)); + const page_t *mp = mc->pg[mc->top]; + const node_t *node = page_node(mp, mc->ki[mc->top]); + cASSERT(mc, node_flags(node) & N_DUP); + } +#endif /* MDBX_DEBUG || MDBX_FORCE_ASSERTIONS */ + return r; } MDBX_MAYBE_UNUSED static inline void inner_gone(MDBX_cursor *mc) { From 526ed28de15c2bb92a693799b11d25c7d0ba0bf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 16 Dec 2024 11:54:24 +0300 Subject: [PATCH 390/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fcursor=5Fcount=5Fex()`?= 
=?UTF-8?q?=20=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 28 +++++++++++++++++++++++++++- src/api-cursor.c | 43 ++++++++++++++++++++++++++++++++++++------- src/layout-dxb.h | 2 +- 3 files changed, 64 insertions(+), 9 deletions(-) diff --git a/mdbx.h b/mdbx.h index 9854c6e5..16fa1066 100644 --- a/mdbx.h +++ b/mdbx.h @@ -5701,9 +5701,11 @@ LIBMDBX_API int mdbx_cursor_put(MDBX_cursor *cursor, const MDBX_val *key, MDBX_v * \retval MDBX_EINVAL An invalid parameter was specified. */ LIBMDBX_API int mdbx_cursor_del(MDBX_cursor *cursor, MDBX_put_flags_t flags); -/** \brief Return count of duplicates for current key. +/** \brief Return the count of values (aka duplicates) for current key. * \ingroup c_crud * + * \see mdbx_cursor_count_ex + * * This call is valid for all tables, but reasonable only for that support * sorted duplicate data items \ref MDBX_DUPSORT. * @@ -5718,6 +5720,30 @@ LIBMDBX_API int mdbx_cursor_del(MDBX_cursor *cursor, MDBX_put_flags_t flags); * was specified. */ LIBMDBX_API int mdbx_cursor_count(const MDBX_cursor *cursor, size_t *pcount); +/** \brief Return the count of values (aka duplicates) and nested b-tree statistics for current key. + * \ingroup c_crud + * + * \see mdbx_dbi_stat + * \see mdbx_dbi_dupsort_depthmask + * \see mdbx_cursor_count + * + * This call is valid for all tables, but meaningful only for those that support + * sorted duplicate data items \ref MDBX_DUPSORT. + * + * \param [in] mc A cursor handle returned by \ref mdbx_cursor_open(). + * \param [out] count Optional address where the count will be stored. + * \param [out] stat The optional address of an \ref MDBX_stat structure where + * the statistics of a nested b-tree will be copied. + * \param [in] bytes The size of \ref MDBX_stat. + * + * \returns A non-zero error value on failure and 0 on success, + * some possible errors are: + * \retval MDBX_THREAD_MISMATCH Given transaction is not owned + * by current thread. 
+ * \retval MDBX_EINVAL Cursor is not initialized, or an invalid parameter + * was specified. */ +LIBMDBX_API int mdbx_cursor_count_ex(const MDBX_cursor *mc, size_t *count, MDBX_stat *stat, size_t bytes); + /** \brief Determines whether the cursor is pointed to a key-value pair or not, * i.e. was not positioned or points to the end of data. * \ingroup c_cursors diff --git a/src/api-cursor.c b/src/api-cursor.c index 24d21fce..e52f5817 100644 --- a/src/api-cursor.c +++ b/src/api-cursor.c @@ -308,8 +308,7 @@ int mdbx_cursor_compare(const MDBX_cursor *l, const MDBX_cursor *r, bool ignore_ return (l->flags & z_eof_hard) - (r->flags & z_eof_hard); } -/* Return the count of duplicate data items for the current key */ -int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) { +int mdbx_cursor_count_ex(const MDBX_cursor *mc, size_t *count, MDBX_stat *ns, size_t bytes) { if (unlikely(mc == nullptr)) return LOG_IFERR(MDBX_EINVAL); @@ -320,21 +319,51 @@ int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) { if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); - if (unlikely(countp == nullptr)) - return LOG_IFERR(MDBX_EINVAL); + if (ns) { + const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid); + if (unlikely(bytes != sizeof(MDBX_stat) && bytes != size_before_modtxnid)) + return LOG_IFERR(MDBX_EINVAL); + memset(ns, 0, bytes); /* clear only the caller-declared size: the shorter legacy MDBX_stat must not be overrun */ + } - if ((*countp = is_filled(mc)) > 0) { + size_t nvals = 0; + if (is_filled(mc)) { + nvals = 1; if (!inner_hollow(mc)) { const page_t *mp = mc->pg[mc->top]; const node_t *node = page_node(mp, mc->ki[mc->top]); cASSERT(mc, node_flags(node) & N_DUP); - *countp = - unlikely(mc->subcur->nested_tree.items > PTRDIFF_MAX) ? PTRDIFF_MAX : (size_t)mc->subcur->nested_tree.items; + const tree_t *nt = &mc->subcur->nested_tree; + nvals = unlikely(nt->items > PTRDIFF_MAX) ? 
PTRDIFF_MAX : (size_t)nt->items; + if (ns) { + ns->ms_psize = (unsigned)node_ds(node); + if (node_flags(node) & N_TREE) { + ns->ms_psize = mc->txn->env->ps; + ns->ms_depth = nt->height; + ns->ms_branch_pages = nt->branch_pages; + } + cASSERT(mc, nt->large_pages == 0); + ns->ms_leaf_pages = nt->leaf_pages; + ns->ms_entries = nt->items; + if (likely(bytes >= offsetof(MDBX_stat, ms_mod_txnid) + sizeof(ns->ms_mod_txnid))) + ns->ms_mod_txnid = nt->mod_txnid; + } } } + + if (likely(count)) + *count = nvals; + return MDBX_SUCCESS; } +int mdbx_cursor_count(const MDBX_cursor *mc, size_t *count) { + if (unlikely(count == nullptr)) + return LOG_IFERR(MDBX_EINVAL); + + return mdbx_cursor_count_ex(mc, count, nullptr, 0); +} + int mdbx_cursor_on_first(const MDBX_cursor *mc) { if (unlikely(mc == nullptr)) return LOG_IFERR(MDBX_EINVAL); diff --git a/src/layout-dxb.h b/src/layout-dxb.h index ed2f261d..3d9514c8 100644 --- a/src/layout-dxb.h +++ b/src/layout-dxb.h @@ -62,7 +62,7 @@ typedef struct tree { uint16_t height; /* height of this tree */ uint32_t dupfix_size; /* key-size for MDBX_DUPFIXED (DUPFIX pages) */ pgno_t root; /* the root page of this tree */ - pgno_t branch_pages; /* number of internal pages */ + pgno_t branch_pages; /* number of branch pages */ pgno_t leaf_pages; /* number of leaf pages */ pgno_t large_pages; /* number of large pages */ uint64_t sequence; /* table sequence counter */ From 122562cf9c9773904e7ee029350434c151538451 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 16 Dec 2024 12:16:11 +0300 Subject: [PATCH 391/443] =?UTF-8?q?mdbx-tests:=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D0=B8=D0=BC=D0=B5=D0=BD=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20?= =?UTF-8?q?=D0=BE=D0=BF=D1=86=D0=B8=D0=B8=20`data.dups`=20=D0=B2=20`data.m?= =?UTF-8?q?ulti`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/config.c++ | 
2 +- test/main.c++ | 2 +- test/stochastic.sh | 28 ++++++++++++++-------------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/test/config.c++ b/test/config.c++ index e35e74f2..514f9ae4 100644 --- a/test/config.c++ +++ b/test/config.c++ @@ -349,7 +349,7 @@ const struct option_verb table_bits[] = {{"key.reverse", unsigned(MDBX_REVERSEKE {"data.integer", unsigned(MDBX_INTEGERDUP | MDBX_DUPFIXED | MDBX_DUPSORT)}, {"data.fixed", unsigned(MDBX_DUPFIXED | MDBX_DUPSORT)}, {"data.reverse", unsigned(MDBX_REVERSEDUP | MDBX_DUPSORT)}, - {"data.dups", unsigned(MDBX_DUPSORT)}, + {"data.multi", unsigned(MDBX_DUPSORT)}, {nullptr, 0}}; static void dump_verbs(const char *caption, size_t bits, const struct option_verb *verbs) { diff --git a/test/main.c++ b/test/main.c++ index 90c3701e..59c58729 100644 --- a/test/main.c++ +++ b/test/main.c++ @@ -102,7 +102,7 @@ MDBX_NORETURN void usage(void) { " --table={[+-]FLAG}[,[+-]FLAG]...\n" " key.reverse == MDBX_REVERSEKEY\n" " key.integer == MDBX_INTEGERKEY\n" - " data.dups == MDBX_DUPSORT\n" + " data.multi == MDBX_DUPSORT\n" " data.integer == MDBX_INTEGERDUP | MDBX_DUPFIXED | MDBX_DUPSORT\n" " data.fixed == MDBX_DUPFIXED | MDBX_DUPSORT\n" " data.reverse == MDBX_REVERSEDUP | MDBX_DUPSORT\n"); diff --git a/test/stochastic.sh b/test/stochastic.sh index 0fb64ca2..18ea6737 100755 --- a/test/stochastic.sh +++ b/test/stochastic.sh @@ -530,13 +530,13 @@ function pass { split=30 caption="$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.multi --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options 
$bits)${syncmodes[count%4]} caption="$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.multi --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ @@ -547,13 +547,13 @@ function pass { split=24 caption="$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.multi --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ 
--prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.multi --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ @@ -564,19 +564,19 @@ function pass { split=16 caption="$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.multi --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} 
--keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.multi --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.multi --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.multi --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} 
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ @@ -588,19 +588,19 @@ function pass { if [ "$EXTRA" != "no" ]; then split=10 caption="$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.multi --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.multi --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups 
--keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.multi --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.multi --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ @@ -612,13 +612,13 @@ function pass { split=4 caption="$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.multi --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE 
--size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.multi --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ From 6ed4dcb4ea4b278ffadf2f11e2f5d4b1f39ea1e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 13 Dec 2024 23:39:14 +0300 Subject: [PATCH 392/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D1=82=D0=BB=D0=B0=D0=B4?= =?UTF-8?q?=D0=BE=D1=87=D0=BD=D1=8B=D1=85=20=D1=81=D0=BE=D0=BE=D0=B1=D1=89?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B9=20=D0=BF=D1=80=D0=B8=20=D0=B2=D0=BE?= =?UTF-8?q?=D0=B7=D0=B2=D1=80=D0=B0=D1=82=D0=B5=20=D0=BE=D1=88=D0=B8=D0=B1?= =?UTF-8?q?=D0=BE=D0=BA=20=D0=B8=D0=B7=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/txn.c | 74 +++++++++++++++++++++++++++---------------------------- 1 file changed, 36 insertions(+), 38 deletions(-) diff --git a/src/txn.c b/src/txn.c index 
0f6cdb53..627ddc79 100644 --- a/src/txn.c +++ b/src/txn.c @@ -438,7 +438,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { bailout: if (latency) memset(latency, 0, sizeof(*latency)); - return rc; + return LOG_IFERR(rc); } MDBX_env *const env = txn->env; @@ -817,7 +817,7 @@ provide_latency: latency->ending = ts_5 ? osal_monotime_to_16dot16(ts_6 - ts_5) : 0; latency->whole = osal_monotime_to_16dot16_noUnderflow(ts_6 - ts_0); } - return rc; + return LOG_IFERR(rc); fail: txn->flags |= MDBX_TXN_ERROR; @@ -1384,13 +1384,13 @@ int txn_end(MDBX_txn *txn, unsigned mode) { int mdbx_txn_renew(MDBX_txn *txn) { if (unlikely(!txn)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(txn->signature != txn_signature)) - return MDBX_EBADSIGN; + return LOG_IFERR(MDBX_EBADSIGN); if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); if (unlikely(txn->owner != 0 || !(txn->flags & MDBX_TXN_FINISHED))) { int rc = mdbx_txn_reset(txn); @@ -1405,13 +1405,13 @@ int mdbx_txn_renew(MDBX_txn *txn) { (txn->flags & MDBX_TXN_RDONLY) ? 
'r' : 'w', (void *)txn, (void *)txn->env, txn->dbs[MAIN_DBI].root, txn->dbs[FREE_DBI].root); } - return rc; + return LOG_IFERR(rc); } int mdbx_txn_set_userctx(MDBX_txn *txn, void *ctx) { int rc = check_txn(txn, MDBX_TXN_FINISHED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); txn->userctx = ctx; return MDBX_SUCCESS; @@ -1421,31 +1421,31 @@ void *mdbx_txn_get_userctx(const MDBX_txn *txn) { return check_txn(txn, MDBX_TXN int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, MDBX_txn **ret, void *context) { if (unlikely(!ret)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); *ret = nullptr; if (unlikely((flags & ~txn_rw_begin_flags) && (parent || (flags & ~txn_ro_begin_flags)))) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); int rc = check_env(env, true); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(env->flags & MDBX_RDONLY & ~flags)) /* write txn in RDONLY env */ - return MDBX_EACCESS; + return LOG_IFERR(MDBX_EACCESS); MDBX_txn *txn = nullptr; if (parent) { /* Nested transactions: Max 1 child, write txns only, no writemap */ rc = check_txn_rw(parent, MDBX_TXN_RDONLY | MDBX_WRITEMAP | MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (env->options.spill_parent4child_denominator) { /* Spill dirty-pages of parent to provide dirtyroom for child txn */ rc = txn_spill(parent, nullptr, parent->tw.dirtylist->length / env->options.spill_parent4child_denominator); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); } tASSERT(parent, audit_ex(parent, 0, false) == 0); @@ -1470,10 +1470,8 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, M ((flags & MDBX_TXN_RDONLY) ? 
(size_t)bitmap_bytes + env->max_dbi * sizeof(txn->dbi_seqs[0]) : 0) + env->max_dbi * (sizeof(txn->dbs[0]) + sizeof(txn->cursors[0]) + sizeof(txn->dbi_state[0])); txn = osal_malloc(size); - if (unlikely(txn == nullptr)) { - DEBUG("calloc: %s", "failed"); - return MDBX_ENOMEM; - } + if (unlikely(txn == nullptr)) + return LOG_IFERR(MDBX_ENOMEM); #if MDBX_DEBUG memset(txn, 0xCD, size); VALGRIND_MAKE_MEM_UNDEFINED(txn, size); @@ -1508,7 +1506,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, M pnl_free(txn->tw.relist); dpl_free(txn); osal_free(txn); - return rc; + return LOG_IFERR(rc); } /* Move loose pages to reclaimed list */ @@ -1618,22 +1616,22 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, M txn->dbs[FREE_DBI].root); } - return rc; + return LOG_IFERR(rc); } int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) { int rc = check_txn(txn, MDBX_TXN_FINISHED); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!info)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); MDBX_env *const env = txn->env; #if MDBX_ENV_CHECKPID if (unlikely(env->pid != osal_getpid())) { env->flags |= ENV_FATAL_ERROR; - return MDBX_PANIC; + return LOG_IFERR(MDBX_PANIC); } #endif /* MDBX_ENV_CHECKPID */ @@ -1758,11 +1756,11 @@ MDBX_txn_flags_t mdbx_txn_flags(const MDBX_txn *txn) { int mdbx_txn_reset(MDBX_txn *txn) { int rc = check_txn(txn, 0); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); /* This call is only valid for read-only txns */ if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0)) - return MDBX_EINVAL; + return LOG_IFERR(MDBX_EINVAL); /* LY: don't close DBI-handles */ rc = txn_end(txn, TXN_END_RESET | TXN_END_UPDATE); @@ -1770,14 +1768,14 @@ int mdbx_txn_reset(MDBX_txn *txn) { tASSERT(txn, txn->signature == txn_signature); tASSERT(txn, txn->owner == 0); } - return rc; + return LOG_IFERR(rc); } int mdbx_txn_break(MDBX_txn *txn) { do { int rc 
= check_txn(txn, 0); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); txn->flags |= MDBX_TXN_ERROR; if (txn->flags & MDBX_TXN_RDONLY) break; @@ -1789,52 +1787,52 @@ int mdbx_txn_break(MDBX_txn *txn) { int mdbx_txn_abort(MDBX_txn *txn) { int rc = check_txn(txn, 0); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); rc = check_env(txn->env, true); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if ((txn->flags & (MDBX_TXN_RDONLY | MDBX_NOSTICKYTHREADS)) == MDBX_NOSTICKYTHREADS && unlikely(txn->owner != osal_thread_self())) { mdbx_txn_break(txn); - return MDBX_THREAD_MISMATCH; + return LOG_IFERR(MDBX_THREAD_MISMATCH); } - return txn_abort(txn); + return LOG_IFERR(txn_abort(txn)); } int mdbx_txn_park(MDBX_txn *txn, bool autounpark) { STATIC_ASSERT(MDBX_TXN_BLOCKED > MDBX_TXN_ERROR); int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0)) - return MDBX_TXN_INVALID; + return LOG_IFERR(MDBX_TXN_INVALID); if (unlikely((txn->flags & MDBX_TXN_ERROR))) { rc = txn_end(txn, TXN_END_RESET | TXN_END_UPDATE); - return rc ? rc : MDBX_OUSTED; + return LOG_IFERR(rc ? rc : MDBX_OUSTED); } - return txn_park(txn, autounpark); + return LOG_IFERR(txn_park(txn, autounpark)); } int mdbx_txn_unpark(MDBX_txn *txn, bool restart_if_ousted) { STATIC_ASSERT(MDBX_TXN_BLOCKED > MDBX_TXN_PARKED + MDBX_TXN_ERROR); int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_PARKED - MDBX_TXN_ERROR); if (unlikely(rc != MDBX_SUCCESS)) - return rc; + return LOG_IFERR(rc); if (unlikely(!F_ISSET(txn->flags, MDBX_TXN_RDONLY | MDBX_TXN_PARKED))) return MDBX_SUCCESS; rc = txn_unpark(txn); if (likely(rc != MDBX_OUSTED) || !restart_if_ousted) - return rc; + return LOG_IFERR(rc); tASSERT(txn, txn->flags & MDBX_TXN_FINISHED); rc = txn_renew(txn, MDBX_TXN_RDONLY); - return (rc == MDBX_SUCCESS) ? 
MDBX_RESULT_TRUE : rc; + return (rc == MDBX_SUCCESS) ? MDBX_RESULT_TRUE : LOG_IFERR(rc); } int txn_check_badbits_parked(const MDBX_txn *txn, int bad_bits) { @@ -1848,7 +1846,7 @@ int txn_check_badbits_parked(const MDBX_txn *txn, int bad_bits) { * mdbx_txn_break(), но далее любое её использование приведет к завершению * при распарковке. */ if ((txn->flags & (bad_bits | MDBX_TXN_AUTOUNPARK)) != (MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK)) - return MDBX_BAD_TXN; + return LOG_IFERR(MDBX_BAD_TXN); tASSERT(txn, bad_bits == MDBX_TXN_BLOCKED || bad_bits == MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); return mdbx_txn_unpark((MDBX_txn *)txn, false); From 5168c80be885ca8a3d8325d9cada62dd61befe80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 16 Dec 2024 16:43:49 +0300 Subject: [PATCH 393/443] =?UTF-8?q?mdbx:=20=D1=81=D0=B1=D0=BE=D1=80=20?= =?UTF-8?q?=D0=B7=D0=B0=D1=82=D1=80=D0=B0=D1=82=20=D0=BD=D0=B0=20`pnl=5Fme?= =?UTF-8?q?rge()`=20=D0=BF=D1=80=D0=B8=20=D0=B2=D0=BA=D0=BB=D1=8E=D1=87?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B8=20`MDBX=5FENABLE=5FPROFGC`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 6 ++++++ src/gc-get.c | 8 ++++++++ src/layout-lck.h | 6 ++++++ src/txn.c | 8 ++++++++ 4 files changed, 28 insertions(+) diff --git a/mdbx.h b/mdbx.h index 16fa1066..fd29387a 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4139,6 +4139,12 @@ struct MDBX_commit_latency { /** \brief Количество страничных промахов (page faults) внутри GC * при выделении и подготовки страниц для самой GC. 
*/ uint32_t self_majflt; + /* Для разборок с pnl_merge() */ + struct { + uint32_t time; + uint64_t volume; + uint32_t calls; + } pnl_merge_work, pnl_merge_self; } gc_prof; }; #ifndef __cplusplus diff --git a/src/gc-get.c b/src/gc-get.c index 196001f9..a32f2755 100644 --- a/src/gc-get.c +++ b/src/gc-get.c @@ -1064,7 +1064,15 @@ next_gc:; } /* Merge in descending sorted order */ +#if MDBX_ENABLE_PROFGC + const uint64_t merge_begin = osal_monotime(); +#endif /* MDBX_ENABLE_PROFGC */ pnl_merge(txn->tw.relist, gc_pnl); +#if MDBX_ENABLE_PROFGC + prof->pnl_merge.calls += 1; + prof->pnl_merge.volume += MDBX_PNL_GETSIZE(txn->tw.relist); + prof->pnl_merge.time += osal_monotime() - merge_begin; +#endif /* MDBX_ENABLE_PROFGC */ flags |= ALLOC_SHOULD_SCAN; if (AUDIT_ENABLED()) { if (unlikely(!pnl_check(txn->tw.relist, txn->geo.first_unallocated))) { diff --git a/src/layout-lck.h b/src/layout-lck.h index f4a2a368..b635c595 100644 --- a/src/layout-lck.h +++ b/src/layout-lck.h @@ -50,6 +50,12 @@ typedef struct gc_prof_stat { uint32_t spe_counter; /* page faults (hard page faults) */ uint32_t majflt; + /* Для разборок с pnl_merge() */ + struct { + uint64_t time; + uint64_t volume; + uint32_t calls; + } pnl_merge; } gc_prof_stat_t; /* Statistics of pages operations for all transactions, diff --git a/src/txn.c b/src/txn.c index 627ddc79..38e53e5a 100644 --- a/src/txn.c +++ b/src/txn.c @@ -418,6 +418,14 @@ static void take_gcprof(MDBX_txn *txn, MDBX_commit_latency *latency) { latency->gc_prof.wipes = ptr->gc_prof.wipes; latency->gc_prof.flushes = ptr->gc_prof.flushes; latency->gc_prof.kicks = ptr->gc_prof.kicks; + + latency->gc_prof.pnl_merge_work.time = osal_monotime_to_16dot16(ptr->gc_prof.work.pnl_merge.time); + latency->gc_prof.pnl_merge_work.calls = ptr->gc_prof.work.pnl_merge.calls; + latency->gc_prof.pnl_merge_work.volume = ptr->gc_prof.work.pnl_merge.volume; + latency->gc_prof.pnl_merge_self.time = osal_monotime_to_16dot16(ptr->gc_prof.self.pnl_merge.time); + 
latency->gc_prof.pnl_merge_self.calls = ptr->gc_prof.self.pnl_merge.calls; + latency->gc_prof.pnl_merge_self.volume = ptr->gc_prof.self.pnl_merge.volume; + if (txn == env->basal_txn) memset(&ptr->gc_prof, 0, sizeof(ptr->gc_prof)); } else From 4607184999437a29ffbc8acae446401c7ade44d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 17 Dec 2024 18:52:08 +0300 Subject: [PATCH 394/443] =?UTF-8?q?mdbx:=20=D0=BC=D0=B0=D0=BA=D1=80=D0=BE?= =?UTF-8?q?=D1=81=20`osal=5Fmalloc=5Fusable=5Fsize()`=20=D0=B2=D0=BC=D0=B5?= =?UTF-8?q?=D1=81=D1=82=D0=BE=20=D0=BD=D0=B5=D0=BF=D0=BE=D1=81=D1=80=D0=B5?= =?UTF-8?q?=D0=B4=D1=81=D1=82=D0=B2=D0=B5=D0=BD=D0=BD=D0=BE=D0=B3=D0=BE=20?= =?UTF-8?q?=D0=B8=D1=81=D0=BF=D0=BE=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0?= =?UTF-8?q?=D0=BD=D0=B8=D1=8F=20`malloc=5Fusable=5Fsize()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dpl.c | 6 +++--- src/osal.h | 8 ++++---- src/pnl.c | 18 +++++++++--------- src/txl.c | 12 ++++++------ 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/dpl.c b/src/dpl.c index 6055ea7b..8e0c5dab 100644 --- a/src/dpl.c +++ b/src/dpl.c @@ -41,9 +41,9 @@ dpl_t *dpl_reserve(MDBX_txn *txn, size_t size) { size_t bytes = dpl_size2bytes((size < PAGELIST_LIMIT) ? 
size : PAGELIST_LIMIT); dpl_t *const dl = osal_realloc(txn->tw.dirtylist, bytes); if (likely(dl)) { -#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) - bytes = malloc_usable_size(dl); -#endif /* malloc_usable_size */ +#ifdef osal_malloc_usable_size + bytes = osal_malloc_usable_size(dl); +#endif /* osal_malloc_usable_size */ dl->detent = dpl_bytes2size(bytes); tASSERT(txn, txn->tw.dirtylist == nullptr || dl->length <= dl->detent); txn->tw.dirtylist = dl; diff --git a/src/osal.h b/src/osal.h index 5a049a24..c82f8af1 100644 --- a/src/osal.h +++ b/src/osal.h @@ -153,12 +153,12 @@ typedef pthread_mutex_t osal_fastmutex_t; #endif /* Platform */ #if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) -/* malloc_usable_size() already provided */ +#define osal_malloc_usable_size(ptr) malloc_usable_size(ptr) #elif defined(__APPLE__) -#define malloc_usable_size(ptr) malloc_size(ptr) +#define osal_malloc_usable_size(ptr) malloc_size(ptr) #elif defined(_MSC_VER) && !MDBX_WITHOUT_MSVC_CRT -#define malloc_usable_size(ptr) _msize(ptr) -#endif /* malloc_usable_size */ +#define osal_malloc_usable_size(ptr) _msize(ptr) +#endif /* osal_malloc_usable_size */ /*----------------------------------------------------------------------------*/ /* OS abstraction layer stuff */ diff --git a/src/pnl.c b/src/pnl.c index d40fe7e5..9ad5f9bd 100644 --- a/src/pnl.c +++ b/src/pnl.c @@ -7,9 +7,9 @@ MDBX_INTERNAL pnl_t pnl_alloc(size_t size) { size_t bytes = pnl_size2bytes(size); pnl_t pnl = osal_malloc(bytes); if (likely(pnl)) { -#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) - bytes = malloc_usable_size(pnl); -#endif /* malloc_usable_size */ +#ifdef osal_malloc_usable_size + bytes = osal_malloc_usable_size(pnl); +#endif /* osal_malloc_usable_size */ pnl[0] = pnl_bytes2size(bytes); assert(pnl[0] >= size); pnl += 1; @@ -33,9 +33,9 @@ MDBX_INTERNAL void pnl_shrink(pnl_t __restrict *__restrict ppnl) { size_t bytes 
= pnl_size2bytes(MDBX_PNL_INITIAL * 2); pnl_t pnl = osal_realloc(*ppnl - 1, bytes); if (likely(pnl)) { -#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) - bytes = malloc_usable_size(pnl); -#endif /* malloc_usable_size */ +#ifdef osal_malloc_usable_size + bytes = osal_malloc_usable_size(pnl); +#endif /* osal_malloc_usable_size */ *pnl = pnl_bytes2size(bytes); *ppnl = pnl + 1; } @@ -57,9 +57,9 @@ MDBX_INTERNAL int pnl_reserve(pnl_t __restrict *__restrict ppnl, const size_t wa size_t bytes = pnl_size2bytes(size); pnl_t pnl = osal_realloc(*ppnl - 1, bytes); if (likely(pnl)) { -#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) - bytes = malloc_usable_size(pnl); -#endif /* malloc_usable_size */ +#ifdef osal_malloc_usable_size + bytes = osal_malloc_usable_size(pnl); +#endif /* osal_malloc_usable_size */ *pnl = pnl_bytes2size(bytes); assert(*pnl >= wanna); *ppnl = pnl + 1; diff --git a/src/txl.c b/src/txl.c index 024b099f..d2296740 100644 --- a/src/txl.c +++ b/src/txl.c @@ -21,9 +21,9 @@ MDBX_INTERNAL txl_t txl_alloc(void) { size_t bytes = txl_size2bytes(txl_initial); txl_t txl = osal_malloc(bytes); if (likely(txl)) { -#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) - bytes = malloc_usable_size(txl); -#endif /* malloc_usable_size */ +#ifdef osal_malloc_usable_size + bytes = osal_malloc_usable_size(txl); +#endif /* osal_malloc_usable_size */ txl[0] = txl_bytes2size(bytes); assert(txl[0] >= txl_initial); txl += 1; @@ -52,9 +52,9 @@ MDBX_INTERNAL int txl_reserve(txl_t __restrict *__restrict ptxl, const size_t wa size_t bytes = txl_size2bytes(size); txl_t txl = osal_realloc(*ptxl - 1, bytes); if (likely(txl)) { -#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) - bytes = malloc_usable_size(txl); -#endif /* malloc_usable_size */ +#ifdef osal_malloc_usable_size + bytes = osal_malloc_usable_size(txl); +#endif /* osal_malloc_usable_size */ *txl = 
txl_bytes2size(bytes); assert(*txl >= wanna); *ptxl = txl + 1; From ba6df2bb6d2c8ddb133fb73ec6bf52a561682456 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 17 Dec 2024 17:47:45 +0300 Subject: [PATCH 395/443] =?UTF-8?q?mdbx:=20=D0=B2=D1=8B=D0=B4=D0=B5=D0=BB?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20API-=D1=84=D1=83=D0=BD=D0=BA=D1=86?= =?UTF-8?q?=D0=B8=D0=B9=20=D0=B2=20api-=D1=84=D0=B0=D0=B9=D0=BB=D1=8B.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 36 +- src/alloy.c | 16 +- src/{cold.c => api-cold.c} | 105 -- src/{copy.c => api-copy.c} | 0 src/api-dbi.c | 324 +++++ src/api-env.c | 105 ++ src/api-extra.c | 59 + src/{misc.c => api-misc.c} | 0 src/{env-opts.c => api-opts.c} | 0 ...{range-estimate.c => api-range-estimate.c} | 3 + src/api-txn-data.c | 449 ++++++ src/api-txn.c | 1217 +++++++++++------ src/dbi.c | 333 +---- src/dbi.h | 14 + src/internals.h | 5 - src/mvcc-readers.c | 137 -- src/proto.h | 3 + src/{tree.c => tree-ops.c} | 0 src/{page-search.c => tree-search.c} | 0 src/txn.c | 942 ++----------- 20 files changed, 1884 insertions(+), 1864 deletions(-) rename src/{cold.c => api-cold.c} (83%) rename src/{copy.c => api-copy.c} (100%) create mode 100644 src/api-dbi.c rename src/{misc.c => api-misc.c} (100%) rename src/{env-opts.c => api-opts.c} (100%) rename src/{range-estimate.c => api-range-estimate.c} (99%) create mode 100644 src/api-txn-data.c rename src/{tree.c => tree-ops.c} (100%) rename src/{page-search.c => tree-search.c} (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index dc22f2c7..8f8d1b02 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,12 +60,18 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/README.md" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.h" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.h++" - AND EXISTS 
"${CMAKE_CURRENT_SOURCE_DIR}/test/CMakeLists.txt" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/alloy.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-cold.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-copy.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-cursor.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-dbi.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-env.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-extra.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-key-transform.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-misc.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-opts.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-range-estimate.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-txn-data.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-txn.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/atomics-ops.h" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/atomics-types.h" @@ -74,9 +80,7 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cogs.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cogs.h" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/coherency.c" - AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cold.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/config.h.in" - AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/copy.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cursor.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cursor.h" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dbi.c" @@ -86,7 +90,6 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dpl.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dpl.h" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dxb.c" - AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/env-opts.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/env.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/essentials.h" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/gc-get.c" @@ -111,7 +114,6 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND EXISTS 
"${CMAKE_CURRENT_SOURCE_DIR}/src/mdbx.c++" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/meta.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/meta.h" - AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/misc.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/mvcc-readers.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/node.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/node.h" @@ -124,12 +126,11 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-iov.h" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-ops.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-ops.h" - AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-search.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tree-search.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/pnl.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/pnl.h" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/preface.h" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/proto.h" - AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/range-estimate.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/refund.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/sort.h" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/spill.c" @@ -145,7 +146,7 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/stat.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/wingetopt.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/wingetopt.h" - AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tree.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tree-ops.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/txl.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/txl.h" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/txn.c" @@ -156,7 +157,8 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/walk.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/walk.h" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/windows-import.c" - AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/windows-import.h") + AND EXISTS 
"${CMAKE_CURRENT_SOURCE_DIR}/src/windows-import.h" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/CMakeLists.txt") set(MDBX_AMALGAMATED_SOURCE FALSE) find_program(GIT git) if(NOT GIT) @@ -755,10 +757,17 @@ else() list( APPEND LIBMDBX_SOURCES + "${MDBX_SOURCE_DIR}/api-cold.c" + "${MDBX_SOURCE_DIR}/api-copy.c" "${MDBX_SOURCE_DIR}/api-cursor.c" + "${MDBX_SOURCE_DIR}/api-dbi.c" "${MDBX_SOURCE_DIR}/api-env.c" "${MDBX_SOURCE_DIR}/api-extra.c" "${MDBX_SOURCE_DIR}/api-key-transform.c" + "${MDBX_SOURCE_DIR}/api-misc.c" + "${MDBX_SOURCE_DIR}/api-opts.c" + "${MDBX_SOURCE_DIR}/api-range-estimate.c" + "${MDBX_SOURCE_DIR}/api-txn-data.c" "${MDBX_SOURCE_DIR}/api-txn.c" "${MDBX_SOURCE_DIR}/atomics-ops.h" "${MDBX_SOURCE_DIR}/atomics-types.h" @@ -767,8 +776,6 @@ else() "${MDBX_SOURCE_DIR}/cogs.c" "${MDBX_SOURCE_DIR}/cogs.h" "${MDBX_SOURCE_DIR}/coherency.c" - "${MDBX_SOURCE_DIR}/cold.c" - "${MDBX_SOURCE_DIR}/copy.c" "${MDBX_SOURCE_DIR}/cursor.c" "${MDBX_SOURCE_DIR}/cursor.h" "${MDBX_SOURCE_DIR}/dbi.c" @@ -776,7 +783,6 @@ else() "${MDBX_SOURCE_DIR}/dpl.c" "${MDBX_SOURCE_DIR}/dpl.h" "${MDBX_SOURCE_DIR}/dxb.c" - "${MDBX_SOURCE_DIR}/env-opts.c" "${MDBX_SOURCE_DIR}/env.c" "${MDBX_SOURCE_DIR}/essentials.h" "${MDBX_SOURCE_DIR}/gc-get.c" @@ -792,7 +798,6 @@ else() "${MDBX_SOURCE_DIR}/logging_and_debug.h" "${MDBX_SOURCE_DIR}/meta.c" "${MDBX_SOURCE_DIR}/meta.h" - "${MDBX_SOURCE_DIR}/misc.c" "${MDBX_SOURCE_DIR}/mvcc-readers.c" "${MDBX_SOURCE_DIR}/node.c" "${MDBX_SOURCE_DIR}/node.h" @@ -804,12 +809,11 @@ else() "${MDBX_SOURCE_DIR}/page-iov.h" "${MDBX_SOURCE_DIR}/page-ops.c" "${MDBX_SOURCE_DIR}/page-ops.h" - "${MDBX_SOURCE_DIR}/page-search.c" + "${MDBX_SOURCE_DIR}/tree-search.c" "${MDBX_SOURCE_DIR}/pnl.c" "${MDBX_SOURCE_DIR}/pnl.h" "${MDBX_SOURCE_DIR}/preface.h" "${MDBX_SOURCE_DIR}/proto.h" - "${MDBX_SOURCE_DIR}/range-estimate.c" "${MDBX_SOURCE_DIR}/refund.c" "${MDBX_SOURCE_DIR}/sort.h" "${MDBX_SOURCE_DIR}/spill.c" @@ -817,7 +821,7 @@ else() "${MDBX_SOURCE_DIR}/table.c" "${MDBX_SOURCE_DIR}/tls.c" 
"${MDBX_SOURCE_DIR}/tls.h" - "${MDBX_SOURCE_DIR}/tree.c" + "${MDBX_SOURCE_DIR}/tree-ops.c" "${MDBX_SOURCE_DIR}/txl.c" "${MDBX_SOURCE_DIR}/txl.h" "${MDBX_SOURCE_DIR}/txn.c" diff --git a/src/alloy.c b/src/alloy.c index f2cce532..076d39da 100644 --- a/src/alloy.c +++ b/src/alloy.c @@ -4,22 +4,26 @@ #define xMDBX_ALLOY 1 /* alloyed build */ #include "internals.h" /* must be included first */ +#include "api-cold.c" +#include "api-copy.c" #include "api-cursor.c" +#include "api-dbi.c" #include "api-env.c" #include "api-extra.c" #include "api-key-transform.c" +#include "api-misc.c" +#include "api-opts.c" +#include "api-range-estimate.c" +#include "api-txn-data.c" #include "api-txn.c" #include "audit.c" #include "chk.c" #include "cogs.c" #include "coherency.c" -#include "cold.c" -#include "copy.c" #include "cursor.c" #include "dbi.c" #include "dpl.c" #include "dxb.c" -#include "env-opts.c" #include "env.c" #include "gc-get.c" #include "gc-put.c" @@ -29,21 +33,19 @@ #include "lck.c" #include "logging_and_debug.c" #include "meta.c" -#include "misc.c" #include "mvcc-readers.c" #include "node.c" #include "osal.c" #include "page-get.c" #include "page-iov.c" #include "page-ops.c" -#include "page-search.c" #include "pnl.c" -#include "range-estimate.c" #include "refund.c" #include "spill.c" #include "table.c" #include "tls.c" -#include "tree.c" +#include "tree-ops.c" +#include "tree-search.c" #include "txl.c" #include "txn.c" #include "utils.c" diff --git a/src/cold.c b/src/api-cold.c similarity index 83% rename from src/cold.c rename to src/api-cold.c index 11260ace..dfa082db 100644 --- a/src/cold.c +++ b/src/api-cold.c @@ -128,111 +128,6 @@ __cold int mdbx_env_get_valsize4page_max(const MDBX_env *env, MDBX_db_flags_t fl /*----------------------------------------------------------------------------*/ -__cold static void stat_add(const tree_t *db, MDBX_stat *const st, const size_t bytes) { - st->ms_depth += db->height; - st->ms_branch_pages += db->branch_pages; - st->ms_leaf_pages 
+= db->leaf_pages; - st->ms_overflow_pages += db->large_pages; - st->ms_entries += db->items; - if (likely(bytes >= offsetof(MDBX_stat, ms_mod_txnid) + sizeof(st->ms_mod_txnid))) - st->ms_mod_txnid = (st->ms_mod_txnid > db->mod_txnid) ? st->ms_mod_txnid : db->mod_txnid; -} - -__cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { - memset(st, 0, bytes); - - int err = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(err != MDBX_SUCCESS)) - return err; - - cursor_couple_t cx; - err = cursor_init(&cx.outer, (MDBX_txn *)txn, MAIN_DBI); - if (unlikely(err != MDBX_SUCCESS)) - return err; - - const MDBX_env *const env = txn->env; - st->ms_psize = env->ps; - TXN_FOREACH_DBI_FROM(txn, dbi, - /* assuming GC is internal and not subject for accounting */ MAIN_DBI) { - if ((txn->dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID) - stat_add(txn->dbs + dbi, st, bytes); - } - - if (!(txn->dbs[MAIN_DBI].flags & MDBX_DUPSORT) && txn->dbs[MAIN_DBI].items /* TODO: use `md_subs` field */) { - - /* scan and account not opened named tables */ - err = tree_search(&cx.outer, nullptr, Z_FIRST); - while (err == MDBX_SUCCESS) { - const page_t *mp = cx.outer.pg[cx.outer.top]; - for (size_t i = 0; i < page_numkeys(mp); i++) { - const node_t *node = page_node(mp, i); - if (node_flags(node) != N_TREE) - continue; - if (unlikely(node_ds(node) != sizeof(tree_t))) { - ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid table node size", node_ds(node)); - return MDBX_CORRUPTED; - } - - /* skip opened and already accounted */ - const MDBX_val name = {node_key(node), node_ks(node)}; - TXN_FOREACH_DBI_USER(txn, dbi) { - if ((txn->dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID && - env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[dbi].name) == 0) { - node = nullptr; - break; - } - } - - if (node) { - tree_t db; - memcpy(&db, node_data(node), sizeof(db)); - stat_add(&db, st, bytes); - } - } - err = cursor_sibling_right(&cx.outer); - } - if (unlikely(err != 
MDBX_NOTFOUND)) - return err; - } - - return MDBX_SUCCESS; -} - -__cold int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_stat *dest, size_t bytes) { - if (unlikely(!dest)) - return LOG_IFERR(MDBX_EINVAL); - const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid); - if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) - return LOG_IFERR(MDBX_EINVAL); - - if (likely(txn)) { - if (env && unlikely(txn->env != env)) - return LOG_IFERR(MDBX_EINVAL); - return LOG_IFERR(stat_acc(txn, dest, bytes)); - } - - int err = check_env(env, true); - if (unlikely(err != MDBX_SUCCESS)) - return LOG_IFERR(err); - - if (env->txn && env_txn0_owned(env)) - /* inside write-txn */ - return LOG_IFERR(stat_acc(env->txn, dest, bytes)); - - MDBX_txn *tmp_txn; - err = mdbx_txn_begin((MDBX_env *)env, nullptr, MDBX_TXN_RDONLY, &tmp_txn); - if (unlikely(err != MDBX_SUCCESS)) - return LOG_IFERR(err); - - const int rc = stat_acc(tmp_txn, dest, bytes); - err = mdbx_txn_abort(tmp_txn); - if (unlikely(err != MDBX_SUCCESS)) - return LOG_IFERR(err); - return LOG_IFERR(rc); -} - -/*----------------------------------------------------------------------------*/ - static size_t estimate_rss(size_t database_bytes) { return database_bytes + database_bytes / 64 + (512 + MDBX_WORDBITS * 16) * MEGABYTE; } diff --git a/src/copy.c b/src/api-copy.c similarity index 100% rename from src/copy.c rename to src/api-copy.c diff --git a/src/api-dbi.c b/src/api-dbi.c new file mode 100644 index 00000000..5f52d770 --- /dev/null +++ b/src/api-dbi.c @@ -0,0 +1,324 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi) { + return LOG_IFERR(dbi_open(txn, name, flags, dbi, nullptr, nullptr)); +} + +int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, 
MDBX_cmp_func *keycmp, + MDBX_cmp_func *datacmp) { + return LOG_IFERR(dbi_open(txn, name, flags, dbi, keycmp, datacmp)); +} + +static int dbi_open_cstr(MDBX_txn *txn, const char *name_cstr, MDBX_db_flags_t flags, MDBX_dbi *dbi, + MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { + MDBX_val thunk, *name; + if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || name_cstr == MDBX_CHK_META) + name = (void *)name_cstr; + else { + thunk.iov_len = strlen(name_cstr); + thunk.iov_base = (void *)name_cstr; + name = &thunk; + } + return dbi_open(txn, name, flags, dbi, keycmp, datacmp); +} + +int mdbx_dbi_open(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi) { + return LOG_IFERR(dbi_open_cstr(txn, name, flags, dbi, nullptr, nullptr)); +} + +int mdbx_dbi_open_ex(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, + MDBX_cmp_func *datacmp) { + return LOG_IFERR(dbi_open_cstr(txn, name, flags, dbi, keycmp, datacmp)); +} + +__cold int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { + int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + if (txn->dbs[dbi].height) { + cx.outer.next = txn->cursors[dbi]; + txn->cursors[dbi] = &cx.outer; + rc = tree_drop(&cx.outer, dbi == MAIN_DBI || (cx.outer.tree->flags & MDBX_DUPSORT)); + txn->cursors[dbi] = cx.outer.next; + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + } + + /* Invalidate the dropped DB's cursors */ + for (MDBX_cursor *mc = txn->cursors[dbi]; mc; mc = mc->next) + be_poor(mc); + + if (!del || dbi < CORE_DBS) { + /* reset the DB record, mark it dirty */ + txn->dbi_state[dbi] |= DBI_DIRTY; + txn->dbs[dbi].height = 0; + txn->dbs[dbi].branch_pages = 0; + txn->dbs[dbi].leaf_pages = 0; + txn->dbs[dbi].large_pages = 0; + txn->dbs[dbi].items = 0; + txn->dbs[dbi].root = 
P_INVALID; + txn->dbs[dbi].sequence = 0; + /* txn->dbs[dbi].mod_txnid = txn->txnid; */ + txn->flags |= MDBX_TXN_DIRTY; + return MDBX_SUCCESS; + } + + MDBX_env *const env = txn->env; + MDBX_val name = env->kvs[dbi].name; + rc = cursor_init(&cx.outer, txn, MAIN_DBI); + if (likely(rc == MDBX_SUCCESS)) { + rc = cursor_seek(&cx.outer, &name, nullptr, MDBX_SET).err; + if (likely(rc == MDBX_SUCCESS)) { + cx.outer.next = txn->cursors[MAIN_DBI]; + txn->cursors[MAIN_DBI] = &cx.outer; + rc = cursor_del(&cx.outer, N_TREE); + txn->cursors[MAIN_DBI] = cx.outer.next; + if (likely(rc == MDBX_SUCCESS)) { + tASSERT(txn, txn->dbi_state[MAIN_DBI] & DBI_DIRTY); + tASSERT(txn, txn->flags & MDBX_TXN_DIRTY); + txn->dbi_state[dbi] = DBI_LINDO | DBI_OLDEN; + rc = osal_fastmutex_acquire(&env->dbi_lock); + if (likely(rc == MDBX_SUCCESS)) + return LOG_IFERR(dbi_close_release(env, dbi)); + } + } + } + + txn->flags |= MDBX_TXN_ERROR; + return LOG_IFERR(rc); +} + +__cold int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name_cstr) { + MDBX_val thunk, *name; + if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || name_cstr == MDBX_CHK_META) + name = (void *)name_cstr; + else { + thunk.iov_len = strlen(name_cstr); + thunk.iov_base = (void *)name_cstr; + name = &thunk; + } + return mdbx_dbi_rename2(txn, dbi, name); +} + +__cold int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *new_name) { + int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + if (unlikely(new_name == MDBX_CHK_MAIN || new_name->iov_base == MDBX_CHK_MAIN || new_name == MDBX_CHK_GC || + new_name->iov_base == MDBX_CHK_GC || new_name == MDBX_CHK_META || new_name->iov_base == MDBX_CHK_META)) + return LOG_IFERR(MDBX_EINVAL); + + if (unlikely(dbi < CORE_DBS)) + return LOG_IFERR(MDBX_EINVAL); + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + rc = osal_fastmutex_acquire(&txn->env->dbi_lock); + if (likely(rc == 
MDBX_SUCCESS)) { + struct dbi_rename_result pair = dbi_rename_locked(txn, dbi, *new_name); + if (pair.defer) + pair.defer->next = nullptr; + dbi_defer_release(txn->env, pair.defer); + rc = pair.err; + } + return LOG_IFERR(rc); +} + +int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + if (unlikely(dbi < CORE_DBS)) + return (dbi == MAIN_DBI) ? MDBX_SUCCESS : LOG_IFERR(MDBX_BAD_DBI); + + if (unlikely(dbi >= env->max_dbi)) + return LOG_IFERR(MDBX_BAD_DBI); + + if (unlikely(dbi < CORE_DBS || dbi >= env->max_dbi)) + return LOG_IFERR(MDBX_BAD_DBI); + + rc = osal_fastmutex_acquire(&env->dbi_lock); + if (likely(rc == MDBX_SUCCESS && dbi < env->n_dbi)) { + retry: + if (env->basal_txn && (env->dbs_flags[dbi] & DB_VALID) && (env->basal_txn->flags & MDBX_TXN_FINISHED) == 0) { + /* LY: Опасный код, так как env->txn может быть изменено в другом потоке. + * К сожалению тут нет надежного решения и может быть падение при неверном + * использовании API (вызове mdbx_dbi_close конкурентно с завершением + * пишущей транзакции). + * + * Для минимизации вероятности падения сначала проверяем dbi-флаги + * в basal_txn, а уже после в env->txn. Таким образом, падение может быть + * только при коллизии с завершением вложенной транзакции. + * + * Альтернативно можно попробовать выполнять обновление/put записи в + * mainDb соответствующей таблице закрываемого хендла. Семантически это + * верный путь, но проблема в текущем API, в котором исторически dbi-хендл + * живет и закрывается вне транзакции. Причем проблема не только в том, + * что нет указателя на текущую пишущую транзакцию, а в том что + * пользователь точно не ожидает что закрытие хендла приведет к + * скрытой/непрозрачной активности внутри транзакции потенциально + * выполняемой в другом потоке. 
Другими словами, проблема может быть + * только при неверном использовании API и если пользователь это + * допускает, то точно не будет ожидать скрытых действий внутри + * транзакции, и поэтому этот путь потенциально более опасен. */ + const MDBX_txn *const hazard = env->txn; + osal_compiler_barrier(); + if ((dbi_state(env->basal_txn, dbi) & (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO) { + bailout_dirty_dbi: + osal_fastmutex_release(&env->dbi_lock); + return LOG_IFERR(MDBX_DANGLING_DBI); + } + osal_memory_barrier(); + if (unlikely(hazard != env->txn)) + goto retry; + if (hazard != env->basal_txn && hazard && (hazard->flags & MDBX_TXN_FINISHED) == 0 && + hazard->signature == txn_signature && + (dbi_state(hazard, dbi) & (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO) + goto bailout_dirty_dbi; + osal_compiler_barrier(); + if (unlikely(hazard != env->txn)) + goto retry; + } + rc = dbi_close_release(env, dbi); + } + return LOG_IFERR(rc); +} + +int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, unsigned *state) { + if (unlikely(!flags || !state)) + return LOG_IFERR(MDBX_EINVAL); + + int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR - MDBX_TXN_PARKED); + if (unlikely(rc != MDBX_SUCCESS)) { + *flags = 0; + *state = 0; + return LOG_IFERR(rc); + } + + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) { + *flags = 0; + *state = 0; + return LOG_IFERR(rc); + } + + *flags = txn->dbs[dbi].flags & DB_PERSISTENT_FLAGS; + *state = txn->dbi_state[dbi] & (DBI_FRESH | DBI_CREAT | DBI_DIRTY | DBI_STALE); + return MDBX_SUCCESS; +} + +static void stat_get(const tree_t *db, MDBX_stat *st, size_t bytes) { + st->ms_depth = db->height; + st->ms_branch_pages = db->branch_pages; + st->ms_leaf_pages = db->leaf_pages; + st->ms_overflow_pages = db->large_pages; + st->ms_entries = db->items; + if (likely(bytes >= offsetof(MDBX_stat, ms_mod_txnid) + sizeof(st->ms_mod_txnid))) + st->ms_mod_txnid = db->mod_txnid; +} + +__cold int mdbx_dbi_stat(const 
MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, size_t bytes) { + if (unlikely(!dest)) + return LOG_IFERR(MDBX_EINVAL); + + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid); + if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) { + rc = MDBX_EINVAL; + goto bailout; + } + + if (unlikely(txn->flags & MDBX_TXN_BLOCKED)) { + rc = MDBX_BAD_TXN; + goto bailout; + } + + if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) { + rc = tbl_fetch((MDBX_txn *)txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + + dest->ms_psize = txn->env->ps; + stat_get(&txn->dbs[dbi], dest, bytes); + return MDBX_SUCCESS; + +bailout: + memset(dest, 0, bytes); + return LOG_IFERR(rc); +} + +__cold int mdbx_enumerate_tables(const MDBX_txn *txn, MDBX_table_enum_func *func, void *ctx) { + if (unlikely(!func)) + return LOG_IFERR(MDBX_EINVAL); + + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, MAIN_DBI); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + cx.outer.next = txn->cursors[MAIN_DBI]; + txn->cursors[MAIN_DBI] = &cx.outer; + for (rc = outer_first(&cx.outer, nullptr, nullptr); rc == MDBX_SUCCESS; + rc = outer_next(&cx.outer, nullptr, nullptr, MDBX_NEXT_NODUP)) { + node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); + if (node_flags(node) != N_TREE) + continue; + if (unlikely(node_ds(node) != sizeof(tree_t))) { + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid dupsort sub-tree node size", + (unsigned)node_ds(node)); + rc = MDBX_CORRUPTED; + break; + } + + tree_t reside; + const tree_t *tree = memcpy(&reside, node_data(node), sizeof(reside)); + const MDBX_val name = {node_key(node), node_ks(node)}; 
+ const MDBX_env *const env = txn->env; + MDBX_dbi dbi = 0; + for (size_t i = CORE_DBS; i < env->n_dbi; ++i) { + if (i >= txn->n_dbi || !(env->dbs_flags[i] & DB_VALID)) + continue; + if (env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[i].name)) + continue; + + tree = dbi_dig(txn, i, &reside); + dbi = (MDBX_dbi)i; + break; + } + + MDBX_stat stat; + stat_get(tree, &stat, sizeof(stat)); + rc = func(ctx, txn, &name, tree->flags, &stat, dbi); + if (rc != MDBX_SUCCESS) + goto bailout; + } + rc = (rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc; + +bailout: + txn->cursors[MAIN_DBI] = cx.outer.next; + return LOG_IFERR(rc); +} diff --git a/src/api-env.c b/src/api-env.c index b9d0488c..cbfba423 100644 --- a/src/api-env.c +++ b/src/api-env.c @@ -1315,3 +1315,108 @@ __cold int mdbx_env_sync_ex(MDBX_env *env, bool force, bool nonblock) { return LOG_IFERR(env_sync(env, force, nonblock)); } + +/*----------------------------------------------------------------------------*/ + +static void stat_add(const tree_t *db, MDBX_stat *const st, const size_t bytes) { + st->ms_depth += db->height; + st->ms_branch_pages += db->branch_pages; + st->ms_leaf_pages += db->leaf_pages; + st->ms_overflow_pages += db->large_pages; + st->ms_entries += db->items; + if (likely(bytes >= offsetof(MDBX_stat, ms_mod_txnid) + sizeof(st->ms_mod_txnid))) + st->ms_mod_txnid = (st->ms_mod_txnid > db->mod_txnid) ? 
st->ms_mod_txnid : db->mod_txnid; +} + +static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { + memset(st, 0, bytes); + + int err = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(err != MDBX_SUCCESS)) + return err; + + cursor_couple_t cx; + err = cursor_init(&cx.outer, (MDBX_txn *)txn, MAIN_DBI); + if (unlikely(err != MDBX_SUCCESS)) + return err; + + const MDBX_env *const env = txn->env; + st->ms_psize = env->ps; + TXN_FOREACH_DBI_FROM(txn, dbi, + /* assuming GC is internal and not subject for accounting */ MAIN_DBI) { + if ((txn->dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID) + stat_add(txn->dbs + dbi, st, bytes); + } + + if (!(txn->dbs[MAIN_DBI].flags & MDBX_DUPSORT) && txn->dbs[MAIN_DBI].items /* TODO: use `md_subs` field */) { + + /* scan and account not opened named tables */ + err = tree_search(&cx.outer, nullptr, Z_FIRST); + while (err == MDBX_SUCCESS) { + const page_t *mp = cx.outer.pg[cx.outer.top]; + for (size_t i = 0; i < page_numkeys(mp); i++) { + const node_t *node = page_node(mp, i); + if (node_flags(node) != N_TREE) + continue; + if (unlikely(node_ds(node) != sizeof(tree_t))) { + ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid table node size", node_ds(node)); + return MDBX_CORRUPTED; + } + + /* skip opened and already accounted */ + const MDBX_val name = {node_key(node), node_ks(node)}; + TXN_FOREACH_DBI_USER(txn, dbi) { + if ((txn->dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID && + env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[dbi].name) == 0) { + node = nullptr; + break; + } + } + + if (node) { + tree_t db; + memcpy(&db, node_data(node), sizeof(db)); + stat_add(&db, st, bytes); + } + } + err = cursor_sibling_right(&cx.outer); + } + if (unlikely(err != MDBX_NOTFOUND)) + return err; + } + + return MDBX_SUCCESS; +} + +__cold int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_stat *dest, size_t bytes) { + if (unlikely(!dest)) + return LOG_IFERR(MDBX_EINVAL); + const size_t 
size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid); + if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) + return LOG_IFERR(MDBX_EINVAL); + + if (likely(txn)) { + if (env && unlikely(txn->env != env)) + return LOG_IFERR(MDBX_EINVAL); + return LOG_IFERR(stat_acc(txn, dest, bytes)); + } + + int err = check_env(env, true); + if (unlikely(err != MDBX_SUCCESS)) + return LOG_IFERR(err); + + if (env->txn && env_txn0_owned(env)) + /* inside write-txn */ + return LOG_IFERR(stat_acc(env->txn, dest, bytes)); + + MDBX_txn *tmp_txn; + err = mdbx_txn_begin((MDBX_env *)env, nullptr, MDBX_TXN_RDONLY, &tmp_txn); + if (unlikely(err != MDBX_SUCCESS)) + return LOG_IFERR(err); + + const int rc = stat_acc(tmp_txn, dest, bytes); + err = mdbx_txn_abort(tmp_txn); + if (unlikely(err != MDBX_SUCCESS)) + return LOG_IFERR(err); + return LOG_IFERR(rc); +} diff --git a/src/api-extra.c b/src/api-extra.c index e74c3bbc..dd394f7b 100644 --- a/src/api-extra.c +++ b/src/api-extra.c @@ -72,6 +72,65 @@ __cold int mdbx_reader_check(MDBX_env *env, int *dead) { return LOG_IFERR(mvcc_cleanup_dead(env, false, dead)); } +__cold int mdbx_thread_register(const MDBX_env *env) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + if (unlikely(!env->lck_mmap.lck)) + return LOG_IFERR((env->flags & MDBX_EXCLUSIVE) ? 
MDBX_EINVAL : MDBX_EPERM); + + if (unlikely((env->flags & ENV_TXKEY) == 0)) { + eASSERT(env, env->flags & MDBX_NOSTICKYTHREADS); + return LOG_IFERR(MDBX_EINVAL) /* MDBX_NOSTICKYTHREADS mode */; + } + + eASSERT(env, (env->flags & (MDBX_NOSTICKYTHREADS | ENV_TXKEY)) == ENV_TXKEY); + reader_slot_t *r = thread_rthc_get(env->me_txkey); + if (unlikely(r != nullptr)) { + eASSERT(env, r->pid.weak == env->pid); + eASSERT(env, r->tid.weak == osal_thread_self()); + if (unlikely(r->pid.weak != env->pid)) + return LOG_IFERR(MDBX_BAD_RSLOT); + return MDBX_RESULT_TRUE /* already registered */; + } + + return LOG_IFERR(mvcc_bind_slot((MDBX_env *)env).err); +} + +__cold int mdbx_thread_unregister(const MDBX_env *env) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + if (unlikely(!env->lck_mmap.lck)) + return MDBX_RESULT_TRUE; + + if (unlikely((env->flags & ENV_TXKEY) == 0)) { + eASSERT(env, env->flags & MDBX_NOSTICKYTHREADS); + return MDBX_RESULT_TRUE /* MDBX_NOSTICKYTHREADS mode */; + } + + eASSERT(env, (env->flags & (MDBX_NOSTICKYTHREADS | ENV_TXKEY)) == ENV_TXKEY); + reader_slot_t *r = thread_rthc_get(env->me_txkey); + if (unlikely(r == nullptr)) + return MDBX_RESULT_TRUE /* not registered */; + + eASSERT(env, r->pid.weak == env->pid); + eASSERT(env, r->tid.weak == osal_thread_self()); + if (unlikely(r->pid.weak != env->pid || r->tid.weak != osal_thread_self())) + return LOG_IFERR(MDBX_BAD_RSLOT); + + eASSERT(env, r->txnid.weak >= SAFE64_INVALID_THRESHOLD); + if (unlikely(r->txnid.weak < SAFE64_INVALID_THRESHOLD)) + return LOG_IFERR(MDBX_BUSY) /* transaction is still active */; + + atomic_store32(&r->pid, 0, mo_Relaxed); + atomic_store32(&env->lck->rdt_refresh_flag, true, mo_AcquireRelease); + thread_rthc_set(env->me_txkey, nullptr); + return MDBX_SUCCESS; +} + /*------------------------------------------------------------------------------ * Locking API */ diff --git a/src/misc.c b/src/api-misc.c similarity index 100% rename from 
src/misc.c rename to src/api-misc.c diff --git a/src/env-opts.c b/src/api-opts.c similarity index 100% rename from src/env-opts.c rename to src/api-opts.c diff --git a/src/range-estimate.c b/src/api-range-estimate.c similarity index 99% rename from src/range-estimate.c rename to src/api-range-estimate.c index ea093088..de3e1d12 100644 --- a/src/range-estimate.c +++ b/src/api-range-estimate.c @@ -142,6 +142,9 @@ __hot static ptrdiff_t estimate(const tree_t *tree, diff_t *const __restrict dr) } } +/*------------------------------------------------------------------------------ + * Range-Estimation API */ + __hot int mdbx_estimate_distance(const MDBX_cursor *first, const MDBX_cursor *last, ptrdiff_t *distance_items) { if (unlikely(first == nullptr || last == nullptr || distance_items == nullptr)) return LOG_IFERR(MDBX_EINVAL); diff --git a/src/api-txn-data.c b/src/api-txn-data.c new file mode 100644 index 00000000..f12b06d3 --- /dev/null +++ b/src/api-txn-data.c @@ -0,0 +1,449 @@ +/// \copyright SPDX-License-Identifier: Apache-2.0 +/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 + +#include "internals.h" + +__cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, uint32_t *mask) { + if (unlikely(!mask)) + return LOG_IFERR(MDBX_EINVAL); + + *mask = 0; + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + if ((cx.outer.tree->flags & MDBX_DUPSORT) == 0) + return MDBX_RESULT_TRUE; + + MDBX_val key, data; + rc = outer_first(&cx.outer, &key, &data); + while (rc == MDBX_SUCCESS) { + const node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); + const tree_t *db = node_data(node); + const unsigned flags = node_flags(node); + switch (flags) { + case N_BIG: + case 0: + /* single-value entry, deep = 0 */ + *mask |= 1 << 0; + break; + case N_DUP: + /* 
single sub-page, deep = 1 */ + *mask |= 1 << 1; + break; + case N_DUP | N_TREE: + /* sub-tree */ + *mask |= 1 << UNALIGNED_PEEK_16(db, tree_t, height); + break; + default: + ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid node-size", flags); + return LOG_IFERR(MDBX_CORRUPTED); + } + rc = outer_next(&cx.outer, &key, &data, MDBX_NEXT_NODUP); + } + + return LOG_IFERR((rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc); +} + +int mdbx_canary_get(const MDBX_txn *txn, MDBX_canary *canary) { + if (unlikely(canary == nullptr)) + return LOG_IFERR(MDBX_EINVAL); + + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) { + memset(canary, 0, sizeof(*canary)); + return LOG_IFERR(rc); + } + + *canary = txn->canary; + return MDBX_SUCCESS; +} + +int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data) { + DKBUF_DEBUG; + DEBUG("===> get db %u key [%s]", dbi, DKEY_DEBUG(key)); + + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + if (unlikely(!key || !data)) + return LOG_IFERR(MDBX_EINVAL); + + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + return LOG_IFERR(cursor_seek(&cx.outer, (MDBX_val *)key, data, MDBX_SET).err); +} + +int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data) { + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + if (unlikely(!key || !data)) + return LOG_IFERR(MDBX_EINVAL); + + if (unlikely(txn->flags & MDBX_TXN_BLOCKED)) + return LOG_IFERR(MDBX_BAD_TXN); + + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + return LOG_IFERR(cursor_ops(&cx.outer, key, data, MDBX_SET_LOWERBOUND)); +} + +int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, size_t *values_count) { + DKBUF_DEBUG; 
+ DEBUG("===> get db %u key [%s]", dbi, DKEY_DEBUG(key)); + + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + if (unlikely(!key || !data)) + return LOG_IFERR(MDBX_EINVAL); + + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + rc = cursor_seek(&cx.outer, key, data, MDBX_SET_KEY).err; + if (unlikely(rc != MDBX_SUCCESS)) { + if (values_count) + *values_count = 0; + return LOG_IFERR(rc); + } + + if (values_count) { + *values_count = 1; + if (inner_pointed(&cx.outer)) + *values_count = + (sizeof(*values_count) >= sizeof(cx.inner.nested_tree.items) || cx.inner.nested_tree.items <= PTRDIFF_MAX) + ? (size_t)cx.inner.nested_tree.items + : PTRDIFF_MAX; + } + return MDBX_SUCCESS; +} + +/*----------------------------------------------------------------------------*/ + +int mdbx_canary_put(MDBX_txn *txn, const MDBX_canary *canary) { + int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + if (likely(canary)) { + if (txn->canary.x == canary->x && txn->canary.y == canary->y && txn->canary.z == canary->z) + return MDBX_SUCCESS; + txn->canary.x = canary->x; + txn->canary.y = canary->y; + txn->canary.z = canary->z; + } + txn->canary.v = txn->txnid; + txn->flags |= MDBX_TXN_DIRTY; + + return MDBX_SUCCESS; +} + +/* Функция сообщает находится ли указанный адрес в "грязной" странице у + * заданной пишущей транзакции. В конечном счете это позволяет избавиться от + * лишнего копирования данных из НЕ-грязных страниц. + * + * "Грязные" страницы - это те, которые уже были изменены в ходе пишущей + * транзакции. Соответственно, какие-либо дальнейшие изменения могут привести + * к перезаписи таких страниц. Поэтому все функции, выполняющие изменения, в + * качестве аргументов НЕ должны получать указатели на данные в таких + * страницах. 
В свою очередь "НЕ грязные" страницы перед модификацией будут + * скопированы. + * + * Другими словами, данные из "грязных" страниц должны быть либо скопированы + * перед передачей в качестве аргументов для дальнейших модификаций, либо + * отвергнуты на стадии проверки корректности аргументов. + * + * Таким образом, функция позволяет как избавится от лишнего копирования, + * так и выполнить более полную проверку аргументов. + * + * ВАЖНО: Передаваемый указатель должен указывать на начало данных. Только + * так гарантируется что актуальный заголовок страницы будет физически + * расположен в той-же странице памяти, в том числе для многостраничных + * P_LARGE страниц с длинными данными. */ +int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr) { + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + const MDBX_env *env = txn->env; + const ptrdiff_t offset = ptr_dist(ptr, env->dxb_mmap.base); + if (offset >= 0) { + const pgno_t pgno = bytes2pgno(env, offset); + if (likely(pgno < txn->geo.first_unallocated)) { + const page_t *page = pgno2page(env, pgno); + if (unlikely(page->pgno != pgno || (page->flags & P_ILL_BITS) != 0)) { + /* The ptr pointed into middle of a large page, + * not to the beginning of a data. */ + return LOG_IFERR(MDBX_EINVAL); + } + return ((txn->flags & MDBX_TXN_RDONLY) || !is_modifable(txn, page)) ? MDBX_RESULT_FALSE : MDBX_RESULT_TRUE; + } + if ((size_t)offset < env->dxb_mmap.limit) { + /* Указатель адресует что-то в пределах mmap, но за границей + * распределенных страниц. Такое может случится если mdbx_is_dirty() + * вызывается после операции, в ходе которой грязная страница была + * возвращена в нераспределенное пространство. */ + return (txn->flags & MDBX_TXN_RDONLY) ? LOG_IFERR(MDBX_EINVAL) : MDBX_RESULT_TRUE; + } + } + + /* Страница вне используемого mmap-диапазона, т.е. 
либо в функцию был + * передан некорректный адрес, либо адрес в теневой странице, которая была + * выделена посредством malloc(). + * + * Для режима MDBX_WRITE_MAP режима страница однозначно "не грязная", + * а для режимов без MDBX_WRITE_MAP однозначно "не чистая". */ + return (txn->flags & (MDBX_WRITEMAP | MDBX_TXN_RDONLY)) ? LOG_IFERR(MDBX_EINVAL) : MDBX_RESULT_TRUE; +} + +int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, const MDBX_val *data) { + int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + if (unlikely(!key)) + return LOG_IFERR(MDBX_EINVAL); + + if (unlikely(dbi <= FREE_DBI)) + return LOG_IFERR(MDBX_BAD_DBI); + + if (unlikely(txn->flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) + return LOG_IFERR((txn->flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS : MDBX_BAD_TXN); + + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + MDBX_val proxy; + MDBX_cursor_op op = MDBX_SET; + unsigned flags = MDBX_ALLDUPS; + if (data) { + proxy = *data; + data = &proxy; + op = MDBX_GET_BOTH; + flags = 0; + } + rc = cursor_seek(&cx.outer, (MDBX_val *)key, (MDBX_val *)data, op).err; + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + cx.outer.next = txn->cursors[dbi]; + txn->cursors[dbi] = &cx.outer; + rc = cursor_del(&cx.outer, flags); + txn->cursors[dbi] = cx.outer.next; + return LOG_IFERR(rc); +} + +int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, MDBX_put_flags_t flags) { + int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + if (unlikely(!key || !data)) + return LOG_IFERR(MDBX_EINVAL); + + if (unlikely(dbi <= FREE_DBI)) + return LOG_IFERR(MDBX_BAD_DBI); + + if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | MDBX_ALLDUPS | MDBX_RESERVE | MDBX_APPEND | + MDBX_APPENDDUP | MDBX_CURRENT | MDBX_MULTIPLE))) + return 
LOG_IFERR(MDBX_EINVAL); + + if (unlikely(txn->flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) + return LOG_IFERR((txn->flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS : MDBX_BAD_TXN); + + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + cx.outer.next = txn->cursors[dbi]; + txn->cursors[dbi] = &cx.outer; + + /* LY: support for update (explicit overwrite) */ + if (flags & MDBX_CURRENT) { + rc = cursor_seek(&cx.outer, (MDBX_val *)key, nullptr, MDBX_SET).err; + if (likely(rc == MDBX_SUCCESS) && (txn->dbs[dbi].flags & MDBX_DUPSORT) && (flags & MDBX_ALLDUPS) == 0) { + /* LY: allows update (explicit overwrite) only for unique keys */ + node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); + if (node_flags(node) & N_DUP) { + tASSERT(txn, inner_pointed(&cx.outer) && cx.outer.subcur->nested_tree.items > 1); + rc = MDBX_EMULTIVAL; + if ((flags & MDBX_NOOVERWRITE) == 0) { + flags -= MDBX_CURRENT; + rc = cursor_del(&cx.outer, MDBX_ALLDUPS); + } + } + } + } + + if (likely(rc == MDBX_SUCCESS)) + rc = cursor_put_checklen(&cx.outer, key, data, flags); + txn->cursors[dbi] = cx.outer.next; + + return LOG_IFERR(rc); +} + +//------------------------------------------------------------------------------ + +/* Позволяет обновить или удалить существующую запись с получением + * в old_data предыдущего значения данных. При этом если new_data равен + * нулю, то выполняется удаление, иначе обновление/вставка. + * + * Текущее значение может находиться в уже измененной (грязной) странице. + * В этом случае страница будет перезаписана при обновлении, а само старое + * значение утрачено. Поэтому исходно в old_data должен быть передан + * дополнительный буфер для копирования старого значения. + * Если переданный буфер слишком мал, то функция вернет -1, установив + * old_data->iov_len в соответствующее значение. 
+ * + * Для не-уникальных ключей также возможен второй сценарий использования, + * когда посредством old_data из записей с одинаковым ключом для + * удаления/обновления выбирается конкретная. Для выбора этого сценария + * во flags следует одновременно указать MDBX_CURRENT и MDBX_NOOVERWRITE. + * Именно эта комбинация выбрана, так как она лишена смысла, и этим позволяет + * идентифицировать запрос такого сценария. + * + * Функция может быть замещена соответствующими операциями с курсорами + * после двух доработок (TODO): + * - внешняя аллокация курсоров, в том числе на стеке (без malloc). + * - получения dirty-статуса страницы по адресу (знать о MUTABLE/WRITEABLE). + */ + +int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *new_data, MDBX_val *old_data, + MDBX_put_flags_t flags, MDBX_preserve_func preserver, void *preserver_context) { + int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + if (unlikely(!key || !old_data || old_data == new_data)) + return LOG_IFERR(MDBX_EINVAL); + + if (unlikely(old_data->iov_base == nullptr && old_data->iov_len)) + return LOG_IFERR(MDBX_EINVAL); + + if (unlikely(new_data == nullptr && (flags & (MDBX_CURRENT | MDBX_RESERVE)) != MDBX_CURRENT)) + return LOG_IFERR(MDBX_EINVAL); + + if (unlikely(dbi <= FREE_DBI)) + return LOG_IFERR(MDBX_BAD_DBI); + + if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | MDBX_RESERVE | MDBX_APPEND | + MDBX_APPENDDUP | MDBX_CURRENT))) + return LOG_IFERR(MDBX_EINVAL); + + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + cx.outer.next = txn->cursors[dbi]; + txn->cursors[dbi] = &cx.outer; + + MDBX_val present_key = *key; + if (F_ISSET(flags, MDBX_CURRENT | MDBX_NOOVERWRITE)) { + /* в old_data значение для выбора конкретного дубликата */ + if (unlikely(!(txn->dbs[dbi].flags & MDBX_DUPSORT))) { + rc = MDBX_EINVAL; + goto bailout; + } + 
+ /* убираем лишний бит, он был признаком запрошенного режима */ + flags -= MDBX_NOOVERWRITE; + + rc = cursor_seek(&cx.outer, &present_key, old_data, MDBX_GET_BOTH).err; + if (rc != MDBX_SUCCESS) + goto bailout; + } else { + /* в old_data буфер для сохранения предыдущего значения */ + if (unlikely(new_data && old_data->iov_base == new_data->iov_base)) + return LOG_IFERR(MDBX_EINVAL); + MDBX_val present_data; + rc = cursor_seek(&cx.outer, &present_key, &present_data, MDBX_SET_KEY).err; + if (unlikely(rc != MDBX_SUCCESS)) { + old_data->iov_base = nullptr; + old_data->iov_len = 0; + if (rc != MDBX_NOTFOUND || (flags & MDBX_CURRENT)) + goto bailout; + } else if (flags & MDBX_NOOVERWRITE) { + rc = MDBX_KEYEXIST; + *old_data = present_data; + goto bailout; + } else { + page_t *page = cx.outer.pg[cx.outer.top]; + if (txn->dbs[dbi].flags & MDBX_DUPSORT) { + if (flags & MDBX_CURRENT) { + /* disallow update/delete for multi-values */ + node_t *node = page_node(page, cx.outer.ki[cx.outer.top]); + if (node_flags(node) & N_DUP) { + tASSERT(txn, inner_pointed(&cx.outer) && cx.outer.subcur->nested_tree.items > 1); + if (cx.outer.subcur->nested_tree.items > 1) { + rc = MDBX_EMULTIVAL; + goto bailout; + } + } + /* В LMDB флажок MDBX_CURRENT здесь приведет + * к замене данных без учета MDBX_DUPSORT сортировки, + * но здесь это в любом случае допустимо, так как мы + * проверили что для ключа есть только одно значение. */ + } + } + + if (is_modifable(txn, page)) { + if (new_data && cmp_lenfast(&present_data, new_data) == 0) { + /* если данные совпадают, то ничего делать не надо */ + *old_data = *new_data; + goto bailout; + } + rc = preserver ? 
preserver(preserver_context, old_data, present_data.iov_base, present_data.iov_len) + : MDBX_SUCCESS; + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } else { + *old_data = present_data; + } + flags |= MDBX_CURRENT; + } + } + + if (likely(new_data)) + rc = cursor_put_checklen(&cx.outer, key, new_data, flags); + else + rc = cursor_del(&cx.outer, flags & MDBX_ALLDUPS); + +bailout: + txn->cursors[dbi] = cx.outer.next; + return LOG_IFERR(rc); +} + +static int default_value_preserver(void *context, MDBX_val *target, const void *src, size_t bytes) { + (void)context; + if (unlikely(target->iov_len < bytes)) { + target->iov_base = nullptr; + target->iov_len = bytes; + return MDBX_RESULT_TRUE; + } + memcpy(target->iov_base, src, target->iov_len = bytes); + return MDBX_SUCCESS; +} + +int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *new_data, MDBX_val *old_data, + MDBX_put_flags_t flags) { + return mdbx_replace_ex(txn, dbi, key, new_data, old_data, flags, default_value_preserver, nullptr); +} diff --git a/src/api-txn.c b/src/api-txn.c index 2b3ba0d8..508f37d5 100644 --- a/src/api-txn.c +++ b/src/api-txn.c @@ -34,447 +34,858 @@ int mdbx_txn_straggler(const MDBX_txn *txn, int *percent) return (lag > INT_MAX) ? 
INT_MAX : (int)lag; } -__cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, uint32_t *mask) { - if (unlikely(!mask)) +MDBX_env *mdbx_txn_env(const MDBX_txn *txn) { + if (unlikely(!txn || txn->signature != txn_signature || txn->env->signature.weak != env_signature)) + return nullptr; + return txn->env; +} + +uint64_t mdbx_txn_id(const MDBX_txn *txn) { + if (unlikely(!txn || txn->signature != txn_signature)) + return 0; + return txn->txnid; +} + +MDBX_txn_flags_t mdbx_txn_flags(const MDBX_txn *txn) { + STATIC_ASSERT( + (MDBX_TXN_INVALID & (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | MDBX_TXN_HAS_CHILD | + txn_gc_drained | txn_shrink_allowed | txn_rw_begin_flags | txn_ro_begin_flags)) == 0); + if (unlikely(!txn || txn->signature != txn_signature)) + return MDBX_TXN_INVALID; + assert(0 == (int)(txn->flags & MDBX_TXN_INVALID)); + + MDBX_txn_flags_t flags = txn->flags; + if (F_ISSET(flags, MDBX_TXN_PARKED | MDBX_TXN_RDONLY) && txn->to.reader && + safe64_read(&txn->to.reader->tid) == MDBX_TID_TXN_OUSTED) + flags |= MDBX_TXN_OUSTED; + return flags; +} + +int mdbx_txn_reset(MDBX_txn *txn) { + int rc = check_txn(txn, 0); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + /* This call is only valid for read-only txns */ + if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0)) return LOG_IFERR(MDBX_EINVAL); - *mask = 0; - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - cursor_couple_t cx; - rc = cursor_init(&cx.outer, txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - if ((cx.outer.tree->flags & MDBX_DUPSORT) == 0) - return MDBX_RESULT_TRUE; - - MDBX_val key, data; - rc = outer_first(&cx.outer, &key, &data); - while (rc == MDBX_SUCCESS) { - const node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); - const tree_t *db = node_data(node); - const unsigned flags = node_flags(node); - switch (flags) { - case N_BIG: - 
case 0: - /* single-value entry, deep = 0 */ - *mask |= 1 << 0; - break; - case N_DUP: - /* single sub-page, deep = 1 */ - *mask |= 1 << 1; - break; - case N_DUP | N_TREE: - /* sub-tree */ - *mask |= 1 << UNALIGNED_PEEK_16(db, tree_t, height); - break; - default: - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid node-size", flags); - return LOG_IFERR(MDBX_CORRUPTED); - } - rc = outer_next(&cx.outer, &key, &data, MDBX_NEXT_NODUP); + /* LY: don't close DBI-handles */ + rc = txn_end(txn, TXN_END_RESET | TXN_END_UPDATE); + if (rc == MDBX_SUCCESS) { + tASSERT(txn, txn->signature == txn_signature); + tASSERT(txn, txn->owner == 0); } - - return LOG_IFERR((rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc); -} - -int mdbx_canary_get(const MDBX_txn *txn, MDBX_canary *canary) { - if (unlikely(canary == nullptr)) - return LOG_IFERR(MDBX_EINVAL); - - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) { - memset(canary, 0, sizeof(*canary)); - return LOG_IFERR(rc); - } - - *canary = txn->canary; - return MDBX_SUCCESS; -} - -int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data) { - DKBUF_DEBUG; - DEBUG("===> get db %u key [%s]", dbi, DKEY_DEBUG(key)); - - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - if (unlikely(!key || !data)) - return LOG_IFERR(MDBX_EINVAL); - - cursor_couple_t cx; - rc = cursor_init(&cx.outer, txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - return LOG_IFERR(cursor_seek(&cx.outer, (MDBX_val *)key, data, MDBX_SET).err); -} - -int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data) { - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - if (unlikely(!key || !data)) - return LOG_IFERR(MDBX_EINVAL); - - if (unlikely(txn->flags & MDBX_TXN_BLOCKED)) - return LOG_IFERR(MDBX_BAD_TXN); - - cursor_couple_t cx; - rc = 
cursor_init(&cx.outer, txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - return LOG_IFERR(cursor_ops(&cx.outer, key, data, MDBX_SET_LOWERBOUND)); -} - -int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, size_t *values_count) { - DKBUF_DEBUG; - DEBUG("===> get db %u key [%s]", dbi, DKEY_DEBUG(key)); - - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - if (unlikely(!key || !data)) - return LOG_IFERR(MDBX_EINVAL); - - cursor_couple_t cx; - rc = cursor_init(&cx.outer, txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - rc = cursor_seek(&cx.outer, key, data, MDBX_SET_KEY).err; - if (unlikely(rc != MDBX_SUCCESS)) { - if (values_count) - *values_count = 0; - return LOG_IFERR(rc); - } - - if (values_count) { - *values_count = 1; - if (inner_pointed(&cx.outer)) - *values_count = - (sizeof(*values_count) >= sizeof(cx.inner.nested_tree.items) || cx.inner.nested_tree.items <= PTRDIFF_MAX) - ? (size_t)cx.inner.nested_tree.items - : PTRDIFF_MAX; - } - return MDBX_SUCCESS; -} - -/*----------------------------------------------------------------------------*/ - -int mdbx_canary_put(MDBX_txn *txn, const MDBX_canary *canary) { - int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - if (likely(canary)) { - if (txn->canary.x == canary->x && txn->canary.y == canary->y && txn->canary.z == canary->z) - return MDBX_SUCCESS; - txn->canary.x = canary->x; - txn->canary.y = canary->y; - txn->canary.z = canary->z; - } - txn->canary.v = txn->txnid; - txn->flags |= MDBX_TXN_DIRTY; - - return MDBX_SUCCESS; -} - -/* Функция сообщает находится ли указанный адрес в "грязной" странице у - * заданной пишущей транзакции. В конечном счете это позволяет избавиться от - * лишнего копирования данных из НЕ-грязных страниц. - * - * "Грязные" страницы - это те, которые уже были изменены в ходе пишущей - * транзакции. 
Соответственно, какие-либо дальнейшие изменения могут привести - * к перезаписи таких страниц. Поэтому все функции, выполняющие изменения, в - * качестве аргументов НЕ должны получать указатели на данные в таких - * страницах. В свою очередь "НЕ грязные" страницы перед модификацией будут - * скопированы. - * - * Другими словами, данные из "грязных" страниц должны быть либо скопированы - * перед передачей в качестве аргументов для дальнейших модификаций, либо - * отвергнуты на стадии проверки корректности аргументов. - * - * Таким образом, функция позволяет как избавится от лишнего копирования, - * так и выполнить более полную проверку аргументов. - * - * ВАЖНО: Передаваемый указатель должен указывать на начало данных. Только - * так гарантируется что актуальный заголовок страницы будет физически - * расположен в той-же странице памяти, в том числе для многостраничных - * P_LARGE страниц с длинными данными. */ -int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr) { - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - const MDBX_env *env = txn->env; - const ptrdiff_t offset = ptr_dist(ptr, env->dxb_mmap.base); - if (offset >= 0) { - const pgno_t pgno = bytes2pgno(env, offset); - if (likely(pgno < txn->geo.first_unallocated)) { - const page_t *page = pgno2page(env, pgno); - if (unlikely(page->pgno != pgno || (page->flags & P_ILL_BITS) != 0)) { - /* The ptr pointed into middle of a large page, - * not to the beginning of a data. */ - return LOG_IFERR(MDBX_EINVAL); - } - return ((txn->flags & MDBX_TXN_RDONLY) || !is_modifable(txn, page)) ? MDBX_RESULT_FALSE : MDBX_RESULT_TRUE; - } - if ((size_t)offset < env->dxb_mmap.limit) { - /* Указатель адресует что-то в пределах mmap, но за границей - * распределенных страниц. Такое может случится если mdbx_is_dirty() - * вызывается после операции, в ходе которой грязная страница была - * возвращена в нераспределенное пространство. 
*/ - return (txn->flags & MDBX_TXN_RDONLY) ? LOG_IFERR(MDBX_EINVAL) : MDBX_RESULT_TRUE; - } - } - - /* Страница вне используемого mmap-диапазона, т.е. либо в функцию был - * передан некорректный адрес, либо адрес в теневой странице, которая была - * выделена посредством malloc(). - * - * Для режима MDBX_WRITE_MAP режима страница однозначно "не грязная", - * а для режимов без MDBX_WRITE_MAP однозначно "не чистая". */ - return (txn->flags & (MDBX_WRITEMAP | MDBX_TXN_RDONLY)) ? LOG_IFERR(MDBX_EINVAL) : MDBX_RESULT_TRUE; -} - -int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, const MDBX_val *data) { - int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - if (unlikely(!key)) - return LOG_IFERR(MDBX_EINVAL); - - if (unlikely(dbi <= FREE_DBI)) - return LOG_IFERR(MDBX_BAD_DBI); - - if (unlikely(txn->flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) - return LOG_IFERR((txn->flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS : MDBX_BAD_TXN); - - cursor_couple_t cx; - rc = cursor_init(&cx.outer, txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - MDBX_val proxy; - MDBX_cursor_op op = MDBX_SET; - unsigned flags = MDBX_ALLDUPS; - if (data) { - proxy = *data; - data = &proxy; - op = MDBX_GET_BOTH; - flags = 0; - } - rc = cursor_seek(&cx.outer, (MDBX_val *)key, (MDBX_val *)data, op).err; - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - cx.outer.next = txn->cursors[dbi]; - txn->cursors[dbi] = &cx.outer; - rc = cursor_del(&cx.outer, flags); - txn->cursors[dbi] = cx.outer.next; return LOG_IFERR(rc); } -int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, MDBX_put_flags_t flags) { - int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); +int mdbx_txn_break(MDBX_txn *txn) { + do { + int rc = check_txn(txn, 0); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + txn->flags |= MDBX_TXN_ERROR; + if (txn->flags & MDBX_TXN_RDONLY) + break; + txn = txn->nested; + } while 
(txn); + return MDBX_SUCCESS; +} + +int mdbx_txn_abort(MDBX_txn *txn) { + int rc = check_txn(txn, 0); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); - if (unlikely(!key || !data)) - return LOG_IFERR(MDBX_EINVAL); - - if (unlikely(dbi <= FREE_DBI)) - return LOG_IFERR(MDBX_BAD_DBI); - - if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | MDBX_ALLDUPS | MDBX_RESERVE | MDBX_APPEND | - MDBX_APPENDDUP | MDBX_CURRENT | MDBX_MULTIPLE))) - return LOG_IFERR(MDBX_EINVAL); - - if (unlikely(txn->flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) - return LOG_IFERR((txn->flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS : MDBX_BAD_TXN); - - cursor_couple_t cx; - rc = cursor_init(&cx.outer, txn, dbi); + rc = check_env(txn->env, true); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); - cx.outer.next = txn->cursors[dbi]; - txn->cursors[dbi] = &cx.outer; - /* LY: support for update (explicit overwrite) */ - if (flags & MDBX_CURRENT) { - rc = cursor_seek(&cx.outer, (MDBX_val *)key, nullptr, MDBX_SET).err; - if (likely(rc == MDBX_SUCCESS) && (txn->dbs[dbi].flags & MDBX_DUPSORT) && (flags & MDBX_ALLDUPS) == 0) { - /* LY: allows update (explicit overwrite) only for unique keys */ - node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); - if (node_flags(node) & N_DUP) { - tASSERT(txn, inner_pointed(&cx.outer) && cx.outer.subcur->nested_tree.items > 1); - rc = MDBX_EMULTIVAL; - if ((flags & MDBX_NOOVERWRITE) == 0) { - flags -= MDBX_CURRENT; - rc = cursor_del(&cx.outer, MDBX_ALLDUPS); - } - } - } + if ((txn->flags & (MDBX_TXN_RDONLY | MDBX_NOSTICKYTHREADS)) == MDBX_NOSTICKYTHREADS && + unlikely(txn->owner != osal_thread_self())) { + mdbx_txn_break(txn); + return LOG_IFERR(MDBX_THREAD_MISMATCH); } - if (likely(rc == MDBX_SUCCESS)) - rc = cursor_put_checklen(&cx.outer, key, data, flags); - txn->cursors[dbi] = cx.outer.next; + return LOG_IFERR(txn_abort(txn)); +} +int mdbx_txn_park(MDBX_txn *txn, bool autounpark) { + 
STATIC_ASSERT(MDBX_TXN_BLOCKED > MDBX_TXN_ERROR); + int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0)) + return LOG_IFERR(MDBX_TXN_INVALID); + + if (unlikely((txn->flags & MDBX_TXN_ERROR))) { + rc = txn_end(txn, TXN_END_RESET | TXN_END_UPDATE); + return LOG_IFERR(rc ? rc : MDBX_OUSTED); + } + + return LOG_IFERR(txn_park(txn, autounpark)); +} + +int mdbx_txn_unpark(MDBX_txn *txn, bool restart_if_ousted) { + STATIC_ASSERT(MDBX_TXN_BLOCKED > MDBX_TXN_PARKED + MDBX_TXN_ERROR); + int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_PARKED - MDBX_TXN_ERROR); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + if (unlikely(!F_ISSET(txn->flags, MDBX_TXN_RDONLY | MDBX_TXN_PARKED))) + return MDBX_SUCCESS; + + rc = txn_unpark(txn); + if (likely(rc != MDBX_OUSTED) || !restart_if_ousted) + return LOG_IFERR(rc); + + tASSERT(txn, txn->flags & MDBX_TXN_FINISHED); + rc = txn_renew(txn, MDBX_TXN_RDONLY); + return (rc == MDBX_SUCCESS) ? MDBX_RESULT_TRUE : LOG_IFERR(rc); +} + +int mdbx_txn_renew(MDBX_txn *txn) { + if (unlikely(!txn)) + return LOG_IFERR(MDBX_EINVAL); + + if (unlikely(txn->signature != txn_signature)) + return LOG_IFERR(MDBX_EBADSIGN); + + if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0)) + return LOG_IFERR(MDBX_EINVAL); + + if (unlikely(txn->owner != 0 || !(txn->flags & MDBX_TXN_FINISHED))) { + int rc = mdbx_txn_reset(txn); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + + int rc = txn_renew(txn, MDBX_TXN_RDONLY); + if (rc == MDBX_SUCCESS) { + tASSERT(txn, txn->owner == (txn->flags & MDBX_NOSTICKYTHREADS) ? 0 : osal_thread_self()); + DEBUG("renew txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->txnid, + (txn->flags & MDBX_TXN_RDONLY) ? 
'r' : 'w', (void *)txn, (void *)txn->env, txn->dbs[MAIN_DBI].root, + txn->dbs[FREE_DBI].root); + } return LOG_IFERR(rc); } -//------------------------------------------------------------------------------ - -/* Позволяет обновить или удалить существующую запись с получением - * в old_data предыдущего значения данных. При этом если new_data равен - * нулю, то выполняется удаление, иначе обновление/вставка. - * - * Текущее значение может находиться в уже измененной (грязной) странице. - * В этом случае страница будет перезаписана при обновлении, а само старое - * значение утрачено. Поэтому исходно в old_data должен быть передан - * дополнительный буфер для копирования старого значения. - * Если переданный буфер слишком мал, то функция вернет -1, установив - * old_data->iov_len в соответствующее значение. - * - * Для не-уникальных ключей также возможен второй сценарий использования, - * когда посредством old_data из записей с одинаковым ключом для - * удаления/обновления выбирается конкретная. Для выбора этого сценария - * во flags следует одновременно указать MDBX_CURRENT и MDBX_NOOVERWRITE. - * Именно эта комбинация выбрана, так как она лишена смысла, и этим позволяет - * идентифицировать запрос такого сценария. - * - * Функция может быть замещена соответствующими операциями с курсорами - * после двух доработок (TODO): - * - внешняя аллокация курсоров, в том числе на стеке (без malloc). - * - получения dirty-статуса страницы по адресу (знать о MUTABLE/WRITEABLE). 
- */ - -int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *new_data, MDBX_val *old_data, - MDBX_put_flags_t flags, MDBX_preserve_func preserver, void *preserver_context) { - int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); +int mdbx_txn_set_userctx(MDBX_txn *txn, void *ctx) { + int rc = check_txn(txn, MDBX_TXN_FINISHED); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); - if (unlikely(!key || !old_data || old_data == new_data)) + txn->userctx = ctx; + return MDBX_SUCCESS; +} + +void *mdbx_txn_get_userctx(const MDBX_txn *txn) { return check_txn(txn, MDBX_TXN_FINISHED) ? nullptr : txn->userctx; } + +int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, MDBX_txn **ret, void *context) { + if (unlikely(!ret)) + return LOG_IFERR(MDBX_EINVAL); + *ret = nullptr; + + if (unlikely((flags & ~txn_rw_begin_flags) && (parent || (flags & ~txn_ro_begin_flags)))) return LOG_IFERR(MDBX_EINVAL); - if (unlikely(old_data->iov_base == nullptr && old_data->iov_len)) - return LOG_IFERR(MDBX_EINVAL); - - if (unlikely(new_data == nullptr && (flags & (MDBX_CURRENT | MDBX_RESERVE)) != MDBX_CURRENT)) - return LOG_IFERR(MDBX_EINVAL); - - if (unlikely(dbi <= FREE_DBI)) - return LOG_IFERR(MDBX_BAD_DBI); - - if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | MDBX_RESERVE | MDBX_APPEND | - MDBX_APPENDDUP | MDBX_CURRENT))) - return LOG_IFERR(MDBX_EINVAL); - - cursor_couple_t cx; - rc = cursor_init(&cx.outer, txn, dbi); + int rc = check_env(env, true); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); - cx.outer.next = txn->cursors[dbi]; - txn->cursors[dbi] = &cx.outer; - MDBX_val present_key = *key; - if (F_ISSET(flags, MDBX_CURRENT | MDBX_NOOVERWRITE)) { - /* в old_data значение для выбора конкретного дубликата */ - if (unlikely(!(txn->dbs[dbi].flags & MDBX_DUPSORT))) { - rc = MDBX_EINVAL; - goto bailout; + if (unlikely(env->flags & MDBX_RDONLY & ~flags)) /* write txn in RDONLY env */ + return 
LOG_IFERR(MDBX_EACCESS); + + MDBX_txn *txn = nullptr; + if (parent) { + /* Nested transactions: Max 1 child, write txns only, no writemap */ + rc = check_txn_rw(parent, MDBX_TXN_RDONLY | MDBX_WRITEMAP | MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + if (env->options.spill_parent4child_denominator) { + /* Spill dirty-pages of parent to provide dirtyroom for child txn */ + rc = txn_spill(parent, nullptr, parent->tw.dirtylist->length / env->options.spill_parent4child_denominator); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); } + tASSERT(parent, audit_ex(parent, 0, false) == 0); - /* убираем лишний бит, он был признаком запрошенного режима */ - flags -= MDBX_NOOVERWRITE; + flags |= parent->flags & (txn_rw_begin_flags | MDBX_TXN_SPILLS | MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP); + } else if ((flags & MDBX_TXN_RDONLY) == 0) { + /* Reuse preallocated write txn. However, do not touch it until + * txn_renew() succeeds, since it currently may be active. */ + txn = env->basal_txn; + goto renew; + } - rc = cursor_seek(&cx.outer, &present_key, old_data, MDBX_GET_BOTH).err; - if (rc != MDBX_SUCCESS) - goto bailout; - } else { - /* в old_data буфер для сохранения предыдущего значения */ - if (unlikely(new_data && old_data->iov_base == new_data->iov_base)) - return LOG_IFERR(MDBX_EINVAL); - MDBX_val present_data; - rc = cursor_seek(&cx.outer, &present_key, &present_data, MDBX_SET_KEY).err; + const intptr_t bitmap_bytes = +#if MDBX_ENABLE_DBI_SPARSE + ceil_powerof2(env->max_dbi, CHAR_BIT * sizeof(txn->dbi_sparse[0])) / CHAR_BIT; +#else + 0; +#endif /* MDBX_ENABLE_DBI_SPARSE */ + STATIC_ASSERT(sizeof(txn->tw) > sizeof(txn->to)); + const size_t base = + (flags & MDBX_TXN_RDONLY) ? sizeof(MDBX_txn) - sizeof(txn->tw) + sizeof(txn->to) : sizeof(MDBX_txn); + const size_t size = base + + ((flags & MDBX_TXN_RDONLY) ? 
(size_t)bitmap_bytes + env->max_dbi * sizeof(txn->dbi_seqs[0]) : 0) + + env->max_dbi * (sizeof(txn->dbs[0]) + sizeof(txn->cursors[0]) + sizeof(txn->dbi_state[0])); + txn = osal_malloc(size); + if (unlikely(txn == nullptr)) + return LOG_IFERR(MDBX_ENOMEM); +#if MDBX_DEBUG + memset(txn, 0xCD, size); + VALGRIND_MAKE_MEM_UNDEFINED(txn, size); +#endif /* MDBX_DEBUG */ + MDBX_ANALYSIS_ASSUME(size > base); + memset(txn, 0, (MDBX_GOOFY_MSVC_STATIC_ANALYZER && base > size) ? size : base); + txn->dbs = ptr_disp(txn, base); + txn->cursors = ptr_disp(txn->dbs, env->max_dbi * sizeof(txn->dbs[0])); +#if MDBX_DEBUG + txn->cursors[FREE_DBI] = nullptr; /* avoid SIGSEGV in an assertion later */ +#endif + txn->dbi_state = ptr_disp(txn, size - env->max_dbi * sizeof(txn->dbi_state[0])); + txn->flags = flags; + txn->env = env; + + if (parent) { + tASSERT(parent, dpl_check(parent)); +#if MDBX_ENABLE_DBI_SPARSE + txn->dbi_sparse = parent->dbi_sparse; +#endif /* MDBX_ENABLE_DBI_SPARSE */ + txn->dbi_seqs = parent->dbi_seqs; + txn->geo = parent->geo; + rc = dpl_alloc(txn); + if (likely(rc == MDBX_SUCCESS)) { + const size_t len = MDBX_PNL_GETSIZE(parent->tw.relist) + parent->tw.loose_count; + txn->tw.relist = pnl_alloc((len > MDBX_PNL_INITIAL) ? 
len : MDBX_PNL_INITIAL); + if (unlikely(!txn->tw.relist)) + rc = MDBX_ENOMEM; + } if (unlikely(rc != MDBX_SUCCESS)) { - old_data->iov_base = nullptr; - old_data->iov_len = 0; - if (rc != MDBX_NOTFOUND || (flags & MDBX_CURRENT)) - goto bailout; - } else if (flags & MDBX_NOOVERWRITE) { - rc = MDBX_KEYEXIST; - *old_data = present_data; - goto bailout; - } else { - page_t *page = cx.outer.pg[cx.outer.top]; - if (txn->dbs[dbi].flags & MDBX_DUPSORT) { - if (flags & MDBX_CURRENT) { - /* disallow update/delete for multi-values */ - node_t *node = page_node(page, cx.outer.ki[cx.outer.top]); - if (node_flags(node) & N_DUP) { - tASSERT(txn, inner_pointed(&cx.outer) && cx.outer.subcur->nested_tree.items > 1); - if (cx.outer.subcur->nested_tree.items > 1) { - rc = MDBX_EMULTIVAL; - goto bailout; + nested_failed: + pnl_free(txn->tw.relist); + dpl_free(txn); + osal_free(txn); + return LOG_IFERR(rc); + } + + /* Move loose pages to reclaimed list */ + if (parent->tw.loose_count) { + do { + page_t *lp = parent->tw.loose_pages; + tASSERT(parent, lp->flags == P_LOOSE); + rc = pnl_insert_span(&parent->tw.relist, lp->pgno, 1); + if (unlikely(rc != MDBX_SUCCESS)) + goto nested_failed; + MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *)); + VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *)); + parent->tw.loose_pages = page_next(lp); + /* Remove from dirty list */ + page_wash(parent, dpl_exist(parent, lp->pgno), lp, 1); + } while (parent->tw.loose_pages); + parent->tw.loose_count = 0; +#if MDBX_ENABLE_REFUND + parent->tw.loose_refund_wl = 0; +#endif /* MDBX_ENABLE_REFUND */ + tASSERT(parent, dpl_check(parent)); + } + txn->tw.dirtyroom = parent->tw.dirtyroom; + txn->tw.dirtylru = parent->tw.dirtylru; + + dpl_sort(parent); + if (parent->tw.spilled.list) + spill_purge(parent); + + tASSERT(txn, MDBX_PNL_ALLOCLEN(txn->tw.relist) >= MDBX_PNL_GETSIZE(parent->tw.relist)); + memcpy(txn->tw.relist, parent->tw.relist, MDBX_PNL_SIZEOF(parent->tw.relist)); + eASSERT(env, 
pnl_check_allocated(txn->tw.relist, (txn->geo.first_unallocated /* LY: intentional assignment + here, only for assertion */ + = parent->geo.first_unallocated) - + MDBX_ENABLE_REFUND)); + + txn->tw.gc.time_acc = parent->tw.gc.time_acc; + txn->tw.gc.last_reclaimed = parent->tw.gc.last_reclaimed; + if (parent->tw.gc.reclaimed) { + txn->tw.gc.reclaimed = parent->tw.gc.reclaimed; + parent->tw.gc.reclaimed = (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->tw.gc.reclaimed); + } + + txn->tw.retired_pages = parent->tw.retired_pages; + parent->tw.retired_pages = (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->tw.retired_pages); + + txn->txnid = parent->txnid; + txn->front_txnid = parent->front_txnid + 1; +#if MDBX_ENABLE_REFUND + txn->tw.loose_refund_wl = 0; +#endif /* MDBX_ENABLE_REFUND */ + txn->canary = parent->canary; + parent->flags |= MDBX_TXN_HAS_CHILD; + parent->nested = txn; + txn->parent = parent; + txn->owner = parent->owner; + txn->tw.troika = parent->tw.troika; + + txn->cursors[FREE_DBI] = nullptr; + txn->cursors[MAIN_DBI] = nullptr; + txn->dbi_state[FREE_DBI] = parent->dbi_state[FREE_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); + txn->dbi_state[MAIN_DBI] = parent->dbi_state[MAIN_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); + memset(txn->dbi_state + CORE_DBS, 0, (txn->n_dbi = parent->n_dbi) - CORE_DBS); + memcpy(txn->dbs, parent->dbs, sizeof(txn->dbs[0]) * CORE_DBS); + + tASSERT(parent, parent->tw.dirtyroom + parent->tw.dirtylist->length == + (parent->parent ? parent->parent->tw.dirtyroom : parent->env->options.dp_limit)); + tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == + (txn->parent ? txn->parent->tw.dirtyroom : txn->env->options.dp_limit)); + env->txn = txn; + tASSERT(parent, parent->cursors[FREE_DBI] == nullptr); + rc = parent->cursors[MAIN_DBI] ? 
cursor_shadow(parent->cursors[MAIN_DBI], txn, MAIN_DBI) : MDBX_SUCCESS; + if (AUDIT_ENABLED() && ASSERT_ENABLED()) { + txn->signature = txn_signature; + tASSERT(txn, audit_ex(txn, 0, false) == 0); + } + if (unlikely(rc != MDBX_SUCCESS)) + txn_end(txn, TXN_END_FAIL_BEGINCHILD); + } else { /* MDBX_TXN_RDONLY */ + txn->dbi_seqs = ptr_disp(txn->cursors, env->max_dbi * sizeof(txn->cursors[0])); +#if MDBX_ENABLE_DBI_SPARSE + txn->dbi_sparse = ptr_disp(txn->dbi_state, -bitmap_bytes); +#endif /* MDBX_ENABLE_DBI_SPARSE */ + renew: + rc = txn_renew(txn, flags); + } + + if (unlikely(rc != MDBX_SUCCESS)) { + if (txn != env->basal_txn) + osal_free(txn); + } else { + if (flags & (MDBX_TXN_RDONLY_PREPARE - MDBX_TXN_RDONLY)) + eASSERT(env, txn->flags == (MDBX_TXN_RDONLY | MDBX_TXN_FINISHED)); + else if (flags & MDBX_TXN_RDONLY) + eASSERT(env, (txn->flags & ~(MDBX_NOSTICKYTHREADS | MDBX_TXN_RDONLY | MDBX_WRITEMAP | + /* Win32: SRWL flag */ txn_shrink_allowed)) == 0); + else { + eASSERT(env, (txn->flags & ~(MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP | txn_shrink_allowed | MDBX_NOMETASYNC | + MDBX_SAFE_NOSYNC | MDBX_TXN_SPILLS)) == 0); + assert(!txn->tw.spilled.list && !txn->tw.spilled.least_removed); + } + txn->signature = txn_signature; + txn->userctx = context; + *ret = txn; + DEBUG("begin txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->txnid, + (flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, (void *)env, txn->dbs[MAIN_DBI].root, + txn->dbs[FREE_DBI].root); + } + + return LOG_IFERR(rc); +} + +int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { + STATIC_ASSERT(MDBX_TXN_FINISHED == MDBX_TXN_BLOCKED - MDBX_TXN_HAS_CHILD - MDBX_TXN_ERROR - MDBX_TXN_PARKED); + const uint64_t ts_0 = latency ? 
osal_monotime() : 0; + uint64_t ts_1 = 0, ts_2 = 0, ts_3 = 0, ts_4 = 0, ts_5 = 0, gc_cputime = 0; + + int rc = check_txn(txn, MDBX_TXN_FINISHED); + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc == MDBX_BAD_TXN && (txn->flags & MDBX_TXN_RDONLY)) { + rc = MDBX_RESULT_TRUE; + goto fail; + } + bailout: + if (latency) + memset(latency, 0, sizeof(*latency)); + return LOG_IFERR(rc); + } + + MDBX_env *const env = txn->env; + if (MDBX_ENV_CHECKPID && unlikely(env->pid != osal_getpid())) { + env->flags |= ENV_FATAL_ERROR; + rc = MDBX_PANIC; + goto bailout; + } + + if (unlikely(txn->flags & MDBX_TXN_ERROR)) { + rc = MDBX_RESULT_TRUE; + goto fail; + } + + /* txn_end() mode for a commit which writes nothing */ + unsigned end_mode = TXN_END_PURE_COMMIT | TXN_END_UPDATE | TXN_END_SLOT | TXN_END_FREE; + if (unlikely(txn->flags & MDBX_TXN_RDONLY)) + goto done; + + if ((txn->flags & MDBX_NOSTICKYTHREADS) && unlikely(txn->owner != osal_thread_self())) { + rc = MDBX_THREAD_MISMATCH; + goto fail; + } + + if (txn->nested) { + rc = mdbx_txn_commit_ex(txn->nested, nullptr); + tASSERT(txn, txn->nested == nullptr); + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + } + + if (unlikely(txn != env->txn)) { + DEBUG("%s", "attempt to commit unknown transaction"); + rc = MDBX_EINVAL; + goto fail; + } + + if (txn->parent) { + tASSERT(txn, audit_ex(txn, 0, false) == 0); + eASSERT(env, txn != env->basal_txn); + MDBX_txn *const parent = txn->parent; + eASSERT(env, parent->signature == txn_signature); + eASSERT(env, parent->nested == txn && (parent->flags & MDBX_TXN_HAS_CHILD) != 0); + eASSERT(env, dpl_check(txn)); + + if (txn->tw.dirtylist->length == 0 && !(txn->flags & MDBX_TXN_DIRTY) && parent->n_dbi == txn->n_dbi) { + TXN_FOREACH_DBI_ALL(txn, i) { + tASSERT(txn, (txn->dbi_state[i] & DBI_DIRTY) == 0); + if ((txn->dbi_state[i] & DBI_STALE) && !(parent->dbi_state[i] & DBI_STALE)) + tASSERT(txn, memcmp(&parent->dbs[i], &txn->dbs[i], sizeof(tree_t)) == 0); + } + + tASSERT(txn, memcmp(&parent->geo, 
&txn->geo, sizeof(parent->geo)) == 0); + tASSERT(txn, memcmp(&parent->canary, &txn->canary, sizeof(parent->canary)) == 0); + tASSERT(txn, !txn->tw.spilled.list || MDBX_PNL_GETSIZE(txn->tw.spilled.list) == 0); + tASSERT(txn, txn->tw.loose_count == 0); + + /* fast completion of pure nested transaction */ + VERBOSE("fast-complete pure nested txn %" PRIaTXN, txn->txnid); + end_mode = TXN_END_PURE_COMMIT | TXN_END_SLOT | TXN_END_FREE; + goto done; + } + + /* Preserve space for spill list to avoid parent's state corruption + * if allocation fails. */ + const size_t parent_retired_len = (uintptr_t)parent->tw.retired_pages; + tASSERT(txn, parent_retired_len <= MDBX_PNL_GETSIZE(txn->tw.retired_pages)); + const size_t retired_delta = MDBX_PNL_GETSIZE(txn->tw.retired_pages) - parent_retired_len; + if (retired_delta) { + rc = pnl_need(&txn->tw.relist, retired_delta); + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + } + + if (txn->tw.spilled.list) { + if (parent->tw.spilled.list) { + rc = pnl_need(&parent->tw.spilled.list, MDBX_PNL_GETSIZE(txn->tw.spilled.list)); + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + } + spill_purge(txn); + } + + if (unlikely(txn->tw.dirtylist->length + parent->tw.dirtylist->length > parent->tw.dirtylist->detent && + !dpl_reserve(parent, txn->tw.dirtylist->length + parent->tw.dirtylist->length))) { + rc = MDBX_ENOMEM; + goto fail; + } + + //------------------------------------------------------------------------- + + parent->tw.gc.reclaimed = txn->tw.gc.reclaimed; + txn->tw.gc.reclaimed = nullptr; + + parent->tw.retired_pages = txn->tw.retired_pages; + txn->tw.retired_pages = nullptr; + + pnl_free(parent->tw.relist); + parent->tw.relist = txn->tw.relist; + txn->tw.relist = nullptr; + parent->tw.gc.time_acc = txn->tw.gc.time_acc; + parent->tw.gc.last_reclaimed = txn->tw.gc.last_reclaimed; + + parent->geo = txn->geo; + parent->canary = txn->canary; + parent->flags |= txn->flags & MDBX_TXN_DIRTY; + + /* Move loose pages to parent */ +#if 
MDBX_ENABLE_REFUND + parent->tw.loose_refund_wl = txn->tw.loose_refund_wl; +#endif /* MDBX_ENABLE_REFUND */ + parent->tw.loose_count = txn->tw.loose_count; + parent->tw.loose_pages = txn->tw.loose_pages; + + /* Merge our cursors into parent's and close them */ + txn_done_cursors(txn, true); + end_mode |= TXN_END_EOTDONE; + + /* Update parent's DBs array */ + eASSERT(env, parent->n_dbi == txn->n_dbi); + TXN_FOREACH_DBI_ALL(txn, dbi) { + if (txn->dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)) { + parent->dbs[dbi] = txn->dbs[dbi]; + /* preserve parent's status */ + const uint8_t state = txn->dbi_state[dbi] | (parent->dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)); + DEBUG("dbi %zu dbi-state %s 0x%02x -> 0x%02x", dbi, (parent->dbi_state[dbi] != state) ? "update" : "still", + parent->dbi_state[dbi], state); + parent->dbi_state[dbi] = state; + } else { + eASSERT(env, txn->dbi_state[dbi] == (parent->dbi_state[dbi] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY))); + } + } + + if (latency) { + ts_1 = osal_monotime(); + ts_2 = /* no gc-update */ ts_1; + ts_3 = /* no audit */ ts_2; + ts_4 = /* no write */ ts_3; + ts_5 = /* no sync */ ts_4; + } + txn_merge(parent, txn, parent_retired_len); + env->txn = parent; + parent->nested = nullptr; + tASSERT(parent, dpl_check(parent)); + +#if MDBX_ENABLE_REFUND + txn_refund(parent); + if (ASSERT_ENABLED()) { + /* Check parent's loose pages not suitable for refund */ + for (page_t *lp = parent->tw.loose_pages; lp; lp = page_next(lp)) { + tASSERT(parent, lp->pgno < parent->tw.loose_refund_wl && lp->pgno + 1 < parent->geo.first_unallocated); + MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *)); + VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *)); + } + /* Check parent's reclaimed pages not suitable for refund */ + if (MDBX_PNL_GETSIZE(parent->tw.relist)) + tASSERT(parent, MDBX_PNL_MOST(parent->tw.relist) + 1 < parent->geo.first_unallocated); + } +#endif /* MDBX_ENABLE_REFUND */ + + txn->signature = 0; + 
osal_free(txn); + tASSERT(parent, audit_ex(parent, 0, false) == 0); + rc = MDBX_SUCCESS; + goto provide_latency; + } + + if (!txn->tw.dirtylist) { + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); + } else { + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == + (txn->parent ? txn->parent->tw.dirtyroom : env->options.dp_limit)); + } + txn_done_cursors(txn, false); + end_mode |= TXN_END_EOTDONE; + + if ((!txn->tw.dirtylist || txn->tw.dirtylist->length == 0) && + (txn->flags & (MDBX_TXN_DIRTY | MDBX_TXN_SPILLS)) == 0) { + TXN_FOREACH_DBI_ALL(txn, i) { tASSERT(txn, !(txn->dbi_state[i] & DBI_DIRTY)); } +#if defined(MDBX_NOSUCCESS_EMPTY_COMMIT) && MDBX_NOSUCCESS_EMPTY_COMMIT + rc = txn_end(txn, end_mode); + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + rc = MDBX_RESULT_TRUE; + goto provide_latency; +#else + goto done; +#endif /* MDBX_NOSUCCESS_EMPTY_COMMIT */ + } + + DEBUG("committing txn %" PRIaTXN " %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->txnid, (void *)txn, + (void *)env, txn->dbs[MAIN_DBI].root, txn->dbs[FREE_DBI].root); + + if (txn->n_dbi > CORE_DBS) { + /* Update table root pointers */ + cursor_couple_t cx; + rc = cursor_init(&cx.outer, txn, MAIN_DBI); + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + cx.outer.next = txn->cursors[MAIN_DBI]; + txn->cursors[MAIN_DBI] = &cx.outer; + TXN_FOREACH_DBI_USER(txn, i) { + if ((txn->dbi_state[i] & DBI_DIRTY) == 0) + continue; + tree_t *const db = &txn->dbs[i]; + DEBUG("update main's entry for sub-db %zu, mod_txnid %" PRIaTXN " -> %" PRIaTXN, i, db->mod_txnid, txn->txnid); + /* Может быть mod_txnid > front после коммита вложенных тразакций */ + db->mod_txnid = txn->txnid; + MDBX_val data = {db, sizeof(tree_t)}; + rc = cursor_put(&cx.outer, &env->kvs[i].name, &data, N_TREE); + if (unlikely(rc != MDBX_SUCCESS)) { + txn->cursors[MAIN_DBI] = cx.outer.next; + goto fail; + } + } + txn->cursors[MAIN_DBI] = 
cx.outer.next; + } + + ts_1 = latency ? osal_monotime() : 0; + + gcu_t gcu_ctx; + gc_cputime = latency ? osal_cputime(nullptr) : 0; + rc = gc_update_init(txn, &gcu_ctx); + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + rc = gc_update(txn, &gcu_ctx); + gc_cputime = latency ? osal_cputime(nullptr) - gc_cputime : 0; + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + + tASSERT(txn, txn->tw.loose_count == 0); + txn->dbs[FREE_DBI].mod_txnid = (txn->dbi_state[FREE_DBI] & DBI_DIRTY) ? txn->txnid : txn->dbs[FREE_DBI].mod_txnid; + + txn->dbs[MAIN_DBI].mod_txnid = (txn->dbi_state[MAIN_DBI] & DBI_DIRTY) ? txn->txnid : txn->dbs[MAIN_DBI].mod_txnid; + + ts_2 = latency ? osal_monotime() : 0; + ts_3 = ts_2; + if (AUDIT_ENABLED()) { + rc = audit_ex(txn, MDBX_PNL_GETSIZE(txn->tw.retired_pages), true); + ts_3 = osal_monotime(); + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + } + + bool need_flush_for_nometasync = false; + const meta_ptr_t head = meta_recent(env, &txn->tw.troika); + const uint32_t meta_sync_txnid = atomic_load32(&env->lck->meta_sync_txnid, mo_Relaxed); + /* sync prev meta */ + if (head.is_steady && meta_sync_txnid != (uint32_t)head.txnid) { + /* Исправление унаследованного от LMDB недочета: + * + * Всё хорошо, если все процессы работающие с БД не используют WRITEMAP. + * Тогда мета-страница (обновленная, но не сброшенная на диск) будет + * сохранена в результате fdatasync() при записи данных этой транзакции. + * + * Всё хорошо, если все процессы работающие с БД используют WRITEMAP + * без MDBX_AVOID_MSYNC. + * Тогда мета-страница (обновленная, но не сброшенная на диск) будет + * сохранена в результате msync() при записи данных этой транзакции. 
+ * + * Если же в процессах работающих с БД используется оба метода, как sync() + * в режиме MDBX_WRITEMAP, так и записи через файловый дескриптор, то + * становится невозможным обеспечить фиксацию на диске мета-страницы + * предыдущей транзакции и данных текущей транзакции, за счет одной + * sync-операцией выполняемой после записи данных текущей транзакции. + * Соответственно, требуется явно обновлять мета-страницу, что полностью + * уничтожает выгоду от NOMETASYNC. */ + const uint32_t txnid_dist = ((txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) ? MDBX_NOMETASYNC_LAZY_FD + : MDBX_NOMETASYNC_LAZY_WRITEMAP; + /* Смысл "магии" в том, чтобы избежать отдельного вызова fdatasync() + * или msync() для гарантированной фиксации на диске мета-страницы, + * которая была "лениво" отправлена на запись в предыдущей транзакции, + * но не сброшена на диск из-за активного режима MDBX_NOMETASYNC. */ + if ( +#if defined(_WIN32) || defined(_WIN64) + !env->ioring.overlapped_fd && +#endif + meta_sync_txnid == (uint32_t)head.txnid - txnid_dist) + need_flush_for_nometasync = true; + else { + rc = meta_sync(env, head); + if (unlikely(rc != MDBX_SUCCESS)) { + ERROR("txn-%s: error %d", "presync-meta", rc); + goto fail; + } + } + } + + if (txn->tw.dirtylist) { + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); + tASSERT(txn, txn->tw.loose_count == 0); + + mdbx_filehandle_t fd = +#if defined(_WIN32) || defined(_WIN64) + env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd; + (void)need_flush_for_nometasync; +#else + (need_flush_for_nometasync || env->dsync_fd == INVALID_HANDLE_VALUE || + txn->tw.dirtylist->length > env->options.writethrough_threshold || + atomic_load64(&env->lck->unsynced_pages, mo_Relaxed)) + ? 
env->lazy_fd + : env->dsync_fd; +#endif /* Windows */ + + iov_ctx_t write_ctx; + rc = iov_init(txn, &write_ctx, txn->tw.dirtylist->length, txn->tw.dirtylist->pages_including_loose, fd, false); + if (unlikely(rc != MDBX_SUCCESS)) { + ERROR("txn-%s: error %d", "iov-init", rc); + goto fail; + } + + rc = txn_write(txn, &write_ctx); + if (unlikely(rc != MDBX_SUCCESS)) { + ERROR("txn-%s: error %d", "write", rc); + goto fail; + } + } else { + tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); + env->lck->unsynced_pages.weak += txn->tw.writemap_dirty_npages; + if (!env->lck->eoos_timestamp.weak) + env->lck->eoos_timestamp.weak = osal_monotime(); + } + + /* TODO: use ctx.flush_begin & ctx.flush_end for range-sync */ + ts_4 = latency ? osal_monotime() : 0; + + meta_t meta; + memcpy(meta.magic_and_version, head.ptr_c->magic_and_version, 8); + meta.reserve16 = head.ptr_c->reserve16; + meta.validator_id = head.ptr_c->validator_id; + meta.extra_pagehdr = head.ptr_c->extra_pagehdr; + unaligned_poke_u64(4, meta.pages_retired, + unaligned_peek_u64(4, head.ptr_c->pages_retired) + MDBX_PNL_GETSIZE(txn->tw.retired_pages)); + meta.geometry = txn->geo; + meta.trees.gc = txn->dbs[FREE_DBI]; + meta.trees.main = txn->dbs[MAIN_DBI]; + meta.canary = txn->canary; + memcpy(&meta.dxbid, &head.ptr_c->dxbid, sizeof(meta.dxbid)); + + txnid_t commit_txnid = txn->txnid; +#if MDBX_ENABLE_BIGFOOT + if (gcu_ctx.bigfoot > txn->txnid) { + commit_txnid = gcu_ctx.bigfoot; + TRACE("use @%" PRIaTXN " (+%zu) for commit bigfoot-txn", commit_txnid, (size_t)(commit_txnid - txn->txnid)); + } +#endif + meta.unsafe_sign = DATASIGN_NONE; + meta_set_txnid(env, &meta, commit_txnid); + + rc = dxb_sync_locked(env, env->flags | txn->flags | txn_shrink_allowed, &meta, &txn->tw.troika); + + ts_5 = latency ? 
osal_monotime() : 0; + if (unlikely(rc != MDBX_SUCCESS)) { + env->flags |= ENV_FATAL_ERROR; + ERROR("txn-%s: error %d", "sync", rc); + goto fail; + } + + end_mode = TXN_END_COMMITTED | TXN_END_UPDATE | TXN_END_EOTDONE; + +done: + if (latency) + txn_take_gcprof(txn, latency); + rc = txn_end(txn, end_mode); + +provide_latency: + if (latency) { + latency->preparation = ts_1 ? osal_monotime_to_16dot16(ts_1 - ts_0) : 0; + latency->gc_wallclock = (ts_2 > ts_1) ? osal_monotime_to_16dot16(ts_2 - ts_1) : 0; + latency->gc_cputime = gc_cputime ? osal_monotime_to_16dot16(gc_cputime) : 0; + latency->audit = (ts_3 > ts_2) ? osal_monotime_to_16dot16(ts_3 - ts_2) : 0; + latency->write = (ts_4 > ts_3) ? osal_monotime_to_16dot16(ts_4 - ts_3) : 0; + latency->sync = (ts_5 > ts_4) ? osal_monotime_to_16dot16(ts_5 - ts_4) : 0; + const uint64_t ts_6 = osal_monotime(); + latency->ending = ts_5 ? osal_monotime_to_16dot16(ts_6 - ts_5) : 0; + latency->whole = osal_monotime_to_16dot16_noUnderflow(ts_6 - ts_0); + } + return LOG_IFERR(rc); + +fail: + txn->flags |= MDBX_TXN_ERROR; + if (latency) + txn_take_gcprof(txn, latency); + txn_abort(txn); + goto provide_latency; +} + +int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) { + int rc = check_txn(txn, MDBX_TXN_FINISHED); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + if (unlikely(!info)) + return LOG_IFERR(MDBX_EINVAL); + + MDBX_env *const env = txn->env; +#if MDBX_ENV_CHECKPID + if (unlikely(env->pid != osal_getpid())) { + env->flags |= ENV_FATAL_ERROR; + return LOG_IFERR(MDBX_PANIC); + } +#endif /* MDBX_ENV_CHECKPID */ + + info->txn_id = txn->txnid; + info->txn_space_used = pgno2bytes(env, txn->geo.first_unallocated); + + if (txn->flags & MDBX_TXN_RDONLY) { + meta_ptr_t head; + uint64_t head_retired; + troika_t troika = meta_tap(env); + do { + /* fetch info from volatile head */ + head = meta_recent(env, &troika); + head_retired = unaligned_peek_u64_volatile(4, head.ptr_v->pages_retired); + 
info->txn_space_limit_soft = pgno2bytes(env, head.ptr_v->geometry.now); + info->txn_space_limit_hard = pgno2bytes(env, head.ptr_v->geometry.upper); + info->txn_space_leftover = pgno2bytes(env, head.ptr_v->geometry.now - head.ptr_v->geometry.first_unallocated); + } while (unlikely(meta_should_retry(env, &troika))); + + info->txn_reader_lag = head.txnid - info->txn_id; + info->txn_space_dirty = info->txn_space_retired = 0; + uint64_t reader_snapshot_pages_retired = 0; + if (txn->to.reader && + ((txn->flags & MDBX_TXN_PARKED) == 0 || safe64_read(&txn->to.reader->tid) != MDBX_TID_TXN_OUSTED) && + head_retired > + (reader_snapshot_pages_retired = atomic_load64(&txn->to.reader->snapshot_pages_retired, mo_Relaxed))) { + info->txn_space_dirty = info->txn_space_retired = + pgno2bytes(env, (pgno_t)(head_retired - reader_snapshot_pages_retired)); + + size_t retired_next_reader = 0; + lck_t *const lck = env->lck_mmap.lck; + if (scan_rlt && info->txn_reader_lag > 1 && lck) { + /* find next more recent reader */ + txnid_t next_reader = head.txnid; + const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease); + for (size_t i = 0; i < snap_nreaders; ++i) { + retry: + if (atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease)) { + jitter4testing(true); + const uint64_t snap_tid = safe64_read(&lck->rdt[i].tid); + const txnid_t snap_txnid = safe64_read(&lck->rdt[i].txnid); + const uint64_t snap_retired = atomic_load64(&lck->rdt[i].snapshot_pages_retired, mo_AcquireRelease); + if (unlikely(snap_retired != atomic_load64(&lck->rdt[i].snapshot_pages_retired, mo_Relaxed)) || + snap_txnid != safe64_read(&lck->rdt[i].txnid) || snap_tid != safe64_read(&lck->rdt[i].tid)) + goto retry; + if (snap_txnid <= txn->txnid) { + retired_next_reader = 0; + break; + } + if (snap_txnid < next_reader && snap_tid >= MDBX_TID_TXN_OUSTED) { + next_reader = snap_txnid; + retired_next_reader = pgno2bytes( + env, (pgno_t)(snap_retired - atomic_load64(&txn->to.reader->snapshot_pages_retired, 
mo_Relaxed))); } } - /* В LMDB флажок MDBX_CURRENT здесь приведет - * к замене данных без учета MDBX_DUPSORT сортировки, - * но здесь это в любом случае допустимо, так как мы - * проверили что для ключа есть только одно значение. */ } } - - if (is_modifable(txn, page)) { - if (new_data && cmp_lenfast(&present_data, new_data) == 0) { - /* если данные совпадают, то ничего делать не надо */ - *old_data = *new_data; - goto bailout; + info->txn_space_dirty = retired_next_reader; + } + } else { + info->txn_space_limit_soft = pgno2bytes(env, txn->geo.now); + info->txn_space_limit_hard = pgno2bytes(env, txn->geo.upper); + info->txn_space_retired = + pgno2bytes(env, txn->nested ? (size_t)txn->tw.retired_pages : MDBX_PNL_GETSIZE(txn->tw.retired_pages)); + info->txn_space_leftover = pgno2bytes(env, txn->tw.dirtyroom); + info->txn_space_dirty = + pgno2bytes(env, txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose + : (txn->tw.writemap_dirty_npages + txn->tw.writemap_spilled_npages)); + info->txn_reader_lag = INT64_MAX; + lck_t *const lck = env->lck_mmap.lck; + if (scan_rlt && lck) { + txnid_t oldest_snapshot = txn->txnid; + const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease); + if (snap_nreaders) { + oldest_snapshot = txn_snapshot_oldest(txn); + if (oldest_snapshot == txn->txnid - 1) { + /* check if there is at least one reader */ + bool exists = false; + for (size_t i = 0; i < snap_nreaders; ++i) { + if (atomic_load32(&lck->rdt[i].pid, mo_Relaxed) && txn->txnid > safe64_read(&lck->rdt[i].txnid)) { + exists = true; + break; + } + } + oldest_snapshot += !exists; } - rc = preserver ? 
preserver(preserver_context, old_data, present_data.iov_base, present_data.iov_len) - : MDBX_SUCCESS; - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } else { - *old_data = present_data; } - flags |= MDBX_CURRENT; + info->txn_reader_lag = txn->txnid - oldest_snapshot; } } - if (likely(new_data)) - rc = cursor_put_checklen(&cx.outer, key, new_data, flags); - else - rc = cursor_del(&cx.outer, flags & MDBX_ALLDUPS); - -bailout: - txn->cursors[dbi] = cx.outer.next; - return LOG_IFERR(rc); -} - -static int default_value_preserver(void *context, MDBX_val *target, const void *src, size_t bytes) { - (void)context; - if (unlikely(target->iov_len < bytes)) { - target->iov_base = nullptr; - target->iov_len = bytes; - return MDBX_RESULT_TRUE; - } - memcpy(target->iov_base, src, target->iov_len = bytes); return MDBX_SUCCESS; } - -int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *new_data, MDBX_val *old_data, - MDBX_put_flags_t flags) { - return mdbx_replace_ex(txn, dbi, key, new_data, old_data, flags, default_value_preserver, nullptr); -} diff --git a/src/dbi.c b/src/dbi.c index 60b32d8a..7716e6d2 100644 --- a/src/dbi.c +++ b/src/dbi.c @@ -132,7 +132,7 @@ __noinline int dbi_import(MDBX_txn *txn, const size_t dbi) { return MDBX_BAD_DBI; } -static int defer_and_release(MDBX_env *const env, defer_free_item_t *const chain) { +int dbi_defer_release(MDBX_env *const env, defer_free_item_t *const chain) { size_t length = 0; defer_free_item_t *obsolete_chain = nullptr; #if MDBX_ENABLE_DBI_LOCKFREE @@ -229,7 +229,7 @@ int dbi_update(MDBX_txn *txn, int keep) { eASSERT(env, !env->dbs_flags[i] && !env->kvs[i].name.iov_len && !env->kvs[i].name.iov_base); } env->n_dbi = (unsigned)i; - defer_and_release(env, defer_chain); + dbi_defer_release(env, defer_chain); } return MDBX_SUCCESS; } @@ -594,25 +594,7 @@ int dbi_open(MDBX_txn *txn, const MDBX_val *const name, unsigned user_flags, MDB return rc; } -static int dbi_open_cstr(MDBX_txn *txn, const char *name_cstr, 
MDBX_db_flags_t flags, MDBX_dbi *dbi, - MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { - MDBX_val thunk, *name; - if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || name_cstr == MDBX_CHK_META) - name = (void *)name_cstr; - else { - thunk.iov_len = strlen(name_cstr); - thunk.iov_base = (void *)name_cstr; - name = &thunk; - } - return dbi_open(txn, name, flags, dbi, keycmp, datacmp); -} - -struct dbi_rename_result { - defer_free_item_t *defer; - int err; -}; - -__cold static struct dbi_rename_result dbi_rename_locked(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val new_name) { +__cold struct dbi_rename_result dbi_rename_locked(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val new_name) { struct dbi_rename_result pair; pair.defer = nullptr; pair.err = dbi_check(txn, dbi); @@ -690,259 +672,6 @@ static defer_free_item_t *dbi_close_locked(MDBX_env *env, MDBX_dbi dbi) { return defer_item; } -/*----------------------------------------------------------------------------*/ -/* API */ - -int mdbx_dbi_open(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi) { - return LOG_IFERR(dbi_open_cstr(txn, name, flags, dbi, nullptr, nullptr)); -} - -int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi) { - return LOG_IFERR(dbi_open(txn, name, flags, dbi, nullptr, nullptr)); -} - -int mdbx_dbi_open_ex(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, - MDBX_cmp_func *datacmp) { - return LOG_IFERR(dbi_open_cstr(txn, name, flags, dbi, keycmp, datacmp)); -} - -int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, - MDBX_cmp_func *datacmp) { - return LOG_IFERR(dbi_open(txn, name, flags, dbi, keycmp, datacmp)); -} - -__cold int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { - int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - cursor_couple_t cx; - rc = 
cursor_init(&cx.outer, txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - if (txn->dbs[dbi].height) { - cx.outer.next = txn->cursors[dbi]; - txn->cursors[dbi] = &cx.outer; - rc = tree_drop(&cx.outer, dbi == MAIN_DBI || (cx.outer.tree->flags & MDBX_DUPSORT)); - txn->cursors[dbi] = cx.outer.next; - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - } - - /* Invalidate the dropped DB's cursors */ - for (MDBX_cursor *mc = txn->cursors[dbi]; mc; mc = mc->next) - be_poor(mc); - - if (!del || dbi < CORE_DBS) { - /* reset the DB record, mark it dirty */ - txn->dbi_state[dbi] |= DBI_DIRTY; - txn->dbs[dbi].height = 0; - txn->dbs[dbi].branch_pages = 0; - txn->dbs[dbi].leaf_pages = 0; - txn->dbs[dbi].large_pages = 0; - txn->dbs[dbi].items = 0; - txn->dbs[dbi].root = P_INVALID; - txn->dbs[dbi].sequence = 0; - /* txn->dbs[dbi].mod_txnid = txn->txnid; */ - txn->flags |= MDBX_TXN_DIRTY; - return MDBX_SUCCESS; - } - - MDBX_env *const env = txn->env; - MDBX_val name = env->kvs[dbi].name; - rc = cursor_init(&cx.outer, txn, MAIN_DBI); - if (likely(rc == MDBX_SUCCESS)) { - rc = cursor_seek(&cx.outer, &name, nullptr, MDBX_SET).err; - if (likely(rc == MDBX_SUCCESS)) { - cx.outer.next = txn->cursors[MAIN_DBI]; - txn->cursors[MAIN_DBI] = &cx.outer; - rc = cursor_del(&cx.outer, N_TREE); - txn->cursors[MAIN_DBI] = cx.outer.next; - if (likely(rc == MDBX_SUCCESS)) { - tASSERT(txn, txn->dbi_state[MAIN_DBI] & DBI_DIRTY); - tASSERT(txn, txn->flags & MDBX_TXN_DIRTY); - txn->dbi_state[dbi] = DBI_LINDO | DBI_OLDEN; - rc = osal_fastmutex_acquire(&env->dbi_lock); - if (likely(rc == MDBX_SUCCESS)) - return LOG_IFERR(defer_and_release(env, dbi_close_locked(env, dbi))); - } - } - } - txn->flags |= MDBX_TXN_ERROR; - return LOG_IFERR(rc); -} - -__cold int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name_cstr) { - MDBX_val thunk, *name; - if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || name_cstr == MDBX_CHK_META) - name = (void *)name_cstr; - else 
{ - thunk.iov_len = strlen(name_cstr); - thunk.iov_base = (void *)name_cstr; - name = &thunk; - } - return LOG_IFERR(mdbx_dbi_rename2(txn, dbi, name)); -} - -int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - if (unlikely(dbi < CORE_DBS)) - return (dbi == MAIN_DBI) ? MDBX_SUCCESS : LOG_IFERR(MDBX_BAD_DBI); - - if (unlikely(dbi >= env->max_dbi)) - return LOG_IFERR(MDBX_BAD_DBI); - - if (unlikely(dbi < CORE_DBS || dbi >= env->max_dbi)) - return LOG_IFERR(MDBX_BAD_DBI); - - rc = osal_fastmutex_acquire(&env->dbi_lock); - if (likely(rc == MDBX_SUCCESS && dbi < env->n_dbi)) { - retry: - if (env->basal_txn && (env->dbs_flags[dbi] & DB_VALID) && (env->basal_txn->flags & MDBX_TXN_FINISHED) == 0) { - /* LY: Опасный код, так как env->txn может быть изменено в другом потоке. - * К сожалению тут нет надежного решения и может быть падение при неверном - * использовании API (вызове mdbx_dbi_close конкурентно с завершением - * пишущей транзакции). - * - * Для минимизации вероятности падения сначала проверяем dbi-флаги - * в basal_txn, а уже после в env->txn. Таким образом, падение может быть - * только при коллизии с завершением вложенной транзакции. - * - * Альтернативно можно попробовать выполнять обновление/put записи в - * mainDb соответствующей таблице закрываемого хендла. Семантически это - * верный путь, но проблема в текущем API, в котором исторически dbi-хендл - * живет и закрывается вне транзакции. Причем проблема не только в том, - * что нет указателя на текущую пишущую транзакцию, а в том что - * пользователь точно не ожидает что закрытие хендла приведет к - * скрытой/непрозрачной активности внутри транзакции потенциально - * выполняемой в другом потоке. 
Другими словами, проблема может быть - * только при неверном использовании API и если пользователь это - * допускает, то точно не будет ожидать скрытых действий внутри - * транзакции, и поэтому этот путь потенциально более опасен. */ - const MDBX_txn *const hazard = env->txn; - osal_compiler_barrier(); - if ((dbi_state(env->basal_txn, dbi) & (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO) { - bailout_dirty_dbi: - osal_fastmutex_release(&env->dbi_lock); - return LOG_IFERR(MDBX_DANGLING_DBI); - } - osal_memory_barrier(); - if (unlikely(hazard != env->txn)) - goto retry; - if (hazard != env->basal_txn && hazard && (hazard->flags & MDBX_TXN_FINISHED) == 0 && - hazard->signature == txn_signature && - (dbi_state(hazard, dbi) & (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO) - goto bailout_dirty_dbi; - osal_compiler_barrier(); - if (unlikely(hazard != env->txn)) - goto retry; - } - rc = defer_and_release(env, dbi_close_locked(env, dbi)); - } - return LOG_IFERR(rc); -} - -int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, unsigned *state) { - if (unlikely(!flags || !state)) - return LOG_IFERR(MDBX_EINVAL); - - int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR - MDBX_TXN_PARKED); - if (unlikely(rc != MDBX_SUCCESS)) { - *flags = 0; - *state = 0; - return LOG_IFERR(rc); - } - - rc = dbi_check(txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) { - *flags = 0; - *state = 0; - return LOG_IFERR(rc); - } - - *flags = txn->dbs[dbi].flags & DB_PERSISTENT_FLAGS; - *state = txn->dbi_state[dbi] & (DBI_FRESH | DBI_CREAT | DBI_DIRTY | DBI_STALE); - return MDBX_SUCCESS; -} - -__cold int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *new_name) { - int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - if (unlikely(new_name == MDBX_CHK_MAIN || new_name->iov_base == MDBX_CHK_MAIN || new_name == MDBX_CHK_GC || - new_name->iov_base == MDBX_CHK_GC || new_name == MDBX_CHK_META || 
new_name->iov_base == MDBX_CHK_META)) - return LOG_IFERR(MDBX_EINVAL); - - if (unlikely(dbi < CORE_DBS)) - return LOG_IFERR(MDBX_EINVAL); - rc = dbi_check(txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - rc = osal_fastmutex_acquire(&txn->env->dbi_lock); - if (likely(rc == MDBX_SUCCESS)) { - struct dbi_rename_result pair = dbi_rename_locked(txn, dbi, *new_name); - if (pair.defer) - pair.defer->next = nullptr; - defer_and_release(txn->env, pair.defer); - rc = pair.err; - } - return LOG_IFERR(rc); -} - -static void stat_get(const tree_t *db, MDBX_stat *st, size_t bytes) { - st->ms_depth = db->height; - st->ms_branch_pages = db->branch_pages; - st->ms_leaf_pages = db->leaf_pages; - st->ms_overflow_pages = db->large_pages; - st->ms_entries = db->items; - if (likely(bytes >= offsetof(MDBX_stat, ms_mod_txnid) + sizeof(st->ms_mod_txnid))) - st->ms_mod_txnid = db->mod_txnid; -} - -__cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, size_t bytes) { - if (unlikely(!dest)) - return LOG_IFERR(MDBX_EINVAL); - - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - - rc = dbi_check(txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - - const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid); - if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) { - rc = MDBX_EINVAL; - goto bailout; - } - - if (unlikely(txn->flags & MDBX_TXN_BLOCKED)) { - rc = MDBX_BAD_TXN; - goto bailout; - } - - if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) { - rc = tbl_fetch((MDBX_txn *)txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - } - - dest->ms_psize = txn->env->ps; - stat_get(&txn->dbs[dbi], dest, bytes); - return MDBX_SUCCESS; - -bailout: - memset(dest, 0, bytes); - return LOG_IFERR(rc); -} - __cold const tree_t *dbi_dig(const MDBX_txn *txn, const size_t dbi, tree_t *fallback) { const MDBX_txn *dig = txn; do { @@ -966,58 +695,4 @@ __cold const 
tree_t *dbi_dig(const MDBX_txn *txn, const size_t dbi, tree_t *fall return fallback; } -__cold int mdbx_enumerate_tables(const MDBX_txn *txn, MDBX_table_enum_func *func, void *ctx) { - if (unlikely(!func)) - return LOG_IFERR(MDBX_EINVAL); - - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - cursor_couple_t cx; - rc = cursor_init(&cx.outer, txn, MAIN_DBI); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - cx.outer.next = txn->cursors[MAIN_DBI]; - txn->cursors[MAIN_DBI] = &cx.outer; - for (rc = outer_first(&cx.outer, nullptr, nullptr); rc == MDBX_SUCCESS; - rc = outer_next(&cx.outer, nullptr, nullptr, MDBX_NEXT_NODUP)) { - node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]); - if (node_flags(node) != N_TREE) - continue; - if (unlikely(node_ds(node) != sizeof(tree_t))) { - ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid dupsort sub-tree node size", - (unsigned)node_ds(node)); - rc = MDBX_CORRUPTED; - break; - } - - tree_t reside; - const tree_t *tree = memcpy(&reside, node_data(node), sizeof(reside)); - const MDBX_val name = {node_key(node), node_ks(node)}; - const MDBX_env *const env = txn->env; - MDBX_dbi dbi = 0; - for (size_t i = CORE_DBS; i < env->n_dbi; ++i) { - if (i >= txn->n_dbi || !(env->dbs_flags[i] & DB_VALID)) - continue; - if (env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[i].name)) - continue; - - tree = dbi_dig(txn, i, &reside); - dbi = (MDBX_dbi)i; - break; - } - - MDBX_stat stat; - stat_get(tree, &stat, sizeof(stat)); - rc = func(ctx, txn, &name, tree->flags, &stat, dbi); - if (rc != MDBX_SUCCESS) - goto bailout; - } - rc = (rc == MDBX_NOTFOUND) ? 
MDBX_SUCCESS : rc; - -bailout: - txn->cursors[MAIN_DBI] = cx.outer.next; - return LOG_IFERR(rc); -} +int dbi_close_release(MDBX_env *env, MDBX_dbi dbi) { return dbi_defer_release(env, dbi_close_locked(env, dbi)); } diff --git a/src/dbi.h b/src/dbi.h index c06f5bd1..c654a0fa 100644 --- a/src/dbi.h +++ b/src/dbi.h @@ -124,4 +124,18 @@ MDBX_INTERNAL int dbi_open(MDBX_txn *txn, const MDBX_val *const name, unsigned u MDBX_INTERNAL int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); +typedef struct defer_free_item { + struct defer_free_item *next; + uint64_t timestamp; +} defer_free_item_t; + +MDBX_INTERNAL int dbi_defer_release(MDBX_env *const env, defer_free_item_t *const chain); +MDBX_INTERNAL int dbi_close_release(MDBX_env *env, MDBX_dbi dbi); MDBX_INTERNAL const tree_t *dbi_dig(const MDBX_txn *txn, const size_t dbi, tree_t *fallback); + +struct dbi_rename_result { + defer_free_item_t *defer; + int err; +}; + +MDBX_INTERNAL struct dbi_rename_result dbi_rename_locked(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val new_name); diff --git a/src/internals.h b/src/internals.h index 2dca2d23..aae09e42 100644 --- a/src/internals.h +++ b/src/internals.h @@ -315,11 +315,6 @@ struct cursor_couple { subcur_t inner; }; -struct defer_free_item { - struct defer_free_item *next; - uint64_t timestamp; -}; - enum env_flags { /* Failed to update the meta page. Probably an I/O error. 
*/ ENV_FATAL_ERROR = INT32_MIN /* 0x80000000 */, diff --git a/src/mvcc-readers.c b/src/mvcc-readers.c index d51576da..be0220a9 100644 --- a/src/mvcc-readers.c +++ b/src/mvcc-readers.c @@ -300,82 +300,6 @@ __cold MDBX_INTERNAL int mvcc_cleanup_dead(MDBX_env *env, int rdt_locked, int *d return rc; } -int txn_park(MDBX_txn *txn, bool autounpark) { - reader_slot_t *const rslot = txn->to.reader; - tASSERT(txn, (txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) == MDBX_TXN_RDONLY); - tASSERT(txn, txn->to.reader->tid.weak < MDBX_TID_TXN_OUSTED); - if (unlikely((txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) != MDBX_TXN_RDONLY)) - return MDBX_BAD_TXN; - - const uint32_t pid = atomic_load32(&rslot->pid, mo_Relaxed); - const uint64_t tid = atomic_load64(&rslot->tid, mo_Relaxed); - const uint64_t txnid = atomic_load64(&rslot->txnid, mo_Relaxed); - if (unlikely(pid != txn->env->pid)) { - ERROR("unexpected pid %u%s%u", pid, " != must ", txn->env->pid); - return MDBX_PROBLEM; - } - if (unlikely(tid != txn->owner || txnid != txn->txnid)) { - ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%0zx" - " and/or txn-id %" PRIaTXN "%s%" PRIaTXN, - tid, " != must ", txn->owner, txnid, " != must ", txn->txnid); - return MDBX_BAD_RSLOT; - } - - atomic_store64(&rslot->tid, MDBX_TID_TXN_PARKED, mo_AcquireRelease); - atomic_store32(&txn->env->lck->rdt_refresh_flag, true, mo_Relaxed); - txn->flags += autounpark ? 
MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK : MDBX_TXN_PARKED; - return MDBX_SUCCESS; -} - -int txn_unpark(MDBX_txn *txn) { - if (unlikely((txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD | MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) != - (MDBX_TXN_RDONLY | MDBX_TXN_PARKED))) - return MDBX_BAD_TXN; - - for (reader_slot_t *const rslot = txn->to.reader; rslot; atomic_yield()) { - const uint32_t pid = atomic_load32(&rslot->pid, mo_Relaxed); - uint64_t tid = safe64_read(&rslot->tid); - uint64_t txnid = safe64_read(&rslot->txnid); - if (unlikely(pid != txn->env->pid)) { - ERROR("unexpected pid %u%s%u", pid, " != expected ", txn->env->pid); - return MDBX_PROBLEM; - } - if (unlikely(tid == MDBX_TID_TXN_OUSTED || txnid >= SAFE64_INVALID_THRESHOLD)) - break; - if (unlikely(tid != MDBX_TID_TXN_PARKED || txnid != txn->txnid)) { - ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%" PRIx64 " and/or txn-id %" PRIaTXN "%s%" PRIaTXN, tid, " != must ", - MDBX_TID_TXN_OUSTED, txnid, " != must ", txn->txnid); - break; - } - if (unlikely((txn->flags & MDBX_TXN_ERROR))) - break; - -#if MDBX_64BIT_CAS - if (unlikely(!atomic_cas64(&rslot->tid, MDBX_TID_TXN_PARKED, txn->owner))) - continue; -#else - atomic_store32(&rslot->tid.high, (uint32_t)((uint64_t)txn->owner >> 32), mo_Relaxed); - if (unlikely(!atomic_cas32(&rslot->tid.low, (uint32_t)MDBX_TID_TXN_PARKED, (uint32_t)txn->owner))) { - atomic_store32(&rslot->tid.high, (uint32_t)(MDBX_TID_TXN_PARKED >> 32), mo_AcquireRelease); - continue; - } -#endif - txnid = safe64_read(&rslot->txnid); - tid = safe64_read(&rslot->tid); - if (unlikely(txnid != txn->txnid || tid != txn->owner)) { - ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%zx" - " and/or txn-id %" PRIaTXN "%s%" PRIaTXN, - tid, " != must ", txn->owner, txnid, " != must ", txn->txnid); - break; - } - txn->flags &= ~(MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK); - return MDBX_SUCCESS; - } - - int err = txn_end(txn, TXN_END_OUSTED | TXN_END_RESET | TXN_END_UPDATE); - return err ? 
err : MDBX_OUSTED; -} - __cold txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler) { DEBUG("DB size maxed out by reading #%" PRIaTXN, straggler); osal_memory_fence(mo_AcquireRelease, false); @@ -488,64 +412,3 @@ __cold txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler) { } return oldest; } - -/*----------------------------------------------------------------------------*/ - -__cold int mdbx_thread_register(const MDBX_env *env) { - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - if (unlikely(!env->lck_mmap.lck)) - return LOG_IFERR((env->flags & MDBX_EXCLUSIVE) ? MDBX_EINVAL : MDBX_EPERM); - - if (unlikely((env->flags & ENV_TXKEY) == 0)) { - eASSERT(env, env->flags & MDBX_NOSTICKYTHREADS); - return LOG_IFERR(MDBX_EINVAL) /* MDBX_NOSTICKYTHREADS mode */; - } - - eASSERT(env, (env->flags & (MDBX_NOSTICKYTHREADS | ENV_TXKEY)) == ENV_TXKEY); - reader_slot_t *r = thread_rthc_get(env->me_txkey); - if (unlikely(r != nullptr)) { - eASSERT(env, r->pid.weak == env->pid); - eASSERT(env, r->tid.weak == osal_thread_self()); - if (unlikely(r->pid.weak != env->pid)) - return LOG_IFERR(MDBX_BAD_RSLOT); - return MDBX_RESULT_TRUE /* already registered */; - } - - return LOG_IFERR(mvcc_bind_slot((MDBX_env *)env).err); -} - -__cold int mdbx_thread_unregister(const MDBX_env *env) { - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - if (unlikely(!env->lck_mmap.lck)) - return MDBX_RESULT_TRUE; - - if (unlikely((env->flags & ENV_TXKEY) == 0)) { - eASSERT(env, env->flags & MDBX_NOSTICKYTHREADS); - return MDBX_RESULT_TRUE /* MDBX_NOSTICKYTHREADS mode */; - } - - eASSERT(env, (env->flags & (MDBX_NOSTICKYTHREADS | ENV_TXKEY)) == ENV_TXKEY); - reader_slot_t *r = thread_rthc_get(env->me_txkey); - if (unlikely(r == nullptr)) - return MDBX_RESULT_TRUE /* not registered */; - - eASSERT(env, r->pid.weak == env->pid); - eASSERT(env, r->tid.weak == osal_thread_self()); - if 
(unlikely(r->pid.weak != env->pid || r->tid.weak != osal_thread_self())) - return LOG_IFERR(MDBX_BAD_RSLOT); - - eASSERT(env, r->txnid.weak >= SAFE64_INVALID_THRESHOLD); - if (unlikely(r->txnid.weak < SAFE64_INVALID_THRESHOLD)) - return LOG_IFERR(MDBX_BUSY) /* transaction is still active */; - - atomic_store32(&r->pid, 0, mo_Relaxed); - atomic_store32(&env->lck->rdt_refresh_flag, true, mo_AcquireRelease); - thread_rthc_set(env->me_txkey, nullptr); - return MDBX_SUCCESS; -} diff --git a/src/proto.h b/src/proto.h index d4cc67f4..bb8d1386 100644 --- a/src/proto.h +++ b/src/proto.h @@ -46,6 +46,7 @@ MDBX_INTERNAL int txn_renew(MDBX_txn *txn, unsigned flags); MDBX_INTERNAL int txn_park(MDBX_txn *txn, bool autounpark); MDBX_INTERNAL int txn_unpark(MDBX_txn *txn); MDBX_INTERNAL int txn_check_badbits_parked(const MDBX_txn *txn, int bad_bits); +MDBX_INTERNAL void txn_done_cursors(MDBX_txn *txn, const bool merge); #define TXN_END_NAMES \ {"committed", "empty-commit", "abort", "reset", "fail-begin", "fail-beginchild", "ousted", nullptr} @@ -67,6 +68,8 @@ enum { }; MDBX_INTERNAL int txn_end(MDBX_txn *txn, unsigned mode); MDBX_INTERNAL int txn_write(MDBX_txn *txn, iov_ctx_t *ctx); +MDBX_INTERNAL void txn_take_gcprof(MDBX_txn *txn, MDBX_commit_latency *latency); +MDBX_INTERNAL void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t parent_retired_len); /* env.c */ MDBX_INTERNAL int env_open(MDBX_env *env, mdbx_mode_t mode); diff --git a/src/tree.c b/src/tree-ops.c similarity index 100% rename from src/tree.c rename to src/tree-ops.c diff --git a/src/page-search.c b/src/tree-search.c similarity index 100% rename from src/page-search.c rename to src/tree-search.c diff --git a/src/txn.c b/src/txn.c index 38e53e5a..3ac333fc 100644 --- a/src/txn.c +++ b/src/txn.c @@ -7,7 +7,7 @@ __hot txnid_t txn_snapshot_oldest(const MDBX_txn *const txn) { return mvcc_shapshot_oldest(txn->env, txn->tw.troika.txnid[txn->tw.troika.prefer_steady]); } -static void done_cursors(MDBX_txn 
*txn, const bool merge) { +void txn_done_cursors(MDBX_txn *txn, const bool merge) { tASSERT(txn, txn->cursors[FREE_DBI] == nullptr); TXN_FOREACH_DBI_FROM(txn, i, /* skip FREE_DBI */ 1) { MDBX_cursor *mc = txn->cursors[i]; @@ -65,7 +65,7 @@ int txn_write(MDBX_txn *txn, iov_ctx_t *ctx) { } /* Merge child txn into parent */ -static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t parent_retired_len) { +void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t parent_retired_len) { tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0); dpl_t *const src = dpl_sort(txn); @@ -395,7 +395,7 @@ static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t } } -static void take_gcprof(MDBX_txn *txn, MDBX_commit_latency *latency) { +void txn_take_gcprof(MDBX_txn *txn, MDBX_commit_latency *latency) { MDBX_env *const env = txn->env; if (MDBX_ENABLE_PROFGC) { pgop_stat_t *const ptr = &env->lck->pgops; @@ -432,409 +432,6 @@ static void take_gcprof(MDBX_txn *txn, MDBX_commit_latency *latency) { memset(&latency->gc_prof, 0, sizeof(latency->gc_prof)); } -int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { - STATIC_ASSERT(MDBX_TXN_FINISHED == MDBX_TXN_BLOCKED - MDBX_TXN_HAS_CHILD - MDBX_TXN_ERROR - MDBX_TXN_PARKED); - const uint64_t ts_0 = latency ? 
osal_monotime() : 0; - uint64_t ts_1 = 0, ts_2 = 0, ts_3 = 0, ts_4 = 0, ts_5 = 0, gc_cputime = 0; - - int rc = check_txn(txn, MDBX_TXN_FINISHED); - if (unlikely(rc != MDBX_SUCCESS)) { - if (rc == MDBX_BAD_TXN && (txn->flags & MDBX_TXN_RDONLY)) { - rc = MDBX_RESULT_TRUE; - goto fail; - } - bailout: - if (latency) - memset(latency, 0, sizeof(*latency)); - return LOG_IFERR(rc); - } - - MDBX_env *const env = txn->env; - if (MDBX_ENV_CHECKPID && unlikely(env->pid != osal_getpid())) { - env->flags |= ENV_FATAL_ERROR; - rc = MDBX_PANIC; - goto bailout; - } - - if (unlikely(txn->flags & MDBX_TXN_ERROR)) { - rc = MDBX_RESULT_TRUE; - goto fail; - } - - /* txn_end() mode for a commit which writes nothing */ - unsigned end_mode = TXN_END_PURE_COMMIT | TXN_END_UPDATE | TXN_END_SLOT | TXN_END_FREE; - if (unlikely(txn->flags & MDBX_TXN_RDONLY)) - goto done; - - if ((txn->flags & MDBX_NOSTICKYTHREADS) && unlikely(txn->owner != osal_thread_self())) { - rc = MDBX_THREAD_MISMATCH; - goto fail; - } - - if (txn->nested) { - rc = mdbx_txn_commit_ex(txn->nested, nullptr); - tASSERT(txn, txn->nested == nullptr); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - } - - if (unlikely(txn != env->txn)) { - DEBUG("%s", "attempt to commit unknown transaction"); - rc = MDBX_EINVAL; - goto fail; - } - - if (txn->parent) { - tASSERT(txn, audit_ex(txn, 0, false) == 0); - eASSERT(env, txn != env->basal_txn); - MDBX_txn *const parent = txn->parent; - eASSERT(env, parent->signature == txn_signature); - eASSERT(env, parent->nested == txn && (parent->flags & MDBX_TXN_HAS_CHILD) != 0); - eASSERT(env, dpl_check(txn)); - - if (txn->tw.dirtylist->length == 0 && !(txn->flags & MDBX_TXN_DIRTY) && parent->n_dbi == txn->n_dbi) { - TXN_FOREACH_DBI_ALL(txn, i) { - tASSERT(txn, (txn->dbi_state[i] & DBI_DIRTY) == 0); - if ((txn->dbi_state[i] & DBI_STALE) && !(parent->dbi_state[i] & DBI_STALE)) - tASSERT(txn, memcmp(&parent->dbs[i], &txn->dbs[i], sizeof(tree_t)) == 0); - } - - tASSERT(txn, memcmp(&parent->geo, 
&txn->geo, sizeof(parent->geo)) == 0); - tASSERT(txn, memcmp(&parent->canary, &txn->canary, sizeof(parent->canary)) == 0); - tASSERT(txn, !txn->tw.spilled.list || MDBX_PNL_GETSIZE(txn->tw.spilled.list) == 0); - tASSERT(txn, txn->tw.loose_count == 0); - - /* fast completion of pure nested transaction */ - VERBOSE("fast-complete pure nested txn %" PRIaTXN, txn->txnid); - end_mode = TXN_END_PURE_COMMIT | TXN_END_SLOT | TXN_END_FREE; - goto done; - } - - /* Preserve space for spill list to avoid parent's state corruption - * if allocation fails. */ - const size_t parent_retired_len = (uintptr_t)parent->tw.retired_pages; - tASSERT(txn, parent_retired_len <= MDBX_PNL_GETSIZE(txn->tw.retired_pages)); - const size_t retired_delta = MDBX_PNL_GETSIZE(txn->tw.retired_pages) - parent_retired_len; - if (retired_delta) { - rc = pnl_need(&txn->tw.relist, retired_delta); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - } - - if (txn->tw.spilled.list) { - if (parent->tw.spilled.list) { - rc = pnl_need(&parent->tw.spilled.list, MDBX_PNL_GETSIZE(txn->tw.spilled.list)); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - } - spill_purge(txn); - } - - if (unlikely(txn->tw.dirtylist->length + parent->tw.dirtylist->length > parent->tw.dirtylist->detent && - !dpl_reserve(parent, txn->tw.dirtylist->length + parent->tw.dirtylist->length))) { - rc = MDBX_ENOMEM; - goto fail; - } - - //------------------------------------------------------------------------- - - parent->tw.gc.reclaimed = txn->tw.gc.reclaimed; - txn->tw.gc.reclaimed = nullptr; - - parent->tw.retired_pages = txn->tw.retired_pages; - txn->tw.retired_pages = nullptr; - - pnl_free(parent->tw.relist); - parent->tw.relist = txn->tw.relist; - txn->tw.relist = nullptr; - parent->tw.gc.time_acc = txn->tw.gc.time_acc; - parent->tw.gc.last_reclaimed = txn->tw.gc.last_reclaimed; - - parent->geo = txn->geo; - parent->canary = txn->canary; - parent->flags |= txn->flags & MDBX_TXN_DIRTY; - - /* Move loose pages to parent */ -#if 
MDBX_ENABLE_REFUND - parent->tw.loose_refund_wl = txn->tw.loose_refund_wl; -#endif /* MDBX_ENABLE_REFUND */ - parent->tw.loose_count = txn->tw.loose_count; - parent->tw.loose_pages = txn->tw.loose_pages; - - /* Merge our cursors into parent's and close them */ - done_cursors(txn, true); - end_mode |= TXN_END_EOTDONE; - - /* Update parent's DBs array */ - eASSERT(env, parent->n_dbi == txn->n_dbi); - TXN_FOREACH_DBI_ALL(txn, dbi) { - if (txn->dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)) { - parent->dbs[dbi] = txn->dbs[dbi]; - /* preserve parent's status */ - const uint8_t state = txn->dbi_state[dbi] | (parent->dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)); - DEBUG("dbi %zu dbi-state %s 0x%02x -> 0x%02x", dbi, (parent->dbi_state[dbi] != state) ? "update" : "still", - parent->dbi_state[dbi], state); - parent->dbi_state[dbi] = state; - } else { - eASSERT(env, txn->dbi_state[dbi] == (parent->dbi_state[dbi] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY))); - } - } - - if (latency) { - ts_1 = osal_monotime(); - ts_2 = /* no gc-update */ ts_1; - ts_3 = /* no audit */ ts_2; - ts_4 = /* no write */ ts_3; - ts_5 = /* no sync */ ts_4; - } - txn_merge(parent, txn, parent_retired_len); - env->txn = parent; - parent->nested = nullptr; - tASSERT(parent, dpl_check(parent)); - -#if MDBX_ENABLE_REFUND - txn_refund(parent); - if (ASSERT_ENABLED()) { - /* Check parent's loose pages not suitable for refund */ - for (page_t *lp = parent->tw.loose_pages; lp; lp = page_next(lp)) { - tASSERT(parent, lp->pgno < parent->tw.loose_refund_wl && lp->pgno + 1 < parent->geo.first_unallocated); - MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *)); - VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *)); - } - /* Check parent's reclaimed pages not suitable for refund */ - if (MDBX_PNL_GETSIZE(parent->tw.relist)) - tASSERT(parent, MDBX_PNL_MOST(parent->tw.relist) + 1 < parent->geo.first_unallocated); - } -#endif /* MDBX_ENABLE_REFUND */ - - txn->signature = 0; - 
osal_free(txn); - tASSERT(parent, audit_ex(parent, 0, false) == 0); - rc = MDBX_SUCCESS; - goto provide_latency; - } - - if (!txn->tw.dirtylist) { - tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); - } else { - tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == - (txn->parent ? txn->parent->tw.dirtyroom : env->options.dp_limit)); - } - done_cursors(txn, false); - end_mode |= TXN_END_EOTDONE; - - if ((!txn->tw.dirtylist || txn->tw.dirtylist->length == 0) && - (txn->flags & (MDBX_TXN_DIRTY | MDBX_TXN_SPILLS)) == 0) { - TXN_FOREACH_DBI_ALL(txn, i) { tASSERT(txn, !(txn->dbi_state[i] & DBI_DIRTY)); } -#if defined(MDBX_NOSUCCESS_EMPTY_COMMIT) && MDBX_NOSUCCESS_EMPTY_COMMIT - rc = txn_end(txn, end_mode); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - rc = MDBX_RESULT_TRUE; - goto provide_latency; -#else - goto done; -#endif /* MDBX_NOSUCCESS_EMPTY_COMMIT */ - } - - DEBUG("committing txn %" PRIaTXN " %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->txnid, (void *)txn, - (void *)env, txn->dbs[MAIN_DBI].root, txn->dbs[FREE_DBI].root); - - if (txn->n_dbi > CORE_DBS) { - /* Update table root pointers */ - cursor_couple_t cx; - rc = cursor_init(&cx.outer, txn, MAIN_DBI); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - cx.outer.next = txn->cursors[MAIN_DBI]; - txn->cursors[MAIN_DBI] = &cx.outer; - TXN_FOREACH_DBI_USER(txn, i) { - if ((txn->dbi_state[i] & DBI_DIRTY) == 0) - continue; - tree_t *const db = &txn->dbs[i]; - DEBUG("update main's entry for sub-db %zu, mod_txnid %" PRIaTXN " -> %" PRIaTXN, i, db->mod_txnid, txn->txnid); - /* Может быть mod_txnid > front после коммита вложенных тразакций */ - db->mod_txnid = txn->txnid; - MDBX_val data = {db, sizeof(tree_t)}; - rc = cursor_put(&cx.outer, &env->kvs[i].name, &data, N_TREE); - if (unlikely(rc != MDBX_SUCCESS)) { - txn->cursors[MAIN_DBI] = cx.outer.next; - goto fail; - } - } - txn->cursors[MAIN_DBI] = 
cx.outer.next; - } - - ts_1 = latency ? osal_monotime() : 0; - - gcu_t gcu_ctx; - gc_cputime = latency ? osal_cputime(nullptr) : 0; - rc = gc_update_init(txn, &gcu_ctx); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - rc = gc_update(txn, &gcu_ctx); - gc_cputime = latency ? osal_cputime(nullptr) - gc_cputime : 0; - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - - tASSERT(txn, txn->tw.loose_count == 0); - txn->dbs[FREE_DBI].mod_txnid = (txn->dbi_state[FREE_DBI] & DBI_DIRTY) ? txn->txnid : txn->dbs[FREE_DBI].mod_txnid; - - txn->dbs[MAIN_DBI].mod_txnid = (txn->dbi_state[MAIN_DBI] & DBI_DIRTY) ? txn->txnid : txn->dbs[MAIN_DBI].mod_txnid; - - ts_2 = latency ? osal_monotime() : 0; - ts_3 = ts_2; - if (AUDIT_ENABLED()) { - rc = audit_ex(txn, MDBX_PNL_GETSIZE(txn->tw.retired_pages), true); - ts_3 = osal_monotime(); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - } - - bool need_flush_for_nometasync = false; - const meta_ptr_t head = meta_recent(env, &txn->tw.troika); - const uint32_t meta_sync_txnid = atomic_load32(&env->lck->meta_sync_txnid, mo_Relaxed); - /* sync prev meta */ - if (head.is_steady && meta_sync_txnid != (uint32_t)head.txnid) { - /* Исправление унаследованного от LMDB недочета: - * - * Всё хорошо, если все процессы работающие с БД не используют WRITEMAP. - * Тогда мета-страница (обновленная, но не сброшенная на диск) будет - * сохранена в результате fdatasync() при записи данных этой транзакции. - * - * Всё хорошо, если все процессы работающие с БД используют WRITEMAP - * без MDBX_AVOID_MSYNC. - * Тогда мета-страница (обновленная, но не сброшенная на диск) будет - * сохранена в результате msync() при записи данных этой транзакции. 
- * - * Если же в процессах работающих с БД используется оба метода, как sync() - * в режиме MDBX_WRITEMAP, так и записи через файловый дескриптор, то - * становится невозможным обеспечить фиксацию на диске мета-страницы - * предыдущей транзакции и данных текущей транзакции, за счет одной - * sync-операцией выполняемой после записи данных текущей транзакции. - * Соответственно, требуется явно обновлять мета-страницу, что полностью - * уничтожает выгоду от NOMETASYNC. */ - const uint32_t txnid_dist = ((txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) ? MDBX_NOMETASYNC_LAZY_FD - : MDBX_NOMETASYNC_LAZY_WRITEMAP; - /* Смысл "магии" в том, чтобы избежать отдельного вызова fdatasync() - * или msync() для гарантированной фиксации на диске мета-страницы, - * которая была "лениво" отправлена на запись в предыдущей транзакции, - * но не сброшена на диск из-за активного режима MDBX_NOMETASYNC. */ - if ( -#if defined(_WIN32) || defined(_WIN64) - !env->ioring.overlapped_fd && -#endif - meta_sync_txnid == (uint32_t)head.txnid - txnid_dist) - need_flush_for_nometasync = true; - else { - rc = meta_sync(env, head); - if (unlikely(rc != MDBX_SUCCESS)) { - ERROR("txn-%s: error %d", "presync-meta", rc); - goto fail; - } - } - } - - if (txn->tw.dirtylist) { - tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); - tASSERT(txn, txn->tw.loose_count == 0); - - mdbx_filehandle_t fd = -#if defined(_WIN32) || defined(_WIN64) - env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd; - (void)need_flush_for_nometasync; -#else - (need_flush_for_nometasync || env->dsync_fd == INVALID_HANDLE_VALUE || - txn->tw.dirtylist->length > env->options.writethrough_threshold || - atomic_load64(&env->lck->unsynced_pages, mo_Relaxed)) - ? 
env->lazy_fd - : env->dsync_fd; -#endif /* Windows */ - - iov_ctx_t write_ctx; - rc = iov_init(txn, &write_ctx, txn->tw.dirtylist->length, txn->tw.dirtylist->pages_including_loose, fd, false); - if (unlikely(rc != MDBX_SUCCESS)) { - ERROR("txn-%s: error %d", "iov-init", rc); - goto fail; - } - - rc = txn_write(txn, &write_ctx); - if (unlikely(rc != MDBX_SUCCESS)) { - ERROR("txn-%s: error %d", "write", rc); - goto fail; - } - } else { - tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC); - env->lck->unsynced_pages.weak += txn->tw.writemap_dirty_npages; - if (!env->lck->eoos_timestamp.weak) - env->lck->eoos_timestamp.weak = osal_monotime(); - } - - /* TODO: use ctx.flush_begin & ctx.flush_end for range-sync */ - ts_4 = latency ? osal_monotime() : 0; - - meta_t meta; - memcpy(meta.magic_and_version, head.ptr_c->magic_and_version, 8); - meta.reserve16 = head.ptr_c->reserve16; - meta.validator_id = head.ptr_c->validator_id; - meta.extra_pagehdr = head.ptr_c->extra_pagehdr; - unaligned_poke_u64(4, meta.pages_retired, - unaligned_peek_u64(4, head.ptr_c->pages_retired) + MDBX_PNL_GETSIZE(txn->tw.retired_pages)); - meta.geometry = txn->geo; - meta.trees.gc = txn->dbs[FREE_DBI]; - meta.trees.main = txn->dbs[MAIN_DBI]; - meta.canary = txn->canary; - memcpy(&meta.dxbid, &head.ptr_c->dxbid, sizeof(meta.dxbid)); - - txnid_t commit_txnid = txn->txnid; -#if MDBX_ENABLE_BIGFOOT - if (gcu_ctx.bigfoot > txn->txnid) { - commit_txnid = gcu_ctx.bigfoot; - TRACE("use @%" PRIaTXN " (+%zu) for commit bigfoot-txn", commit_txnid, (size_t)(commit_txnid - txn->txnid)); - } -#endif - meta.unsafe_sign = DATASIGN_NONE; - meta_set_txnid(env, &meta, commit_txnid); - - rc = dxb_sync_locked(env, env->flags | txn->flags | txn_shrink_allowed, &meta, &txn->tw.troika); - - ts_5 = latency ? 
osal_monotime() : 0; - if (unlikely(rc != MDBX_SUCCESS)) { - env->flags |= ENV_FATAL_ERROR; - ERROR("txn-%s: error %d", "sync", rc); - goto fail; - } - - end_mode = TXN_END_COMMITTED | TXN_END_UPDATE | TXN_END_EOTDONE; - -done: - if (latency) - take_gcprof(txn, latency); - rc = txn_end(txn, end_mode); - -provide_latency: - if (latency) { - latency->preparation = ts_1 ? osal_monotime_to_16dot16(ts_1 - ts_0) : 0; - latency->gc_wallclock = (ts_2 > ts_1) ? osal_monotime_to_16dot16(ts_2 - ts_1) : 0; - latency->gc_cputime = gc_cputime ? osal_monotime_to_16dot16(gc_cputime) : 0; - latency->audit = (ts_3 > ts_2) ? osal_monotime_to_16dot16(ts_3 - ts_2) : 0; - latency->write = (ts_4 > ts_3) ? osal_monotime_to_16dot16(ts_4 - ts_3) : 0; - latency->sync = (ts_5 > ts_4) ? osal_monotime_to_16dot16(ts_5 - ts_4) : 0; - const uint64_t ts_6 = osal_monotime(); - latency->ending = ts_5 ? osal_monotime_to_16dot16(ts_6 - ts_5) : 0; - latency->whole = osal_monotime_to_16dot16_noUnderflow(ts_6 - ts_0); - } - return LOG_IFERR(rc); - -fail: - txn->flags |= MDBX_TXN_ERROR; - if (latency) - take_gcprof(txn, latency); - txn_abort(txn); - goto provide_latency; -} - int txn_abort(MDBX_txn *txn) { if (txn->flags & MDBX_TXN_RDONLY) /* LY: don't close DBI-handles */ @@ -1260,7 +857,7 @@ int txn_end(MDBX_txn *txn, unsigned mode) { txn->dbs[MAIN_DBI].root, txn->dbs[FREE_DBI].root); if (!(mode & TXN_END_EOTDONE)) /* !(already closed cursors) */ - done_cursors(txn, false); + txn_done_cursors(txn, false); int rc = MDBX_SUCCESS; if (txn->flags & MDBX_TXN_RDONLY) { @@ -1388,461 +985,6 @@ int txn_end(MDBX_txn *txn, unsigned mode) { return rc; } -/*----------------------------------------------------------------------------*/ - -int mdbx_txn_renew(MDBX_txn *txn) { - if (unlikely(!txn)) - return LOG_IFERR(MDBX_EINVAL); - - if (unlikely(txn->signature != txn_signature)) - return LOG_IFERR(MDBX_EBADSIGN); - - if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0)) - return LOG_IFERR(MDBX_EINVAL); - - if 
(unlikely(txn->owner != 0 || !(txn->flags & MDBX_TXN_FINISHED))) { - int rc = mdbx_txn_reset(txn); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - - int rc = txn_renew(txn, MDBX_TXN_RDONLY); - if (rc == MDBX_SUCCESS) { - tASSERT(txn, txn->owner == (txn->flags & MDBX_NOSTICKYTHREADS) ? 0 : osal_thread_self()); - DEBUG("renew txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->txnid, - (txn->flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, (void *)txn->env, txn->dbs[MAIN_DBI].root, - txn->dbs[FREE_DBI].root); - } - return LOG_IFERR(rc); -} - -int mdbx_txn_set_userctx(MDBX_txn *txn, void *ctx) { - int rc = check_txn(txn, MDBX_TXN_FINISHED); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - txn->userctx = ctx; - return MDBX_SUCCESS; -} - -void *mdbx_txn_get_userctx(const MDBX_txn *txn) { return check_txn(txn, MDBX_TXN_FINISHED) ? nullptr : txn->userctx; } - -int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, MDBX_txn **ret, void *context) { - if (unlikely(!ret)) - return LOG_IFERR(MDBX_EINVAL); - *ret = nullptr; - - if (unlikely((flags & ~txn_rw_begin_flags) && (parent || (flags & ~txn_ro_begin_flags)))) - return LOG_IFERR(MDBX_EINVAL); - - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - if (unlikely(env->flags & MDBX_RDONLY & ~flags)) /* write txn in RDONLY env */ - return LOG_IFERR(MDBX_EACCESS); - - MDBX_txn *txn = nullptr; - if (parent) { - /* Nested transactions: Max 1 child, write txns only, no writemap */ - rc = check_txn_rw(parent, MDBX_TXN_RDONLY | MDBX_WRITEMAP | MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - if (env->options.spill_parent4child_denominator) { - /* Spill dirty-pages of parent to provide dirtyroom for child txn */ - rc = txn_spill(parent, nullptr, parent->tw.dirtylist->length / env->options.spill_parent4child_denominator); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - } - 
tASSERT(parent, audit_ex(parent, 0, false) == 0); - - flags |= parent->flags & (txn_rw_begin_flags | MDBX_TXN_SPILLS | MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP); - } else if ((flags & MDBX_TXN_RDONLY) == 0) { - /* Reuse preallocated write txn. However, do not touch it until - * txn_renew() succeeds, since it currently may be active. */ - txn = env->basal_txn; - goto renew; - } - - const intptr_t bitmap_bytes = -#if MDBX_ENABLE_DBI_SPARSE - ceil_powerof2(env->max_dbi, CHAR_BIT * sizeof(txn->dbi_sparse[0])) / CHAR_BIT; -#else - 0; -#endif /* MDBX_ENABLE_DBI_SPARSE */ - STATIC_ASSERT(sizeof(txn->tw) > sizeof(txn->to)); - const size_t base = - (flags & MDBX_TXN_RDONLY) ? sizeof(MDBX_txn) - sizeof(txn->tw) + sizeof(txn->to) : sizeof(MDBX_txn); - const size_t size = base + - ((flags & MDBX_TXN_RDONLY) ? (size_t)bitmap_bytes + env->max_dbi * sizeof(txn->dbi_seqs[0]) : 0) + - env->max_dbi * (sizeof(txn->dbs[0]) + sizeof(txn->cursors[0]) + sizeof(txn->dbi_state[0])); - txn = osal_malloc(size); - if (unlikely(txn == nullptr)) - return LOG_IFERR(MDBX_ENOMEM); -#if MDBX_DEBUG - memset(txn, 0xCD, size); - VALGRIND_MAKE_MEM_UNDEFINED(txn, size); -#endif /* MDBX_DEBUG */ - MDBX_ANALYSIS_ASSUME(size > base); - memset(txn, 0, (MDBX_GOOFY_MSVC_STATIC_ANALYZER && base > size) ? 
size : base); - txn->dbs = ptr_disp(txn, base); - txn->cursors = ptr_disp(txn->dbs, env->max_dbi * sizeof(txn->dbs[0])); -#if MDBX_DEBUG - txn->cursors[FREE_DBI] = nullptr; /* avoid SIGSEGV in an assertion later */ -#endif - txn->dbi_state = ptr_disp(txn, size - env->max_dbi * sizeof(txn->dbi_state[0])); - txn->flags = flags; - txn->env = env; - - if (parent) { - tASSERT(parent, dpl_check(parent)); -#if MDBX_ENABLE_DBI_SPARSE - txn->dbi_sparse = parent->dbi_sparse; -#endif /* MDBX_ENABLE_DBI_SPARSE */ - txn->dbi_seqs = parent->dbi_seqs; - txn->geo = parent->geo; - rc = dpl_alloc(txn); - if (likely(rc == MDBX_SUCCESS)) { - const size_t len = MDBX_PNL_GETSIZE(parent->tw.relist) + parent->tw.loose_count; - txn->tw.relist = pnl_alloc((len > MDBX_PNL_INITIAL) ? len : MDBX_PNL_INITIAL); - if (unlikely(!txn->tw.relist)) - rc = MDBX_ENOMEM; - } - if (unlikely(rc != MDBX_SUCCESS)) { - nested_failed: - pnl_free(txn->tw.relist); - dpl_free(txn); - osal_free(txn); - return LOG_IFERR(rc); - } - - /* Move loose pages to reclaimed list */ - if (parent->tw.loose_count) { - do { - page_t *lp = parent->tw.loose_pages; - tASSERT(parent, lp->flags == P_LOOSE); - rc = pnl_insert_span(&parent->tw.relist, lp->pgno, 1); - if (unlikely(rc != MDBX_SUCCESS)) - goto nested_failed; - MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *)); - VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *)); - parent->tw.loose_pages = page_next(lp); - /* Remove from dirty list */ - page_wash(parent, dpl_exist(parent, lp->pgno), lp, 1); - } while (parent->tw.loose_pages); - parent->tw.loose_count = 0; -#if MDBX_ENABLE_REFUND - parent->tw.loose_refund_wl = 0; -#endif /* MDBX_ENABLE_REFUND */ - tASSERT(parent, dpl_check(parent)); - } - txn->tw.dirtyroom = parent->tw.dirtyroom; - txn->tw.dirtylru = parent->tw.dirtylru; - - dpl_sort(parent); - if (parent->tw.spilled.list) - spill_purge(parent); - - tASSERT(txn, MDBX_PNL_ALLOCLEN(txn->tw.relist) >= MDBX_PNL_GETSIZE(parent->tw.relist)); - 
memcpy(txn->tw.relist, parent->tw.relist, MDBX_PNL_SIZEOF(parent->tw.relist)); - eASSERT(env, pnl_check_allocated(txn->tw.relist, (txn->geo.first_unallocated /* LY: intentional assignment - here, only for assertion */ - = parent->geo.first_unallocated) - - MDBX_ENABLE_REFUND)); - - txn->tw.gc.time_acc = parent->tw.gc.time_acc; - txn->tw.gc.last_reclaimed = parent->tw.gc.last_reclaimed; - if (parent->tw.gc.reclaimed) { - txn->tw.gc.reclaimed = parent->tw.gc.reclaimed; - parent->tw.gc.reclaimed = (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->tw.gc.reclaimed); - } - - txn->tw.retired_pages = parent->tw.retired_pages; - parent->tw.retired_pages = (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->tw.retired_pages); - - txn->txnid = parent->txnid; - txn->front_txnid = parent->front_txnid + 1; -#if MDBX_ENABLE_REFUND - txn->tw.loose_refund_wl = 0; -#endif /* MDBX_ENABLE_REFUND */ - txn->canary = parent->canary; - parent->flags |= MDBX_TXN_HAS_CHILD; - parent->nested = txn; - txn->parent = parent; - txn->owner = parent->owner; - txn->tw.troika = parent->tw.troika; - - txn->cursors[FREE_DBI] = nullptr; - txn->cursors[MAIN_DBI] = nullptr; - txn->dbi_state[FREE_DBI] = parent->dbi_state[FREE_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); - txn->dbi_state[MAIN_DBI] = parent->dbi_state[MAIN_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); - memset(txn->dbi_state + CORE_DBS, 0, (txn->n_dbi = parent->n_dbi) - CORE_DBS); - memcpy(txn->dbs, parent->dbs, sizeof(txn->dbs[0]) * CORE_DBS); - - tASSERT(parent, parent->tw.dirtyroom + parent->tw.dirtylist->length == - (parent->parent ? parent->parent->tw.dirtyroom : parent->env->options.dp_limit)); - tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == - (txn->parent ? txn->parent->tw.dirtyroom : txn->env->options.dp_limit)); - env->txn = txn; - tASSERT(parent, parent->cursors[FREE_DBI] == nullptr); - rc = parent->cursors[MAIN_DBI] ? 
cursor_shadow(parent->cursors[MAIN_DBI], txn, MAIN_DBI) : MDBX_SUCCESS; - if (AUDIT_ENABLED() && ASSERT_ENABLED()) { - txn->signature = txn_signature; - tASSERT(txn, audit_ex(txn, 0, false) == 0); - } - if (unlikely(rc != MDBX_SUCCESS)) - txn_end(txn, TXN_END_FAIL_BEGINCHILD); - } else { /* MDBX_TXN_RDONLY */ - txn->dbi_seqs = ptr_disp(txn->cursors, env->max_dbi * sizeof(txn->cursors[0])); -#if MDBX_ENABLE_DBI_SPARSE - txn->dbi_sparse = ptr_disp(txn->dbi_state, -bitmap_bytes); -#endif /* MDBX_ENABLE_DBI_SPARSE */ - renew: - rc = txn_renew(txn, flags); - } - - if (unlikely(rc != MDBX_SUCCESS)) { - if (txn != env->basal_txn) - osal_free(txn); - } else { - if (flags & (MDBX_TXN_RDONLY_PREPARE - MDBX_TXN_RDONLY)) - eASSERT(env, txn->flags == (MDBX_TXN_RDONLY | MDBX_TXN_FINISHED)); - else if (flags & MDBX_TXN_RDONLY) - eASSERT(env, (txn->flags & ~(MDBX_NOSTICKYTHREADS | MDBX_TXN_RDONLY | MDBX_WRITEMAP | - /* Win32: SRWL flag */ txn_shrink_allowed)) == 0); - else { - eASSERT(env, (txn->flags & ~(MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP | txn_shrink_allowed | MDBX_NOMETASYNC | - MDBX_SAFE_NOSYNC | MDBX_TXN_SPILLS)) == 0); - assert(!txn->tw.spilled.list && !txn->tw.spilled.least_removed); - } - txn->signature = txn_signature; - txn->userctx = context; - *ret = txn; - DEBUG("begin txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->txnid, - (flags & MDBX_TXN_RDONLY) ? 
'r' : 'w', (void *)txn, (void *)env, txn->dbs[MAIN_DBI].root, - txn->dbs[FREE_DBI].root); - } - - return LOG_IFERR(rc); -} - -int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) { - int rc = check_txn(txn, MDBX_TXN_FINISHED); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - if (unlikely(!info)) - return LOG_IFERR(MDBX_EINVAL); - - MDBX_env *const env = txn->env; -#if MDBX_ENV_CHECKPID - if (unlikely(env->pid != osal_getpid())) { - env->flags |= ENV_FATAL_ERROR; - return LOG_IFERR(MDBX_PANIC); - } -#endif /* MDBX_ENV_CHECKPID */ - - info->txn_id = txn->txnid; - info->txn_space_used = pgno2bytes(env, txn->geo.first_unallocated); - - if (txn->flags & MDBX_TXN_RDONLY) { - meta_ptr_t head; - uint64_t head_retired; - troika_t troika = meta_tap(env); - do { - /* fetch info from volatile head */ - head = meta_recent(env, &troika); - head_retired = unaligned_peek_u64_volatile(4, head.ptr_v->pages_retired); - info->txn_space_limit_soft = pgno2bytes(env, head.ptr_v->geometry.now); - info->txn_space_limit_hard = pgno2bytes(env, head.ptr_v->geometry.upper); - info->txn_space_leftover = pgno2bytes(env, head.ptr_v->geometry.now - head.ptr_v->geometry.first_unallocated); - } while (unlikely(meta_should_retry(env, &troika))); - - info->txn_reader_lag = head.txnid - info->txn_id; - info->txn_space_dirty = info->txn_space_retired = 0; - uint64_t reader_snapshot_pages_retired = 0; - if (txn->to.reader && - ((txn->flags & MDBX_TXN_PARKED) == 0 || safe64_read(&txn->to.reader->tid) != MDBX_TID_TXN_OUSTED) && - head_retired > - (reader_snapshot_pages_retired = atomic_load64(&txn->to.reader->snapshot_pages_retired, mo_Relaxed))) { - info->txn_space_dirty = info->txn_space_retired = - pgno2bytes(env, (pgno_t)(head_retired - reader_snapshot_pages_retired)); - - size_t retired_next_reader = 0; - lck_t *const lck = env->lck_mmap.lck; - if (scan_rlt && info->txn_reader_lag > 1 && lck) { - /* find next more recent reader */ - txnid_t next_reader = 
head.txnid; - const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease); - for (size_t i = 0; i < snap_nreaders; ++i) { - retry: - if (atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease)) { - jitter4testing(true); - const uint64_t snap_tid = safe64_read(&lck->rdt[i].tid); - const txnid_t snap_txnid = safe64_read(&lck->rdt[i].txnid); - const uint64_t snap_retired = atomic_load64(&lck->rdt[i].snapshot_pages_retired, mo_AcquireRelease); - if (unlikely(snap_retired != atomic_load64(&lck->rdt[i].snapshot_pages_retired, mo_Relaxed)) || - snap_txnid != safe64_read(&lck->rdt[i].txnid) || snap_tid != safe64_read(&lck->rdt[i].tid)) - goto retry; - if (snap_txnid <= txn->txnid) { - retired_next_reader = 0; - break; - } - if (snap_txnid < next_reader && snap_tid >= MDBX_TID_TXN_OUSTED) { - next_reader = snap_txnid; - retired_next_reader = pgno2bytes( - env, (pgno_t)(snap_retired - atomic_load64(&txn->to.reader->snapshot_pages_retired, mo_Relaxed))); - } - } - } - } - info->txn_space_dirty = retired_next_reader; - } - } else { - info->txn_space_limit_soft = pgno2bytes(env, txn->geo.now); - info->txn_space_limit_hard = pgno2bytes(env, txn->geo.upper); - info->txn_space_retired = - pgno2bytes(env, txn->nested ? (size_t)txn->tw.retired_pages : MDBX_PNL_GETSIZE(txn->tw.retired_pages)); - info->txn_space_leftover = pgno2bytes(env, txn->tw.dirtyroom); - info->txn_space_dirty = - pgno2bytes(env, txn->tw.dirtylist ? 
txn->tw.dirtylist->pages_including_loose - : (txn->tw.writemap_dirty_npages + txn->tw.writemap_spilled_npages)); - info->txn_reader_lag = INT64_MAX; - lck_t *const lck = env->lck_mmap.lck; - if (scan_rlt && lck) { - txnid_t oldest_snapshot = txn->txnid; - const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease); - if (snap_nreaders) { - oldest_snapshot = txn_snapshot_oldest(txn); - if (oldest_snapshot == txn->txnid - 1) { - /* check if there is at least one reader */ - bool exists = false; - for (size_t i = 0; i < snap_nreaders; ++i) { - if (atomic_load32(&lck->rdt[i].pid, mo_Relaxed) && txn->txnid > safe64_read(&lck->rdt[i].txnid)) { - exists = true; - break; - } - } - oldest_snapshot += !exists; - } - } - info->txn_reader_lag = txn->txnid - oldest_snapshot; - } - } - - return MDBX_SUCCESS; -} - -MDBX_env *mdbx_txn_env(const MDBX_txn *txn) { - if (unlikely(!txn || txn->signature != txn_signature || txn->env->signature.weak != env_signature)) - return nullptr; - return txn->env; -} - -uint64_t mdbx_txn_id(const MDBX_txn *txn) { - if (unlikely(!txn || txn->signature != txn_signature)) - return 0; - return txn->txnid; -} - -MDBX_txn_flags_t mdbx_txn_flags(const MDBX_txn *txn) { - STATIC_ASSERT( - (MDBX_TXN_INVALID & (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | MDBX_TXN_HAS_CHILD | - txn_gc_drained | txn_shrink_allowed | txn_rw_begin_flags | txn_ro_begin_flags)) == 0); - if (unlikely(!txn || txn->signature != txn_signature)) - return MDBX_TXN_INVALID; - assert(0 == (int)(txn->flags & MDBX_TXN_INVALID)); - - MDBX_txn_flags_t flags = txn->flags; - if (F_ISSET(flags, MDBX_TXN_PARKED | MDBX_TXN_RDONLY) && txn->to.reader && - safe64_read(&txn->to.reader->tid) == MDBX_TID_TXN_OUSTED) - flags |= MDBX_TXN_OUSTED; - return flags; -} - -int mdbx_txn_reset(MDBX_txn *txn) { - int rc = check_txn(txn, 0); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - /* This call is only valid for read-only txns */ - if 
(unlikely((txn->flags & MDBX_TXN_RDONLY) == 0)) - return LOG_IFERR(MDBX_EINVAL); - - /* LY: don't close DBI-handles */ - rc = txn_end(txn, TXN_END_RESET | TXN_END_UPDATE); - if (rc == MDBX_SUCCESS) { - tASSERT(txn, txn->signature == txn_signature); - tASSERT(txn, txn->owner == 0); - } - return LOG_IFERR(rc); -} - -int mdbx_txn_break(MDBX_txn *txn) { - do { - int rc = check_txn(txn, 0); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - txn->flags |= MDBX_TXN_ERROR; - if (txn->flags & MDBX_TXN_RDONLY) - break; - txn = txn->nested; - } while (txn); - return MDBX_SUCCESS; -} - -int mdbx_txn_abort(MDBX_txn *txn) { - int rc = check_txn(txn, 0); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - rc = check_env(txn->env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - - if ((txn->flags & (MDBX_TXN_RDONLY | MDBX_NOSTICKYTHREADS)) == MDBX_NOSTICKYTHREADS && - unlikely(txn->owner != osal_thread_self())) { - mdbx_txn_break(txn); - return LOG_IFERR(MDBX_THREAD_MISMATCH); - } - - return LOG_IFERR(txn_abort(txn)); -} - -int mdbx_txn_park(MDBX_txn *txn, bool autounpark) { - STATIC_ASSERT(MDBX_TXN_BLOCKED > MDBX_TXN_ERROR); - int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0)) - return LOG_IFERR(MDBX_TXN_INVALID); - - if (unlikely((txn->flags & MDBX_TXN_ERROR))) { - rc = txn_end(txn, TXN_END_RESET | TXN_END_UPDATE); - return LOG_IFERR(rc ? 
rc : MDBX_OUSTED); - } - - return LOG_IFERR(txn_park(txn, autounpark)); -} - -int mdbx_txn_unpark(MDBX_txn *txn, bool restart_if_ousted) { - STATIC_ASSERT(MDBX_TXN_BLOCKED > MDBX_TXN_PARKED + MDBX_TXN_ERROR); - int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_PARKED - MDBX_TXN_ERROR); - if (unlikely(rc != MDBX_SUCCESS)) - return LOG_IFERR(rc); - if (unlikely(!F_ISSET(txn->flags, MDBX_TXN_RDONLY | MDBX_TXN_PARKED))) - return MDBX_SUCCESS; - - rc = txn_unpark(txn); - if (likely(rc != MDBX_OUSTED) || !restart_if_ousted) - return LOG_IFERR(rc); - - tASSERT(txn, txn->flags & MDBX_TXN_FINISHED); - rc = txn_renew(txn, MDBX_TXN_RDONLY); - return (rc == MDBX_SUCCESS) ? MDBX_RESULT_TRUE : LOG_IFERR(rc); -} - int txn_check_badbits_parked(const MDBX_txn *txn, int bad_bits) { tASSERT(txn, (bad_bits & MDBX_TXN_PARKED) && (txn->flags & bad_bits)); /* Здесь осознано заложено отличие в поведении припаркованных транзакций: @@ -1859,3 +1001,79 @@ int txn_check_badbits_parked(const MDBX_txn *txn, int bad_bits) { tASSERT(txn, bad_bits == MDBX_TXN_BLOCKED || bad_bits == MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); return mdbx_txn_unpark((MDBX_txn *)txn, false); } + +int txn_park(MDBX_txn *txn, bool autounpark) { + reader_slot_t *const rslot = txn->to.reader; + tASSERT(txn, (txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) == MDBX_TXN_RDONLY); + tASSERT(txn, txn->to.reader->tid.weak < MDBX_TID_TXN_OUSTED); + if (unlikely((txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) != MDBX_TXN_RDONLY)) + return MDBX_BAD_TXN; + + const uint32_t pid = atomic_load32(&rslot->pid, mo_Relaxed); + const uint64_t tid = atomic_load64(&rslot->tid, mo_Relaxed); + const uint64_t txnid = atomic_load64(&rslot->txnid, mo_Relaxed); + if (unlikely(pid != txn->env->pid)) { + ERROR("unexpected pid %u%s%u", pid, " != must ", txn->env->pid); + return MDBX_PROBLEM; + } + if (unlikely(tid != txn->owner || txnid != txn->txnid)) { + ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%0zx" + " 
and/or txn-id %" PRIaTXN "%s%" PRIaTXN, + tid, " != must ", txn->owner, txnid, " != must ", txn->txnid); + return MDBX_BAD_RSLOT; + } + + atomic_store64(&rslot->tid, MDBX_TID_TXN_PARKED, mo_AcquireRelease); + atomic_store32(&txn->env->lck->rdt_refresh_flag, true, mo_Relaxed); + txn->flags += autounpark ? MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK : MDBX_TXN_PARKED; + return MDBX_SUCCESS; +} + +int txn_unpark(MDBX_txn *txn) { + if (unlikely((txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD | MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) != + (MDBX_TXN_RDONLY | MDBX_TXN_PARKED))) + return MDBX_BAD_TXN; + + for (reader_slot_t *const rslot = txn->to.reader; rslot; atomic_yield()) { + const uint32_t pid = atomic_load32(&rslot->pid, mo_Relaxed); + uint64_t tid = safe64_read(&rslot->tid); + uint64_t txnid = safe64_read(&rslot->txnid); + if (unlikely(pid != txn->env->pid)) { + ERROR("unexpected pid %u%s%u", pid, " != expected ", txn->env->pid); + return MDBX_PROBLEM; + } + if (unlikely(tid == MDBX_TID_TXN_OUSTED || txnid >= SAFE64_INVALID_THRESHOLD)) + break; + if (unlikely(tid != MDBX_TID_TXN_PARKED || txnid != txn->txnid)) { + ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%" PRIx64 " and/or txn-id %" PRIaTXN "%s%" PRIaTXN, tid, " != must ", + MDBX_TID_TXN_OUSTED, txnid, " != must ", txn->txnid); + break; + } + if (unlikely((txn->flags & MDBX_TXN_ERROR))) + break; + +#if MDBX_64BIT_CAS + if (unlikely(!atomic_cas64(&rslot->tid, MDBX_TID_TXN_PARKED, txn->owner))) + continue; +#else + atomic_store32(&rslot->tid.high, (uint32_t)((uint64_t)txn->owner >> 32), mo_Relaxed); + if (unlikely(!atomic_cas32(&rslot->tid.low, (uint32_t)MDBX_TID_TXN_PARKED, (uint32_t)txn->owner))) { + atomic_store32(&rslot->tid.high, (uint32_t)(MDBX_TID_TXN_PARKED >> 32), mo_AcquireRelease); + continue; + } +#endif + txnid = safe64_read(&rslot->txnid); + tid = safe64_read(&rslot->tid); + if (unlikely(txnid != txn->txnid || tid != txn->owner)) { + ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%zx" + " and/or txn-id %" 
PRIaTXN "%s%" PRIaTXN, + tid, " != must ", txn->owner, txnid, " != must ", txn->txnid); + break; + } + txn->flags &= ~(MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK); + return MDBX_SUCCESS; + } + + int err = txn_end(txn, TXN_END_OUSTED | TXN_END_RESET | TXN_END_UPDATE); + return err ? err : MDBX_OUSTED; +} From e15079ec68522ec896ee98f7bacf25e8a0e4d9e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 17 Dec 2024 22:00:33 +0300 Subject: [PATCH 396/443] =?UTF-8?q?mdbx:=20=D0=B8=D0=B7=D0=BC=D0=B5=D0=BD?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20`log=5Fif=5Ferror()`=20=D1=80=D0=B0?= =?UTF-8?q?=D0=B4=D0=B8=20=D1=83=D1=81=D1=82=D1=80=D0=B0=D0=BD=D0=B5=D0=BD?= =?UTF-8?q?=D0=B8=D1=8F=20=D0=BB=D0=BE=D0=B6=D0=BD=D1=8B=D1=85=20"may=20be?= =?UTF-8?q?=20used=20uninitialized"=20=D0=BF=D1=80=D0=B5=D0=B4=D1=83=D0=BF?= =?UTF-8?q?=D1=80=D0=B5=D0=B6=D0=B4=D0=B5=D0=BD=D0=B8=D0=B9=20=D0=B2=20LTO?= =?UTF-8?q?-=D1=81=D0=B1=D0=BE=D1=80=D0=BA=D0=B0=D1=85.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit При включении LTO анализатор путей выполнения внутри GCC начинает укачивать из-за выражений вида `return LOG_IFERR(MDBX_EINVAL);` Проблема в том, что несмотря на __builtin_assume() и __builtin_unreachable(), компилятор не хочет видеть что функция log_if_error() всегда возвращает получаемое значение. А если допустить что значение будет изменено, то вместо ошибки может быть MDBX_SUCCESS, и тогда в вызывающем как бы может произойти обращение к неинициализированным данным, что и беспокоит компилятор. Например, при сборке mdbx_load: ‘txn_info.txn_space_dirty’ may be used uninitialized [-Wmaybe-uninitialized] Поэтому проще пойти анализатору навстречу и упростить исходный код. Теперь код ошибки явно пробрасывается через тело inline-функции, но это требует 1-2 дополнительных процессорных инструкции на каждое применение макроса LOG_IFERR.
Также здесь откатывается коммит 81a8127084d9a6a7777bb375e029062330e51979. --- src/api-cold.c | 16 ++++++-------- src/api-dbi.c | 48 +++++++++++++++-------------------------- src/api-misc.c | 10 +++------ src/api-txn-data.c | 9 ++++---- src/logging_and_debug.c | 3 +-- src/logging_and_debug.h | 23 ++++---------------- 6 files changed, 37 insertions(+), 72 deletions(-) diff --git a/src/api-cold.c b/src/api-cold.c index dfa082db..7498d8ab 100644 --- a/src/api-cold.c +++ b/src/api-cold.c @@ -361,17 +361,15 @@ __cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, bool onoff) return MDBX_SUCCESS; } -__cold int mdbx_env_get_flags(const MDBX_env *env, unsigned *arg) { - if (unlikely(!arg)) +__cold int mdbx_env_get_flags(const MDBX_env *env, unsigned *flags) { + int rc = check_env(env, false); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + if (unlikely(!flags)) return LOG_IFERR(MDBX_EINVAL); - int rc = check_env(env, false); - if (unlikely(rc != MDBX_SUCCESS)) { - *arg = 0; - return LOG_IFERR(rc); - } - - *arg = env->flags & ENV_USABLE_FLAGS; + *flags = env->flags & ENV_USABLE_FLAGS; return MDBX_SUCCESS; } diff --git a/src/api-dbi.c b/src/api-dbi.c index 5f52d770..a102f8b6 100644 --- a/src/api-dbi.c +++ b/src/api-dbi.c @@ -197,22 +197,16 @@ int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { } int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, unsigned *state) { - if (unlikely(!flags || !state)) - return LOG_IFERR(MDBX_EINVAL); - int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR - MDBX_TXN_PARKED); - if (unlikely(rc != MDBX_SUCCESS)) { - *flags = 0; - *state = 0; + if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); - } rc = dbi_check(txn, dbi); - if (unlikely(rc != MDBX_SUCCESS)) { - *flags = 0; - *state = 0; + if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); - } + + if (unlikely(!flags || !state)) + return LOG_IFERR(MDBX_EINVAL); *flags = txn->dbs[dbi].flags & DB_PERSISTENT_FLAGS; *state = 
txn->dbi_state[dbi] & (DBI_FRESH | DBI_CREAT | DBI_DIRTY | DBI_STALE); @@ -230,41 +224,33 @@ static void stat_get(const tree_t *db, MDBX_stat *st, size_t bytes) { } __cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, size_t bytes) { - if (unlikely(!dest)) - return LOG_IFERR(MDBX_EINVAL); - int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; + return LOG_IFERR(rc); rc = dbi_check(txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; + return LOG_IFERR(rc); - const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid); - if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) { - rc = MDBX_EINVAL; - goto bailout; - } - - if (unlikely(txn->flags & MDBX_TXN_BLOCKED)) { - rc = MDBX_BAD_TXN; - goto bailout; - } + if (unlikely(txn->flags & MDBX_TXN_BLOCKED)) + return LOG_IFERR(MDBX_BAD_TXN); if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) { rc = tbl_fetch((MDBX_txn *)txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; + return LOG_IFERR(rc); } + if (unlikely(!dest)) + return LOG_IFERR(MDBX_EINVAL); + + const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid); + if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) + return LOG_IFERR(MDBX_EINVAL); + dest->ms_psize = txn->env->ps; stat_get(&txn->dbs[dbi], dest, bytes); return MDBX_SUCCESS; - -bailout: - memset(dest, 0, bytes); - return LOG_IFERR(rc); } __cold int mdbx_enumerate_tables(const MDBX_txn *txn, MDBX_table_enum_func *func, void *ctx) { diff --git a/src/api-misc.c b/src/api-misc.c index ef3a172f..c34c9df7 100644 --- a/src/api-misc.c +++ b/src/api-misc.c @@ -29,21 +29,17 @@ __cold int mdbx_is_readahead_reasonable(size_t volume, intptr_t redundancy) { int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, uint64_t increment) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) { - bailout: - if (likely(result)) - *result = 
~UINT64_C(0); + if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); - } rc = dbi_check(txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; + return LOG_IFERR(rc); if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) { rc = tbl_fetch(txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; + return LOG_IFERR(rc); } tree_t *dbs = &txn->dbs[dbi]; diff --git a/src/api-txn-data.c b/src/api-txn-data.c index f12b06d3..b3cee3c3 100644 --- a/src/api-txn-data.c +++ b/src/api-txn-data.c @@ -6,8 +6,8 @@ __cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, uint32_t *mask) { if (unlikely(!mask)) return LOG_IFERR(MDBX_EINVAL); - *mask = 0; + int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); @@ -16,6 +16,7 @@ __cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, uint32_ rc = cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); + if ((cx.outer.tree->flags & MDBX_DUPSORT) == 0) return MDBX_RESULT_TRUE; @@ -50,15 +51,15 @@ __cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, uint32_ } int mdbx_canary_get(const MDBX_txn *txn, MDBX_canary *canary) { - if (unlikely(canary == nullptr)) - return LOG_IFERR(MDBX_EINVAL); - int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) { memset(canary, 0, sizeof(*canary)); return LOG_IFERR(rc); } + if (unlikely(canary == nullptr)) + return LOG_IFERR(MDBX_EINVAL); + *canary = txn->canary; return MDBX_SUCCESS; } diff --git a/src/logging_and_debug.c b/src/logging_and_debug.c index e09f4dd6..f796db35 100644 --- a/src/logging_and_debug.c +++ b/src/logging_and_debug.c @@ -56,14 +56,13 @@ __cold void debug_log(int level, const char *function, int line, const char *fmt va_end(args); } -__cold int log_error(const int err, const char *func, unsigned line) { +__cold void log_error(const int err, const char *func, unsigned line) { assert(err != MDBX_SUCCESS); if 
(unlikely(globals.loglevel >= MDBX_LOG_DEBUG) && (globals.loglevel >= MDBX_LOG_TRACE || !(err == MDBX_RESULT_TRUE || err == MDBX_NOTFOUND))) { char buf[256]; debug_log(MDBX_LOG_ERROR, func, line, "error %d (%s)\n", err, mdbx_strerror_r(err, buf, sizeof(buf))); } - return err; } /* Dump a val in ascii or hexadecimal. */ diff --git a/src/logging_and_debug.h b/src/logging_and_debug.h index 9382eafc..e99ccea5 100644 --- a/src/logging_and_debug.h +++ b/src/logging_and_debug.h @@ -148,27 +148,12 @@ MDBX_INTERNAL const char *pagetype_caption(const uint8_t type, char buf4unknown[ #define DVAL_DEBUG(x) ("-") #endif -MDBX_INTERNAL int log_error(const int err, const char *func, unsigned line); +MDBX_INTERNAL void log_error(const int err, const char *func, unsigned line); MDBX_MAYBE_UNUSED static inline int log_if_error(const int err, const char *func, unsigned line) { - if (likely(err == MDBX_SUCCESS)) - return err; - int rc = log_error(err, func, line); -#if __has_c_attribute(assume) - [[assume(rc == err && rc != MDBX_SUCCESS)]]; -#endif -#if defined(__clang__) || __has_builtin(assume) - __builtin_assume(rc == err && rc != MDBX_SUCCESS); -#endif - if (rc != err || rc == MDBX_SUCCESS) { -#if defined(__GNUC__) - __builtin_unreachable(); -#elif defined(_MSC_VER) && !defined(__clang__) - __assume(0); -#endif - rc = err; - } - return rc; + if (unlikely(err != MDBX_SUCCESS)) + log_error(err, func, line); + return err; } #define LOG_IFERR(err) log_if_error((err), __func__, __LINE__) From 5a9eea8accaa7328b40051666e570336c51bdb7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 18 Dec 2024 01:21:00 +0300 Subject: [PATCH 397/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 67 
++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index 3750b7d3..3672b4d3 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -5,6 +5,73 @@ English version [by liar Google](https://gitflic-ru.translate.goog/project/erthi and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md). +## v0.13.3 в процессе + +Благодарности: + + - [Алексей (Keller) Костюк](https://t.me/keller18306) за сообщения об ошибках и недочетах. + - [Erigon](https://docs.erigon.tech/) за спонсорство. + +Новое: + + - В API добавлена функция `mdbx_cursor_count_ex()` позволяющая получить как количество мульти-значений + соответствующих текущему ключу, так и информацию о вложенном дереве хранящем эти значения. + +Изменение поведения: + + - Теперь при включении профилирования GC (сборка с опцией `MDBX_ENABLE_PROFGC=ON`) + подсчитываются затраты времени ЦПУ на слияние списков страниц (на работу функции `pnl_merge()`). + + - В утилите тестирования значение режима данных переименовано из `data.dups` в `data.multi`. + +Исправления: + + - Устранён регресс состояния вложенного/dupsort курсора после вставки данных в `MDBX_APPEND`-режиме. + + При добавлении нового ключа в append-режиме, в случае когда в текущей + (последней) позиции с ключом связаны несколько значений и + (соответственно) вложенный dupsort-курсор инициализирован, вставка + происходила без сброса вложенного курсора. + + В результате вложенный курсор логически оставался стоять на + multivalue-данных связанных с предыдущей позицией основного курсора, + т.е. переходил в неконсистентное состояние. + + Ошибка проявлялась возвратом неверных значений из `mdbx_cursor_count()` + или срабатывание assert-проверки в отладочных сборках. + + - Получение boot_id при работе внутри LXC-контейнера. + + Из LXC-контейнера не доступен файл хостовой системы `/proc/sys/kernel/random/boot_id`. 
+ Вместо него, при каждом старте контейнера, создается и заполняется + случайными данными собственный boot_id смонтированный через bind из `tmpfs`. + https://github.com/lxc/lxc/issues/3027 + + Ранее этот замещенный bootid отбраковывался внутри libmdbx, + так как располагается в `tmpfs`, а не файловой системе `/proc`. + Теперь при работе внутри LXC-контейнера такой bootid будет использоваться. + + Однако, полноценный контроль по boot_id не возможен, так как при + рестарте LXC-контейнера (но не хоста) boot_id будет меняться, хотя + данные в unified page cache сохраняются. + + Таким образом, при рестарте LXC-контейнера, libmdbx будет производить + откат БД до крайней точки устойчивой фиксации, что может приводить к + утрате данных пользователя в случаях когда они могли быть сохранены. + Однако, улучшить ситуацию пока не представляется возможным, как минимум + до доступности boot_id хостовой системы изнутри LXC-контейнера. + + - Устранёна ошибка неверной обработки попытки запуска вложенной читающей транзакции. + Теперь в таких ситуациях возвращается ошибка `MDBX_EINVAL`, так как вложенность + поддерживается только для транзакций чтения-записи. + + Ошибка была внесена при рефакторинге, коммитом `2f2df1ee76ab137ee66d00af69a82a30dc0d6deb` + чуть более 5 лет назад и долго оставалось не замеченной. 
+ + +-------------------------------------------------------------------------------- + + ## v0.13.2 "Прошлогодний Снег" (Last Year's Snow) от 2024-12-11 Поддерживающий выпуск с исправлением обнаруженных ошибок и устранением недочетов From 891fa1d435472a709b41fbda3ad61ca286db6867 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 18 Dec 2024 01:25:50 +0300 Subject: [PATCH 398/443] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5?= =?UTF-8?q?=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20doxygen-?= =?UTF-8?q?=D0=BA=D0=BE=D0=BC=D0=BC=D0=B5=D0=BD=D1=82=D0=B0=D1=80=D0=B8?= =?UTF-8?q?=D0=B5=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mdbx.h b/mdbx.h index fd29387a..771402ee 100644 --- a/mdbx.h +++ b/mdbx.h @@ -5716,7 +5716,7 @@ LIBMDBX_API int mdbx_cursor_del(MDBX_cursor *cursor, MDBX_put_flags_t flags); * sorted duplicate data items \ref MDBX_DUPSORT. * * \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open(). - * \param [out] pcount Address where the count will be stored. + * \param [out] count Address where the count will be stored. * * \returns A non-zero error value on failure and 0 on success, * some possible errors are: @@ -5724,7 +5724,7 @@ LIBMDBX_API int mdbx_cursor_del(MDBX_cursor *cursor, MDBX_put_flags_t flags); * by current thread. * \retval MDBX_EINVAL Cursor is not initialized, or an invalid parameter * was specified. */ -LIBMDBX_API int mdbx_cursor_count(const MDBX_cursor *cursor, size_t *pcount); +LIBMDBX_API int mdbx_cursor_count(const MDBX_cursor *cursor, size_t *count); /** \brief Return count values (aka duplicates) and nested b-tree statistics for current key. 
* \ingroup c_crud @@ -5737,7 +5737,7 @@ LIBMDBX_API int mdbx_cursor_count(const MDBX_cursor *cursor, size_t *pcount); * sorted duplicate data items \ref MDBX_DUPSORT. * * \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open(). - * \param [out] pcount Address where the count will be stored. + * \param [out] count Address where the count will be stored. * \param [out] stat The address of an \ref MDBX_stat structure where * the statistics of a nested b-tree will be copied. * \param [in] bytes The size of \ref MDBX_stat. @@ -5748,7 +5748,7 @@ LIBMDBX_API int mdbx_cursor_count(const MDBX_cursor *cursor, size_t *pcount); * by current thread. * \retval MDBX_EINVAL Cursor is not initialized, or an invalid parameter * was specified. */ -LIBMDBX_API int mdbx_cursor_count_ex(const MDBX_cursor *mc, size_t *count, MDBX_stat *stat, size_t bytes); +LIBMDBX_API int mdbx_cursor_count_ex(const MDBX_cursor *cursor, size_t *count, MDBX_stat *stat, size_t bytes); /** \brief Determines whether the cursor is pointed to a key-value pair or not, * i.e. was not positioned or points to the end of data. 
From e43cf69a0cf794316f5dbba3914090193f2cecf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 18 Dec 2024 12:27:37 +0300 Subject: [PATCH 399/443] =?UTF-8?q?mdbx-doc:=20=D0=B8=D0=B7=D0=BC=D0=B5?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B1=D0=B0=D0=B7=D1=8B=20?= =?UTF-8?q?=D1=81=D1=81=D1=8B=D0=BB=D0=BE=D0=BA=20online-=D0=BF=D0=B5?= =?UTF-8?q?=D1=80=D0=B5=D0=B2=D0=BE=D0=B4=D0=B0=20=D1=81=20`gitflic.ru`=20?= =?UTF-8?q?=D0=BD=D0=B0=20`libmdbx.dqdkfa.ru`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 3672b4d3..4a6386f0 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,8 +1,8 @@ ChangeLog ========= -English version [by liar Google](https://gitflic-ru.translate.goog/project/erthink/libmdbx/blob?file=ChangeLog.md&_x_tr_sl=ru&_x_tr_tl=en) -and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md). +English version [by liar Google](https://libmdbx-dqdkfa-ru.translate.goog/md__change_log.html?_x_tr_sl=ru&_x_tr_tl=en) +and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/libmdbx.dqdkfa.ru/md__change_log.html). 
## v0.13.3 в процессе From 0d1c08677d3c61003ae3784e766a6ecde550ec5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 18 Dec 2024 13:01:52 +0300 Subject: [PATCH 400/443] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5?= =?UTF-8?q?=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 4a6386f0..7da54360 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -17,13 +17,6 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/libmdbx - В API добавлена функция `mdbx_cursor_count_ex()` позволяющая получить как количество мульти-значений соответствующих текущему ключу, так и информацию о вложенном дереве хранящем эти значения. -Изменение поведения: - - - Теперь при включении профилирования GC (сборка с опцией `MDBX_ENABLE_PROFGC=ON`) - подсчитываются затраты времени ЦПУ на слияние списков страниц (на работу функции `pnl_merge()`). - - - В утилите тестирования значение режима данных переименовано из `data.dups` в `data.multi`. - Исправления: - Устранён регресс состояния вложенного/dupsort курсора после вставки данных в `MDBX_APPEND`-режиме. @@ -40,24 +33,25 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/libmdbx Ошибка проявлялась возвратом неверных значений из `mdbx_cursor_count()` или срабатывание assert-проверки в отладочных сборках. - - Получение boot_id при работе внутри LXC-контейнера. + - Поддержка получения boot_id при работе внутри LXC-контейнера. Из LXC-контейнера не доступен файл хостовой системы `/proc/sys/kernel/random/boot_id`. 
Вместо него, при каждом старте контейнера, создается и заполняется случайными данными собственный boot_id смонтированный через bind из `tmpfs`. https://github.com/lxc/lxc/issues/3027 - Ранее этот замещенный bootid отбраковывался внутри libmdbx, - так как располагается в `tmpfs`, а не файловой системе `/proc`. + Ранее этот подставной/замещенный boot_id отбраковывался внутри libmdbx, + так как файл располагается в `tmpfs`, а не в файловой системе `/proc`. + В результате boot_id для проверки целостности БД не был доступен. Теперь при работе внутри LXC-контейнера такой bootid будет использоваться. - Однако, полноценный контроль по boot_id не возможен, так как при + Однако, полноценно работающий контроль по boot_id не возможен, так как при рестарте LXC-контейнера (но не хоста) boot_id будет меняться, хотя данные в unified page cache сохраняются. - Таким образом, при рестарте LXC-контейнера, libmdbx будет производить - откат БД до крайней точки устойчивой фиксации, что может приводить к - утрате данных пользователя в случаях когда они могли быть сохранены. + Таким образом, при рестарте LXC-контейнера без рестарта хоста, libmdbx придется + откатить состояние БД до крайней точки устойчивой фиксации, что повлечет + утрату данных пользователя в случаях когда они могли быть сохранены. Однако, улучшить ситуацию пока не представляется возможным, как минимум до доступности boot_id хостовой системы изнутри LXC-контейнера. @@ -68,6 +62,13 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/libmdbx Ошибка была внесена при рефакторинге, коммитом `2f2df1ee76ab137ee66d00af69a82a30dc0d6deb` чуть более 5 лет назад и долго оставалось не замеченной. +Изменение поведения: + + - Теперь при включении профилирования GC (сборка с опцией `MDBX_ENABLE_PROFGC=ON`) + подсчитываются затраты времени ЦПУ на слияние списков страниц, т.е. на работу функции `pnl_merge()`. + + - В утилите тестирования значение режима данных переименовано из `data.dups` в `data.multi`. 
+ -------------------------------------------------------------------------------- From d3a27d27f6a0368d84b514c66570ea5622894adf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 19 Dec 2024 17:49:40 +0300 Subject: [PATCH 401/443] =?UTF-8?q?mdbx-doc:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20yandex-=D0=BC=D0=B5?= =?UTF-8?q?=D1=82=D1=80=D0=B8=D0=BA=D0=B8=20=D0=B2=20=D0=B3=D0=B5=D0=BD?= =?UTF-8?q?=D0=B5=D1=80=D0=B8=D1=80=D1=83=D0=B5=D0=BC=D1=8B=D0=B9=20html.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Doxyfile.in | 2 +- docs/header.html | 96 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 docs/header.html diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index c29a8711..198f2e05 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -1280,7 +1280,7 @@ HTML_FILE_EXTENSION = .html # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. -HTML_HEADER = +HTML_HEADER = header.html # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard diff --git a/docs/header.html b/docs/header.html new file mode 100644 index 00000000..f6b746ca --- /dev/null +++ b/docs/header.html @@ -0,0 +1,96 @@ + + + + + + + +$projectname: $title +$title + + + + + + + + + + + + + + +$treeview +$search +$mathjax +$darkmode + +$extrastylesheet + + + + +
+ + + +
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
$projectname $projectnumber +
+
$projectbrief
+
+
$projectbrief
+
$searchbox
$searchbox
+
+ + + + + + From 462af2be48bb864f07170455bb99eb8ba494d555 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 20 Dec 2024 11:46:01 +0300 Subject: [PATCH 402/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D1=88=D0=B8=D0=B1?= =?UTF-8?q?=D0=BA=D0=B8=20=D1=80=D0=B5=D0=B4=D0=B0=D0=BA=D1=82=D0=B8=D1=80?= =?UTF-8?q?=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20README.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7b3d8631..72e7cc5d 100644 --- a/README.md +++ b/README.md @@ -412,7 +412,7 @@ since release the version 1.0. ## Source code embedding -_libmdbx_ provides two official three for integration in source code form: +_libmdbx_ provides three official ways for integration in source code form: 1. Using an amalgamated source code which available in the [releases section](https://gitflic.ru/project/erthink/libmdbx/release) on GitFlic. 
> An amalgamated source code includes all files required to build and From ab57ce7d5f8579be56e8f744d2041b982e46a495 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 21 Dec 2024 19:36:54 +0300 Subject: [PATCH 403/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=81=D1=81=D1=8B=D0=BB=D0=BA?= =?UTF-8?q?=D0=B8=20=D0=BD=D0=B0=20=D0=BD=D0=BE=D0=B2=D1=8B=D0=B5=20=D0=BF?= =?UTF-8?q?=D1=80=D0=B8=D0=B2=D1=8F=D0=B7=D0=BA=D0=B8=20=D0=BA=20Python.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 72e7cc5d..753034ba 100644 --- a/README.md +++ b/README.md @@ -647,7 +647,7 @@ Bindings | Rust | [libmdbx-rs](https://github.com/vorot93/libmdbx-rs) | [Artem Vorotnikov](https://github.com/vorot93) | | Rust | [mdbx](https://crates.io/crates/mdbx) | [gcxfd](https://github.com/gcxfd) | | Java | [mdbxjni](https://github.com/castortech/mdbxjni) | [Castor Technologies](https://castortech.com/) | -| Python (draft) | [python-bindings](https://libmdbx.dqdkfa.ru/dead-github/commits/python-bindings) branch | [Noel Kuntze](https://github.com/Thermi) +| Python | [PyPi/libmdbx](https://pypi.org/project/libmdbx/) | [Lazymio](https://github.com/wtdcode) | | .NET (obsolete) | [mdbx.NET](https://github.com/wangjia184/mdbx.NET) | [Jerry Wang](https://github.com/wangjia184) | From 40596865348ec04f3d290a48d99937cead15eb46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 20 Dec 2024 20:33:56 +0300 Subject: [PATCH 404/443] =?UTF-8?q?mdbx:=20=D0=BE=D0=BF=D0=B5=D1=87=D0=B0?= =?UTF-8?q?=D1=82=D0=BA=D0=B8=20=D0=B2=20=D0=BA=D0=BE=D0=BC=D0=BC=D0=B5?= 
=?UTF-8?q?=D0=BD=D1=82=D0=B0=D1=80=D0=B8=D1=8F=D1=85.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/gc-get.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gc-get.c b/src/gc-get.c index a32f2755..b33ba209 100644 --- a/src/gc-get.c +++ b/src/gc-get.c @@ -610,7 +610,7 @@ __hot static pgno_t relist_get_single(MDBX_txn *txn) { * диском будет более кучным, а у страниц ближе к концу БД будет больше шансов * попасть под авто-компактификацию. Частично эта тактика уже реализована, но * для её эффективности требуется явно приоритезировать выделение страниц: - * - поддерживать для relist, для ближних и для дальних страниц; + * - поддерживать два relist, для ближних и для дальних страниц; * - использовать страницы из дальнего списка, если первый пуст, * а второй слишком большой, либо при пустой GC. * @@ -618,11 +618,11 @@ __hot static pgno_t relist_get_single(MDBX_txn *txn) { * регионы будут линейными, что принципиально ускоряет запись на HDD. * Одновременно, в среднем это не повлияет на чтение, точнее говоря, если * порядок чтения не совпадает с порядком изменения (иначе говоря, если - * чтение не коррклирует с обновлениями и/или вставками) то не повлияет, иначе + * чтение не коррелирует с обновлениями и/или вставками) то не повлияет, иначе * может ускорить. Однако, последовательности в среднем достаточно редки. * Поэтому для эффективности требуется аккумулировать и поддерживать в ОЗУ * огромные списки страниц, а затем сохранять их обратно в БД. Текущий формат - * БД (без битовых карт) для этого крайне не удачен. Поэтому эта тактика не + * БД (без сжатых битовых карт) для этого крайне не удачен. Поэтому эта тактика не * имеет шансов быть успешной без смены формата БД (Mithril). * * 3. Стараться экономить последовательности страниц. 
Это позволяет избегать @@ -631,7 +631,7 @@ __hot static pgno_t relist_get_single(MDBX_txn *txn) { * информации от приложения библиотека не может знать насколько * востребованными будут последовательности в ближайшей перспективе, а * экономия последовательностей "на всякий случай" не только затратна - * сама-по-себе, но и работает во вред. + * сама-по-себе, но и работает во вред (добавляет хаоса). * * Поэтому: * - в TODO добавляется разделение relist на «ближние» и «дальние» страницы, From 0339aa56d932741814297ba43626bf2185aa60fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 20 Dec 2024 20:34:49 +0300 Subject: [PATCH 405/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=B5=D1=80=D0=B5=D0=BD?= =?UTF-8?q?=D0=BE=D1=81=20`prefault=5Fwrite=5Factivated`=20=D0=B2=20=D1=82?= =?UTF-8?q?=D1=80=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D1=8E.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/gc-get.c | 8 ++++---- src/internals.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/gc-get.c b/src/gc-get.c index b33ba209..39b68bba 100644 --- a/src/gc-get.c +++ b/src/gc-get.c @@ -768,7 +768,7 @@ static inline pgr_t page_alloc_finalize(MDBX_env *const env, MDBX_txn *const txn * обновляться PTE с последующей генерацией page-fault и чтением данных из * грязной I/O очереди. Из-за этого штраф за лишнюю запись может быть * сравним с избегаемым ненужным чтением. */ - if (env->prefault_write_activated) { + if (txn->tw.prefault_write_activated) { void *const pattern = ptr_disp(env->page_auxbuf, need_clean ? 
env->ps : env->ps * 2); size_t file_offset = pgno2bytes(env, pgno); if (likely(num == 1)) { @@ -900,8 +900,8 @@ pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, uint8_t flags) gc->dbi_state = txn->dbi_state; gc->top_and_flags = z_fresh_mark; - env->prefault_write_activated = env->options.prefault_write; - if (env->prefault_write_activated) { + txn->tw.prefault_write_activated = env->options.prefault_write; + if (txn->tw.prefault_write_activated) { /* Проверка посредством minicore() существенно снижает затраты, но в * простейших случаях (тривиальный бенчмарк) интегральная производительность * становится вдвое меньше. А на платформах без mincore() и с проблемной @@ -914,7 +914,7 @@ pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, uint8_t flags) (txn->dbs[FREE_DBI].branch_pages == 0 && txn->geo.now < 1234) || /* Не суетимся если страница в зоне включенного упреждающего чтения */ (readahead_enabled && pgno + num < readahead_edge)) - env->prefault_write_activated = false; + txn->tw.prefault_write_activated = false; } retry_gc_refresh_oldest:; diff --git a/src/internals.h b/src/internals.h index aae09e42..a7f15143 100644 --- a/src/internals.h +++ b/src/internals.h @@ -212,6 +212,7 @@ struct MDBX_txn { troika_t troika; /* In write txns, array of cursors for each DB */ pnl_t __restrict relist; /* Reclaimed GC pages */ + bool prefault_write_activated; struct { /* The list of reclaimed txns from GC */ txl_t __restrict reclaimed; @@ -436,7 +437,6 @@ struct MDBX_env { } me_sysv_ipc; #endif /* MDBX_LOCKING == MDBX_LOCKING_SYSV */ bool incore; - bool prefault_write_activated; #if MDBX_ENABLE_DBI_LOCKFREE defer_free_item_t *defer_free; From ffb7918525a75859464464836bcef5e5bbc619d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 20 Dec 2024 20:35:47 +0300 Subject: [PATCH 406/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=BE=D0=BD=D0=B8=D0=B6?= 
=?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=83=D1=80=D0=BE=D0=B2=D0=BD=D1=8F?= =?UTF-8?q?=20=D0=BB=D0=BE=D0=B3=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8?= =?UTF-8?q?=D1=8F=20"reserve=20depleted"=20=D0=BF=D1=80=D0=B8=20=D0=BE?= =?UTF-8?q?=D0=B1=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B8=20GC.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/gc-put.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gc-put.c b/src/gc-put.c index 04999c6a..28f13f20 100644 --- a/src/gc-put.c +++ b/src/gc-put.c @@ -415,7 +415,7 @@ static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, const size_t left) if (unlikely(ctx->rid <= MIN_TXNID)) { ctx->dense = true; if (unlikely(MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) <= ctx->reused_slot)) { - NOTICE("** restart: reserve depleted (reused_gc_slot %zu >= " + VERBOSE("** restart: reserve depleted (reused_gc_slot %zu >= " "gc.reclaimed %zu)", ctx->reused_slot, MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)); goto return_restart; @@ -838,7 +838,7 @@ retry: if (ctx->fill_idx == 0 || fill_gc_id > txn->tw.gc.last_reclaimed) { if (!left) break; - NOTICE("** restart: reserve depleted (fill_idx %zu, fill_id %" PRIaTXN " > last_reclaimed %" PRIaTXN + VERBOSE("** restart: reserve depleted (fill_idx %zu, fill_id %" PRIaTXN " > last_reclaimed %" PRIaTXN ", left %zu", ctx->fill_idx, fill_gc_id, txn->tw.gc.last_reclaimed, left); ctx->reserve_adj = (ctx->reserve_adj > left) ? ctx->reserve_adj - left : 0; @@ -850,7 +850,7 @@ retry: if (ctx->fill_idx >= MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)) { if (!left) break; - NOTICE("** restart: reserve depleted (fill_idx %zu >= " + VERBOSE("** restart: reserve depleted (fill_idx %zu >= " "gc.reclaimed %zu, left %zu", ctx->fill_idx, MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed), left); ctx->reserve_adj = (ctx->reserve_adj > left) ? 
ctx->reserve_adj - left : 0; From 225fb79eb2be3ec49ca074b364987063ad29ac96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 19 Dec 2024 22:03:03 +0300 Subject: [PATCH 407/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=B5=D1=80=D0=B5=D0=B8?= =?UTF-8?q?=D0=BC=D0=B5=D0=BD=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20repnl/?= =?UTF-8?q?retxl.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/api-env.c | 4 +- src/api-txn.c | 42 ++++++------ src/audit.c | 10 +-- src/dpl.c | 8 +-- src/env.c | 4 +- src/gc-get.c | 136 ++++++++++++++++++------------------- src/gc-put.c | 173 ++++++++++++++++++++++++------------------------ src/internals.h | 8 +-- src/page-ops.c | 6 +- src/refund.c | 6 +- src/tree-ops.c | 2 +- src/txn.c | 22 +++--- 12 files changed, 210 insertions(+), 211 deletions(-) diff --git a/src/api-env.c b/src/api-env.c index cbfba423..1787f7fe 100644 --- a/src/api-env.c +++ b/src/api-env.c @@ -510,8 +510,8 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, MDBX_env_flags txn->flags = MDBX_TXN_FINISHED; env->basal_txn = txn; txn->tw.retired_pages = pnl_alloc(MDBX_PNL_INITIAL); - txn->tw.relist = pnl_alloc(MDBX_PNL_INITIAL); - if (unlikely(!txn->tw.retired_pages || !txn->tw.relist)) { + txn->tw.repnl = pnl_alloc(MDBX_PNL_INITIAL); + if (unlikely(!txn->tw.retired_pages || !txn->tw.repnl)) { rc = MDBX_ENOMEM; goto bailout; } diff --git a/src/api-txn.c b/src/api-txn.c index 508f37d5..3e930f04 100644 --- a/src/api-txn.c +++ b/src/api-txn.c @@ -257,14 +257,14 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, M txn->geo = parent->geo; rc = dpl_alloc(txn); if (likely(rc == MDBX_SUCCESS)) { - const size_t len = MDBX_PNL_GETSIZE(parent->tw.relist) + parent->tw.loose_count; - txn->tw.relist = pnl_alloc((len > MDBX_PNL_INITIAL) ? 
len : MDBX_PNL_INITIAL); - if (unlikely(!txn->tw.relist)) + const size_t len = MDBX_PNL_GETSIZE(parent->tw.repnl) + parent->tw.loose_count; + txn->tw.repnl = pnl_alloc((len > MDBX_PNL_INITIAL) ? len : MDBX_PNL_INITIAL); + if (unlikely(!txn->tw.repnl)) rc = MDBX_ENOMEM; } if (unlikely(rc != MDBX_SUCCESS)) { nested_failed: - pnl_free(txn->tw.relist); + pnl_free(txn->tw.repnl); dpl_free(txn); osal_free(txn); return LOG_IFERR(rc); @@ -275,7 +275,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, M do { page_t *lp = parent->tw.loose_pages; tASSERT(parent, lp->flags == P_LOOSE); - rc = pnl_insert_span(&parent->tw.relist, lp->pgno, 1); + rc = pnl_insert_span(&parent->tw.repnl, lp->pgno, 1); if (unlikely(rc != MDBX_SUCCESS)) goto nested_failed; MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *)); @@ -297,18 +297,18 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, M if (parent->tw.spilled.list) spill_purge(parent); - tASSERT(txn, MDBX_PNL_ALLOCLEN(txn->tw.relist) >= MDBX_PNL_GETSIZE(parent->tw.relist)); - memcpy(txn->tw.relist, parent->tw.relist, MDBX_PNL_SIZEOF(parent->tw.relist)); - eASSERT(env, pnl_check_allocated(txn->tw.relist, (txn->geo.first_unallocated /* LY: intentional assignment + tASSERT(txn, MDBX_PNL_ALLOCLEN(txn->tw.repnl) >= MDBX_PNL_GETSIZE(parent->tw.repnl)); + memcpy(txn->tw.repnl, parent->tw.repnl, MDBX_PNL_SIZEOF(parent->tw.repnl)); + eASSERT(env, pnl_check_allocated(txn->tw.repnl, (txn->geo.first_unallocated /* LY: intentional assignment here, only for assertion */ - = parent->geo.first_unallocated) - - MDBX_ENABLE_REFUND)); + = parent->geo.first_unallocated) - + MDBX_ENABLE_REFUND)); txn->tw.gc.time_acc = parent->tw.gc.time_acc; txn->tw.gc.last_reclaimed = parent->tw.gc.last_reclaimed; - if (parent->tw.gc.reclaimed) { - txn->tw.gc.reclaimed = parent->tw.gc.reclaimed; - parent->tw.gc.reclaimed = (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->tw.gc.reclaimed); + if 
(parent->tw.gc.retxl) { + txn->tw.gc.retxl = parent->tw.gc.retxl; + parent->tw.gc.retxl = (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->tw.gc.retxl); } txn->tw.retired_pages = parent->tw.retired_pages; @@ -464,7 +464,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { tASSERT(txn, parent_retired_len <= MDBX_PNL_GETSIZE(txn->tw.retired_pages)); const size_t retired_delta = MDBX_PNL_GETSIZE(txn->tw.retired_pages) - parent_retired_len; if (retired_delta) { - rc = pnl_need(&txn->tw.relist, retired_delta); + rc = pnl_need(&txn->tw.repnl, retired_delta); if (unlikely(rc != MDBX_SUCCESS)) goto fail; } @@ -486,15 +486,15 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { //------------------------------------------------------------------------- - parent->tw.gc.reclaimed = txn->tw.gc.reclaimed; - txn->tw.gc.reclaimed = nullptr; + parent->tw.gc.retxl = txn->tw.gc.retxl; + txn->tw.gc.retxl = nullptr; parent->tw.retired_pages = txn->tw.retired_pages; txn->tw.retired_pages = nullptr; - pnl_free(parent->tw.relist); - parent->tw.relist = txn->tw.relist; - txn->tw.relist = nullptr; + pnl_free(parent->tw.repnl); + parent->tw.repnl = txn->tw.repnl; + txn->tw.repnl = nullptr; parent->tw.gc.time_acc = txn->tw.gc.time_acc; parent->tw.gc.last_reclaimed = txn->tw.gc.last_reclaimed; @@ -550,8 +550,8 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *)); } /* Check parent's reclaimed pages not suitable for refund */ - if (MDBX_PNL_GETSIZE(parent->tw.relist)) - tASSERT(parent, MDBX_PNL_MOST(parent->tw.relist) + 1 < parent->geo.first_unallocated); + if (MDBX_PNL_GETSIZE(parent->tw.repnl)) + tASSERT(parent, MDBX_PNL_MOST(parent->tw.repnl) + 1 < parent->geo.first_unallocated); } #endif /* MDBX_ENABLE_REFUND */ diff --git a/src/audit.c b/src/audit.c index 05c69632..f5b20f4a 100644 --- a/src/audit.c +++ b/src/audit.c @@ -28,7 +28,7 @@ __cold static int 
audit_ex_locked(MDBX_txn *txn, size_t retired_stored, bool don const MDBX_env *const env = txn->env; size_t pending = 0; if ((txn->flags & MDBX_TXN_RDONLY) == 0) - pending = txn->tw.loose_count + MDBX_PNL_GETSIZE(txn->tw.relist) + + pending = txn->tw.loose_count + MDBX_PNL_GETSIZE(txn->tw.repnl) + (MDBX_PNL_GETSIZE(txn->tw.retired_pages) - retired_stored); cursor_couple_t cx; @@ -46,9 +46,9 @@ __cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, bool don return MDBX_CORRUPTED; } txnid_t id = unaligned_peek_u64(4, key.iov_base); - if (txn->tw.gc.reclaimed) { - for (size_t i = 1; i <= MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed); ++i) - if (id == txn->tw.gc.reclaimed[i]) + if (txn->tw.gc.retxl) { + for (size_t i = 1; i <= MDBX_PNL_GETSIZE(txn->tw.gc.retxl); ++i) + if (id == txn->tw.gc.retxl[i]) goto skip; } else if (id <= txn->tw.gc.last_reclaimed) goto skip; @@ -93,7 +93,7 @@ __cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, bool don if ((txn->flags & MDBX_TXN_RDONLY) == 0) ERROR("audit @%" PRIaTXN ": %zu(pending) = %zu(loose) + " "%zu(reclaimed) + %zu(retired-pending) - %zu(retired-stored)", - txn->txnid, pending, txn->tw.loose_count, MDBX_PNL_GETSIZE(txn->tw.relist), + txn->txnid, pending, txn->tw.loose_count, MDBX_PNL_GETSIZE(txn->tw.repnl), txn->tw.retired_pages ? 
MDBX_PNL_GETSIZE(txn->tw.retired_pages) : 0, retired_stored); ERROR("audit @%" PRIaTXN ": %zu(pending) + %zu" "(gc) + %zu(count) = %zu(total) <> %zu" diff --git a/src/dpl.c b/src/dpl.c index 8e0c5dab..9f244b19 100644 --- a/src/dpl.c +++ b/src/dpl.c @@ -362,12 +362,12 @@ __cold bool dpl_check(MDBX_txn *txn) { return false; } - const size_t rpa = pnl_search(txn->tw.relist, dp->pgno, txn->geo.first_unallocated); - tASSERT(txn, rpa > MDBX_PNL_GETSIZE(txn->tw.relist) || txn->tw.relist[rpa] != dp->pgno); - if (rpa <= MDBX_PNL_GETSIZE(txn->tw.relist) && unlikely(txn->tw.relist[rpa] == dp->pgno)) + const size_t rpa = pnl_search(txn->tw.repnl, dp->pgno, txn->geo.first_unallocated); + tASSERT(txn, rpa > MDBX_PNL_GETSIZE(txn->tw.repnl) || txn->tw.repnl[rpa] != dp->pgno); + if (rpa <= MDBX_PNL_GETSIZE(txn->tw.repnl) && unlikely(txn->tw.repnl[rpa] == dp->pgno)) return false; if (num > 1) { - const size_t rpb = pnl_search(txn->tw.relist, dp->pgno + num - 1, txn->geo.first_unallocated); + const size_t rpb = pnl_search(txn->tw.repnl, dp->pgno + num - 1, txn->geo.first_unallocated); tASSERT(txn, rpa == rpb); if (unlikely(rpa != rpb)) return false; diff --git a/src/env.c b/src/env.c index 91af9413..5de253f4 100644 --- a/src/env.c +++ b/src/env.c @@ -612,10 +612,10 @@ __cold int env_close(MDBX_env *env, bool resurrect_after_fork) { } if (env->basal_txn) { dpl_free(env->basal_txn); - txl_free(env->basal_txn->tw.gc.reclaimed); + txl_free(env->basal_txn->tw.gc.retxl); pnl_free(env->basal_txn->tw.retired_pages); pnl_free(env->basal_txn->tw.spilled.list); - pnl_free(env->basal_txn->tw.relist); + pnl_free(env->basal_txn->tw.repnl); osal_free(env->basal_txn); env->basal_txn = nullptr; } diff --git a/src/gc-get.c b/src/gc-get.c index 39b68bba..ff70ba3c 100644 --- a/src/gc-get.c +++ b/src/gc-get.c @@ -591,17 +591,17 @@ static inline bool is_gc_usable(MDBX_txn *txn, const MDBX_cursor *mc, const uint } __hot static bool is_already_reclaimed(const MDBX_txn *txn, txnid_t id) { - const size_t len 
= MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed); + const size_t len = MDBX_PNL_GETSIZE(txn->tw.gc.retxl); for (size_t i = 1; i <= len; ++i) - if (txn->tw.gc.reclaimed[i] == id) + if (txn->tw.gc.retxl[i] == id) return true; return false; } -__hot static pgno_t relist_get_single(MDBX_txn *txn) { - const size_t len = MDBX_PNL_GETSIZE(txn->tw.relist); +__hot static pgno_t repnl_get_single(MDBX_txn *txn) { + const size_t len = MDBX_PNL_GETSIZE(txn->tw.repnl); assert(len > 0); - pgno_t *target = MDBX_PNL_EDGE(txn->tw.relist); + pgno_t *target = MDBX_PNL_EDGE(txn->tw.repnl); const ptrdiff_t dir = MDBX_PNL_ASCENDING ? 1 : -1; /* Есть ТРИ потенциально выигрышные, но противо-направленные тактики: @@ -610,7 +610,7 @@ __hot static pgno_t relist_get_single(MDBX_txn *txn) { * диском будет более кучным, а у страниц ближе к концу БД будет больше шансов * попасть под авто-компактификацию. Частично эта тактика уже реализована, но * для её эффективности требуется явно приоритезировать выделение страниц: - * - поддерживать два relist, для ближних и для дальних страниц; + * - поддерживать два repnl, для ближних и для дальних страниц; * - использовать страницы из дальнего списка, если первый пуст, * а второй слишком большой, либо при пустой GC. * @@ -634,7 +634,7 @@ __hot static pgno_t relist_get_single(MDBX_txn *txn) { * сама-по-себе, но и работает во вред (добавляет хаоса). 
* * Поэтому: - * - в TODO добавляется разделение relist на «ближние» и «дальние» страницы, + * - в TODO добавляется разделение repnl на «ближние» и «дальние» страницы, * с последующей реализацией первой тактики; * - преимущественное использование последовательностей отправляется * в MithrilDB как составляющая "HDD frendly" feature; @@ -669,7 +669,7 @@ __hot static pgno_t relist_get_single(MDBX_txn *txn) { #else /* вырезаем элемент с перемещением хвоста */ const pgno_t pgno = *scan; - MDBX_PNL_SETSIZE(txn->tw.relist, len - 1); + MDBX_PNL_SETSIZE(txn->tw.repnl, len - 1); while (++scan <= target) scan[-1] = *scan; return pgno; @@ -682,44 +682,44 @@ __hot static pgno_t relist_get_single(MDBX_txn *txn) { const pgno_t pgno = *target; #if MDBX_PNL_ASCENDING /* вырезаем элемент с перемещением хвоста */ - MDBX_PNL_SETSIZE(txn->tw.relist, len - 1); - for (const pgno_t *const end = txn->tw.relist + len - 1; target <= end; ++target) + MDBX_PNL_SETSIZE(txn->tw.repnl, len - 1); + for (const pgno_t *const end = txn->tw.repnl + len - 1; target <= end; ++target) *target = target[1]; #else /* перемещать хвост не нужно, просто усекам список */ - MDBX_PNL_SETSIZE(txn->tw.relist, len - 1); + MDBX_PNL_SETSIZE(txn->tw.repnl, len - 1); #endif return pgno; } -__hot static pgno_t relist_get_sequence(MDBX_txn *txn, const size_t num, uint8_t flags) { - const size_t len = MDBX_PNL_GETSIZE(txn->tw.relist); - pgno_t *edge = MDBX_PNL_EDGE(txn->tw.relist); +__hot static pgno_t repnl_get_sequence(MDBX_txn *txn, const size_t num, uint8_t flags) { + const size_t len = MDBX_PNL_GETSIZE(txn->tw.repnl); + pgno_t *edge = MDBX_PNL_EDGE(txn->tw.repnl); assert(len >= num && num > 1); const size_t seq = num - 1; #if !MDBX_PNL_ASCENDING if (edge[-(ptrdiff_t)seq] - *edge == seq) { if (unlikely(flags & ALLOC_RESERVE)) return P_INVALID; - assert(edge == scan4range_checker(txn->tw.relist, seq)); + assert(edge == scan4range_checker(txn->tw.repnl, seq)); /* перемещать хвост не нужно, просто усекам список */ - 
MDBX_PNL_SETSIZE(txn->tw.relist, len - num); + MDBX_PNL_SETSIZE(txn->tw.repnl, len - num); return *edge; } #endif pgno_t *target = scan4seq_impl(edge, len, seq); - assert(target == scan4range_checker(txn->tw.relist, seq)); + assert(target == scan4range_checker(txn->tw.repnl, seq)); if (target) { if (unlikely(flags & ALLOC_RESERVE)) return P_INVALID; const pgno_t pgno = *target; /* вырезаем найденную последовательность с перемещением хвоста */ - MDBX_PNL_SETSIZE(txn->tw.relist, len - num); + MDBX_PNL_SETSIZE(txn->tw.repnl, len - num); #if MDBX_PNL_ASCENDING - for (const pgno_t *const end = txn->tw.relist + len - num; target <= end; ++target) + for (const pgno_t *const end = txn->tw.repnl + len - num; target <= end; ++target) *target = target[num]; #else - for (const pgno_t *const end = txn->tw.relist + len; ++target <= end;) + for (const pgno_t *const end = txn->tw.repnl + len; ++target <= end;) target[-(ptrdiff_t)num] = *target; #endif return pgno; @@ -829,7 +829,7 @@ static inline pgr_t page_alloc_finalize(MDBX_env *const env, MDBX_txn *const txn ret.err = page_dirty(txn, ret.page, (pgno_t)num); bailout: - tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); + tASSERT(txn, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); #if MDBX_ENABLE_PROFGC size_t majflt_after; prof->xtime_cpu += osal_cputime(&majflt_after) - cputime_before; @@ -849,7 +849,7 @@ pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, uint8_t flags) #endif /* MDBX_ENABLE_PROFGC */ eASSERT(env, num > 0 || (flags & ALLOC_RESERVE)); - eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); + eASSERT(env, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); size_t newnext; const uint64_t monotime_begin = (MDBX_ENABLE_PROFGC || (num > 1 && env->options.gc_time_limit)) ? 
osal_monotime() : 0; @@ -864,15 +864,15 @@ pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, uint8_t flags) #if MDBX_ENABLE_PROFGC prof->xpages += 1; #endif /* MDBX_ENABLE_PROFGC */ - if (MDBX_PNL_GETSIZE(txn->tw.relist) >= num) { - eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->geo.first_unallocated && - MDBX_PNL_FIRST(txn->tw.relist) < txn->geo.first_unallocated); - pgno = relist_get_sequence(txn, num, flags); + if (MDBX_PNL_GETSIZE(txn->tw.repnl) >= num) { + eASSERT(env, MDBX_PNL_LAST(txn->tw.repnl) < txn->geo.first_unallocated && + MDBX_PNL_FIRST(txn->tw.repnl) < txn->geo.first_unallocated); + pgno = repnl_get_sequence(txn, num, flags); if (likely(pgno)) goto done; } } else { - eASSERT(env, num == 0 || MDBX_PNL_GETSIZE(txn->tw.relist) == 0); + eASSERT(env, num == 0 || MDBX_PNL_GETSIZE(txn->tw.repnl) == 0); eASSERT(env, !(flags & ALLOC_RESERVE) || num == 0); } @@ -890,7 +890,7 @@ pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, uint8_t flags) * Иначе попытка увеличить резерв может приводить к необходимости ещё * большего резерва из-за увеличения списка переработанных страниц. 
*/ (flags & ALLOC_RESERVE) == 0) { - if (txn->dbs[FREE_DBI].branch_pages && MDBX_PNL_GETSIZE(txn->tw.relist) < env->maxgc_large1page / 2) + if (txn->dbs[FREE_DBI].branch_pages && MDBX_PNL_GETSIZE(txn->tw.repnl) < env->maxgc_large1page / 2) flags += ALLOC_COALESCE; } @@ -930,9 +930,9 @@ retry_gc_have_oldest: txnid_t id = 0; MDBX_cursor_op op = MDBX_FIRST; if (flags & ALLOC_LIFO) { - if (!txn->tw.gc.reclaimed) { - txn->tw.gc.reclaimed = txl_alloc(); - if (unlikely(!txn->tw.gc.reclaimed)) { + if (!txn->tw.gc.retxl) { + txn->tw.gc.retxl = txl_alloc(); + if (unlikely(!txn->tw.gc.retxl)) { ret.err = MDBX_ENOMEM; goto fail; } @@ -1000,9 +1000,9 @@ next_gc:; } const size_t gc_len = MDBX_PNL_GETSIZE(gc_pnl); - TRACE("gc-read: id #%" PRIaTXN " len %zu, re-list will %zu ", id, gc_len, gc_len + MDBX_PNL_GETSIZE(txn->tw.relist)); + TRACE("gc-read: id #%" PRIaTXN " len %zu, re-list will %zu ", id, gc_len, gc_len + MDBX_PNL_GETSIZE(txn->tw.repnl)); - if (unlikely(gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) >= env->maxgc_large1page)) { + if (unlikely(gc_len + MDBX_PNL_GETSIZE(txn->tw.repnl) >= env->maxgc_large1page)) { /* Don't try to coalesce too much. 
*/ if (flags & ALLOC_SHOULD_SCAN) { eASSERT(env, flags & ALLOC_COALESCE); @@ -1012,32 +1012,32 @@ next_gc:; env->lck->pgops.gc_prof.coalescences += 1; #endif /* MDBX_ENABLE_PROFGC */ TRACE("clear %s %s", "ALLOC_COALESCE", "since got threshold"); - if (MDBX_PNL_GETSIZE(txn->tw.relist) >= num) { - eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->geo.first_unallocated && - MDBX_PNL_FIRST(txn->tw.relist) < txn->geo.first_unallocated); + if (MDBX_PNL_GETSIZE(txn->tw.repnl) >= num) { + eASSERT(env, MDBX_PNL_LAST(txn->tw.repnl) < txn->geo.first_unallocated && + MDBX_PNL_FIRST(txn->tw.repnl) < txn->geo.first_unallocated); if (likely(num == 1)) { - pgno = relist_get_single(txn); + pgno = repnl_get_single(txn); goto done; } - pgno = relist_get_sequence(txn, num, flags); + pgno = repnl_get_sequence(txn, num, flags); if (likely(pgno)) goto done; } flags -= ALLOC_COALESCE | ALLOC_SHOULD_SCAN; } - if (unlikely(/* list is too long already */ MDBX_PNL_GETSIZE(txn->tw.relist) >= env->options.rp_augment_limit) && + if (unlikely(/* list is too long already */ MDBX_PNL_GETSIZE(txn->tw.repnl) >= env->options.rp_augment_limit) && ((/* not a slot-request from gc-update */ num && /* have enough unallocated space */ txn->geo.upper >= txn->geo.first_unallocated + num && monotime_since_cached(monotime_begin, &now_cache) + txn->tw.gc.time_acc >= env->options.gc_time_limit) || - gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) >= PAGELIST_LIMIT)) { + gc_len + MDBX_PNL_GETSIZE(txn->tw.repnl) >= PAGELIST_LIMIT)) { /* Stop reclaiming to avoid large/overflow the page list. This is a rare * case while search for a continuously multi-page region in a * large database, see https://libmdbx.dqdkfa.ru/dead-github/issues/123 */ NOTICE("stop reclaiming %s: %zu (current) + %zu " "(chunk) -> %zu, rp_augment_limit %u", - likely(gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) < PAGELIST_LIMIT) ? 
"since rp_augment_limit was reached" - : "to avoid PNL overflow", - MDBX_PNL_GETSIZE(txn->tw.relist), gc_len, gc_len + MDBX_PNL_GETSIZE(txn->tw.relist), + likely(gc_len + MDBX_PNL_GETSIZE(txn->tw.repnl) < PAGELIST_LIMIT) ? "since rp_augment_limit was reached" + : "to avoid PNL overflow", + MDBX_PNL_GETSIZE(txn->tw.repnl), gc_len, gc_len + MDBX_PNL_GETSIZE(txn->tw.repnl), env->options.rp_augment_limit); goto depleted_gc; } @@ -1046,13 +1046,13 @@ next_gc:; /* Remember ID of readed GC record */ txn->tw.gc.last_reclaimed = id; if (flags & ALLOC_LIFO) { - ret.err = txl_append(&txn->tw.gc.reclaimed, id); + ret.err = txl_append(&txn->tw.gc.retxl, id); if (unlikely(ret.err != MDBX_SUCCESS)) goto fail; } - /* Append PNL from GC record to tw.relist */ - ret.err = pnl_need(&txn->tw.relist, gc_len); + /* Append PNL from GC record to tw.repnl */ + ret.err = pnl_need(&txn->tw.repnl, gc_len); if (unlikely(ret.err != MDBX_SUCCESS)) goto fail; @@ -1067,36 +1067,36 @@ next_gc:; #if MDBX_ENABLE_PROFGC const uint64_t merge_begin = osal_monotime(); #endif /* MDBX_ENABLE_PROFGC */ - pnl_merge(txn->tw.relist, gc_pnl); + pnl_merge(txn->tw.repnl, gc_pnl); #if MDBX_ENABLE_PROFGC prof->pnl_merge.calls += 1; - prof->pnl_merge.volume += MDBX_PNL_GETSIZE(txn->tw.relist); + prof->pnl_merge.volume += MDBX_PNL_GETSIZE(txn->tw.repnl); prof->pnl_merge.time += osal_monotime() - merge_begin; #endif /* MDBX_ENABLE_PROFGC */ flags |= ALLOC_SHOULD_SCAN; if (AUDIT_ENABLED()) { - if (unlikely(!pnl_check(txn->tw.relist, txn->geo.first_unallocated))) { + if (unlikely(!pnl_check(txn->tw.repnl, txn->geo.first_unallocated))) { ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid txn retired-list"); ret.err = MDBX_CORRUPTED; goto fail; } } else { - eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated)); + eASSERT(env, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated)); } eASSERT(env, dpl_check(txn)); - eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.relist) == 0 || 
MDBX_PNL_MOST(txn->tw.relist) < txn->geo.first_unallocated); - if (MDBX_ENABLE_REFUND && MDBX_PNL_GETSIZE(txn->tw.relist) && - unlikely(MDBX_PNL_MOST(txn->tw.relist) == txn->geo.first_unallocated - 1)) { + eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.repnl) == 0 || MDBX_PNL_MOST(txn->tw.repnl) < txn->geo.first_unallocated); + if (MDBX_ENABLE_REFUND && MDBX_PNL_GETSIZE(txn->tw.repnl) && + unlikely(MDBX_PNL_MOST(txn->tw.repnl) == txn->geo.first_unallocated - 1)) { /* Refund suitable pages into "unallocated" space */ txn_refund(txn); } - eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); + eASSERT(env, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); /* Done for a kick-reclaim mode, actually no page needed */ if (unlikely(num == 0)) { eASSERT(env, ret.err == MDBX_SUCCESS); - TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "early-exit for slot", id, MDBX_PNL_GETSIZE(txn->tw.relist)); + TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "early-exit for slot", id, MDBX_PNL_GETSIZE(txn->tw.repnl)); goto early_exit; } @@ -1104,33 +1104,33 @@ next_gc:; eASSERT(env, op == MDBX_PREV || op == MDBX_NEXT); if (flags & ALLOC_COALESCE) { - TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "coalesce-continue", id, MDBX_PNL_GETSIZE(txn->tw.relist)); + TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "coalesce-continue", id, MDBX_PNL_GETSIZE(txn->tw.repnl)); goto next_gc; } scan: eASSERT(env, flags & ALLOC_SHOULD_SCAN); eASSERT(env, num > 0); - if (MDBX_PNL_GETSIZE(txn->tw.relist) >= num) { - eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->geo.first_unallocated && - MDBX_PNL_FIRST(txn->tw.relist) < txn->geo.first_unallocated); + if (MDBX_PNL_GETSIZE(txn->tw.repnl) >= num) { + eASSERT(env, MDBX_PNL_LAST(txn->tw.repnl) < txn->geo.first_unallocated && + MDBX_PNL_FIRST(txn->tw.repnl) < txn->geo.first_unallocated); if (likely(num == 1)) { eASSERT(env, !(flags & ALLOC_RESERVE)); - pgno = relist_get_single(txn); + pgno = 
repnl_get_single(txn); goto done; } - pgno = relist_get_sequence(txn, num, flags); + pgno = repnl_get_sequence(txn, num, flags); if (likely(pgno)) goto done; } flags -= ALLOC_SHOULD_SCAN; if (ret.err == MDBX_SUCCESS) { - TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "continue-search", id, MDBX_PNL_GETSIZE(txn->tw.relist)); + TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "continue-search", id, MDBX_PNL_GETSIZE(txn->tw.repnl)); goto next_gc; } depleted_gc: - TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "gc-depleted", id, MDBX_PNL_GETSIZE(txn->tw.relist)); + TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "gc-depleted", id, MDBX_PNL_GETSIZE(txn->tw.repnl)); ret.err = MDBX_NOTFOUND; if (flags & ALLOC_SHOULD_SCAN) goto scan; @@ -1269,7 +1269,7 @@ done: if (likely((flags & ALLOC_RESERVE) == 0)) { if (pgno) { eASSERT(env, pgno + num <= txn->geo.first_unallocated && pgno >= NUM_METAS); - eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); + eASSERT(env, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); } else { pgno = txn->geo.first_unallocated; txn->geo.first_unallocated += (pgno_t)num; @@ -1281,7 +1281,7 @@ done: if (unlikely(ret.err != MDBX_SUCCESS)) { fail: eASSERT(env, ret.err != MDBX_SUCCESS); - eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); + eASSERT(env, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); int level; const char *what; if (flags & ALLOC_RESERVE) { @@ -1297,7 +1297,7 @@ done: "unable alloc %zu %s, alloc-flags 0x%x, err %d, txn-flags " "0x%x, re-list-len %zu, loose-count %zu, gc: height %u, " "branch %zu, leaf %zu, large %zu, entries %zu\n", - num, what, flags, ret.err, txn->flags, MDBX_PNL_GETSIZE(txn->tw.relist), txn->tw.loose_count, + num, what, flags, ret.err, txn->flags, MDBX_PNL_GETSIZE(txn->tw.repnl), txn->tw.loose_count, txn->dbs[FREE_DBI].height, 
(size_t)txn->dbs[FREE_DBI].branch_pages, (size_t)txn->dbs[FREE_DBI].leaf_pages, (size_t)txn->dbs[FREE_DBI].large_pages, (size_t)txn->dbs[FREE_DBI].items); @@ -1346,8 +1346,8 @@ __hot pgr_t gc_alloc_single(const MDBX_cursor *const mc) { return ret; } - if (likely(MDBX_PNL_GETSIZE(txn->tw.relist) > 0)) - return page_alloc_finalize(txn->env, txn, mc, relist_get_single(txn), 1); + if (likely(MDBX_PNL_GETSIZE(txn->tw.repnl) > 0)) + return page_alloc_finalize(txn->env, txn, mc, repnl_get_single(txn), 1); return gc_alloc_ex(mc, 1, ALLOC_DEFAULT); } diff --git a/src/gc-put.c b/src/gc-put.c index 28f13f20..4c6fd070 100644 --- a/src/gc-put.c +++ b/src/gc-put.c @@ -11,7 +11,7 @@ MDBX_MAYBE_UNUSED static inline const char *dbg_prefix(const gcu_t *ctx) { return is_lifo(ctx->cursor.txn) ? " lifo" : " fifo"; } -static inline size_t backlog_size(MDBX_txn *txn) { return MDBX_PNL_GETSIZE(txn->tw.relist) + txn->tw.loose_count; } +static inline size_t backlog_size(MDBX_txn *txn) { return MDBX_PNL_GETSIZE(txn->tw.repnl) + txn->tw.loose_count; } static int clean_stored_retired(MDBX_txn *txn, gcu_t *ctx) { int err = MDBX_SUCCESS; @@ -72,35 +72,35 @@ static int prepare_backlog(MDBX_txn *txn, gcu_t *ctx) { tASSERT(txn, is_pointed(&ctx->cursor) || txn->dbs[FREE_DBI].leaf_pages == 0); const intptr_t retired_left = MDBX_PNL_SIZEOF(txn->tw.retired_pages) - ctx->retired_stored; - size_t for_relist = 0; + size_t for_repnl = 0; if (MDBX_ENABLE_BIGFOOT && retired_left > 0) { - for_relist = (retired_left + txn->env->maxgc_large1page - 1) / txn->env->maxgc_large1page; + for_repnl = (retired_left + txn->env->maxgc_large1page - 1) / txn->env->maxgc_large1page; const size_t per_branch_page = txn->env->maxgc_per_branch; - for (size_t entries = for_relist; entries > 1; for_split += entries) + for (size_t entries = for_repnl; entries > 1; for_split += entries) entries = (entries + per_branch_page - 1) / per_branch_page; } else if (!MDBX_ENABLE_BIGFOOT && retired_left != 0) { - for_relist = 
largechunk_npages(txn->env, MDBX_PNL_SIZEOF(txn->tw.retired_pages)); + for_repnl = largechunk_npages(txn->env, MDBX_PNL_SIZEOF(txn->tw.retired_pages)); } const size_t for_tree_before_touch = for_cow + for_rebalance + for_split; const size_t for_tree_after_touch = for_rebalance + for_split; - const size_t for_all_before_touch = for_relist + for_tree_before_touch; - const size_t for_all_after_touch = for_relist + for_tree_after_touch; + const size_t for_all_before_touch = for_repnl + for_tree_before_touch; + const size_t for_all_after_touch = for_repnl + for_tree_after_touch; - if (likely(for_relist < 2 && backlog_size(txn) > for_all_before_touch) && + if (likely(for_repnl < 2 && backlog_size(txn) > for_all_before_touch) && (ctx->cursor.top < 0 || is_modifable(txn, ctx->cursor.pg[ctx->cursor.top]))) return MDBX_SUCCESS; TRACE(">> retired-stored %zu, left %zi, backlog %zu, need %zu (4list %zu, " "4split %zu, " "4cow %zu, 4tree %zu)", - ctx->retired_stored, retired_left, backlog_size(txn), for_all_before_touch, for_relist, for_split, for_cow, + ctx->retired_stored, retired_left, backlog_size(txn), for_all_before_touch, for_repnl, for_split, for_cow, for_tree_before_touch); int err = touch_gc(ctx); TRACE("== after-touch, backlog %zu, err %d", backlog_size(txn), err); - if (!MDBX_ENABLE_BIGFOOT && unlikely(for_relist > 1) && + if (!MDBX_ENABLE_BIGFOOT && unlikely(for_repnl > 1) && MDBX_PNL_GETSIZE(txn->tw.retired_pages) != ctx->retired_stored && err == MDBX_SUCCESS) { if (unlikely(ctx->retired_stored)) { err = clean_stored_retired(txn, ctx); @@ -109,9 +109,9 @@ static int prepare_backlog(MDBX_txn *txn, gcu_t *ctx) { if (!ctx->retired_stored) return /* restart by tail-recursion */ prepare_backlog(txn, ctx); } - err = gc_alloc_ex(&ctx->cursor, for_relist, ALLOC_RESERVE).err; + err = gc_alloc_ex(&ctx->cursor, for_repnl, ALLOC_RESERVE).err; TRACE("== after-4linear, backlog %zu, err %d", backlog_size(txn), err); - cASSERT(&ctx->cursor, backlog_size(txn) >= for_relist || err 
!= MDBX_SUCCESS); + cASSERT(&ctx->cursor, backlog_size(txn) >= for_repnl || err != MDBX_SUCCESS); } while (backlog_size(txn) < for_all_after_touch && err == MDBX_SUCCESS) @@ -146,10 +146,10 @@ static inline void zeroize_reserved(const MDBX_env *env, MDBX_val pnl) { static int gcu_loose(MDBX_txn *txn, gcu_t *ctx) { tASSERT(txn, txn->tw.loose_count > 0); - /* Return loose page numbers to tw.relist, + /* Return loose page numbers to tw.repnl, * though usually none are left at this point. * The pages themselves remain in dirtylist. */ - if (unlikely(!txn->tw.gc.reclaimed && txn->tw.gc.last_reclaimed < 1)) { + if (unlikely(!txn->tw.gc.retxl && txn->tw.gc.last_reclaimed < 1)) { TRACE("%s: try allocate gc-slot for %zu loose-pages", dbg_prefix(ctx), txn->tw.loose_count); int err = gc_alloc_ex(&ctx->cursor, 0, ALLOC_RESERVE).err; if (err == MDBX_SUCCESS) { @@ -158,7 +158,7 @@ static int gcu_loose(MDBX_txn *txn, gcu_t *ctx) { } /* Put loose page numbers in tw.retired_pages, - * since unable to return ones to tw.relist. */ + * since unable to return ones to tw.repnl. 
*/ err = pnl_need(&txn->tw.retired_pages, txn->tw.loose_count); if (unlikely(err != MDBX_SUCCESS)) return err; @@ -170,10 +170,10 @@ static int gcu_loose(MDBX_txn *txn, gcu_t *ctx) { TRACE("%s: append %zu loose-pages to retired-pages", dbg_prefix(ctx), txn->tw.loose_count); } else { /* Room for loose pages + temp PNL with same */ - int err = pnl_need(&txn->tw.relist, 2 * txn->tw.loose_count + 2); + int err = pnl_need(&txn->tw.repnl, 2 * txn->tw.loose_count + 2); if (unlikely(err != MDBX_SUCCESS)) return err; - pnl_t loose = txn->tw.relist + MDBX_PNL_ALLOCLEN(txn->tw.relist) - txn->tw.loose_count - 1; + pnl_t loose = txn->tw.repnl + MDBX_PNL_ALLOCLEN(txn->tw.repnl) - txn->tw.loose_count - 1; size_t count = 0; for (page_t *lp = txn->tw.loose_pages; lp; lp = page_next(lp)) { tASSERT(txn, lp->flags == P_LOOSE); @@ -184,7 +184,7 @@ static int gcu_loose(MDBX_txn *txn, gcu_t *ctx) { tASSERT(txn, count == txn->tw.loose_count); MDBX_PNL_SETSIZE(loose, count); pnl_sort(loose, txn->geo.first_unallocated); - pnl_merge(txn->tw.relist, loose); + pnl_merge(txn->tw.repnl, loose); TRACE("%s: append %zu loose-pages to reclaimed-pages", dbg_prefix(ctx), txn->tw.loose_count); } @@ -359,16 +359,15 @@ typedef struct gcu_rid_result { static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, const size_t left) { rid_t r; if (is_lifo(txn)) { - if (txn->tw.gc.reclaimed == nullptr) { - txn->tw.gc.reclaimed = txl_alloc(); - if (unlikely(!txn->tw.gc.reclaimed)) { + if (txn->tw.gc.retxl == nullptr) { + txn->tw.gc.retxl = txl_alloc(); + if (unlikely(!txn->tw.gc.retxl)) { r.err = MDBX_ENOMEM; goto return_error; } } - if (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) < txl_max && - left > (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot) * txn->env->maxgc_large1page && - !ctx->dense) { + if (MDBX_PNL_GETSIZE(txn->tw.gc.retxl) < txl_max && + left > (MDBX_PNL_GETSIZE(txn->tw.gc.retxl) - ctx->reused_slot) * txn->env->maxgc_large1page && !ctx->dense) { /* Hужен свободный для для сохранения 
списка страниц. */ bool need_cleanup = false; txnid_t snap_oldest = 0; @@ -377,11 +376,11 @@ static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, const size_t left) r.err = gc_alloc_ex(&ctx->cursor, 0, ALLOC_RESERVE).err; snap_oldest = txn->env->lck->cached_oldest.weak; if (likely(r.err == MDBX_SUCCESS)) { - TRACE("%s: took @%" PRIaTXN " from GC", dbg_prefix(ctx), MDBX_PNL_LAST(txn->tw.gc.reclaimed)); + TRACE("%s: took @%" PRIaTXN " from GC", dbg_prefix(ctx), MDBX_PNL_LAST(txn->tw.gc.retxl)); need_cleanup = true; } - } while (r.err == MDBX_SUCCESS && MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) < txl_max && - left > (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot) * txn->env->maxgc_large1page); + } while (r.err == MDBX_SUCCESS && MDBX_PNL_GETSIZE(txn->tw.gc.retxl) < txl_max && + left > (MDBX_PNL_GETSIZE(txn->tw.gc.retxl) - ctx->reused_slot) * txn->env->maxgc_large1page); if (likely(r.err == MDBX_SUCCESS)) { TRACE("%s: got enough from GC.", dbg_prefix(ctx)); @@ -390,12 +389,12 @@ static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, const size_t left) /* LY: some troubles... */ goto return_error; - if (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)) { + if (MDBX_PNL_GETSIZE(txn->tw.gc.retxl)) { if (need_cleanup) { - txl_sort(txn->tw.gc.reclaimed); + txl_sort(txn->tw.gc.retxl); ctx->cleaned_slot = 0; } - ctx->rid = MDBX_PNL_LAST(txn->tw.gc.reclaimed); + ctx->rid = MDBX_PNL_LAST(txn->tw.gc.retxl); } else { tASSERT(txn, txn->tw.gc.last_reclaimed == 0); if (unlikely(txn_snapshot_oldest(txn) != snap_oldest)) @@ -410,14 +409,14 @@ static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, const size_t left) /* В GC нет годных к переработке записей, * будем использовать свободные id в обратном порядке. 
*/ - while (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) < txl_max && - left > (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot) * txn->env->maxgc_large1page) { + while (MDBX_PNL_GETSIZE(txn->tw.gc.retxl) < txl_max && + left > (MDBX_PNL_GETSIZE(txn->tw.gc.retxl) - ctx->reused_slot) * txn->env->maxgc_large1page) { if (unlikely(ctx->rid <= MIN_TXNID)) { ctx->dense = true; - if (unlikely(MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) <= ctx->reused_slot)) { + if (unlikely(MDBX_PNL_GETSIZE(txn->tw.gc.retxl) <= ctx->reused_slot)) { VERBOSE("** restart: reserve depleted (reused_gc_slot %zu >= " - "gc.reclaimed %zu)", - ctx->reused_slot, MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)); + "gc.reclaimed %zu)", + ctx->reused_slot, MDBX_PNL_GETSIZE(txn->tw.gc.retxl)); goto return_restart; } break; @@ -445,7 +444,7 @@ static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, const size_t left) } tASSERT(txn, !ctx->dense); - r.err = txl_append(&txn->tw.gc.reclaimed, ctx->rid); + r.err = txl_append(&txn->tw.gc.retxl, ctx->rid); if (unlikely(r.err != MDBX_SUCCESS)) goto return_error; @@ -469,12 +468,12 @@ static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, const size_t left) } } - const size_t i = MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot; - tASSERT(txn, i > 0 && i <= MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)); - r.rid = txn->tw.gc.reclaimed[i]; + const size_t i = MDBX_PNL_GETSIZE(txn->tw.gc.retxl) - ctx->reused_slot; + tASSERT(txn, i > 0 && i <= MDBX_PNL_GETSIZE(txn->tw.gc.retxl)); + r.rid = txn->tw.gc.retxl[i]; TRACE("%s: take @%" PRIaTXN " from lifo-reclaimed[%zu]", dbg_prefix(ctx), r.rid, i); } else { - tASSERT(txn, txn->tw.gc.reclaimed == nullptr); + tASSERT(txn, txn->tw.gc.retxl == nullptr); if (unlikely(ctx->rid == 0)) { ctx->rid = txn_snapshot_oldest(txn); MDBX_val key; @@ -523,9 +522,9 @@ return_error: return r; } -/* Cleanups reclaimed GC (aka freeDB) records, saves the retired-list (aka +/* Cleanups retxl GC (aka freeDB) records, saves the retired-list 
(aka * freelist) of current transaction to GC, puts back into GC leftover of the - * reclaimed pages with chunking. This recursive changes the reclaimed-list, + * retxl pages with chunking. This recursive changes the retxl-list, * loose-list and retired-list. Keep trying until it stabilizes. * * NOTE: This code is a consequence of many iterations of adding crutches (aka @@ -539,7 +538,7 @@ int gc_update(MDBX_txn *txn, gcu_t *ctx) { txn->cursors[FREE_DBI] = &ctx->cursor; int rc; - /* txn->tw.relist[] can grow and shrink during this call. + /* txn->tw.repnl[] can grow and shrink during this call. * txn->tw.gc.last_reclaimed and txn->tw.retired_pages[] can only grow. * But page numbers cannot disappear from txn->tw.retired_pages[]. */ retry_clean_adj: @@ -548,7 +547,7 @@ retry: ctx->loop += !(ctx->prev_first_unallocated > txn->geo.first_unallocated); TRACE(">> restart, loop %u", ctx->loop); - tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); + tASSERT(txn, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); tASSERT(txn, dpl_check(txn)); if (unlikely(/* paranoia */ ctx->loop > ((MDBX_DEBUG > 0) ? 12 : 42))) { ERROR("txn #%" PRIaTXN " too more loops %u, bailout", txn->txnid, ctx->loop); @@ -575,17 +574,17 @@ retry: /* Come back here after each Put() in case retired-list changed */ TRACE("%s", " >> continue"); - tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); + tASSERT(txn, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); MDBX_val key, data; if (is_lifo(txn)) { - if (ctx->cleaned_slot < (txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0)) { + if (ctx->cleaned_slot < (txn->tw.gc.retxl ? MDBX_PNL_GETSIZE(txn->tw.gc.retxl) : 0)) { ctx->reserved = 0; ctx->cleaned_slot = 0; ctx->reused_slot = 0; ctx->fill_idx = ~0u; /* LY: cleanup reclaimed records. 
*/ do { - ctx->cleaned_id = txn->tw.gc.reclaimed[++ctx->cleaned_slot]; + ctx->cleaned_id = txn->tw.gc.retxl[++ctx->cleaned_slot]; tASSERT(txn, ctx->cleaned_slot > 0 && ctx->cleaned_id <= env->lck->cached_oldest.weak); key.iov_base = &ctx->cleaned_id; key.iov_len = sizeof(ctx->cleaned_id); @@ -603,8 +602,8 @@ retry: rc = cursor_del(&ctx->cursor, 0); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; - } while (ctx->cleaned_slot < MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)); - txl_sort(txn->tw.gc.reclaimed); + } while (ctx->cleaned_slot < MDBX_PNL_GETSIZE(txn->tw.gc.retxl)); + txl_sort(txn->tw.gc.retxl); } } else { /* Удаляем оставшиеся вынутые из GC записи. */ @@ -645,7 +644,7 @@ retry: } } - tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); + tASSERT(txn, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); tASSERT(txn, dpl_check(txn)); if (AUDIT_ENABLED()) { rc = audit_ex(txn, ctx->retired_stored, false); @@ -655,7 +654,7 @@ retry: /* return suitable into unallocated space */ if (txn_refund(txn)) { - tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); + tASSERT(txn, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); if (AUDIT_ENABLED()) { rc = audit_ex(txn, ctx->retired_stored, false); if (unlikely(rc != MDBX_SUCCESS)) @@ -674,14 +673,14 @@ retry: tASSERT(txn, txn->tw.loose_pages == 0); } - if (unlikely(ctx->reserved > MDBX_PNL_GETSIZE(txn->tw.relist)) && - (ctx->loop < 5 || ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.relist) > env->maxgc_large1page / 2)) { + if (unlikely(ctx->reserved > MDBX_PNL_GETSIZE(txn->tw.repnl)) && + (ctx->loop < 5 || ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.repnl) > env->maxgc_large1page / 2)) { TRACE("%s: reclaimed-list changed %zu -> %zu, retry", dbg_prefix(ctx), ctx->amount, - MDBX_PNL_GETSIZE(txn->tw.relist)); - ctx->reserve_adj += ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.relist); 
+ MDBX_PNL_GETSIZE(txn->tw.repnl)); + ctx->reserve_adj += ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.repnl); goto retry; } - ctx->amount = MDBX_PNL_GETSIZE(txn->tw.relist); + ctx->amount = MDBX_PNL_GETSIZE(txn->tw.repnl); if (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { /* store retired-list into GC */ @@ -691,7 +690,7 @@ retry: continue; } - tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); + tASSERT(txn, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); tASSERT(txn, txn->tw.loose_count == 0); TRACE("%s", " >> reserving"); @@ -705,7 +704,7 @@ retry: "lifo-reclaimed-slots %zu, " "reused-gc-slots %zu", dbg_prefix(ctx), ctx->amount, ctx->reserved, ctx->reserve_adj, left, - txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0, ctx->reused_slot); + txn->tw.gc.retxl ? MDBX_PNL_GETSIZE(txn->tw.gc.retxl) : 0, ctx->reused_slot); if (0 >= (intptr_t)left) break; @@ -723,14 +722,14 @@ retry: size_t chunk = left; if (unlikely(left > env->maxgc_large1page)) { - const size_t avail_gc_slots = txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot + 1 + const size_t avail_gc_slots = txn->tw.gc.retxl ? MDBX_PNL_GETSIZE(txn->tw.gc.retxl) - ctx->reused_slot + 1 : (ctx->rid < INT16_MAX) ? (size_t)ctx->rid : INT16_MAX; if (likely(avail_gc_slots > 1)) { #if MDBX_ENABLE_BIGFOOT chunk = env->maxgc_large1page; if (avail_gc_slots < INT16_MAX && unlikely(left > env->maxgc_large1page * avail_gc_slots)) - /* TODO: Можно смотреть последовательности какой длины есть в relist + /* TODO: Можно смотреть последовательности какой длины есть в repnl * и пробовать нарезать куски соответствующего размера. * Смысл в том, чтобы не дробить последовательности страниц, * а использовать целиком. 
*/ @@ -750,8 +749,8 @@ retry: size_t avail = ((pgno2bytes(env, span) - PAGEHDRSZ) / sizeof(pgno_t)) /* - 1 + span */; if (tail > avail) { for (size_t i = ctx->amount - span; i > 0; --i) { - if (MDBX_PNL_ASCENDING ? (txn->tw.relist[i] + span) - : (txn->tw.relist[i] - span) == txn->tw.relist[i + span]) { + if (MDBX_PNL_ASCENDING ? (txn->tw.repnl[i] + span) + : (txn->tw.repnl[i] - span) == txn->tw.repnl[i + span]) { span += 1; avail = ((pgno2bytes(env, span) - PAGEHDRSZ) / sizeof(pgno_t)) - 1 + span; if (avail >= tail) @@ -792,7 +791,7 @@ retry: ctx->reserved + chunk + 1, reservation_gc_id); prepare_backlog(txn, ctx); rc = cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE | MDBX_NOOVERWRITE); - tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); + tASSERT(txn, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; @@ -803,14 +802,14 @@ retry: continue; } - tASSERT(txn, ctx->cleaned_slot == (txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0)); + tASSERT(txn, ctx->cleaned_slot == (txn->tw.gc.retxl ? MDBX_PNL_GETSIZE(txn->tw.gc.retxl) : 0)); TRACE("%s", " >> filling"); /* Fill in the reserved records */ size_t excess_slots = 0; - ctx->fill_idx = txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot : ctx->reused_slot; + ctx->fill_idx = txn->tw.gc.retxl ? 
MDBX_PNL_GETSIZE(txn->tw.gc.retxl) - ctx->reused_slot : ctx->reused_slot; rc = MDBX_SUCCESS; - tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); + tASSERT(txn, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); tASSERT(txn, dpl_check(txn)); if (ctx->amount) { MDBX_val key, data; @@ -818,7 +817,7 @@ retry: key.iov_base = data.iov_base = nullptr; size_t left = ctx->amount, excess = 0; - if (txn->tw.gc.reclaimed == nullptr) { + if (txn->tw.gc.retxl == nullptr) { tASSERT(txn, is_lifo(txn) == 0); rc = outer_first(&ctx->cursor, &key, &data); if (unlikely(rc != MDBX_SUCCESS)) { @@ -831,33 +830,33 @@ retry: while (true) { txnid_t fill_gc_id; - TRACE("%s: left %zu of %zu", dbg_prefix(ctx), left, MDBX_PNL_GETSIZE(txn->tw.relist)); - if (txn->tw.gc.reclaimed == nullptr) { + TRACE("%s: left %zu of %zu", dbg_prefix(ctx), left, MDBX_PNL_GETSIZE(txn->tw.repnl)); + if (txn->tw.gc.retxl == nullptr) { tASSERT(txn, is_lifo(txn) == 0); fill_gc_id = key.iov_base ? unaligned_peek_u64(4, key.iov_base) : MIN_TXNID; if (ctx->fill_idx == 0 || fill_gc_id > txn->tw.gc.last_reclaimed) { if (!left) break; VERBOSE("** restart: reserve depleted (fill_idx %zu, fill_id %" PRIaTXN " > last_reclaimed %" PRIaTXN - ", left %zu", - ctx->fill_idx, fill_gc_id, txn->tw.gc.last_reclaimed, left); + ", left %zu", + ctx->fill_idx, fill_gc_id, txn->tw.gc.last_reclaimed, left); ctx->reserve_adj = (ctx->reserve_adj > left) ? 
ctx->reserve_adj - left : 0; goto retry; } ctx->fill_idx -= 1; } else { tASSERT(txn, is_lifo(txn) != 0); - if (ctx->fill_idx >= MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)) { + if (ctx->fill_idx >= MDBX_PNL_GETSIZE(txn->tw.gc.retxl)) { if (!left) break; VERBOSE("** restart: reserve depleted (fill_idx %zu >= " - "gc.reclaimed %zu, left %zu", - ctx->fill_idx, MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed), left); + "gc.reclaimed %zu, left %zu", + ctx->fill_idx, MDBX_PNL_GETSIZE(txn->tw.gc.retxl), left); ctx->reserve_adj = (ctx->reserve_adj > left) ? ctx->reserve_adj - left : 0; goto retry; } ctx->fill_idx += 1; - fill_gc_id = txn->tw.gc.reclaimed[ctx->fill_idx]; + fill_gc_id = txn->tw.gc.retxl[ctx->fill_idx]; TRACE("%s: seek-reservation @%" PRIaTXN " at gc.reclaimed[%zu]", dbg_prefix(ctx), fill_gc_id, ctx->fill_idx); key.iov_base = &fill_gc_id; key.iov_len = sizeof(fill_gc_id); @@ -865,7 +864,7 @@ retry: if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } - tASSERT(txn, ctx->cleaned_slot == (txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0)); + tASSERT(txn, ctx->cleaned_slot == (txn->tw.gc.retxl ? MDBX_PNL_GETSIZE(txn->tw.gc.retxl) : 0)); tASSERT(txn, fill_gc_id > 0 && fill_gc_id <= env->lck->cached_oldest.weak); key.iov_base = &fill_gc_id; key.iov_len = sizeof(fill_gc_id); @@ -889,16 +888,16 @@ retry: goto bailout; zeroize_reserved(env, data); - if (unlikely(txn->tw.loose_count || ctx->amount != MDBX_PNL_GETSIZE(txn->tw.relist))) { + if (unlikely(txn->tw.loose_count || ctx->amount != MDBX_PNL_GETSIZE(txn->tw.repnl))) { NOTICE("** restart: reclaimed-list changed (%zu -> %zu, loose +%zu)", ctx->amount, - MDBX_PNL_GETSIZE(txn->tw.relist), txn->tw.loose_count); + MDBX_PNL_GETSIZE(txn->tw.repnl), txn->tw.loose_count); if (ctx->loop < 5 || (ctx->loop > 10 && (ctx->loop & 1))) goto retry_clean_adj; goto retry; } - if (unlikely(txn->tw.gc.reclaimed ? 
ctx->cleaned_slot < MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - : ctx->cleaned_id < txn->tw.gc.last_reclaimed)) { + if (unlikely(txn->tw.gc.retxl ? ctx->cleaned_slot < MDBX_PNL_GETSIZE(txn->tw.gc.retxl) + : ctx->cleaned_id < txn->tw.gc.last_reclaimed)) { NOTICE("%s", "** restart: reclaimed-slots changed"); goto retry; } @@ -911,11 +910,11 @@ retry: pgno_t *dst = data.iov_base; *dst++ = (pgno_t)chunk; - pgno_t *src = MDBX_PNL_BEGIN(txn->tw.relist) + left - chunk; + pgno_t *src = MDBX_PNL_BEGIN(txn->tw.repnl) + left - chunk; memcpy(dst, src, chunk * sizeof(pgno_t)); pgno_t *from = src, *to = src + chunk; TRACE("%s: fill %zu [ %zu:%" PRIaPGNO "...%zu:%" PRIaPGNO "] @%" PRIaTXN, dbg_prefix(ctx), chunk, - from - txn->tw.relist, from[0], to - txn->tw.relist, to[-1], fill_gc_id); + from - txn->tw.repnl, from[0], to - txn->tw.repnl, to[-1], fill_gc_id); left -= chunk; if (AUDIT_ENABLED()) { @@ -926,7 +925,7 @@ retry: next: - if (txn->tw.gc.reclaimed == nullptr) { + if (txn->tw.gc.retxl == nullptr) { tASSERT(txn, is_lifo(txn) == 0); rc = outer_next(&ctx->cursor, &key, &data, MDBX_NEXT); if (unlikely(rc != MDBX_SUCCESS)) { @@ -951,9 +950,9 @@ retry: } tASSERT(txn, rc == MDBX_SUCCESS); - if (unlikely(txn->tw.loose_count != 0 || ctx->amount != MDBX_PNL_GETSIZE(txn->tw.relist))) { + if (unlikely(txn->tw.loose_count != 0 || ctx->amount != MDBX_PNL_GETSIZE(txn->tw.repnl))) { NOTICE("** restart: got %zu loose pages (reclaimed-list %zu -> %zu)", txn->tw.loose_count, ctx->amount, - MDBX_PNL_GETSIZE(txn->tw.relist)); + MDBX_PNL_GETSIZE(txn->tw.repnl)); goto retry; } @@ -966,12 +965,12 @@ retry: goto retry; } - tASSERT(txn, txn->tw.gc.reclaimed == nullptr || ctx->cleaned_slot == MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)); + tASSERT(txn, txn->tw.gc.retxl == nullptr || ctx->cleaned_slot == MDBX_PNL_GETSIZE(txn->tw.gc.retxl)); bailout: txn->cursors[FREE_DBI] = ctx->cursor.next; - MDBX_PNL_SETSIZE(txn->tw.relist, 0); + MDBX_PNL_SETSIZE(txn->tw.repnl, 0); #if MDBX_ENABLE_PROFGC 
env->lck->pgops.gc_prof.wloops += (uint32_t)ctx->loop; #endif /* MDBX_ENABLE_PROFGC */ diff --git a/src/internals.h b/src/internals.h index a7f15143..ddef1fcf 100644 --- a/src/internals.h +++ b/src/internals.h @@ -211,14 +211,14 @@ struct MDBX_txn { struct { troika_t troika; /* In write txns, array of cursors for each DB */ - pnl_t __restrict relist; /* Reclaimed GC pages */ - bool prefault_write_activated; + pnl_t __restrict repnl; /* Reclaimed GC pages */ struct { - /* The list of reclaimed txns from GC */ - txl_t __restrict reclaimed; + /* The list of reclaimed txn-ids from GC */ + txl_t __restrict retxl; txnid_t last_reclaimed; /* ID of last used record */ uint64_t time_acc; } gc; + bool prefault_write_activated; #if MDBX_ENABLE_REFUND pgno_t loose_refund_wl /* FIXME: describe */; #endif /* MDBX_ENABLE_REFUND */ diff --git a/src/page-ops.c b/src/page-ops.c index 6ca5f332..6cecd119 100644 --- a/src/page-ops.c +++ b/src/page-ops.c @@ -608,8 +608,8 @@ status_done: reclaim: DEBUG("reclaim %zu %s page %" PRIaPGNO, npages, "dirty", pgno); - rc = pnl_insert_span(&txn->tw.relist, pgno, npages); - tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); + rc = pnl_insert_span(&txn->tw.repnl, pgno, npages); + tASSERT(txn, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); tASSERT(txn, dpl_check(txn)); return rc; } @@ -679,7 +679,7 @@ __hot int __must_check_result page_dirty(MDBX_txn *txn, page_t *mp, size_t npage if (txn->tw.loose_count) { page_t *lp = txn->tw.loose_pages; DEBUG("purge-and-reclaim loose page %" PRIaPGNO, lp->pgno); - rc = pnl_insert_span(&txn->tw.relist, lp->pgno, 1); + rc = pnl_insert_span(&txn->tw.repnl, lp->pgno, 1); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; size_t di = dpl_search(txn, lp->pgno); diff --git a/src/refund.c b/src/refund.c index 2d1ef607..3d2fc368 100644 --- a/src/refund.c +++ b/src/refund.c @@ -7,7 +7,7 @@ static void refund_reclaimed(MDBX_txn *txn) { /* 
Scanning in descend order */ pgno_t first_unallocated = txn->geo.first_unallocated; - const pnl_t pnl = txn->tw.relist; + const pnl_t pnl = txn->tw.repnl; tASSERT(txn, MDBX_PNL_GETSIZE(pnl) && MDBX_PNL_MOST(pnl) == first_unallocated - 1); #if MDBX_PNL_ASCENDING size_t i = MDBX_PNL_GETSIZE(pnl); @@ -28,7 +28,7 @@ static void refund_reclaimed(MDBX_txn *txn) { VERBOSE("refunded %" PRIaPGNO " pages: %" PRIaPGNO " -> %" PRIaPGNO, txn->geo.first_unallocated - first_unallocated, txn->geo.first_unallocated, first_unallocated); txn->geo.first_unallocated = first_unallocated; - tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - 1)); + tASSERT(txn, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - 1)); } static void refund_loose(MDBX_txn *txn) { @@ -178,7 +178,7 @@ bool txn_refund(MDBX_txn *txn) { refund_loose(txn); while (true) { - if (MDBX_PNL_GETSIZE(txn->tw.relist) == 0 || MDBX_PNL_MOST(txn->tw.relist) != txn->geo.first_unallocated - 1) + if (MDBX_PNL_GETSIZE(txn->tw.repnl) == 0 || MDBX_PNL_MOST(txn->tw.repnl) != txn->geo.first_unallocated - 1) break; refund_reclaimed(txn); diff --git a/src/tree-ops.c b/src/tree-ops.c index 67b69ddc..3c51a65c 100644 --- a/src/tree-ops.c +++ b/src/tree-ops.c @@ -889,7 +889,7 @@ retry: goto retry; } if (likely(!involve) && - (likely(mc->tree != &mc->txn->dbs[FREE_DBI]) || mc->txn->tw.loose_pages || MDBX_PNL_GETSIZE(mc->txn->tw.relist) || + (likely(mc->tree != &mc->txn->dbs[FREE_DBI]) || mc->txn->tw.loose_pages || MDBX_PNL_GETSIZE(mc->txn->tw.repnl) || (mc->flags & z_gcu_preparation) || (mc->txn->flags & txn_gc_drained) || room_threshold)) { involve = true; goto retry; diff --git a/src/txn.c b/src/txn.c index 3ac333fc..f2759b88 100644 --- a/src/txn.c +++ b/src/txn.c @@ -85,7 +85,7 @@ void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t parent_ } /* Remove reclaimed pages from parent's dirty list */ - const pnl_t reclaimed_list = parent->tw.relist; + const pnl_t reclaimed_list = 
parent->tw.repnl; dpl_sift(parent, reclaimed_list, false); /* Move retired pages from parent's dirty & spilled list to reclaimed */ @@ -139,7 +139,7 @@ void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t parent_ } DEBUG("reclaim retired parent's %u -> %zu %s page %" PRIaPGNO, npages, l, kind, pgno); - int err = pnl_insert_span(&parent->tw.relist, pgno, l); + int err = pnl_insert_span(&parent->tw.repnl, pgno, l); ENSURE(txn->env, err == MDBX_SUCCESS); } MDBX_PNL_SETSIZE(parent->tw.retired_pages, w); @@ -651,8 +651,8 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { txn->tw.spilled.least_removed = 0; txn->tw.gc.time_acc = 0; txn->tw.gc.last_reclaimed = 0; - if (txn->tw.gc.reclaimed) - MDBX_PNL_SETSIZE(txn->tw.gc.reclaimed, 0); + if (txn->tw.gc.retxl) + MDBX_PNL_SETSIZE(txn->tw.gc.retxl, 0); env->txn = txn; if ((txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) { @@ -916,7 +916,7 @@ int txn_end(MDBX_txn *txn, unsigned mode) { /* Export or close DBI handles created in this txn */ rc = dbi_update(txn, mode & TXN_END_UPDATE); pnl_shrink(&txn->tw.retired_pages); - pnl_shrink(&txn->tw.relist); + pnl_shrink(&txn->tw.repnl); if (!(env->flags & MDBX_WRITEMAP)) dpl_release_shadows(txn); /* The writer mutex was locked in mdbx_txn_begin. 
*/ @@ -926,14 +926,14 @@ int txn_end(MDBX_txn *txn, unsigned mode) { MDBX_txn *const parent = txn->parent; eASSERT(env, parent->signature == txn_signature); eASSERT(env, parent->nested == txn && (parent->flags & MDBX_TXN_HAS_CHILD) != 0); - eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); + eASSERT(env, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); eASSERT(env, memcmp(&txn->tw.troika, &parent->tw.troika, sizeof(troika_t)) == 0); txn->owner = 0; - if (txn->tw.gc.reclaimed) { - eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) >= (uintptr_t)parent->tw.gc.reclaimed); - MDBX_PNL_SETSIZE(txn->tw.gc.reclaimed, (uintptr_t)parent->tw.gc.reclaimed); - parent->tw.gc.reclaimed = txn->tw.gc.reclaimed; + if (txn->tw.gc.retxl) { + eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.gc.retxl) >= (uintptr_t)parent->tw.gc.retxl); + MDBX_PNL_SETSIZE(txn->tw.gc.retxl, (uintptr_t)parent->tw.gc.retxl); + parent->tw.gc.retxl = txn->tw.gc.retxl; } if (txn->tw.retired_pages) { @@ -949,7 +949,7 @@ int txn_end(MDBX_txn *txn, unsigned mode) { tASSERT(parent, audit_ex(parent, 0, false) == 0); dpl_release_shadows(txn); dpl_free(txn); - pnl_free(txn->tw.relist); + pnl_free(txn->tw.repnl); if (parent->geo.upper != txn->geo.upper || parent->geo.now != txn->geo.now) { /* undo resize performed by child txn */ From bc56a613ab2433c5475bbc4ada9b8ea05a7f27c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 20 Dec 2024 21:32:42 +0300 Subject: [PATCH 408/443] =?UTF-8?q?mdbx:=20=D1=87=D0=B8=D1=81=D1=82=D0=BA?= =?UTF-8?q?=D0=B0=20=D0=B8=D1=81=D1=85=D0=BE=D0=B4=D0=BD=D0=B8=D0=BA=D0=BE?= =?UTF-8?q?=D0=B2=20pnl/dpl/txl.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dpl.c | 2 +- src/pnl.c | 22 +++++++++++----------- src/txl.c | 10 +++++----- 3 files changed, 17 
insertions(+), 17 deletions(-) diff --git a/src/dpl.c b/src/dpl.c index 9f244b19..7043600e 100644 --- a/src/dpl.c +++ b/src/dpl.c @@ -129,7 +129,7 @@ __hot __noinline dpl_t *dpl_sort_slowpath(const MDBX_txn *txn) { #define DP_SEARCH_CMP(dp, id) ((dp).pgno < (id)) SEARCH_IMPL(dp_bsearch, dp_t, pgno_t, DP_SEARCH_CMP) -__hot __noinline MDBX_INTERNAL size_t dpl_search(const MDBX_txn *txn, pgno_t pgno) { +__hot __noinline size_t dpl_search(const MDBX_txn *txn, pgno_t pgno) { tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0); tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); diff --git a/src/pnl.c b/src/pnl.c index 9ad5f9bd..6a7f18ad 100644 --- a/src/pnl.c +++ b/src/pnl.c @@ -3,7 +3,7 @@ #include "internals.h" -MDBX_INTERNAL pnl_t pnl_alloc(size_t size) { +pnl_t pnl_alloc(size_t size) { size_t bytes = pnl_size2bytes(size); pnl_t pnl = osal_malloc(bytes); if (likely(pnl)) { @@ -18,12 +18,12 @@ MDBX_INTERNAL pnl_t pnl_alloc(size_t size) { return pnl; } -MDBX_INTERNAL void pnl_free(pnl_t pnl) { +void pnl_free(pnl_t pnl) { if (likely(pnl)) osal_free(pnl - 1); } -MDBX_INTERNAL void pnl_shrink(pnl_t __restrict *__restrict ppnl) { +void pnl_shrink(pnl_t __restrict *__restrict ppnl) { assert(pnl_bytes2size(pnl_size2bytes(MDBX_PNL_INITIAL)) >= MDBX_PNL_INITIAL && pnl_bytes2size(pnl_size2bytes(MDBX_PNL_INITIAL)) < MDBX_PNL_INITIAL * 3 / 2); assert(MDBX_PNL_GETSIZE(*ppnl) <= PAGELIST_LIMIT && MDBX_PNL_ALLOCLEN(*ppnl) >= MDBX_PNL_GETSIZE(*ppnl)); @@ -42,7 +42,7 @@ MDBX_INTERNAL void pnl_shrink(pnl_t __restrict *__restrict ppnl) { } } -MDBX_INTERNAL int pnl_reserve(pnl_t __restrict *__restrict ppnl, const size_t wanna) { +int pnl_reserve(pnl_t __restrict *__restrict ppnl, const size_t wanna) { const size_t allocated = MDBX_PNL_ALLOCLEN(*ppnl); assert(MDBX_PNL_GETSIZE(*ppnl) <= PAGELIST_LIMIT && MDBX_PNL_ALLOCLEN(*ppnl) >= MDBX_PNL_GETSIZE(*ppnl)); if (likely(allocated >= wanna)) @@ -99,15 +99,15 @@ static __always_inline int __must_check_result 
pnl_append_stepped(unsigned step, return MDBX_SUCCESS; } -__hot MDBX_INTERNAL int __must_check_result spill_append_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n) { +__hot int __must_check_result spill_append_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n) { return pnl_append_stepped(2, ppnl, pgno << 1, n); } -__hot MDBX_INTERNAL int __must_check_result pnl_append_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n) { +__hot int __must_check_result pnl_append_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n) { return pnl_append_stepped(1, ppnl, pgno, n); } -__hot MDBX_INTERNAL int __must_check_result pnl_insert_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n) { +__hot int __must_check_result pnl_insert_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n) { assert(n > 0); int rc = pnl_need(ppnl, n); if (unlikely(rc != MDBX_SUCCESS)) @@ -125,7 +125,7 @@ __hot MDBX_INTERNAL int __must_check_result pnl_insert_span(__restrict pnl_t *pp return MDBX_SUCCESS; } -__hot __noinline MDBX_INTERNAL bool pnl_check(const const_pnl_t pnl, const size_t limit) { +__hot __noinline bool pnl_check(const const_pnl_t pnl, const size_t limit) { assert(limit >= MIN_PAGENO - MDBX_ENABLE_REFUND); if (likely(MDBX_PNL_GETSIZE(pnl))) { if (unlikely(MDBX_PNL_GETSIZE(pnl) > PAGELIST_LIMIT)) @@ -179,7 +179,7 @@ static __always_inline void pnl_merge_inner(pgno_t *__restrict dst, const pgno_t } while (likely(src_b > src_b_detent)); } -__hot MDBX_INTERNAL size_t pnl_merge(pnl_t dst, const pnl_t src) { +__hot size_t pnl_merge(pnl_t dst, const pnl_t src) { assert(pnl_check_allocated(dst, MAX_PAGENO + 1)); assert(pnl_check(src, MAX_PAGENO + 1)); const size_t src_len = MDBX_PNL_GETSIZE(src); @@ -215,7 +215,7 @@ RADIXSORT_IMPL(pgno, pgno_t, MDBX_PNL_EXTRACT_KEY, MDBX_PNL_PREALLOC_FOR_RADIXSO SORT_IMPL(pgno_sort, false, pgno_t, MDBX_PNL_ORDERED) -__hot __noinline MDBX_INTERNAL void pnl_sort_nochk(pnl_t pnl) { +__hot __noinline void pnl_sort_nochk(pnl_t pnl) { if (likely(MDBX_PNL_GETSIZE(pnl) < 
MDBX_RADIXSORT_THRESHOLD) || unlikely(!pgno_radixsort(&MDBX_PNL_FIRST(pnl), MDBX_PNL_GETSIZE(pnl)))) pgno_sort(MDBX_PNL_BEGIN(pnl), MDBX_PNL_END(pnl)); @@ -223,7 +223,7 @@ __hot __noinline MDBX_INTERNAL void pnl_sort_nochk(pnl_t pnl) { SEARCH_IMPL(pgno_bsearch, pgno_t, pgno_t, MDBX_PNL_ORDERED) -__hot __noinline MDBX_INTERNAL size_t pnl_search_nochk(const pnl_t pnl, pgno_t pgno) { +__hot __noinline size_t pnl_search_nochk(const pnl_t pnl, pgno_t pgno) { const pgno_t *begin = MDBX_PNL_BEGIN(pnl); const pgno_t *it = pgno_bsearch(begin, MDBX_PNL_GETSIZE(pnl), pgno); const pgno_t *end = begin + MDBX_PNL_GETSIZE(pnl); diff --git a/src/txl.c b/src/txl.c index d2296740..301cf339 100644 --- a/src/txl.c +++ b/src/txl.c @@ -17,7 +17,7 @@ static inline size_t txl_bytes2size(const size_t bytes) { return size - 2; } -MDBX_INTERNAL txl_t txl_alloc(void) { +txl_t txl_alloc(void) { size_t bytes = txl_size2bytes(txl_initial); txl_t txl = osal_malloc(bytes); if (likely(txl)) { @@ -32,12 +32,12 @@ MDBX_INTERNAL txl_t txl_alloc(void) { return txl; } -MDBX_INTERNAL void txl_free(txl_t txl) { +void txl_free(txl_t txl) { if (likely(txl)) osal_free(txl - 1); } -MDBX_INTERNAL int txl_reserve(txl_t __restrict *__restrict ptxl, const size_t wanna) { +int txl_reserve(txl_t __restrict *__restrict ptxl, const size_t wanna) { const size_t allocated = (size_t)MDBX_PNL_ALLOCLEN(*ptxl); assert(MDBX_PNL_GETSIZE(*ptxl) <= txl_max && MDBX_PNL_ALLOCLEN(*ptxl) >= MDBX_PNL_GETSIZE(*ptxl)); if (likely(allocated >= wanna)) @@ -78,9 +78,9 @@ static __always_inline void txl_xappend(txl_t __restrict txl, txnid_t id) { #define TXNID_SORT_CMP(first, last) ((first) > (last)) SORT_IMPL(txnid_sort, false, txnid_t, TXNID_SORT_CMP) -MDBX_INTERNAL void txl_sort(txl_t txl) { txnid_sort(MDBX_PNL_BEGIN(txl), MDBX_PNL_END(txl)); } +void txl_sort(txl_t txl) { txnid_sort(MDBX_PNL_BEGIN(txl), MDBX_PNL_END(txl)); } -MDBX_INTERNAL int __must_check_result txl_append(txl_t __restrict *ptxl, txnid_t id) { +int 
__must_check_result txl_append(txl_t __restrict *ptxl, txnid_t id) { if (unlikely(MDBX_PNL_GETSIZE(*ptxl) == MDBX_PNL_ALLOCLEN(*ptxl))) { int rc = txl_need(ptxl, txl_granulate); if (unlikely(rc != MDBX_SUCCESS)) From b9e4c1ea7381277195495a997fe0abc8c109a0f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 21 Dec 2024 16:49:40 +0300 Subject: [PATCH 409/443] =?UTF-8?q?mdbx:=20=D0=B2=D1=8B=D1=87=D0=BB=D0=B5?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20`txl=5Fcontain()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/gc-get.c | 8 +------- src/txl.c | 8 ++++++++ src/txl.h | 2 ++ 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/gc-get.c b/src/gc-get.c index ff70ba3c..c6d2c62b 100644 --- a/src/gc-get.c +++ b/src/gc-get.c @@ -590,13 +590,7 @@ static inline bool is_gc_usable(MDBX_txn *txn, const MDBX_cursor *mc, const uint return true; } -__hot static bool is_already_reclaimed(const MDBX_txn *txn, txnid_t id) { - const size_t len = MDBX_PNL_GETSIZE(txn->tw.gc.retxl); - for (size_t i = 1; i <= len; ++i) - if (txn->tw.gc.retxl[i] == id) - return true; - return false; -} +static inline bool is_already_reclaimed(const MDBX_txn *txn, txnid_t id) { return txl_contain(txn->tw.gc.retxl, id); } __hot static pgno_t repnl_get_single(MDBX_txn *txn) { const size_t len = MDBX_PNL_GETSIZE(txn->tw.repnl); diff --git a/src/txl.c b/src/txl.c index 301cf339..3c64e085 100644 --- a/src/txl.c +++ b/src/txl.c @@ -89,3 +89,11 @@ int __must_check_result txl_append(txl_t __restrict *ptxl, txnid_t id) { txl_xappend(*ptxl, id); return MDBX_SUCCESS; } + +__hot bool txl_contain(const txl_t txl, txnid_t id) { + const size_t len = MDBX_PNL_GETSIZE(txl); + for (size_t i = 1; i <= len; ++i) + if (txl[i] == id) + return true; + return false; +} diff --git a/src/txl.h b/src/txl.h index e80db522..d8d67e05 100644 --- a/src/txl.h 
+++ b/src/txl.h @@ -22,3 +22,5 @@ MDBX_INTERNAL void txl_free(txl_t txl); MDBX_INTERNAL int __must_check_result txl_append(txl_t __restrict *ptxl, txnid_t id); MDBX_INTERNAL void txl_sort(txl_t txl); + +MDBX_INTERNAL bool txl_contain(const txl_t txl, txnid_t id); From a76e06a48e2797100db857184a90687d054770be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 21 Dec 2024 18:05:39 +0300 Subject: [PATCH 410/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BD=D0=B5=D1=81=D0=BE?= =?UTF-8?q?=D0=B3=D0=BB=D0=B0=D1=81=D0=BE=D0=B2=D0=B0=D0=BD=D0=BD=D0=BE?= =?UTF-8?q?=D1=81=D1=82=D0=B8=20`MDBX=5FDPL=5FPREALLOC=5FFOR=5FRADIXSORT`?= =?UTF-8?q?=20=D0=B8=20assert-=D0=BF=D1=80=D0=BE=D0=B2=D0=B5=D1=80=D0=BA?= =?UTF-8?q?=D0=B8=20=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B8=20`dpl=5Fbytes2si?= =?UTF-8?q?ze()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dpl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dpl.c b/src/dpl.c index 7043600e..1d2bef79 100644 --- a/src/dpl.c +++ b/src/dpl.c @@ -20,10 +20,10 @@ static inline size_t dpl_size2bytes(ptrdiff_t size) { static inline size_t dpl_bytes2size(const ptrdiff_t bytes) { size_t size = (bytes - sizeof(dpl_t)) / sizeof(dp_t); - assert(size > CURSOR_STACK_SIZE && size <= PAGELIST_LIMIT + MDBX_PNL_GRANULATE); #if MDBX_DPL_PREALLOC_FOR_RADIXSORT size >>= 1; #endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */ + assert(size > CURSOR_STACK_SIZE && size <= PAGELIST_LIMIT + MDBX_PNL_GRANULATE); return size; } From bfc6795762869ebcdc10942ca72f1a5c8427dba0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 22 Dec 2024 09:25:28 +0300 Subject: [PATCH 411/443] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= 
=?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=80=D0=B5=D0=B3=D1=80=D0=B5?= =?UTF-8?q?=D1=81=D1=81=D0=B0=20=D0=BD=D0=B5-=D0=BE=D1=82=D0=BF=D1=83?= =?UTF-8?q?=D1=81=D0=BA=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=BC=D1=8C=D1=8E=D1=82?= =?UTF-8?q?=D0=B5=D0=BA=D1=81=D0=B0=20=D0=BF=D1=80=D0=B8=20=D0=BF=D0=BE?= =?UTF-8?q?=D0=BF=D1=8B=D1=82=D0=BA=D0=B8=20=D0=BF=D0=BE=D0=B2=D1=82=D0=BE?= =?UTF-8?q?=D1=80=D0=BD=D0=BE=D0=B3=D0=BE=20=D0=B7=D0=B0=D0=BA=D1=80=D1=8B?= =?UTF-8?q?=D1=82=D0=B8=D1=8F=20dbi-=D1=85=D0=B5=D0=BD=D0=B4=D0=BB=D0=B0.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ошибка была внесена 2024-10-23 коммитом v0.13.1-35-g3049bb87b5b14d83b16d121c186ce8fb3f21383e. --- src/api-dbi.c | 95 +++++++++++++++++++++++++++------------------------ 1 file changed, 50 insertions(+), 45 deletions(-) diff --git a/src/api-dbi.c b/src/api-dbi.c index a102f8b6..93bbdf21 100644 --- a/src/api-dbi.c +++ b/src/api-dbi.c @@ -146,53 +146,58 @@ int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { if (unlikely(dbi >= env->max_dbi)) return LOG_IFERR(MDBX_BAD_DBI); - if (unlikely(dbi < CORE_DBS || dbi >= env->max_dbi)) - return LOG_IFERR(MDBX_BAD_DBI); - rc = osal_fastmutex_acquire(&env->dbi_lock); - if (likely(rc == MDBX_SUCCESS && dbi < env->n_dbi)) { - retry: - if (env->basal_txn && (env->dbs_flags[dbi] & DB_VALID) && (env->basal_txn->flags & MDBX_TXN_FINISHED) == 0) { - /* LY: Опасный код, так как env->txn может быть изменено в другом потоке. - * К сожалению тут нет надежного решения и может быть падение при неверном - * использовании API (вызове mdbx_dbi_close конкурентно с завершением - * пишущей транзакции). - * - * Для минимизации вероятности падения сначала проверяем dbi-флаги - * в basal_txn, а уже после в env->txn. Таким образом, падение может быть - * только при коллизии с завершением вложенной транзакции. - * - * Альтернативно можно попробовать выполнять обновление/put записи в - * mainDb соответствующей таблице закрываемого хендла. 
Семантически это - * верный путь, но проблема в текущем API, в котором исторически dbi-хендл - * живет и закрывается вне транзакции. Причем проблема не только в том, - * что нет указателя на текущую пишущую транзакцию, а в том что - * пользователь точно не ожидает что закрытие хендла приведет к - * скрытой/непрозрачной активности внутри транзакции потенциально - * выполняемой в другом потоке. Другими словами, проблема может быть - * только при неверном использовании API и если пользователь это - * допускает, то точно не будет ожидать скрытых действий внутри - * транзакции, и поэтому этот путь потенциально более опасен. */ - const MDBX_txn *const hazard = env->txn; - osal_compiler_barrier(); - if ((dbi_state(env->basal_txn, dbi) & (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO) { - bailout_dirty_dbi: - osal_fastmutex_release(&env->dbi_lock); - return LOG_IFERR(MDBX_DANGLING_DBI); - } - osal_memory_barrier(); - if (unlikely(hazard != env->txn)) - goto retry; - if (hazard != env->basal_txn && hazard && (hazard->flags & MDBX_TXN_FINISHED) == 0 && - hazard->signature == txn_signature && - (dbi_state(hazard, dbi) & (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO) - goto bailout_dirty_dbi; - osal_compiler_barrier(); - if (unlikely(hazard != env->txn)) - goto retry; - } - rc = dbi_close_release(env, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return LOG_IFERR(rc); + + if (unlikely(dbi >= env->n_dbi)) { + rc = MDBX_BAD_DBI; + bailout: + osal_fastmutex_release(&env->dbi_lock); + return LOG_IFERR(rc); } + + while (env->basal_txn && (env->dbs_flags[dbi] & DB_VALID) && (env->basal_txn->flags & MDBX_TXN_FINISHED) == 0) { + /* LY: Опасный код, так как env->txn может быть изменено в другом потоке. + * К сожалению тут нет надежного решения и может быть падение при неверном + * использовании API (вызове mdbx_dbi_close конкурентно с завершением + * пишущей транзакции). + * + * Для минимизации вероятности падения сначала проверяем dbi-флаги + * в basal_txn, а уже после в env->txn. 
Таким образом, падение может быть + * только при коллизии с завершением вложенной транзакции. + * + * Альтернативно можно попробовать выполнять обновление/put записи в + * mainDb соответствующей таблице закрываемого хендла. Семантически это + * верный путь, но проблема в текущем API, в котором исторически dbi-хендл + * живет и закрывается вне транзакции. Причем проблема не только в том, + * что нет указателя на текущую пишущую транзакцию, а в том что + * пользователь точно не ожидает что закрытие хендла приведет к + * скрытой/непрозрачной активности внутри транзакции потенциально + * выполняемой в другом потоке. Другими словами, проблема может быть + * только при неверном использовании API и если пользователь это + * допускает, то точно не будет ожидать скрытых действий внутри + * транзакции, и поэтому этот путь потенциально более опасен. */ + const MDBX_txn *const hazard = env->txn; + osal_compiler_barrier(); + if ((dbi_state(env->basal_txn, dbi) & (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO) { + rc = MDBX_DANGLING_DBI; + goto bailout; + } + osal_memory_barrier(); + if (unlikely(hazard != env->txn)) + continue; + if (hazard != env->basal_txn && hazard && (hazard->flags & MDBX_TXN_FINISHED) == 0 && + hazard->signature == txn_signature && + (dbi_state(hazard, dbi) & (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO) { + rc = MDBX_DANGLING_DBI; + goto bailout; + } + osal_compiler_barrier(); + if (likely(hazard == env->txn)) + break; + } + rc = dbi_close_release(env, dbi); return LOG_IFERR(rc); } From 471b14a147e11ee9a8832c3968c17c356d9c3c9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 22 Dec 2024 09:36:48 +0300 Subject: [PATCH 412/443] =?UTF-8?q?mdbx-tests:=20=D0=BF=D1=80=D0=BE=D0=B2?= =?UTF-8?q?=D0=B5=D1=80=D0=BA=D0=B0=20=D1=81=D0=BB=D1=83=D1=87=D0=B0=D1=8F?= =?UTF-8?q?=20=D0=BF=D0=BE=D0=B2=D1=82=D0=BE=D1=80=D0=BD=D0=BE=D0=B3=D0=BE?= 
=?UTF-8?q?=20=D0=B7=D0=B0=D0=BA=D1=80=D1=8B=D1=82=D0=B8=D1=8F=20dbi-?= =?UTF-8?q?=D1=85=D0=B5=D0=BD=D0=B4=D0=BB=D0=B0.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/extra/early_close_dbi.c++ | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/test/extra/early_close_dbi.c++ b/test/extra/early_close_dbi.c++ index 42bb7adc..3eeea42c 100644 --- a/test/extra/early_close_dbi.c++ +++ b/test/extra/early_close_dbi.c++ @@ -98,5 +98,31 @@ int main(int argc, char *argv[]) { err = mdbx_env_close_ex(environment, true); assert(err == MDBX_SUCCESS); + // ------------------------------------------------------------------------- + + auto env = mdbx::env_managed(db_filename, mdbx::env_managed::operate_parameters(2)); + auto txn = env.start_write(); + auto dbi = txn.create_map("keller-case"); + txn.commit(); + + txn = env.start_write(); + txn.rename_map(dbi, "keller-case.renamed"); + txn.commit(); + + txn = env.start_write(); + auto dbi2 = txn.create_map("keller-case"); + txn.drop_map(dbi); + txn.drop_map(dbi2); + txn.commit(); + + err = mdbx_dbi_close(env, dbi); + assert(err == MDBX_BAD_DBI); + if (err != MDBX_BAD_DBI) + return 1; + err = mdbx_dbi_close(env, dbi2); + assert(err == MDBX_BAD_DBI); + if (err != MDBX_BAD_DBI) + return 2; + return 0; } From 00c5bbcc5e2a2a805fcdefd91df3cb3b68ebd5b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 22 Dec 2024 09:53:33 +0300 Subject: [PATCH 413/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index 7da54360..faa27345 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -19,6 +19,13 @@ and [by 
Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/libmdbx Исправления: + - Устранён регресс не-отпускания мьютекса при попытки повторного закрытия dbi-дескриптора, + в том числе при попытке явно закрыть дескриптор после удаления связанной с ним таблицы. + + Вместо возврата ошибки `MDBX_BAD_DBI` происходил выход из тела функции по успешному пути, + но без освобождения захваченной блокировки. + Ошибка была внесена 2024-10-23 коммитом v0.13.1-35-g3049bb87b5b14d83b16d121c186ce8fb3f21383e. + - Устранён регресс состояния вложенного/dupsort курсора после вставки данных в `MDBX_APPEND`-режиме. При добавлении нового ключа в append-режиме, в случае когда в текущей From 214f5d4de467bd5df3a22a0d1a1fe067d8c2923c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 22 Dec 2024 18:30:38 +0300 Subject: [PATCH 414/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20README.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 753034ba..df7e67c8 100644 --- a/README.md +++ b/README.md @@ -191,8 +191,7 @@ and [CoW](https://en.wikipedia.org/wiki/Copy-on-write). - Append operation for efficient bulk insertion of pre-sorted data. -- No [WAL](https://en.wikipedia.org/wiki/Write-ahead_logging) nor any -transaction journal. No crash recovery needed. No maintenance is required. +- No [WAL](https://en.wikipedia.org/wiki/Write-ahead_logging) nor any transaction journal. No crash recovery needed. No maintenance is required. - No internal cache and/or memory management, all done by basic OS services. @@ -264,7 +263,11 @@ the user's point of view. 
> and up to 30% faster when _libmdbx_ compiled with specific build options > which downgrades several runtime checks to be match with LMDB behaviour. > - > These and other results could be easily reproduced with [ioArena](https://abf.io/erthink/ioarena) just by `make bench-quartet` command, + > However, libmdbx may be slower than LMDB on Windows, since it uses the native file locking API. + > These locks are really slow, but they prevent an inconsistent backup from being obtained by copying the DB file during an ongoing write transaction. + > So I think this is the right decision, and for speed, it's better to use Linux, or ask Microsoft to fix up file locks. + > + > The results noted above, and others, could be easily reproduced with [ioArena](https://abf.io/erthink/ioarena) just by `make bench-quartet` command, > including comparisons with [RockDB](https://en.wikipedia.org/wiki/RocksDB) > and [WiredTiger](https://en.wikipedia.org/wiki/WiredTiger). From 98b28213ce06e9c6c3059712e859500a3e72c283 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 22 Dec 2024 18:30:58 +0300 Subject: [PATCH 415/443] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5?= =?UTF-8?q?=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index faa27345..f1b77857 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -9,8 +9,9 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/libmdbx Благодарности: - - [Алексей (Keller) Костюк](https://t.me/keller18306) за сообщения об ошибках и недочетах. + - [Алексею Костюку (aka Keller)](https://t.me/keller18306) за сообщения об ошибках и недочетах. - [Erigon](https://docs.erigon.tech/) за спонсорство.
+ - [Lazymio](https://github.com/wtdcode) за новые [привязки к Python](https://pypi.org/project/libmdbx/). Новое: @@ -19,12 +20,12 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/libmdbx Исправления: - - Устранён регресс не-отпускания мьютекса при попытки повторного закрытия dbi-дескриптора, + - Устранён регресс неразблокировки мьютекса при попытки повторного закрытия dbi-дескриптора, в том числе при попытке явно закрыть дескриптор после удаления связанной с ним таблицы. Вместо возврата ошибки `MDBX_BAD_DBI` происходил выход из тела функции по успешному пути, но без освобождения захваченной блокировки. - Ошибка была внесена 2024-10-23 коммитом v0.13.1-35-g3049bb87b5b14d83b16d121c186ce8fb3f21383e. + Ошибка была внесена 2024-10-23 коммитом `3049bb87b5b14d83b16d121c186ce8fb3f21383e`. - Устранён регресс состояния вложенного/dupsort курсора после вставки данных в `MDBX_APPEND`-режиме. From c8c541649cc681b96f2342ad492c907c92a7c172 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 27 Dec 2024 00:32:09 +0300 Subject: [PATCH 416/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20=D0=BA=D0=BE=D0=BD=D1=82=D1=80=D0=BE?= =?UTF-8?q?=D0=BB=D1=8F=20=D0=B4=D0=BB=D0=B8=D0=BD=D1=8B=20=D0=BA=D0=BB?= =?UTF-8?q?=D1=8E=D1=87=D0=B0=20=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B8=20`cu?= =?UTF-8?q?rsor=5Fseek()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ранее проверка внутри cursor_seek() не позволяла искать ключи длиннее чем можно поместить в таблицу, что при поиске/позиционировании не является ошибкой для ключей переменного размера. 
--- src/cursor.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cursor.c b/src/cursor.c index 5ec88a77..8e92297e 100644 --- a/src/cursor.c +++ b/src/cursor.c @@ -1701,7 +1701,9 @@ __hot csr_t cursor_seek(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cur csr_t ret; ret.exact = false; - if (unlikely(key->iov_len < mc->clc->k.lmin || key->iov_len > mc->clc->k.lmax)) { + if (unlikely(key->iov_len < mc->clc->k.lmin || + (key->iov_len > mc->clc->k.lmax && + (mc->clc->k.lmin == mc->clc->k.lmax || MDBX_DEBUG || MDBX_FORCE_ASSERTIONS)))) { cASSERT(mc, !"Invalid key-size"); ret.err = MDBX_BAD_VALSIZE; return ret; From 26f6fd351a6c42f64708954c44d7e19d2f1b5aeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 27 Dec 2024 09:35:57 +0300 Subject: [PATCH 417/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=BE=D0=B4=D1=81=D1=82?= =?UTF-8?q?=D1=80=D0=BE=D0=B9=D0=BA=D0=B0=20dirty-pages-limit=20=D0=BF?= =?UTF-8?q?=D1=80=D0=B8=20=D1=81=D1=82=D0=B0=D1=80=D1=82=D0=B5=20=D1=82?= =?UTF-8?q?=D1=80=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/api-opts.c | 147 ++++++++++++++++++++++++++++++++++++++---------- src/env.c | 25 +------- src/internals.h | 1 + src/proto.h | 8 ++- src/txn.c | 33 ++++++----- 5 files changed, 143 insertions(+), 71 deletions(-) diff --git a/src/api-opts.c b/src/api-opts.c index c3bab5f2..d470bf25 100644 --- a/src/api-opts.c +++ b/src/api-opts.c @@ -3,7 +3,37 @@ #include "internals.h" -__cold static unsigned default_rp_augment_limit(const MDBX_env *env) { +static pgno_t env_max_pgno(const MDBX_env *env) { + return env->ps ? bytes2pgno(env, env->geo_in_bytes.upper ? 
env->geo_in_bytes.upper : MAX_MAPSIZE) : PAGELIST_LIMIT; +} + +__cold pgno_t default_dp_limit(const MDBX_env *env) { + /* auto-setup dp_limit by "The42" ;-) */ + intptr_t total_ram_pages, avail_ram_pages; + int err = mdbx_get_sysraminfo(nullptr, &total_ram_pages, &avail_ram_pages); + pgno_t dp_limit = 1024; + if (unlikely(err != MDBX_SUCCESS)) + ERROR("mdbx_get_sysraminfo(), rc %d", err); + else { + size_t estimate = (size_t)(total_ram_pages + avail_ram_pages) / 42; + if (env->ps) { + if (env->ps > globals.sys_pagesize) + estimate /= env->ps / globals.sys_pagesize; + else if (env->ps < globals.sys_pagesize) + estimate *= globals.sys_pagesize / env->ps; + } + dp_limit = (pgno_t)estimate; + } + + dp_limit = (dp_limit < PAGELIST_LIMIT) ? dp_limit : PAGELIST_LIMIT; + const pgno_t max_pgno = env_max_pgno(env); + if (dp_limit > max_pgno - NUM_METAS) + dp_limit = max_pgno - NUM_METAS; + dp_limit = (dp_limit > CURSOR_STACK_SIZE * 4) ? dp_limit : CURSOR_STACK_SIZE * 4; + return dp_limit; +} + +__cold static pgno_t default_rp_augment_limit(const MDBX_env *env) { const size_t timeframe = /* 16 секунд */ 16 << 16; const size_t remain_1sec = (env->options.gc_time_limit < timeframe) ? 
timeframe - (size_t)env->options.gc_time_limit : 0; @@ -45,15 +75,51 @@ static uint16_t default_subpage_reserve_limit(const MDBX_env *env) { return 2753 /* 4.2% */; } +static uint16_t default_merge_threshold_16dot16_percent(const MDBX_env *env) { + (void)env; + return 65536 / 4 /* 25% */; +} + +static pgno_t default_dp_reserve_limit(const MDBX_env *env) { + (void)env; + return MDBX_PNL_INITIAL; +} + +static pgno_t default_dp_initial(const MDBX_env *env) { + (void)env; + return MDBX_PNL_INITIAL; +} + +static uint8_t default_spill_max_denominator(const MDBX_env *env) { + (void)env; + return 8; +} + +static uint8_t default_spill_min_denominator(const MDBX_env *env) { + (void)env; + return 8; +} + +static uint8_t default_spill_parent4child_denominator(const MDBX_env *env) { + (void)env; + return 0; +} + +static uint8_t default_dp_loose_limit(const MDBX_env *env) { + (void)env; + return 64; +} + void env_options_init(MDBX_env *env) { - env->options.rp_augment_limit = MDBX_PNL_INITIAL; - env->options.dp_reserve_limit = MDBX_PNL_INITIAL; - env->options.dp_initial = MDBX_PNL_INITIAL; - env->options.spill_max_denominator = 8; - env->options.spill_min_denominator = 8; - env->options.spill_parent4child_denominator = 0; - env->options.dp_loose_limit = 64; - env->options.merge_threshold_16dot16_percent = 65536 / 4 /* 25% */; + env->options.rp_augment_limit = default_rp_augment_limit(env); + env->options.dp_reserve_limit = default_dp_reserve_limit(env); + env->options.dp_initial = default_dp_initial(env); + env->options.dp_limit = default_dp_limit(env); + env->options.spill_max_denominator = default_spill_max_denominator(env); + env->options.spill_min_denominator = default_spill_min_denominator(env); + env->options.spill_parent4child_denominator = default_spill_parent4child_denominator(env); + env->options.dp_loose_limit = default_dp_loose_limit(env); + env->options.merge_threshold_16dot16_percent = default_merge_threshold_16dot16_percent(env); if 
(default_prefer_waf_insteadof_balance(env)) env->options.prefer_waf_insteadof_balance = true; @@ -71,12 +137,31 @@ void env_options_init(MDBX_env *env) { env->options.subpage.reserve_limit = default_subpage_reserve_limit(env); } +void env_options_adjust_dp_limit(MDBX_env *env) { + if (!env->options.flags.non_auto.dp_limit) + env->options.dp_limit = default_dp_limit(env); + else { + const pgno_t max_pgno = env_max_pgno(env); + if (env->options.dp_limit > max_pgno - NUM_METAS) + env->options.dp_limit = max_pgno - NUM_METAS; + if (env->options.dp_limit < CURSOR_STACK_SIZE * 4) + env->options.dp_limit = CURSOR_STACK_SIZE * 4; + } + if (env->options.dp_initial > env->options.dp_limit && env->options.dp_initial > default_dp_initial(env)) + env->options.dp_initial = env->options.dp_limit; + env->options.need_dp_limit_adjust = false; +} + void env_options_adjust_defaults(MDBX_env *env) { if (!env->options.flags.non_auto.rp_augment_limit) env->options.rp_augment_limit = default_rp_augment_limit(env); if (!env->options.flags.non_auto.prefault_write) env->options.prefault_write = default_prefault_write(env); + env->options.need_dp_limit_adjust = true; + if (!env->txn) + env_options_adjust_dp_limit(env); + const size_t basis = env->geo_in_bytes.now; /* TODO: use options? */ const unsigned factor = 9; @@ -85,7 +170,6 @@ void env_options_adjust_defaults(MDBX_env *env) { : basis >> factor; threshold = (threshold < env->geo_in_bytes.shrink || !env->geo_in_bytes.shrink) ? 
threshold : env->geo_in_bytes.shrink; - env->madv_threshold = bytes2pgno(env, bytes_align2os_bytes(env, threshold)); } @@ -163,7 +247,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, uint64 case MDBX_opt_dp_reserve_limit: if (value == /* default */ UINT64_MAX) - value = INT_MAX; + value = default_dp_reserve_limit(env); if (unlikely(value > INT_MAX)) return LOG_IFERR(MDBX_EINVAL); if (env->options.dp_reserve_limit != (unsigned)value) { @@ -218,9 +302,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, uint64 case MDBX_opt_txn_dp_limit: case MDBX_opt_txn_dp_initial: - if (value == /* default */ UINT64_MAX) - value = PAGELIST_LIMIT; - if (unlikely(value > PAGELIST_LIMIT || value < CURSOR_STACK_SIZE * 4)) + if (value != /* default */ UINT64_MAX && unlikely(value > PAGELIST_LIMIT || value < CURSOR_STACK_SIZE * 4)) return LOG_IFERR(MDBX_EINVAL); if (unlikely(env->flags & MDBX_RDONLY)) return LOG_IFERR(MDBX_EACCESS); @@ -233,40 +315,45 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, uint64 if (env->txn) err = MDBX_EPERM /* unable change during transaction */; else { - const pgno_t value32 = (pgno_t)value; - if (option == MDBX_opt_txn_dp_initial && env->options.dp_initial != value32) { - env->options.dp_initial = value32; - if (env->options.dp_limit < value32) { - env->options.dp_limit = value32; - env->options.flags.non_auto.dp_limit = 1; + const pgno_t max_pgno = env_max_pgno(env); + if (option == MDBX_opt_txn_dp_initial) { + if (value == /* default */ UINT64_MAX) + env->options.dp_initial = default_dp_initial(env); + else { + env->options.dp_initial = (pgno_t)value; + if (env->options.dp_initial > max_pgno) + env->options.dp_initial = (max_pgno > CURSOR_STACK_SIZE * 4) ? 
max_pgno : CURSOR_STACK_SIZE * 4; } } - if (option == MDBX_opt_txn_dp_limit && env->options.dp_limit != value32) { - env->options.dp_limit = value32; - env->options.flags.non_auto.dp_limit = 1; - if (env->options.dp_initial > value32) - env->options.dp_initial = value32; + if (option == MDBX_opt_txn_dp_limit) { + if (value == /* default */ UINT64_MAX) { + env->options.flags.non_auto.dp_limit = 0; + } else { + env->options.flags.non_auto.dp_limit = 1; + env->options.dp_limit = (pgno_t)value; + } + env_options_adjust_dp_limit(env); } } break; case MDBX_opt_spill_max_denominator: if (value == /* default */ UINT64_MAX) - value = 8; + value = default_spill_max_denominator(env); if (unlikely(value > 255)) return LOG_IFERR(MDBX_EINVAL); env->options.spill_max_denominator = (uint8_t)value; break; case MDBX_opt_spill_min_denominator: if (value == /* default */ UINT64_MAX) - value = 8; + value = default_spill_min_denominator(env); if (unlikely(value > 255)) return LOG_IFERR(MDBX_EINVAL); env->options.spill_min_denominator = (uint8_t)value; break; case MDBX_opt_spill_parent4child_denominator: if (value == /* default */ UINT64_MAX) - value = 0; + value = default_spill_parent4child_denominator(env); if (unlikely(value > 255)) return LOG_IFERR(MDBX_EINVAL); env->options.spill_parent4child_denominator = (uint8_t)value; @@ -274,7 +361,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, uint64 case MDBX_opt_loose_limit: if (value == /* default */ UINT64_MAX) - value = 64; + value = default_dp_loose_limit(env); if (unlikely(value > 255)) return LOG_IFERR(MDBX_EINVAL); env->options.dp_loose_limit = (uint8_t)value; @@ -282,7 +369,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, uint64 case MDBX_opt_merge_threshold_16dot16_percent: if (value == /* default */ UINT64_MAX) - value = 65536 / 4 /* 25% */; + value = default_merge_threshold_16dot16_percent(env); if (unlikely(value < 8192 || value > 32768)) return LOG_IFERR(MDBX_EINVAL); 
env->options.merge_threshold_16dot16_percent = (unsigned)value; diff --git a/src/env.c b/src/env.c index 5de253f4..58ee0346 100644 --- a/src/env.c +++ b/src/env.c @@ -52,30 +52,7 @@ __cold unsigned env_setup_pagesize(MDBX_env *env, const size_t pagesize) { eASSERT(env, bytes2pgno(env, pagesize + pagesize) == 2); recalculate_merge_thresholds(env); recalculate_subpage_thresholds(env); - - const pgno_t max_pgno = bytes2pgno(env, MAX_MAPSIZE); - if (!env->options.flags.non_auto.dp_limit) { - /* auto-setup dp_limit by "The42" ;-) */ - intptr_t total_ram_pages, avail_ram_pages; - int err = mdbx_get_sysraminfo(nullptr, &total_ram_pages, &avail_ram_pages); - if (unlikely(err != MDBX_SUCCESS)) - ERROR("mdbx_get_sysraminfo(), rc %d", err); - else { - size_t reasonable_dpl_limit = (size_t)(total_ram_pages + avail_ram_pages) / 42; - if (pagesize > globals.sys_pagesize) - reasonable_dpl_limit /= pagesize / globals.sys_pagesize; - else if (pagesize < globals.sys_pagesize) - reasonable_dpl_limit *= globals.sys_pagesize / pagesize; - reasonable_dpl_limit = (reasonable_dpl_limit < PAGELIST_LIMIT) ? reasonable_dpl_limit : PAGELIST_LIMIT; - reasonable_dpl_limit = - (reasonable_dpl_limit > CURSOR_STACK_SIZE * 4) ? 
reasonable_dpl_limit : CURSOR_STACK_SIZE * 4; - env->options.dp_limit = (unsigned)reasonable_dpl_limit; - } - } - if (env->options.dp_limit > max_pgno - NUM_METAS) - env->options.dp_limit = max_pgno - NUM_METAS; - if (env->options.dp_initial > env->options.dp_limit) - env->options.dp_initial = env->options.dp_limit; + env_options_adjust_dp_limit(env); return env->ps; } diff --git a/src/internals.h b/src/internals.h index ddef1fcf..ea76b2f4 100644 --- a/src/internals.h +++ b/src/internals.h @@ -401,6 +401,7 @@ struct MDBX_env { bool prefault_write; bool prefer_waf_insteadof_balance; /* Strive to minimize WAF instead of balancing pages fullment */ + bool need_dp_limit_adjust; struct { uint16_t limit; uint16_t room_threshold; diff --git a/src/proto.h b/src/proto.h index bb8d1386..24ad39a0 100644 --- a/src/proto.h +++ b/src/proto.h @@ -77,11 +77,15 @@ MDBX_INTERNAL int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinf MDBX_INTERNAL int env_sync(MDBX_env *env, bool force, bool nonblock); MDBX_INTERNAL int env_close(MDBX_env *env, bool resurrect_after_fork); MDBX_INTERNAL bool env_txn0_owned(const MDBX_env *env); -MDBX_INTERNAL void env_options_init(MDBX_env *env); -MDBX_INTERNAL void env_options_adjust_defaults(MDBX_env *env); MDBX_INTERNAL int __must_check_result env_page_auxbuffer(MDBX_env *env); MDBX_INTERNAL unsigned env_setup_pagesize(MDBX_env *env, const size_t pagesize); +/* api-opt.c */ +MDBX_INTERNAL void env_options_init(MDBX_env *env); +MDBX_INTERNAL void env_options_adjust_defaults(MDBX_env *env); +MDBX_INTERNAL void env_options_adjust_dp_limit(MDBX_env *env); +MDBX_INTERNAL pgno_t default_dp_limit(const MDBX_env *env); + /* tree.c */ MDBX_INTERNAL int tree_drop(MDBX_cursor *mc, const bool may_have_tables); MDBX_INTERNAL int __must_check_result tree_rebalance(MDBX_cursor *mc); diff --git a/src/txn.c b/src/txn.c index f2759b88..42203580 100644 --- a/src/txn.c +++ b/src/txn.c @@ -654,21 +654,6 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { if 
(txn->tw.gc.retxl) MDBX_PNL_SETSIZE(txn->tw.gc.retxl, 0); env->txn = txn; - - if ((txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) { - rc = dpl_alloc(txn); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - txn->tw.dirtyroom = txn->env->options.dp_limit; - txn->tw.dirtylru = MDBX_DEBUG ? UINT32_MAX / 3 - 42 : 0; - } else { - tASSERT(txn, txn->tw.dirtylist == nullptr); - txn->tw.dirtylist = nullptr; - txn->tw.dirtyroom = MAX_PAGENO; - txn->tw.dirtylru = 0; - } - eASSERT(env, txn->tw.writemap_dirty_npages == 0); - eASSERT(env, txn->tw.writemap_spilled_npages == 0); } txn->front_txnid = txn->txnid + ((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == 0); @@ -834,6 +819,24 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { #endif /* Windows */ } else { tASSERT(txn, txn == env->basal_txn); + + if (env->options.need_dp_limit_adjust) + env_options_adjust_dp_limit(env); + if ((txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) { + rc = dpl_alloc(txn); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + txn->tw.dirtyroom = txn->env->options.dp_limit; + txn->tw.dirtylru = MDBX_DEBUG ? 
UINT32_MAX / 3 - 42 : 0; + } else { + tASSERT(txn, txn->tw.dirtylist == nullptr); + txn->tw.dirtylist = nullptr; + txn->tw.dirtyroom = MAX_PAGENO; + txn->tw.dirtylru = 0; + } + eASSERT(env, txn->tw.writemap_dirty_npages == 0); + eASSERT(env, txn->tw.writemap_spilled_npages == 0); + MDBX_cursor *const gc = ptr_disp(txn, sizeof(MDBX_txn)); rc = cursor_init(gc, txn, FREE_DBI); if (rc != MDBX_SUCCESS) From df8b15f6397086aead5e8a7d7ec3c1e0d2236479 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 28 Dec 2024 09:38:08 +0300 Subject: [PATCH 418/443] =?UTF-8?q?mdbx:=20`const`=20=D0=B4=D0=BB=D1=8F=20?= =?UTF-8?q?=D1=82=D1=80=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B8=20?= =?UTF-8?q?=D0=B2=20`txn=5Ftake=5Fgcprof()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/proto.h | 2 +- src/txn.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/proto.h b/src/proto.h index 24ad39a0..857a6794 100644 --- a/src/proto.h +++ b/src/proto.h @@ -68,7 +68,7 @@ enum { }; MDBX_INTERNAL int txn_end(MDBX_txn *txn, unsigned mode); MDBX_INTERNAL int txn_write(MDBX_txn *txn, iov_ctx_t *ctx); -MDBX_INTERNAL void txn_take_gcprof(MDBX_txn *txn, MDBX_commit_latency *latency); +MDBX_INTERNAL void txn_take_gcprof(const MDBX_txn *txn, MDBX_commit_latency *latency); MDBX_INTERNAL void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t parent_retired_len); /* env.c */ diff --git a/src/txn.c b/src/txn.c index 42203580..db3af6ed 100644 --- a/src/txn.c +++ b/src/txn.c @@ -395,7 +395,7 @@ void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t parent_ } } -void txn_take_gcprof(MDBX_txn *txn, MDBX_commit_latency *latency) { +void txn_take_gcprof(const MDBX_txn *txn, MDBX_commit_latency *latency) { MDBX_env *const env = txn->env; if (MDBX_ENABLE_PROFGC) { pgop_stat_t *const ptr = 
&env->lck->pgops; From 5ff508093503f531edfac6723feef17848712bdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 28 Dec 2024 09:52:19 +0300 Subject: [PATCH 419/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index f1b77857..c055445e 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -77,6 +77,16 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/libmdbx - В утилите тестирования значение режима данных переименовано из `data.dups` в `data.multi`. + - Доработан контроль длины ключа внутри `cursor_seek()`. + + Ранее проверка внутри `cursor_seek()` не позволяла искать ключи длиннее, чем можно поместить в таблицу. + Однако, при поиске/позиционировании это не является ошибкой для таблиц с ключами переменного размера. + + - Если посредством `mdbx_env_set_option(MDBX_opt_txn_dp_limit)` пользователем не задано собственно значение, + то выполняется подстройка dirty-pages-limit при старте каждой не-вложенной пишущей транзакций, + исходя из объёма доступного ОЗУ и размера БД. 
+ + -------------------------------------------------------------------------------- From 63dba2876d5f8d30b0dc09a98159691157cd8a12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 28 Dec 2024 22:56:17 +0300 Subject: [PATCH 420/443] =?UTF-8?q?mdbx-doc:=20=D0=BA=D0=BE=D1=80=D1=80?= =?UTF-8?q?=D0=B5=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20=D0=BE?= =?UTF-8?q?=D0=BF=D0=B8=D1=81=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=B7=D0=BD=D0=B0?= =?UTF-8?q?=D1=87=D0=B5=D0=BD=D0=B8=D1=8F=20=D0=BF=D0=BE-=D1=83=D0=BC?= =?UTF-8?q?=D0=BE=D0=BB=D1=87=D0=B0=D0=BD=D0=B8=D1=8E=20`MDBX=5Fopt=5Ftxn?= =?UTF-8?q?=5Fdp=5Flimit`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mdbx.h b/mdbx.h index 771402ee..ffb8ef69 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2190,7 +2190,8 @@ typedef enum MDBX_option { * spill to disk instead. * * The `MDBX_opt_txn_dp_limit` controls described threshold for the current - * process. Default is 65536, it is usually enough for most cases. */ + * process. Default is 1/42 of the sum of whole and currently available RAM + * size, which the same ones are reported by \ref mdbx_get_sysraminfo(). 
*/ MDBX_opt_txn_dp_limit, /** \brief Controls the in-process initial allocation size for dirty pages From 1e4e2eb3c8d747c4a9b338d06a1fe0431b35b2cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 29 Dec 2024 08:42:48 +0300 Subject: [PATCH 421/443] =?UTF-8?q?mdbx-doc:=20=D0=B8=D1=81=D0=BF=D1=80?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D0=B5?= =?UTF-8?q?=D1=87=D0=B0=D1=82=D0=BA=D0=B8=20=D0=B2=20=D0=BA=D0=BE=D0=BC?= =?UTF-8?q?=D0=BC=D0=B5=D0=BD=D1=82=D0=B0=D1=80=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/internals.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/internals.h b/src/internals.h index ea76b2f4..8af6995f 100644 --- a/src/internals.h +++ b/src/internals.h @@ -122,7 +122,7 @@ typedef struct clc { * использования такого компаратора. * - размер kvx_t становится равным 8 словам. * - * Трюки и прочая экономия на списках: + * Трюки и прочая экономия на спичках: * - не храним dbi внутри курсора, вместо этого вычисляем его как разницу между * dbi_state курсора и началом таблицы dbi_state в транзакции. Смысл тут в * экономии кол-ва полей при инициализации курсора. 
Затрат это не создает, From 1bf008ac16949d003668887895c07824e31e6e47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 30 Dec 2024 17:49:42 +0300 Subject: [PATCH 422/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20=D0=BA=D0=BE=D0=BD=D1=82=D1=80=D0=BE?= =?UTF-8?q?=D0=BB=D1=8F=20=D0=BF=D0=BE=D1=82=D0=BE=D0=BA=D0=B0-=D0=B2?= =?UTF-8?q?=D0=BB=D0=B0=D0=B4=D0=B5=D0=BB=D1=8C=D1=86=D0=B0=20=D1=82=D1=80?= =?UTF-8?q?=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Теперь допускается commit/abort вложенных транзакций из любого треда в режиме MDBX_NOSTICKYTHREADS. 2. Более наглядные/явные проверки без зависимости от больше/меньше. Одна проверка внутри check_txn() для всех основных случаев (bad_bits != 0) и две проверки для abort/reset/break (bad_bits == 0). 
+-------------------------------------------------------------------------------------------------------+ | Три анализируемых txn->flags | Проверка txn->owner == osal_thread_self() | +-----------------+------------+--------------+-----------------------+---------------------------------+ | NOSTICKYTHREADS | TXN_RDONLY | TXN_FINISHED | usual (bad_bits != 0) | abort/reset/break (bad_bits==0) | | - | - | - | + | + | | - | - | + | + | + | | - | + | - | + | + | | - | + | + | + | - | | + | - | - | - | - | | + | - | + | + | + | | + | + | - | - | - | | + | + | + | + | - | +-------------------------------------------------------------------------------------------------------+ --- src/api-txn.c | 25 ++++++++++++++++--------- src/cogs.h | 25 ++++++++++++++----------- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/src/api-txn.c b/src/api-txn.c index 3e930f04..24598992 100644 --- a/src/api-txn.c +++ b/src/api-txn.c @@ -385,6 +385,9 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { const uint64_t ts_0 = latency ? 
osal_monotime() : 0; uint64_t ts_1 = 0, ts_2 = 0, ts_3 = 0, ts_4 = 0, ts_5 = 0, gc_cputime = 0; + /* txn_end() mode for a commit which writes nothing */ + unsigned end_mode = TXN_END_PURE_COMMIT | TXN_END_UPDATE | TXN_END_SLOT | TXN_END_FREE; + int rc = check_txn(txn, MDBX_TXN_FINISHED); if (unlikely(rc != MDBX_SUCCESS)) { if (rc == MDBX_BAD_TXN && (txn->flags & MDBX_TXN_RDONLY)) { @@ -404,18 +407,22 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { goto bailout; } - if (unlikely(txn->flags & MDBX_TXN_ERROR)) { - rc = MDBX_RESULT_TRUE; - goto fail; + if (unlikely(txn->flags & MDBX_TXN_RDONLY)) { + if (txn->flags & MDBX_TXN_ERROR) { + rc = MDBX_RESULT_TRUE; + goto fail; + } + goto done; } - /* txn_end() mode for a commit which writes nothing */ - unsigned end_mode = TXN_END_PURE_COMMIT | TXN_END_UPDATE | TXN_END_SLOT | TXN_END_FREE; - if (unlikely(txn->flags & MDBX_TXN_RDONLY)) - goto done; - - if ((txn->flags & MDBX_NOSTICKYTHREADS) && unlikely(txn->owner != osal_thread_self())) { + if (!txn->parent && (txn->flags & MDBX_NOSTICKYTHREADS) && unlikely(txn->owner != osal_thread_self())) { + txn->flags |= MDBX_TXN_ERROR; rc = MDBX_THREAD_MISMATCH; + return LOG_IFERR(rc); + } + + if (unlikely(txn->flags & MDBX_TXN_ERROR)) { + rc = MDBX_RESULT_TRUE; goto fail; } diff --git a/src/cogs.h b/src/cogs.h index 41d79b40..5b8f20c7 100644 --- a/src/cogs.h +++ b/src/cogs.h @@ -400,32 +400,35 @@ static inline int check_env(const MDBX_env *env, const bool wanna_active) { return MDBX_SUCCESS; } -static inline int check_txn(const MDBX_txn *txn, int bad_bits) { +static __always_inline int check_txn(const MDBX_txn *txn, int bad_bits) { if (unlikely(!txn)) return MDBX_EINVAL; if (unlikely(txn->signature != txn_signature)) return MDBX_EBADSIGN; - if (bad_bits && unlikely(txn->flags & bad_bits)) { - if ((bad_bits & MDBX_TXN_PARKED) == 0) - return MDBX_BAD_TXN; - else - return txn_check_badbits_parked(txn, bad_bits); + if (bad_bits) { + if 
(unlikely(!txn->env->dxb_mmap.base)) + return MDBX_EPERM; + + if (unlikely(txn->flags & bad_bits)) { + if ((bad_bits & MDBX_TXN_PARKED) == 0) + return MDBX_BAD_TXN; + else + return txn_check_badbits_parked(txn, bad_bits); + } } tASSERT(txn, (txn->flags & MDBX_TXN_FINISHED) || (txn->flags & MDBX_NOSTICKYTHREADS) == (txn->env->flags & MDBX_NOSTICKYTHREADS)); #if MDBX_TXN_CHECKOWNER - STATIC_ASSERT((long)MDBX_NOSTICKYTHREADS > (long)MDBX_TXN_FINISHED); - if ((txn->flags & (MDBX_NOSTICKYTHREADS | MDBX_TXN_FINISHED)) < MDBX_TXN_FINISHED && + if ((txn->flags & (MDBX_NOSTICKYTHREADS | MDBX_TXN_FINISHED)) != MDBX_NOSTICKYTHREADS && + !(bad_bits /* abort/reset/txn-break */ == 0 && + ((txn->flags & (MDBX_TXN_RDONLY | MDBX_TXN_FINISHED)) == (MDBX_TXN_RDONLY | MDBX_TXN_FINISHED))) && unlikely(txn->owner != osal_thread_self())) return txn->owner ? MDBX_THREAD_MISMATCH : MDBX_BAD_TXN; #endif /* MDBX_TXN_CHECKOWNER */ - if (bad_bits && unlikely(!txn->env->dxb_mmap.base)) - return MDBX_EPERM; - return MDBX_SUCCESS; } From 0a364aefbb78721a4decea8ec48b44998800d9af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 2 Jan 2025 11:15:19 +0300 Subject: [PATCH 423/443] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`txn::make=5Fbroken()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 2 +- mdbx.h++ | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/mdbx.h b/mdbx.h index ffb8ef69..af01e309 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4235,7 +4235,7 @@ LIBMDBX_INLINE_API(int, mdbx_txn_commit, (MDBX_txn * txn)) { return mdbx_txn_com * \retval MDBX_EINVAL Transaction handle is NULL. */ LIBMDBX_API int mdbx_txn_abort(MDBX_txn *txn); -/** \brief Marks transaction as broken. +/** \brief Marks transaction as broken to prevent further operations. 
* \ingroup c_transactions * * Function keeps the transaction handle and corresponding locks, but makes diff --git a/mdbx.h++ b/mdbx.h++ index b5e28262..2d895d03 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3799,6 +3799,9 @@ public: /// \brief Renew read-only transaction. inline void renew_reading(); + /// \brief Marks transaction as broken to prevent further operations. + inline void make_broken(); + /// \brief Park read-only transaction. inline void park_reading(bool autounpark = true); @@ -5578,6 +5581,8 @@ inline uint64_t txn::id() const { inline void txn::reset_reading() { error::success_or_throw(::mdbx_txn_reset(handle_)); } +inline void txn::make_broken() { error::success_or_throw(::mdbx_txn_break(handle_)); } + inline void txn::renew_reading() { error::success_or_throw(::mdbx_txn_renew(handle_)); } inline void txn::park_reading(bool autounpark) { error::success_or_throw(::mdbx_txn_park(handle_, autounpark)); } From dc98f06d2c316869f9e7fd226d903980e826b180 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 3 Jan 2025 21:23:45 +0300 Subject: [PATCH 424/443] =?UTF-8?q?mdbx:=20=D0=BB=D0=BE=D0=B3=D0=B8=D1=80?= =?UTF-8?q?=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20=D0=B8=20=D0=B2=D0=BE?= =?UTF-8?q?=D0=B7=D0=B2=D1=80=D0=B0=D1=82=20`MDBX=5FINCOMPATIBLE`=20=D0=BF?= =?UTF-8?q?=D1=80=D0=B8=20=D0=BF=D0=BE=D0=BF=D1=8B=D1=82=D0=BA=D0=B5=20?= =?UTF-8?q?=D0=B7=D0=B0=D0=BF=D1=83=D1=81=D0=BA=D0=B0=20=D0=B2=D0=BB=D0=BE?= =?UTF-8?q?=D0=B6=D0=B5=D0=BD=D0=BD=D1=8B=D1=85=20=D1=82=D1=80=D0=B0=D0=BD?= =?UTF-8?q?=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B9=20=D0=B2=20=D1=80=D0=B5?= =?UTF-8?q?=D0=B6=D0=B8=D0=BC=D0=B5=20`MDBX=5FWRITEMAP`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/api-txn.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/api-txn.c b/src/api-txn.c index 24598992..6831fd5d 100644 --- 
a/src/api-txn.c +++ b/src/api-txn.c @@ -199,8 +199,13 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, M if (parent) { /* Nested transactions: Max 1 child, write txns only, no writemap */ rc = check_txn_rw(parent, MDBX_TXN_RDONLY | MDBX_WRITEMAP | MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc == MDBX_BAD_TXN && (parent->flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED)) == 0) { + ERROR("%s mode is incompatible with nested transactions", "MDBX_WRITEMAP"); + rc = MDBX_INCOMPATIBLE; + } return LOG_IFERR(rc); + } if (env->options.spill_parent4child_denominator) { /* Spill dirty-pages of parent to provide dirtyroom for child txn */ From 10ac9a9c503221dc576089043292692f58e72881 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 4 Jan 2025 04:01:41 +0300 Subject: [PATCH 425/443] =?UTF-8?q?mdbx-tests:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`extra/txn`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 1 + test/extra/txn.c++ | 292 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 293 insertions(+) create mode 100644 test/extra/txn.c++ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5a162685..28c4e74e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -306,6 +306,7 @@ else() add_extra_test(crunched_delete TIMEOUT 10800) add_extra_test(dbi) add_extra_test(open) + add_extra_test(txn) endif() add_extra_test(hex_base64_base58) endif() diff --git a/test/extra/txn.c++ b/test/extra/txn.c++ new file mode 100644 index 00000000..d817cc7e --- /dev/null +++ b/test/extra/txn.c++ @@ -0,0 +1,292 @@ +#include "mdbx.h++" + +#include <iostream> + +#if !defined(__cpp_lib_latch) || __cpp_lib_latch < 201907L + +int main(int argc, const char *argv[]) { + (void)argc; + 
(void)argv; + std::cout << "FAKE-OK (since no C++20 std::thread and/or std::latch)\n"; + return EXIT_SUCCESS; +} + +#else + +#include <latch> +#include <thread> + +static char log_buffer[1024]; + +static void logger_nofmt(MDBX_log_level_t loglevel, const char *function, int line, const char *msg, + unsigned length) noexcept { + (void)length; + (void)loglevel; + fprintf(stdout, "%s:%d %s", function, line, msg); +} + +int main(int argc, const char *argv[]) { + (void)argc; + (void)argv; + bool ok = true; + int err; + + mdbx_setup_debug_nofmt(MDBX_LOG_VERBOSE, MDBX_DBG_ASSERT, logger_nofmt, log_buffer, sizeof(log_buffer)); + + mdbx::path path = "test-txn"; + mdbx::env::remove(path); + mdbx::env::operate_parameters operateParameters(100, 10); + + { + mdbx::env_managed::create_parameters createParameters; + createParameters.geometry.make_dynamic(21 * mdbx::env::geometry::MiB, 84 * mdbx::env::geometry::MiB); + + operateParameters.options.no_sticky_threads = false; + mdbx::env_managed env(path, createParameters, operateParameters); + auto txn = env.start_write(false); + /* mdbx::map_handle testHandle = */ txn.create_map("xyz", mdbx::key_mode::usual, mdbx::value_mode::single); + txn.commit(); + + //------------------------------------- + txn = env.start_write(); + MDBX_txn *c_txn = txn; + err = mdbx_txn_reset(txn); + assert(err == MDBX_EINVAL); + ok = ok && err == MDBX_EINVAL; + + err = mdbx_txn_break(txn); + assert(err == MDBX_SUCCESS); + ok = ok && err == MDBX_SUCCESS; + + err = mdbx_txn_commit(txn); + assert(err == MDBX_RESULT_TRUE); + ok = ok && err == MDBX_RESULT_TRUE; + + //------------------------------------- + err = mdbx_txn_begin(env, nullptr, MDBX_TXN_READWRITE, &c_txn); + assert(err == MDBX_SUCCESS); + ok = ok && err == MDBX_SUCCESS; + assert(c_txn == (const MDBX_txn *)txn); + + err = mdbx_txn_break(txn); + assert(err == MDBX_SUCCESS); + ok = ok && err == MDBX_SUCCESS; + + err = mdbx_txn_reset(txn); + assert(err == MDBX_EINVAL); + ok = ok && err == MDBX_EINVAL; + + err = 
mdbx_txn_commit(txn); + assert(err == MDBX_RESULT_TRUE); + ok = ok && err == MDBX_RESULT_TRUE; + + err = mdbx_txn_abort(c_txn); + assert(err == MDBX_BAD_TXN); + ok = ok && err == MDBX_BAD_TXN; + //------------------------------------- + err = mdbx_txn_begin(env, nullptr, MDBX_TXN_READWRITE, &c_txn); + assert(err == MDBX_SUCCESS); + ok = ok && err == MDBX_SUCCESS; + assert(c_txn == (const MDBX_txn *)txn); + txn.commit(); + + err = mdbx_txn_reset(c_txn); + assert(err == MDBX_BAD_TXN); + ok = ok && err == MDBX_BAD_TXN; + err = mdbx_txn_break(c_txn); + assert(err == MDBX_BAD_TXN); + ok = ok && err == MDBX_BAD_TXN; + err = mdbx_txn_abort(c_txn); + assert(err == MDBX_BAD_TXN); + ok = ok && err == MDBX_BAD_TXN; + + //===================================== + + txn = env.start_read(); + err = mdbx_txn_begin(env, txn, MDBX_TXN_READWRITE, &c_txn); + assert(err == MDBX_BAD_TXN); + ok = ok && err == MDBX_BAD_TXN; + txn.make_broken(); + err = mdbx_txn_begin(env, txn, MDBX_TXN_READWRITE, &c_txn); + assert(err == MDBX_BAD_TXN); + ok = ok && err == MDBX_BAD_TXN; + txn.reset_reading(); + err = mdbx_txn_begin(env, txn, MDBX_TXN_READWRITE, &c_txn); + assert(err == MDBX_BAD_TXN); + ok = ok && err == MDBX_BAD_TXN; + txn.abort(); + + //------------------------------------- + + txn = env.start_read(); + txn.reset_reading(); + txn.make_broken(); + txn.abort(); + + //===================================== + + std::latch s(1); + txn = env.start_read(); + c_txn = txn; + + std::thread t([&]() { + s.wait(); + err = mdbx_txn_reset(c_txn); + assert(err == MDBX_THREAD_MISMATCH); + ok = ok && err == MDBX_THREAD_MISMATCH; + err = mdbx_txn_break(c_txn); + assert(err == MDBX_THREAD_MISMATCH); + ok = ok && err == MDBX_THREAD_MISMATCH; + err = mdbx_txn_commit(c_txn); + assert(err == MDBX_THREAD_MISMATCH); + ok = ok && err == MDBX_THREAD_MISMATCH; + err = mdbx_txn_abort(c_txn); + assert(err == MDBX_THREAD_MISMATCH); + ok = ok && err == MDBX_THREAD_MISMATCH; + err = mdbx_txn_begin(env, txn, 
MDBX_TXN_READWRITE, &c_txn); + assert(err == MDBX_BAD_TXN); + ok = ok && err == MDBX_BAD_TXN; + }); + + s.count_down(); + t.join(); + } + + //===================================== + //===================================== + + { + operateParameters.options.no_sticky_threads = true; + operateParameters.options.nested_write_transactions = true; + mdbx::env_managed env(path, operateParameters); + + //------------------------------------- + auto txn = env.start_write(); + MDBX_txn *c_txn = txn; + err = mdbx_txn_reset(txn); + assert(err == MDBX_EINVAL); + ok = ok && err == MDBX_EINVAL; + + err = mdbx_txn_break(txn); + assert(err == MDBX_SUCCESS); + ok = ok && err == MDBX_SUCCESS; + + err = mdbx_txn_commit(txn); + assert(err == MDBX_RESULT_TRUE); + ok = ok && err == MDBX_RESULT_TRUE; + + //------------------------------------- + err = mdbx_txn_begin(env, nullptr, MDBX_TXN_READWRITE, &c_txn); + assert(err == MDBX_SUCCESS); + ok = ok && err == MDBX_SUCCESS; + assert(c_txn == (const MDBX_txn *)txn); + + err = mdbx_txn_break(txn); + assert(err == MDBX_SUCCESS); + ok = ok && err == MDBX_SUCCESS; + + err = mdbx_txn_reset(txn); + assert(err == MDBX_EINVAL); + ok = ok && err == MDBX_EINVAL; + + err = mdbx_txn_commit(txn); + assert(err == MDBX_RESULT_TRUE); + ok = ok && err == MDBX_RESULT_TRUE; + + err = mdbx_txn_abort(c_txn); + assert(err == MDBX_BAD_TXN); + ok = ok && err == MDBX_BAD_TXN; + //------------------------------------- + err = mdbx_txn_begin(env, nullptr, MDBX_TXN_READWRITE, &c_txn); + assert(err == MDBX_SUCCESS); + ok = ok && err == MDBX_SUCCESS; + assert(c_txn == (const MDBX_txn *)txn); + txn.commit(); + + err = mdbx_txn_reset(c_txn); + assert(err == MDBX_BAD_TXN); + ok = ok && err == MDBX_BAD_TXN; + err = mdbx_txn_break(c_txn); + assert(err == MDBX_BAD_TXN); + ok = ok && err == MDBX_BAD_TXN; + err = mdbx_txn_abort(c_txn); + assert(err == MDBX_BAD_TXN); + ok = ok && err == MDBX_BAD_TXN; + + //===================================== + + txn = env.start_read(); + err = 
mdbx_txn_begin(env, txn, MDBX_TXN_READWRITE, &c_txn); + assert(err == MDBX_BAD_TXN); + ok = ok && err == MDBX_BAD_TXN; + txn.make_broken(); + err = mdbx_txn_begin(env, txn, MDBX_TXN_READWRITE, &c_txn); + assert(err == MDBX_BAD_TXN); + ok = ok && err == MDBX_BAD_TXN; + txn.reset_reading(); + err = mdbx_txn_begin(env, txn, MDBX_TXN_READWRITE, &c_txn); + assert(err == MDBX_BAD_TXN); + ok = ok && err == MDBX_BAD_TXN; + txn.abort(); + + //------------------------------------- + + txn = env.start_read(); + txn.reset_reading(); + txn.make_broken(); + txn.abort(); + + //===================================== + + std::latch s1(1), s2(1), s3(1); + txn = env.start_read(); + c_txn = txn; + + std::thread t([&]() { + s1.wait(); + err = mdbx_txn_break(c_txn); + assert(err == MDBX_SUCCESS); + ok = ok && err == MDBX_SUCCESS; + err = mdbx_txn_reset(c_txn); + assert(err == MDBX_SUCCESS); + ok = ok && err == MDBX_SUCCESS; + txn.renew_reading(); + s2.count_down(); + + s3.wait(); + err = mdbx_txn_begin(env, txn, MDBX_TXN_READWRITE, &c_txn); + assert(err == MDBX_SUCCESS); + ok = ok && err == MDBX_SUCCESS; + err = mdbx_txn_commit(c_txn); + assert(err == MDBX_SUCCESS); + ok = ok && err == MDBX_SUCCESS; + c_txn = txn; + err = mdbx_txn_commit(c_txn); + assert(err == MDBX_THREAD_MISMATCH); + ok = ok && err == MDBX_THREAD_MISMATCH; + err = mdbx_txn_abort(c_txn); + assert(err == MDBX_THREAD_MISMATCH); + ok = ok && err == MDBX_THREAD_MISMATCH; + err = mdbx_txn_break(c_txn); + assert(err == MDBX_SUCCESS); + ok = ok && err == MDBX_SUCCESS; + err = mdbx_txn_reset(c_txn); + assert(err == MDBX_EINVAL); + ok = ok && err == MDBX_EINVAL; + }); + + s1.count_down(); + s2.wait(); + txn.commit(); + txn = env.start_write(); + s3.count_down(); + + t.join(); + txn.abort(); + } + + std::cout << (ok ? "OK\n" : "FAIL\n"); + return ok ? 
EXIT_SUCCESS : EXIT_FAILURE; +} + +#endif /* __cpp_lib_latch */ From fef7c25a65dd1df5eb494321329c205c397b4cb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 4 Jan 2025 11:53:20 +0300 Subject: [PATCH 426/443] =?UTF-8?q?mdbx-make:=20`mkdir=20-p`=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20=D0=BF=D0=BE=D0=B2=D1=82=D0=BE=D1=80=D0=BD=D0=BE?= =?UTF-8?q?=D0=B9=20=D1=81=D0=B1=D0=BE=D1=80=D0=BA=D0=B8=20=D0=B1=D0=B5?= =?UTF-8?q?=D0=B7=20=D0=BE=D1=87=D0=B8=D1=81=D1=82=D0=BA=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GNUmakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GNUmakefile b/GNUmakefile index 65827d0d..b0e669e6 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -296,7 +296,7 @@ lib-shared libmdbx.$(SO_SUFFIX): mdbx-dylib.o $(call select_by,MDBX_BUILD_CXX,md ninja: cmake-build cmake-build: @echo "-G Ninja . && cmake --build ." - $(QUIET)mkdir @cmake-ninja-build && $(CMAKE) $(CMAKE_OPT) -G Ninja -S . -B @cmake-ninja-build && $(CMAKE) --build @cmake-ninja-build + $(QUIET)mkdir -p @cmake-ninja-build && $(CMAKE) $(CMAKE_OPT) -G Ninja -S . 
-B @cmake-ninja-build && $(CMAKE) --build @cmake-ninja-build #> dist-cutoff-begin ifeq ($(wildcard mdbx.c),mdbx.c) From 5350ed8a3b992c3ba0b07317a406b35460824254 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 4 Jan 2025 11:54:11 +0300 Subject: [PATCH 427/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ChangeLog.md b/ChangeLog.md index c055445e..5b327c0b 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -15,9 +15,11 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/libmdbx Новое: - - В API добавлена функция `mdbx_cursor_count_ex()` позволяющая получить как количество мульти-значений + - В C API добавлена функция `mdbx_cursor_count_ex()` позволяющая получить как количество мульти-значений соответствующих текущему ключу, так и информацию о вложенном дереве хранящем эти значения. + - В C++ API добавлен метод `mdbx::txn::make_broken()` аналогичный `mdbx_txn_break()`. + Исправления: - Устранён регресс неразблокировки мьютекса при попытки повторного закрытия dbi-дескриптора, @@ -86,6 +88,11 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/libmdbx то выполняется подстройка dirty-pages-limit при старте каждой не-вложенной пишущей транзакций, исходя из объёма доступного ОЗУ и размера БД. + - Теперь в режиме `MDBX_NOSTICKYTHREADS` допускается commit/abort вложенных транзакций из любого треда/потока. + + - Теперь при попытке запуска вложенных транзакций в режиме `MDBX_WRITEMAP` производится + логирование и возврат ошибки `MDBX_INCOMPATIBLE`. 
+ -------------------------------------------------------------------------------- From bad6e3c2e2f3b23d3bd702212ef915c89363cebc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 21 Dec 2024 09:19:27 +0300 Subject: [PATCH 428/443] =?UTF-8?q?mdbx:=20=D0=BE=D1=87=D0=B8=D1=81=D1=82?= =?UTF-8?q?=D0=BA=D0=B0=20=D1=84=D0=BB=D0=B0=D0=B6=D0=BA=D0=B0=20`ENV=5FTX?= =?UTF-8?q?KEY`=20=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B8=20`rthc=5Fdtor()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit На штатную работу это никак не влияет, но немного облегчит разбор ситуаций когда глобальный конструктор не вызывается, либо делается попытка вызвать его дважды (из-за ошибок rtc/libc, etc). --- src/tls.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tls.c b/src/tls.c index 7590c65f..79f228f2 100644 --- a/src/tls.c +++ b/src/tls.c @@ -515,6 +515,7 @@ __cold void rthc_dtor(const uint32_t current_pid) { continue; if (!(env->flags & ENV_TXKEY)) continue; + env->flags -= ENV_TXKEY; reader_slot_t *const begin = &env->lck_mmap.lck->rdt[0]; reader_slot_t *const end = &env->lck_mmap.lck->rdt[env->max_readers]; thread_key_delete(env->me_txkey); From 5ba257fafc65bd473fcc2e71bbacc5f7be2d2eee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 24 Dec 2024 20:08:49 +0300 Subject: [PATCH 429/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=83=D0=BF=D1=83=D1=89=D0=B5?= =?UTF-8?q?=D0=BD=D0=BD=D0=BE=D0=B3=D0=BE=20`static`=20=D0=B4=D0=BB=D1=8F?= =?UTF-8?q?=20`txl=5Freserve()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/txl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/txl.c b/src/txl.c index 3c64e085..c9830f81 
100644 --- a/src/txl.c +++ b/src/txl.c @@ -37,7 +37,7 @@ void txl_free(txl_t txl) { osal_free(txl - 1); } -int txl_reserve(txl_t __restrict *__restrict ptxl, const size_t wanna) { +static int txl_reserve(txl_t __restrict *__restrict ptxl, const size_t wanna) { const size_t allocated = (size_t)MDBX_PNL_ALLOCLEN(*ptxl); assert(MDBX_PNL_GETSIZE(*ptxl) <= txl_max && MDBX_PNL_ALLOCLEN(*ptxl) >= MDBX_PNL_GETSIZE(*ptxl)); if (likely(allocated >= wanna)) From faa9753d2dcaf71dc136b245bafac45c6fdb311d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 5 Jan 2025 14:35:39 +0300 Subject: [PATCH 430/443] =?UTF-8?q?mdbx:=20=D0=BF=D0=B5=D1=80=D0=B5=D0=BC?= =?UTF-8?q?=D0=B5=D1=89=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B8=20=D0=BA=D0=BE?= =?UTF-8?q?=D1=80=D1=80=D0=B5=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA?= =?UTF-8?q?=D0=B0=20=D0=BA=D0=BE=D0=BC=D0=BC=D0=B5=D0=BD=D1=82=D0=B0=D1=80?= =?UTF-8?q?=D0=B8=D1=8F=20=D1=80=D0=B0=D0=B7=D0=BC=D0=B5=D1=87=D0=B0=D1=8E?= =?UTF-8?q?=D1=89=D0=B5=D0=B3=D0=BE=20=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BD=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=BE=D0=BB=D1=8F=20=D0=BF?= =?UTF-8?q?=D0=B8=D1=88=D1=83=D1=89=D0=B5=D0=B9=20=D1=82=D1=80=D0=B0=D0=BD?= =?UTF-8?q?=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/internals.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/internals.h b/src/internals.h index 8af6995f..bb7fabfa 100644 --- a/src/internals.h +++ b/src/internals.h @@ -210,7 +210,6 @@ struct MDBX_txn { } to; struct { troika_t troika; - /* In write txns, array of cursors for each DB */ pnl_t __restrict repnl; /* Reclaimed GC pages */ struct { /* The list of reclaimed txn-ids from GC */ @@ -249,6 +248,7 @@ struct MDBX_txn { size_t writemap_dirty_npages; size_t writemap_spilled_npages; }; + /* In write txns, next is located the array of 
cursors for each DB */ } tw; }; }; From b00e8ea13f7bbfc55bd6396e63caf92de9237267 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 7 Jan 2025 19:34:06 +0300 Subject: [PATCH 431/443] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20`txl=5Fconta?= =?UTF-8?q?in()`=20=D0=B2=20`audit()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/audit.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/audit.c b/src/audit.c index f5b20f4a..91e18873 100644 --- a/src/audit.c +++ b/src/audit.c @@ -46,11 +46,7 @@ __cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, bool don return MDBX_CORRUPTED; } txnid_t id = unaligned_peek_u64(4, key.iov_base); - if (txn->tw.gc.retxl) { - for (size_t i = 1; i <= MDBX_PNL_GETSIZE(txn->tw.gc.retxl); ++i) - if (id == txn->tw.gc.retxl[i]) - goto skip; - } else if (id <= txn->tw.gc.last_reclaimed) + if (txn->tw.gc.retxl ? 
txl_contain(txn->tw.gc.retxl, id) : (id <= txn->tw.gc.last_reclaimed)) goto skip; } gc += *(pgno_t *)data.iov_base; From 16997a88b01eb7bd13cb6417596d7d10027fb112 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 7 Jan 2025 16:10:26 +0300 Subject: [PATCH 432/443] =?UTF-8?q?mdbx-tests:=20=D1=83=D0=BC=D0=B5=D0=BD?= =?UTF-8?q?=D1=8C=D1=88=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BA=D0=BE=D0=BB-?= =?UTF-8?q?=D0=B2=D0=B0=20=D0=B8=D1=82=D0=B5=D1=80=D0=B0=D1=86=D0=B8=D0=B9?= =?UTF-8?q?=20=D0=B2=20crunched-delete.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/extra/crunched_delete.c++ | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/extra/crunched_delete.c++ b/test/extra/crunched_delete.c++ index fde48950..5655fd8a 100644 --- a/test/extra/crunched_delete.c++ +++ b/test/extra/crunched_delete.c++ @@ -7,10 +7,8 @@ #if MDBX_DEBUG || !defined(NDEBUG) || defined(__APPLE__) || defined(_WIN32) #define NN 1024 -#elif defined(MDBX_CI) -#define NN 4096 #else -#define NN 16384 +#define NN 4096 #endif std::string format_va(const char *fmt, va_list ap) { From 3a02ca88ea595919932e6277d5cf439ccccca509 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 7 Jan 2025 15:18:27 +0300 Subject: [PATCH 433/443] =?UTF-8?q?mdbx-make:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=86=D0=B5=D0=BB=D0=B8?= =?UTF-8?q?=20`ctest`=20=D0=B8=20=D0=B5=D1=91=20=D0=BF=D1=80=D0=B8=D0=B2?= =?UTF-8?q?=D1=8F=D0=B7=D0=BA=D0=B0=20=D0=BA=20`make=20check`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GNUmakefile | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index b0e669e6..355ce295 100644 --- 
a/GNUmakefile +++ b/GNUmakefile @@ -54,6 +54,8 @@ CFLAGS_EXTRA ?= LD ?= ld CMAKE ?= cmake CMAKE_OPT ?= +CTEST ?= ctest +CTEST_OPT ?= # target directory for `make dist` DIST_DIR ?= dist @@ -295,9 +297,13 @@ lib-shared libmdbx.$(SO_SUFFIX): mdbx-dylib.o $(call select_by,MDBX_BUILD_CXX,md ninja: cmake-build cmake-build: - @echo "-G Ninja . && cmake --build ." + @echo " RUN: cmake -G Ninja && cmake --build" $(QUIET)mkdir -p @cmake-ninja-build && $(CMAKE) $(CMAKE_OPT) -G Ninja -S . -B @cmake-ninja-build && $(CMAKE) --build @cmake-ninja-build +ctest: cmake-build + @echo " RUN: ctest .." + $(QUIET)$(CTEST) --test-dir @cmake-ninja-build --parallel `(nproc || sysctl -n hw.ncpu || echo 2) 2>/dev/null` --schedule-random $(CTEST_OPT) + #> dist-cutoff-begin ifeq ($(wildcard mdbx.c),mdbx.c) #< dist-cutoff-end @@ -418,7 +424,7 @@ MDBX_SMOKE_EXTRA ?= check: DESTDIR = $(shell pwd)/@check-install check: CMAKE_OPT = -Werror=dev -check: smoke-assertion ninja dist install test +check: smoke-assertion ninja dist install test ctest smoke-assertion: MDBX_BUILD_OPTIONS:=$(strip $(MDBX_BUILD_OPTIONS) -DMDBX_FORCE_ASSERTIONS=1 -UNDEBUG -DMDBX_DEBUG=0) smoke-assertion: smoke From 820bd458188e3da7376564777c75c0172dd96dd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 10 Jan 2025 22:09:07 +0300 Subject: [PATCH 434/443] =?UTF-8?q?mdbx++:=20=D0=B8=D1=81=D0=BF=D0=BE?= =?UTF-8?q?=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20=D1=82?= =?UTF-8?q?=D0=BE=D0=BB=D1=8C=D0=BA=D0=BE=20`default`-=D0=B7=D0=BD=D0=B0?= =?UTF-8?q?=D1=87=D0=B5=D0=BD=D0=B8=D0=B9=20=D0=B4=D0=BB=D1=8F=20=D0=B3?= =?UTF-8?q?=D0=B5=D0=BE=D0=BC=D0=B5=D1=82=D1=80=D0=B8=D0=B8=20=D0=BF=D0=BE?= =?UTF-8?q?-=D1=83=D0=BC=D0=BE=D0=BB=D1=87=D0=B0=D0=BD=D0=B8=D1=8E=20?= =?UTF-8?q?=D0=B2=D0=BC=D0=B5=D1=81=D1=82=D0=BE=20min/max.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
mdbx.h++ | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 2d895d03..e96c2012 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3071,7 +3071,7 @@ public: }; /// \brief The lower bound of database size in bytes. - intptr_t size_lower{minimal_value}; + intptr_t size_lower{default_value}; /// \brief The size in bytes to setup the database size for now. /// \details It is recommended always pass \ref default_value in this @@ -3088,7 +3088,7 @@ public: /// robustly because there may be a lack of appropriate system resources /// (which are extremely volatile in a multi-process multi-threaded /// environment). - intptr_t size_upper{maximal_value}; + intptr_t size_upper{default_value}; /// \brief The growth step in bytes, must be greater than zero to allow the /// database to grow. @@ -3105,12 +3105,12 @@ public: intptr_t pagesize{default_value}; inline geometry &make_fixed(intptr_t size) noexcept; - inline geometry &make_dynamic(intptr_t lower = minimal_value, intptr_t upper = maximal_value) noexcept; + inline geometry &make_dynamic(intptr_t lower = default_value, intptr_t upper = default_value) noexcept; MDBX_CXX11_CONSTEXPR geometry() noexcept {} MDBX_CXX11_CONSTEXPR geometry(const geometry &) noexcept = default; MDBX_CXX11_CONSTEXPR geometry(intptr_t size_lower, intptr_t size_now = default_value, - intptr_t size_upper = maximal_value, intptr_t growth_step = default_value, + intptr_t size_upper = default_value, intptr_t growth_step = default_value, intptr_t shrink_threshold = default_value, intptr_t pagesize = default_value) noexcept : size_lower(size_lower), size_now(size_now), size_upper(size_upper), growth_step(growth_step), shrink_threshold(shrink_threshold), pagesize(pagesize) {} From 9c8f90b7131cfe85f507d7b977f1b340a93cf12d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 10 Jan 2025 23:17:50 +0300 Subject: [PATCH 
435/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20=D1=8D=D0=B2=D1=80=D0=B8=D1=81=D1=82?= =?UTF-8?q?=D0=B8=D0=BA=20=D0=B4=D0=BB=D1=8F=20=D0=B2=D1=8B=D0=B1=D0=BE?= =?UTF-8?q?=D1=80=D0=B0/=D0=BF=D0=BE=D0=B4=D1=81=D1=82=D1=80=D0=BE=D0=B9?= =?UTF-8?q?=D0=BA=D0=B8=20default-=D0=B7=D0=BD=D0=B0=D1=87=D0=B5=D0=BD?= =?UTF-8?q?=D0=B8=D0=B9=20=D0=B2=20`mdbx=5Fenv=5Fset=5Fgeometry()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/api-env.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/api-env.c b/src/api-env.c index 1787f7fe..dfd7ca6f 100644 --- a/src/api-env.c +++ b/src/api-env.c @@ -956,7 +956,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t si const bool inside_txn = txn0_owned && env->txn; bool should_unlock = false; -#if MDBX_DEBUG +#if MDBX_DEBUG && 0 /* минимальные шаги для проверки/отладки уже не нужны */ if (growth_step < 0) { growth_step = 1; if (shrink_threshold < 0) @@ -1045,9 +1045,10 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t si goto bailout; } + const bool size_lower_default = size_lower < 0; if (size_lower <= 0) { - size_lower = MIN_MAPSIZE; - if (MIN_MAPSIZE / pagesize < MIN_PAGENO) + size_lower = (size_lower == 0) ? 
MIN_MAPSIZE : pagesize * MDBX_WORDBITS; + if (size_lower / pagesize < MIN_PAGENO) size_lower = MIN_PAGENO * pagesize; } if (size_lower >= INTPTR_MAX) { @@ -1056,11 +1057,6 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t si size_lower = pagesize * (MAX_PAGENO + 1); } - if (size_now <= 0) { - size_now = size_lower; - if (size_upper >= size_lower && size_now > size_upper) - size_now = size_upper; - } if (size_now >= INTPTR_MAX) { size_now = reasonable_db_maxsize(); if ((size_t)size_now / pagesize > MAX_PAGENO + 1) @@ -1068,9 +1064,9 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t si } if (size_upper <= 0) { - if (growth_step == 0 || size_upper == 0) + if ((growth_step == 0 || size_upper == 0) && size_now >= size_lower) size_upper = size_now; - else if (size_now >= reasonable_db_maxsize() / 2) + else if (size_now <= 0 || size_now >= reasonable_db_maxsize() / 2) size_upper = reasonable_db_maxsize(); else if ((size_t)size_now >= MAX_MAPSIZE32 / 2 && (size_t)size_now <= MAX_MAPSIZE32 / 4 * 3) size_upper = MAX_MAPSIZE32; @@ -1089,13 +1085,21 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t si } if (unlikely(size_lower < (intptr_t)MIN_MAPSIZE || size_lower > size_upper)) { + /* паранойя на случай переполнения при невероятных значениях */ rc = MDBX_EINVAL; goto bailout; } + if (size_now <= 0) { + size_now = size_lower; + if (size_upper >= size_lower && size_now > size_upper) + size_now = size_upper; + } + if ((uint64_t)size_lower / pagesize < MIN_PAGENO) { size_lower = pagesize * MIN_PAGENO; if (unlikely(size_lower > size_upper)) { + /* паранойя на случай переполнения при невероятных значениях */ rc = MDBX_EINVAL; goto bailout; } @@ -1135,12 +1139,17 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t si if (growth_step < 0) { growth_step = ((size_t)(size_upper - size_lower)) / 42; - if (growth_step > size_lower && size_lower < (intptr_t)MEGABYTE) + 
if (!size_lower_default && growth_step > size_lower && size_lower < (intptr_t)MEGABYTE) growth_step = size_lower; + else if (growth_step / size_lower > 64) + growth_step = size_lower << 6; if (growth_step < 65536) growth_step = 65536; - if ((size_t)growth_step > MAX_MAPSIZE / 64) - growth_step = MAX_MAPSIZE / 64; + if ((size_upper - size_lower) / growth_step > 65536) + growth_step = (size_upper - size_lower) >> 16; + const intptr_t growth_step_limit = MEGABYTE * ((MDBX_WORDBITS > 32) ? 4096 : 256); + if (growth_step > growth_step_limit) + growth_step = growth_step_limit; } if (growth_step == 0 && shrink_threshold > 0) growth_step = 1; From dcc8708d6a6361b23d56db400115616d75a015f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 11 Jan 2025 02:35:48 +0300 Subject: [PATCH 436/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog=20(=D0=B7=D0=B0?= =?UTF-8?q?=D0=BF=D0=BB=D0=B0=D0=BD=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=20?= =?UTF-8?q?=D0=B2=D1=8B=D0=BF=D1=83=D1=81=D0=BA=20v0.13.3).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ChangeLog.md b/ChangeLog.md index 5b327c0b..7430a2b7 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -5,7 +5,7 @@ English version [by liar Google](https://libmdbx-dqdkfa-ru.translate.goog/md__ch and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/libmdbx.dqdkfa.ru/md__change_log.html). -## v0.13.3 в процессе +## v0.13.3 выпуск запланирован на 2025-01-12 Благодарности: @@ -93,6 +93,12 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/libmdbx - Теперь при попытке запуска вложенных транзакций в режиме `MDBX_WRITEMAP` производится логирование и возврат ошибки `MDBX_INCOMPATIBLE`. 
+ - Теперь в C++ API в конструкторах/инициализаторах и методах связанных с формированием геометрии БД + по-умолчанию используются только `default`-значения, в том числе для минимального и максимального + размера. В результате, при создании БД с геометрией по-умолчанию не происходит выбор максимального + размера страницы из-за очень большого максимального размера БД. + + - Внутри `mdbx_env_set_geometry()` доработаны эвристики для подбора параметров геометрии БД запрошенных пользователем "по-умолчанию". -------------------------------------------------------------------------------- From b75e16f4f85fd54048db02dfd00edeb561b4d183 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 11 Jan 2025 23:16:55 +0300 Subject: [PATCH 437/443] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20null-dereference=20=D1=80?= =?UTF-8?q?=D0=B5=D0=B3=D1=80=D0=B5=D1=81=D1=81=D0=B0=20=D0=B2=20=D1=80?= =?UTF-8?q?=D0=B5=D0=B6=D0=B8=D0=BC=D0=B5=20readonly-without-lck.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lck-posix.c | 11 ++++++++--- src/txn.c | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/lck-posix.c b/src/lck-posix.c index 4cf83c46..41f49f48 100644 --- a/src/lck-posix.c +++ b/src/lck-posix.c @@ -823,10 +823,11 @@ MDBX_INTERNAL void lck_rdt_unlock(MDBX_env *env) { int lck_txn_lock(MDBX_env *env, bool dont_wait) { TRACE("%swait %s", dont_wait ? 
"dont-" : "", ">>"); + eASSERT(env, env->basal_txn || (env->lck == lckless_stub(env) && (env->flags & MDBX_RDONLY))); jitter4testing(true); const int err = osal_ipclock_lock(env, &env->lck->wrt_lock, dont_wait); int rc = err; - if (likely(!MDBX_IS_ERROR(err))) { + if (likely(env->basal_txn && !MDBX_IS_ERROR(err))) { eASSERT(env, !env->basal_txn->owner || err == /* если другой поток в этом-же процессе завершился не освободив блокировку */ MDBX_RESULT_TRUE); @@ -839,8 +840,12 @@ int lck_txn_lock(MDBX_env *env, bool dont_wait) { void lck_txn_unlock(MDBX_env *env) { TRACE("%s", ">>"); - eASSERT(env, env->basal_txn->owner == osal_thread_self()); - env->basal_txn->owner = 0; + if (env->basal_txn) { + eASSERT(env, !env->basal_txn || env->basal_txn->owner == osal_thread_self()); + env->basal_txn->owner = 0; + } else { + eASSERT(env, env->lck == lckless_stub(env) && (env->flags & MDBX_RDONLY)); + } int err = osal_ipclock_unlock(env, &env->lck->wrt_lock); TRACE("<< err %d", err); if (unlikely(err != MDBX_SUCCESS)) diff --git a/src/txn.c b/src/txn.c index db3af6ed..478f4d07 100644 --- a/src/txn.c +++ b/src/txn.c @@ -503,7 +503,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { txn->flags = MDBX_TXN_RDONLY | MDBX_TXN_FINISHED; return MDBX_SUCCESS; } - txn->owner = (uintptr_t)r->tid.weak; + txn->owner = likely(r) ? (uintptr_t)r->tid.weak : ((env->flags & MDBX_NOSTICKYTHREADS) ? 
0 : osal_thread_self()); if ((env->flags & MDBX_NOSTICKYTHREADS) == 0 && env->txn && unlikely(env->basal_txn->owner == txn->owner) && (globals.runtime_flags & MDBX_DBG_LEGACY_OVERLAP) == 0) return MDBX_TXN_OVERLAPPING; From 92a49c7c8c30d9ae6a903c734091a2e9632423e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 Jan 2025 02:16:44 +0300 Subject: [PATCH 438/443] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=80=D0=B8=D1=81=D0=BA=D0=B0?= =?UTF-8?q?=20=D0=BF=D0=BE=D1=82=D0=B5=D1=80=D0=B8/=D0=BF=D0=B5=D1=80?= =?UTF-8?q?=D0=B5=D0=B7=D0=B0=D0=BF=D0=B8=D1=81=D0=B8=20`errno`=20=D0=BF?= =?UTF-8?q?=D1=80=D0=B8=20=D0=BD=D0=B5=D0=BE=D0=B6=D0=B8=D0=B4=D0=B0=D0=BD?= =?UTF-8?q?=D0=BD=D1=8B=D1=85=20=D0=BE=D1=88=D0=B8=D0=B1=D0=BA=D0=B0=D1=85?= =?UTF-8?q?=20=D0=B2=20`close()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/osal.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/osal.c b/src/osal.c index 01f0b11d..dd1c7b4b 100644 --- a/src/osal.c +++ b/src/osal.c @@ -1343,6 +1343,7 @@ MDBX_INTERNAL int osal_openfile(const enum osal_openfile_purpose purpose, const assert(stub_fd2 == -1); *fd = dup(stub_fd2 = *fd); } + const int err = errno; if (stub_fd0 != -1) close(stub_fd0); if (stub_fd1 != -1) @@ -1361,7 +1362,7 @@ MDBX_INTERNAL int osal_openfile(const enum osal_openfile_purpose purpose, const #endif /* STDIN_FILENO == 0 && STDERR_FILENO == 2 */ if (*fd < 0) - return errno; + return err; #if defined(FD_CLOEXEC) && !defined(O_CLOEXEC) const int fd_flags = fcntl(*fd, F_GETFD); From 0297136648a368fed6fd1e31f61071be4a562177 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 Jan 2025 02:32:12 +0300 Subject: [PATCH 439/443] 
=?UTF-8?q?mdbx:=20=D1=83=D0=BB=D1=83=D1=87=D1=88?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B0=D0=B2=D1=82=D0=BE-=D0=BF?= =?UTF-8?q?=D0=B5=D1=80=D0=B5=D0=BA=D0=BB=D1=8E=D1=87=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D1=8F=20=D0=B2=20=D1=80=D0=B5=D0=B6=D0=B8=D0=BC=20without-lck?= =?UTF-8?q?=20=D0=BF=D1=80=D0=B8=20=D0=BE=D1=82=D0=BA=D1=80=D1=8B=D1=82?= =?UTF-8?q?=D0=B8=D0=B8=20=D0=91=D0=94=20=D0=BD=D0=B0=20read-only-=D0=BD?= =?UTF-8?q?=D0=BE=D1=81=D0=B8=D1=82=D0=B5=D0=BB=D0=B5.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lck.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/lck.c b/src/lck.c index 1c7c3811..3b8b4a6e 100644 --- a/src/lck.c +++ b/src/lck.c @@ -135,25 +135,22 @@ __cold int lck_setup(MDBX_env *env, mdbx_mode_t mode) { switch (err) { default: return err; - case MDBX_ENOFILE: case MDBX_EACCESS: case MDBX_EPERM: if (!F_ISSET(env->flags, MDBX_RDONLY | MDBX_EXCLUSIVE)) return err; break; + case MDBX_ENOFILE: case MDBX_EROFS: if ((env->flags & MDBX_RDONLY) == 0) return err; - break; - } - - if (err != MDBX_ENOFILE) { /* ENSURE the file system is read-only */ err = osal_check_fs_rdonly(env->lazy_fd, env->pathname.lck, err); if (err != MDBX_SUCCESS && /* ignore ERROR_NOT_SUPPORTED for exclusive mode */ !(err == MDBX_ENOSYS && (env->flags & MDBX_EXCLUSIVE))) return err; + break; } /* LY: without-lck mode (e.g. 
exclusive or on read-only filesystem) */ From 8408a2eed3e699ee6d74257cfd3064689bcf51d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 Jan 2025 11:54:24 +0300 Subject: [PATCH 440/443] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=80=D0=B0=D0=B7=D0=B4=D0=B5?= =?UTF-8?q?=D0=BB=D0=B8=D1=82=D0=B5=D0=BB=D1=8F=20=D0=BC=D0=B5=D0=B6=D0=B4?= =?UTF-8?q?=D1=83=20`MDBX=5FBUILD=5FFLAGS=5FCONFIG`=20=D0=B8=20`MDBX=5FBUI?= =?UTF-8?q?LD=5FFLAGS`=20(=D0=BA=D0=BE=D1=81=D0=BC=D0=B5=D1=82=D0=B8=D0=BA?= =?UTF-8?q?=D0=B0).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/global.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/global.c b/src/global.c index 69340319..e8d13cf4 100644 --- a/src/global.c +++ b/src/global.c @@ -424,6 +424,9 @@ __dll_export #ifdef MDBX_BUILD_FLAGS_CONFIG MDBX_BUILD_FLAGS_CONFIG #endif /* MDBX_BUILD_FLAGS_CONFIG */ +#if defined(MDBX_BUILD_FLAGS_CONFIG) && defined(MDBX_BUILD_FLAGS) + " " +#endif #ifdef MDBX_BUILD_FLAGS MDBX_BUILD_FLAGS #endif /* MDBX_BUILD_FLAGS */ From e5fe2796325f7017f74827c0cb52a2b2ee3b370f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 Jan 2025 14:30:39 +0300 Subject: [PATCH 441/443] =?UTF-8?q?mdbx:=20=D0=BB=D0=BE=D0=B3=D0=B8=D1=80?= =?UTF-8?q?=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20=D0=BE=D1=88=D0=B8=D0=B1?= =?UTF-8?q?=D0=BE=D0=BA=20=D0=BF=D1=80=D0=B8=20=D0=BE=D1=82=D0=BA=D1=80?= =?UTF-8?q?=D1=8B=D1=82=D0=B8=D0=B8=20lck-=D1=84=D0=B0=D0=B9=D0=BB=D0=B0.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lck.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/lck.c b/src/lck.c index 3b8b4a6e..097aeb69 
100644 --- a/src/lck.c +++ b/src/lck.c @@ -133,28 +133,33 @@ __cold int lck_setup(MDBX_env *env, mdbx_mode_t mode) { int err = osal_openfile(MDBX_OPEN_LCK, env, env->pathname.lck, &env->lck_mmap.fd, mode); if (err != MDBX_SUCCESS) { switch (err) { - default: - return err; case MDBX_EACCESS: case MDBX_EPERM: - if (!F_ISSET(env->flags, MDBX_RDONLY | MDBX_EXCLUSIVE)) - return err; - break; + if (F_ISSET(env->flags, MDBX_RDONLY | MDBX_EXCLUSIVE)) + break; + __fallthrough /* fall through */; case MDBX_ENOFILE: case MDBX_EROFS: - if ((env->flags & MDBX_RDONLY) == 0) - return err; - /* ENSURE the file system is read-only */ - err = osal_check_fs_rdonly(env->lazy_fd, env->pathname.lck, err); - if (err != MDBX_SUCCESS && - /* ignore ERROR_NOT_SUPPORTED for exclusive mode */ - !(err == MDBX_ENOSYS && (env->flags & MDBX_EXCLUSIVE))) - return err; - break; + if (env->flags & MDBX_RDONLY) { + /* ENSURE the file system is read-only */ + int err_rofs = osal_check_fs_rdonly(env->lazy_fd, env->pathname.lck, err); + if (err_rofs == MDBX_SUCCESS || + /* ignore ERROR_NOT_SUPPORTED for exclusive mode */ + (err_rofs == MDBX_ENOSYS && (env->flags & MDBX_EXCLUSIVE))) + break; + if (err_rofs != MDBX_ENOSYS) + err = err_rofs; + } + __fallthrough /* fall through */; + default: + ERROR("unable to open lck-file %" MDBX_PRIsPATH ", env-flags 0x%X, err %d", env->pathname.lck, env->flags, err); + return err; } /* LY: without-lck mode (e.g. 
exclusive or on read-only filesystem) */ env->lck_mmap.fd = INVALID_HANDLE_VALUE; + NOTICE("continue %" MDBX_PRIsPATH " within without-lck mode, env-flags 0x%X, lck-error %d", env->pathname.dxb, + env->flags, err); } rthc_lock(); From c751977bf73b9d69e149f1539411294be0714ff5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 Jan 2025 14:38:20 +0300 Subject: [PATCH 442/443] =?UTF-8?q?mdbx-tools:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BB=D0=BE=D0=B3=D0=B8?= =?UTF-8?q?=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=BE=D1=88=D0=B8?= =?UTF-8?q?=D0=B1=D0=BE=D0=BA/=D0=BF=D1=80=D0=B5=D0=B4=D1=83=D0=BF=D1=80?= =?UTF-8?q?=D0=B5=D0=B6=D0=B4=D0=B5=D0=BD=D0=B8=D0=B9=20=D0=BF=D0=BE=20?= =?UTF-8?q?=D0=B2=D1=81=D0=B5=20=D1=83=D1=82=D0=B8=D0=BB=D0=B8=D1=82=D1=8B?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tools/copy.c | 16 ++++++++++++++++ src/tools/drop.c | 16 ++++++++++++++++ src/tools/dump.c | 16 ++++++++++++++++ src/tools/load.c | 21 +++++++++++++++++++-- src/tools/stat.c | 18 +++++++++++++++++- 5 files changed, 84 insertions(+), 3 deletions(-) diff --git a/src/tools/copy.c b/src/tools/copy.c index 122bea72..6b781025 100644 --- a/src/tools/copy.c +++ b/src/tools/copy.c @@ -53,6 +53,21 @@ static void usage(const char *prog) { exit(EXIT_FAILURE); } +static void logger(MDBX_log_level_t level, const char *function, int line, const char *fmt, va_list args) { + static const char *const prefixes[] = { + "!!!fatal: ", // 0 fatal + " ! 
", // 1 error + " ~ ", // 2 warning + " ", // 3 notice + " //", // 4 verbose + }; + if (level < MDBX_LOG_DEBUG) { + if (function && line) + fprintf(stderr, "%s", prefixes[level]); + vfprintf(stderr, fmt, args); + } +} + int main(int argc, char *argv[]) { int rc; MDBX_env *env = nullptr; @@ -117,6 +132,7 @@ int main(int argc, char *argv[]) { mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, argv[1], (argc == 2) ? "stdout" : argv[2]); fflush(nullptr); + mdbx_setup_debug(MDBX_LOG_NOTICE, MDBX_DBG_DONTCHANGE, logger); } act = "opening environment"; diff --git a/src/tools/drop.c b/src/tools/drop.c index 9f5cfbfd..3c6e0f10 100644 --- a/src/tools/drop.c +++ b/src/tools/drop.c @@ -57,6 +57,21 @@ static void error(const char *func, int rc) { fprintf(stderr, "%s: %s() error %d %s\n", prog, func, rc, mdbx_strerror(rc)); } +static void logger(MDBX_log_level_t level, const char *function, int line, const char *fmt, va_list args) { + static const char *const prefixes[] = { + "!!!fatal: ", // 0 fatal + " ! ", // 1 error + " ~ ", // 2 warning + " ", // 3 notice + " //", // 4 verbose + }; + if (level < MDBX_LOG_DEBUG) { + if (function && line) + fprintf(stderr, "%s", prefixes[level]); + vfprintf(stderr, fmt, args); + } +} + int main(int argc, char *argv[]) { int i, rc; MDBX_env *env; @@ -126,6 +141,7 @@ int main(int argc, char *argv[]) { printf("mdbx_drop %s (%s, T-%s)\nRunning for %s/%s...\n", mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, envname, subname ? 
subname : "@MAIN"); fflush(nullptr); + mdbx_setup_debug(MDBX_LOG_NOTICE, MDBX_DBG_DONTCHANGE, logger); } rc = mdbx_env_create(&env); diff --git a/src/tools/dump.c b/src/tools/dump.c index f7c1a49d..12e1f88f 100644 --- a/src/tools/dump.c +++ b/src/tools/dump.c @@ -223,6 +223,21 @@ static void usage(void) { exit(EXIT_FAILURE); } +static void logger(MDBX_log_level_t level, const char *function, int line, const char *fmt, va_list args) { + static const char *const prefixes[] = { + "!!!fatal: ", // 0 fatal + " ! ", // 1 error + " ~ ", // 2 warning + " ", // 3 notice + " //", // 4 verbose + }; + if (level < MDBX_LOG_DEBUG) { + if (function && line) + fprintf(stderr, "%s", prefixes[level]); + vfprintf(stderr, fmt, args); + } +} + static int equal_or_greater(const MDBX_val *a, const MDBX_val *b) { return (a->iov_len == b->iov_len && memcmp(a->iov_base, b->iov_base, a->iov_len) == 0) ? 0 : 1; } @@ -330,6 +345,7 @@ int main(int argc, char *argv[]) { fprintf(stderr, "mdbx_dump %s (%s, T-%s)\nRunning for %s...\n", mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, envname); fflush(nullptr); + mdbx_setup_debug(MDBX_LOG_NOTICE, MDBX_DBG_DONTCHANGE, logger); } err = mdbx_env_create(&env); diff --git a/src/tools/load.c b/src/tools/load.c index efdd50c9..7bb8c822 100644 --- a/src/tools/load.c +++ b/src/tools/load.c @@ -51,6 +51,21 @@ static void error(const char *func, int rc) { } } +static void logger(MDBX_log_level_t level, const char *function, int line, const char *fmt, va_list args) { + static const char *const prefixes[] = { + "!!!fatal: ", // 0 fatal + " ! 
", // 1 error + " ~ ", // 2 warning + " ", // 3 notice + " //", // 4 verbose + }; + if (level < MDBX_LOG_DEBUG) { + if (function && line) + fprintf(stderr, "%s", prefixes[level]); + vfprintf(stderr, fmt, args); + } +} + static char *valstr(char *line, const char *item) { const size_t len = strlen(item); if (strncmp(line, item, len) != 0) @@ -544,10 +559,12 @@ int main(int argc, char *argv[]) { #endif /* !WINDOWS */ envname = argv[optind]; - if (!quiet) + if (!quiet) { printf("mdbx_load %s (%s, T-%s)\nRunning for %s...\n", mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, envname); - fflush(nullptr); + fflush(nullptr); + mdbx_setup_debug(MDBX_LOG_NOTICE, MDBX_DBG_DONTCHANGE, logger); + } dbuf.iov_len = 4096; dbuf.iov_base = osal_malloc(dbuf.iov_len); diff --git a/src/tools/stat.c b/src/tools/stat.c index 57c99b45..b3dd6811 100644 --- a/src/tools/stat.c +++ b/src/tools/stat.c @@ -92,6 +92,21 @@ static void error(const char *func, int rc) { fprintf(stderr, "%s: %s() error %d %s\n", prog, func, rc, mdbx_strerror(rc)); } +static void logger(MDBX_log_level_t level, const char *function, int line, const char *fmt, va_list args) { + static const char *const prefixes[] = { + "!!!fatal: ", // 0 fatal + " ! 
", // 1 error + " ~ ", // 2 warning + " ", // 3 notice + " //", // 4 verbose + }; + if (level < MDBX_LOG_DEBUG) { + if (function && line) + fprintf(stderr, "%s", prefixes[level]); + vfprintf(stderr, fmt, args); + } +} + int main(int argc, char *argv[]) { int opt, rc; MDBX_env *env; @@ -183,6 +198,7 @@ int main(int argc, char *argv[]) { printf("mdbx_stat %s (%s, T-%s)\nRunning for %s...\n", mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, envname); fflush(nullptr); + mdbx_setup_debug(MDBX_LOG_NOTICE, MDBX_DBG_DONTCHANGE, logger); } rc = mdbx_env_create(&env); @@ -287,7 +303,7 @@ int main(int argc, char *argv[]) { goto txn_abort; } if (rc == MDBX_RESULT_TRUE) - printf("Reader Table is empty\n"); + printf("Reader Table is absent\n"); else if (rc == MDBX_SUCCESS && rdrinfo > 1) { int dead; rc = mdbx_reader_check(env, &dead); From 92e2b6287eefcc727e3f80e355dec696db450477 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 Jan 2025 16:27:02 +0300 Subject: [PATCH 443/443] =?UTF-8?q?mdbx:=20=D0=B2=D1=8B=D0=BF=D1=83=D1=81?= =?UTF-8?q?=D0=BA=200.13.3=20"=D0=9A=D0=BE=D1=80=D0=BE=D0=BB=D1=91=D0=B2"?= =?UTF-8?q?=20(Korolev).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Поддерживающий выпуск с исправлением обнаруженных ошибок и устранением недочетов в день рождения и в память об [Серге́е Па́вловиче Королёве](https://ru.wikipedia.org/wiki/Королёв,_Сергей_Павлович), советском учёном и Главном конструкторе ракетно-космических систем. За перечнем доработок и изменений обращайтесь к [ChangeLog](https://libmdbx.dqdkfa.ru/md__change_log.html). 
git diff' stat: 67 files changed, 3514 insertions(+), 3004 deletions(-) Signed-off-by: Леонид Юрьев (Leonid Yuriev) --- ChangeLog.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/ChangeLog.md b/ChangeLog.md index 7430a2b7..16a9c5cc 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -5,7 +5,11 @@ English version [by liar Google](https://libmdbx-dqdkfa-ru.translate.goog/md__ch and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/libmdbx.dqdkfa.ru/md__change_log.html). -## v0.13.3 выпуск запланирован на 2025-01-12 +## v0.13.3 "Королёв" от 2025-01-12 + +Поддерживающий выпуск с исправлением обнаруженных ошибок и устранением недочетов +в день рождения и в память о [Серге́е Па́вловиче Королёве](https://ru.wikipedia.org/wiki/Королёв,_Сергей_Павлович), +советском учёном и Главном конструкторе ракетно-космических систем. Благодарности: @@ -20,6 +24,9 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/libmdbx - В C++ API добавлен метод `mdbx::txn::make_broken()` аналогичный `mdbx_txn_break()`. + - В утилиты mdbx_copy, mdbx_drop, mdbx_dump, mdbx_load, mdbx_stat добавлено логирование ошибок, + предупреждений и важных сообщений от libmdbx. + Исправления: - Устранён регресс неразблокировки мьютекса при попытки повторного закрытия dbi-дескриптора, @@ -72,6 +79,11 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/libmdbx Ошибка была внесена при рефакторинге, коммитом `2f2df1ee76ab137ee66d00af69a82a30dc0d6deb` чуть более 5 лет назад и долго оставалось не замеченной. + - Устранён SIGSEGV-регресс обращения к нулевому адресу при работе в режиме только-чтение без использования LCK-файла, + например при размещении БД на носителе доступном только для чтения. + + Ошибка была внесена при реализации функционала парковки читающих транзакций. + Изменение поведения: - Теперь при включении профилирования GC (сборка с опцией `MDBX_ENABLE_PROFGC=ON`)